From 4de2c09422c6f49991e82e22816a9a39dedddbab Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Thu, 27 Nov 2025 10:22:09 +0200 Subject: [PATCH 01/15] add script to dump mcp tool list --- cmd/src/mcp_tools.json | 1008 ++++++++++++++++++++++++++++++++++ scripts/gen-mcp-tool-json.sh | 21 + 2 files changed, 1029 insertions(+) create mode 100644 cmd/src/mcp_tools.json create mode 100755 scripts/gen-mcp-tool-json.sh diff --git a/cmd/src/mcp_tools.json b/cmd/src/mcp_tools.json new file mode 100644 index 0000000000..e38eba472d --- /dev/null +++ b/cmd/src/mcp_tools.json @@ -0,0 +1,1008 @@ +{ + "tools": [ + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. A commit search tool.\nSearch for commits in the repository. Find who made changes, when features were implemented, or track code history.\n\nKey features:\n- Search commit messages for relevant terms\n- Find commits by specific authors\n- Search for changes containing specific code\n- Filter by file paths or repository\n- Filter by date ranges with before/after\n- Regex pattern support\n\nLogic: All parameter types are combined with AND, but within each parameter type, multiple values use OR.\nExample: messageTerms=[\"bug\",\"fix\"] + authors=[\"jane\"] finds commits by jane with \"bug\" OR \"fix\" in message.\n\nExamples:\n\n \n Search for commits mentioning 'implement feature' or 'create feature' in the message\n calls the commit search tool with messageTerms=[\"implement feature\", \"create feature\"] repos=[\"github.com/myorg/repo\"]\n \n\t\n Search for commits that added pandas imports\n calls the commit search tool with contentTerms=[\"import pandas\"] repos=[\"github.com/myorg/repo\"]\n \n \n Find commits by jane.doe that changed files in the ui/components directory\n calls the commit search tool with authors=[\"jane.doe\"] files=[\"ui/components/**\"] repos=[\"github.com/myorg/frontend\"]\n \n \n Find changes to authentication code in May 2025\n 
calls the commit search tool with contentTerms=[\"auth\"] repos=[\"github.com/myorg/auth-service\", \"github.com/myorg/user-service\"] after=\"2025-05-01\" before=\"2025-05-31\"\n \n \n What has John been working on for the past month?\n calls the commit search tool with authors=[\"John\"] repos=[\"github.com/myorg/repo1\", \"github.com/myorg/repo2\"] after=\"1 month ago\"\n \n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repos" + ], + "properties": { + "after": { + "type": "string", + "description": "Search for commits after this date. Supports various formats including structured dates (e.g. \"YYYY-MM-DD\" or \"MM/DD/YYYY\", or \"november 2023\") and natural language (e.g. \"1 month ago\", \"last week\", or \"yesterday\")" + }, + "authors": { + "type": "array", + "description": "Authors to filter by. Multiple authors will be combined with OR logic.", + "items": { + "type": "string" + } + }, + "before": { + "type": "string", + "description": "Search for commits before this date. Supports various formats including structured dates (e.g. \"YYYY-MM-DD\", \"MM/DD/YYYY\", or \"november 2023\") and natural language (e.g. \"1 month ago\", \"last week\", or \"yesterday\")" + }, + "contentTerms": { + "type": "array", + "description": "Code content terms to search for in the actual changes. Multiple terms will be combined with OR logic.", + "items": { + "type": "string" + } + }, + "count": { + "type": "integer", + "description": "Maximum number of results to return" + }, + "files": { + "type": "array", + "description": "File paths to filter by. Multiple files will be combined with OR logic.", + "items": { + "type": "string" + } + }, + "messageTerms": { + "type": "array", + "description": "Terms to search for in commit messages. 
Multiple terms will be combined with OR logic by default.", + "items": { + "type": "string" + } + }, + "repos": { + "type": "array", + "description": "REQUIRED: Repositories to search in (e.g., [\"github.com/gohugoio/hugo\"] or [\"repo1\", \"repo2\"]). Multiple repositories will be combined with OR logic.", + "items": { + "type": "string" + } + }, + "useRegex": { + "type": "boolean", + "description": "Use regular expressions for searching (default: false)" + } + }, + "additionalProperties": false + }, + "name": "sg_commit_search", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "query", + "totalCount", + "commits", + "limitHit" + ], + "properties": { + "commits": { + "type": "array", + "items": { + "type": "object", + "required": [ + "repository", + "commit", + "author", + "date", + "title", + "message" + ], + "properties": { + "author": { + "type": "string" + }, + "commit": { + "type": "string" + }, + "date": { + "type": "string" + }, + "message": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "limitHit": { + "type": "boolean" + }, + "query": { + "type": "string" + }, + "totalCount": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. 
A tool that compares changes between two specific revisions in a repository.\nUse this tool when you need to:\n- Compare changes between two versions (commits, branches, tags) in a single repository\n- View specific file changes between revisions in detail\n- Examine code differences in a pull request, branch or commit\n- See all files modified between two specific points in a repository\n\nTips for effective revision comparison:\n- Use specific revisions for precise comparisons\n- For very large diffs, start with a smaller \"first\" value (e.g., 5) to preview changes\n- For comparing across branches, use the branch names directly\n- To see changes from a specific commit, use commitHash~1 as base and commitHash as head\n- Use \"after\" with the provided endCursor value to paginate through large diffs\n\nExamples:\n\n \n What changed in commit abc123 in the github.com/django/django repository?\n calls the compare revisions tool with repo=\"github.com/django/django\" base=\"abc123~1\" head=\"abc123\"\n \n \n Compare the feature-auth branch with main in the github.com/microsoft/vscode repository\n calls the compare revisions tool with repo=\"github.com/microsoft/vscode\" base=\"main\" head=\"feature-auth\"\n \n \n Show me all file changes between commits abc123 and def456 in kubernetes\n calls the compare revisions tool with repo=\"github.com/kubernetes/kubernetes\" base=\"abc123\" head=\"def456\"\n \n \n What files were modified in the last 3 commits on main branch of the rails repo?\n calls the compare revisions tool with repo=\"github.com/rails/rails\" base=\"main~3\" head=\"main\"\n \n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repo", + "base", + "head" + ], + "properties": { + "after": { + "type": "string", + "description": "Pagination cursor for fetching more results" + }, + "base": { + "type": "string", + "description": "The base revision (older version, e.g., \"main~5\", a commit 
hash, or a tag)" + }, + "first": { + "type": "integer", + "description": "Maximum number of file diffs to return (default: 50, max: 100)" + }, + "head": { + "type": "string", + "description": "The head revision (newer version, e.g., \"main\", a commit hash, or a tag)" + }, + "repo": { + "type": "string", + "description": "The repository name to compare revisions in (e.g., \"github.com/grafana/loki\")" + } + }, + "additionalProperties": false + }, + "name": "sg_compare_revisions", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repository", + "base", + "head", + "totalAdded", + "totalDeleted", + "totalModified", + "files", + "hasNextPage", + "endCursor" + ], + "properties": { + "base": { + "type": "string" + }, + "endCursor": { + "type": "string" + }, + "files": { + "type": "array", + "items": { + "type": "object", + "required": [ + "path", + "added", + "deleted" + ], + "properties": { + "added": { + "type": "integer" + }, + "deleted": { + "type": "integer" + }, + "hunks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "oldRange", + "newRange", + "body" + ], + "properties": { + "body": { + "type": "string" + }, + "newRange": { + "type": "object", + "required": [ + "startLine", + "lines" + ], + "properties": { + "lines": { + "type": "integer" + }, + "startLine": { + "type": "integer" + } + }, + "additionalProperties": false + }, + "oldRange": { + "type": "object", + "required": [ + "startLine", + "lines" + ], + "properties": { + "lines": { + "type": "integer" + }, + "startLine": { + "type": "integer" + } + }, + "additionalProperties": false + }, + "section": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "path": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "hasNextPage": { + "type": "boolean" + }, + "head": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "totalAdded": { + "type": "integer" + }, + 
"totalDeleted": { + "type": "integer" + }, + "totalModified": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + { + "description": "\nCreate a new deep search conversation to answer complex questions about your codebase.\n\nPowered by an agentic LLM, this deep research tool performs an in-depth investigation of your codebase\n- Performs comprehensive analysis of your question\n- Uses multiple search and analysis tools automatically\n- Provides detailed, well-researched answers with evidence\n- Generates related follow-up suggestions\n\nUse this tool when you need:\n- Comprehensive analysis of complex technical questions\n- Multi-step research across one or many remote codebases\n- Detailed explanations with supporting evidence\n- Questions that require combining information from multiple sources\n\nExamples:\n\n\n\tHow does authentication work in this codebase?\n\tcalls the deep search tool with question: \"How does authentication work in this codebase?\"\n\n\n\tFind all the security vulnerabilities related to user input validation\n\tcalls the deep search tool with question: \"Find all the security vulnerabilities related to user input validation\"\n\n\n\tExplain the architecture and data flow of the payment processing system\n\tcalls the deep search tool with question: \"Explain the architecture and data flow of the payment processing system\"\n\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "question" + ], + "properties": { + "question": { + "type": "string", + "description": "The question to research using deep search. Should be detailed and specific about what you want to understand." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_deepsearch", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "conversation_id", + "question_id" + ], + "properties": { + "answer": { + "type": "string", + "description": "The answer generated by the deep search tool." + }, + "conversation_id": { + "type": "integer", + "description": "The ID of the conversation that was created to process the question. Can be used for follow-up questions" + }, + "error": { + "type": "object", + "description": "Any error that occurred during the deep search process.", + "required": [ + "title", + "kind", + "message" + ], + "properties": { + "details": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "message": { + "type": "string" + }, + "title": { + "type": "string" + } + }, + "additionalProperties": false + }, + "link": { + "type": "string", + "description": "A link to the deep search conversation app." + }, + "question_id": { + "type": "integer", + "description": "The globally unique ID of the question that was processed." + }, + "suggested_followups": { + "type": "array", + "description": "Suggested follow-up questions based on the deep search result.", + "items": { + "type": "string" + } + }, + "title": { + "type": "string", + "description": "The title or summary of the deep search result." + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. 
A tool that searches for code changes (diffs) across multiple repositories.\nThis tool searches ONLY the actual code changes (added/removed lines), not commit messages.\n\nUse this tool when you need to:\n- Find specific code patterns that were added or removed across repositories\n- Search for keywords in code changes across multiple repositories\n- Find changes related to specific features or bugs\n\nTips for effective diff searches:\n- Use the 'repos' parameter to focus your search on specific repositories\n- Use 'after' and 'before' to define a time range for the changes\n- Use 'pattern' to specify what you're looking for in the changed code\n- Add 'useRegex: true' for more complex search patterns\n- Use 'added: true' or 'removed: true' to search only in added or removed code\n\nExamples:\n\n \n Find instances where console.log was added across our repos in the past month\n calls the diff search tool with pattern=\"console.log\" after=\"1 month ago\" added=true\n \n \n Search for any hardcoded passwords that were removed in our backend services\n calls the diff search tool with pattern=\"password.*=\" repos=[\"github.com/myorg/backend*\"] removed=true useRegex=true\n \n \n Look for recent changes that mention 'API key' across all our microservices\n calls the diff search tool with pattern=\"API key\" repos=[\"github.com/myorg/service-*\"] after=\"2 weeks ago\"\n \n \n Find security-related code changes by author john.doe in the last quarter\n calls the diff search tool with pattern=\"security|auth|login\" author=\"john.doe\" after=\"3 months ago\" useRegex=true\n \n \n Search for any database connection changes across all repositories recently\n calls the diff search tool with pattern=\"database|db_connect|connection\" after=\"1 week ago\" useRegex=true\n \n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "pattern", + "repos" + ], + "properties": { + "added": { + "type": "boolean", + 
"description": "If true, only search in added code (default: search in both added and removed code)" + }, + "after": { + "type": "string", + "description": "Only include results from changes after this time (e.g., \"2 weeks ago\", \"2023-01-01\")" + }, + "author": { + "type": "string", + "description": "Filter by the author of the changes (e.g., \"username\")" + }, + "before": { + "type": "string", + "description": "Only include results from changes before this time (e.g., \"1 week ago\", \"2023-12-31\")" + }, + "count": { + "type": "integer", + "description": "Maximum number of results to return" + }, + "pattern": { + "type": "string", + "description": "The search pattern to look for in diff content (code changes, actual added/removed lines)" + }, + "removed": { + "type": "boolean", + "description": "If true, only search in removed code (default: search in both added and removed code)" + }, + "repos": { + "type": "array", + "description": "REQUIRED: Array of repository patterns to search in (e.g., [\"github.com/myorg/repo\", \"github.com/otherorg/*\"]). 
Multiple repositories will be combined with OR logic.", + "items": { + "type": "string" + } + }, + "useRegex": { + "type": "boolean", + "description": "Use regular expressions for searching (default: false)" + } + }, + "additionalProperties": false + }, + "name": "sg_diff_search", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "query", + "results", + "totalCount", + "limitHit" + ], + "properties": { + "limitHit": { + "type": "boolean" + }, + "query": { + "type": "string" + }, + "results": { + "type": "array", + "items": { + "type": "object", + "required": [ + "repository", + "commit", + "diffHunk", + "url", + "matchRanges" + ], + "properties": { + "authorName": { + "type": "string" + }, + "commit": { + "type": "string" + }, + "diffHunk": { + "type": "string" + }, + "matchRanges": { + "type": "array", + "items": { + "type": "object", + "required": [ + "startLine", + "endLine", + "startCharacter", + "endCharacter" + ], + "properties": { + "endCharacter": { + "type": "integer" + }, + "endLine": { + "type": "integer" + }, + "startCharacter": { + "type": "integer" + }, + "startLine": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + "message": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false + } + }, + "totalCount": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. Finds references to a provided symbol in a repository.\nA symbol is any code identifier, such as a function name, variable name, or class name.\nIt handles overloading by leveraging compiler information to ensure references are to the exact symbol requested. 
It can even handle cross-repository references.\n\nReturns a list of usages of that symbol, specifically:\n- Where the symbol is referenced in the code\n- The file and line number of each reference\n- Surrounding context of each reference to help understand its usage\nIf the symbol is not found, returns \"Symbol not found\"\n\nThis tool is the opposite of the sg_go_to_definition tool - it finds references (usages) to a symbol given its definition.\n\nYou should use this tool when you have a specific symbol in mind (function, method, variable, class, etc.), you know where it is defined (a file path) and want to see where it is referenced / used in the codebase.\n\nYou should choose to use this tool over sg_keyword_search, sg_nls_search or sg_read_file when you have encountered the definition of a specific symbol (function, variable, class)\nand you want to see how that specific symbol is used throughout the codebase, understand code flow or performing impact analysis.\n\nExamples:\n\n\t\n\t\tFind where the AbstractPaymentProcessorClass is used. 
It's defined in src/processors/AbstractPaymentProcessor.ts in the ecommerce/payment-service repository.\n\t\t [calls the find references tool with repo=\"ecommerce/payment-service\", path=\"src/processors/AbstractPaymentProcessor.ts\", symbol=\"AbstractPaymentProcessor\"]\n\t\t {\n \"repo\": \"ecommerce/payment-service\",\n \"path\": \"src/processors/StripePaymentProcessor.ts\",\n \"rev\": \"HEAD\",\n \"chunks\": [\n\t\t\t\t\t{\n\t\t\t\t\t\t\"startLine\": 2,\n\t\t\t\t\t\t\"endLine\": 2,\n\t\t\t\t\t\t\"content\": \"2: import { AbstractPaymentProcessor } from './AbstractPaymentProcessor';\\n\"\n\t\t\t\t\t},\n {\n \"startLine\": 102,\n \"endLine\": 103,\n \"content\": \"102: class StripePaymentProcessor extends AbstractPaymentProcessor {\\n103: \\tprivate readonly Status status;\\n\"\n }\n ]\n }\n\t\t\n\t\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repo", + "path", + "symbol" + ], + "properties": { + "path": { + "type": "string", + "description": "The path to the file within the repository containing the symbol reference, e.g. \"src/utils/date.ts\"." + }, + "repo": { + "type": "string", + "description": "The name of the repository containing the file, e.g. \"github.com/tuckersoft/thronglets\"." + }, + "revision": { + "type": "string", + "description": "The revision or branch to find references in. If not specified, defaults to the HEAD of the default branch." + }, + "symbol": { + "type": "string", + "description": "The name of the symbol (function name, method name, class, variable, constant, etc) to find references for, e.g. \"handleSearch\"." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_find_references", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "fileBlocks" + ], + "properties": { + "fileBlocks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "type", + "repo", + "file", + "chunks" + ], + "properties": { + "chunks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "startLine", + "endLine", + "content" + ], + "properties": { + "content": { + "type": "string" + }, + "endLine": { + "type": "integer" + }, + "startLine": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + "file": { + "type": "string" + }, + "repo": { + "type": "string" + }, + "rev": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. 
Find repositories where a specific contributor has made commits.\nThis tool helps identify which repositories a person has contributed to, making it useful for scoping commit searches.\n\nUse this tool when you need to:\n- Find all repositories a person has worked on\n- Scope commit or diff searches to relevant repositories\n- Understand a contributor's involvement across the codebase\n\nThe tool searches by author name or email address and returns repositories with contribution statistics.\n\nExamples:\n\n\t\n\t\tWhat repositories has Jim contributed to?\n\t\tcalls get contributor repos tool with author=\"Jim\"\n\t\n\t\n\t\tWhat has john.doe@company.com been working on?\n\t\tcalls get contributor repos tool with author=\"john.doe@company.com\"\n\t\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "author" + ], + "properties": { + "author": { + "type": "string", + "description": "Author name or email address to search for. Case-insensitive partial matching (e.g., \"john\" will match \"john.doe@company.com\" and \"John Smith\")." + }, + "limit": { + "type": "integer", + "description": "Maximum number of repositories to return. Defaults to 20, max 100." + }, + "minCommits": { + "type": "integer", + "description": "Minimum number of commits the author must have in a repository to include it. Defaults to 1." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_get_contributor_repos", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repositories" + ], + "properties": { + "repositories": { + "type": "array", + "items": { + "type": "object", + "required": [ + "repoName", + "repoCommitCount", + "repoMostRecentCommitDate" + ], + "properties": { + "repoCommitCount": { + "type": "integer" + }, + "repoMostRecentCommitDate": { + "type": "string" + }, + "repoName": { + "type": "string" + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. Finds the definition of a specified symbol in a repository.\nThis tool can leverage compiler-level symbol information to provide accurate results and can handle cross-repository references.\n\nA symbol is any code identifier, such as a function or method name, variable name, or class name such as 'MyClass' or 'readFile'.\nThe symbol name should be the identifier as it appears in code (e.g., 'validateToken', 'MyClass', 'API_ENDPOINT').\nFor methods, use just the method name without the class prefix.\nFor nested symbols, use the simple name rather than fully qualified paths.\n\nThe tool will return the code snippet containing the definition of the symbol in the codebase, as well as its location - the file name and line number.\nIt will return up to 50 lines of the code snippet containing the definition of the symbol.\n\nThis tool is the opposite of the sg_find_references tool - it finds the definition to a symbol given a reference/usage symbol.\n\nYou should use this tool when you have a specific symbol in mind (function, method, variable, class, etc.), you know where it is used (a file path) and want to see its definition in the codebase.\n\nYou should choose to use this tool over sg_keyword_search, sg_nls_search or 
sg_read_file when you have encountered a specific symbol (function, method, variable, class, etc.)\nthat you want to understand better by seeing its definition.\n\nExamples:\n\n \n I'm working in the auth-service/backend repo, in the file src/middleware/authMiddleware.js. This code calls await validateToken(session, token). Where is the function validateToken defined?\n [calls the go to definition tool with repo=\"auth-service/backend\", path=\"src/middleware/authMiddleware.js\", symbol=\"validateToken\"]\n {\n \"repo\": \"auth-service/backend\",\n \"path\": \"src/middleware/validation.js\",\n \"rev\": \"HEAD\",\n \"chunks\": [\n {\n \"startLine\": 12,\n \"endLine\": 62,\n \"content\": \"12: function validateToken(session, token) { ... }\"\n }\n ]\n }\n \n \n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repo", + "path", + "symbol" + ], + "properties": { + "path": { + "type": "string", + "description": "The path to the file within the repository containing the symbol reference." + }, + "repo": { + "type": "string", + "description": "The name of the repository containing the file. For example, \"github.com/burntsushi/ripgrep\"." + }, + "revision": { + "type": "string", + "description": "The revision or branch to find the definition in. If not specified, defaults to the HEAD of the default branch." + }, + "symbol": { + "type": "string", + "description": "The name of the symbol to find the definition for." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_go_to_definition", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "fileBlocks" + ], + "properties": { + "fileBlocks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "type", + "repo", + "file", + "chunks" + ], + "properties": { + "chunks": { + "type": "array", + "items": { + "type": "object", + "required": [ + "startLine", + "endLine", + "content" + ], + "properties": { + "content": { + "type": "string" + }, + "endLine": { + "type": "integer" + }, + "startLine": { + "type": "integer" + } + }, + "additionalProperties": false + } + }, + "file": { + "type": "string" + }, + "repo": { + "type": "string" + }, + "rev": { + "type": "string" + }, + "type": { + "type": "string" + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. \nA keyword code search tool that helps you find relevant code snippets across repositories. Use this tool when you need to:\n- Find specific code with exact matching\n- Verify if certain code exists in the codebase\n- Find examples of code usage\n\nSearch capabilities:\n- Basic search: Enter keywords to find code containing those terms\n- Boolean search: Use AND/OR operators to combine terms. By default, all search terms are combined with AND, meaning that all search terms must match for a result to be included.\n- Filters:\n\t* repo: to search specific repositories (supports regex) Example: repo:github.com/google will select all repositories with the matching prefix (github.com/google-gemini/cookbook and github.com/google/neuroglancer both match). Use ^ to match the start of the string and $ to match the end of the string to avoid unwanted matches. 
You can chain multiple repo filters with OR while putting them in parentheses.\n\t* file: to search specific file patterns (supports regex) Example: file:README.md will only search files that have README.md in their name. file:.*.ts will search all files with a .ts extension.\n\t* rev: to search specific revisions (branches, tags, commits/sha). MUST always be used together with a repo: filter. Example: repo:^foo/bar$ rev:feat/xyz will only search the branch feat/xyz of the repo foo/bar. If not specified, defaults to the HEAD of the default branch.\n\nWhen not to use this tool:\n- For semantic or conceptual searches like 'authentication implementation'\n- For queries that are similar to natural language like \"API calls to middleware in admin panel\".\n- When you are not sure if the term exists in the codebase.\n- For queries with several search terms.\n- For exploratory searching of the codebase, when you are not sure what a certain term or name means.\nImportant: In these cases, use the sg_nls_search tool instead.\n\nBest practices:\n- Use a small (1-3) number of search terms. Because we combine terms with AND by default, using too many terms may result in no results.\n- Use specific, descriptive search terms\n- Start with broader searches and narrow down using filters\n- Use repo: and file: filters to improve result relevance\n- The results are case insensitive, so you should not repeat the same search term with different capitalization.\n- Use regex (file:.*.py) instead of glob (file:*.py) for searching files with a .py extension\n- Don't use the rev: filter unless the user specifically asks for a particular branch, tag, or commit. The default behavior is to search the HEAD of the default branch.\nImportant: Use the literal OR between search terms if you are looking for code containing at least one of the terms. 
Do not use the regex alternation operator |.\n\nReturns the top 3 most relevant code chunks (truncated to 1024 characters) from each matching file (up to 15 files).\nIndividual lines longer than 256 characters are truncated.\n\nExamples:\n\n\n\tFind all code in the secret-fellowship organization on Github using log4j\n\tcalls the keyword search tool with query: \"repo:^github.com/secret-fellowship log4j\"\n\n\n\tFind all code in the github.com/kubernetes/autoscaler repo that uses the http.NewRequest function\n\tcalls the keyword search tool with query: \"repo:^github.com/kubernetes/autoscaler$ http.NewRequest\"\n\n\n\tFind Go code in ollama where memory mapping is used on darwin or linux\n\tcalls the keyword search tool with query: \"repo:ollama file:.*.go mmap AND (darwin OR linux)\"\n\n\n\tFind FAQs in all readme files across all repositories\n\tcalls the keyword search tool with query: \"file:README.md FAQ\"\n\n\n\tFind code in bar or foo repos that uses lodash\n\tcalls the keyword search tool with query: \"(repo:foo OR repo:bar) lodash\"\n\n\n\tFind code containing either foo or bar in the dev/docs directory\n\tcalls the keyword search tool with query: \"file:^dev/docs foo OR bar\"\n\n\n\tFind code containing foo in dev/docs directory of the repo bar/bas on branch quz\n\tcalls the keyword search tool with query: \"repo:^bar/bas$ rev:quz file:^dev/docs foo\"\n\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "query" + ], + "properties": { + "query": { + "type": "string", + "description": "The search query. Can include keywords, regex patterns, and filters." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_keyword_search", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "blocks" + ], + "properties": { + "blocks": { + "type": "array", + "items": true + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. Lists the files and subdirectories in the workspace in a given directory.\nDirect subdirectories are included and the path is returned with a trailing slash.\nThe input path is relative to the root of the repository.\n\nLists up to 1000 files or directories. If there are more, the response will be truncated.\nDirectories are listed first.\n\nExamples:\n\n\t\n\t\tList all files in the src directory of the repository.\n\t\tCalls the list files tool with path: \"src\"\n\t\t[{\"path\": \"app/\", \"isDirectory\": true}, {\"path\": \"DEV.md\", \"isDirectory\": false}]\n\t\n\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repo" + ], + "properties": { + "path": { + "type": "string", + "description": "The directory path within the workspace to list files from. Defaults to workspace root if not specified." + }, + "repo": { + "type": "string", + "description": "The name of the repository containing the files. For example, \"github.com/torvalds/linux\"." + }, + "revision": { + "type": "string", + "description": "The revision or branch to list files from. If not specified, defaults to the HEAD of the default branch." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_list_files", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "files" + ], + "properties": { + "files": { + "type": "array", + "items": { + "type": "object", + "required": [ + "path", + "isDirectory" + ], + "properties": { + "isDirectory": { + "type": "boolean" + }, + "path": { + "type": "string" + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. \nLists repositories that match a search query.\nYou can use this tool to figure out the repositories the user wants to talk about.\nReturns repository names, descriptions, and other metadata.\nBy default, returns up to 50 repositories per page.\nSupports cursor-based pagination - the response will include cursor information that can be used to fetch the next or previous page of results.\nUse the \"after\" parameter with the \"endCursor\" from the previous response to get the next page, or \"before\" with \"startCursor\" to get the previous page.\n\nWhen to use this tool:\n- When you do not know the full name of a repository (e.g. the user asks to search the xyz repo, you can use this tool to get github.com/my-org/xyz)\n- When you have to find repositories by substring matching (e.g. 
you want to find all repositories that contain the word \"foobar\")\nThe pattern is not a regular expression, but uses substring matching on the name of the repo.\n\nExamples:\n\n\t\n\t\tFind repositories in the cool-kids-club org on Github\n\t\tCalls the list repos tool with query: \"github.com/cool-kids-club\"\n\t\n\t\n\t\tExplain the foo repository\n\t\tCalls the list repos tool with query: \"foo\"\n\t\n\t\n\t\tDo we have repositories named \"baz\" and \"qux\"?\n\t\tCalls the list repos tool with query \"baz\", calls the list repos tool with query \"qux\"\n\t\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "query" + ], + "properties": { + "after": { + "type": "string", + "description": "Cursor to start fetching results after. Use the \"endCursor\" from the previous response." + }, + "before": { + "type": "string", + "description": "Cursor to start fetching results before. Use the \"startCursor\" from the previous response." + }, + "limit": { + "type": "integer", + "description": "Maximum number of repositories to return per page. Defaults to 50." + }, + "query": { + "type": "string", + "description": "A search query to filter repositories. For example, \"django\" to find repositories containing that term." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_list_repos", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repositories", + "pagination" + ], + "properties": { + "pagination": { + "type": "object", + "required": [ + "totalCount", + "hasNextPage", + "hasPreviousPage" + ], + "properties": { + "endCursor": { + "type": "string" + }, + "hasNextPage": { + "type": "boolean" + }, + "hasPreviousPage": { + "type": "boolean" + }, + "startCursor": { + "type": "string" + }, + "totalCount": { + "type": "integer" + } + }, + "additionalProperties": false + }, + "repositories": { + "type": "array", + "items": { + "type": "object", + "required": [ + "name", + "description", + "private", + "archived", + "fork", + "stars" + ], + "properties": { + "archived": { + "type": "boolean" + }, + "description": { + "type": "string" + }, + "fork": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "private": { + "type": "boolean" + }, + "stars": { + "type": "integer" + } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. \nA semantic code search tool that helps find relevant code snippets using flexible linguistic matching.\n\nUse this tool over sg_keyword_search when you need to:\n- Find code related to concepts rather than exact matches\n- Get broader results with more potential matches\n- Find code when you don't know the exact keyword to search for like symbol (function or variable) names\n- When the sg_keyword_search tool returns too few results\n\nSearch capabilities:\n- The core matching is also keyword-based, but it's more flexible and can match more terms\n- Uses stemming and OR binding between terms for broader matching (e.g. 
\"auth impl\" matches on either term)\n- Filters: same as sg_keyword_search tool\n\nWhen not to use this tool:\n- When you need exact string matches (use the sg_keyword_search tool instead)\n- When you're looking for specific function names or variables\n\nBest practices:\n- IMPORTANT: Extract relevant keywords from a natural language query and search with only the keywords - if a user asks \"How are database connections created or closed in the codebase?\", search with \"database connection create close\", not \"how are database connections created closed\"\n- For complex topics with multiple aspects, do separate searches for each main concept\n- Use repo: and file: filters to improve result relevance\n- If results seem off-target, try refining your query to be more specific or use filters to avoid unwanted results\n- The results are case insensitive, so you should not repeat the same search term with different capitalization\n\nReturns the top 3 most relevant code chunks (truncated to 1024 characters) from each matching file (up to 15 files).\nIndividual lines longer than 256 characters are truncated.\n\nExamples:\n\n\n\tFind where event listener handlers are set up in the codebase\n\tcalls the nls search tool with query: \"event listen handler\"\n\n\n\tExplain the code that controls the chat view in barfoo repo\n\tcalls the nls search tool with query: \"repo:barfoo$ view controller chat\"\n\n\n\tHow are database connections managed in the github.com/django/django repo?\n\tcalls the nls search tool with query: \"repo:^github.com/django/django$ database connection manage\"\n\n\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "query" + ], + "properties": { + "query": { + "type": "string", + "description": "The search query. Can include keywords, regex patterns, and filters." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_nls_search", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "blocks" + ], + "properties": { + "blocks": { + "type": "array", + "items": true + } + }, + "additionalProperties": false + } + }, + { + "description": "This is a Sourcegraph search tool and is best used with other sourcegraph search tools. \nReads the content of a file in the workspace.\nReturns the file content as a string.\nEach line is prefixed with its actual line number from the file. The line numbers are 1-indexed. If a file contains \"abc\\\\ndef\", you will receive \"1: abc\\\\n2: def\".\nYou can optionally specify a line range to read only a portion of the file.\nIf you attempt to read a file that is too large (limit is 128 KB), you will receive an error. In that case, retry with a smaller line range.\n\nIMPORTANT: Use this tool ONLY when you have already located the specific file.\nBefore using this tool:\n- ALWAYS verify both the repository and file exist by using sg_list_repos, sg_list_files, sg_keyword_search, or sg_nls_search first\n- NEVER try to read a file without first confirming it exists at the exact path\n- NEVER guess file paths or assume common structures like \"lib/shared/src\" exist - always verify\n\nExamples:\n\n\n\tSummarize the readme in the pytorch/torcheval repository\n\t[First checks that the repository exists by calling sg_list_repos]\n\t[Then verifies README.md exists using sg_list_files with repo: \"pytorch/torcheval\"]\n\tCalls the read file tool with repo: \"pytorch/torcheval\", path: \"README.md\"\n\n\n\tRead the first 100 lines of the code in cli/build.rs in the tokio-rs/tokio repository\n\t[First confirms the file exists using sg_list_files or sg_keyword_search]\n\tCalls the read file tool with repo: \"tokio-rs/tokio\", path: \"cli/build.rs\", startLine: 1, endLine: 100\n\n\n\tRead the first 100 lines of the code in cli/build.rs in the 
tokio-rs/tokio repository at commit 1234567890\n\t[First confirms the file exists using sg_list_files or sg_keyword_search]\n\tCalls the read file tool with repo: \"tokio-rs/tokio\", path: \"cli/build.rs\", startLine: 1, endLine: 100, revision: \"1234567890\"\n\n\n\tRead the first 100 lines of the code in cli/build.rs in the tokio-rs/tokio repository at branch feat/xzy\n\t[First confirms the file exists using sg_list_files or sg_keyword_search]\n\tCalls the read file tool with repo: \"tokio-rs/tokio\", path: \"cli/build.rs\", startLine: 1, endLine: 100, revision: \"feat/xzy\"\n\n", + "inputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "repo", + "path" + ], + "properties": { + "endLine": { + "type": "integer", + "description": "The 1-based line number to end reading at. If not specified, reads to the end of the file." + }, + "path": { + "type": "string", + "description": "The path to the file within the repository." + }, + "repo": { + "type": "string", + "description": "The name of the repository containing the file. For example, \"github.com/sveltejs/svelte\"." + }, + "revision": { + "type": "string", + "description": "The revision to read the file from. If not specified, reads from the default branch. This can be a commit hash or a branch name" + }, + "startLine": { + "type": "integer", + "description": "The 1-based line number to start reading from. If not specified, starts from the beginning of the file." 
+ } + }, + "additionalProperties": false + }, + "name": "sg_read_file", + "outputSchema": { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": [ + "content" + ], + "properties": { + "content": { + "type": "string" + } + }, + "additionalProperties": false + } + } + ] +} diff --git a/scripts/gen-mcp-tool-json.sh b/scripts/gen-mcp-tool-json.sh new file mode 100755 index 0000000000..82430a0e55 --- /dev/null +++ b/scripts/gen-mcp-tool-json.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +DST=$1 + +if [[ -z "${SRC_ACCESS_TOKEN}" ]]; then + echo "SRC_ACCESS_TOKEN is not set. Please set a access token for S2 (sourcegraph.sourcegraph.com)" + exit 1 +fi + +if [[ -z "$DST" ]]; then + echo "Usage: $0 " + exit 1 +fi + +curl \ + -H "Content-Type: application/json" \ + -H "Authorization: token ${SRC_ACCESS_TOKEN}" \ + -X POST \ + -d '{ "jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}' \ + https://sourcegraph.sourcegraph.com/.api/mcp/v1 | grep 'data:' | cut -b 6- | jq '.result' > ${DST} + From bd209849a9c65f1c911f1b13a59f053ac901f956 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Thu, 27 Nov 2025 11:11:40 +0200 Subject: [PATCH 02/15] parse mcp tool json --- cmd/src/mcp_parse.go | 171 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 cmd/src/mcp_parse.go diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go new file mode 100644 index 0000000000..bb18e44657 --- /dev/null +++ b/cmd/src/mcp_parse.go @@ -0,0 +1,171 @@ +//go:generate ../../scripts/gen-mcp-tool-json.sh mcp_tools.json +package main + +import ( + _ "embed" + "encoding/json" + "errors" + "fmt" +) + +//go:embed mcp_tools.json +var mcpToolListJSON []byte + +type MCPToolDef struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema Schema `json:"inputSchema"` + OutputSchema Schema `json:"outputSchema"` +} + +type InputProperty struct { + Name string + Type string + Description 
string + ItemType string +} + +type Schema struct { + Schema string `json:"$schema"` + SchemaObject +} + +type RawSchema struct { + Type string `json:"type"` + Description string `json:"description"` + Schema string `json:"$schema"` + Required []string `json:"required,omitempty"` + AdditionalProperties bool `json:"additionalProperties"` + Properties map[string]json.RawMessage `json:"properties"` + Items json.RawMessage `json:"items"` +} + +type SchemaValue interface { + Type() string +} + +type SchemaObject struct { + Kind string `json:"type"` + Description string `json:"description"` + Required []string `json:"required,omitempty"` + AdditionalProperties bool `json:"additionalProperties"` + Properties map[string]SchemaValue `json:"properties"` +} + +func (s SchemaObject) Type() string { return s.Kind } + +type SchemaArray struct { + Kind string `json:"type"` + Description string `json:"description"` + Items []SchemaValue `json:"items"` +} + +func (s SchemaArray) Type() string { return s.Kind } + +type SchemaPrimitive struct { + Description string `json:"description"` + Kind string `json:"type"` +} + +func (s SchemaPrimitive) Type() string { return s.Kind } + +type PropertyType struct { + Type string `json:"type"` +} + +type Parser struct { + errors []error +} + +func (p *Parser) parseRootSchema(r RawSchema) Schema { + return Schema{ + Schema: r.Schema, + SchemaObject: SchemaObject{ + Kind: r.Type, + Description: r.Description, + Required: r.Required, + AdditionalProperties: r.AdditionalProperties, + Properties: p.parseProperties(r.Properties), + }, + } +} + +func (p *Parser) parseSchema(r *RawSchema) SchemaValue { + switch r.Type { + case "object": + return &SchemaObject{ + Kind: r.Type, + Description: r.Description, + Required: r.Required, + AdditionalProperties: r.AdditionalProperties, + Properties: p.parseProperties(r.Properties), + } + case "array": + var items []SchemaValue + if len(r.Items) > 0 { + var itemRaw RawSchema + if err := json.Unmarshal(r.Items, 
&itemRaw); err == nil { + items = append(items, p.parseSchema(&itemRaw)) + } else { + p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + } + } + return &SchemaArray{ + Kind: r.Type, + Description: r.Description, + Items: items, + } + default: + return &SchemaPrimitive{ + Kind: r.Type, + Description: r.Description, + } + } +} + +func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { + res := make(map[string]SchemaValue) + for name, raw := range props { + var r RawSchema + if err := json.Unmarshal(raw, &r); err != nil { + p.errors = append(p.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) + continue + } + res[name] = p.parseSchema(&r) + } + return res +} + +func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { + defs := struct { + Tools []struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema RawSchema `json:"inputSchema"` + OutputSchema RawSchema `json:"outputSchema"` + } `json:"tools"` + }{} + + if err := json.Unmarshal(data, &defs); err != nil { + // TODO: think we should panic instead + return nil, err + } + + tools := map[string]*MCPToolDef{} + parser := &Parser{} + + for _, t := range defs.Tools { + tools[t.Name] = &MCPToolDef{ + Name: t.Name, + Description: t.Description, + InputSchema: parser.parseRootSchema(t.InputSchema), + OutputSchema: parser.parseRootSchema(t.OutputSchema), + } + } + + if len(parser.errors) > 0 { + return tools, errors.Join(parser.errors...) 
+ } + + return tools, nil +} From 6d269dd722f6f77383621066ae459a790d0ae15c Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Thu, 27 Nov 2025 11:22:45 +0200 Subject: [PATCH 03/15] fix parsing for when items: true --- cmd/src/mcp_parse.go | 22 ++++++--- cmd/src/mcp_parse_test.go | 100 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 cmd/src/mcp_parse_test.go diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index bb18e44657..ea8756facf 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -55,9 +55,9 @@ type SchemaObject struct { func (s SchemaObject) Type() string { return s.Kind } type SchemaArray struct { - Kind string `json:"type"` - Description string `json:"description"` - Items []SchemaValue `json:"items"` + Kind string `json:"type"` + Description string `json:"description"` + Items SchemaValue `json:"items,omitempty"` } func (s SchemaArray) Type() string { return s.Kind } @@ -101,13 +101,19 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { Properties: p.parseProperties(r.Properties), } case "array": - var items []SchemaValue + var items SchemaValue if len(r.Items) > 0 { - var itemRaw RawSchema - if err := json.Unmarshal(r.Items, &itemRaw); err == nil { - items = append(items, p.parseSchema(&itemRaw)) + var boolItems bool + if err := json.Unmarshal(r.Items, &boolItems); err == nil { + // Sometimes items is defined as "items: true", so we handle it here and + // consider it "empty" array } else { - p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + var itemRaw RawSchema + if err := json.Unmarshal(r.Items, &itemRaw); err == nil { + items = p.parseSchema(&itemRaw) + } else { + p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + } } } return &SchemaArray{ diff --git a/cmd/src/mcp_parse_test.go b/cmd/src/mcp_parse_test.go new file mode 100644 index 0000000000..41e9fd90e2 --- /dev/null +++ 
b/cmd/src/mcp_parse_test.go @@ -0,0 +1,100 @@ +package main + +import ( + "testing" +) + +func TestLoadMCPToolDefinitions(t *testing.T) { + toolJSON := []byte(`{ + "tools": [ + { + "name": "test_tool", + "description": "test description", + "inputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "value": { "type": "string" } + } + } + } + } + }, + "outputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "properties": { + "result": { "type": "string" } + } + } + } + ] + }`) + + tools, err := LoadMCPToolDefinitions(toolJSON) + if err != nil { + t.Fatalf("Failed to load tool definitions: %v", err) + } + + if len(tools) != 1 { + t.Fatalf("Expected 1 tool, got %d", len(tools)) + } + + tool := tools["test_tool"] + if tool == nil { + t.Fatal("Tool 'test_tool' not found") + } + + if tool.Name != "test_tool" { + t.Errorf("Expected name 'test_tool', got '%s'", tool.Name) + } + + inputSchema := tool.InputSchema + outputSchema := tool.OutputSchema + schemaVersion := "https://localhost/schema-draft/2025-07" + + if inputSchema.Schema != schemaVersion { + t.Errorf("Expected input schema version %q, got %q", schemaVersion, inputSchema.Schema) + } + if outputSchema.Schema != schemaVersion { + t.Errorf("Expected output schema version %q, got %q", schemaVersion, outputSchema.Schema) + } + + tagsProp, ok := inputSchema.Properties["tags"] + if !ok { + t.Fatal("Property 'tags' not found in inputSchema") + } + + if tagsProp.Type() != "array" { + t.Errorf("Expected tags type 'array', got '%s'", tagsProp.Type()) + } + + arraySchema, ok := tagsProp.(*SchemaArray) + if !ok { + t.Fatal("Expected SchemaArray for tags") + } + + if arraySchema.Items == nil { + t.Fatal("Expected items schema in array, got nil") + } + + itemSchema := arraySchema.Items + if itemSchema.Type() != "object" { + 
t.Errorf("Expected item type 'object', got '%s'", itemSchema.Type()) + } + + objectSchema, ok := itemSchema.(*SchemaObject) + if !ok { + t.Fatal("Expected SchemaObject for item") + } + + if _, ok := objectSchema.Properties["key"]; !ok { + t.Error("Property 'key' not found in item schema") + } +} From 4cf488918aa60db0a1bc76503d5add4f171cb554 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Nov 2025 11:22:35 +0200 Subject: [PATCH 04/15] use lib/errors --- cmd/src/mcp_parse.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index ea8756facf..3f10fae62f 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -4,8 +4,9 @@ package main import ( _ "embed" "encoding/json" - "errors" "fmt" + + "github.com/sourcegraph/sourcegraph/lib/errors" ) //go:embed mcp_tools.json @@ -112,7 +113,7 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { if err := json.Unmarshal(r.Items, &itemRaw); err == nil { items = p.parseSchema(&itemRaw) } else { - p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + p.errors = append(p.errors, errors.Errorf("failed to unmarshal array items: %w", err)) } } } @@ -170,7 +171,7 @@ func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { } if len(parser.errors) > 0 { - return tools, errors.Join(parser.errors...) + return tools, errors.Append(nil, parser.errors...) 
} return tools, nil From e479290b29ed62a8048c015f87d112d4d434b45b Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Nov 2025 11:26:23 +0200 Subject: [PATCH 05/15] temporarily ignore embedded json --- cmd/src/mcp_parse.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index 3f10fae62f..cadd967c61 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -10,7 +10,7 @@ import ( ) //go:embed mcp_tools.json -var mcpToolListJSON []byte +var _ []byte type MCPToolDef struct { Name string `json:"name"` From 1164172c3f09665a9dfd632b6b73c54c7b462991 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 11:25:06 +0200 Subject: [PATCH 06/15] move mcp files to internal/mcp --- {cmd/src => internal/mcp}/mcp_parse.go | 2 +- {cmd/src => internal/mcp}/mcp_parse_test.go | 0 {cmd/src => internal/mcp}/mcp_tools.json | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {cmd/src => internal/mcp}/mcp_parse.go (99%) rename {cmd/src => internal/mcp}/mcp_parse_test.go (100%) rename {cmd/src => internal/mcp}/mcp_tools.json (100%) diff --git a/cmd/src/mcp_parse.go b/internal/mcp/mcp_parse.go similarity index 99% rename from cmd/src/mcp_parse.go rename to internal/mcp/mcp_parse.go index cadd967c61..ba034504dc 100644 --- a/cmd/src/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -1,5 +1,5 @@ //go:generate ../../scripts/gen-mcp-tool-json.sh mcp_tools.json -package main +package mcp import ( _ "embed" diff --git a/cmd/src/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go similarity index 100% rename from cmd/src/mcp_parse_test.go rename to internal/mcp/mcp_parse_test.go diff --git a/cmd/src/mcp_tools.json b/internal/mcp/mcp_tools.json similarity index 100% rename from cmd/src/mcp_tools.json rename to internal/mcp/mcp_tools.json From 5fab60fc67837475a555d7f3e938ff10ceb7bfd7 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 11:31:01 +0200 Subject: [PATCH 07/15] 
unexport and remove unused structs --- internal/mcp/mcp_parse.go | 85 +++++++++++++++------------------- internal/mcp/mcp_parse_test.go | 6 +-- 2 files changed, 40 insertions(+), 51 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index ba034504dc..d1e1b93828 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -19,13 +19,6 @@ type MCPToolDef struct { OutputSchema Schema `json:"outputSchema"` } -type InputProperty struct { - Name string - Type string - Description string - ItemType string -} - type Schema struct { Schema string `json:"$schema"` SchemaObject @@ -70,15 +63,45 @@ type SchemaPrimitive struct { func (s SchemaPrimitive) Type() string { return s.Kind } -type PropertyType struct { - Type string `json:"type"` +type parser struct { + errors []error } -type Parser struct { - errors []error +func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { + defs := struct { + Tools []struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema RawSchema `json:"inputSchema"` + OutputSchema RawSchema `json:"outputSchema"` + } `json:"tools"` + }{} + + if err := json.Unmarshal(data, &defs); err != nil { + // TODO: think we should panic instead + return nil, err + } + + tools := map[string]*MCPToolDef{} + parser := &parser{} + + for _, t := range defs.Tools { + tools[t.Name] = &MCPToolDef{ + Name: t.Name, + Description: t.Description, + InputSchema: parser.parseRootSchema(t.InputSchema), + OutputSchema: parser.parseRootSchema(t.OutputSchema), + } + } + + if len(parser.errors) > 0 { + return tools, errors.Append(nil, parser.errors...) 
+ } + + return tools, nil } -func (p *Parser) parseRootSchema(r RawSchema) Schema { +func (p *parser) parseRootSchema(r RawSchema) Schema { return Schema{ Schema: r.Schema, SchemaObject: SchemaObject{ @@ -91,7 +114,7 @@ func (p *Parser) parseRootSchema(r RawSchema) Schema { } } -func (p *Parser) parseSchema(r *RawSchema) SchemaValue { +func (p *parser) parseSchema(r *RawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ @@ -130,7 +153,7 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { } } -func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { +func (p *parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { res := make(map[string]SchemaValue) for name, raw := range props { var r RawSchema @@ -142,37 +165,3 @@ func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]Sc } return res } - -func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { - defs := struct { - Tools []struct { - Name string `json:"name"` - Description string `json:"description"` - InputSchema RawSchema `json:"inputSchema"` - OutputSchema RawSchema `json:"outputSchema"` - } `json:"tools"` - }{} - - if err := json.Unmarshal(data, &defs); err != nil { - // TODO: think we should panic instead - return nil, err - } - - tools := map[string]*MCPToolDef{} - parser := &Parser{} - - for _, t := range defs.Tools { - tools[t.Name] = &MCPToolDef{ - Name: t.Name, - Description: t.Description, - InputSchema: parser.parseRootSchema(t.InputSchema), - OutputSchema: parser.parseRootSchema(t.OutputSchema), - } - } - - if len(parser.errors) > 0 { - return tools, errors.Append(nil, parser.errors...) 
- } - - return tools, nil -} diff --git a/internal/mcp/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go index 41e9fd90e2..e29281e9a3 100644 --- a/internal/mcp/mcp_parse_test.go +++ b/internal/mcp/mcp_parse_test.go @@ -1,10 +1,10 @@ -package main +package mcp import ( "testing" ) -func TestLoadMCPToolDefinitions(t *testing.T) { +func TestLoadToolDefinitions(t *testing.T) { toolJSON := []byte(`{ "tools": [ { @@ -37,7 +37,7 @@ func TestLoadMCPToolDefinitions(t *testing.T) { ] }`) - tools, err := LoadMCPToolDefinitions(toolJSON) + tools, err := LoadToolDefinitions(toolJSON) if err != nil { t.Fatalf("Failed to load tool definitions: %v", err) } From 72974c40a6edd60df9ee1d0a7a673520adbff2f6 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 12:07:14 +0200 Subject: [PATCH 08/15] rename MCPToolDef to ToolDef and move around structs --- internal/mcp/mcp_parse.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index d1e1b93828..b5fb843804 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -12,28 +12,28 @@ import ( //go:embed mcp_tools.json var _ []byte -type MCPToolDef struct { +type ToolDef struct { Name string `json:"name"` Description string `json:"description"` InputSchema Schema `json:"inputSchema"` OutputSchema Schema `json:"outputSchema"` } -type Schema struct { - Schema string `json:"$schema"` - SchemaObject -} - type RawSchema struct { Type string `json:"type"` Description string `json:"description"` - Schema string `json:"$schema"` + SchemaVersion string `json:"$schema"` Required []string `json:"required,omitempty"` AdditionalProperties bool `json:"additionalProperties"` Properties map[string]json.RawMessage `json:"properties"` Items json.RawMessage `json:"items"` } +type Schema struct { + Schema string `json:"$schema"` + SchemaObject +} + type SchemaValue interface { Type() string } @@ -67,7 +67,7 @@ type parser struct { 
errors []error } -func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { +func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { defs := struct { Tools []struct { Name string `json:"name"` @@ -78,15 +78,14 @@ func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { }{} if err := json.Unmarshal(data, &defs); err != nil { - // TODO: think we should panic instead return nil, err } - tools := map[string]*MCPToolDef{} + tools := map[string]*ToolDef{} parser := &parser{} for _, t := range defs.Tools { - tools[t.Name] = &MCPToolDef{ + tools[t.Name] = &ToolDef{ Name: t.Name, Description: t.Description, InputSchema: parser.parseRootSchema(t.InputSchema), @@ -103,7 +102,7 @@ func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { func (p *parser) parseRootSchema(r RawSchema) Schema { return Schema{ - Schema: r.Schema, + Schema: r.SchemaVersion, SchemaObject: SchemaObject{ Kind: r.Type, Description: r.Description, From 58bbc3cf53d7f2c7211dc7644a73e254b7a02394 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 5 Dec 2025 12:03:28 +0200 Subject: [PATCH 09/15] rename parser + method to decoder + decode* --- internal/mcp/mcp_parse.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index b5fb843804..37d1025c34 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -63,7 +63,7 @@ type SchemaPrimitive struct { func (s SchemaPrimitive) Type() string { return s.Kind } -type parser struct { +type decoder struct { errors []error } @@ -82,25 +82,25 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { } tools := map[string]*ToolDef{} - parser := &parser{} + decoder := &decoder{} for _, t := range defs.Tools { tools[t.Name] = &ToolDef{ Name: t.Name, Description: t.Description, - InputSchema: parser.parseRootSchema(t.InputSchema), - OutputSchema: 
parser.parseRootSchema(t.OutputSchema), + InputSchema: decoder.decodeRootSchema(t.InputSchema), + OutputSchema: decoder.decodeRootSchema(t.OutputSchema), } } - if len(parser.errors) > 0 { - return tools, errors.Append(nil, parser.errors...) + if len(decoder.errors) > 0 { + return tools, errors.Append(nil, decoder.errors...) } return tools, nil } -func (p *parser) parseRootSchema(r RawSchema) Schema { +func (d *decoder) decodeRootSchema(r RawSchema) Schema { return Schema{ Schema: r.SchemaVersion, SchemaObject: SchemaObject{ @@ -108,12 +108,12 @@ func (p *parser) parseRootSchema(r RawSchema) Schema { Description: r.Description, Required: r.Required, AdditionalProperties: r.AdditionalProperties, - Properties: p.parseProperties(r.Properties), + Properties: d.decodeProperties(r.Properties), }, } } -func (p *parser) parseSchema(r *RawSchema) SchemaValue { +func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ @@ -121,7 +121,7 @@ func (p *parser) parseSchema(r *RawSchema) SchemaValue { Description: r.Description, Required: r.Required, AdditionalProperties: r.AdditionalProperties, - Properties: p.parseProperties(r.Properties), + Properties: d.decodeProperties(r.Properties), } case "array": var items SchemaValue @@ -133,9 +133,9 @@ func (p *parser) parseSchema(r *RawSchema) SchemaValue { } else { var itemRaw RawSchema if err := json.Unmarshal(r.Items, &itemRaw); err == nil { - items = p.parseSchema(&itemRaw) + items = d.decodeSchema(&itemRaw) } else { - p.errors = append(p.errors, errors.Errorf("failed to unmarshal array items: %w", err)) + d.errors = append(d.errors, errors.Errorf("failed to unmarshal array items: %w", err)) } } } @@ -152,15 +152,15 @@ func (p *parser) parseSchema(r *RawSchema) SchemaValue { } } -func (p *parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { +func (d *decoder) decodeProperties(props map[string]json.RawMessage) map[string]SchemaValue { res := 
make(map[string]SchemaValue) for name, raw := range props { var r RawSchema if err := json.Unmarshal(raw, &r); err != nil { - p.errors = append(p.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) + d.errors = append(d.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) continue } - res[name] = p.parseSchema(&r) + res[name] = d.decodeSchema(&r) } return res } From 505a7b0ef12c780b48d374c314dc01f903460d2a Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Mon, 8 Dec 2025 12:06:08 +0200 Subject: [PATCH 10/15] simplify types fold: Schema into SchemaObject --- internal/mcp/mcp_parse.go | 53 +++++++++++++++------------------- internal/mcp/mcp_parse_test.go | 8 ++--- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index 37d1025c34..6e0ad4ec18 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -1,4 +1,3 @@ -//go:generate ../../scripts/gen-mcp-tool-json.sh mcp_tools.json package mcp import ( @@ -13,10 +12,10 @@ import ( var _ []byte type ToolDef struct { - Name string `json:"name"` - Description string `json:"description"` - InputSchema Schema `json:"inputSchema"` - OutputSchema Schema `json:"outputSchema"` + Name string `json:"name"` + Description string `json:"description"` + InputSchema SchemaObject `json:"inputSchema"` + OutputSchema SchemaObject `json:"outputSchema"` } type RawSchema struct { @@ -29,39 +28,35 @@ type RawSchema struct { Items json.RawMessage `json:"items"` } -type Schema struct { - Schema string `json:"$schema"` - SchemaObject -} - type SchemaValue interface { - Type() string + ValueType() string } type SchemaObject struct { - Kind string `json:"type"` + Type string `json:"type"` Description string `json:"description"` + Schema string `json:"$schema,omitempty"` Required []string `json:"required,omitempty"` AdditionalProperties bool `json:"additionalProperties"` Properties map[string]SchemaValue `json:"properties"` } -func (s 
SchemaObject) Type() string { return s.Kind } +func (s SchemaObject) ValueType() string { return s.Type } type SchemaArray struct { - Kind string `json:"type"` + Type string `json:"type"` Description string `json:"description"` Items SchemaValue `json:"items,omitempty"` } -func (s SchemaArray) Type() string { return s.Kind } +func (s SchemaArray) ValueType() string { return s.Type } type SchemaPrimitive struct { + Type string `json:"type"` Description string `json:"description"` - Kind string `json:"type"` } -func (s SchemaPrimitive) Type() string { return s.Kind } +func (s SchemaPrimitive) ValueType() string { return s.Type } type decoder struct { errors []error @@ -100,16 +95,14 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { return tools, nil } -func (d *decoder) decodeRootSchema(r RawSchema) Schema { - return Schema{ - Schema: r.SchemaVersion, - SchemaObject: SchemaObject{ - Kind: r.Type, - Description: r.Description, - Required: r.Required, - AdditionalProperties: r.AdditionalProperties, - Properties: d.decodeProperties(r.Properties), - }, +func (d *decoder) decodeRootSchema(r RawSchema) SchemaObject { + return SchemaObject{ + Schema: r.SchemaVersion, + Type: r.Type, + Description: r.Description, + Required: r.Required, + AdditionalProperties: r.AdditionalProperties, + Properties: d.decodeProperties(r.Properties), } } @@ -117,7 +110,7 @@ func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ - Kind: r.Type, + Type: r.Type, Description: r.Description, Required: r.Required, AdditionalProperties: r.AdditionalProperties, @@ -140,13 +133,13 @@ func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { } } return &SchemaArray{ - Kind: r.Type, + Type: r.Type, Description: r.Description, Items: items, } default: return &SchemaPrimitive{ - Kind: r.Type, + Type: r.Type, Description: r.Description, } } diff --git a/internal/mcp/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go index 
e29281e9a3..b2bd9b073e 100644 --- a/internal/mcp/mcp_parse_test.go +++ b/internal/mcp/mcp_parse_test.go @@ -71,8 +71,8 @@ func TestLoadToolDefinitions(t *testing.T) { t.Fatal("Property 'tags' not found in inputSchema") } - if tagsProp.Type() != "array" { - t.Errorf("Expected tags type 'array', got '%s'", tagsProp.Type()) + if tagsProp.ValueType() != "array" { + t.Errorf("Expected tags type 'array', got '%s'", tagsProp.ValueType()) } arraySchema, ok := tagsProp.(*SchemaArray) @@ -85,8 +85,8 @@ func TestLoadToolDefinitions(t *testing.T) { } itemSchema := arraySchema.Items - if itemSchema.Type() != "object" { - t.Errorf("Expected item type 'object', got '%s'", itemSchema.Type()) + if itemSchema.ValueType() != "object" { + t.Errorf("Expected item type 'object', got '%s'", itemSchema.ValueType()) } objectSchema, ok := itemSchema.(*SchemaObject) From 36ee38119f192b0a0835af75d44b08eae01ac92c Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 9 Dec 2025 10:42:16 +0200 Subject: [PATCH 11/15] review comments --- internal/mcp/mcp_parse.go | 59 ++++++++++++++++------------------ internal/mcp/mcp_parse_test.go | 9 ++---- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index 6e0ad4ec18..d671bb5119 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -18,14 +18,12 @@ type ToolDef struct { OutputSchema SchemaObject `json:"outputSchema"` } -type RawSchema struct { - Type string `json:"type"` - Description string `json:"description"` - SchemaVersion string `json:"$schema"` - Required []string `json:"required,omitempty"` - AdditionalProperties bool `json:"additionalProperties"` - Properties map[string]json.RawMessage `json:"properties"` - Items json.RawMessage `json:"items"` +type rawSchema struct { + Type string `json:"type"` + Description string `json:"description"` + Required []string `json:"required,omitempty"` + Properties map[string]json.RawMessage `json:"properties"` + Items 
json.RawMessage `json:"items"` } type SchemaValue interface { @@ -33,12 +31,14 @@ type SchemaValue interface { } type SchemaObject struct { - Type string `json:"type"` - Description string `json:"description"` - Schema string `json:"$schema,omitempty"` - Required []string `json:"required,omitempty"` - AdditionalProperties bool `json:"additionalProperties"` - Properties map[string]SchemaValue `json:"properties"` + Type string `json:"type"` + Description string `json:"description"` + Required []string `json:"required,omitempty"` + Properties map[string]SchemaValue `json:"properties"` + + // two fields which we do not use from the schema: + // - $schema + // - additionalProperties } func (s SchemaObject) ValueType() string { return s.Type } @@ -67,8 +67,8 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { Tools []struct { Name string `json:"name"` Description string `json:"description"` - InputSchema RawSchema `json:"inputSchema"` - OutputSchema RawSchema `json:"outputSchema"` + InputSchema rawSchema `json:"inputSchema"` + OutputSchema rawSchema `json:"outputSchema"` } `json:"tools"` }{} @@ -95,26 +95,23 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { return tools, nil } -func (d *decoder) decodeRootSchema(r RawSchema) SchemaObject { +func (d *decoder) decodeRootSchema(r rawSchema) SchemaObject { return SchemaObject{ - Schema: r.SchemaVersion, - Type: r.Type, - Description: r.Description, - Required: r.Required, - AdditionalProperties: r.AdditionalProperties, - Properties: d.decodeProperties(r.Properties), + Type: r.Type, + Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } } -func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { +func (d *decoder) decodeSchema(r *rawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ - Type: r.Type, - Description: r.Description, - Required: r.Required, - AdditionalProperties: r.AdditionalProperties, -
Properties: d.decodeProperties(r.Properties), + Type: r.Type, + Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } case "array": var items SchemaValue @@ -124,7 +121,7 @@ func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { // Sometimes items is defined as "items: true", so we handle it here and // consider it "empty" array } else { - var itemRaw RawSchema + var itemRaw rawSchema if err := json.Unmarshal(r.Items, &itemRaw); err == nil { items = d.decodeSchema(&itemRaw) } else { @@ -148,7 +145,7 @@ func (d *decoder) decodeSchema(r *RawSchema) SchemaValue { func (d *decoder) decodeProperties(props map[string]json.RawMessage) map[string]SchemaValue { res := make(map[string]SchemaValue) for name, raw := range props { - var r RawSchema + var r rawSchema if err := json.Unmarshal(raw, &r); err != nil { d.errors = append(d.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) continue diff --git a/internal/mcp/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go index b2bd9b073e..c826ae9517 100644 --- a/internal/mcp/mcp_parse_test.go +++ b/internal/mcp/mcp_parse_test.go @@ -56,14 +56,9 @@ func TestLoadToolDefinitions(t *testing.T) { } inputSchema := tool.InputSchema - outputSchema := tool.OutputSchema - schemaVersion := "https://localhost/schema-draft/2025-07" - if inputSchema.Schema != schemaVersion { - t.Errorf("Expected input schema version %q, got %q", schemaVersion, inputSchema.Schema) - } - if outputSchema.Schema != schemaVersion { - t.Errorf("Expected output schema version %q, got %q", schemaVersion, outputSchema.Schema) + if len(tool.OutputSchema.Properties) == 0 { + t.Fatalf("expected tool.OutputSchema.Properties not be empty") } tagsProp, ok := inputSchema.Properties["tags"] From 92fc3691734e6fd0bdd19ec27554839540fa45b9 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 9 Dec 2025 08:28:12 +0000 Subject: [PATCH 12/15] feat(mcp): build flagset from mcp input schema to parse args (#1215) 
- parse input schema to build flagset for mcp command --- cmd/src/mcp.go | 115 ++++++++++++++++++++++++++++++++++ internal/mcp/mcp_args.go | 94 +++++++++++++++++++++++++++ internal/mcp/mcp_args_test.go | 97 ++++++++++++++++++++++++++++ internal/mcp/mcp_parse.go | 49 +++++++++------ internal/mcp/mcp_request.go | 93 +++++++++++++++++++++++++++ 5 files changed, 430 insertions(+), 18 deletions(-) create mode 100644 cmd/src/mcp.go create mode 100644 internal/mcp/mcp_args.go create mode 100644 internal/mcp/mcp_args_test.go create mode 100644 internal/mcp/mcp_request.go diff --git a/cmd/src/mcp.go b/cmd/src/mcp.go new file mode 100644 index 0000000000..d7a9127f6a --- /dev/null +++ b/cmd/src/mcp.go @@ -0,0 +1,115 @@ +package main + +import ( + "flag" + "fmt" + + "github.com/sourcegraph/src-cli/internal/mcp" +) + +func init() { + flagSet := flag.NewFlagSet("mcp", flag.ExitOnError) + commands = append(commands, &command{ + flagSet: flagSet, + handler: mcpMain, + }) +} +func mcpMain(args []string) error { + fmt.Println("NOTE: This command is still experimental") + tools, err := mcp.LoadToolDefinitions() + if err != nil { + return err + } + + subcmd := args[0] + if subcmd == "list-tools" { + fmt.Println("The following tools are available:") + for name := range tools { + fmt.Printf(" • %s\n", name) + } + fmt.Println("\nUSAGE:") + fmt.Printf(" • Invoke a tool\n") + fmt.Printf(" src mcp \n") + fmt.Printf("\n • View the Input / Output Schema of a tool\n") + fmt.Printf(" src mcp schema\n") + fmt.Printf("\n • List the available flags of a tool\n") + fmt.Printf(" src mcp -h\n") + fmt.Printf("\n • View the Input / Output Schema of a tool\n") + fmt.Printf(" src mcp schema\n") + return nil + } + + tool, ok := tools[subcmd] + if !ok { + return fmt.Errorf("tool definition for %q not found - run src mcp list-tools to see a list of available tools", subcmd) + } + + flagArgs := args[1:] // skip subcommand name + if len(args) > 1 && args[1] == "schema" { + return printSchemas(tool) + } + + 
flags, vars, err := mcp.BuildArgFlagSet(tool) + if err != nil { + return err + } + if err := flags.Parse(flagArgs); err != nil { + return err + } + mcp.DerefFlagValues(vars) + + if err := validateToolArgs(tool.InputSchema, args, vars); err != nil { + return err + } + + apiClient := cfg.apiClient(nil, flags.Output()) + return handleMcpTool(context.Background(), apiClient, tool, vars) +} + +func printSchemas(tool *mcp.ToolDef) error { + input, err := json.MarshalIndent(tool.InputSchema, "", " ") + if err != nil { + return err + } + output, err := json.MarshalIndent(tool.OutputSchema, "", " ") + if err != nil { + return err + } + + fmt.Printf("Input:\n%v\nOutput:\n%v\n", string(input), string(output)) + return nil +} + +func validateToolArgs(inputSchema mcp.SchemaObject, args []string, vars map[string]any) error { + for _, reqName := range inputSchema.Required { + if vars[reqName] == nil { + return errors.Newf("no value provided for required flag --%s", reqName) + } + } + + if len(args) < len(inputSchema.Required) { + return errors.Newf("not enough arguments provided - the following flags are required:\n%s", strings.Join(inputSchema.Required, "\n")) + } + + return nil +} + +func handleMcpTool(ctx context.Context, client api.Client, tool *mcp.ToolDef, vars map[string]any) error { + resp, err := mcp.DoToolRequest(ctx, client, tool, vars) + if err != nil { + return err + } + + result, err := mcp.DecodeToolResponse(resp) + if err != nil { + return err + } + defer resp.Body.Close() + + output, err := json.MarshalIndent(result, "", " ") + if err != nil { + return err + } + fmt.Println(string(output)) + return nil +} diff --git a/internal/mcp/mcp_args.go b/internal/mcp/mcp_args.go new file mode 100644 index 0000000000..ecfb98e2b0 --- /dev/null +++ b/internal/mcp/mcp_args.go @@ -0,0 +1,94 @@ +package mcp + +import ( + "flag" + "fmt" + "reflect" + "strings" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +var _ flag.Value = (*strSliceFlag)(nil) + +type strSliceFlag 
struct { + vals []string +} + +func (s *strSliceFlag) Set(v string) error { + s.vals = append(s.vals, v) + return nil +} + +func (s *strSliceFlag) String() string { + return strings.Join(s.vals, ",") +} + +func DerefFlagValues(vars map[string]any) { + for k, v := range vars { + rfl := reflect.ValueOf(v) + if rfl.Kind() == reflect.Pointer { + vv := rfl.Elem().Interface() + if slice, ok := vv.(strSliceFlag); ok { + vv = slice.vals + } + if isNil(vv) { + delete(vars, k) + } else { + vars[k] = vv + } + } + } +} + +func isNil(v any) bool { + if v == nil { + return true + } + rv := reflect.ValueOf(v) + switch rv.Kind() { + case reflect.Slice, reflect.Map, reflect.Pointer, reflect.Interface: + return rv.IsNil() + default: + return false + } +} + +func BuildArgFlagSet(tool *ToolDef) (*flag.FlagSet, map[string]any, error) { + if tool == nil { + return nil, nil, errors.New("cannot build flagset on nil Tool Definition") + } + fs := flag.NewFlagSet(tool.Name, flag.ContinueOnError) + flagVars := map[string]any{} + + for name, pVal := range tool.InputSchema.Properties { + switch pv := pVal.(type) { + case *SchemaPrimitive: + switch pv.Type { + case "integer": + dst := fs.Int(name, 0, pv.Description) + flagVars[name] = dst + + case "boolean": + dst := fs.Bool(name, false, pv.Description) + flagVars[name] = dst + case "string": + dst := fs.String(name, "", pv.Description) + flagVars[name] = dst + default: + return nil, nil, fmt.Errorf("unknown schema primitive kind %q", pv.Type) + + } + case *SchemaArray: + strSlice := new(strSliceFlag) + fs.Var(strSlice, name, pv.Description) + flagVars[name] = strSlice + case *SchemaObject: + // TODO(burmudar): we can support SchemaObject as part of stdin echo '{ stuff }' | sg mcp commit-search + // not supported yet + // Also support sg mcp commit-search --json '{ stuff }' + } + } + + return fs, flagVars, nil +} diff --git a/internal/mcp/mcp_args_test.go b/internal/mcp/mcp_args_test.go new file mode 100644 index 0000000000..17d5b466e0 --- 
/dev/null +++ b/internal/mcp/mcp_args_test.go @@ -0,0 +1,97 @@ +package mcp + +import ( + "testing" +) + +func TestFlagSetParse(t *testing.T) { + toolJSON := []byte(`{ + "tools": [ + { + "name": "sg_test_tool", + "description": "test description", + "inputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "required": ["values"], + "properties": { + "repos": { + "type": "array", + "items": { + "type": "string" + } + }, + "tag": { + "type": "string", + "items": true + }, + "count": { + "type": "integer" + }, + "boolFlag": { + "type": "boolean" + } + } + }, + "outputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "properties": { + "result": { "type": "string" } + } + } + } + ] + }`) + + defs, err := loadToolDefinitions(toolJSON) + if err != nil { + t.Fatalf("failed to load tool json: %v", err) + } + + flagSet, vars, err := BuildArgFlagSet(defs["test-tool"]) + if err != nil { + t.Fatalf("failed to build flagset from mcp tool definition: %v", err) + } + + if len(vars) == 0 { + t.Fatalf("vars from buildArgFlagSet should not be empty") + } + + args := []string{"-repos=A", "-repos=B", "-count=10", "-boolFlag", "-tag=testTag"} + + if err := flagSet.Parse(args); err != nil { + t.Fatalf("flagset parsing failed: %v", err) + } + DerefFlagValues(vars) + + if v, ok := vars["repos"].([]string); ok { + if len(v) != 2 { + t.Fatalf("expected flag 'repos' values to have length %d but got %d", 2, len(v)) + } + } else { + t.Fatalf("expected flag 'repos' to have type of []string but got %T", v) + } + if v, ok := vars["tag"].(string); ok { + if v != "testTag" { + t.Fatalf("expected flag 'tag' values to have value %q but got %q", "testTag", v) + } + } else { + t.Fatalf("expected flag 'tag' to have type of string but got %T", v) + } + if v, ok := vars["count"].(int); ok { + if v != 10 { + t.Fatalf("expected flag 'count' values to have value %d but got %d", 10, v) + } + } else { + t.Fatalf("expected flag 'count' to 
have type of int but got %T", v) + } + if v, ok := vars["boolFlag"].(bool); ok { + if v != true { + t.Fatalf("expected flag 'boolFlag' values to have value %v but got %v", true, v) + } + } else { + t.Fatalf("expected flag 'boolFlag' to have type of bool but got %T", v) + } + +} diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index d671bb5119..b7518056a7 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -3,16 +3,17 @@ package mcp import ( _ "embed" "encoding/json" - "fmt" + "strings" "github.com/sourcegraph/sourcegraph/lib/errors" ) //go:embed mcp_tools.json -var _ []byte +var mcpToolListJSON []byte type ToolDef struct { - Name string `json:"name"` + Name string + RawName string `json:"name"` Description string `json:"description"` InputSchema SchemaObject `json:"inputSchema"` OutputSchema SchemaObject `json:"outputSchema"` @@ -62,13 +63,17 @@ type decoder struct { errors []error } -func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { +func LoadDefaultToolDefinitions() (map[string]*ToolDef, error) { + return loadToolDefinitions(mcpToolListJSON) +} + +func loadToolDefinitions(data []byte) (map[string]*ToolDef, error) { defs := struct { Tools []struct { Name string `json:"name"` Description string `json:"description"` - InputSchema rawSchema `json:"inputSchema"` - OutputSchema rawSchema `json:"outputSchema"` + InputSchema RawSchema `json:"inputSchema"` + OutputSchema RawSchema `json:"outputSchema"` } `json:"tools"` }{} @@ -80,12 +85,19 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { decoder := &decoder{} for _, t := range defs.Tools { - tools[t.Name] = &ToolDef{ - Name: t.Name, + // normalize the raw mcp tool name to be without the mcp identifiers + rawName := t.Name + name, _ := strings.CutPrefix(rawName, "sg_") + name = strings.ReplaceAll(name, "_", "-") + + tool := &ToolDef{ + Name: name, + RawName: rawName, Description: t.Description, InputSchema: 
decoder.decodeRootSchema(t.InputSchema), OutputSchema: decoder.decodeRootSchema(t.OutputSchema), } + tools[tool.Name] = tool } if len(decoder.errors) > 0 { @@ -97,10 +109,11 @@ func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { func (d *decoder) decodeRootSchema(r rawSchema) SchemaObject { return SchemaObject{ - Type: r.Type, - Description: r.Description, - Required: r.Required, - Properties: d.decodeProperties(r.Properties), + Schema: r.SchemaVersion, + Type: r.Type, + Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } } @@ -108,10 +121,10 @@ func (d *decoder) decodeSchema(r *rawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ - Type: r.Type, - Description: r.Description, - Required: r.Required, - Properties: d.decodeProperties(r.Properties), + Type: r.Type, + Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } case "array": var items SchemaValue @@ -125,7 +138,7 @@ func (d *decoder) decodeSchema(r *rawSchema) SchemaValue { if err := json.Unmarshal(r.Items, &itemRaw); err == nil { items = d.decodeSchema(&itemRaw) } else { - d.errors = append(d.errors, errors.Errorf("failed to unmarshal array items: %w", err)) + d.errors = append(d.errors, errors.Wrap(err, "failed to unmarshal array items")) } } } @@ -147,7 +160,7 @@ func (d *decoder) decodeProperties(props map[string]json.RawMessage) map[string] for name, raw := range props { var r rawSchema if err := json.Unmarshal(raw, &r); err != nil { - d.errors = append(d.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) + d.errors = append(d.errors, errors.Wrapf(err, "failed to parse property %q: %w", name)) continue } res[name] = d.decodeSchema(&r) diff --git a/internal/mcp/mcp_request.go b/internal/mcp/mcp_request.go new file mode 100644 index 0000000000..dbcb0ed97b --- /dev/null +++ b/internal/mcp/mcp_request.go @@ -0,0 +1,93 @@ +package mcp + +import ( + "bytes" 
+ "context" + "encoding/json" + "io" + "net/http" + + "github.com/sourcegraph/src-cli/internal/api" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +const McpURLPath = ".api/mcp/v1" + +func DoToolRequest(ctx context.Context, client api.Client, tool *ToolDef, vars map[string]any) (*http.Response, error) { + jsonRPC := struct { + Version string `json:"jsonrpc"` + ID int `json:"id"` + Method string `json:"method"` + Params any `json:"params"` + }{ + Version: "2.0", + ID: 1, + Method: "tools/call", + Params: struct { + Name string `json:"name"` + Arguments map[string]any `json:"arguments"` + }{ + Name: tool.RawName, + Arguments: vars, + }, + } + + buf := bytes.NewBuffer(nil) + data, err := json.Marshal(jsonRPC) + if err != nil { + return nil, err + } + buf.Write(data) + + req, err := client.NewHTTPRequest(ctx, http.MethodPost, McpURLPath, buf) + if err != nil { + return nil, err + } + req.Header.Add("Content-Type", "application/json") + req.Header.Add("Accept", "*/*") + + return client.Do(req) +} + +func DecodeToolResponse(resp *http.Response) (map[string]json.RawMessage, error) { + data, err := readSSEResponseData(resp) + if err != nil { + return nil, err + } + + if data == nil { + return map[string]json.RawMessage{}, nil + } + + jsonRPCResp := struct { + Version string `json:"jsonrpc"` + ID int `json:"id"` + Result struct { + Content []json.RawMessage `json:"content"` + StructuredContent map[string]json.RawMessage `json:"structuredContent"` + } `json:"result"` + }{} + if err := json.Unmarshal(data, &jsonRPCResp); err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal MCP JSON-RPC response") + } + + return jsonRPCResp.Result.StructuredContent, nil +} +func readSSEResponseData(resp *http.Response) ([]byte, error) { + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + // The response is an SSE response + // event: + // data: + lines := bytes.SplitSeq(data, []byte("\n")) + for line := range lines { + if jsonData, ok := 
bytes.CutPrefix(line, []byte("data: ")); ok { + return jsonData, nil + } + } + return nil, errors.New("no data found in SSE response") + +} From 720a04ade8837f0629a4ddf1885d53a6b2a5fabc Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 9 Dec 2025 12:13:37 +0200 Subject: [PATCH 13/15] merge fixup --- cmd/src/mcp.go | 8 +++++++- internal/mcp/mcp_parse.go | 21 ++++++++++----------- internal/mcp/mcp_parse_test.go | 8 ++++---- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/cmd/src/mcp.go b/cmd/src/mcp.go index d7a9127f6a..4510eb320f 100644 --- a/cmd/src/mcp.go +++ b/cmd/src/mcp.go @@ -1,10 +1,16 @@ package main import ( + "context" + "encoding/json" "flag" "fmt" + "strings" + "github.com/sourcegraph/src-cli/internal/api" "github.com/sourcegraph/src-cli/internal/mcp" + + "github.com/sourcegraph/sourcegraph/lib/errors" ) func init() { @@ -16,7 +22,7 @@ func init() { } func mcpMain(args []string) error { fmt.Println("NOTE: This command is still experimental") - tools, err := mcp.LoadToolDefinitions() + tools, err := mcp.LoadDefaultToolDefinitions() if err != nil { return err } diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index b7518056a7..31243fee70 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -72,8 +72,8 @@ func loadToolDefinitions(data []byte) (map[string]*ToolDef, error) { Tools []struct { Name string `json:"name"` Description string `json:"description"` - InputSchema RawSchema `json:"inputSchema"` - OutputSchema RawSchema `json:"outputSchema"` + InputSchema rawSchema `json:"inputSchema"` + OutputSchema rawSchema `json:"outputSchema"` } `json:"tools"` }{} @@ -109,11 +109,10 @@ func loadToolDefinitions(data []byte) (map[string]*ToolDef, error) { func (d *decoder) decodeRootSchema(r rawSchema) SchemaObject { return SchemaObject{ - Schema: r.SchemaVersion, - Type: r.Type, - Description: r.Description, - Required: r.Required, - Properties: d.decodeProperties(r.Properties), + Type: r.Type, + 
Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } } @@ -121,10 +120,10 @@ func (d *decoder) decodeSchema(r *rawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ - Type: r.Type, - Description: r.Description, - Required: r.Required, - Properties: d.decodeProperties(r.Properties), + Type: r.Type, + Description: r.Description, + Required: r.Required, + Properties: d.decodeProperties(r.Properties), } case "array": var items SchemaValue diff --git a/internal/mcp/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go index c826ae9517..ecaf691d16 100644 --- a/internal/mcp/mcp_parse_test.go +++ b/internal/mcp/mcp_parse_test.go @@ -37,7 +37,7 @@ func TestLoadToolDefinitions(t *testing.T) { ] }`) - tools, err := LoadToolDefinitions(toolJSON) + tools, err := loadToolDefinitions(toolJSON) if err != nil { t.Fatalf("Failed to load tool definitions: %v", err) } @@ -46,13 +46,13 @@ func TestLoadToolDefinitions(t *testing.T) { t.Fatalf("Expected 1 tool, got %d", len(tools)) } - tool := tools["test_tool"] + tool := tools["test-tool"] if tool == nil { t.Fatal("Tool 'test_tool' not found") } - if tool.Name != "test_tool" { - t.Errorf("Expected name 'test_tool', got '%s'", tool.Name) + if tool.RawName != "test_tool" { + t.Errorf("Expected name 'test_tool', got '%s'", tool.RawName) } inputSchema := tool.InputSchema From e8857dcaace342ee68e28637fce592da0ed9c620 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 9 Dec 2025 12:20:22 +0200 Subject: [PATCH 14/15] fix lint --- cmd/src/config_list.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/src/config_list.go b/cmd/src/config_list.go index 04d172dbf9..d15aefe135 100644 --- a/cmd/src/config_list.go +++ b/cmd/src/config_list.go @@ -4,8 +4,8 @@ import ( "flag" "fmt" + "context" "github.com/sourcegraph/src-cli/internal/api" - "golang.org/x/net/context" ) func init() { From 49fddd07dafd3c15e5402b55b1f893d5e8091b42 Mon Sep 17 00:00:00 
2001 From: William Bezuidenhout Date: Tue, 9 Dec 2025 12:26:23 +0200 Subject: [PATCH 15/15] fix imports --- cmd/src/config_list.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/src/config_list.go b/cmd/src/config_list.go index d15aefe135..ea09f8066f 100644 --- a/cmd/src/config_list.go +++ b/cmd/src/config_list.go @@ -5,6 +5,7 @@ import ( "fmt" "context" + "github.com/sourcegraph/src-cli/internal/api" )