From 8c829795501e5ac7300d1d11f96bf9d74f8edb5e Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Thu, 20 Nov 2025 10:07:39 -0500 Subject: [PATCH 01/10] feat: add temporal query parameters to MCP tools Add comprehensive temporal filtering capabilities with new search_commits tool, enhanced list_repos activity filtering, and improved search_code temporal support. **New Features:** - Add search_commits tool for Git commit history search with time range, author, and query filtering - Add dateUtils.ts with comprehensive date parsing supporting ISO 8601 and relative formats ("30 days ago", "last week", "yesterday") - Add date range validation to prevent invalid ranges (since > until) - Add activeAfter/activeBefore parameters to list_repos for commit activity filtering - Add gitRevision, since/until, and includeDeletedFiles parameters to search_code - Add 30-second timeout for git operations to handle large repositories - Add detailed error messages for common git failures - Move cache directory constants to @sourcebot/shared package **Fixes:** - Fix getRepos() pagination bug where take limit was applied before activity filtering, returning fewer results than requested **Testing & Documentation:** - Add comprehensive test coverage: 106 tests (59 dateUtils, 24 gitApi, 23 searchApi) - Clarify temporal filtering semantics: search_code filters by INDEX time (when Sourcebot indexed), while list_repos and search_commits filter by COMMIT time - Clarify that repositories are cloned on Sourcebot server disk, not user's local disk, and cloning process may not be finished when search_commits is called - Update MCP tool descriptions with temporal parameter examples and date format documentation - Add "Date Format Examples" section to README - Update CHANGELOG with all improvements All changes are backward compatible with optional parameters. Resolves #511 Signed-off-by: Wayne Sun --- packages/backend/src/constants.ts | 6 +- packages/mcp/CHANGELOG.md | 21 + packages/mcp/README.md | 53 ++- packages/mcp/src/client.ts | 34 +- packages/mcp/src/index.ts | 156 ++++++- packages/mcp/src/schemas.ts | 21 + packages/mcp/src/types.ts | 7 + packages/shared/src/constants.ts | 6 + packages/web/src/actions.ts | 70 ++- .../web/src/app/api/(server)/commits/route.ts | 34 ++ .../web/src/app/api/(server)/repos/route.ts | 18 +- .../web/src/features/search/dateUtils.test.ts | 379 +++++++++++++++ packages/web/src/features/search/dateUtils.ts | 187 ++++++++ .../web/src/features/search/gitApi.test.ts | 438 ++++++++++++++++++ packages/web/src/features/search/gitApi.ts | 150 ++++++ packages/web/src/features/search/schemas.ts | 8 + .../web/src/features/search/searchApi.test.ts | 324 +++++++++++++ packages/web/src/features/search/searchApi.ts | 64 ++- 18 files changed, 1932 insertions(+), 44 deletions(-) create mode 100644 packages/web/src/app/api/(server)/commits/route.ts create mode 100644 packages/web/src/features/search/dateUtils.test.ts create mode 100644 packages/web/src/features/search/dateUtils.ts create mode 100644 packages/web/src/features/search/gitApi.test.ts create mode 100644 packages/web/src/features/search/gitApi.ts create mode 100644 packages/web/src/features/search/searchApi.test.ts diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index b11f51020..0cdc4eb48 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -1,6 +1,5 @@ import { CodeHostType } from "@sourcebot/db"; -import { env } from "@sourcebot/shared"; -import path from "path"; + export const SINGLE_TENANT_ORG_ID = 1; @@ -9,8 +8,7 @@ export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES: CodeHostType[] = [ 'gitlab', ]; -export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); -export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); +export { REPOS_CACHE_DIR, INDEX_CACHE_DIR } from "@sourcebot/shared"; // Maximum time to wait for current job to finish export const GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS = 5 * 1000; // 5 seconds diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 06f9e2a4a..2a9f4e3e3 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday") +- Added `search_commits` tool to search commits by actual commit time with full temporal filtering +- Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo) +- Added `gitRevision`, and `includeDeletedFiles` parameters to `search_code` +- Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by commit time - actual git commit dates) +- Added date range validation to prevent invalid date ranges (since > until) +- Added 30-second timeout for git operations to handle large repositories +- Added enhanced error messages for git operations (timeout, repository not found, invalid git repository, ambiguous arguments) +- Added clarification that repositories must be cloned on Sourcebot server disk for `search_commits` to work +- Added comprehensive temporal parameter documentation to README with clear distinction between index time and commit time filtering +- Added comprehensive unit tests for date parsing utilities (90+ test cases) +- Added unit tests for git commit search functionality with mocking +- Added integration tests for temporal parameter validation + +### Changed +- Enhanced `list_repos` tool with better pagination feedback messages + +### Fixed +- Fixed `list_repos` pagination bug where `take` limit was applied before activity filtering, returning fewer results than requested + ## [1.0.9] - 2025-11-17 ### Added diff --git a/packages/mcp/README.md b/packages/mcp/README.md index a0a875a0f..5ac09c1b7 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -166,6 +166,8 @@ For a more detailed guide, checkout [the docs](https://docs.sourcebot.dev/docs/f Fetches code that matches the provided regex pattern in `query`. +**Temporal Filtering**: Use `since` and `until` to filter by repository index time (when Sourcebot last indexed the repo). This is different from commit time. See `search_commits` for commit-time filtering. +
Parameters @@ -176,6 +178,10 @@ Fetches code that matches the provided regex pattern in `query`. | `filterByLanguages` | no | Restrict search to specific languages (GitHub linguist format, e.g., Python, JavaScript). | | `caseSensitive` | no | Case sensitive search (default: false). | | `includeCodeSnippets` | no | Include code snippets in results (default: false). | +| `gitRevision` | no | Git revision to search (e.g., 'main', 'develop', 'v1.0.0'). Defaults to HEAD. | +| `since` | no | Only search repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `until` | no | Only search repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). | +| `includeDeletedFiles` | no | Include deleted files in search results (default: false). | | `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). |
@@ -184,14 +190,18 @@ Fetches code that matches the provided regex pattern in `query`. Lists repositories indexed by Sourcebot with optional filtering and pagination. +**Temporal Filtering**: Use `activeAfter` and `activeBefore` to filter repositories by actual commit activity (requires repositories to be cloned locally). +
Parameters -| Name | Required | Description | -|:-------------|:---------|:--------------------------------------------------------------------| -| `query` | no | Filter repositories by name (case-insensitive). | -| `pageNumber` | no | Page number (1-indexed, default: 1). | -| `limit` | no | Number of repositories per page (default: 50). | +| Name | Required | Description | +|:----------------|:---------|:-----------------------------------------------------------------------------------------------| +| `query` | no | Filter repositories by name (case-insensitive). | +| `pageNumber` | no | Page number (1-indexed, default: 1). | +| `limit` | no | Number of repositories per page (default: 50). | +| `activeAfter` | no | Only return repos with commits after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `activeBefore` | no | Only return repos with commits before this date. Supports ISO 8601 or relative (e.g., "yesterday"). |
@@ -208,6 +218,39 @@ Fetches the source code for a given file. | `repoId` | yes | The Sourcebot repository ID. | +### search_commits + +Searches for commits in a specific repository based on actual commit time (NOT index time). + +**Requirements**: Repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. Use `list_repos` first to get the repository ID. + +**Date Formats**: Supports ISO 8601 dates (e.g., "2024-01-01") and relative formats (e.g., "30 days ago", "last week", "yesterday"). + +
+Parameters + +| Name | Required | Description | +|:-----------|:---------|:-----------------------------------------------------------------------------------------------| +| `repoId` | yes | Repository ID (obtain from `list_repos`). | +| `query` | no | Search query to filter commits by message (case-insensitive). | +| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. | +| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. | +| `author` | no | Filter by author name or email (supports partial matches). | +| `maxCount` | no | Maximum number of commits to return (default: 50). | + +
+ +## Date Format Examples + +All temporal parameters support: +- **ISO 8601**: `"2024-01-01"`, `"2024-12-31T23:59:59Z"` +- **Relative dates**: `"30 days ago"`, `"1 week ago"`, `"last month"`, `"yesterday"` + +**Important**: `search_code` and `list_repos` temporal filters work differently: +- `search_code` `since`/`until`: Filters by **index time** (when Sourcebot indexed the repo) +- `list_repos` `activeAfter`/`activeBefore`: Filters by **commit time** (actual git commit dates) +- `search_commits` `since`/`until`: Filters by **commit time** (actual git commit dates) + ## Supported Code Hosts Sourcebot supports the following code hosts: diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 3754c605f..fe3995c32 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js'; -import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js'; +import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js'; +import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js'; import { isServiceError } from './utils.js'; export const search = async (request: SearchRequest): Promise => { @@ -21,8 +21,16 @@ export const search = async (request: SearchRequest): Promise => { - const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, { +export const listRepos = async (params?: { activeAfter?: string, activeBefore?: string }): Promise => { + const url = new URL(`${env.SOURCEBOT_HOST}/api/repos`); + if (params?.activeAfter) { + url.searchParams.append('activeAfter', params.activeAfter); + } + if (params?.activeBefore) { + url.searchParams.append('activeBefore', params.activeBefore); + } + + const result = await fetch(url.toString(), { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -55,3 +63,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise => { + const result = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Org-Domain': '~', + ...(env.SOURCEBOT_API_KEY ? { 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request) + }).then(response => response.json()); + + if (isServiceError(result)) { + return result; + } + + return searchCommitsResponseSchema.parse(result); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 3e4750a72..d6e9e5866 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { listRepos, search, getFileSource } from './client.js'; +import { listRepos, search, getFileSource, searchCommits } from './client.js'; import { env, numberSchema } from './env.js'; import { listReposRequestSchema } from './schemas.js'; import { TextContent } from './types.js'; @@ -49,6 +49,22 @@ server.tool( .boolean() .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`) .optional(), + gitRevision: z + .string() + .describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), + since: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week', 'yesterday').`) + .optional(), + until: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`) + .optional(), + includeDeletedFiles: z + .boolean() + .describe(`Whether to include deleted files in the search results (default: false).`) + .optional(), maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val)) @@ -61,6 +77,10 @@ server.tool( maxTokens = env.DEFAULT_MINIMUM_TOKENS, includeCodeSnippets = false, caseSensitive = false, + gitRevision, + since, + until, + includeDeletedFiles = false, }) => { if (repoIds.length > 0) { query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`; @@ -80,6 +100,10 @@ server.tool( query, matches: env.DEFAULT_MATCHES, contextLines: env.DEFAULT_CONTEXT_LINES, + gitRevision, + since, + until, + includeDeletedFiles, }); if (isServiceError(response)) { @@ -164,22 +188,98 @@ server.tool( } ); +server.tool( + "search_commits", + `Searches for commits in a specific repository based on actual commit time (NOT index time). + + **Requirements**: The repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. If the repository is not found on the server disk, an error will be returned asking you to try again later. + + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") or relative formats (e.g., "30 days ago", "last week", "yesterday"). + + **YOU MUST** call 'list_repos' first to obtain the exact repository ID. + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + repoId: z.number().describe(`The ID of the repository to search in. Obtain this by calling 'list_repos' first.`), + query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(), + since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(), + until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(), + author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(), + maxCount: z.number().describe(`Maximum number of commits to return (default: 50).`).optional(), + }, + async ({ repoId, query, since, until, author, maxCount }) => { + const result = await searchCommits({ + repoId, + query, + since, + until, + author, + maxCount, + }); + + if (isServiceError(result)) { + return { + content: [{ type: "text", text: `Error: ${result.message}` }], + isError: true, + }; + } + + return { + content: [{ type: "text", text: JSON.stringify(result, null, 2) }], + }; + } +); + server.tool( "list_repos", - "Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.", - listReposRequestSchema.shape, - async ({ query, pageNumber = 1, limit = 50 }: { + `Lists repositories in the organization with optional filtering and pagination. + + **Temporal Filtering**: When using 'activeAfter' or 'activeBefore', only repositories with commits in the specified timeframe are returned. This requires repositories to be cloned locally. Repositories not on disk will be silently excluded. + + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") and relative dates (e.g., "30 days ago", "last week", "yesterday"). + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + query: z + .string() + .describe("Filter repositories by name (case-insensitive).") + .optional(), + pageNumber: z + .number() + .int() + .positive() + .describe("Page number (1-indexed, default: 1)") + .default(1), + limit: z + .number() + .int() + .positive() + .describe("Number of repositories per page (default: 50)") + .default(50), + activeAfter: z + .string() + .describe("Only return repositories with commits after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').") + .optional(), + activeBefore: z + .string() + .describe("Only return repositories with commits before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').") + .optional(), + }, + async ({ query, pageNumber = 1, limit = 50, activeAfter, activeBefore }: { query?: string; pageNumber?: number; limit?: number; + activeAfter?: string; + activeBefore?: string; }) => { - const response = await listRepos(); + const response = await listRepos({ activeAfter, activeBefore }); if (isServiceError(response)) { return { content: [{ type: "text", text: `Error listing repositories: ${response.message}`, }], + isError: true, }; } @@ -196,30 +296,54 @@ server.tool( // Sort alphabetically for consistent pagination filtered.sort((a, b) => a.repoName.localeCompare(b.repoName)); + // Calculate total count before pagination + const totalCount = filtered.length; + // Apply pagination const startIndex = (pageNumber - 1) * limit; const endIndex = startIndex + limit; const paginated = filtered.slice(startIndex, endIndex); // Format output - const content: TextContent[] = paginated.map(repo => { - return { - type: "text", - text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`, - } - }); + const content: TextContent[] = []; - // Add pagination info - if (content.length === 0 && filtered.length > 0) { + if (paginated.length === 0 && totalCount > 0) { + // User requested a page beyond available results + const totalPages = Math.ceil(totalCount / limit); content.push({ type: "text", - text: `No results on page ${pageNumber}. Total matching repositories: ${filtered.length}`, + text: `No results on page ${pageNumber}. Total matching repositories: ${totalCount} (${totalPages} page${totalPages !== 1 ? 's' : ''}). Try pageNumber between 1 and ${totalPages}.`, }); - } else if (filtered.length > endIndex) { + } else if (paginated.length === 0) { + // No repositories match the filters content.push({ type: "text", - text: `Showing ${paginated.length} repositories (page ${pageNumber}). Total matching: ${filtered.length}. Use pageNumber ${pageNumber + 1} to see more.`, + text: `No repositories found matching the specified criteria.${activeAfter || activeBefore ? ' Note: Temporal filtering requires repositories to be cloned locally.' : ''}`, }); + } else { + // Add repository listings + content.push(...paginated.map(repo => ({ + type: "text" as const, + text: `id: ${repo.repoName}\nurl: ${repo.webUrl}${repo.indexedAt ? `\nlast_indexed: ${repo.indexedAt}` : ''}`, + }))); + + // Add pagination info if there are more results + const totalPages = Math.ceil(totalCount / limit); + if (totalPages > 1) { + const paginationInfo = [ + `Page ${pageNumber} of ${totalPages}`, + `Showing ${paginated.length} of ${totalCount} repositories`, + ]; + + if (pageNumber < totalPages) { + paginationInfo.push(`Use pageNumber=${pageNumber + 1} to see more`); + } + + content.push({ + type: "text", + text: `\n--- ${paginationInfo.join(' | ')} ---`, + }); + } } return { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index ba46b2f15..d5ca0f436 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -191,3 +191,24 @@ export const serviceErrorSchema = z.object({ errorCode: z.string(), message: z.string(), }); + +export const searchCommitsRequestSchema = z.object({ + repoId: z.number(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().optional(), +}); + +export const commitSchema = z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +}); + +export const searchCommitsResponseSchema = z.array(commitSchema); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 9c858fe5b..880addfe4 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -10,6 +10,9 @@ import { fileSourceRequestSchema, symbolSchema, serviceErrorSchema, + searchCommitsRequestSchema, + searchCommitsResponseSchema, + commitSchema, } from "./schemas.js"; import { z } from "zod"; @@ -29,3 +32,7 @@ export type FileSourceResponse = z.infer; export type TextContent = { type: "text", text: string }; export type ServiceError = z.infer; + +export type SearchCommitsRequest = z.infer; +export type SearchCommitsResponse = z.infer; +export type Commit = z.infer; diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index 42e1aa694..c416d5878 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -30,3 +30,9 @@ export const DEFAULT_CONFIG_SETTINGS: ConfigSettings = { experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours } + +import { env } from "./env.server.js"; +import path from "path"; + +export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); +export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index e194f808a..17eab0bfc 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -312,12 +312,12 @@ export const createApiKey = async (name: string, domain: string): Promise<{ key: withAuth((userId) => withOrgMembership(userId, domain, async ({ org, userRole }) => { if (env.EXPERIMENT_DISABLE_API_KEY_CREATION_FOR_NON_ADMIN_USERS === 'true' && userRole !== OrgRole.OWNER) { - logger.error(`API key creation is disabled for non-admin users. User ${userId} is not an owner.`); - return { - statusCode: StatusCodes.FORBIDDEN, - errorCode: ErrorCode.INSUFFICIENT_PERMISSIONS, - message: "API key creation is disabled for non-admin users.", - } satisfies ServiceError; + logger.error(`API key creation is disabled for non-admin users. User ${userId} is not an owner.`); + return { + statusCode: StatusCodes.FORBIDDEN, + errorCode: ErrorCode.INSUFFICIENT_PERMISSIONS, + message: "API key creation is disabled for non-admin users.", + } satisfies ServiceError; } const existingApiKey = await prisma.apiKey.findFirst({ @@ -460,23 +460,75 @@ export const getUserApiKeys = async (domain: string): Promise<{ name: string; cr })); }))); +import { searchCommits } from "./features/search/gitApi"; + export const getRepos = async ({ where, take, + activeAfter, + activeBefore, }: { where?: Prisma.RepoWhereInput, - take?: number + take?: number, + activeAfter?: string, + activeBefore?: string, } = {}) => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { + // When filtering by activity, we need to fetch all repos first, + // then apply pagination after filtering + const shouldFilterByActivity = activeAfter || activeBefore; + const repos = await prisma.repo.findMany({ where: { orgId: org.id, ...where, }, - take, + // Only apply take limit if NOT filtering by activity + // Otherwise, we'll apply it after activity filtering + take: shouldFilterByActivity ? undefined : take, + orderBy: { + name: 'asc', // Consistent ordering for pagination + }, }); - return repos.map((repo) => repositoryQuerySchema.parse({ + let filteredRepos = repos; + + if (shouldFilterByActivity) { + // Filter repos by commit activity using git log + // Note: This requires repos to be cloned locally + const activityChecks = await Promise.all(repos.map(async (repo) => { + try { + const commits = await searchCommits({ + repoId: repo.id, + since: activeAfter, + until: activeBefore, + maxCount: 1, + }); + + if (Array.isArray(commits) && commits.length > 0) { + return repo; + } + } catch (e) { + // If error (e.g. repo not found on disk), exclude it from results + // This is expected for repos that haven't been cloned yet + const errorMessage = e instanceof Error ? e.message : String(e); + if (!errorMessage.includes('does not exist')) { + // Log unexpected errors, but not "repo not on disk" errors + console.error(`Error checking activity for repo ${repo.id} (${repo.name}):`, e); + } + } + return null; + })); + + filteredRepos = activityChecks.filter((r): r is typeof repos[0] => r !== null); + + // Apply pagination after filtering + if (take) { + filteredRepos = filteredRepos.slice(0, take); + } + } + + return filteredRepos.map((repo) => repositoryQuerySchema.parse({ codeHostType: repo.external_codeHostType, repoId: repo.id, repoName: repo.name, diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts new file mode 100644 index 000000000..4408a8e03 --- /dev/null +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -0,0 +1,34 @@ +import { searchCommits, SearchCommitsRequest } from "@/features/search/gitApi"; +import { serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { NextRequest } from "next/server"; +import { z } from "zod"; +import { schemaValidationError } from "@/lib/serviceError"; + +const searchCommitsRequestSchema = z.object({ + repoId: z.number(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().optional(), +}); + +export const POST = async (request: NextRequest) => { + const body = await request.json(); + const parsed = await searchCommitsRequestSchema.safeParseAsync(body); + + if (!parsed.success) { + return serviceErrorResponse( + schemaValidationError(parsed.error) + ); + } + + const response = await searchCommits(parsed.data); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + return Response.json(response); +} diff --git a/packages/web/src/app/api/(server)/repos/route.ts b/packages/web/src/app/api/(server)/repos/route.ts index acc3f9ce0..2ec8bc3be 100644 --- a/packages/web/src/app/api/(server)/repos/route.ts +++ b/packages/web/src/app/api/(server)/repos/route.ts @@ -2,12 +2,20 @@ import { getRepos } from "@/actions"; import { serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { GetReposResponse } from "@/lib/types"; +import { NextRequest } from "next/server"; -export const GET = async () => { - const response: GetReposResponse = await getRepos(); - if (isServiceError(response)) { - return serviceErrorResponse(response); +export const GET = async (request: NextRequest) => { + const searchParams = request.nextUrl.searchParams; + const activeAfter = searchParams.get('activeAfter') ?? undefined; + const activeBefore = searchParams.get('activeBefore') ?? undefined; + + const repos: GetReposResponse = await getRepos({ + activeAfter, + activeBefore, + }); + if (isServiceError(repos)) { + return serviceErrorResponse(repos); } - return Response.json(response); + return Response.json(repos); } diff --git a/packages/web/src/features/search/dateUtils.test.ts b/packages/web/src/features/search/dateUtils.test.ts new file mode 100644 index 000000000..58324fbc5 --- /dev/null +++ b/packages/web/src/features/search/dateUtils.test.ts @@ -0,0 +1,379 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { + parseTemporalDate, + validateDateRange, + toDbDate, + toGitDate, +} from './dateUtils'; + +describe('dateUtils', () => { + // Mock the current time for consistent testing + const MOCK_NOW = new Date('2024-06-15T12:00:00.000Z'); + + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(MOCK_NOW); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('parseTemporalDate', () => { + describe('ISO 8601 dates', () => { + it('should parse ISO date (YYYY-MM-DD)', () => { + const result = parseTemporalDate('2024-01-01'); + expect(result).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should parse ISO datetime with timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00.000Z'); + }); + + it('should parse ISO datetime without timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00'); + expect(result).toBeDefined(); + expect(result).toContain('2024-01-01'); + }); + + it('should return undefined for undefined input', () => { + const result = parseTemporalDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = parseTemporalDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('relative dates - yesterday', () => { + it('should parse "yesterday"', () => { + const result = parseTemporalDate('yesterday'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "YESTERDAY" (case insensitive)', () => { + const result = parseTemporalDate('YESTERDAY'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + }); + + describe('relative dates - N units ago', () => { + it('should parse "1 day ago"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "30 days ago"', () => { + const result = parseTemporalDate('30 days ago'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should parse "1 week ago"', () => { + const result = parseTemporalDate('1 week ago'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "2 weeks ago"', () => { + const result = parseTemporalDate('2 weeks ago'); + expect(result).toBe('2024-06-01T12:00:00.000Z'); + }); + + it('should parse "1 month ago"', () => { + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "3 months ago"', () => { + const result = parseTemporalDate('3 months ago'); + expect(result).toBe('2024-03-15T12:00:00.000Z'); + }); + + it('should parse "1 year ago"', () => { + const result = parseTemporalDate('1 year ago'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should parse "2 hours ago"', () => { + const result = parseTemporalDate('2 hours ago'); + expect(result).toBe('2024-06-15T10:00:00.000Z'); + }); + + it('should parse "30 minutes ago"', () => { + const result = parseTemporalDate('30 minutes ago'); + expect(result).toBe('2024-06-15T11:30:00.000Z'); + }); + + it('should parse "45 seconds ago"', () => { + const result = parseTemporalDate('45 seconds ago'); + expect(result).toBe('2024-06-15T11:59:15.000Z'); + }); + + it('should handle singular "day" without "s"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('30 DAYS AGO'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + }); + + describe('relative dates - last unit', () => { + it('should parse "last week"', () => { + const result = parseTemporalDate('last week'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "last month"', () => { + const result = parseTemporalDate('last month'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "last year"', () => { + const result = parseTemporalDate('last year'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('LAST WEEK'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('invalid or unknown formats', () => { + it('should return original string for unrecognized format', () => { + const result = parseTemporalDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should return original string for git-specific formats', () => { + // Git understands these but our parser doesn't convert them + const result = parseTemporalDate('2 weeks 3 days ago'); + expect(result).toBe('2 weeks 3 days ago'); + }); + }); + }); + + describe('validateDateRange', () => { + it('should return null for valid date range', () => { + const error = validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when only since is provided', () => { + const error = validateDateRange('2024-01-01', undefined); + expect(error).toBeNull(); + }); + + it('should return null when only until is provided', () => { + const error = validateDateRange(undefined, '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when both are undefined', () => { + const error = validateDateRange(undefined, undefined); + expect(error).toBeNull(); + }); + + it('should return error when since > until', () => { + const error = validateDateRange('2024-12-31', '2024-01-01'); + expect(error).toContain('since'); + expect(error).toContain('until'); + expect(error).toContain('before'); + }); + + it('should validate relative dates', () => { + const error = validateDateRange('30 days ago', '1 day ago'); + expect(error).toBeNull(); + }); + + it('should return error for invalid relative date range', () => { + const error = validateDateRange('1 day ago', '30 days ago'); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should handle mixed ISO and relative dates', () => { + const error = validateDateRange('2024-01-01', '30 days ago'); + expect(error).toBeNull(); // 2024-01-01 is before 30 days ago + }); + + it('should return null for same date', () => { + const error = validateDateRange('2024-06-15', '2024-06-15'); + expect(error).toBeNull(); + }); + }); + + describe('toDbDate', () => { + it('should convert ISO date to Date object', () => { + const result = toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative date to Date object', () => { + const result = toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should return undefined for undefined input', () => { + const result = toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = toDbDate(''); + expect(result).toBeUndefined(); + }); + + it('should handle "yesterday"', () => { + const result = toDbDate('yesterday'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should handle "last week"', () => { + const result = toDbDate('last week'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('toGitDate', () => { + it('should preserve ISO date format', () => { + const result = toGitDate('2024-01-01'); + expect(result).toBe('2024-01-01'); + }); + + it('should preserve ISO datetime format', () => { + const result = toGitDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00Z'); + }); + + it('should preserve "N days ago" format', () => { + const result = toGitDate('30 days ago'); + expect(result).toBe('30 days ago'); + }); + + it('should preserve "yesterday" format', () => { + const result = toGitDate('yesterday'); + expect(result).toBe('yesterday'); + }); + + it('should preserve "last week" format', () => { + const result = toGitDate('last week'); + expect(result).toBe('last week'); + }); + + it('should preserve "last month" format', () => { + const result = toGitDate('last month'); + expect(result).toBe('last month'); + }); + + it('should preserve "last year" format', () => { + const result = toGitDate('last year'); + expect(result).toBe('last year'); + }); + + it('should return undefined for undefined input', () => { + const result = toGitDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should convert unrecognized format to ISO', () => { + // For formats git doesn't natively understand, we convert to ISO + const result = toGitDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should preserve relative time formats', () => { + const result = toGitDate('2 weeks ago'); + expect(result).toBe('2 weeks ago'); + }); + }); + + describe('edge cases', () => { + it('should handle dates at month boundaries', () => { + vi.setSystemTime(new Date('2024-03-31T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + // JavaScript Date handles month rollover + expect(result).toBeDefined(); + }); + + it('should handle dates at year boundaries', () => { + vi.setSystemTime(new Date('2024-01-15T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2023-12-15T12:00:00.000Z'); + }); + + it('should handle leap year February', () => { + vi.setSystemTime(new Date('2024-03-01T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-02-01T12:00:00.000Z'); + }); + + it('should handle midnight times', () => { + vi.setSystemTime(new Date('2024-06-15T00:00:00.000Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T00:00:00.000Z'); + }); + + it('should handle end of day times', () => { + vi.setSystemTime(new Date('2024-06-15T23:59:59.999Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T23:59:59.999Z'); + }); + }); + + describe('integration scenarios', () => { + it('should correctly validate a typical user query range', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + const validationError = validateDateRange(since, until); + + expect(parsedSince).toBe('2024-05-16T12:00:00.000Z'); + expect(parsedUntil).toBe('2024-06-14T12:00:00.000Z'); + expect(validationError).toBeNull(); + }); + + it('should correctly convert for database queries', () => { + const since = '7 days ago'; + const until = 'yesterday'; + + const dbSince = toDbDate(since); + const dbUntil = toDbDate(until); + + expect(dbSince).toBeInstanceOf(Date); + expect(dbUntil).toBeInstanceOf(Date); + expect(dbSince!.getTime()).toBeLessThan(dbUntil!.getTime()); + }); + + it('should correctly preserve for git commands', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + // Git natively understands these, so they're preserved + expect(gitSince).toBe('30 days ago'); + expect(gitUntil).toBe('yesterday'); + }); + + it('should handle mixed ISO and relative dates in range validation', () => { + const since = '2024-01-01'; + const until = '7 days ago'; + + const validationError = validateDateRange(since, until); + expect(validationError).toBeNull(); + }); + }); +}); diff --git a/packages/web/src/features/search/dateUtils.ts b/packages/web/src/features/search/dateUtils.ts new file mode 100644 index 000000000..d51dbb622 --- /dev/null +++ b/packages/web/src/features/search/dateUtils.ts @@ -0,0 +1,187 @@ +/** + * Utilities for parsing and validating date parameters for temporal queries. + * Supports both absolute (ISO 8601) and relative date formats. + */ + +/** + * Parse a date string that can be either: + * - ISO 8601 format (e.g., "2024-01-01", "2024-01-01T12:00:00Z") + * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday") + * + * Returns an ISO 8601 string suitable for both database queries and git log. + * + * @param dateStr - The date string to parse + * @returns ISO 8601 date string or null if invalid + */ +export function parseTemporalDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Try parsing as ISO date first + const isoDate = new Date(dateStr); + if (!isNaN(isoDate.getTime())) { + return isoDate.toISOString(); + } + + // Parse relative dates (Git-compatible format) + // Git accepts these natively, but we normalize to ISO for consistency + const relativePatterns = [ + { pattern: /^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i, unit: 'relative' }, + { pattern: /^yesterday$/i, days: 1 }, + { pattern: /^last\s+(week|month|year)$/i, unit: 'last' }, + ]; + + const lowerStr = dateStr.toLowerCase().trim(); + + // Handle "yesterday" + if (lowerStr === 'yesterday') { + const date = new Date(); + date.setDate(date.getDate() - 1); + return date.toISOString(); + } + + // Handle "N s ago" format + const matchRelative = lowerStr.match(/^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i); + if (matchRelative) { + const amount = parseInt(matchRelative[1]); + const unit = matchRelative[2].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'second': + date.setSeconds(date.getSeconds() - amount); + break; + case 'minute': + date.setMinutes(date.getMinutes() - amount); + break; + case 'hour': + date.setHours(date.getHours() - amount); + break; + case 'day': + date.setDate(date.getDate() - amount); + break; + case 'week': + date.setDate(date.getDate() - (amount * 7)); + break; + case 'month': + date.setMonth(date.getMonth() - amount); + break; + case 'year': + date.setFullYear(date.getFullYear() - amount); + break; + } + + return date.toISOString(); + } + + // Handle "last " format + const matchLast = lowerStr.match(/^last\s+(week|month|year)$/i); + if (matchLast) { + const unit = matchLast[1].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'week': + date.setDate(date.getDate() - 7); + break; + case 'month': + date.setMonth(date.getMonth() - 1); + break; + case 'year': + date.setFullYear(date.getFullYear() - 1); + break; + } + + return date.toISOString(); + } + + // If we can't parse it, return the original string + // This allows git log to try parsing it with its own logic + return dateStr; +} + +/** + * Validate that a date range is consistent (since < until). + * + * @param since - Start date (inclusive) + * @param until - End date (inclusive) + * @returns Error message if invalid, null if valid + */ +export function validateDateRange(since: string | undefined, until: string | undefined): string | null { + if (!since || !until) { + return null; // No validation needed if either is missing + } + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + + if (!parsedSince || !parsedUntil) { + return null; // Let individual date parsing handle invalid formats + } + + const sinceDate = new Date(parsedSince); + const untilDate = new Date(parsedUntil); + + if (isNaN(sinceDate.getTime()) || isNaN(untilDate.getTime())) { + return null; + } + + if (sinceDate > untilDate) { + return `Invalid date range: 'since' (${since}) must be before 'until' (${until})`; + } + + return null; +} + +/** + * Convert a date to a format suitable for Prisma database queries. + * Returns a Date object or undefined. + * + * @param dateStr - The date string to convert + * @returns Date object or undefined + */ +export function toDbDate(dateStr: string | undefined): Date | undefined { + if (!dateStr) { + return undefined; + } + + const parsed = parseTemporalDate(dateStr); + if (!parsed) { + return undefined; + } + + const date = new Date(parsed); + return isNaN(date.getTime()) ? undefined : date; +} + +/** + * Convert a date to a format suitable for git log commands. + * Git accepts relative formats directly, so we preserve them when possible. + * + * @param dateStr - The date string to convert + * @returns Git-compatible date string or undefined + */ +export function toGitDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Git natively understands these formats, so preserve them + const gitNativeFormats = [ + /^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i, + /^yesterday$/i, + /^last\s+(week|month|year)$/i, + /^\d{4}-\d{2}-\d{2}$/, // ISO date + /^\d{4}-\d{2}-\d{2}T/, // ISO datetime + ]; + + for (const pattern of gitNativeFormats) { + if (pattern.test(dateStr)) { + return dateStr; // Git can handle this directly + } + } + + // Otherwise, parse and convert to ISO + return parseTemporalDate(dateStr); +} diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts new file mode 100644 index 000000000..79be0d8bd --- /dev/null +++ b/packages/web/src/features/search/gitApi.test.ts @@ -0,0 +1,438 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { searchCommits } from './gitApi'; +import * as dateUtils from './dateUtils'; + +// Mock dependencies +vi.mock('simple-git'); +vi.mock('fs'); +vi.mock('@sourcebot/shared', () => ({ + REPOS_CACHE_DIR: '/mock/cache/dir', +})); +vi.mock('@/lib/serviceError', () => ({ + unexpectedError: (message: string) => ({ + errorCode: 'UNEXPECTED_ERROR', + message, + }), +})); +vi.mock('@/actions', () => ({ + sew: (fn: () => any) => fn(), +})); + +// Import mocked modules +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; + +describe('searchCommits', () => { + const mockGitLog = vi.fn(); + const mockSimpleGit = simpleGit as any; + const mockExistsSync = existsSync as any; + + beforeEach(() => { + vi.clearAllMocks(); + + // Setup default mocks + mockExistsSync.mockReturnValue(true); + mockSimpleGit.mockReturnValue({ + log: mockGitLog, + }); + }); + + describe('repository validation', () => { + it('should return error when repository does not exist on disk', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not found on Sourcebot server disk'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('123'), + }); + }); + + it('should check the correct repository path', async () => { + mockExistsSync.mockReturnValue(false); + + await searchCommits({ + repoId: 456, + }); + + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + }); + }); + + describe('date range validation', () => { + it('should validate date range and return error for invalid range', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue( + 'Invalid date range: since must be before until' + ); + + const result = await searchCommits({ + repoId: 123, + since: '2024-12-31', + until: '2024-01-01', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: 'Invalid date range: since must be before until', + }); + }); + + it('should proceed when date range is valid', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + since: '2024-01-01', + until: '2024-12-31', + }); + + expect(Array.isArray(result)).toBe(true); + }); + }); + + describe('date parsing', () => { + it('should parse dates using toGitDate', async () => { + const toGitDateSpy = vi.spyOn(dateUtils, 'toGitDate'); + toGitDateSpy.mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(toGitDateSpy).toHaveBeenCalledWith('30 days ago'); + expect(toGitDateSpy).toHaveBeenCalledWith('yesterday'); + }); + + it('should pass parsed dates to git log', async () => { + vi.spyOn(dateUtils, 'toGitDate') + .mockReturnValueOnce('2024-01-01') + .mockReturnValueOnce('2024-12-31'); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '2024-01-01', + '--until': '2024-12-31', + }) + ); + }); + }); + + describe('git log options', () => { + beforeEach(() => { + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + }); + + it('should set default maxCount', async () => { + await searchCommits({ + repoId: 123, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 50, + }) + ); + }); + + it('should use custom maxCount', async () => { + await searchCommits({ + repoId: 123, + maxCount: 100, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 100, + }) + ); + }); + + it('should add --since when since is provided', async () => { + await searchCommits({ + repoId: 123, + since: '30 days ago', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '30 days ago', + }) + ); + }); + + it('should add --until when until is provided', async () => { + await searchCommits({ + repoId: 123, + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--until': 'yesterday', + }) + ); + }); + + it('should add --author when author is provided', async () => { + await searchCommits({ + repoId: 123, + author: 'john@example.com', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--author': 'john@example.com', + }) + ); + }); + + it('should add --grep and --regexp-ignore-case when query is provided', async () => { + await searchCommits({ + repoId: 123, + query: 'fix bug', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--grep': 'fix bug', + '--regexp-ignore-case': null, + }) + ); + }); + + it('should combine all options', async () => { + await searchCommits({ + repoId: 123, + query: 'feature', + since: '2024-01-01', + until: '2024-12-31', + author: 'jane@example.com', + maxCount: 25, + }); + + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 25, + '--since': '2024-01-01', + '--until': '2024-12-31', + '--author': 'jane@example.com', + '--grep': 'feature', + '--regexp-ignore-case': null, + }); + }); + }); + + describe('successful responses', () => { + it('should return commit array from git log', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-15', + message: 'feat: add feature', + refs: 'HEAD -> main', + body: '', + author_name: 'John Doe', + author_email: 'john@example.com', + }, + { + hash: 'def456', + date: '2024-06-14', + message: 'fix: bug fix', + refs: '', + body: '', + author_name: 'Jane Smith', + author_email: 'jane@example.com', + }, + ]; + + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toEqual(mockCommits); + }); + + it('should return empty array when no commits match', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + query: 'nonexistent', + }); + + expect(result).toEqual([]); + }); + }); + + describe('error handling', () => { + it('should return error for "not a git repository"', async () => { + mockGitLog.mockRejectedValue(new Error('not a git repository')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not a valid git repository'), + }); + }); + + it('should return error for "ambiguous argument"', async () => { + mockGitLog.mockRejectedValue(new Error('ambiguous argument')); + + const result = await searchCommits({ + repoId: 123, + since: 'invalid-date', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Invalid git reference or date format'), + }); + }); + + it('should return error for timeout', async () => { + mockGitLog.mockRejectedValue(new Error('timeout exceeded')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('timed out'), + }); + }); + + it('should throw for other Error instances', async () => { + mockGitLog.mockRejectedValue(new Error('some other error')); + + await expect( + searchCommits({ + repoId: 123, + }) + ).rejects.toThrow('Failed to search commits in repository 123'); + }); + + it('should throw for non-Error exceptions', async () => { + mockGitLog.mockRejectedValue('string error'); + + await expect( + searchCommits({ + repoId: 123, + }) + ).rejects.toThrow('Failed to search commits in repository 123'); + }); + }); + + describe('git client configuration', () => { + it('should configure simple-git with correct options', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith({ + baseDir: '/mock/cache/dir/123', + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, + }, + }); + }); + + it('should create git client for the correct repository path', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 456, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith( + expect.objectContaining({ + baseDir: '/mock/cache/dir/456', + }) + ); + }); + }); + + describe('integration scenarios', () => { + it('should handle a typical commit search with filters', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-10T14:30:00Z', + message: 'fix: resolve authentication bug', + refs: 'HEAD -> main', + body: 'Fixed issue with JWT token validation', + author_name: 'Security Team', + author_email: 'security@example.com', + }, + ]; + + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + query: 'authentication', + since: '30 days ago', + until: 'yesterday', + author: 'security', + maxCount: 20, + }); + + expect(result).toEqual(mockCommits); + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 20, + '--since': '30 days ago', + '--until': 'yesterday', + '--author': 'security', + '--grep': 'authentication', + '--regexp-ignore-case': null, + }); + }); + + it('should handle repository not cloned yet', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 999, + query: 'feature', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + }); + expect(result).toHaveProperty('message'); + const message = (result as any).message; + expect(message).toContain('999'); + expect(message).toContain('not found on Sourcebot server disk'); + expect(message).toContain('cloning process may not be finished yet'); + }); + }); +}); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts new file mode 100644 index 000000000..81ce71fa6 --- /dev/null +++ b/packages/web/src/features/search/gitApi.ts @@ -0,0 +1,150 @@ +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; +import { REPOS_CACHE_DIR } from '@sourcebot/shared'; +import path from 'path'; +import { ServiceError, unexpectedError } from '@/lib/serviceError'; +import { sew } from '@/actions'; +import { toGitDate, validateDateRange } from './dateUtils'; + +const createGitClientForPath = (repoPath: string) => { + if (!existsSync(repoPath)) { + throw new Error(`Path ${repoPath} does not exist`); + } + + return simpleGit({ + baseDir: repoPath, + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, // 30 second timeout for git operations + }, + }); +} + +export interface SearchCommitsRequest { + repoId: number; + query?: string; + since?: string; + until?: string; + author?: string; + maxCount?: number; +} + +export interface Commit { + hash: string; + date: string; + message: string; + refs: string; + body: string; + author_name: string; + author_email: string; +} + +/** + * Search commits in a repository using git log. + * + * **Date Formats**: Supports both ISO 8601 dates and relative formats + * (e.g., "30 days ago", "last week", "yesterday"). Git natively handles + * these formats in the --since and --until flags. + * + * **Requirements**: The repository must be cloned on the Sourcebot server disk. + * Sourcebot automatically clones repositories during indexing, but the cloning + * process might not be finished when this query is executed. If the repository + * is not found on the server disk, an error will be returned. + * + * @param request - Search parameters including timeframe filters + * @returns Array of commits or ServiceError + */ +export const searchCommits = async ({ + repoId, + query, + since, + until, + author, + maxCount = 50, +}: SearchCommitsRequest): Promise => sew(async () => { + const repoPath = path.join(REPOS_CACHE_DIR, repoId.toString()); + + // Check if repository exists on Sourcebot server disk + if (!existsSync(repoPath)) { + return unexpectedError( + `Repository ${repoId} not found on Sourcebot server disk. ` + + `Sourcebot automatically clones repositories during indexing, but the ` + + `cloning process may not be finished yet. Please try again later. ` + + `Path checked: ${repoPath}` + ); + } + + // Validate date range if both since and until are provided + const dateRangeError = validateDateRange(since, until); + if (dateRangeError) { + return unexpectedError(dateRangeError); + } + + // Parse dates to git-compatible format + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + const git = createGitClientForPath(repoPath); + + try { + const logOptions: any = { + maxCount, + }; + + if (gitSince) { + logOptions['--since'] = gitSince; + } + + if (gitUntil) { + logOptions['--until'] = gitUntil; + } + + if (author) { + logOptions['--author'] = author; + } + + if (query) { + logOptions['--grep'] = query; + logOptions['--regexp-ignore-case'] = null; // Case insensitive + } + + const log = await git.log(logOptions); + return log.all as unknown as Commit[]; + } catch (error: unknown) { + // Provide detailed error messages for common git errors + const errorMessage = error instanceof Error ? error.message : String(error); + + if (errorMessage.includes('not a git repository')) { + return unexpectedError( + `Invalid git repository at ${repoPath}. ` + + `The directory exists but is not a valid git repository.` + ); + } + + if (errorMessage.includes('ambiguous argument')) { + return unexpectedError( + `Invalid git reference or date format. ` + + `Please check your date parameters: since="${since}", until="${until}"` + ); + } + + if (errorMessage.includes('timeout')) { + return unexpectedError( + `Git operation timed out after 30 seconds for repository ${repoId}. ` + + `The repository may be too large or the git operation is taking too long.` + ); + } + + // Generic error fallback + if (error instanceof Error) { + throw new Error( + `Failed to search commits in repository ${repoId}: ${error.message}` + ); + } else { + throw new Error( + `Failed to search commits in repository ${repoId}: ${errorMessage}` + ); + } + } +}); diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 711c810d9..b8ff1f69a 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -30,6 +30,14 @@ export const searchRequestSchema = z.object({ contextLines: z.number().optional(), // Whether to return the whole file as part of the response. whole: z.boolean().optional(), + // The git revision to search in. + gitRevision: z.string().optional(), + // Filter by date. Only show results that were indexed after this date. + since: z.string().optional(), + // Filter by date. Only show results that were indexed before this date. + until: z.string().optional(), + // Whether to include deleted files in the search results. + includeDeletedFiles: z.boolean().optional(), }); export const repositoryInfoSchema = z.object({ diff --git a/packages/web/src/features/search/searchApi.test.ts b/packages/web/src/features/search/searchApi.test.ts new file mode 100644 index 000000000..e9ec93b46 --- /dev/null +++ b/packages/web/src/features/search/searchApi.test.ts @@ -0,0 +1,324 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import * as dateUtils from './dateUtils'; + +// Mock dependencies +vi.mock('@/lib/serviceError', () => ({ + ServiceError: class ServiceError extends Error { + errorCode: string; + constructor(errorCode: string, message: string) { + super(message); + this.errorCode = errorCode; + } + }, + isServiceError: (obj: any) => obj?.errorCode !== undefined, +})); + +vi.mock('@/actions', () => ({ + sew: (fn: () => any) => fn(), +})); + +vi.mock('@/lib/auth', () => ({ + withOptionalAuthV2: (fn: (context: any) => any) => { + return fn({ + org: { id: 1 }, + prisma: { + repo: { + findMany: vi.fn(), + }, + }, + }); + }, +})); + +describe('searchApi temporal parameter handling', () => { + describe('date utility integration', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('toDbDate conversion', () => { + it('should convert ISO date strings to Date objects', () => { + const result = dateUtils.toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative dates to Date objects', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const result = dateUtils.toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + + vi.useRealTimers(); + }); + + it('should handle undefined gracefully', () => { + const result = dateUtils.toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should handle empty string gracefully', () => { + const result = dateUtils.toDbDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('date range validation scenarios', () => { + it('should accept valid date ranges', () => { + const error = dateUtils.validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should reject invalid date ranges', () => { + const error = dateUtils.validateDateRange('2024-12-31', '2024-01-01'); + expect(error).not.toBeNull(); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should validate relative date ranges', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const error = dateUtils.validateDateRange('30 days ago', 'yesterday'); + expect(error).toBeNull(); + + vi.useRealTimers(); + }); + + it('should detect invalid relative date ranges', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const error = dateUtils.validateDateRange('yesterday', '30 days ago'); + expect(error).not.toBeNull(); + + vi.useRealTimers(); + }); + }); + + describe('temporal parameter edge cases', () => { + it('should handle missing since parameter', () => { + const dbDate = dateUtils.toDbDate(undefined); + expect(dbDate).toBeUndefined(); + }); + + it('should handle missing until parameter', () => { + const dbDate = dateUtils.toDbDate(undefined); + expect(dbDate).toBeUndefined(); + }); + + it('should handle both parameters missing', () => { + const sinceDate = dateUtils.toDbDate(undefined); + const untilDate = dateUtils.toDbDate(undefined); + expect(sinceDate).toBeUndefined(); + expect(untilDate).toBeUndefined(); + }); + }); + }); + + describe('Prisma query construction with temporal filters', () => { + it('should construct valid date range for Prisma query', () => { + const since = '2024-01-01'; + const until = '2024-12-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.lte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.gte!.getTime()).toBeLessThan( + whereClause.indexedAt.lte!.getTime() + ); + }); + + it('should handle only since parameter', () => { + const since = '2024-01-01'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(undefined); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.lte).toBeUndefined(); + }); + + it('should handle only until parameter', () => { + const since = undefined; + const until = '2024-12-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeUndefined(); + expect(whereClause.indexedAt.lte).toBeInstanceOf(Date); + }); + + it('should construct Prisma query with relative dates', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const since = '30 days ago'; + const until = 'yesterday'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + expect(sinceDate).toBeInstanceOf(Date); + expect(untilDate).toBeInstanceOf(Date); + expect(sinceDate!.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + expect(untilDate!.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + + vi.useRealTimers(); + }); + }); + + describe('repository filtering logic', () => { + it('should filter repositories by indexedAt timestamp', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2024-01-15') }, + { id: 2, name: 'repo2', indexedAt: new Date('2024-06-01') }, + { id: 3, name: 'repo3', indexedAt: new Date('2024-12-01') }, + ]; + + const since = dateUtils.toDbDate('2024-05-01'); + const until = dateUtils.toDbDate('2024-11-01'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + if (until && repo.indexedAt > until) return false; + return true; + }); + + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe(2); + }); + + it('should return empty array when no repos match date range', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2020-01-01') }, + { id: 2, name: 'repo2', indexedAt: new Date('2021-01-01') }, + ]; + + const since = dateUtils.toDbDate('2024-01-01'); + const until = dateUtils.toDbDate('2024-12-31'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + if (until && repo.indexedAt > until) return false; + return true; + }); + + expect(filtered).toHaveLength(0); + }); + + it('should handle repos without indexedAt timestamp', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2024-06-01') }, + { id: 2, name: 'repo2', indexedAt: null }, + { id: 3, name: 'repo3', indexedAt: undefined }, + ]; + + const since = dateUtils.toDbDate('2024-01-01'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + return true; + }); + + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe(1); + }); + }); + + describe('real-world scenarios', () => { + it('should handle "last 30 days" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = '30 days ago'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle "since last week" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = 'last week'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle "before yesterday" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = 'yesterday'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle explicit ISO date range', () => { + const since = '2024-01-01'; + const until = '2024-03-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + const validationError = dateUtils.validateDateRange(since, until); + + expect(sinceDate).toBeInstanceOf(Date); + expect(untilDate).toBeInstanceOf(Date); + expect(validationError).toBeNull(); + expect(sinceDate!.getTime()).toBeLessThan(untilDate!.getTime()); + }); + + it('should prevent user from providing inverted date range', () => { + const since = '2024-12-31'; + const until = '2024-01-01'; + + const validationError = dateUtils.validateDateRange(since, until); + + expect(validationError).not.toBeNull(); + expect(validationError).toContain('since'); + expect(validationError).toContain('until'); + expect(validationError).toContain('before'); + }); + }); +}); diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 1ca57ef46..38c658fc9 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -127,8 +127,70 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise => sew(() => +export const search = async ({ query, matches, contextLines, whole, gitRevision, since, until, includeDeletedFiles }: SearchRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { + // If gitRevision is provided, append it to the query as a branch filter. + if (gitRevision) { + query = `${query} branch:${gitRevision}`; + } + + // If since or until are provided, filter repositories by indexedAt. + // Note: This filters by when the repo was last indexed by Sourcebot, + // not by the actual commit time in the repository. + if (since || until) { + const { toDbDate } = await import('./dateUtils.js'); + + const sinceDate = toDbDate(since); + const untilDate = toDbDate(until); + + const timeFilterRepos = await prisma.repo.findMany({ + where: { + orgId: org.id, + indexedAt: { + gte: sinceDate, + lte: untilDate, + } + }, + select: { name: true } + }); + + if (timeFilterRepos.length === 0) { + // If no repos match the time filter, return empty results immediately. + return { + stats: { + actualMatchCount: 0, + totalMatchCount: 0, + duration: 0, + fileCount: 0, + filesSkipped: 0, + contentBytesLoaded: 0, + indexBytesLoaded: 0, + crashes: 0, + shardFilesConsidered: 0, + filesConsidered: 0, + filesLoaded: 0, + shardsScanned: 0, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 0, + ngramLookups: 0, + wait: 0, + matchTreeConstruction: 0, + matchTreeSearch: 0, + regexpsConsidered: 0, + flushReason: 0, + }, + files: [], + repositoryInfo: [], + isBranchFilteringEnabled: false, + isSearchExhaustive: true, + } satisfies SearchResponse; + } + + const repoFilter = timeFilterRepos.map(r => `repo:${r.name}`).join(" or "); + query = `(${query}) and (${repoFilter})`; + } + const transformedQuery = await transformZoektQuery(query, org.id, prisma); if (isServiceError(transformedQuery)) { return transformedQuery; From 1f09bd0444bf9f3e97ed684985c0cbf50108af54 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Thu, 20 Nov 2025 18:31:14 -0500 Subject: [PATCH 02/10] fix: address review feedback - Remove unused mocks from searchApi.test.ts - Fix test description in dateUtils.test.ts to match behavior - Align error handling tests in gitApi.test.ts with production sew behavior - Add comprehensive JSDoc to parseTemporalDate function - Remove unused relativePatterns variable in dateUtils.ts - Add validation to maxCount parameter (max 500) in schemas - Normalize empty query params in repos route - Extract searchCommitsRequestSchema to shared schemas file - Use SearchCommitsRequest type for type safety in commits route - Remove duplicate existsSync check in gitApi.ts - Fix ServiceError handling in activity filtering logic in actions.ts - Add explicit error logging for unexpected errors - Remove unused includeDeletedFiles parameter (Brendan's feedback) Signed-off-by: Wayne Sun --- packages/mcp/CHANGELOG.md | 2 +- packages/mcp/README.md | 1 - packages/mcp/src/index.ts | 6 --- packages/mcp/src/schemas.ts | 2 +- packages/web/src/actions.ts | 37 ++++++++-------- .../web/src/app/api/(server)/commits/route.ts | 13 +----- .../web/src/app/api/(server)/repos/route.ts | 4 +- .../web/src/features/search/dateUtils.test.ts | 4 +- packages/web/src/features/search/dateUtils.ts | 19 ++++----- .../web/src/features/search/gitApi.test.ts | 42 +++++++++++++------ packages/web/src/features/search/gitApi.ts | 4 -- packages/web/src/features/search/schemas.ts | 25 +++++++++-- .../web/src/features/search/searchApi.test.ts | 29 ------------- packages/web/src/features/search/searchApi.ts | 2 +- 14 files changed, 88 insertions(+), 102 deletions(-) diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 2a9f4e3e3..a84483233 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday") - Added `search_commits` tool to search commits by actual commit time with full temporal filtering - Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo) -- Added `gitRevision`, and `includeDeletedFiles` parameters to `search_code` +- Added `gitRevision` parameter to `search_code` - Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by commit time - actual git commit dates) - Added date range validation to prevent invalid date ranges (since > until) - Added 30-second timeout for git operations to handle large repositories diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 5ac09c1b7..1a0c4987a 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -181,7 +181,6 @@ Fetches code that matches the provided regex pattern in `query`. | `gitRevision` | no | Git revision to search (e.g., 'main', 'develop', 'v1.0.0'). Defaults to HEAD. | | `since` | no | Only search repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | | `until` | no | Only search repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). | -| `includeDeletedFiles` | no | Include deleted files in search results (default: false). | | `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). | diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index d6e9e5866..0a5cf3070 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -61,10 +61,6 @@ server.tool( .string() .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`) .optional(), - includeDeletedFiles: z - .boolean() - .describe(`Whether to include deleted files in the search results (default: false).`) - .optional(), maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val)) @@ -80,7 +76,6 @@ server.tool( gitRevision, since, until, - includeDeletedFiles = false, }) => { if (repoIds.length > 0) { query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`; @@ -103,7 +98,6 @@ server.tool( gitRevision, since, until, - includeDeletedFiles, }); if (isServiceError(response)) { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index d5ca0f436..d8a43222b 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -198,7 +198,7 @@ export const searchCommitsRequestSchema = z.object({ since: z.string().optional(), until: z.string().optional(), author: z.string().optional(), - maxCount: z.number().optional(), + maxCount: z.number().int().positive().max(500).optional(), }); export const commitSchema = z.object({ diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 17eab0bfc..085ea6e62 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -497,25 +497,26 @@ export const getRepos = async ({ // Filter repos by commit activity using git log // Note: This requires repos to be cloned locally const activityChecks = await Promise.all(repos.map(async (repo) => { - try { - const commits = await searchCommits({ - repoId: repo.id, - since: activeAfter, - until: activeBefore, - maxCount: 1, - }); + const result = await searchCommits({ + repoId: repo.id, + since: activeAfter, + until: activeBefore, + maxCount: 1, + }); - if (Array.isArray(commits) && commits.length > 0) { - return repo; - } - } catch (e) { - // If error (e.g. repo not found on disk), exclude it from results - // This is expected for repos that haven't been cloned yet - const errorMessage = e instanceof Error ? e.message : String(e); - if (!errorMessage.includes('does not exist')) { - // Log unexpected errors, but not "repo not on disk" errors - console.error(`Error checking activity for repo ${repo.id} (${repo.name}):`, e); - } + // Handle successful result + if (Array.isArray(result)) { + return result.length > 0 ? repo : null; + } + + // Handle ServiceError + const errorMessage = result.message ?? ''; + if (!errorMessage.includes('not found on Sourcebot server disk')) { + // Log unexpected errors, but not "repo not cloned yet" errors + console.error( + `Error checking activity for repo ${repo.id} (${repo.name}):`, + result + ); } return null; })); diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts index 4408a8e03..ceba959e9 100644 --- a/packages/web/src/app/api/(server)/commits/route.ts +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -2,17 +2,8 @@ import { searchCommits, SearchCommitsRequest } from "@/features/search/gitApi"; import { serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; -import { z } from "zod"; import { schemaValidationError } from "@/lib/serviceError"; - -const searchCommitsRequestSchema = z.object({ - repoId: z.number(), - query: z.string().optional(), - since: z.string().optional(), - until: z.string().optional(), - author: z.string().optional(), - maxCount: z.number().optional(), -}); +import { searchCommitsRequestSchema } from "@/features/search/schemas"; export const POST = async (request: NextRequest) => { const body = await request.json(); @@ -24,7 +15,7 @@ export const POST = async (request: NextRequest) => { ); } - const response = await searchCommits(parsed.data); + const response = await searchCommits(parsed.data as SearchCommitsRequest); if (isServiceError(response)) { return serviceErrorResponse(response); diff --git a/packages/web/src/app/api/(server)/repos/route.ts b/packages/web/src/app/api/(server)/repos/route.ts index 2ec8bc3be..e74e5090e 100644 --- a/packages/web/src/app/api/(server)/repos/route.ts +++ b/packages/web/src/app/api/(server)/repos/route.ts @@ -7,8 +7,8 @@ import { NextRequest } from "next/server"; export const GET = async (request: NextRequest) => { const searchParams = request.nextUrl.searchParams; - const activeAfter = searchParams.get('activeAfter') ?? undefined; - const activeBefore = searchParams.get('activeBefore') ?? undefined; + const activeAfter = searchParams.get('activeAfter') || undefined; + const activeBefore = searchParams.get('activeBefore') || undefined; const repos: GetReposResponse = await getRepos({ activeAfter, diff --git a/packages/web/src/features/search/dateUtils.test.ts b/packages/web/src/features/search/dateUtils.test.ts index 58324fbc5..a64a83c14 100644 --- a/packages/web/src/features/search/dateUtils.test.ts +++ b/packages/web/src/features/search/dateUtils.test.ts @@ -285,8 +285,8 @@ describe('dateUtils', () => { expect(result).toBeUndefined(); }); - it('should convert unrecognized format to ISO', () => { - // For formats git doesn't natively understand, we convert to ISO + it('should pass through unrecognized format unchanged', () => { + // For formats git doesn't natively understand, pass through to git const result = toGitDate('some random string'); expect(result).toBe('some random string'); }); diff --git a/packages/web/src/features/search/dateUtils.ts b/packages/web/src/features/search/dateUtils.ts index d51dbb622..f28e9a5bd 100644 --- a/packages/web/src/features/search/dateUtils.ts +++ b/packages/web/src/features/search/dateUtils.ts @@ -6,12 +6,17 @@ /** * Parse a date string that can be either: * - ISO 8601 format (e.g., "2024-01-01", "2024-01-01T12:00:00Z") - * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday") - * - * Returns an ISO 8601 string suitable for both database queries and git log. + * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday", "last week") * * @param dateStr - The date string to parse - * @returns ISO 8601 date string or null if invalid + * @returns ISO 8601 string if successfully parsed, original string if not parseable (to allow git to try), or undefined if input is falsy + * + * @example + * parseTemporalDate('2024-01-01') // '2024-01-01T00:00:00.000Z' + * parseTemporalDate('30 days ago') // Calculates and returns ISO string + * parseTemporalDate('yesterday') // Yesterday's date as ISO string + * parseTemporalDate('some-git-format') // 'some-git-format' (passed through) + * parseTemporalDate(undefined) // undefined */ export function parseTemporalDate(dateStr: string | undefined): string | undefined { if (!dateStr) { @@ -26,12 +31,6 @@ export function parseTemporalDate(dateStr: string | undefined): string | undefin // Parse relative dates (Git-compatible format) // Git accepts these natively, but we normalize to ISO for consistency - const relativePatterns = [ - { pattern: /^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i, unit: 'relative' }, - { pattern: /^yesterday$/i, days: 1 }, - { pattern: /^last\s+(week|month|year)$/i, unit: 'last' }, - ]; - const lowerStr = dateStr.toLowerCase().trim(); // Handle "yesterday" diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts index 79be0d8bd..28a528253 100644 --- a/packages/web/src/features/search/gitApi.test.ts +++ b/packages/web/src/features/search/gitApi.test.ts @@ -15,7 +15,17 @@ vi.mock('@/lib/serviceError', () => ({ }), })); vi.mock('@/actions', () => ({ - sew: (fn: () => any) => fn(), + sew: async (fn: () => any) => { + try { + return await fn(); + } catch (error) { + // Mock sew to convert thrown errors to ServiceError + return { + errorCode: 'UNEXPECTED_ERROR', + message: error instanceof Error ? error.message : String(error), + }; + } + }, })); // Import mocked modules @@ -325,24 +335,30 @@ describe('searchCommits', () => { }); }); - it('should throw for other Error instances', async () => { + it('should return ServiceError for other Error instances', async () => { mockGitLog.mockRejectedValue(new Error('some other error')); - await expect( - searchCommits({ - repoId: 123, - }) - ).rejects.toThrow('Failed to search commits in repository 123'); + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); }); - it('should throw for non-Error exceptions', async () => { + it('should return ServiceError for non-Error exceptions', async () => { mockGitLog.mockRejectedValue('string error'); - await expect( - searchCommits({ - repoId: 123, - }) - ).rejects.toThrow('Failed to search commits in repository 123'); + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); }); }); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts index 81ce71fa6..880bb0afb 100644 --- a/packages/web/src/features/search/gitApi.ts +++ b/packages/web/src/features/search/gitApi.ts @@ -7,10 +7,6 @@ import { sew } from '@/actions'; import { toGitDate, validateDateRange } from './dateUtils'; const createGitClientForPath = (repoPath: string) => { - if (!existsSync(repoPath)) { - throw new Error(`Path ${repoPath} does not exist`); - } - return simpleGit({ baseDir: repoPath, binary: 'git', diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index b8ff1f69a..47ff56bcd 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -36,8 +36,6 @@ export const searchRequestSchema = z.object({ since: z.string().optional(), // Filter by date. Only show results that were indexed before this date. until: z.string().optional(), - // Whether to include deleted files in the search results. - includeDeletedFiles: z.boolean().optional(), }); export const repositoryInfoSchema = z.object({ @@ -168,4 +166,25 @@ export const fileSourceResponseSchema = z.object({ repositoryWebUrl: z.string().optional(), branch: z.string().optional(), webUrl: z.string().optional(), -}); \ No newline at end of file +}); + +export const searchCommitsRequestSchema = z.object({ + repoId: z.number(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); + +export const commitSchema = z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +}); + +export const searchCommitsResponseSchema = z.array(commitSchema); \ No newline at end of file diff --git a/packages/web/src/features/search/searchApi.test.ts b/packages/web/src/features/search/searchApi.test.ts index e9ec93b46..3bc87c89b 100644 --- a/packages/web/src/features/search/searchApi.test.ts +++ b/packages/web/src/features/search/searchApi.test.ts @@ -1,35 +1,6 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import * as dateUtils from './dateUtils'; -// Mock dependencies -vi.mock('@/lib/serviceError', () => ({ - ServiceError: class ServiceError extends Error { - errorCode: string; - constructor(errorCode: string, message: string) { - super(message); - this.errorCode = errorCode; - } - }, - isServiceError: (obj: any) => obj?.errorCode !== undefined, -})); - -vi.mock('@/actions', () => ({ - sew: (fn: () => any) => fn(), -})); - -vi.mock('@/lib/auth', () => ({ - withOptionalAuthV2: (fn: (context: any) => any) => { - return fn({ - org: { id: 1 }, - prisma: { - repo: { - findMany: vi.fn(), - }, - }, - }); - }, -})); - describe('searchApi temporal parameter handling', () => { describe('date utility integration', () => { beforeEach(() => { diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 38c658fc9..e07a7ef91 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -127,7 +127,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole, gitRevision, since, until, includeDeletedFiles }: SearchRequest): Promise => sew(() => +export const search = async ({ query, matches, contextLines, whole, gitRevision, since, until }: SearchRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { // If gitRevision is provided, append it to the query as a branch filter. if (gitRevision) { From 40194d6265fbb01c542f884f1acef4e8f3639cec Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 10:23:11 -0500 Subject: [PATCH 03/10] feat: implement safe activity filtering and refactor constants - Implement concurrency-limited activity filtering in getRepos with safety cap - Move activity filtering constants to @sourcebot/shared - Fix pagination logic to ensure accurate results while protecting server --- packages/shared/src/constants.ts | 3 ++ packages/web/src/actions.ts | 86 +++++++++++++++++++------------- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index c416d5878..5a3ba88ed 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -36,3 +36,6 @@ import path from "path"; export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); + +export const ACTIVITY_FILTER_MAX_SCAN_LIMIT = 200; +export const ACTIVITY_FILTER_CONCURRENCY_LIMIT = 10; diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 085ea6e62..99d8fb007 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -1,7 +1,18 @@ 'use server'; import { getAuditService } from "@/ee/features/audit/factory"; -import { env } from "@sourcebot/shared"; +import { + env, + REPOS_CACHE_DIR, + ACTIVITY_FILTER_MAX_SCAN_LIMIT, + ACTIVITY_FILTER_CONCURRENCY_LIMIT, + generateApiKey, + getTokenFromConfig, + hashSecret, + createLogger, + getPlan, + hasEntitlement +} from "@sourcebot/shared"; import { addUserToOrganization, orgHasAvailability } from "@/lib/authUtils"; import { ErrorCode } from "@/lib/errorCodes"; import { notAuthenticated, notFound, orgNotFound, ServiceError, ServiceErrorException, unexpectedError } from "@/lib/serviceError"; @@ -9,13 +20,10 @@ import { getOrgMetadata, isHttpError, isServiceError } from "@/lib/utils"; import { prisma } from "@/prisma"; import { render } from "@react-email/components"; import * as Sentry from '@sentry/nextjs'; -import { generateApiKey, getTokenFromConfig, hashSecret } from "@sourcebot/shared"; import { ApiKey, ConnectionSyncJobStatus, Org, OrgRole, Prisma, RepoIndexingJobStatus, RepoIndexingJobType, StripeSubscriptionStatus } from "@sourcebot/db"; -import { createLogger } from "@sourcebot/shared"; import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; -import { getPlan, hasEntitlement } from "@sourcebot/shared"; import { StatusCodes } from "http-status-codes"; import { cookies, headers } from "next/headers"; import { createTransport } from "nodemailer"; @@ -483,50 +491,60 @@ export const getRepos = async ({ orgId: org.id, ...where, }, - // Only apply take limit if NOT filtering by activity - // Otherwise, we'll apply it after activity filtering - take: shouldFilterByActivity ? undefined : take, + // If filtering by activity, fetch candidates up to the safety cap. + // Otherwise, apply the requested limit directly. + take: shouldFilterByActivity ? ACTIVITY_FILTER_MAX_SCAN_LIMIT : take, orderBy: { name: 'asc', // Consistent ordering for pagination }, }); - let filteredRepos = repos; + let filteredRepos: typeof repos = []; if (shouldFilterByActivity) { - // Filter repos by commit activity using git log - // Note: This requires repos to be cloned locally - const activityChecks = await Promise.all(repos.map(async (repo) => { - const result = await searchCommits({ - repoId: repo.id, - since: activeAfter, - until: activeBefore, - maxCount: 1, - }); - - // Handle successful result - if (Array.isArray(result)) { - return result.length > 0 ? repo : null; + // Process in chunks to limit concurrency + for (let i = 0; i < repos.length; i += ACTIVITY_FILTER_CONCURRENCY_LIMIT) { + // Stop if we have enough matches + if (take && filteredRepos.length >= take) { + break; } - // Handle ServiceError - const errorMessage = result.message ?? ''; - if (!errorMessage.includes('not found on Sourcebot server disk')) { - // Log unexpected errors, but not "repo not cloned yet" errors - console.error( - `Error checking activity for repo ${repo.id} (${repo.name}):`, - result - ); - } - return null; - })); + const chunk = repos.slice(i, i + ACTIVITY_FILTER_CONCURRENCY_LIMIT); + const activityChecks = await Promise.all(chunk.map(async (repo) => { + const result = await searchCommits({ + repoId: repo.id, + since: activeAfter, + until: activeBefore, + maxCount: 1, + }); + + // Handle successful result + if (Array.isArray(result)) { + return result.length > 0 ? repo : null; + } + + // Handle ServiceError + const errorMessage = result.message ?? ''; + if (!errorMessage.includes('not found on Sourcebot server disk')) { + // Log unexpected errors, but not "repo not cloned yet" errors + console.error( + `Error checking activity for repo ${repo.id} (${repo.name}):`, + result + ); + } + return null; + })); - filteredRepos = activityChecks.filter((r): r is typeof repos[0] => r !== null); + const activeInChunk = activityChecks.filter((r): r is typeof repos[0] => r !== null); + filteredRepos.push(...activeInChunk); + } - // Apply pagination after filtering + // Apply final limit if (take) { filteredRepos = filteredRepos.slice(0, take); } + } else { + filteredRepos = repos; } return filteredRepos.map((repo) => repositoryQuerySchema.parse({ From 66fb4add876a1c243953c6e09205783ca56461bb Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 10:25:26 -0500 Subject: [PATCH 04/10] feat: add auth guard to commits API route --- packages/web/src/app/api/(server)/commits/route.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts index ceba959e9..5cf913c5a 100644 --- a/packages/web/src/app/api/(server)/commits/route.ts +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -4,8 +4,9 @@ import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; import { schemaValidationError } from "@/lib/serviceError"; import { searchCommitsRequestSchema } from "@/features/search/schemas"; +import { withOptionalAuthV2 } from "@/withAuthV2"; -export const POST = async (request: NextRequest) => { +export const POST = async (request: NextRequest) => withOptionalAuthV2(async () => { const body = await request.json(); const parsed = await searchCommitsRequestSchema.safeParseAsync(body); @@ -22,4 +23,4 @@ export const POST = async (request: NextRequest) => { } return Response.json(response); -} +}); From b30bbd398532d16d9c9ab697febd9803cd825686 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 12:06:25 -0500 Subject: [PATCH 05/10] feat: add dual repository identifier support to search_commits Add support for both numeric database IDs and string repository names in the search_commits tool, allowing users to directly use repository identifiers from list_repos output. **Implementation:** - Add resolveRepoId utility function to convert string repo names to numeric IDs via database lookup - Update SearchCommitsRequest interface to accept repoId: string | number - Wrap searchCommits in withOptionalAuthV2 for database access - Update MCP schema and API route schema with z.union([z.number(), z.string()]) **Testing:** - Add comprehensive unit tests for repository identifier resolution - Test both numeric ID and string name inputs - Test error handling for non-existent repositories - Verify database query parameters - All 29 tests passing **Documentation:** - Update README to document both identifier types - Update CHANGELOG with new functionality - Clear error messages guide users to use list_repos for valid identifiers This enhancement improves the user experience by eliminating the need to manually convert repository names to numeric IDs when using search_commits after list_repos. Signed-off-by: Wayne Sun --- packages/mcp/CHANGELOG.md | 9 +- packages/mcp/README.md | 2 +- packages/mcp/src/index.ts | 6 +- .../web/src/features/search/gitApi.test.ts | 121 ++++++++++++++++++ packages/web/src/features/search/gitApi.ts | 60 ++++++++- packages/web/src/features/search/schemas.ts | 2 +- 6 files changed, 185 insertions(+), 15 deletions(-) diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index a84483233..e05418faa 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday") -- Added `search_commits` tool to search commits by actual commit time with full temporal filtering +- Added `search_commits` tool to search commits by actual commit time with full temporal filtering. Accepts both numeric database IDs (e.g., 123) and string repository names (e.g., "github.com/owner/repo") for the `repoId` parameter, allowing direct use of repository names from `list_repos` output - Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo) - Added `gitRevision` parameter to `search_code` - Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by commit time - actual git commit dates) @@ -21,12 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added comprehensive unit tests for date parsing utilities (90+ test cases) - Added unit tests for git commit search functionality with mocking - Added integration tests for temporal parameter validation - -### Changed -- Enhanced `list_repos` tool with better pagination feedback messages - -### Fixed -- Fixed `list_repos` pagination bug where `take` limit was applied before activity filtering, returning fewer results than requested +- Added unit tests for repository identifier resolution (both string and number types) ## [1.0.9] - 2025-11-17 diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 1a0c4987a..2e0ef4e72 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -230,7 +230,7 @@ Searches for commits in a specific repository based on actual commit time (NOT i | Name | Required | Description | |:-----------|:---------|:-----------------------------------------------------------------------------------------------| -| `repoId` | yes | Repository ID (obtain from `list_repos`). | +| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. | | `query` | no | Search query to filter commits by message (case-insensitive). | | `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. | | `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. | diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 0a5cf3070..922f0b802 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -194,7 +194,11 @@ server.tool( If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, { - repoId: z.number().describe(`The ID of the repository to search in. Obtain this by calling 'list_repos' first.`), + repoId: z.union([z.number(), z.string()]).describe(`Repository identifier. Can be either: + - Numeric database ID (e.g., 123) + - Full repository name (e.g., "github.com/owner/repo") as returned by 'list_repos' + + **YOU MUST** call 'list_repos' first to obtain the repository identifier.`), query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(), since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(), until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(), diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts index 28a528253..e8bcb1fbc 100644 --- a/packages/web/src/features/search/gitApi.test.ts +++ b/packages/web/src/features/search/gitApi.test.ts @@ -27,6 +27,26 @@ vi.mock('@/actions', () => ({ } }, })); +// Create a mock findFirst function that we can configure per-test +const mockFindFirst = vi.fn(); + +vi.mock('@/withAuthV2', () => ({ + withOptionalAuthV2: async (fn: (args: any) => any) => { + // Mock withOptionalAuthV2 to provide org and prisma context + const mockOrg = { id: 1, name: 'test-org' }; + const mockPrisma = { + repo: { + findFirst: mockFindFirst, + }, + }; + return await fn({ org: mockOrg, prisma: mockPrisma }); + }, +})); +vi.mock('@/lib/utils', () => ({ + isServiceError: (obj: any) => { + return obj && typeof obj === 'object' && 'errorCode' in obj; + }, +})); // Import mocked modules import { simpleGit } from 'simple-git'; @@ -451,4 +471,105 @@ describe('searchCommits', () => { expect(message).toContain('cloning process may not be finished yet'); }); }); + + describe('repository identifier resolution', () => { + beforeEach(() => { + // Reset mockFindFirst before each test in this suite + mockFindFirst.mockReset(); + }); + + it('should accept numeric repository ID', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/123'); + // mockFindFirst should not be called for numeric IDs + expect(mockFindFirst).not.toHaveBeenCalled(); + }); + + it('should accept string repository name and resolve to numeric ID', async () => { + mockFindFirst.mockResolvedValue({ id: 456 }); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 'github.com/owner/repo', + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/owner/repo', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should return error when string repository name is not found', async () => { + mockFindFirst.mockResolvedValue(null); + + const result = await searchCommits({ + repoId: 'github.com/nonexistent/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Repository "github.com/nonexistent/repo" not found'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('Use \'list_repos\' to get valid repository identifiers'), + }); + }); + + it('should query database with correct parameters for string repo name', async () => { + mockFindFirst.mockResolvedValue({ id: 789 }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 'github.com/example/project', + }); + + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/example/project', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should work with string repo name in full search scenario', async () => { + const mockCommits = [ + { + hash: 'xyz789', + date: '2024-06-20T10:00:00Z', + message: 'feat: new feature', + refs: 'main', + body: 'Added new functionality', + author_name: 'Developer', + author_email: 'dev@example.com', + }, + ]; + + mockFindFirst.mockResolvedValue({ id: 555 }); + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 'github.com/test/repository', + query: 'feature', + since: '7 days ago', + author: 'Developer', + }); + + expect(result).toEqual(mockCommits); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/555'); + }); + }); }); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts index 880bb0afb..a079bb4e3 100644 --- a/packages/web/src/features/search/gitApi.ts +++ b/packages/web/src/features/search/gitApi.ts @@ -5,6 +5,8 @@ import path from 'path'; import { ServiceError, unexpectedError } from '@/lib/serviceError'; import { sew } from '@/actions'; import { toGitDate, validateDateRange } from './dateUtils'; +import { withOptionalAuthV2 } from '@/withAuthV2'; +import { isServiceError } from '@/lib/utils'; const createGitClientForPath = (repoPath: string) => { return simpleGit({ @@ -17,8 +19,46 @@ const createGitClientForPath = (repoPath: string) => { }); } +/** + * Resolves a repository identifier to a numeric ID. + * Accepts both numeric IDs and string repository names. + * + * @param identifier - Either a numeric repo ID or a string repo name (e.g., "github.com/owner/repo") + * @param orgId - Organization ID to scope the lookup + * @param prisma - Prisma client instance + * @returns Numeric repository ID or ServiceError if not found + */ +const resolveRepoId = async ( + identifier: string | number, + orgId: number, + prisma: any +): Promise => { + // If already numeric, return as-is + if (typeof identifier === 'number') { + return identifier; + } + + // Convert string name to numeric ID + const repo = await prisma.repo.findFirst({ + where: { + name: identifier, + orgId: orgId, + }, + select: { id: true } + }); + + if (!repo) { + return unexpectedError( + `Repository "${identifier}" not found. ` + + `Use 'list_repos' to get valid repository identifiers.` + ); + } + + return repo.id; +} + export interface SearchCommitsRequest { - repoId: number; + repoId: string | number; query?: string; since?: string; until?: string; @@ -48,18 +88,28 @@ export interface Commit { * process might not be finished when this query is executed. If the repository * is not found on the server disk, an error will be returned. * + * **Repository ID**: Accepts either a numeric database ID or a string repository name + * (e.g., "github.com/owner/repo") as returned by list_repos. + * * @param request - Search parameters including timeframe filters * @returns Array of commits or ServiceError */ export const searchCommits = async ({ - repoId, + repoId: repoIdInput, query, since, until, author, maxCount = 50, -}: SearchCommitsRequest): Promise => sew(async () => { - const repoPath = path.join(REPOS_CACHE_DIR, repoId.toString()); +}: SearchCommitsRequest): Promise => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + // Resolve repository identifier to numeric ID + const repoId = await resolveRepoId(repoIdInput, org.id, prisma); + if (isServiceError(repoId)) { + return repoId; + } + + const repoPath = path.join(REPOS_CACHE_DIR, repoId.toString()); // Check if repository exists on Sourcebot server disk if (!existsSync(repoPath)) { @@ -143,4 +193,4 @@ export const searchCommits = async ({ ); } } -}); +})); diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 47ff56bcd..94ef889c2 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -169,7 +169,7 @@ export const fileSourceResponseSchema = z.object({ }); export const searchCommitsRequestSchema = z.object({ - repoId: z.number(), + repoId: z.union([z.number(), z.string()]), query: z.string().optional(), since: z.string().optional(), until: z.string().optional(), From 1bdf681294640f9ec63d11d17ba0c0c068ed7be3 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 13:11:21 -0500 Subject: [PATCH 06/10] fix: guard against env.DATA_CACHE_DIR being undefined Add guards to REPOS_CACHE_DIR and INDEX_CACHE_DIR constants to prevent module load errors when env.DATA_CACHE_DIR is undefined (e.g., when SKIP_ENV_VALIDATION=1 during builds). Use safe fallback paths (/tmp/sourcebot/*) that prevent path.join from being called with undefined, fixing the Test Web pipeline error: 'TypeError: The "path" argument must be of type string. Received undefined' Addresses PR review comment about guarding against undefined DATA_CACHE_DIR. Signed-off-by: Wayne Sun --- packages/shared/src/constants.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index 5a3ba88ed..97174da63 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -34,8 +34,10 @@ export const DEFAULT_CONFIG_SETTINGS: ConfigSettings = { import { env } from "./env.server.js"; import path from "path"; -export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); -export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); +// Guard against env.DATA_CACHE_DIR being undefined (e.g., when SKIP_ENV_VALIDATION=1) +// Use fallback to prevent module load errors in non-runtime contexts like builds +export const REPOS_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'repos') : '/tmp/sourcebot/repos'; +export const INDEX_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'index') : '/tmp/sourcebot/index'; export const ACTIVITY_FILTER_MAX_SCAN_LIMIT = 200; export const ACTIVITY_FILTER_CONCURRENCY_LIMIT = 10; From 0071ca68bcdeb8537d247b09a5334c1fe36157f5 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 13:13:55 -0500 Subject: [PATCH 07/10] fix: sync MCP schema with web schema for search requests Add missing optional fields to searchRequestSchema in MCP package to match the web schema at packages/web/src/features/search/schemas.ts: - gitRevision: git revision to search in - since: filter by index date (after this date) - until: filter by index date (before this date) This resolves the schema synchronization violation noted in the code header comment and addresses the PR review feedback about missing fields. Keeps both schemas in sync as required by the @NOTE comment at the top of the file. Signed-off-by: Wayne Sun --- packages/mcp/src/schemas.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index d8a43222b..b0948ee4b 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -30,6 +30,12 @@ export const searchRequestSchema = z.object({ contextLines: z.number().optional(), // Whether to return the whole file as part of the response. whole: z.boolean().optional(), + // The git revision to search in. + gitRevision: z.string().optional(), + // Filter by date. Only show results that were indexed after this date. + since: z.string().optional(), + // Filter by date. Only show results that were indexed before this date. + until: z.string().optional(), }); export const repositoryInfoSchema = z.object({ From f2d33abbd076ba5f1b2aec3d051c1038671c302b Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 13:39:23 -0500 Subject: [PATCH 08/10] fix: separate server-only constants and fix dateUtils import **Server Code Leak Fix:** - Move REPOS_CACHE_DIR and INDEX_CACHE_DIR to new constants.server.ts - Remove env.server.js import from constants.ts to prevent server dependencies from leaking into client bundles through index.client.js - Export server-only constants from index.server.ts but not index.client.ts - This prevents Webpack from trying to bundle Node.js modules (node:https, node:http, node:fs, node:net) into client code **Dynamic Import Fix:** - Change './dateUtils.js' to './dateUtils' in searchApi.ts dynamic import - Matches Next.js module resolution conventions used elsewhere in the codebase - Fixes "Module not found: Can't resolve './dateUtils.js'" build error **Root Cause:** The guard fix for env.DATA_CACHE_DIR added server-side imports to constants.ts, which was exported through index.client.ts, causing Google Cloud Secret Manager dependencies to leak into client bundles and fail the Docker build. This fix properly separates client-safe constants from server-only constants while maintaining the guard against undefined env.DATA_CACHE_DIR. Signed-off-by: Wayne Sun --- packages/shared/src/constants.server.ts | 7 +++++++ packages/shared/src/constants.ts | 8 -------- packages/shared/src/index.server.ts | 4 ++++ packages/web/src/features/search/searchApi.ts | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 packages/shared/src/constants.server.ts diff --git a/packages/shared/src/constants.server.ts b/packages/shared/src/constants.server.ts new file mode 100644 index 000000000..a9efd184b --- /dev/null +++ b/packages/shared/src/constants.server.ts @@ -0,0 +1,7 @@ +import { env } from "./env.server.js"; +import path from "path"; + +// Guard against env.DATA_CACHE_DIR being undefined (e.g., when SKIP_ENV_VALIDATION=1) +// Use fallback to prevent module load errors in non-runtime contexts like builds +export const REPOS_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'repos') : '/tmp/sourcebot/repos'; +export const INDEX_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'index') : '/tmp/sourcebot/index'; diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index 97174da63..5a6176374 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -31,13 +31,5 @@ export const DEFAULT_CONFIG_SETTINGS: ConfigSettings = { experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours } -import { env } from "./env.server.js"; -import path from "path"; - -// Guard against env.DATA_CACHE_DIR being undefined (e.g., when SKIP_ENV_VALIDATION=1) -// Use fallback to prevent module load errors in non-runtime contexts like builds -export const REPOS_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'repos') : '/tmp/sourcebot/repos'; -export const INDEX_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'index') : '/tmp/sourcebot/index'; - export const ACTIVITY_FILTER_MAX_SCAN_LIMIT = 200; export const ACTIVITY_FILTER_CONCURRENCY_LIMIT = 10; diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index fabe608e7..4cb7bffbe 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -26,6 +26,10 @@ export { getConfigSettings, } from "./utils.js"; export * from "./constants.js"; +export { + REPOS_CACHE_DIR, + INDEX_CACHE_DIR, +} from "./constants.server.js"; export { env, resolveEnvironmentVariableOverridesFromConfig, diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index e07a7ef91..69dae3df3 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -138,7 +138,7 @@ export const search = async ({ query, matches, contextLines, whole, gitRevision, // Note: This filters by when the repo was last indexed by Sourcebot, // not by the actual commit time in the repository. if (since || until) { - const { toDbDate } = await import('./dateUtils.js'); + const { toDbDate } = await import('./dateUtils'); const sinceDate = toDbDate(since); const untilDate = toDbDate(until); From 40087d7d3d5f04864a48afd23adef690b3b3cc42 Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 13:54:53 -0500 Subject: [PATCH 09/10] fix: remove unused import and replace any types with proper types **ESLint/TypeScript Fixes:** - Remove unused REPOS_CACHE_DIR import from actions.ts - Replace `any` types in gitApi.test.ts with proper generic types: - Use generic function signatures for mock implementations - Use `unknown` for type guards (matches actual isServiceError signature) - Use `as unknown as vi.Mock` for mocked functions - Use explicit object types instead of `any` for type assertions - Replace `any` types in gitApi.ts with proper types: - Import PrismaClient type from @sourcebot/db - Type prisma parameter as PrismaClient in resolveRepoId - Type logOptions as Record **Why These Changes:** - Follows TypeScript best practices for type safety - Matches the actual type signatures used in the codebase - Satisfies @typescript-eslint/no-explicit-any linting rule - Maintains type safety while allowing proper mocking in tests Fixes the CI build failures from Next.js linting step. Signed-off-by: Wayne Sun --- packages/web/src/actions.ts | 1 - packages/web/src/features/search/gitApi.test.ts | 16 ++++++++-------- packages/web/src/features/search/gitApi.ts | 5 +++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 99d8fb007..710b973c3 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -3,7 +3,6 @@ import { getAuditService } from "@/ee/features/audit/factory"; import { env, - REPOS_CACHE_DIR, ACTIVITY_FILTER_MAX_SCAN_LIMIT, ACTIVITY_FILTER_CONCURRENCY_LIMIT, generateApiKey, diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts index e8bcb1fbc..1c4a46a58 100644 --- a/packages/web/src/features/search/gitApi.test.ts +++ b/packages/web/src/features/search/gitApi.test.ts @@ -15,7 +15,7 @@ vi.mock('@/lib/serviceError', () => ({ }), })); vi.mock('@/actions', () => ({ - sew: async (fn: () => any) => { + sew: async (fn: () => Promise | T): Promise => { try { return await fn(); } catch (error) { @@ -23,7 +23,7 @@ vi.mock('@/actions', () => ({ return { errorCode: 'UNEXPECTED_ERROR', message: error instanceof Error ? error.message : String(error), - }; + } as T; } }, })); @@ -31,7 +31,7 @@ vi.mock('@/actions', () => ({ const mockFindFirst = vi.fn(); vi.mock('@/withAuthV2', () => ({ - withOptionalAuthV2: async (fn: (args: any) => any) => { + withOptionalAuthV2: async (fn: (args: { org: { id: number; name: string }; prisma: unknown }) => Promise): Promise => { // Mock withOptionalAuthV2 to provide org and prisma context const mockOrg = { id: 1, name: 'test-org' }; const mockPrisma = { @@ -43,8 +43,8 @@ vi.mock('@/withAuthV2', () => ({ }, })); vi.mock('@/lib/utils', () => ({ - isServiceError: (obj: any) => { - return obj && typeof obj === 'object' && 'errorCode' in obj; + isServiceError: (obj: unknown): obj is { errorCode: string } => { + return obj !== null && typeof obj === 'object' && 'errorCode' in obj; }, })); @@ -54,8 +54,8 @@ import { existsSync } from 'fs'; describe('searchCommits', () => { const mockGitLog = vi.fn(); - const mockSimpleGit = simpleGit as any; - const mockExistsSync = existsSync as any; + const mockSimpleGit = simpleGit as unknown as vi.Mock; + const mockExistsSync = existsSync as unknown as vi.Mock; beforeEach(() => { vi.clearAllMocks(); @@ -465,7 +465,7 @@ describe('searchCommits', () => { errorCode: 'UNEXPECTED_ERROR', }); expect(result).toHaveProperty('message'); - const message = (result as any).message; + const message = (result as { message: string }).message; expect(message).toContain('999'); expect(message).toContain('not found on Sourcebot server disk'); expect(message).toContain('cloning process may not be finished yet'); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts index a079bb4e3..e6e1269f3 100644 --- a/packages/web/src/features/search/gitApi.ts +++ b/packages/web/src/features/search/gitApi.ts @@ -7,6 +7,7 @@ import { sew } from '@/actions'; import { toGitDate, validateDateRange } from './dateUtils'; import { withOptionalAuthV2 } from '@/withAuthV2'; import { isServiceError } from '@/lib/utils'; +import type { PrismaClient } from '@sourcebot/db'; const createGitClientForPath = (repoPath: string) => { return simpleGit({ @@ -31,7 +32,7 @@ const createGitClientForPath = (repoPath: string) => { const resolveRepoId = async ( identifier: string | number, orgId: number, - prisma: any + prisma: PrismaClient ): Promise => { // If already numeric, return as-is if (typeof identifier === 'number') { @@ -134,7 +135,7 @@ export const searchCommits = async ({ const git = createGitClientForPath(repoPath); try { - const logOptions: any = { + const logOptions: Record = { maxCount, }; From 602c53cad772ae3fd80fa0edebdae71827269eae Mon Sep 17 00:00:00 2001 From: Wayne Sun Date: Fri, 21 Nov 2025 14:06:48 -0500 Subject: [PATCH 10/10] fix: add explicit Response return type to commits API route Add Promise return type annotation to the POST function in the commits API route to satisfy Next.js type checking requirements. Next.js route handlers must return Promise, and the explicit return type annotation ensures TypeScript understands that all code paths (including those through withOptionalAuthV2) return Response objects via serviceErrorResponse() or Response.json(). This fixes the type error: 'Promise' is not assignable to 'Promise' Signed-off-by: Wayne Sun --- .../web/src/app/api/(server)/commits/route.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts index 5cf913c5a..3decdb7f7 100644 --- a/packages/web/src/app/api/(server)/commits/route.ts +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -1,12 +1,11 @@ import { searchCommits, SearchCommitsRequest } from "@/features/search/gitApi"; -import { serviceErrorResponse } from "@/lib/serviceError"; +import { serviceErrorResponse, schemaValidationError } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; -import { schemaValidationError } from "@/lib/serviceError"; import { searchCommitsRequestSchema } from "@/features/search/schemas"; import { withOptionalAuthV2 } from "@/withAuthV2"; -export const POST = async (request: NextRequest) => withOptionalAuthV2(async () => { +export async function POST(request: NextRequest): Promise { const body = await request.json(); const parsed = await searchCommitsRequestSchema.safeParseAsync(body); @@ -16,11 +15,13 @@ export const POST = async (request: NextRequest) => withOptionalAuthV2(async () ); } - const response = await searchCommits(parsed.data as SearchCommitsRequest); + const result = await withOptionalAuthV2(async () => { + return await searchCommits(parsed.data as SearchCommitsRequest); + }); - if (isServiceError(response)) { - return serviceErrorResponse(response); + if (isServiceError(result)) { + return serviceErrorResponse(result); } - return Response.json(response); -}); + return Response.json(result); +}