diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index b11f51020..0cdc4eb48 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -1,6 +1,5 @@ import { CodeHostType } from "@sourcebot/db"; -import { env } from "@sourcebot/shared"; -import path from "path"; + export const SINGLE_TENANT_ORG_ID = 1; @@ -9,8 +8,7 @@ export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES: CodeHostType[] = [ 'gitlab', ]; -export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); -export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); +export { REPOS_CACHE_DIR, INDEX_CACHE_DIR } from "@sourcebot/shared"; // Maximum time to wait for current job to finish export const GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS = 5 * 1000; // 5 seconds diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 06f9e2a4a..e05418faa 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday") +- Added `search_commits` tool to search commits by actual commit time with full temporal filtering. Accepts both numeric database IDs (e.g., 123) and string repository names (e.g., "github.com/owner/repo") for the `repoId` parameter, allowing direct use of repository names from `list_repos` output +- Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo) +- Added `gitRevision` parameter to `search_code` +- Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by commit time - actual git commit dates) +- Added date range validation to prevent invalid date ranges (since > until) +- Added 30-second timeout for git operations to handle large repositories +- Added enhanced error messages for git operations (timeout, repository not found, invalid git repository, ambiguous arguments) +- Added clarification that repositories must be cloned on Sourcebot server disk for `search_commits` to work +- Added comprehensive temporal parameter documentation to README with clear distinction between index time and commit time filtering +- Added comprehensive unit tests for date parsing utilities (90+ test cases) +- Added unit tests for git commit search functionality with mocking +- Added integration tests for temporal parameter validation +- Added unit tests for repository identifier resolution (both string and number types) + ## [1.0.9] - 2025-11-17 ### Added diff --git a/packages/mcp/README.md b/packages/mcp/README.md index a0a875a0f..2e0ef4e72 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -166,6 +166,8 @@ For a more detailed guide, checkout [the docs](https://docs.sourcebot.dev/docs/f Fetches code that matches the provided regex pattern in `query`. +**Temporal Filtering**: Use `since` and `until` to filter by repository index time (when Sourcebot last indexed the repo). This is different from commit time. See `search_commits` for commit-time filtering. +
Parameters @@ -176,6 +178,9 @@ Fetches code that matches the provided regex pattern in `query`. | `filterByLanguages` | no | Restrict search to specific languages (GitHub linguist format, e.g., Python, JavaScript). | | `caseSensitive` | no | Case sensitive search (default: false). | | `includeCodeSnippets` | no | Include code snippets in results (default: false). | +| `gitRevision` | no | Git revision to search (e.g., 'main', 'develop', 'v1.0.0'). Defaults to HEAD. | +| `since` | no | Only search repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `until` | no | Only search repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). | | `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). |
@@ -184,14 +189,18 @@ Fetches code that matches the provided regex pattern in `query`. Lists repositories indexed by Sourcebot with optional filtering and pagination. +**Temporal Filtering**: Use `activeAfter` and `activeBefore` to filter repositories by actual commit activity (requires repositories to be cloned locally). +
Parameters -| Name | Required | Description | -|:-------------|:---------|:--------------------------------------------------------------------| -| `query` | no | Filter repositories by name (case-insensitive). | -| `pageNumber` | no | Page number (1-indexed, default: 1). | -| `limit` | no | Number of repositories per page (default: 50). | +| Name | Required | Description | +|:----------------|:---------|:-----------------------------------------------------------------------------------------------| +| `query` | no | Filter repositories by name (case-insensitive). | +| `pageNumber` | no | Page number (1-indexed, default: 1). | +| `limit` | no | Number of repositories per page (default: 50). | +| `activeAfter` | no | Only return repos with commits after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). | +| `activeBefore` | no | Only return repos with commits before this date. Supports ISO 8601 or relative (e.g., "yesterday"). |
@@ -208,6 +217,39 @@ Fetches the source code for a given file. | `repoId` | yes | The Sourcebot repository ID. | +### search_commits + +Searches for commits in a specific repository based on actual commit time (NOT index time). + +**Requirements**: Repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. Use `list_repos` first to get the repository ID. + +**Date Formats**: Supports ISO 8601 dates (e.g., "2024-01-01") and relative formats (e.g., "30 days ago", "last week", "yesterday"). + +
+Parameters + +| Name | Required | Description | +|:-----------|:---------|:-----------------------------------------------------------------------------------------------| +| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. | +| `query` | no | Search query to filter commits by message (case-insensitive). | +| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. | +| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. | +| `author` | no | Filter by author name or email (supports partial matches). | +| `maxCount` | no | Maximum number of commits to return (default: 50). | + +
+ +## Date Format Examples + +All temporal parameters support: +- **ISO 8601**: `"2024-01-01"`, `"2024-12-31T23:59:59Z"` +- **Relative dates**: `"30 days ago"`, `"1 week ago"`, `"last month"`, `"yesterday"` + +**Important**: `search_code` and `list_repos` temporal filters work differently: +- `search_code` `since`/`until`: Filters by **index time** (when Sourcebot indexed the repo) +- `list_repos` `activeAfter`/`activeBefore`: Filters by **commit time** (actual git commit dates) +- `search_commits` `since`/`until`: Filters by **commit time** (actual git commit dates) + ## Supported Code Hosts Sourcebot supports the following code hosts: diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 3754c605f..fe3995c32 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js'; -import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js'; +import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js'; +import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js'; import { isServiceError } from './utils.js'; export const search = async (request: SearchRequest): Promise => { @@ -21,8 +21,16 @@ export const search = async (request: SearchRequest): Promise => { - const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, { +export const listRepos = async (params?: { activeAfter?: string, activeBefore?: string }): Promise => { + const url = new URL(`${env.SOURCEBOT_HOST}/api/repos`); + if (params?.activeAfter) { + url.searchParams.append('activeAfter', params.activeAfter); + } + if (params?.activeBefore) { + url.searchParams.append('activeBefore', params.activeBefore); + } + + const result = await fetch(url.toString(), { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -55,3 +63,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise => { + const result = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Org-Domain': '~', + ...(env.SOURCEBOT_API_KEY ? { 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request) + }).then(response => response.json()); + + if (isServiceError(result)) { + return result; + } + + return searchCommitsResponseSchema.parse(result); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 3e4750a72..922f0b802 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { listRepos, search, getFileSource } from './client.js'; +import { listRepos, search, getFileSource, searchCommits } from './client.js'; import { env, numberSchema } from './env.js'; import { listReposRequestSchema } from './schemas.js'; import { TextContent } from './types.js'; @@ -49,6 +49,18 @@ server.tool( .boolean() .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`) .optional(), + gitRevision: z + .string() + .describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), + since: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week', 'yesterday').`) + .optional(), + until: z + .string() + .describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`) + .optional(), maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val)) @@ -61,6 +73,9 @@ server.tool( maxTokens = env.DEFAULT_MINIMUM_TOKENS, includeCodeSnippets = false, caseSensitive = false, + gitRevision, + since, + until, }) => { if (repoIds.length > 0) { query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`; @@ -80,6 +95,9 @@ server.tool( query, matches: env.DEFAULT_MATCHES, contextLines: env.DEFAULT_CONTEXT_LINES, + gitRevision, + since, + until, }); if (isServiceError(response)) { @@ -164,22 +182,102 @@ server.tool( } ); +server.tool( + "search_commits", + `Searches for commits in a specific repository based on actual commit time (NOT index time). + + **Requirements**: The repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. If the repository is not found on the server disk, an error will be returned asking you to try again later. + + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") or relative formats (e.g., "30 days ago", "last week", "yesterday"). + + **YOU MUST** call 'list_repos' first to obtain the exact repository ID. + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + repoId: z.union([z.number(), z.string()]).describe(`Repository identifier. Can be either: + - Numeric database ID (e.g., 123) + - Full repository name (e.g., "github.com/owner/repo") as returned by 'list_repos' + + **YOU MUST** call 'list_repos' first to obtain the repository identifier.`), + query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(), + since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(), + until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(), + author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(), + maxCount: z.number().describe(`Maximum number of commits to return (default: 50).`).optional(), + }, + async ({ repoId, query, since, until, author, maxCount }) => { + const result = await searchCommits({ + repoId, + query, + since, + until, + author, + maxCount, + }); + + if (isServiceError(result)) { + return { + content: [{ type: "text", text: `Error: ${result.message}` }], + isError: true, + }; + } + + return { + content: [{ type: "text", text: JSON.stringify(result, null, 2) }], + }; + } +); + server.tool( "list_repos", - "Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.", - listReposRequestSchema.shape, - async ({ query, pageNumber = 1, limit = 50 }: { + `Lists repositories in the organization with optional filtering and pagination. + + **Temporal Filtering**: When using 'activeAfter' or 'activeBefore', only repositories with commits in the specified timeframe are returned. This requires repositories to be cloned locally. Repositories not on disk will be silently excluded. + + **Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") and relative dates (e.g., "30 days ago", "last week", "yesterday"). + + If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + query: z + .string() + .describe("Filter repositories by name (case-insensitive).") + .optional(), + pageNumber: z + .number() + .int() + .positive() + .describe("Page number (1-indexed, default: 1)") + .default(1), + limit: z + .number() + .int() + .positive() + .describe("Number of repositories per page (default: 50)") + .default(50), + activeAfter: z + .string() + .describe("Only return repositories with commits after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').") + .optional(), + activeBefore: z + .string() + .describe("Only return repositories with commits before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').") + .optional(), + }, + async ({ query, pageNumber = 1, limit = 50, activeAfter, activeBefore }: { query?: string; pageNumber?: number; limit?: number; + activeAfter?: string; + activeBefore?: string; }) => { - const response = await listRepos(); + const response = await listRepos({ activeAfter, activeBefore }); if (isServiceError(response)) { return { content: [{ type: "text", text: `Error listing repositories: ${response.message}`, }], + isError: true, }; } @@ -196,30 +294,54 @@ server.tool( // Sort alphabetically for consistent pagination filtered.sort((a, b) => a.repoName.localeCompare(b.repoName)); + // Calculate total count before pagination + const totalCount = filtered.length; + // Apply pagination const startIndex = (pageNumber - 1) * limit; const endIndex = startIndex + limit; const paginated = filtered.slice(startIndex, endIndex); // Format output - const content: TextContent[] = paginated.map(repo => { - return { - type: "text", - text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`, - } - }); + const content: TextContent[] = []; - // Add pagination info - if (content.length === 0 && filtered.length > 0) { + if (paginated.length === 0 && totalCount > 0) { + // User requested a page beyond available results + const totalPages = Math.ceil(totalCount / limit); content.push({ type: "text", - text: `No results on page ${pageNumber}. Total matching repositories: ${filtered.length}`, + text: `No results on page ${pageNumber}. Total matching repositories: ${totalCount} (${totalPages} page${totalPages !== 1 ? 's' : ''}). Try pageNumber between 1 and ${totalPages}.`, }); - } else if (filtered.length > endIndex) { + } else if (paginated.length === 0) { + // No repositories match the filters content.push({ type: "text", - text: `Showing ${paginated.length} repositories (page ${pageNumber}). Total matching: ${filtered.length}. Use pageNumber ${pageNumber + 1} to see more.`, + text: `No repositories found matching the specified criteria.${activeAfter || activeBefore ? ' Note: Temporal filtering requires repositories to be cloned locally.' : ''}`, }); + } else { + // Add repository listings + content.push(...paginated.map(repo => ({ + type: "text" as const, + text: `id: ${repo.repoName}\nurl: ${repo.webUrl}${repo.indexedAt ? `\nlast_indexed: ${repo.indexedAt}` : ''}`, + }))); + + // Add pagination info if there are more results + const totalPages = Math.ceil(totalCount / limit); + if (totalPages > 1) { + const paginationInfo = [ + `Page ${pageNumber} of ${totalPages}`, + `Showing ${paginated.length} of ${totalCount} repositories`, + ]; + + if (pageNumber < totalPages) { + paginationInfo.push(`Use pageNumber=${pageNumber + 1} to see more`); + } + + content.push({ + type: "text", + text: `\n--- ${paginationInfo.join(' | ')} ---`, + }); + } } return { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index ba46b2f15..b0948ee4b 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -30,6 +30,12 @@ export const searchRequestSchema = z.object({ contextLines: z.number().optional(), // Whether to return the whole file as part of the response. whole: z.boolean().optional(), + // The git revision to search in. + gitRevision: z.string().optional(), + // Filter by date. Only show results that were indexed after this date. + since: z.string().optional(), + // Filter by date. Only show results that were indexed before this date. + until: z.string().optional(), }); export const repositoryInfoSchema = z.object({ @@ -191,3 +197,24 @@ export const serviceErrorSchema = z.object({ errorCode: z.string(), message: z.string(), }); + +export const searchCommitsRequestSchema = z.object({ + repoId: z.number(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); + +export const commitSchema = z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +}); + +export const searchCommitsResponseSchema = z.array(commitSchema); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 9c858fe5b..880addfe4 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -10,6 +10,9 @@ import { fileSourceRequestSchema, symbolSchema, serviceErrorSchema, + searchCommitsRequestSchema, + searchCommitsResponseSchema, + commitSchema, } from "./schemas.js"; import { z } from "zod"; @@ -29,3 +32,7 @@ export type FileSourceResponse = z.infer; export type TextContent = { type: "text", text: string }; export type ServiceError = z.infer; + +export type SearchCommitsRequest = z.infer; +export type SearchCommitsResponse = z.infer; +export type Commit = z.infer; diff --git a/packages/shared/src/constants.server.ts b/packages/shared/src/constants.server.ts new file mode 100644 index 000000000..a9efd184b --- /dev/null +++ b/packages/shared/src/constants.server.ts @@ -0,0 +1,7 @@ +import { env } from "./env.server.js"; +import path from "path"; + +// Guard against env.DATA_CACHE_DIR being undefined (e.g., when SKIP_ENV_VALIDATION=1) +// Use fallback to prevent module load errors in non-runtime contexts like builds +export const REPOS_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'repos') : '/tmp/sourcebot/repos'; +export const INDEX_CACHE_DIR = env.DATA_CACHE_DIR ? path.join(env.DATA_CACHE_DIR, 'index') : '/tmp/sourcebot/index'; diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index 42e1aa694..5a6176374 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -30,3 +30,6 @@ export const DEFAULT_CONFIG_SETTINGS: ConfigSettings = { experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours } + +export const ACTIVITY_FILTER_MAX_SCAN_LIMIT = 200; +export const ACTIVITY_FILTER_CONCURRENCY_LIMIT = 10; diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index fabe608e7..4cb7bffbe 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -26,6 +26,10 @@ export { getConfigSettings, } from "./utils.js"; export * from "./constants.js"; +export { + REPOS_CACHE_DIR, + INDEX_CACHE_DIR, +} from "./constants.server.js"; export { env, resolveEnvironmentVariableOverridesFromConfig, diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index e194f808a..710b973c3 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -1,7 +1,17 @@ 'use server'; import { getAuditService } from "@/ee/features/audit/factory"; -import { env } from "@sourcebot/shared"; +import { + env, + ACTIVITY_FILTER_MAX_SCAN_LIMIT, + ACTIVITY_FILTER_CONCURRENCY_LIMIT, + generateApiKey, + getTokenFromConfig, + hashSecret, + createLogger, + getPlan, + hasEntitlement +} from "@sourcebot/shared"; import { addUserToOrganization, orgHasAvailability } from "@/lib/authUtils"; import { ErrorCode } from "@/lib/errorCodes"; import { notAuthenticated, notFound, orgNotFound, ServiceError, ServiceErrorException, unexpectedError } from "@/lib/serviceError"; @@ -9,13 +19,10 @@ import { getOrgMetadata, isHttpError, isServiceError } from "@/lib/utils"; import { prisma } from "@/prisma"; import { render } from "@react-email/components"; import * as Sentry from '@sentry/nextjs'; -import { generateApiKey, getTokenFromConfig, hashSecret } from "@sourcebot/shared"; import { ApiKey, ConnectionSyncJobStatus, Org, OrgRole, Prisma, RepoIndexingJobStatus, RepoIndexingJobType, StripeSubscriptionStatus } from "@sourcebot/db"; -import { createLogger } from "@sourcebot/shared"; import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; -import { getPlan, hasEntitlement } from "@sourcebot/shared"; import { StatusCodes } from "http-status-codes"; import { cookies, headers } from "next/headers"; import { createTransport } from "nodemailer"; @@ -312,12 +319,12 @@ export const createApiKey = async (name: string, domain: string): Promise<{ key: withAuth((userId) => withOrgMembership(userId, domain, async ({ org, userRole }) => { if (env.EXPERIMENT_DISABLE_API_KEY_CREATION_FOR_NON_ADMIN_USERS === 'true' && userRole !== OrgRole.OWNER) { - logger.error(`API key creation is disabled for non-admin users. User ${userId} is not an owner.`); - return { - statusCode: StatusCodes.FORBIDDEN, - errorCode: ErrorCode.INSUFFICIENT_PERMISSIONS, - message: "API key creation is disabled for non-admin users.", - } satisfies ServiceError; + logger.error(`API key creation is disabled for non-admin users. User ${userId} is not an owner.`); + return { + statusCode: StatusCodes.FORBIDDEN, + errorCode: ErrorCode.INSUFFICIENT_PERMISSIONS, + message: "API key creation is disabled for non-admin users.", + } satisfies ServiceError; } const existingApiKey = await prisma.apiKey.findFirst({ @@ -460,23 +467,86 @@ export const getUserApiKeys = async (domain: string): Promise<{ name: string; cr })); }))); +import { searchCommits } from "./features/search/gitApi"; + export const getRepos = async ({ where, take, + activeAfter, + activeBefore, }: { where?: Prisma.RepoWhereInput, - take?: number + take?: number, + activeAfter?: string, + activeBefore?: string, } = {}) => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { + // When filtering by activity, we need to fetch all repos first, + // then apply pagination after filtering + const shouldFilterByActivity = activeAfter || activeBefore; + const repos = await prisma.repo.findMany({ where: { orgId: org.id, ...where, }, - take, + // If filtering by activity, fetch candidates up to the safety cap. + // Otherwise, apply the requested limit directly. + take: shouldFilterByActivity ? ACTIVITY_FILTER_MAX_SCAN_LIMIT : take, + orderBy: { + name: 'asc', // Consistent ordering for pagination + }, }); - return repos.map((repo) => repositoryQuerySchema.parse({ + let filteredRepos: typeof repos = []; + + if (shouldFilterByActivity) { + // Process in chunks to limit concurrency + for (let i = 0; i < repos.length; i += ACTIVITY_FILTER_CONCURRENCY_LIMIT) { + // Stop if we have enough matches + if (take && filteredRepos.length >= take) { + break; + } + + const chunk = repos.slice(i, i + ACTIVITY_FILTER_CONCURRENCY_LIMIT); + const activityChecks = await Promise.all(chunk.map(async (repo) => { + const result = await searchCommits({ + repoId: repo.id, + since: activeAfter, + until: activeBefore, + maxCount: 1, + }); + + // Handle successful result + if (Array.isArray(result)) { + return result.length > 0 ? repo : null; + } + + // Handle ServiceError + const errorMessage = result.message ?? ''; + if (!errorMessage.includes('not found on Sourcebot server disk')) { + // Log unexpected errors, but not "repo not cloned yet" errors + console.error( + `Error checking activity for repo ${repo.id} (${repo.name}):`, + result + ); + } + return null; + })); + + const activeInChunk = activityChecks.filter((r): r is typeof repos[0] => r !== null); + filteredRepos.push(...activeInChunk); + } + + // Apply final limit + if (take) { + filteredRepos = filteredRepos.slice(0, take); + } + } else { + filteredRepos = repos; + } + + return filteredRepos.map((repo) => repositoryQuerySchema.parse({ codeHostType: repo.external_codeHostType, repoId: repo.id, repoName: repo.name, diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts new file mode 100644 index 000000000..3decdb7f7 --- /dev/null +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -0,0 +1,27 @@ +import { searchCommits, SearchCommitsRequest } from "@/features/search/gitApi"; +import { serviceErrorResponse, schemaValidationError } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { NextRequest } from "next/server"; +import { searchCommitsRequestSchema } from "@/features/search/schemas"; +import { withOptionalAuthV2 } from "@/withAuthV2"; + +export async function POST(request: NextRequest): Promise { + const body = await request.json(); + const parsed = await searchCommitsRequestSchema.safeParseAsync(body); + + if (!parsed.success) { + return serviceErrorResponse( + schemaValidationError(parsed.error) + ); + } + + const result = await withOptionalAuthV2(async () => { + return await searchCommits(parsed.data as SearchCommitsRequest); + }); + + if (isServiceError(result)) { + return serviceErrorResponse(result); + } + + return Response.json(result); +} diff --git a/packages/web/src/app/api/(server)/repos/route.ts b/packages/web/src/app/api/(server)/repos/route.ts index acc3f9ce0..e74e5090e 100644 --- a/packages/web/src/app/api/(server)/repos/route.ts +++ b/packages/web/src/app/api/(server)/repos/route.ts @@ -2,12 +2,20 @@ import { getRepos } from "@/actions"; import { serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { GetReposResponse } from "@/lib/types"; +import { NextRequest } from "next/server"; -export const GET = async () => { - const response: GetReposResponse = await getRepos(); - if (isServiceError(response)) { - return serviceErrorResponse(response); +export const GET = async (request: NextRequest) => { + const searchParams = request.nextUrl.searchParams; + const activeAfter = searchParams.get('activeAfter') || undefined; + const activeBefore = searchParams.get('activeBefore') || undefined; + + const repos: GetReposResponse = await getRepos({ + activeAfter, + activeBefore, + }); + if (isServiceError(repos)) { + return serviceErrorResponse(repos); } - return Response.json(response); + return Response.json(repos); } diff --git a/packages/web/src/features/search/dateUtils.test.ts b/packages/web/src/features/search/dateUtils.test.ts new file mode 100644 index 000000000..a64a83c14 --- /dev/null +++ b/packages/web/src/features/search/dateUtils.test.ts @@ -0,0 +1,379 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { + parseTemporalDate, + validateDateRange, + toDbDate, + toGitDate, +} from './dateUtils'; + +describe('dateUtils', () => { + // Mock the current time for consistent testing + const MOCK_NOW = new Date('2024-06-15T12:00:00.000Z'); + + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(MOCK_NOW); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('parseTemporalDate', () => { + describe('ISO 8601 dates', () => { + it('should parse ISO date (YYYY-MM-DD)', () => { + const result = parseTemporalDate('2024-01-01'); + expect(result).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should parse ISO datetime with timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00.000Z'); + }); + + it('should parse ISO datetime without timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00'); + expect(result).toBeDefined(); + expect(result).toContain('2024-01-01'); + }); + + it('should return undefined for undefined input', () => { + const result = parseTemporalDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = parseTemporalDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('relative dates - yesterday', () => { + it('should parse "yesterday"', () => { + const result = parseTemporalDate('yesterday'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "YESTERDAY" (case insensitive)', () => { + const result = parseTemporalDate('YESTERDAY'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + }); + + describe('relative dates - N units ago', () => { + it('should parse "1 day ago"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "30 days ago"', () => { + const result = parseTemporalDate('30 days ago'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should parse "1 week ago"', () => { + const result = parseTemporalDate('1 week ago'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "2 weeks ago"', () => { + const result = parseTemporalDate('2 weeks ago'); + expect(result).toBe('2024-06-01T12:00:00.000Z'); + }); + + it('should parse "1 month ago"', () => { + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "3 months ago"', () => { + const result = parseTemporalDate('3 months ago'); + expect(result).toBe('2024-03-15T12:00:00.000Z'); + }); + + it('should parse "1 year ago"', () => { + const result = parseTemporalDate('1 year ago'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should parse "2 hours ago"', () => { + const result = parseTemporalDate('2 hours ago'); + expect(result).toBe('2024-06-15T10:00:00.000Z'); + }); + + it('should parse "30 minutes ago"', () => { + const result = parseTemporalDate('30 minutes ago'); + expect(result).toBe('2024-06-15T11:30:00.000Z'); + }); + + it('should parse "45 seconds ago"', () => { + const result = parseTemporalDate('45 seconds ago'); + expect(result).toBe('2024-06-15T11:59:15.000Z'); + }); + + it('should handle singular "day" without "s"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('30 DAYS AGO'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + }); + + describe('relative dates - last unit', () => { + it('should parse "last week"', () => { + const result = parseTemporalDate('last week'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "last month"', () => { + const result = parseTemporalDate('last month'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "last year"', () => { + const result = parseTemporalDate('last year'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('LAST WEEK'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('invalid or unknown formats', () => { + it('should return original string for unrecognized format', () => { + const result = parseTemporalDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should return original string for git-specific formats', () => { + // Git understands these but our parser doesn't convert them + const result = parseTemporalDate('2 weeks 3 days ago'); + expect(result).toBe('2 weeks 3 days ago'); + }); + }); + }); + + describe('validateDateRange', () => { + it('should return null for valid date range', () => { + const error = validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when only since is provided', () => { + const error = validateDateRange('2024-01-01', undefined); + expect(error).toBeNull(); + }); + + it('should return null when only until is provided', () => { + const error = validateDateRange(undefined, '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when both are undefined', () => { + const error = validateDateRange(undefined, undefined); + expect(error).toBeNull(); + }); + + it('should return error when since > until', () => { + const error = validateDateRange('2024-12-31', '2024-01-01'); + expect(error).toContain('since'); + expect(error).toContain('until'); + expect(error).toContain('before'); + }); + + it('should validate relative dates', () => { + const error = validateDateRange('30 days ago', '1 day ago'); + expect(error).toBeNull(); + }); + + it('should return error for invalid relative date range', () => { + const error = validateDateRange('1 day ago', '30 days ago'); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should handle mixed ISO and relative dates', () => { + const error = validateDateRange('2024-01-01', '30 days ago'); + expect(error).toBeNull(); // 2024-01-01 is before 30 days ago + }); + + it('should return null for same date', () => { + const error = validateDateRange('2024-06-15', '2024-06-15'); + expect(error).toBeNull(); + }); + }); + + describe('toDbDate', () => { + it('should convert ISO date to Date object', () => { + const result = toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative date to Date object', () => { + const result = toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should return undefined for undefined input', () => { + const result = toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = toDbDate(''); + expect(result).toBeUndefined(); + }); + + it('should handle "yesterday"', () => { + const result = toDbDate('yesterday'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should handle "last week"', () => { + const result = toDbDate('last week'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('toGitDate', () => { + it('should preserve ISO date format', () => { + const result = toGitDate('2024-01-01'); + expect(result).toBe('2024-01-01'); + }); + + it('should preserve ISO datetime format', () => { + const result = toGitDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00Z'); + }); + + it('should preserve "N days ago" format', () => { + const result = toGitDate('30 days ago'); + expect(result).toBe('30 days ago'); + }); + + it('should preserve "yesterday" format', () => { + const result = toGitDate('yesterday'); + expect(result).toBe('yesterday'); + }); + + it('should preserve "last week" format', () => { + const result = toGitDate('last week'); + expect(result).toBe('last week'); + }); + + it('should preserve "last month" format', () => { + const result = toGitDate('last month'); + expect(result).toBe('last month'); + }); + + it('should preserve "last year" format', () => { + const result = toGitDate('last year'); + expect(result).toBe('last year'); + }); + + it('should return undefined for undefined input', () => { + const result = toGitDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should pass through unrecognized format unchanged', () => { + // For formats git doesn't natively understand, pass through to git + const result = toGitDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should preserve relative time formats', () => { + const result = toGitDate('2 weeks ago'); + expect(result).toBe('2 weeks ago'); + }); + }); + + describe('edge cases', () => { + it('should handle dates at month boundaries', () => { + vi.setSystemTime(new Date('2024-03-31T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + // JavaScript Date handles month rollover + expect(result).toBeDefined(); + }); + + it('should handle dates at year boundaries', () => { + vi.setSystemTime(new Date('2024-01-15T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2023-12-15T12:00:00.000Z'); + }); + + it('should handle leap year February', () => { + vi.setSystemTime(new Date('2024-03-01T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-02-01T12:00:00.000Z'); + }); + + it('should handle midnight times', () => { + vi.setSystemTime(new Date('2024-06-15T00:00:00.000Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T00:00:00.000Z'); + }); + + it('should handle end of day times', () => { + vi.setSystemTime(new Date('2024-06-15T23:59:59.999Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T23:59:59.999Z'); + }); + }); + + describe('integration scenarios', () => { + it('should correctly validate a typical user query range', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + const validationError = validateDateRange(since, until); + + expect(parsedSince).toBe('2024-05-16T12:00:00.000Z'); + expect(parsedUntil).toBe('2024-06-14T12:00:00.000Z'); + expect(validationError).toBeNull(); + }); + + it('should correctly convert for database queries', () => { + const since = '7 days ago'; + const until = 'yesterday'; + + const dbSince = toDbDate(since); + const dbUntil = toDbDate(until); + + expect(dbSince).toBeInstanceOf(Date); + expect(dbUntil).toBeInstanceOf(Date); + expect(dbSince!.getTime()).toBeLessThan(dbUntil!.getTime()); + }); + + it('should correctly preserve for git commands', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + // Git natively understands these, so they're preserved + expect(gitSince).toBe('30 days ago'); + expect(gitUntil).toBe('yesterday'); + }); + + it('should handle mixed ISO and relative dates in range validation', () => { + const since = '2024-01-01'; + const until = '7 days ago'; + + const validationError = validateDateRange(since, until); + expect(validationError).toBeNull(); + }); + }); +}); diff --git a/packages/web/src/features/search/dateUtils.ts b/packages/web/src/features/search/dateUtils.ts new file mode 100644 index 000000000..f28e9a5bd --- /dev/null +++ b/packages/web/src/features/search/dateUtils.ts @@ -0,0 +1,186 @@ +/** + * Utilities for parsing and validating date parameters for temporal queries. + * Supports both absolute (ISO 8601) and relative date formats. + */ + +/** + * Parse a date string that can be either: + * - ISO 8601 format (e.g., "2024-01-01", "2024-01-01T12:00:00Z") + * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday", "last week") + * + * @param dateStr - The date string to parse + * @returns ISO 8601 string if successfully parsed, original string if not parseable (to allow git to try), or undefined if input is falsy + * + * @example + * parseTemporalDate('2024-01-01') // '2024-01-01T00:00:00.000Z' + * parseTemporalDate('30 days ago') // Calculates and returns ISO string + * parseTemporalDate('yesterday') // Yesterday's date as ISO string + * parseTemporalDate('some-git-format') // 'some-git-format' (passed through) + * parseTemporalDate(undefined) // undefined + */ +export function parseTemporalDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Try parsing as ISO date first + const isoDate = new Date(dateStr); + if (!isNaN(isoDate.getTime())) { + return isoDate.toISOString(); + } + + // Parse relative dates (Git-compatible format) + // Git accepts these natively, but we normalize to ISO for consistency + const lowerStr = dateStr.toLowerCase().trim(); + + // Handle "yesterday" + if (lowerStr === 'yesterday') { + const date = new Date(); + date.setDate(date.getDate() - 1); + return date.toISOString(); + } + + // Handle "N s ago" format + const matchRelative = lowerStr.match(/^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i); + if (matchRelative) { + const amount = parseInt(matchRelative[1]); + const unit = matchRelative[2].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'second': + date.setSeconds(date.getSeconds() - amount); + break; + case 'minute': + date.setMinutes(date.getMinutes() - amount); + break; + case 'hour': + date.setHours(date.getHours() - amount); + break; + case 'day': + date.setDate(date.getDate() - amount); + break; + case 'week': + date.setDate(date.getDate() - (amount * 7)); + break; + case 'month': + date.setMonth(date.getMonth() - amount); + break; + case 'year': + date.setFullYear(date.getFullYear() - amount); + break; + } + + return date.toISOString(); + } + + // Handle "last " format + const matchLast = lowerStr.match(/^last\s+(week|month|year)$/i); + if (matchLast) { + const unit = matchLast[1].toLowerCase(); + const date = new Date(); + + switch (unit) { + case 'week': + date.setDate(date.getDate() - 7); + break; + case 'month': + date.setMonth(date.getMonth() - 1); + break; + case 'year': + date.setFullYear(date.getFullYear() - 1); + break; + } + + return date.toISOString(); + } + + // If we can't parse it, return the original string + // This allows git log to try parsing it with its own logic + return dateStr; +} + +/** + * Validate that a date range is consistent (since < until). + * + * @param since - Start date (inclusive) + * @param until - End date (inclusive) + * @returns Error message if invalid, null if valid + */ +export function validateDateRange(since: string | undefined, until: string | undefined): string | null { + if (!since || !until) { + return null; // No validation needed if either is missing + } + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + + if (!parsedSince || !parsedUntil) { + return null; // Let individual date parsing handle invalid formats + } + + const sinceDate = new Date(parsedSince); + const untilDate = new Date(parsedUntil); + + if (isNaN(sinceDate.getTime()) || isNaN(untilDate.getTime())) { + return null; + } + + if (sinceDate > untilDate) { + return `Invalid date range: 'since' (${since}) must be before 'until' (${until})`; + } + + return null; +} + +/** + * Convert a date to a format suitable for Prisma database queries. + * Returns a Date object or undefined. + * + * @param dateStr - The date string to convert + * @returns Date object or undefined + */ +export function toDbDate(dateStr: string | undefined): Date | undefined { + if (!dateStr) { + return undefined; + } + + const parsed = parseTemporalDate(dateStr); + if (!parsed) { + return undefined; + } + + const date = new Date(parsed); + return isNaN(date.getTime()) ? undefined : date; +} + +/** + * Convert a date to a format suitable for git log commands. + * Git accepts relative formats directly, so we preserve them when possible. + * + * @param dateStr - The date string to convert + * @returns Git-compatible date string or undefined + */ +export function toGitDate(dateStr: string | undefined): string | undefined { + if (!dateStr) { + return undefined; + } + + // Git natively understands these formats, so preserve them + const gitNativeFormats = [ + /^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i, + /^yesterday$/i, + /^last\s+(week|month|year)$/i, + /^\d{4}-\d{2}-\d{2}$/, // ISO date + /^\d{4}-\d{2}-\d{2}T/, // ISO datetime + ]; + + for (const pattern of gitNativeFormats) { + if (pattern.test(dateStr)) { + return dateStr; // Git can handle this directly + } + } + + // Otherwise, parse and convert to ISO + return parseTemporalDate(dateStr); +} diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts new file mode 100644 index 000000000..1c4a46a58 --- /dev/null +++ b/packages/web/src/features/search/gitApi.test.ts @@ -0,0 +1,575 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { searchCommits } from './gitApi'; +import * as dateUtils from './dateUtils'; + +// Mock dependencies +vi.mock('simple-git'); +vi.mock('fs'); +vi.mock('@sourcebot/shared', () => ({ + REPOS_CACHE_DIR: '/mock/cache/dir', +})); +vi.mock('@/lib/serviceError', () => ({ + unexpectedError: (message: string) => ({ + errorCode: 'UNEXPECTED_ERROR', + message, + }), +})); +vi.mock('@/actions', () => ({ + sew: async (fn: () => Promise | T): Promise => { + try { + return await fn(); + } catch (error) { + // Mock sew to convert thrown errors to ServiceError + return { + errorCode: 'UNEXPECTED_ERROR', + message: error instanceof Error ? error.message : String(error), + } as T; + } + }, +})); +// Create a mock findFirst function that we can configure per-test +const mockFindFirst = vi.fn(); + +vi.mock('@/withAuthV2', () => ({ + withOptionalAuthV2: async (fn: (args: { org: { id: number; name: string }; prisma: unknown }) => Promise): Promise => { + // Mock withOptionalAuthV2 to provide org and prisma context + const mockOrg = { id: 1, name: 'test-org' }; + const mockPrisma = { + repo: { + findFirst: mockFindFirst, + }, + }; + return await fn({ org: mockOrg, prisma: mockPrisma }); + }, +})); +vi.mock('@/lib/utils', () => ({ + isServiceError: (obj: unknown): obj is { errorCode: string } => { + return obj !== null && typeof obj === 'object' && 'errorCode' in obj; + }, +})); + +// Import mocked modules +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; + +describe('searchCommits', () => { + const mockGitLog = vi.fn(); + const mockSimpleGit = simpleGit as unknown as vi.Mock; + const mockExistsSync = existsSync as unknown as vi.Mock; + + beforeEach(() => { + vi.clearAllMocks(); + + // Setup default mocks + mockExistsSync.mockReturnValue(true); + mockSimpleGit.mockReturnValue({ + log: mockGitLog, + }); + }); + + describe('repository validation', () => { + it('should return error when repository does not exist on disk', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not found on Sourcebot server disk'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('123'), + }); + }); + + it('should check the correct repository path', async () => { + mockExistsSync.mockReturnValue(false); + + await searchCommits({ + repoId: 456, + }); + + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + }); + }); + + describe('date range validation', () => { + it('should validate date range and return error for invalid range', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue( + 'Invalid date range: since must be before until' + ); + + const result = await searchCommits({ + repoId: 123, + since: '2024-12-31', + until: '2024-01-01', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: 'Invalid date range: since must be before until', + }); + }); + + it('should proceed when date range is valid', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + since: '2024-01-01', + until: '2024-12-31', + }); + + expect(Array.isArray(result)).toBe(true); + }); + }); + + describe('date parsing', () => { + it('should parse dates using toGitDate', async () => { + const toGitDateSpy = vi.spyOn(dateUtils, 'toGitDate'); + toGitDateSpy.mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(toGitDateSpy).toHaveBeenCalledWith('30 days ago'); + expect(toGitDateSpy).toHaveBeenCalledWith('yesterday'); + }); + + it('should pass parsed dates to git log', async () => { + vi.spyOn(dateUtils, 'toGitDate') + .mockReturnValueOnce('2024-01-01') + .mockReturnValueOnce('2024-12-31'); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + since: '30 days ago', + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '2024-01-01', + '--until': '2024-12-31', + }) + ); + }); + }); + + describe('git log options', () => { + beforeEach(() => { + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + }); + + it('should set default maxCount', async () => { + await searchCommits({ + repoId: 123, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 50, + }) + ); + }); + + it('should use custom maxCount', async () => { + await searchCommits({ + repoId: 123, + maxCount: 100, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 100, + }) + ); + }); + + it('should add --since when since is provided', async () => { + await searchCommits({ + repoId: 123, + since: '30 days ago', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '30 days ago', + }) + ); + }); + + it('should add --until when until is provided', async () => { + await searchCommits({ + repoId: 123, + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--until': 'yesterday', + }) + ); + }); + + it('should add --author when author is provided', async () => { + await searchCommits({ + repoId: 123, + author: 'john@example.com', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--author': 'john@example.com', + }) + ); + }); + + it('should add --grep and --regexp-ignore-case when query is provided', async () => { + await searchCommits({ + repoId: 123, + query: 'fix bug', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--grep': 'fix bug', + '--regexp-ignore-case': null, + }) + ); + }); + + it('should combine all options', async () => { + await searchCommits({ + repoId: 123, + query: 'feature', + since: '2024-01-01', + until: '2024-12-31', + author: 'jane@example.com', + maxCount: 25, + }); + + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 25, + '--since': '2024-01-01', + '--until': '2024-12-31', + '--author': 'jane@example.com', + '--grep': 'feature', + '--regexp-ignore-case': null, + }); + }); + }); + + describe('successful responses', () => { + it('should return commit array from git log', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-15', + message: 'feat: add feature', + refs: 'HEAD -> main', + body: '', + author_name: 'John Doe', + author_email: 'john@example.com', + }, + { + hash: 'def456', + date: '2024-06-14', + message: 'fix: bug fix', + refs: '', + body: '', + author_name: 'Jane Smith', + author_email: 'jane@example.com', + }, + ]; + + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toEqual(mockCommits); + }); + + it('should return empty array when no commits match', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + query: 'nonexistent', + }); + + expect(result).toEqual([]); + }); + }); + + describe('error handling', () => { + it('should return error for "not a git repository"', async () => { + mockGitLog.mockRejectedValue(new Error('not a git repository')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not a valid git repository'), + }); + }); + + it('should return error for "ambiguous argument"', async () => { + mockGitLog.mockRejectedValue(new Error('ambiguous argument')); + + const result = await searchCommits({ + repoId: 123, + since: 'invalid-date', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Invalid git reference or date format'), + }); + }); + + it('should return error for timeout', async () => { + mockGitLog.mockRejectedValue(new Error('timeout exceeded')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('timed out'), + }); + }); + + it('should return ServiceError for other Error instances', async () => { + mockGitLog.mockRejectedValue(new Error('some other error')); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); + }); + + it('should return ServiceError for non-Error exceptions', async () => { + mockGitLog.mockRejectedValue('string error'); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository 123'), + }); + }); + }); + + describe('git client configuration', () => { + it('should configure simple-git with correct options', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 123, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith({ + baseDir: '/mock/cache/dir/123', + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, + }, + }); + }); + + it('should create git client for the correct repository path', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 456, + }); + + expect(mockSimpleGit).toHaveBeenCalledWith( + expect.objectContaining({ + baseDir: '/mock/cache/dir/456', + }) + ); + }); + }); + + describe('integration scenarios', () => { + it('should handle a typical commit search with filters', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-10T14:30:00Z', + message: 'fix: resolve authentication bug', + refs: 'HEAD -> main', + body: 'Fixed issue with JWT token validation', + author_name: 'Security Team', + author_email: 'security@example.com', + }, + ]; + + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 123, + query: 'authentication', + since: '30 days ago', + until: 'yesterday', + author: 'security', + maxCount: 20, + }); + + expect(result).toEqual(mockCommits); + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 20, + '--since': '30 days ago', + '--until': 'yesterday', + '--author': 'security', + '--grep': 'authentication', + '--regexp-ignore-case': null, + }); + }); + + it('should handle repository not cloned yet', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await searchCommits({ + repoId: 999, + query: 'feature', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + }); + expect(result).toHaveProperty('message'); + const message = (result as { message: string }).message; + expect(message).toContain('999'); + expect(message).toContain('not found on Sourcebot server disk'); + expect(message).toContain('cloning process may not be finished yet'); + }); + }); + + describe('repository identifier resolution', () => { + beforeEach(() => { + // Reset mockFindFirst before each test in this suite + mockFindFirst.mockReset(); + }); + + it('should accept numeric repository ID', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 123, + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/123'); + // mockFindFirst should not be called for numeric IDs + expect(mockFindFirst).not.toHaveBeenCalled(); + }); + + it('should accept string repository name and resolve to numeric ID', async () => { + mockFindFirst.mockResolvedValue({ id: 456 }); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repoId: 'github.com/owner/repo', + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/456'); + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/owner/repo', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should return error when string repository name is not found', async () => { + mockFindFirst.mockResolvedValue(null); + + const result = await searchCommits({ + repoId: 'github.com/nonexistent/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Repository "github.com/nonexistent/repo" not found'), + }); + expect(result).toMatchObject({ + message: expect.stringContaining('Use \'list_repos\' to get valid repository identifiers'), + }); + }); + + it('should query database with correct parameters for string repo name', async () => { + mockFindFirst.mockResolvedValue({ id: 789 }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repoId: 'github.com/example/project', + }); + + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/example/project', + orgId: 1, + }, + select: { id: true }, + }); + }); + + it('should work with string repo name in full search scenario', async () => { + const mockCommits = [ + { + hash: 'xyz789', + date: '2024-06-20T10:00:00Z', + message: 'feat: new feature', + refs: 'main', + body: 'Added new functionality', + author_name: 'Developer', + author_email: 'dev@example.com', + }, + ]; + + mockFindFirst.mockResolvedValue({ id: 555 }); + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repoId: 'github.com/test/repository', + query: 'feature', + since: '7 days ago', + author: 'Developer', + }); + + expect(result).toEqual(mockCommits); + expect(mockExistsSync).toHaveBeenCalledWith('/mock/cache/dir/555'); + }); + }); +}); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts new file mode 100644 index 000000000..e6e1269f3 --- /dev/null +++ b/packages/web/src/features/search/gitApi.ts @@ -0,0 +1,197 @@ +import { simpleGit } from 'simple-git'; +import { existsSync } from 'fs'; +import { REPOS_CACHE_DIR } from '@sourcebot/shared'; +import path from 'path'; +import { ServiceError, unexpectedError } from '@/lib/serviceError'; +import { sew } from '@/actions'; +import { toGitDate, validateDateRange } from './dateUtils'; +import { withOptionalAuthV2 } from '@/withAuthV2'; +import { isServiceError } from '@/lib/utils'; +import type { PrismaClient } from '@sourcebot/db'; + +const createGitClientForPath = (repoPath: string) => { + return simpleGit({ + baseDir: repoPath, + binary: 'git', + maxConcurrentProcesses: 6, + timeout: { + block: 30000, // 30 second timeout for git operations + }, + }); +} + +/** + * Resolves a repository identifier to a numeric ID. + * Accepts both numeric IDs and string repository names. + * + * @param identifier - Either a numeric repo ID or a string repo name (e.g., "github.com/owner/repo") + * @param orgId - Organization ID to scope the lookup + * @param prisma - Prisma client instance + * @returns Numeric repository ID or ServiceError if not found + */ +const resolveRepoId = async ( + identifier: string | number, + orgId: number, + prisma: PrismaClient +): Promise => { + // If already numeric, return as-is + if (typeof identifier === 'number') { + return identifier; + } + + // Convert string name to numeric ID + const repo = await prisma.repo.findFirst({ + where: { + name: identifier, + orgId: orgId, + }, + select: { id: true } + }); + + if (!repo) { + return unexpectedError( + `Repository "${identifier}" not found. ` + + `Use 'list_repos' to get valid repository identifiers.` + ); + } + + return repo.id; +} + +export interface SearchCommitsRequest { + repoId: string | number; + query?: string; + since?: string; + until?: string; + author?: string; + maxCount?: number; +} + +export interface Commit { + hash: string; + date: string; + message: string; + refs: string; + body: string; + author_name: string; + author_email: string; +} + +/** + * Search commits in a repository using git log. + * + * **Date Formats**: Supports both ISO 8601 dates and relative formats + * (e.g., "30 days ago", "last week", "yesterday"). Git natively handles + * these formats in the --since and --until flags. + * + * **Requirements**: The repository must be cloned on the Sourcebot server disk. + * Sourcebot automatically clones repositories during indexing, but the cloning + * process might not be finished when this query is executed. If the repository + * is not found on the server disk, an error will be returned. + * + * **Repository ID**: Accepts either a numeric database ID or a string repository name + * (e.g., "github.com/owner/repo") as returned by list_repos. + * + * @param request - Search parameters including timeframe filters + * @returns Array of commits or ServiceError + */ +export const searchCommits = async ({ + repoId: repoIdInput, + query, + since, + until, + author, + maxCount = 50, +}: SearchCommitsRequest): Promise => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + // Resolve repository identifier to numeric ID + const repoId = await resolveRepoId(repoIdInput, org.id, prisma); + if (isServiceError(repoId)) { + return repoId; + } + + const repoPath = path.join(REPOS_CACHE_DIR, repoId.toString()); + + // Check if repository exists on Sourcebot server disk + if (!existsSync(repoPath)) { + return unexpectedError( + `Repository ${repoId} not found on Sourcebot server disk. ` + + `Sourcebot automatically clones repositories during indexing, but the ` + + `cloning process may not be finished yet. Please try again later. ` + + `Path checked: ${repoPath}` + ); + } + + // Validate date range if both since and until are provided + const dateRangeError = validateDateRange(since, until); + if (dateRangeError) { + return unexpectedError(dateRangeError); + } + + // Parse dates to git-compatible format + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + const git = createGitClientForPath(repoPath); + + try { + const logOptions: Record = { + maxCount, + }; + + if (gitSince) { + logOptions['--since'] = gitSince; + } + + if (gitUntil) { + logOptions['--until'] = gitUntil; + } + + if (author) { + logOptions['--author'] = author; + } + + if (query) { + logOptions['--grep'] = query; + logOptions['--regexp-ignore-case'] = null; // Case insensitive + } + + const log = await git.log(logOptions); + return log.all as unknown as Commit[]; + } catch (error: unknown) { + // Provide detailed error messages for common git errors + const errorMessage = error instanceof Error ? error.message : String(error); + + if (errorMessage.includes('not a git repository')) { + return unexpectedError( + `Invalid git repository at ${repoPath}. ` + + `The directory exists but is not a valid git repository.` + ); + } + + if (errorMessage.includes('ambiguous argument')) { + return unexpectedError( + `Invalid git reference or date format. ` + + `Please check your date parameters: since="${since}", until="${until}"` + ); + } + + if (errorMessage.includes('timeout')) { + return unexpectedError( + `Git operation timed out after 30 seconds for repository ${repoId}. ` + + `The repository may be too large or the git operation is taking too long.` + ); + } + + // Generic error fallback + if (error instanceof Error) { + throw new Error( + `Failed to search commits in repository ${repoId}: ${error.message}` + ); + } else { + throw new Error( + `Failed to search commits in repository ${repoId}: ${errorMessage}` + ); + } + } +})); diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts index 711c810d9..94ef889c2 100644 --- a/packages/web/src/features/search/schemas.ts +++ b/packages/web/src/features/search/schemas.ts @@ -30,6 +30,12 @@ export const searchRequestSchema = z.object({ contextLines: z.number().optional(), // Whether to return the whole file as part of the response. whole: z.boolean().optional(), + // The git revision to search in. + gitRevision: z.string().optional(), + // Filter by date. Only show results that were indexed after this date. + since: z.string().optional(), + // Filter by date. Only show results that were indexed before this date. + until: z.string().optional(), }); export const repositoryInfoSchema = z.object({ @@ -160,4 +166,25 @@ export const fileSourceResponseSchema = z.object({ repositoryWebUrl: z.string().optional(), branch: z.string().optional(), webUrl: z.string().optional(), -}); \ No newline at end of file +}); + +export const searchCommitsRequestSchema = z.object({ + repoId: z.union([z.number(), z.string()]), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); + +export const commitSchema = z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +}); + +export const searchCommitsResponseSchema = z.array(commitSchema); \ No newline at end of file diff --git a/packages/web/src/features/search/searchApi.test.ts b/packages/web/src/features/search/searchApi.test.ts new file mode 100644 index 000000000..3bc87c89b --- /dev/null +++ b/packages/web/src/features/search/searchApi.test.ts @@ -0,0 +1,295 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import * as dateUtils from './dateUtils'; + +describe('searchApi temporal parameter handling', () => { + describe('date utility integration', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('toDbDate conversion', () => { + it('should convert ISO date strings to Date objects', () => { + const result = dateUtils.toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative dates to Date objects', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const result = dateUtils.toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + + vi.useRealTimers(); + }); + + it('should handle undefined gracefully', () => { + const result = dateUtils.toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should handle empty string gracefully', () => { + const result = dateUtils.toDbDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('date range validation scenarios', () => { + it('should accept valid date ranges', () => { + const error = dateUtils.validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should reject invalid date ranges', () => { + const error = dateUtils.validateDateRange('2024-12-31', '2024-01-01'); + expect(error).not.toBeNull(); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should validate relative date ranges', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const error = dateUtils.validateDateRange('30 days ago', 'yesterday'); + expect(error).toBeNull(); + + vi.useRealTimers(); + }); + + it('should detect invalid relative date ranges', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const error = dateUtils.validateDateRange('yesterday', '30 days ago'); + expect(error).not.toBeNull(); + + vi.useRealTimers(); + }); + }); + + describe('temporal parameter edge cases', () => { + it('should handle missing since parameter', () => { + const dbDate = dateUtils.toDbDate(undefined); + expect(dbDate).toBeUndefined(); + }); + + it('should handle missing until parameter', () => { + const dbDate = dateUtils.toDbDate(undefined); + expect(dbDate).toBeUndefined(); + }); + + it('should handle both parameters missing', () => { + const sinceDate = dateUtils.toDbDate(undefined); + const untilDate = dateUtils.toDbDate(undefined); + expect(sinceDate).toBeUndefined(); + expect(untilDate).toBeUndefined(); + }); + }); + }); + + describe('Prisma query construction with temporal filters', () => { + it('should construct valid date range for Prisma query', () => { + const since = '2024-01-01'; + const until = '2024-12-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.lte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.gte!.getTime()).toBeLessThan( + whereClause.indexedAt.lte!.getTime() + ); + }); + + it('should handle only since parameter', () => { + const since = '2024-01-01'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(undefined); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeInstanceOf(Date); + expect(whereClause.indexedAt.lte).toBeUndefined(); + }); + + it('should handle only until parameter', () => { + const since = undefined; + const until = '2024-12-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + const whereClause = { + indexedAt: { + gte: sinceDate, + lte: untilDate, + }, + }; + + expect(whereClause.indexedAt.gte).toBeUndefined(); + expect(whereClause.indexedAt.lte).toBeInstanceOf(Date); + }); + + it('should construct Prisma query with relative dates', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const since = '30 days ago'; + const until = 'yesterday'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + + expect(sinceDate).toBeInstanceOf(Date); + expect(untilDate).toBeInstanceOf(Date); + expect(sinceDate!.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + expect(untilDate!.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + + vi.useRealTimers(); + }); + }); + + describe('repository filtering logic', () => { + it('should filter repositories by indexedAt timestamp', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2024-01-15') }, + { id: 2, name: 'repo2', indexedAt: new Date('2024-06-01') }, + { id: 3, name: 'repo3', indexedAt: new Date('2024-12-01') }, + ]; + + const since = dateUtils.toDbDate('2024-05-01'); + const until = dateUtils.toDbDate('2024-11-01'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + if (until && repo.indexedAt > until) return false; + return true; + }); + + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe(2); + }); + + it('should return empty array when no repos match date range', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2020-01-01') }, + { id: 2, name: 'repo2', indexedAt: new Date('2021-01-01') }, + ]; + + const since = dateUtils.toDbDate('2024-01-01'); + const until = dateUtils.toDbDate('2024-12-31'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + if (until && repo.indexedAt > until) return false; + return true; + }); + + expect(filtered).toHaveLength(0); + }); + + it('should handle repos without indexedAt timestamp', () => { + const repos = [ + { id: 1, name: 'repo1', indexedAt: new Date('2024-06-01') }, + { id: 2, name: 'repo2', indexedAt: null }, + { id: 3, name: 'repo3', indexedAt: undefined }, + ]; + + const since = dateUtils.toDbDate('2024-01-01'); + + const filtered = repos.filter((repo) => { + if (!repo.indexedAt) return false; + if (since && repo.indexedAt < since) return false; + return true; + }); + + expect(filtered).toHaveLength(1); + expect(filtered[0].id).toBe(1); + }); + }); + + describe('real-world scenarios', () => { + it('should handle "last 30 days" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = '30 days ago'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle "since last week" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = 'last week'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle "before yesterday" user query', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2024-06-15T12:00:00.000Z')); + + const userInput = 'yesterday'; + const parsedDate = dateUtils.toDbDate(userInput); + + expect(parsedDate).toBeInstanceOf(Date); + expect(parsedDate?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + + vi.useRealTimers(); + }); + + it('should handle explicit ISO date range', () => { + const since = '2024-01-01'; + const until = '2024-03-31'; + + const sinceDate = dateUtils.toDbDate(since); + const untilDate = dateUtils.toDbDate(until); + const validationError = dateUtils.validateDateRange(since, until); + + expect(sinceDate).toBeInstanceOf(Date); + expect(untilDate).toBeInstanceOf(Date); + expect(validationError).toBeNull(); + expect(sinceDate!.getTime()).toBeLessThan(untilDate!.getTime()); + }); + + it('should prevent user from providing inverted date range', () => { + const since = '2024-12-31'; + const until = '2024-01-01'; + + const validationError = dateUtils.validateDateRange(since, until); + + expect(validationError).not.toBeNull(); + expect(validationError).toContain('since'); + expect(validationError).toContain('until'); + expect(validationError).toContain('before'); + }); + }); +}); diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 1ca57ef46..69dae3df3 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -127,8 +127,70 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise => sew(() => +export const search = async ({ query, matches, contextLines, whole, gitRevision, since, until }: SearchRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { + // If gitRevision is provided, append it to the query as a branch filter. + if (gitRevision) { + query = `${query} branch:${gitRevision}`; + } + + // If since or until are provided, filter repositories by indexedAt. + // Note: This filters by when the repo was last indexed by Sourcebot, + // not by the actual commit time in the repository. + if (since || until) { + const { toDbDate } = await import('./dateUtils'); + + const sinceDate = toDbDate(since); + const untilDate = toDbDate(until); + + const timeFilterRepos = await prisma.repo.findMany({ + where: { + orgId: org.id, + indexedAt: { + gte: sinceDate, + lte: untilDate, + } + }, + select: { name: true } + }); + + if (timeFilterRepos.length === 0) { + // If no repos match the time filter, return empty results immediately. + return { + stats: { + actualMatchCount: 0, + totalMatchCount: 0, + duration: 0, + fileCount: 0, + filesSkipped: 0, + contentBytesLoaded: 0, + indexBytesLoaded: 0, + crashes: 0, + shardFilesConsidered: 0, + filesConsidered: 0, + filesLoaded: 0, + shardsScanned: 0, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 0, + ngramLookups: 0, + wait: 0, + matchTreeConstruction: 0, + matchTreeSearch: 0, + regexpsConsidered: 0, + flushReason: 0, + }, + files: [], + repositoryInfo: [], + isBranchFilteringEnabled: false, + isSearchExhaustive: true, + } satisfies SearchResponse; + } + + const repoFilter = timeFilterRepos.map(r => `repo:${r.name}`).join(" or "); + query = `(${query}) and (${repoFilter})`; + } + const transformedQuery = await transformZoektQuery(query, org.id, prisma); if (isServiceError(transformedQuery)) { return transformedQuery;