From afb6bd0fb53505e29e961f5a78f0afe36c3b3071 Mon Sep 17 00:00:00 2001 From: Patrick Erichsen Date: Mon, 12 May 2025 11:28:12 -0700 Subject: [PATCH 1/4] fix: truncate `tagToString` to max filename len --- core/indexing/CodeSnippetsIndex.ts | 3 +- core/indexing/FullTextSearchCodebaseIndex.ts | 10 +++++-- core/indexing/LanceDbIndex.ts | 3 +- core/indexing/chunk/ChunkCodebaseIndex.ts | 5 ++-- core/indexing/refreshIndex.ts | 4 --- core/indexing/test/indexing.ts | 2 +- core/indexing/utils.ts | 29 ++++++++++++++++++++ 7 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 core/indexing/utils.ts diff --git a/core/indexing/CodeSnippetsIndex.ts b/core/indexing/CodeSnippetsIndex.ts index 9a2da5008b7..bf79b093ef3 100644 --- a/core/indexing/CodeSnippetsIndex.ts +++ b/core/indexing/CodeSnippetsIndex.ts @@ -7,7 +7,7 @@ import { getQueryForFile, } from "../util/treeSitter"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex"; import { IndexResultType, MarkCompleteCallback, @@ -29,6 +29,7 @@ import { getLastNUriRelativePathParts, getUriPathBasename, } from "../util/uri"; +import { tagToString } from "./utils"; type SnippetChunk = ChunkWithoutID & { title: string; signature: string }; diff --git a/core/indexing/FullTextSearchCodebaseIndex.ts b/core/indexing/FullTextSearchCodebaseIndex.ts index de183e158d1..1bac74e4a5e 100644 --- a/core/indexing/FullTextSearchCodebaseIndex.ts +++ b/core/indexing/FullTextSearchCodebaseIndex.ts @@ -3,13 +3,14 @@ import { RETRIEVAL_PARAMS } from "../util/parameters"; import { getUriPathBasename } from "../util/uri"; import { ChunkCodebaseIndex } from "./chunk/ChunkCodebaseIndex"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex"; import { IndexResultType, MarkCompleteCallback, RefreshIndexResults, type CodebaseIndex, } from "./types"; +import { tagToString } from "./utils"; export interface RetrieveConfig { tags: BranchAndDir[]; @@ -97,11 +98,14 @@ export class FullTextSearchCodebaseIndex implements CodebaseIndex { // Delete for (const item of results.del) { - await db.run(` + await db.run( + ` DELETE FROM fts WHERE rowid IN ( SELECT id FROM fts_metadata WHERE path = ? AND cacheKey = ? ) - `,[item.path, item.cacheKey]); + `, + [item.path, item.cacheKey], + ); await db.run("DELETE FROM fts_metadata WHERE path = ? AND cacheKey = ?", [ item.path, item.cacheKey, diff --git a/core/indexing/LanceDbIndex.ts b/core/indexing/LanceDbIndex.ts index 3bf9a645d2f..cf41beaca46 100644 --- a/core/indexing/LanceDbIndex.ts +++ b/core/indexing/LanceDbIndex.ts @@ -14,7 +14,7 @@ import { getUriPathBasename } from "../util/uri"; import { basicChunker } from "./chunk/basic.js"; import { chunkDocument, shouldChunk } from "./chunk/chunk.js"; -import { DatabaseConnection, SqliteDb, tagToString } from "./refreshIndex.js"; +import { DatabaseConnection, SqliteDb } from "./refreshIndex.js"; import { CodebaseIndex, IndexResultType, @@ -24,6 +24,7 @@ import { } from "./types"; import type * as LanceType from "vectordb"; +import { tagToString } from "./utils"; interface LanceDbRow { uuid: string; diff --git a/core/indexing/chunk/ChunkCodebaseIndex.ts b/core/indexing/chunk/ChunkCodebaseIndex.ts index 9b39e506a7f..412ff74800d 100644 --- a/core/indexing/chunk/ChunkCodebaseIndex.ts +++ b/core/indexing/chunk/ChunkCodebaseIndex.ts @@ -4,7 +4,7 @@ import { RunResult } from "sqlite3"; import { IContinueServerClient } from "../../continueServer/interface.js"; import { Chunk, IndexTag, IndexingProgressUpdate } from "../../index.js"; -import { DatabaseConnection, SqliteDb, tagToString } from "../refreshIndex.js"; +import { DatabaseConnection, SqliteDb } from "../refreshIndex.js"; import { IndexResultType, MarkCompleteCallback, @@ -13,8 +13,9 @@ import { type CodebaseIndex, } from "../types.js"; -import { chunkDocument, shouldChunk } from "./chunk.js"; import { getUriPathBasename } from "../../util/uri.js"; +import { tagToString } from "../utils.js"; +import { chunkDocument, shouldChunk } from "./chunk.js"; export class ChunkCodebaseIndex implements CodebaseIndex { relativeExpectedTime: number = 1; diff --git a/core/indexing/refreshIndex.ts b/core/indexing/refreshIndex.ts index f76c6a7f64a..ec84f631b3b 100644 --- a/core/indexing/refreshIndex.ts +++ b/core/indexing/refreshIndex.ts @@ -18,10 +18,6 @@ import { export type DatabaseConnection = Database; -export function tagToString(tag: IndexTag): string { - return `${tag.directory}::${tag.branch}::${tag.artifactId}`; -} - export class SqliteDb { static db: DatabaseConnection | null = null; diff --git a/core/indexing/test/indexing.ts b/core/indexing/test/indexing.ts index 5f22d0aeb92..b90f5256422 100644 --- a/core/indexing/test/indexing.ts +++ b/core/indexing/test/indexing.ts @@ -3,11 +3,11 @@ import { jest } from "@jest/globals"; import { IndexTag } from "../.."; import { IContinueServerClient } from "../../continueServer/interface"; import { ChunkCodebaseIndex } from "../chunk/ChunkCodebaseIndex"; -import { tagToString } from "../refreshIndex"; import { CodebaseIndex, RefreshIndexResults } from "../types"; import { testIde } from "../../test/fixtures"; import { addToTestDir, TEST_DIR } from "../../test/testDir"; +import { tagToString } from "../utils"; export const mockFilename = "test.py"; export const mockPathAndCacheKey = { diff --git a/core/indexing/utils.ts b/core/indexing/utils.ts new file mode 100644 index 00000000000..37084411188 --- /dev/null +++ b/core/indexing/utils.ts @@ -0,0 +1,29 @@ +import { IndexTag } from ".."; + +/** + * Converts an IndexTag to a string representation, safely handling long paths. + * + * The string is used as a table name and identifier in various places, so it needs + * to stay under OS filename length limits (typically 255 chars). This is especially + * important for dev containers where the directory path can be very long due to + * containing container configuration. + * + * The format is: "{directory}::{branch}::{artifactId}" + * + * To handle long paths: + * 1. First truncates directory to 200 chars to leave room for branch and artifactId + * 2. Then ensures entire string stays under 240 chars for OS compatibility + * + * @param tag The tag containing directory, branch, and artifactId + * @returns A string representation safe for use as a table name + */ +export function tagToString(tag: IndexTag): string { + const maxDirLength = 200; // Leave room for branch and artifactId + const dir = + tag.directory.length > maxDirLength + ? tag.directory.slice(0, maxDirLength) + : tag.directory; + + const result = `${dir}::${tag.branch}::${tag.artifactId}`; + return result.slice(0, 240); // Ensure final string is not too long +} From c452f90014055b7fd05f6eb9310915343e6dc0ea Mon Sep 17 00:00:00 2001 From: Patrick Erichsen Date: Mon, 12 May 2025 11:30:39 -0700 Subject: [PATCH 2/4] Update utils.ts --- core/indexing/utils.ts | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/core/indexing/utils.ts b/core/indexing/utils.ts index 37084411188..f87da055d6e 100644 --- a/core/indexing/utils.ts +++ b/core/indexing/utils.ts @@ -1,5 +1,8 @@ import { IndexTag } from ".."; +// Maximum length for table names to stay under OS filename limits +const MAX_TABLE_NAME_LENGTH = 240; + /** * Converts an IndexTag to a string representation, safely handling long paths. * @@ -11,19 +14,28 @@ import { IndexTag } from ".."; * The format is: "{directory}::{branch}::{artifactId}" * * To handle long paths: - * 1. First truncates directory to 200 chars to leave room for branch and artifactId - * 2. Then ensures entire string stays under 240 chars for OS compatibility + * 1. First tries the full string - most backwards compatible + * 2. If too long, truncates directory to 200 chars to leave room for branch and artifactId + * 3. Finally ensures entire string stays under MAX_TABLE_NAME_LENGTH for OS compatibility * * @param tag The tag containing directory, branch, and artifactId * @returns A string representation safe for use as a table name */ export function tagToString(tag: IndexTag): string { + const result = `${tag.directory}::${tag.branch}::${tag.artifactId}`; + + if (result.length <= MAX_TABLE_NAME_LENGTH) { + return result; + } + const maxDirLength = 200; // Leave room for branch and artifactId const dir = tag.directory.length > maxDirLength ? tag.directory.slice(0, maxDirLength) : tag.directory; - const result = `${dir}::${tag.branch}::${tag.artifactId}`; - return result.slice(0, 240); // Ensure final string is not too long + return `${dir}::${tag.branch}::${tag.artifactId}`.slice( + 0, + MAX_TABLE_NAME_LENGTH, + ); } From 98efbbd80f075928a45334f8cc4103221c2656c9 Mon Sep 17 00:00:00 2001 From: Patrick Erichsen Date: Mon, 12 May 2025 18:31:56 -0700 Subject: [PATCH 3/4] fix: incorporate feedback --- .continue/rules/unit-testing-rules.yaml | 24 ++++++++++ core/indexing/utils.test.ts | 58 +++++++++++++++++++++++++ core/indexing/utils.ts | 12 +++-- 3 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 .continue/rules/unit-testing-rules.yaml create mode 100644 core/indexing/utils.test.ts diff --git a/.continue/rules/unit-testing-rules.yaml b/.continue/rules/unit-testing-rules.yaml new file mode 100644 index 00000000000..41791807511 --- /dev/null +++ b/.continue/rules/unit-testing-rules.yaml @@ -0,0 +1,24 @@ +name: unit-testing-rules +version: 0.0.1 +schema: v1 +rules: + - name: unit-testing-rules + rule: >- + For unit testing in this project: + + + 1. The project uses Jest as the testing framework + + 2. Run tests using `npm test` from within the specific package/module + directory + + 3. Command structure: `cd [directory] && npm test -- [test file path]` + + 4. The test script uses experimental VM modules via NODE_OPTIONS flag + + 5. Test files follow the pattern `*.test.ts` + + 6. Tests must import Jest with `import { jest } from "@jest/globals";` + + 7. Run tests from within the specific package directory (e.g., `cd core` + for core module tests) diff --git a/core/indexing/utils.test.ts b/core/indexing/utils.test.ts new file mode 100644 index 00000000000..f432da02354 --- /dev/null +++ b/core/indexing/utils.test.ts @@ -0,0 +1,58 @@ +import { IndexTag } from ".."; +import { tagToString } from "./utils"; + +test("tagToString returns full tag string when under length limit", () => { + const tag: IndexTag = { + directory: "/normal/path/to/repo", + branch: "main", + artifactId: "12345", + }; + + expect(tagToString(tag)).toBe("/normal/path/to/repo::main::12345"); +}); + +test("tagToString truncates beginning of directory when path is too long", () => { + // Create a very long directory path that exceeds MAX_DIR_LENGTH (200) + const longPrefix = "/very/long/path/that/will/be/truncated/"; + const importantSuffix = "/user/important-project/src/feature"; + const longPath = longPrefix + "x".repeat(200) + importantSuffix; + + const tag: IndexTag = { + directory: longPath, + branch: "feature-branch", + artifactId: "67890", + }; + + const result = tagToString(tag); + + // The result should keep the important suffix part + expect(result).toContain(importantSuffix); + // The result should NOT contain the beginning of the path + expect(result).not.toContain(longPrefix); + // The result should include the branch and artifactId + expect(result).toContain("::feature-branch::67890"); + // The result should be within the MAX_TABLE_NAME_LENGTH limit (240) + expect(result.length).toBeLessThanOrEqual(240); +}); + +test("tagToString preserves branch and artifactId exactly, even when truncating", () => { + const longPath = "/a".repeat(300); // Much longer than MAX_DIR_LENGTH + const tag: IndexTag = { + directory: longPath, + branch: "release-v2.0", + artifactId: "build-123", + }; + + const result = tagToString(tag); + + // Should contain the exact branch and artifactId + expect(result).toContain("::release-v2.0::build-123"); + // Should contain the end of the path + expect(result).toContain("/a/a/a"); + // Should not contain the full original path (it should be truncated) + expect(result.length).toBeLessThan( + longPath.length + "::release-v2.0::build-123".length, + ); + // The result should be within the MAX_TABLE_NAME_LENGTH limit + expect(result.length).toBeLessThanOrEqual(240); +}); diff --git a/core/indexing/utils.ts b/core/indexing/utils.ts index f87da055d6e..0713507f035 100644 --- a/core/indexing/utils.ts +++ b/core/indexing/utils.ts @@ -3,6 +3,9 @@ import { IndexTag } from ".."; // Maximum length for table names to stay under OS filename limits const MAX_TABLE_NAME_LENGTH = 240; +// Leave room for branch and artifactId +const MAX_DIR_LENGTH = 200; + /** * Converts an IndexTag to a string representation, safely handling long paths. * @@ -15,7 +18,8 @@ const MAX_TABLE_NAME_LENGTH = 240; * * To handle long paths: * 1. First tries the full string - most backwards compatible - * 2. If too long, truncates directory to 200 chars to leave room for branch and artifactId + * 2. If too long, truncates directory from the beginning to maintain uniqueness + * (since final parts of paths are more unique than prefixes) * 3. Finally ensures entire string stays under MAX_TABLE_NAME_LENGTH for OS compatibility * * @param tag The tag containing directory, branch, and artifactId @@ -28,10 +32,10 @@ export function tagToString(tag: IndexTag): string { return result; } - const maxDirLength = 200; // Leave room for branch and artifactId + // Truncate from the beginning of directory path to preserve the more unique end parts const dir = - tag.directory.length > maxDirLength - ? tag.directory.slice(0, maxDirLength) + tag.directory.length > MAX_DIR_LENGTH + ? tag.directory.slice(tag.directory.length - MAX_DIR_LENGTH) : tag.directory; return `${dir}::${tag.branch}::${tag.artifactId}`.slice( From d56201cb7d33c13f4bae31ef2f072ab58a055a1c Mon Sep 17 00:00:00 2001 From: Patrick Erichsen Date: Mon, 12 May 2025 18:35:08 -0700 Subject: [PATCH 4/4] Update unit-testing-rules.yaml --- .continue/rules/unit-testing-rules.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.continue/rules/unit-testing-rules.yaml b/.continue/rules/unit-testing-rules.yaml index 41791807511..ec22e94675e 100644 --- a/.continue/rules/unit-testing-rules.yaml +++ b/.continue/rules/unit-testing-rules.yaml @@ -22,3 +22,9 @@ rules: 7. Run tests from within the specific package directory (e.g., `cd core` for core module tests) + + 8. Write tests as top-level `test()` functions - DO NOT use `describe()` + blocks + + 9. Include the function name being tested in the test description for + clarity