From b7fd6c10b43521ae92c4f32649f07d843bd4f990 Mon Sep 17 00:00:00 2001 From: bkellam Date: Mon, 16 Dec 2024 23:50:01 -0800 Subject: [PATCH 1/3] add size prop --- packages/backend/src/github.ts | 38 ++++++++++++++++++++++++++++++ packages/backend/src/schemas/v2.ts | 13 ++++++++++ packages/backend/src/types.ts | 1 + schemas/v2/index.json | 15 ++++++++++++ 4 files changed, 67 insertions(+) diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index 498dda6c..5ecf430e 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -22,6 +22,7 @@ type OctokitRepository = { forks_count?: number, archived?: boolean, topics?: string[], + size?: number, } export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => { @@ -94,6 +95,7 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo 'zoekt.fork': marshalBool(repo.fork), 'zoekt.public': marshalBool(repo.private === false) }, + sizeInBytes: repo.size ? repo.size * 1000 : undefined, branches: [], tags: [], } satisfies GitRepository; @@ -121,6 +123,42 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo const topics = config.exclude.topics.map(topic => topic.toLowerCase()); repos = excludeReposByTopic(repos, topics, logger); } + + if (config.exclude.size) { + const min = config.exclude.size.min; + const max = config.exclude.size.max; + if (min) { + repos = repos.filter((repo) => { + // If we don't have a size, we can't filter by size. + if (!repo.sizeInBytes) { + return true; + } + + if (repo.sizeInBytes < min) { + logger.debug(`Excluding repo ${repo.name}. Reason: repo is less than \`exclude.size.min\`=${min} bytes.`); + return false; + } + + return true; + }); + } + + if (max) { + repos = repos.filter((repo) => { + // If we don't have a size, we can't filter by size. + if (!repo.sizeInBytes) { + return true; + } + + if (repo.sizeInBytes > max) { + logger.debug(`Excluding repo ${repo.name}. Reason: repo is greater than \`exclude.size.max\`=${max} bytes.`); + return false; + } + + return true; + }); + } + } } logger.debug(`Found ${repos.length} total repositories.`); diff --git a/packages/backend/src/schemas/v2.ts b/packages/backend/src/schemas/v2.ts index 3ef76dca..67897cd8 100644 --- a/packages/backend/src/schemas/v2.ts +++ b/packages/backend/src/schemas/v2.ts @@ -89,6 +89,19 @@ export interface GitHubConfig { * List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported. */ topics?: string[]; + /** + * Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned. + */ + size?: { + /** + * Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing. + */ + min?: number; + /** + * Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing. + */ + max?: number; + }; }; revisions?: GitRevisions; } diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index c1029b07..2b0eca3a 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -9,6 +9,7 @@ interface BaseRepository { isArchived?: boolean; codeHost?: string; topics?: string[]; + sizeInBytes?: number; } export interface GitRepository extends BaseRepository { diff --git a/schemas/v2/index.json b/schemas/v2/index.json index c1bc3966..ffdcff56 100644 --- a/schemas/v2/index.json +++ b/schemas/v2/index.json @@ -171,6 +171,21 @@ "examples": [ ["tests", "ci"] ] + }, + "size": { + "type": "object", + "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.", + "properties": { + "min": { + "type": "integer", + "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing." + }, + "max": { + "type": "integer", + "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing." + } + }, + "additionalProperties": false } }, "additionalProperties": false From 10598349fe22d321ef2da1cb5ec9b7d98a0a2cbf Mon Sep 17 00:00:00 2001 From: bkellam Date: Mon, 16 Dec 2024 23:54:06 -0800 Subject: [PATCH 2/3] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c8bc43e..725ceedf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added config option `settings.reindexInterval` and `settings.resyncInterval` to control how often the index should be re-indexed and re-synced. ([#134](https://github.com/sourcebot-dev/sourcebot/pull/134)) +- Added `exclude.size` to the GitHub config to allow excluding repositories by size. ([#137](https://github.com/sourcebot-dev/sourcebot/pull/137)) ## [2.6.2] - 2024-12-13 From cea8cf4465cd43511d65574c9226aa5b3b58832b Mon Sep 17 00:00:00 2001 From: bkellam Date: Mon, 16 Dec 2024 23:55:37 -0800 Subject: [PATCH 3/3] Add size constraint to demo config --- demo-site-config.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/demo-site-config.json b/demo-site-config.json index e6aa8572..70019544 100644 --- a/demo-site-config.json +++ b/demo-site-config.json @@ -11,6 +11,11 @@ "token": { "env": "GITHUB_TOKEN" }, + "exclude": { + "size": { + "max": 1000000000 // Limit to 1GB + } + }, "repos": [ "torvalds/linux", "pytorch/pytorch",