From d70332dca61b967b14a698043e0a9d68325401d7 Mon Sep 17 00:00:00 2001 From: fridaystreet Date: Wed, 9 Oct 2024 08:30:15 +0800 Subject: [PATCH 1/3] Add split files by maxTokenSize feature --- src/cli/actions/defaultActionRunner.ts | 9 ++- src/cli/actions/remoteActionRunner.ts | 2 +- src/cli/cliRunner.ts | 2 + src/config/configLoader.ts | 2 +- src/config/configTypes.ts | 35 ++++----- src/config/defaultConfig.ts | 1 + src/core/output/outputGenerator.ts | 78 ++++++++++++++----- src/core/output/outputGeneratorTypes.ts | 6 +- src/core/output/outputSplitter.ts | 54 +++++++++++++ .../styleGenerators/markdownStyleGenerator.ts | 10 ++- .../styleGenerators/plainStyleGenerator.ts | 11 ++- .../styleGenerators/xmlStyleGenerator.ts | 14 +++- src/core/packager.ts | 63 ++++++++++++--- 13 files changed, 225 insertions(+), 62 deletions(-) create mode 100644 src/core/output/outputSplitter.ts diff --git a/src/cli/actions/defaultActionRunner.ts b/src/cli/actions/defaultActionRunner.ts index 5b7d9d9..5cc14ac 100644 --- a/src/cli/actions/defaultActionRunner.ts +++ b/src/cli/actions/defaultActionRunner.ts @@ -56,8 +56,8 @@ export const runDefaultAction = async ( spinner.succeed('Packing completed successfully!'); logger.log(''); - if (config.output.topFilesLength > 0) { - printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength); + if (config.output.topFilesLength && config.output.topFilesLength > 0) { + printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength ?? 0); logger.log(''); } @@ -68,7 +68,7 @@ export const runDefaultAction = async ( packResult.totalFiles, packResult.totalCharacters, packResult.totalTokens, - config.output.filePath, + config.output.filePath ?? 'No output file specified', packResult.suspiciousFilesResults, config, ); @@ -103,6 +103,9 @@ const buildCliConfig = (options: CliOptions): RepopackConfigCli => { if (options.style) { cliConfig.output = { ...cliConfig.output, style: options.style.toLowerCase() as RepopackOutputStyle }; } + if (options.maxTokens !== undefined) { + cliConfig.output = { ...cliConfig.output, maxTokensPerFile: options.maxTokens }; + } return cliConfig; }; diff --git a/src/cli/actions/remoteActionRunner.ts b/src/cli/actions/remoteActionRunner.ts index e17521c..7d5e537 100644 --- a/src/cli/actions/remoteActionRunner.ts +++ b/src/cli/actions/remoteActionRunner.ts @@ -29,7 +29,7 @@ export const runRemoteAction = async (repoUrl: string, options: CliOptions): Pro logger.log(''); const result = await runDefaultAction(tempDir, tempDir, options); - await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath); + await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath ?? 'repopack-output.txt'); } finally { // Clean up the temporary directory await cleanupTempDirectory(tempDir); diff --git a/src/cli/cliRunner.ts b/src/cli/cliRunner.ts index 047e227..beb2c5a 100644 --- a/src/cli/cliRunner.ts +++ b/src/cli/cliRunner.ts @@ -23,6 +23,7 @@ export interface CliOptions extends OptionValues { init?: boolean; global?: boolean; remote?: string; + maxTokens?: number; // Add the maxTokens option } export async function run() { @@ -44,6 +45,7 @@ export async function run() { .option('--init', 'initialize a new repopack.config.json file') .option('--global', 'use global configuration (only applicable with --init)') .option('--remote ', 'process a remote Git repository') + .option('--max-tokens ', 'maximum number of tokens per output file', Number.parseInt) // Add the maxTokens option .action((directory = '.', options: CliOptions = {}) => executeAction(directory, process.cwd(), options)); await program.parseAsync(process.argv); diff --git a/src/config/configLoader.ts b/src/config/configLoader.ts index 2f10574..beee547 100644 --- a/src/config/configLoader.ts +++ b/src/config/configLoader.ts @@ -85,7 +85,7 @@ export const mergeConfigs = ( // If the output file path is not provided in the config file or CLI, use the default file path for the style if (cliConfig.output?.filePath == null && fileConfig.output?.filePath == null) { const style = cliConfig.output?.style || fileConfig.output?.style || defaultConfig.output.style; - defaultConfig.output.filePath = defaultFilePathMap[style]; + defaultConfig.output.filePath = defaultFilePathMap[style ?? 'plain']; } return { diff --git a/src/config/configTypes.ts b/src/config/configTypes.ts index 9c61a09..95c5092 100644 --- a/src/config/configTypes.ts +++ b/src/config/configTypes.ts @@ -1,16 +1,20 @@ export type RepopackOutputStyle = 'plain' | 'xml' | 'markdown'; +export interface RepopackOutputConfig { + filePath?: string; + style?: RepopackOutputStyle; + headerText?: string; + instructionFilePath?: string; + removeComments?: boolean; + removeEmptyLines?: boolean; + topFilesLength?: number; + showLineNumbers?: boolean; + maxTokensPerFile?: number; // Added maxTokensPerFile + onlyShowPartFilesInRepoStructure?: boolean; +} + interface RepopackConfigBase { - output?: { - filePath?: string; - style?: RepopackOutputStyle; - headerText?: string; - instructionFilePath?: string; - removeComments?: boolean; - removeEmptyLines?: boolean; - topFilesLength?: number; - showLineNumbers?: boolean; - }; + output?: RepopackOutputConfig; include?: string[]; ignore?: { useGitignore?: boolean; @@ -23,16 +27,7 @@ interface RepopackConfigBase { } export type RepopackConfigDefault = RepopackConfigBase & { - output: { - filePath: string; - style: RepopackOutputStyle; - headerText?: string; - instructionFilePath?: string; - removeComments: boolean; - removeEmptyLines: boolean; - topFilesLength: number; - showLineNumbers: boolean; - }; + output: RepopackOutputConfig; include: string[]; ignore: { useGitignore: boolean; diff --git a/src/config/defaultConfig.ts b/src/config/defaultConfig.ts index 01442f6..ec64918 100644 --- a/src/config/defaultConfig.ts +++ b/src/config/defaultConfig.ts @@ -14,6 +14,7 @@ export const defaultConfig: RepopackConfigDefault = { removeEmptyLines: false, topFilesLength: 5, showLineNumbers: false, + onlyShowPartFilesInRepoStructure: false }, include: [], ignore: { diff --git a/src/core/output/outputGenerator.ts b/src/core/output/outputGenerator.ts index 73179cb..23ecd58 100644 --- a/src/core/output/outputGenerator.ts +++ b/src/core/output/outputGenerator.ts @@ -8,52 +8,88 @@ import type { OutputGeneratorContext } from './outputGeneratorTypes.js'; import { generateMarkdownStyle } from './styleGenerators/markdownStyleGenerator.js'; import { generatePlainStyle } from './styleGenerators/plainStyleGenerator.js'; import { generateXmlStyle } from './styleGenerators/xmlStyleGenerator.js'; +import { splitOutput, type OutputSplit } from './outputSplitter.js'; export const generateOutput = async ( rootDir: string, config: RepopackConfigMerged, processedFiles: ProcessedFile[], allFilePaths: string[], -): Promise => { - const outputGeneratorContext = await buildOutputGeneratorContext(rootDir, config, allFilePaths, processedFiles); - - let output: string; - switch (config.output.style) { - case 'xml': - output = generateXmlStyle(outputGeneratorContext); - break; - case 'markdown': - output = generateMarkdownStyle(outputGeneratorContext); - break; - default: - output = generatePlainStyle(outputGeneratorContext); - } +): Promise => { + const maxTokensPerFile = config.output.maxTokensPerFile ?? Infinity; // Use Infinity if no limit is set + + const outputSplits: OutputSplit[] = + maxTokensPerFile < Infinity + ? splitOutput( + processedFiles, + maxTokensPerFile + ) + : [{ partNumber: 1, tokenCount: 0, includedFiles: processedFiles }]; + + const outputs = await Promise.all( + outputSplits.map(async (outputSplit) => { + const outputGeneratorContext = await buildOutputGeneratorContext( + rootDir, + config, + outputSplit.includedFiles, + config.output.onlyShowPartFilesInRepoStructure ? outputSplit.includedFiles.map(f => f.path) : allFilePaths, + processedFiles.length, + outputSplits.length, + outputSplit.partNumber, + ) - return output; + let output: string; + switch (config.output.style) { + case 'xml': + output = generateXmlStyle(outputGeneratorContext); + break; + case 'markdown': + output = generateMarkdownStyle(outputGeneratorContext); + break; + default: + output = generatePlainStyle(outputGeneratorContext); + } + return output; + }), + ); + + return outputs; }; export const buildOutputGeneratorContext = async ( rootDir: string, config: RepopackConfigMerged, - allFilePaths: string[], - processedFiles: ProcessedFile[], + includedFiles: ProcessedFile[] = [], // Add includedFiles parameter + repositoryStructure: string[] = [], + totalFiles: number = 1, + totalParts: number = 1, + partNumber: number = 1 ): Promise => { let repositoryInstruction = ''; if (config.output.instructionFilePath) { - const instructionPath = path.resolve(rootDir, config.output.instructionFilePath); + const instructionPath = path.resolve( + rootDir, + config.output.instructionFilePath, + ); try { repositoryInstruction = await fs.readFile(instructionPath, 'utf-8'); } catch { - throw new RepopackError(`Instruction file not found at ${instructionPath}`); + throw new RepopackError( + `Instruction file not found at ${instructionPath}`, + ); } } return { generationDate: new Date().toISOString(), - treeString: generateTreeString(allFilePaths), - processedFiles, + treeString: generateTreeString(repositoryStructure), // Use includedFiles for treeString config, instruction: repositoryInstruction, + content: '', + includedFiles, + totalFiles, + totalParts, + partNumber }; }; diff --git a/src/core/output/outputGeneratorTypes.ts b/src/core/output/outputGeneratorTypes.ts index bef9275..ca31aee 100644 --- a/src/core/output/outputGeneratorTypes.ts +++ b/src/core/output/outputGeneratorTypes.ts @@ -4,7 +4,11 @@ import type { ProcessedFile } from '../file/fileTypes.js'; export interface OutputGeneratorContext { generationDate: string; treeString: string; - processedFiles: ProcessedFile[]; config: RepopackConfigMerged; instruction: string; + content: string; + includedFiles: ProcessedFile[]; // Add the includedFiles property + totalFiles: number, + partNumber: number, + totalParts: number } diff --git a/src/core/output/outputSplitter.ts b/src/core/output/outputSplitter.ts new file mode 100644 index 0000000..88acbe5 --- /dev/null +++ b/src/core/output/outputSplitter.ts @@ -0,0 +1,54 @@ +import type { RepopackConfigMerged } from '../../config/configTypes.js'; +import type { ProcessedFile } from '../file/fileTypes.js'; +import { TokenCounter } from '../tokenCounter/tokenCounter.js'; + +export interface OutputSplit { + partNumber: number; + tokenCount: number; + includedFiles: ProcessedFile[]; // Add includedFiles property +} + +export const splitOutput = ( + processedFiles: ProcessedFile[], + maxTokensPerFile: number, +): OutputSplit[] => { + const tokenCounter = new TokenCounter(); + const outputSplits: OutputSplit[] = []; + let currentTokenCount = 0; + let currentOutput = ''; + let currentIncludedFiles: ProcessedFile[] = []; // Initialize currentIncludedFiles + + for (const file of processedFiles) { + const fileTokenCount = tokenCounter.countTokens(file.content, file.path); + + if (currentTokenCount + fileTokenCount > maxTokensPerFile) { + // Start a new part + outputSplits.push({ + partNumber: outputSplits.length+1, + tokenCount: currentTokenCount, + includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit + }); + + currentTokenCount = 0; + currentOutput = ''; + currentIncludedFiles = []; // Reset currentIncludedFiles + } + + currentOutput += file.content; + currentTokenCount += fileTokenCount; + currentIncludedFiles.push(file); // Add file path to currentIncludedFiles + + } + + if (currentIncludedFiles.length) { + // Add the last part + outputSplits.push({ + partNumber: outputSplits.length+1, + tokenCount: currentTokenCount, + includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit + }); + } + tokenCounter.free(); + + return outputSplits; +}; diff --git a/src/core/output/styleGenerators/markdownStyleGenerator.ts b/src/core/output/styleGenerators/markdownStyleGenerator.ts index e8ce564..11c27e4 100644 --- a/src/core/output/styleGenerators/markdownStyleGenerator.ts +++ b/src/core/output/styleGenerators/markdownStyleGenerator.ts @@ -25,7 +25,11 @@ export const generateMarkdownStyle = (outputGeneratorContext: OutputGeneratorCon headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -48,6 +52,10 @@ const markdownTemplate = /* md */ ` ## Usage Guidelines {{{summaryUsageGuidelines}}} +## Repository Size +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + ## Notes {{{summaryNotes}}} diff --git a/src/core/output/styleGenerators/plainStyleGenerator.ts b/src/core/output/styleGenerators/plainStyleGenerator.ts index 445cdd0..52e53ed 100644 --- a/src/core/output/styleGenerators/plainStyleGenerator.ts +++ b/src/core/output/styleGenerators/plainStyleGenerator.ts @@ -25,7 +25,11 @@ export const generatePlainStyle = (outputGeneratorContext: OutputGeneratorContex headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -59,6 +63,11 @@ Usage Guidelines: ----------------- {{{summaryUsageGuidelines}}} +Repository Size: +----------------- +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + Notes: ------ {{{summaryNotes}}} diff --git a/src/core/output/styleGenerators/xmlStyleGenerator.ts b/src/core/output/styleGenerators/xmlStyleGenerator.ts index 4575d87..41bdf5e 100644 --- a/src/core/output/styleGenerators/xmlStyleGenerator.ts +++ b/src/core/output/styleGenerators/xmlStyleGenerator.ts @@ -25,7 +25,11 @@ export const generateXmlStyle = (outputGeneratorContext: OutputGeneratorContext) headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -52,6 +56,12 @@ This section contains a summary of this file. {{{summaryUsageGuidelines}}} + +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + + + {{{summaryNotes}}} @@ -75,7 +85,7 @@ This section contains a summary of this file. This section contains the contents of the repository's files. -{{#each processedFiles}} +{{#each includedFiles}} {{{this.content}}} diff --git a/src/core/packager.ts b/src/core/packager.ts index f38a3e3..01d2478 100644 --- a/src/core/packager.ts +++ b/src/core/packager.ts @@ -60,9 +60,15 @@ export const pack = async ( if (config.security.enableSecurityCheck) { // Perform security check and filter out suspicious files progressCallback('Running security check...'); - suspiciousFilesResults = await deps.runSecurityCheck(rawFiles, progressCallback); + suspiciousFilesResults = await deps.runSecurityCheck( + rawFiles, + progressCallback, + ); safeRawFiles = rawFiles.filter( - (rawFile) => !suspiciousFilesResults.some((result) => result.filePath === rawFile.path), + (rawFile) => + !suspiciousFilesResults.some( + (result) => result.filePath === rawFile.path, + ), ); } @@ -75,13 +81,37 @@ export const pack = async ( // Generate output progressCallback('Generating output...'); - const output = await deps.generateOutput(rootDir, config, processedFiles, safeFilePaths); + const outputs = await deps.generateOutput( + rootDir, + config, + processedFiles, + safeFilePaths + ); - // Write output to file. path is relative to the cwd - progressCallback('Writing output file...'); - const outputPath = path.resolve(config.cwd, config.output.filePath); - logger.trace(`Writing output to: ${outputPath}`); - await fs.writeFile(outputPath, output); + // Write output to file(s). path is relative to the cwd + progressCallback('Writing output file(s)...'); + + // Handle the case where filePath is undefined + if (config.output.filePath) { + const outputFileBase = path.parse(config.output.filePath).name; + const outputFileExt = path.parse(config.output.filePath).ext; + + await Promise.all( + outputs.map(async (output, index) => { + const outputPath = path.resolve( + config.cwd, + `${outputFileBase}${ + outputs.length > 1 ? `-${index + 1}` : '' + }${outputFileExt}`, + ); + logger.trace(`Writing output to: ${outputPath}`); + await fs.writeFile(outputPath, output); + }), + ); + } else { + // Handle the case where filePath is undefined (e.g., log a warning) + logger.warn('Output file path is not defined. Skipping file writing.'); + } // Setup token counter const tokenCounter = new TokenCounter(); @@ -94,7 +124,11 @@ export const pack = async ( const charCount = file.content.length; const tokenCount = tokenCounter.countTokens(file.content, file.path); - progressCallback(`Calculating metrics... (${index + 1}/${processedFiles.length}) ${pc.dim(file.path)}`); + progressCallback( + `Calculating metrics... (${index + 1}/${ + processedFiles.length + }) ${pc.dim(file.path ?? 'Unknown File')}`, + ); // Sleep for a short time to prevent blocking the event loop await sleep(1); @@ -109,8 +143,14 @@ export const pack = async ( tokenCounter.free(); const totalFiles = processedFiles.length; - const totalCharacters = fileMetrics.reduce((sum, fileMetric) => sum + fileMetric.charCount, 0); - const totalTokens = fileMetrics.reduce((sum, fileMetric) => sum + fileMetric.tokenCount, 0); + const totalCharacters = fileMetrics.reduce( + (sum, fileMetric) => sum + fileMetric.charCount, + 0, + ); + const totalTokens = fileMetrics.reduce( + (sum, fileMetric) => sum + fileMetric.tokenCount, + 0, + ); const fileCharCounts: Record = {}; const fileTokenCounts: Record = {}; @@ -128,3 +168,4 @@ export const pack = async ( suspiciousFilesResults, }; }; + From f40353b08cf6f578aab8f07c020ec0f80331327b Mon Sep 17 00:00:00 2001 From: fridaystreet Date: Wed, 9 Oct 2024 20:43:23 +0800 Subject: [PATCH 2/3] merge with latest updates --- src/core/output/outputGenerate.ts | 6 +++--- src/core/output/outputSplitter.ts | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index a23b6cb..a1a7cf7 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -5,9 +5,9 @@ import { RepopackError } from '../../shared/errorHandle.js'; import { generateTreeString } from '../file/fileTreeGenerate.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { OutputGeneratorContext } from './outputGeneratorTypes.js'; -import { generateMarkdownStyle } from './styleGenerators/markdownStyleGenerator.js'; -import { generatePlainStyle } from './styleGenerators/plainStyleGenerator.js'; -import { generateXmlStyle } from './styleGenerators/xmlStyleGenerator.js'; +import { generateMarkdownStyle } from './outputStyles/markdownStyle.js'; +import { generatePlainStyle } from './outputStyles/plainStyle.js'; +import { generateXmlStyle } from './outputStyles/xmlStyle.js'; import { splitOutput, type OutputSplit } from './outputSplitter.js'; export const generateOutput = async ( diff --git a/src/core/output/outputSplitter.ts b/src/core/output/outputSplitter.ts index 88acbe5..016cb1b 100644 --- a/src/core/output/outputSplitter.ts +++ b/src/core/output/outputSplitter.ts @@ -1,6 +1,5 @@ -import type { RepopackConfigMerged } from '../../config/configTypes.js'; import type { ProcessedFile } from '../file/fileTypes.js'; -import { TokenCounter } from '../tokenCounter/tokenCounter.js'; +import { TokenCounter } from '../tokenCount/tokenCount.js'; export interface OutputSplit { partNumber: number; From 5910bc2191629e3264b74e10a3414a5ba0371a87 Mon Sep 17 00:00:00 2001 From: fridaystreet Date: Thu, 10 Oct 2024 10:22:02 +0800 Subject: [PATCH 3/3] fix markdown and plain files not displaying in outputs --- src/core/output/outputStyles/markdownStyle.ts | 2 +- src/core/output/outputStyles/plainStyle.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/output/outputStyles/markdownStyle.ts b/src/core/output/outputStyles/markdownStyle.ts index 1c68073..190fd29 100644 --- a/src/core/output/outputStyles/markdownStyle.ts +++ b/src/core/output/outputStyles/markdownStyle.ts @@ -74,7 +74,7 @@ This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files # Repository Files -{{#each processedFiles}} +{{#each includedFiles}} ## File: {{{this.path}}} \`\`\`{{{getFileExtension this.path}}} {{{this.content}}} diff --git a/src/core/output/outputStyles/plainStyle.ts b/src/core/output/outputStyles/plainStyle.ts index fd6947c..c5c57b2 100644 --- a/src/core/output/outputStyles/plainStyle.ts +++ b/src/core/output/outputStyles/plainStyle.ts @@ -91,7 +91,7 @@ ${PLAIN_LONG_SEPARATOR} Repository Files ${PLAIN_LONG_SEPARATOR} -{{#each processedFiles}} +{{#each includedFiles}} ${PLAIN_SEPARATOR} File: {{{this.path}}} ${PLAIN_SEPARATOR}