diff --git a/src/cli/actions/defaultAction.ts b/src/cli/actions/defaultAction.ts index d6eba38..307e58a 100644 --- a/src/cli/actions/defaultAction.ts +++ b/src/cli/actions/defaultAction.ts @@ -56,8 +56,8 @@ export const runDefaultAction = async ( spinner.succeed('Packing completed successfully!'); logger.log(''); - if (config.output.topFilesLength > 0) { - printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength); + if (config.output.topFilesLength && config.output.topFilesLength > 0) { + printTopFiles(packResult.fileCharCounts, packResult.fileTokenCounts, config.output.topFilesLength ?? 0); logger.log(''); } @@ -68,7 +68,7 @@ export const runDefaultAction = async ( packResult.totalFiles, packResult.totalCharacters, packResult.totalTokens, - config.output.filePath, + config.output.filePath ?? 'No output file specified', packResult.suspiciousFilesResults, config, ); @@ -103,6 +103,9 @@ const buildCliConfig = (options: CliOptions): RepopackConfigCli => { if (options.style) { cliConfig.output = { ...cliConfig.output, style: options.style.toLowerCase() as RepopackOutputStyle }; } + if (options.maxTokens !== undefined) { + cliConfig.output = { ...cliConfig.output, maxTokensPerFile: options.maxTokens }; + } return cliConfig; }; diff --git a/src/cli/actions/remoteAction.ts b/src/cli/actions/remoteAction.ts index 2f39a23..d3093ea 100644 --- a/src/cli/actions/remoteAction.ts +++ b/src/cli/actions/remoteAction.ts @@ -29,7 +29,7 @@ export const runRemoteAction = async (repoUrl: string, options: CliOptions): Pro logger.log(''); const result = await runDefaultAction(tempDir, tempDir, options); - await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath); + await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath ?? 'repopack-output.txt'); } finally { // Clean up the temporary directory await cleanupTempDirectory(tempDir); diff --git a/src/cli/cliRun.ts b/src/cli/cliRun.ts index 3d9d89d..4b3be7f 100644 --- a/src/cli/cliRun.ts +++ b/src/cli/cliRun.ts @@ -23,6 +23,7 @@ export interface CliOptions extends OptionValues { init?: boolean; global?: boolean; remote?: string; + maxTokens?: number; // Add the maxTokens option } export async function run() { @@ -44,6 +45,7 @@ export async function run() { .option('--init', 'initialize a new repopack.config.json file') .option('--global', 'use global configuration (only applicable with --init)') .option('--remote ', 'process a remote Git repository') + .option('--max-tokens ', 'maximum number of tokens per output file', Number.parseInt) // Add the maxTokens option .action((directory = '.', options: CliOptions = {}) => executeAction(directory, process.cwd(), options)); await program.parseAsync(process.argv); diff --git a/src/config/configLoad.ts b/src/config/configLoad.ts index d986aec..6d4d49d 100644 --- a/src/config/configLoad.ts +++ b/src/config/configLoad.ts @@ -85,7 +85,7 @@ export const mergeConfigs = ( // If the output file path is not provided in the config file or CLI, use the default file path for the style if (cliConfig.output?.filePath == null && fileConfig.output?.filePath == null) { const style = cliConfig.output?.style || fileConfig.output?.style || defaultConfig.output.style; - defaultConfig.output.filePath = defaultFilePathMap[style]; + defaultConfig.output.filePath = defaultFilePathMap[style ?? 'plain']; } return { diff --git a/src/config/configTypes.ts b/src/config/configTypes.ts index 9c61a09..95c5092 100644 --- a/src/config/configTypes.ts +++ b/src/config/configTypes.ts @@ -1,16 +1,20 @@ export type RepopackOutputStyle = 'plain' | 'xml' | 'markdown'; +export interface RepopackOutputConfig { + filePath?: string; + style?: RepopackOutputStyle; + headerText?: string; + instructionFilePath?: string; + removeComments?: boolean; + removeEmptyLines?: boolean; + topFilesLength?: number; + showLineNumbers?: boolean; + maxTokensPerFile?: number; // Added maxTokensPerFile + onlyShowPartFilesInRepoStructure?: boolean; +} + interface RepopackConfigBase { - output?: { - filePath?: string; - style?: RepopackOutputStyle; - headerText?: string; - instructionFilePath?: string; - removeComments?: boolean; - removeEmptyLines?: boolean; - topFilesLength?: number; - showLineNumbers?: boolean; - }; + output?: RepopackOutputConfig; include?: string[]; ignore?: { useGitignore?: boolean; @@ -23,16 +27,7 @@ interface RepopackConfigBase { } export type RepopackConfigDefault = RepopackConfigBase & { - output: { - filePath: string; - style: RepopackOutputStyle; - headerText?: string; - instructionFilePath?: string; - removeComments: boolean; - removeEmptyLines: boolean; - topFilesLength: number; - showLineNumbers: boolean; - }; + output: RepopackOutputConfig; include: string[]; ignore: { useGitignore: boolean; diff --git a/src/config/defaultConfig.ts b/src/config/defaultConfig.ts index 01442f6..ec64918 100644 --- a/src/config/defaultConfig.ts +++ b/src/config/defaultConfig.ts @@ -14,6 +14,7 @@ export const defaultConfig: RepopackConfigDefault = { removeEmptyLines: false, topFilesLength: 5, showLineNumbers: false, + onlyShowPartFilesInRepoStructure: false }, include: [], ignore: { diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index 272d34b..a1a7cf7 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -8,52 +8,88 @@ import type { OutputGeneratorContext } from './outputGeneratorTypes.js'; import { generateMarkdownStyle } from './outputStyles/markdownStyle.js'; import { generatePlainStyle } from './outputStyles/plainStyle.js'; import { generateXmlStyle } from './outputStyles/xmlStyle.js'; +import { splitOutput, type OutputSplit } from './outputSplitter.js'; export const generateOutput = async ( rootDir: string, config: RepopackConfigMerged, processedFiles: ProcessedFile[], allFilePaths: string[], -): Promise => { - const outputGeneratorContext = await buildOutputGeneratorContext(rootDir, config, allFilePaths, processedFiles); - - let output: string; - switch (config.output.style) { - case 'xml': - output = generateXmlStyle(outputGeneratorContext); - break; - case 'markdown': - output = generateMarkdownStyle(outputGeneratorContext); - break; - default: - output = generatePlainStyle(outputGeneratorContext); - } +): Promise => { + const maxTokensPerFile = config.output.maxTokensPerFile ?? Infinity; // Use Infinity if no limit is set + + const outputSplits: OutputSplit[] = + maxTokensPerFile < Infinity + ? splitOutput( + processedFiles, + maxTokensPerFile + ) + : [{ partNumber: 1, tokenCount: 0, includedFiles: processedFiles }]; + + const outputs = await Promise.all( + outputSplits.map(async (outputSplit) => { + const outputGeneratorContext = await buildOutputGeneratorContext( + rootDir, + config, + outputSplit.includedFiles, + config.output.onlyShowPartFilesInRepoStructure ? outputSplit.includedFiles.map(f => f.path) : allFilePaths, + processedFiles.length, + outputSplits.length, + outputSplit.partNumber, + ) - return output; + let output: string; + switch (config.output.style) { + case 'xml': + output = generateXmlStyle(outputGeneratorContext); + break; + case 'markdown': + output = generateMarkdownStyle(outputGeneratorContext); + break; + default: + output = generatePlainStyle(outputGeneratorContext); + } + return output; + }), + ); + + return outputs; }; export const buildOutputGeneratorContext = async ( rootDir: string, config: RepopackConfigMerged, - allFilePaths: string[], - processedFiles: ProcessedFile[], + includedFiles: ProcessedFile[] = [], // Add includedFiles parameter + repositoryStructure: string[] = [], + totalFiles: number = 1, + totalParts: number = 1, + partNumber: number = 1 ): Promise => { let repositoryInstruction = ''; if (config.output.instructionFilePath) { - const instructionPath = path.resolve(rootDir, config.output.instructionFilePath); + const instructionPath = path.resolve( + rootDir, + config.output.instructionFilePath, + ); try { repositoryInstruction = await fs.readFile(instructionPath, 'utf-8'); } catch { - throw new RepopackError(`Instruction file not found at ${instructionPath}`); + throw new RepopackError( + `Instruction file not found at ${instructionPath}`, + ); } } return { generationDate: new Date().toISOString(), - treeString: generateTreeString(allFilePaths), - processedFiles, + treeString: generateTreeString(repositoryStructure), // Use includedFiles for treeString config, instruction: repositoryInstruction, + content: '', + includedFiles, + totalFiles, + totalParts, + partNumber }; }; diff --git a/src/core/output/outputGeneratorTypes.ts b/src/core/output/outputGeneratorTypes.ts index bef9275..ca31aee 100644 --- a/src/core/output/outputGeneratorTypes.ts +++ b/src/core/output/outputGeneratorTypes.ts @@ -4,7 +4,11 @@ import type { ProcessedFile } from '../file/fileTypes.js'; export interface OutputGeneratorContext { generationDate: string; treeString: string; - processedFiles: ProcessedFile[]; config: RepopackConfigMerged; instruction: string; + content: string; + includedFiles: ProcessedFile[]; // Add the includedFiles property + totalFiles: number, + partNumber: number, + totalParts: number } diff --git a/src/core/output/outputSplitter.ts b/src/core/output/outputSplitter.ts new file mode 100644 index 0000000..016cb1b --- /dev/null +++ b/src/core/output/outputSplitter.ts @@ -0,0 +1,53 @@ +import type { ProcessedFile } from '../file/fileTypes.js'; +import { TokenCounter } from '../tokenCount/tokenCount.js'; + +export interface OutputSplit { + partNumber: number; + tokenCount: number; + includedFiles: ProcessedFile[]; // Add includedFiles property +} + +export const splitOutput = ( + processedFiles: ProcessedFile[], + maxTokensPerFile: number, +): OutputSplit[] => { + const tokenCounter = new TokenCounter(); + const outputSplits: OutputSplit[] = []; + let currentTokenCount = 0; + let currentOutput = ''; + let currentIncludedFiles: ProcessedFile[] = []; // Initialize currentIncludedFiles + + for (const file of processedFiles) { + const fileTokenCount = tokenCounter.countTokens(file.content, file.path); + + if (currentTokenCount + fileTokenCount > maxTokensPerFile) { + // Start a new part + outputSplits.push({ + partNumber: outputSplits.length+1, + tokenCount: currentTokenCount, + includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit + }); + + currentTokenCount = 0; + currentOutput = ''; + currentIncludedFiles = []; // Reset currentIncludedFiles + } + + currentOutput += file.content; + currentTokenCount += fileTokenCount; + currentIncludedFiles.push(file); // Add file path to currentIncludedFiles + + } + + if (currentIncludedFiles.length) { + // Add the last part + outputSplits.push({ + partNumber: outputSplits.length+1, + tokenCount: currentTokenCount, + includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit + }); + } + tokenCounter.free(); + + return outputSplits; +}; diff --git a/src/core/output/outputStyles/markdownStyle.ts b/src/core/output/outputStyles/markdownStyle.ts index 5f19a68..190fd29 100644 --- a/src/core/output/outputStyles/markdownStyle.ts +++ b/src/core/output/outputStyles/markdownStyle.ts @@ -25,7 +25,11 @@ export const generateMarkdownStyle = (outputGeneratorContext: OutputGeneratorCon headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -48,6 +52,10 @@ const markdownTemplate = /* md */ ` ## Usage Guidelines {{{summaryUsageGuidelines}}} +## Repository Size +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + ## Notes {{{summaryNotes}}} @@ -66,7 +74,7 @@ const markdownTemplate = /* md */ ` # Repository Files -{{#each processedFiles}} +{{#each includedFiles}} ## File: {{{this.path}}} \`\`\`{{{getFileExtension this.path}}} {{{this.content}}} diff --git a/src/core/output/outputStyles/plainStyle.ts b/src/core/output/outputStyles/plainStyle.ts index 7c15436..c5c57b2 100644 --- a/src/core/output/outputStyles/plainStyle.ts +++ b/src/core/output/outputStyles/plainStyle.ts @@ -25,7 +25,11 @@ export const generatePlainStyle = (outputGeneratorContext: OutputGeneratorContex headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -59,6 +63,11 @@ Usage Guidelines: ----------------- {{{summaryUsageGuidelines}}} +Repository Size: +----------------- +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + Notes: ------ {{{summaryNotes}}} @@ -82,7 +91,7 @@ ${PLAIN_LONG_SEPARATOR} Repository Files ${PLAIN_LONG_SEPARATOR} -{{#each processedFiles}} +{{#each includedFiles}} ${PLAIN_SEPARATOR} File: {{{this.path}}} ${PLAIN_SEPARATOR} diff --git a/src/core/output/outputStyles/xmlStyle.ts b/src/core/output/outputStyles/xmlStyle.ts index f343872..bdba9fc 100644 --- a/src/core/output/outputStyles/xmlStyle.ts +++ b/src/core/output/outputStyles/xmlStyle.ts @@ -25,7 +25,11 @@ export const generateXmlStyle = (outputGeneratorContext: OutputGeneratorContext) headerText: outputGeneratorContext.config.output.headerText, instruction: outputGeneratorContext.instruction, treeString: outputGeneratorContext.treeString, - processedFiles: outputGeneratorContext.processedFiles, + includedFiles: outputGeneratorContext.includedFiles, + partNumber: outputGeneratorContext.partNumber, + totalParts: outputGeneratorContext.totalParts, + totalPartFiles: outputGeneratorContext.includedFiles.length, + totalFiles: outputGeneratorContext.totalFiles }; return `${template(renderContext).trim()}\n`; @@ -52,6 +56,12 @@ This section contains a summary of this file. {{{summaryUsageGuidelines}}} + +This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. +This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. + + + {{{summaryNotes}}} @@ -75,7 +85,7 @@ This section contains a summary of this file. This section contains the contents of the repository's files. -{{#each processedFiles}} +{{#each includedFiles}} {{{this.content}}} diff --git a/src/core/packager.ts b/src/core/packager.ts index fc3bc6b..dfdaab2 100644 --- a/src/core/packager.ts +++ b/src/core/packager.ts @@ -57,9 +57,15 @@ export const pack = async ( if (config.security.enableSecurityCheck) { // Perform security check and filter out suspicious files progressCallback('Running security check...'); - suspiciousFilesResults = await deps.runSecurityCheck(rawFiles, progressCallback); + suspiciousFilesResults = await deps.runSecurityCheck( + rawFiles, + progressCallback, + ); safeRawFiles = rawFiles.filter( - (rawFile) => !suspiciousFilesResults.some((result) => result.filePath === rawFile.path), + (rawFile) => + !suspiciousFilesResults.some( + (result) => result.filePath === rawFile.path, + ), ); } @@ -72,13 +78,37 @@ export const pack = async ( // Generate output progressCallback('Generating output...'); - const output = await deps.generateOutput(rootDir, config, processedFiles, safeFilePaths); + const outputs = await deps.generateOutput( + rootDir, + config, + processedFiles, + safeFilePaths + ); - // Write output to file. path is relative to the cwd - progressCallback('Writing output file...'); - const outputPath = path.resolve(config.cwd, config.output.filePath); - logger.trace(`Writing output to: ${outputPath}`); - await fs.writeFile(outputPath, output); + // Write output to file(s). path is relative to the cwd + progressCallback('Writing output file(s)...'); + + // Handle the case where filePath is undefined + if (config.output.filePath) { + const outputFileBase = path.parse(config.output.filePath).name; + const outputFileExt = path.parse(config.output.filePath).ext; + + await Promise.all( + outputs.map(async (output, index) => { + const outputPath = path.resolve( + config.cwd, + `${outputFileBase}${ + outputs.length > 1 ? `-${index + 1}` : '' + }${outputFileExt}`, + ); + logger.trace(`Writing output to: ${outputPath}`); + await fs.writeFile(outputPath, output); + }), + ); + } else { + // Handle the case where filePath is undefined (e.g., log a warning) + logger.warn('Output file path is not defined. Skipping file writing.'); + } // Setup token counter const tokenCounter = new TokenCounter(); @@ -91,7 +121,11 @@ export const pack = async ( const charCount = file.content.length; const tokenCount = tokenCounter.countTokens(file.content, file.path); - progressCallback(`Calculating metrics... (${index + 1}/${processedFiles.length}) ${pc.dim(file.path)}`); + progressCallback( + `Calculating metrics... (${index + 1}/${ + processedFiles.length + }) ${pc.dim(file.path ?? 'Unknown File')}`, + ); // Sleep for a short time to prevent blocking the event loop await sleep(1); @@ -106,8 +140,14 @@ export const pack = async ( tokenCounter.free(); const totalFiles = processedFiles.length; - const totalCharacters = fileMetrics.reduce((sum, fileMetric) => sum + fileMetric.charCount, 0); - const totalTokens = fileMetrics.reduce((sum, fileMetric) => sum + fileMetric.tokenCount, 0); + const totalCharacters = fileMetrics.reduce( + (sum, fileMetric) => sum + fileMetric.charCount, + 0, + ); + const totalTokens = fileMetrics.reduce( + (sum, fileMetric) => sum + fileMetric.tokenCount, + 0, + ); const fileCharCounts: Record = {}; const fileTokenCounts: Record = {}; @@ -125,3 +165,4 @@ export const pack = async ( suspiciousFilesResults, }; }; +