-
-
Notifications
You must be signed in to change notification settings - Fork 289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add feature split files by maxTokenSize per file #113
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -29,7 +29,7 @@ export const runRemoteAction = async (repoUrl: string, options: CliOptions): Pro | |||||
logger.log(''); | ||||||
|
||||||
const result = await runDefaultAction(tempDir, tempDir, options); | ||||||
await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath); | ||||||
await copyOutputToCurrentDirectory(tempDir, process.cwd(), result.config.output.filePath ?? 'repopack-output.txt'); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This fallback is not necessary, because the file path will eventually take the value from defaultConfig during config merging.
Suggested change
|
||||||
} finally { | ||||||
// Clean up the temporary directory | ||||||
await cleanupTempDirectory(tempDir); | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -23,6 +23,7 @@ export interface CliOptions extends OptionValues { | |||||
init?: boolean; | ||||||
global?: boolean; | ||||||
remote?: string; | ||||||
maxTokens?: number; // Add the maxTokens option | ||||||
} | ||||||
|
||||||
export async function run() { | ||||||
|
@@ -44,6 +45,7 @@ export async function run() { | |||||
.option('--init', 'initialize a new repopack.config.json file') | ||||||
.option('--global', 'use global configuration (only applicable with --init)') | ||||||
.option('--remote <url>', 'process a remote Git repository') | ||||||
.option('--max-tokens <number>', 'maximum number of tokens per output file', Number.parseInt) // Add the maxTokens option | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If possible, I would like to name this option `--output-max-tokens`, since it is a token limit on the output. I am concerned that a plain `--max-tokens` might be needed for other purposes in the future.
Suggested change
|
||||||
.action((directory = '.', options: CliOptions = {}) => executeAction(directory, process.cwd(), options)); | ||||||
|
||||||
await program.parseAsync(process.argv); | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -85,7 +85,7 @@ export const mergeConfigs = ( | |||||
// If the output file path is not provided in the config file or CLI, use the default file path for the style | ||||||
if (cliConfig.output?.filePath == null && fileConfig.output?.filePath == null) { | ||||||
const style = cliConfig.output?.style || fileConfig.output?.style || defaultConfig.output.style; | ||||||
defaultConfig.output.filePath = defaultFilePathMap[style]; | ||||||
defaultConfig.output.filePath = defaultFilePathMap[style ?? 'plain']; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change is not necessary, since the style will eventually fall back to `plain` via defaultConfig.
Suggested change
|
||||||
} | ||||||
|
||||||
return { | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,20 @@ | ||
export type RepopackOutputStyle = 'plain' | 'xml' | 'markdown'; | ||
|
||
export interface RepopackOutputConfig { | ||
filePath?: string; | ||
style?: RepopackOutputStyle; | ||
headerText?: string; | ||
instructionFilePath?: string; | ||
removeComments?: boolean; | ||
removeEmptyLines?: boolean; | ||
topFilesLength?: number; | ||
showLineNumbers?: boolean; | ||
maxTokensPerFile?: number; // Added maxTokensPerFile | ||
onlyShowPartFilesInRepoStructure?: boolean; | ||
} | ||
|
||
interface RepopackConfigBase { | ||
output?: { | ||
filePath?: string; | ||
style?: RepopackOutputStyle; | ||
headerText?: string; | ||
instructionFilePath?: string; | ||
removeComments?: boolean; | ||
removeEmptyLines?: boolean; | ||
topFilesLength?: number; | ||
showLineNumbers?: boolean; | ||
}; | ||
output?: RepopackOutputConfig; | ||
include?: string[]; | ||
ignore?: { | ||
useGitignore?: boolean; | ||
|
@@ -23,16 +27,7 @@ interface RepopackConfigBase { | |
} | ||
|
||
export type RepopackConfigDefault = RepopackConfigBase & { | ||
output: { | ||
filePath: string; | ||
style: RepopackOutputStyle; | ||
headerText?: string; | ||
instructionFilePath?: string; | ||
removeComments: boolean; | ||
removeEmptyLines: boolean; | ||
topFilesLength: number; | ||
showLineNumbers: boolean; | ||
}; | ||
output: RepopackOutputConfig; | ||
yamadashy marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the defaultConfig type deliberately makes some of these fields non-nullable, I would like to keep the original code and just add this option to it. |
||
include: string[]; | ||
ignore: { | ||
useGitignore: boolean; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import type { ProcessedFile } from '../file/fileTypes.js'; | ||
import { TokenCounter } from '../tokenCount/tokenCount.js'; | ||
|
||
export interface OutputSplit { | ||
partNumber: number; | ||
tokenCount: number; | ||
includedFiles: ProcessedFile[]; // Add includedFiles property | ||
} | ||
|
||
export const splitOutput = ( | ||
processedFiles: ProcessedFile[], | ||
maxTokensPerFile: number, | ||
): OutputSplit[] => { | ||
const tokenCounter = new TokenCounter(); | ||
const outputSplits: OutputSplit[] = []; | ||
let currentTokenCount = 0; | ||
let currentOutput = ''; | ||
yamadashy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let currentIncludedFiles: ProcessedFile[] = []; // Initialize currentIncludedFiles | ||
|
||
for (const file of processedFiles) { | ||
const fileTokenCount = tokenCounter.countTokens(file.content, file.path); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With a large number of files, the heavy token-counting work here may become a performance problem. Elsewhere, the places that count tokens currently mitigate this slightly by adding a sleep. However, I think it is better to leave this as it is for now and fix it when the problem actually occurs. |
||
|
||
if (currentTokenCount + fileTokenCount > maxTokensPerFile) { | ||
// Start a new part | ||
outputSplits.push({ | ||
partNumber: outputSplits.length+1, | ||
tokenCount: currentTokenCount, | ||
includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit | ||
}); | ||
|
||
currentTokenCount = 0; | ||
currentOutput = ''; | ||
currentIncludedFiles = []; // Reset currentIncludedFiles | ||
} | ||
yamadashy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
currentOutput += file.content; | ||
currentTokenCount += fileTokenCount; | ||
currentIncludedFiles.push(file); // Add file path to currentIncludedFiles | ||
|
||
} | ||
|
||
if (currentIncludedFiles.length) { | ||
// Add the last part | ||
outputSplits.push({ | ||
partNumber: outputSplits.length+1, | ||
tokenCount: currentTokenCount, | ||
includedFiles: currentIncludedFiles, // Add includedFiles to the outputSplit | ||
}); | ||
} | ||
tokenCounter.free(); | ||
|
||
return outputSplits; | ||
}; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -25,7 +25,11 @@ export const generatePlainStyle = (outputGeneratorContext: OutputGeneratorContex | |||||
headerText: outputGeneratorContext.config.output.headerText, | ||||||
instruction: outputGeneratorContext.instruction, | ||||||
treeString: outputGeneratorContext.treeString, | ||||||
processedFiles: outputGeneratorContext.processedFiles, | ||||||
includedFiles: outputGeneratorContext.includedFiles, | ||||||
partNumber: outputGeneratorContext.partNumber, | ||||||
totalParts: outputGeneratorContext.totalParts, | ||||||
totalPartFiles: outputGeneratorContext.includedFiles.length, | ||||||
totalFiles: outputGeneratorContext.totalFiles | ||||||
yamadashy marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
}; | ||||||
|
||||||
return `${template(renderContext).trim()}\n`; | ||||||
|
@@ -59,6 +63,11 @@ Usage Guidelines: | |||||
----------------- | ||||||
{{{summaryUsageGuidelines}}} | ||||||
|
||||||
Repository Size: | ||||||
----------------- | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The line of hyphens should be aligned with the heading text above it.
Suggested change
|
||||||
This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. | ||||||
This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. | ||||||
|
||||||
Notes: | ||||||
------ | ||||||
{{{summaryNotes}}} | ||||||
|
@@ -82,7 +91,7 @@ ${PLAIN_LONG_SEPARATOR} | |||||
Repository Files | ||||||
${PLAIN_LONG_SEPARATOR} | ||||||
|
||||||
{{#each processedFiles}} | ||||||
{{#each includedFiles}} | ||||||
${PLAIN_SEPARATOR} | ||||||
File: {{{this.path}}} | ||||||
${PLAIN_SEPARATOR} | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,11 @@ export const generateXmlStyle = (outputGeneratorContext: OutputGeneratorContext) | |
headerText: outputGeneratorContext.config.output.headerText, | ||
instruction: outputGeneratorContext.instruction, | ||
treeString: outputGeneratorContext.treeString, | ||
processedFiles: outputGeneratorContext.processedFiles, | ||
includedFiles: outputGeneratorContext.includedFiles, | ||
partNumber: outputGeneratorContext.partNumber, | ||
totalParts: outputGeneratorContext.totalParts, | ||
totalPartFiles: outputGeneratorContext.includedFiles.length, | ||
totalFiles: outputGeneratorContext.totalFiles | ||
}; | ||
|
||
return `${template(renderContext).trim()}\n`; | ||
|
@@ -52,6 +56,12 @@ This section contains a summary of this file. | |
{{{summaryUsageGuidelines}}} | ||
</usage_guidelines> | ||
|
||
<repository_size> | ||
This file is part {{{partNumber}}} of {{{totalParts}}} of a split representation of the entire codebase. | ||
This file contains {{{totalPartFiles}}} out of a total of {{{totalFiles}}} files. | ||
|
||
</repository_size> | ||
|
||
<notes> | ||
{{{summaryNotes}}} | ||
</notes> | ||
|
@@ -75,7 +85,7 @@ This section contains a summary of this file. | |
<repository_files> | ||
This section contains the contents of the repository's files. | ||
|
||
{{#each processedFiles}} | ||
{{#each includedFiles}} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please make the same correction to plainStyle.ts and markdownStyle.ts. |
||
<file path="{{{this.path}}}"> | ||
{{{this.content}}} | ||
</file> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regarding the PR as a whole,
please fix the failures reported by `npm run lint` and `npm run test`.