feat(pack): Simplify the process and make it testable with DI
Showing 31 changed files with 539 additions and 686 deletions.
Diff of the file collection module:

```diff
@@ -1,71 +1,67 @@
-import * as fs from 'node:fs/promises';
-import path from 'node:path';
-import iconv from 'iconv-lite';
-import { isBinary } from 'istextorbinary';
-import jschardet from 'jschardet';
-import pMap from 'p-map';
+import pc from 'picocolors';
+import { Piscina } from 'piscina';
 import { logger } from '../../shared/logger.js';
-import { getProcessConcurrency } from '../../shared/processConcurrency.js';
+import { getWorkerThreadCount } from '../../shared/processConcurrency.js';
+import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { RawFile } from './fileTypes.js';
+import type { FileCollectTask } from './workers/fileCollectWorker.js';
 
-// Maximum file size to process (50MB)
-// This prevents out-of-memory errors when processing very large files
-export const MAX_FILE_SIZE = 50 * 1024 * 1024;
-
-export const collectFiles = async (filePaths: string[], rootDir: string): Promise<RawFile[]> => {
-  const rawFiles = await pMap(
-    filePaths,
-    async (filePath) => {
-      const fullPath = path.resolve(rootDir, filePath);
-      const content = await readRawFile(fullPath);
-      if (content) {
-        return { path: filePath, content };
-      }
-      return null;
-    },
-    {
-      concurrency: getProcessConcurrency(),
-    },
-  );
-
-  return rawFiles.filter((file): file is RawFile => file != null);
-};
-
-const readRawFile = async (filePath: string): Promise<string | null> => {
-  try {
-    const stats = await fs.stat(filePath);
-
-    if (stats.size > MAX_FILE_SIZE) {
-      const sizeMB = (stats.size / 1024 / 1024).toFixed(1);
-      logger.log('');
-      logger.log('⚠️ Large File Warning:');
-      logger.log('──────────────────────');
-      logger.log(`File exceeds size limit: ${sizeMB}MB > ${MAX_FILE_SIZE / 1024 / 1024}MB (${filePath})`);
-      logger.note('Add this file to .repomixignore if you want to exclude it permanently');
-      logger.log('');
-      return null;
-    }
-
-    if (isBinary(filePath)) {
-      logger.debug(`Skipping binary file: ${filePath}`);
-      return null;
-    }
-
-    logger.trace(`Reading file: ${filePath}`);
-
-    const buffer = await fs.readFile(filePath);
-
-    if (isBinary(null, buffer)) {
-      logger.debug(`Skipping binary file (content check): ${filePath}`);
-      return null;
-    }
-
-    const encoding = jschardet.detect(buffer).encoding || 'utf-8';
-    const content = iconv.decode(buffer, encoding);
-
-    return content;
-  } catch (error) {
-    logger.warn(`Failed to read file: ${filePath}`, error);
-    return null;
-  }
-};
+const initTaskRunner = (numOfTasks: number) => {
+  const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
+  logger.trace(`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads`);
+
+  const pool = new Piscina({
+    filename: new URL('./workers/fileCollectWorker.js', import.meta.url).href,
+    minThreads,
+    maxThreads,
+    idleTimeout: 5000,
+  });
+
+  return (task: FileCollectTask) => pool.run(task);
+};
+
+export const collectFiles = async (
+  filePaths: string[],
+  rootDir: string,
+  progressCallback: RepomixProgressCallback = () => {},
+  deps = {
+    initTaskRunner,
+  },
+): Promise<RawFile[]> => {
+  const runTask = deps.initTaskRunner(filePaths.length);
+  const tasks = filePaths.map(
+    (filePath) =>
+      ({
+        filePath,
+        rootDir,
+      }) satisfies FileCollectTask,
+  );
+
+  try {
+    const startTime = process.hrtime.bigint();
+    logger.trace(`Starting file collection for ${filePaths.length} files using worker pool`);
+
+    let completedTasks = 0;
+    const totalTasks = tasks.length;
+
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Collect file... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
+          logger.trace(`Collect files... (${completedTasks}/${totalTasks}) ${task.filePath}`);
+          return result;
+        }),
+      ),
+    );
+
+    const endTime = process.hrtime.bigint();
+    const duration = Number(endTime - startTime) / 1e6;
+    logger.trace(`File collection completed in ${duration.toFixed(2)}ms`);
+
+    return results.filter((file): file is RawFile => file !== null);
+  } catch (error) {
+    logger.error('Error during file collection:', error);
+    throw error;
+  }
+};
```
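The per-file logic that used to live in `readRawFile` (binary detection, encoding sniffing, decoding) now runs off the main thread in `workers/fileCollectWorker.js`, which Piscina invokes once per `FileCollectTask`. That worker file is part of this commit but is not shown in this excerpt, so the following is only a hedged sketch reconstructed from the removed `readRawFile` code; anything beyond that removed code is an assumption.

```ts
// Hypothetical reconstruction of workers/fileCollectWorker.ts -- NOT the
// commit's actual worker. Piscina calls the default export with each task.
import * as fs from 'node:fs/promises';
import path from 'node:path';
import iconv from 'iconv-lite';
import { isBinary } from 'istextorbinary';
import jschardet from 'jschardet';
import type { RawFile } from '../fileTypes.js';

// The task shape collectFiles builds via `satisfies FileCollectTask`.
export interface FileCollectTask {
  filePath: string;
  rootDir: string;
}

export default async ({ filePath, rootDir }: FileCollectTask): Promise<RawFile | null> => {
  const fullPath = path.resolve(rootDir, filePath);
  try {
    // The MAX_FILE_SIZE guard removed from this module presumably moved
    // here as well; omitted for brevity.
    const buffer = await fs.readFile(fullPath);

    // Same content-based binary check the old readRawFile performed.
    if (isBinary(null, buffer)) {
      return null;
    }

    // Same encoding detection and decoding as the removed code.
    const encoding = jschardet.detect(buffer).encoding || 'utf-8';
    return { path: filePath, content: iconv.decode(buffer, encoding) };
  } catch {
    return null;
  }
};
```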
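The `deps` parameter is what makes the "testable with DI" part of the commit title concrete: production callers get the Piscina pool by default, while a test can inject a synchronous stub and never spin up worker threads. Below is a minimal sketch of such a test, assuming vitest and an import path for the module; both are assumptions not shown in the excerpt.

```ts
import { expect, test, vi } from 'vitest';
// Assumed module path -- adjust to wherever collectFiles actually lives.
import { collectFiles } from './fileCollect.js';
import type { FileCollectTask } from './workers/fileCollectWorker.js';

test('collectFiles delegates to the injected task runner', async () => {
  // Stub runner: returns a canned RawFile without touching the filesystem.
  const mockRunTask = vi.fn(async (task: FileCollectTask) => ({
    path: task.filePath,
    content: `contents of ${task.filePath}`,
  }));
  const initTaskRunner = vi.fn((_numOfTasks: number) => mockRunTask);

  const result = await collectFiles(['a.ts', 'b.ts'], '/root', () => {}, {
    initTaskRunner,
  });

  // The runner factory is sized by the number of files...
  expect(initTaskRunner).toHaveBeenCalledWith(2);
  // ...and each file becomes exactly one task.
  expect(mockRunTask).toHaveBeenCalledTimes(2);
  expect(result).toEqual([
    { path: 'a.ts', content: 'contents of a.ts' },
    { path: 'b.ts', content: 'contents of b.ts' },
  ]);
});
```

Injecting the pool factory rather than calling it directly also means the worker file never has to exist on disk during unit tests, which is the practical payoff of this seam.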