diff --git a/src/cli/actions/remoteAction.ts b/src/cli/actions/remoteAction.ts
index e3ab05ed..27cb6ab6 100644
--- a/src/cli/actions/remoteAction.ts
+++ b/src/cli/actions/remoteAction.ts
@@ -19,6 +19,7 @@ export const runRemoteAction = async (
   deps = {
     isGitInstalled,
     execGitShallowClone,
+    runDefaultAction,
   },
 ): Promise<DefaultActionRunnerResult> => {
   if (!(await deps.isGitInstalled())) {
@@ -46,7 +47,7 @@ export const runRemoteAction = async (
     logger.log('');
 
     // Run the default action on the cloned repository
-    result = await runDefaultAction(tempDirPath, tempDirPath, options);
+    result = await deps.runDefaultAction(tempDirPath, tempDirPath, options);
     await copyOutputToCurrentDirectory(tempDirPath, process.cwd(), result.config.output.filePath);
   } catch (error) {
     spinner.fail('Error during repository cloning. cleanup...');
diff --git a/src/core/file/fileCollect.ts b/src/core/file/fileCollect.ts
index 4e14731d..9e4070c2 100644
--- a/src/core/file/fileCollect.ts
+++ b/src/core/file/fileCollect.ts
@@ -1,71 +1,67 @@
-import * as fs from 'node:fs/promises';
-import path from 'node:path';
-import iconv from 'iconv-lite';
-import { isBinary } from 'istextorbinary';
-import jschardet from 'jschardet';
-import pMap from 'p-map';
+import pc from 'picocolors';
+import { Piscina } from 'piscina';
 import { logger } from '../../shared/logger.js';
-import { getProcessConcurrency } from '../../shared/processConcurrency.js';
+import { getWorkerThreadCount } from '../../shared/processConcurrency.js';
+import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { RawFile } from './fileTypes.js';
+import type { FileCollectTask } from './workers/fileCollectWorker.js';
 
-// Maximum file size to process (50MB)
-// This prevents out-of-memory errors when processing very large files
-export const MAX_FILE_SIZE = 50 * 1024 * 1024;
+const initTaskRunner = (numOfTasks: number) => {
+  const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
+  logger.trace(`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads`);
 
-export const collectFiles = async (filePaths: string[], rootDir: string): Promise<RawFile[]> => {
-  const rawFiles = await pMap(
-    filePaths,
-    async (filePath) => {
-      const fullPath = path.resolve(rootDir, filePath);
-      const content = await readRawFile(fullPath);
-      if (content) {
-        return { path: filePath, content };
-      }
-      return null;
-    },
-    {
-      concurrency: getProcessConcurrency(),
-    },
-  );
+  const pool = new Piscina({
+    filename: new URL('./workers/fileCollectWorker.js', import.meta.url).href,
+    minThreads,
+    maxThreads,
+    idleTimeout: 5000,
+  });
 
-  return rawFiles.filter((file): file is RawFile => file != null);
+  return (task: FileCollectTask) => pool.run(task);
 };
 
-const readRawFile = async (filePath: string): Promise<string | null> => {
-  try {
-    const stats = await fs.stat(filePath);
-
-    if (stats.size > MAX_FILE_SIZE) {
-      const sizeMB = (stats.size / 1024 / 1024).toFixed(1);
-      logger.log('');
-      logger.log('⚠️ Large File Warning:');
-      logger.log('──────────────────────');
-      logger.log(`File exceeds size limit: ${sizeMB}MB > ${MAX_FILE_SIZE / 1024 / 1024}MB (${filePath})`);
-      logger.note('Add this file to .repomixignore if you want to exclude it permanently');
-      logger.log('');
-      return null;
-    }
-
-    if (isBinary(filePath)) {
-      logger.debug(`Skipping binary file: ${filePath}`);
-      return null;
-    }
+export const collectFiles = async (
+  filePaths: string[],
+  rootDir: string,
+  progressCallback: RepomixProgressCallback = () => {},
+  deps = {
+    initTaskRunner,
+  },
+): Promise<RawFile[]> => {
+  const runTask = deps.initTaskRunner(filePaths.length);
+  const tasks = filePaths.map(
+    (filePath) =>
+      ({
+        filePath,
+        rootDir,
+      }) satisfies FileCollectTask,
+  );
 
-    logger.trace(`Reading file: ${filePath}`);
+  try {
+    const startTime = process.hrtime.bigint();
+    logger.trace(`Starting file collection for ${filePaths.length} files using worker pool`);
 
-    const buffer = await fs.readFile(filePath);
+    let completedTasks = 0;
+    const totalTasks = tasks.length;
 
-    if (isBinary(null, buffer)) {
-      logger.debug(`Skipping binary file (content check): ${filePath}`);
-      return null;
-    }
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Collect file... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
+          logger.trace(`Collect files... (${completedTasks}/${totalTasks}) ${task.filePath}`);
+          return result;
+        }),
+      ),
+    );
 
-    const encoding = jschardet.detect(buffer).encoding || 'utf-8';
-    const content = iconv.decode(buffer, encoding);
+    const endTime = process.hrtime.bigint();
+    const duration = Number(endTime - startTime) / 1e6;
+    logger.trace(`File collection completed in ${duration.toFixed(2)}ms`);
 
-    return content;
+    return results.filter((file): file is RawFile => file !== null);
   } catch (error) {
-    logger.warn(`Failed to read file: ${filePath}`, error);
-    return null;
+    logger.error('Error during file collection:', error);
+    throw error;
  }
 };
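The `deps.initTaskRunner` default parameter is the seam that makes the pool swappable: production callers get the Piscina-backed runner above, while tests can inject a runner that invokes the worker function inline on the calling thread. A minimal sketch of such an inline runner (the updated tests later in this patch use exactly this shape; the import paths assume a module sitting next to `fileCollect.ts`):

```typescript
import { collectFiles } from './fileCollect.js';
import fileCollectWorker, { type FileCollectTask } from './workers/fileCollectWorker.js';

// Inline runner: same task contract as the Piscina-backed runner,
// but no worker threads; the worker function runs on this thread.
const inlineTaskRunner = () => async (task: FileCollectTask) => fileCollectWorker(task);

const files = await collectFiles(['src/index.ts'], '/repo', () => {}, {
  initTaskRunner: inlineTaskRunner,
});
```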
diff --git a/src/core/file/fileProcess.ts b/src/core/file/fileProcess.ts
index 3f75ca99..53ed3372 100644
--- a/src/core/file/fileProcess.ts
+++ b/src/core/file/fileProcess.ts
@@ -1,97 +1,62 @@
-import path from 'node:path';
-import { fileURLToPath } from 'node:url';
 import pc from 'picocolors';
 import { Piscina } from 'piscina';
 import type { RepomixConfigMerged } from '../../config/configSchema.js';
 import { logger } from '../../shared/logger.js';
 import { getWorkerThreadCount } from '../../shared/processConcurrency.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
-import { getFileManipulator } from './fileManipulate.js';
 import type { ProcessedFile, RawFile } from './fileTypes.js';
+import type { FileProcessTask } from './workers/fileProcessWorker.js';
 
-// Worker pool singleton
-let workerPool: Piscina | null = null;
+const initTaskRunner = (numOfTasks: number) => {
+  const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
+  logger.trace(`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads`);
 
-/**
- * Initialize the worker pool
- */
-const initializeWorkerPool = (): Piscina => {
-  if (workerPool) {
-    return workerPool;
-  }
-
-  const { minThreads, maxThreads } = getWorkerThreadCount();
-  logger.trace(`Initializing file process worker pool with min=${minThreads}, max=${maxThreads} threads`);
-
-  workerPool = new Piscina({
-    filename: path.resolve(path.dirname(fileURLToPath(import.meta.url)), './workers/fileProcessWorker.js'),
+  const pool = new Piscina({
+    filename: new URL('./workers/fileProcessWorker.js', import.meta.url).href,
     minThreads,
     maxThreads,
     idleTimeout: 5000,
   });
 
-  return workerPool;
+  return (task: FileProcessTask) => pool.run(task);
 };
 
-/**
- * Process files in chunks to maintain progress visibility and prevent memory issues
- */
-async function processFileChunks(
-  pool: Piscina,
-  tasks: Array<{ rawFile: RawFile; index: number; totalFiles: number; config: RepomixConfigMerged }>,
-  progressCallback: RepomixProgressCallback,
-  chunkSize = 100,
-): Promise<ProcessedFile[]> {
-  const results: ProcessedFile[] = [];
-  let completedTasks = 0;
-  const totalTasks = tasks.length;
-
-  // Process files in chunks
-  for (let i = 0; i < tasks.length; i += chunkSize) {
-    const chunk = tasks.slice(i, i + chunkSize);
-    const chunkPromises = chunk.map((task) => {
-      return pool.run(task).then((result) => {
-        completedTasks++;
-        progressCallback(`Processing file... (${completedTasks}/${totalTasks}) ${pc.dim(task.rawFile.path)}`);
-        return result;
-      });
-    });
-
-    const chunkResults = await Promise.all(chunkPromises);
-    results.push(...chunkResults);
-
-    // Allow event loop to process other tasks
-    await new Promise((resolve) => setTimeout(resolve, 0));
-  }
-
-  return results;
-}
-
-/**
- * Process files using a worker thread pool
- */
 export const processFiles = async (
   rawFiles: RawFile[],
   config: RepomixConfigMerged,
   progressCallback: RepomixProgressCallback,
+  deps = {
+    initTaskRunner,
+  },
 ): Promise<ProcessedFile[]> => {
-  const pool = initializeWorkerPool();
-  const tasks = rawFiles.map((rawFile, index) => ({
-    rawFile,
-    index,
-    totalFiles: rawFiles.length,
-    config,
-  }));
+  const runTask = deps.initTaskRunner(rawFiles.length);
+  const tasks = rawFiles.map(
+    (rawFile, index) =>
+      ({
+        rawFile,
+        config,
+      }) satisfies FileProcessTask,
+  );
 
   try {
     const startTime = process.hrtime.bigint();
     logger.trace(`Starting file processing for ${rawFiles.length} files using worker pool`);
 
-    // Process files in chunks
-    const results = await processFileChunks(pool, tasks, progressCallback);
+    let completedTasks = 0;
+    const totalTasks = tasks.length;
+
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Processing file... (${completedTasks}/${totalTasks}) ${pc.dim(task.rawFile.path)}`);
+          return result;
+        }),
+      ),
+    );
 
     const endTime = process.hrtime.bigint();
-    const duration = Number(endTime - startTime) / 1e6; // Convert to milliseconds
+    const duration = Number(endTime - startTime) / 1e6;
     logger.trace(`File processing completed in ${duration.toFixed(2)}ms`);
 
     return results;
@@ -100,50 +65,3 @@ export const processFiles = async (
     throw error;
   }
 };
-
-/**
- * Cleanup worker pool resources
- */
-export const cleanupWorkerPool = async (): Promise<void> => {
-  if (workerPool) {
-    logger.trace('Cleaning up file process worker pool');
-    await workerPool.destroy();
-    workerPool = null;
-  }
-};
-
-export const processContent = async (
-  content: string,
-  filePath: string,
-  config: RepomixConfigMerged,
-): Promise<string> => {
-  let processedContent = content;
-  const manipulator = getFileManipulator(filePath);
-
-  logger.trace(`Processing file: ${filePath}`);
-
-  const processStartAt = process.hrtime.bigint();
-
-  if (config.output.removeComments && manipulator) {
-    processedContent = manipulator.removeComments(processedContent);
-  }
-
-  if (config.output.removeEmptyLines && manipulator) {
-    processedContent = manipulator.removeEmptyLines(processedContent);
-  }
-
-  processedContent = processedContent.trim();
-
-  if (config.output.showLineNumbers) {
-    const lines = processedContent.split('\n');
-    const padding = lines.length.toString().length;
-    const numberedLines = lines.map((line, index) => `${(index + 1).toString().padStart(padding)}: ${line}`);
-    processedContent = numberedLines.join('\n');
-  }
-
-  const processEndAt = process.hrtime.bigint();
-
-  logger.trace(`Processed file: ${filePath}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`);
-
-  return processedContent;
-};
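Worker resolution also changes from `path.resolve(path.dirname(fileURLToPath(import.meta.url)), ...)` to `new URL(..., import.meta.url).href`. Both point at the same file relative to the module that creates the pool; the URL form simply drops the `node:path`/`node:url` plumbing and hands Piscina a `file://` URL string instead of an absolute path. A small sketch of the equivalence (output paths illustrative):

```typescript
import path from 'node:path';
import { fileURLToPath } from 'node:url';

// Old style: convert the module URL to a path, then resolve the sibling file.
const viaPath = path.resolve(path.dirname(fileURLToPath(import.meta.url)), './workers/fileProcessWorker.js');

// New style: resolve against the module URL directly.
const viaUrl = new URL('./workers/fileProcessWorker.js', import.meta.url);

console.log(viaPath); // e.g. /app/dist/core/file/workers/fileProcessWorker.js
console.log(viaUrl.href); // e.g. file:///app/dist/core/file/workers/fileProcessWorker.js
console.log(fileURLToPath(viaUrl) === viaPath); // true
```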
diff --git a/src/core/file/workers/fileCollectWorker.ts b/src/core/file/workers/fileCollectWorker.ts
index bc493007..6ceee656 100644
--- a/src/core/file/workers/fileCollectWorker.ts
+++ b/src/core/file/workers/fileCollectWorker.ts
@@ -4,21 +4,52 @@ import iconv from 'iconv-lite';
 import { isBinary } from 'istextorbinary';
 import jschardet from 'jschardet';
 import { logger } from '../../../shared/logger.js';
-import type { ReadFileTask } from './types.js';
 
-/**
- * Reads a file and detects if it's binary or text
- * Returns null if file is binary or unreadable
- */
-const readRawFile = async (filePath: string): Promise<string | null> => {
-  if (isBinary(filePath)) {
-    logger.debug(`Skipping binary file: ${filePath}`);
-    return null;
+// Maximum file size to process (50MB)
+// This prevents out-of-memory errors when processing very large files
+export const MAX_FILE_SIZE = 50 * 1024 * 1024;
+
+export interface FileCollectTask {
+  filePath: string;
+  rootDir: string;
+}
+
+export default async ({ filePath, rootDir }: FileCollectTask) => {
+  const fullPath = path.resolve(rootDir, filePath);
+  const content = await readRawFile(fullPath);
+
+  if (content) {
+    return {
+      path: filePath,
+      content,
+    };
   }
 
-  logger.trace(`Reading file: ${filePath}`);
+  return null;
+};
 
+const readRawFile = async (filePath: string): Promise<string | null> => {
   try {
+    const stats = await fs.stat(filePath);
+
+    if (stats.size > MAX_FILE_SIZE) {
+      const sizeMB = (stats.size / 1024 / 1024).toFixed(1);
+      logger.log('');
+      logger.log('⚠️ Large File Warning:');
+      logger.log('──────────────────────');
+      logger.log(`File exceeds size limit: ${sizeMB}MB > ${MAX_FILE_SIZE / 1024 / 1024}MB (${filePath})`);
+      logger.note('Add this file to .repomixignore if you want to exclude it permanently');
+      logger.log('');
+      return null;
+    }
+
+    if (isBinary(filePath)) {
+      logger.debug(`Skipping binary file: ${filePath}`);
+      return null;
+    }
+
+    logger.trace(`Reading file: ${filePath}`);
+
     const buffer = await fs.readFile(filePath);
 
     if (isBinary(null, buffer)) {
@@ -35,20 +66,3 @@ const readRawFile = async (filePath: string): Promise<string | null> => {
     return null;
   }
 };
-
-/**
- * Worker thread function that reads a single file
- */
-export default async ({ filePath, rootDir }: ReadFileTask) => {
-  const fullPath = path.resolve(rootDir, filePath);
-  const content = await readRawFile(fullPath);
-
-  if (content) {
-    return {
-      path: filePath,
-      content,
-    };
-  }
-
-  return null;
-};
diff --git a/src/core/file/workers/fileProcessWorker.ts b/src/core/file/workers/fileProcessWorker.ts
index c8a828d8..92a61c01 100644
--- a/src/core/file/workers/fileProcessWorker.ts
+++ b/src/core/file/workers/fileProcessWorker.ts
@@ -3,24 +3,27 @@ import { logger } from '../../../shared/logger.js';
 import { getFileManipulator } from '../fileManipulate.js';
 import type { ProcessedFile, RawFile } from '../fileTypes.js';
 
-interface FileProcessWorkerInput {
+export interface FileProcessTask {
   rawFile: RawFile;
-  index: number;
-  totalFiles: number;
   config: RepomixConfigMerged;
 }
 
-/**
- * Worker thread function that processes a single file
- */
-export default async ({ rawFile, index, totalFiles, config }: FileProcessWorkerInput): Promise<ProcessedFile> => {
+export default async ({ rawFile, config }: FileProcessTask): Promise<ProcessedFile> => {
+  const processedContent = await processContent(rawFile, config);
+  return {
+    path: rawFile.path,
+    content: processedContent,
+  };
+};
+
+export const processContent = async (rawFile: RawFile, config: RepomixConfigMerged) => {
   const processStartAt = process.hrtime.bigint();
   let processedContent = rawFile.content;
   const manipulator = getFileManipulator(rawFile.path);
 
   logger.trace(`Processing file: ${rawFile.path}`);
 
-  if (config.output.removeComments && manipulator) {
+  if (manipulator && config.output.removeComments) {
     processedContent = manipulator.removeComments(processedContent);
   }
 
@@ -40,8 +43,5 @@ export default async ({ rawFile, index, totalFiles, config }: FileProcessWorkerInput): Promise<ProcessedFile> => {
   const processEndAt = process.hrtime.bigint();
   logger.trace(`Processed file: ${rawFile.path}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`);
 
-  return {
-    path: rawFile.path,
-    content: processedContent,
-  };
+  return processedContent;
 };
diff --git a/src/core/file/workers/types.ts b/src/core/file/workers/types.ts
deleted file mode 100644
index fad421cb..00000000
--- a/src/core/file/workers/types.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-/**
- * Task definition for file reading worker
- */
-export interface ReadFileTask {
-  filePath: string;
-  rootDir: string;
-}
-
-/**
- * Configuration for worker thread pool
- */
-export interface WorkerPoolConfig {
-  minThreads?: number;
-  maxThreads?: number;
-  idleTimeout?: number;
-}
diff --git a/src/core/metrics/aggregateMetrics.ts b/src/core/metrics/aggregateMetrics.ts
deleted file mode 100644
index a0cedc35..00000000
--- a/src/core/metrics/aggregateMetrics.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-import { TiktokenEncoding } from 'tiktoken';
-import type { ProcessedFile } from '../file/fileTypes.js';
-import { TokenCounter } from '../tokenCount/tokenCount.js';
-import type { FileMetrics } from './calculateIndividualFileMetrics.js';
-
-export const aggregateMetrics = (
-  fileMetrics: FileMetrics[],
-  processedFiles: ProcessedFile[],
-  output: string,
-  tokenCounterEncoding: TiktokenEncoding,
-) => {
-  const totalFiles = processedFiles.length;
-  const totalCharacters = output.length;
-  const tokenCounter = new TokenCounter(tokenCounterEncoding);
-  const totalTokens = tokenCounter.countTokens(output);
-
-  tokenCounter.free();
-
-  const fileCharCounts: Record<string, number> = {};
-  const fileTokenCounts: Record<string, number> = {};
-  for (const file of fileMetrics) {
-    fileCharCounts[file.path] = file.charCount;
-    fileTokenCounts[file.path] = file.tokenCount;
-  }
-
-  return {
-    totalFiles,
-    totalCharacters,
-    totalTokens,
-    fileCharCounts,
-    fileTokenCounts,
-  };
-};
diff --git a/src/core/metrics/calculateAllFileMetrics.ts b/src/core/metrics/calculateAllFileMetrics.ts
index 26352502..65d8572a 100644
--- a/src/core/metrics/calculateAllFileMetrics.ts
+++ b/src/core/metrics/calculateAllFileMetrics.ts
@@ -7,67 +7,26 @@ import { logger } from '../../shared/logger.js';
 import { getWorkerThreadCount } from '../../shared/processConcurrency.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { ProcessedFile } from '../file/fileTypes.js';
-import type { TokenCounter } from '../tokenCount/tokenCount.js';
-import type { FileMetrics } from './calculateIndividualFileMetrics.js';
-
-// Worker pool singleton
-let workerPool: Piscina | null = null;
+import type { FileMetricsTask } from './workers/fileMetricsWorker.js';
+import type { FileMetrics } from './workers/types.js';
 
 /**
  * Initialize the worker pool
  */
-const initializeWorkerPool = (): Piscina => {
-  if (workerPool) {
-    return workerPool;
-  }
-
-  const { minThreads, maxThreads } = getWorkerThreadCount();
-  logger.trace(`Initializing metrics worker pool with min=${minThreads}, max=${maxThreads} threads`);
+const initTaskRunner = (numOfTasks: number) => {
+  const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
+  logger.trace(`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads`);
 
-  workerPool = new Piscina({
-    filename: path.resolve(path.dirname(fileURLToPath(import.meta.url)), './workers/metricsWorker.js'),
+  const pool = new Piscina({
+    filename: new URL('./workers/fileMetricsWorker.js', import.meta.url).href,
     minThreads,
     maxThreads,
     idleTimeout: 5000,
   });
 
-  return workerPool;
+  return (task: FileMetricsTask) => pool.run(task);
 };
 
-/**
- * Process files in chunks to maintain progress visibility and prevent memory issues
- */
-async function processFileChunks(
-  pool: Piscina,
-  tasks: Array<{ file: ProcessedFile; index: number; totalFiles: number; encoding: TiktokenEncoding }>,
-  progressCallback: RepomixProgressCallback,
-  chunkSize = 100,
-): Promise<FileMetrics[]> {
-  const results: FileMetrics[] = [];
-  let completedTasks = 0;
-  const totalTasks = tasks.length;
-
-  // Process files in chunks
-  for (let i = 0; i < tasks.length; i += chunkSize) {
-    const chunk = tasks.slice(i, i + chunkSize);
-    const chunkPromises = chunk.map((task) => {
-      return pool.run(task).then((result) => {
-        completedTasks++;
-        progressCallback(`Calculating metrics... (${completedTasks}/${totalTasks}) ${pc.dim(task.file.path)}`);
-        return result;
-      });
-    });
-
-    const chunkResults = await Promise.all(chunkPromises);
-    results.push(...chunkResults);
-
-    // Allow event loop to process other tasks
-    await new Promise((resolve) => setTimeout(resolve, 0));
-  }
-
-  return results;
-}
-
 /**
  * Calculate metrics for all files using a worker thread pool
 */
@@ -75,24 +34,38 @@ export const calculateAllFileMetrics = async (
   processedFiles: ProcessedFile[],
   tokenCounterEncoding: TiktokenEncoding,
   progressCallback: RepomixProgressCallback,
+  deps = {
+    initTaskRunner,
+  },
 ): Promise<FileMetrics[]> => {
-  const pool = initializeWorkerPool();
-  const tasks = processedFiles.map((file, index) => ({
-    file,
-    index,
-    totalFiles: processedFiles.length,
-    encoding: tokenCounterEncoding,
-  }));
+  const runTask = deps.initTaskRunner(processedFiles.length);
+  const tasks = processedFiles.map(
+    (file, index) =>
+      ({
+        file,
+        index,
+        totalFiles: processedFiles.length,
+        encoding: tokenCounterEncoding,
+      }) satisfies FileMetricsTask,
+  );
 
   try {
     const startTime = process.hrtime.bigint();
     logger.trace(`Starting metrics calculation for ${processedFiles.length} files using worker pool`);
 
-    // Process files in chunks
-    const results = await processFileChunks(pool, tasks, progressCallback);
+    let completedTasks = 0;
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${pc.dim(task.file.path)}`);
+          return result;
+        }),
+      ),
+    );
 
     const endTime = process.hrtime.bigint();
-    const duration = Number(endTime - startTime) / 1e6; // Convert to milliseconds
+    const duration = Number(endTime - startTime) / 1e6;
     logger.trace(`Metrics calculation completed in ${duration.toFixed(2)}ms`);
 
     return results;
@@ -101,14 +74,3 @@ export const calculateAllFileMetrics = async (
     throw error;
   }
 };
-
-/**
- * Cleanup worker pool resources
- */
-export const cleanupWorkerPool = async (): Promise<void> => {
-  if (workerPool) {
-    logger.trace('Cleaning up metrics worker pool');
-    await workerPool.destroy();
-    workerPool = null;
-  }
-};
diff --git a/src/core/metrics/calculateIndividualFileMetrics.ts b/src/core/metrics/calculateIndividualFileMetrics.ts
deleted file mode 100644
index 7dc66bc9..00000000
--- a/src/core/metrics/calculateIndividualFileMetrics.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import { setTimeout } from 'node:timers/promises';
-import pc from 'picocolors';
-import type { RepomixProgressCallback } from '../../shared/types.js';
-import type { ProcessedFile } from '../file/fileTypes.js';
-import type { TokenCounter } from '../tokenCount/tokenCount.js';
-
-export interface FileMetrics {
-  path: string;
-  charCount: number;
-  tokenCount: number;
-}
-
-export const calculateIndividualFileMetrics = async (
-  file: ProcessedFile,
-  index: number,
-  totalFiles: number,
-  tokenCounter: TokenCounter,
-  progressCallback: RepomixProgressCallback,
-): Promise<FileMetrics> => {
-  const charCount = file.content.length;
-  const tokenCount = tokenCounter.countTokens(file.content, file.path);
-
-  progressCallback(`Calculating metrics... (${index + 1}/${totalFiles}) ${pc.dim(file.path)}`);
-
-  // Sleep for a short time to prevent blocking the event loop
-  await setTimeout(1);
-
-  return { path: file.path, charCount, tokenCount };
-};
diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts
index 13538e33..29b8a622 100644
--- a/src/core/metrics/calculateMetrics.ts
+++ b/src/core/metrics/calculateMetrics.ts
@@ -1,10 +1,8 @@
-import { TiktokenEncoding } from 'tiktoken';
 import type { RepomixConfigMerged } from '../../config/configSchema.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { ProcessedFile } from '../file/fileTypes.js';
-import { TokenCounter } from '../tokenCount/tokenCount.js';
-import { aggregateMetrics } from './aggregateMetrics.js';
 import { calculateAllFileMetrics } from './calculateAllFileMetrics.js';
+import { calculateOutputMetrics } from './calculateOutputMetrics.js';
 
 export interface CalculateMetricsResult {
   totalFiles: number;
@@ -19,11 +17,33 @@ export const calculateMetrics = async (
   output: string,
   progressCallback: RepomixProgressCallback,
   config: RepomixConfigMerged,
+  deps = {
+    calculateAllFileMetrics,
+    calculateOutputMetrics,
+  },
 ): Promise<CalculateMetricsResult> => {
   progressCallback('Calculating metrics...');
 
-  const fileMetrics = await calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback);
-  const result = aggregateMetrics(fileMetrics, processedFiles, output, config.tokenCount.encoding);
+  const [fileMetrics, totalTokens] = await Promise.all([
+    deps.calculateAllFileMetrics(processedFiles, config.tokenCount.encoding, progressCallback),
+    deps.calculateOutputMetrics(output, config.tokenCount.encoding),
+  ]);
 
-  return result;
+  const totalFiles = processedFiles.length;
+  const totalCharacters = output.length;
+
+  const fileCharCounts: Record<string, number> = {};
+  const fileTokenCounts: Record<string, number> = {};
+  for (const file of fileMetrics) {
+    fileCharCounts[file.path] = file.charCount;
+    fileTokenCounts[file.path] = file.tokenCount;
+  }
+
+  return {
+    totalFiles,
+    totalCharacters,
+    totalTokens,
+    fileCharCounts,
+    fileTokenCounts,
+  };
 };
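`calculateMetrics` previously computed per-file metrics and then counted the whole output inside `aggregateMetrics`, strictly in sequence. The two passes are independent (per-file counts versus one count over the final output string), so they now overlap via `Promise.all`. A sketch of the shape of the change; the `declare`d bindings stand in for the function's real arguments:

```typescript
import type { TiktokenEncoding } from 'tiktoken';
import type { ProcessedFile } from '../file/fileTypes.js';
import { calculateAllFileMetrics } from './calculateAllFileMetrics.js';
import { calculateOutputMetrics } from './calculateOutputMetrics.js';

// Assumed inputs; in calculateMetrics these come from the caller.
declare const processedFiles: ProcessedFile[];
declare const output: string;
declare const encoding: TiktokenEncoding;

// Before: total time ~ t(fileMetrics) + t(outputTokens), two sequential awaits.
// After: total time ~ max(t(fileMetrics), t(outputTokens)).
const [fileMetrics, totalTokens] = await Promise.all([
  calculateAllFileMetrics(processedFiles, encoding, () => {}),
  calculateOutputMetrics(output, encoding),
]);
```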
diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts
new file mode 100644
index 00000000..742299a2
--- /dev/null
+++ b/src/core/metrics/calculateOutputMetrics.ts
@@ -0,0 +1,45 @@
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { Piscina } from 'piscina';
+import type { TiktokenEncoding } from 'tiktoken';
+import { logger } from '../../shared/logger.js';
+import type { OutputMetricsTask } from './workers/outputMetricsWorker.js';
+
+const initTaskRunner = () => {
+  const pool = new Piscina({
+    filename: new URL('./workers/outputMetricsWorker.js', import.meta.url).href,
+    // Set minThreads and maxThreads to 1
+    minThreads: 1,
+    maxThreads: 1,
+    idleTimeout: 5000,
+  });
+
+  return (task: OutputMetricsTask) => pool.run(task);
+};
+
+export const calculateOutputMetrics = async (
+  content: string,
+  encoding: TiktokenEncoding,
+  path?: string,
+  deps = {
+    initTaskRunner,
+  },
+): Promise<number> => {
+  const runTask = deps.initTaskRunner();
+
+  try {
+    logger.trace(`Starting output token count for ${path}`);
+    const startTime = process.hrtime.bigint();
+
+    const result = await runTask({ content, encoding, path });
+
+    const endTime = process.hrtime.bigint();
+    const duration = Number(endTime - startTime) / 1e6;
+    logger.trace(`Output token count completed in ${duration.toFixed(2)}ms`);
+
+    return result;
+  } catch (error) {
+    logger.error('Error during token count:', error);
+    throw error;
+  }
+};
diff --git a/src/core/metrics/workers/metricsWorker.ts b/src/core/metrics/workers/fileMetricsWorker.ts
similarity index 70%
rename from src/core/metrics/workers/metricsWorker.ts
rename to src/core/metrics/workers/fileMetricsWorker.ts
index 77d9c44e..e9cf4748 100644
--- a/src/core/metrics/workers/metricsWorker.ts
+++ b/src/core/metrics/workers/fileMetricsWorker.ts
@@ -2,9 +2,9 @@ import type { TiktokenEncoding } from 'tiktoken';
 import { logger } from '../../../shared/logger.js';
 import type { ProcessedFile } from '../../file/fileTypes.js';
 import { TokenCounter } from '../../tokenCount/tokenCount.js';
-import type { FileMetrics } from '../calculateIndividualFileMetrics.js';
+import type { FileMetrics } from './types.js';
 
-interface MetricsWorkerInput {
+export interface FileMetricsTask {
   file: ProcessedFile;
   index: number;
   totalFiles: number;
@@ -27,18 +27,25 @@ const getTokenCounter = (encoding: TiktokenEncoding): TokenCounter => {
 /**
  * Worker thread function that calculates metrics for a single file
  */
-export default async ({ file, index, totalFiles, encoding }: MetricsWorkerInput): Promise<FileMetrics> => {
+export default async ({ file, encoding }: FileMetricsTask): Promise<FileMetrics> => {
   const processStartAt = process.hrtime.bigint();
-  const counter = getTokenCounter(encoding);
-  const charCount = file.content.length;
-  const tokenCount = counter.countTokens(file.content, file.path);
-
   const processEndAt = process.hrtime.bigint();
   logger.trace(
     `Calculated metrics for ${file.path}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
   );
 
+  return calculateIndividualFileMetrics(file, encoding);
+};
+
+export const calculateIndividualFileMetrics = async (
+  file: ProcessedFile,
+  encoding: TiktokenEncoding,
+): Promise<FileMetrics> => {
+  const charCount = file.content.length;
+  const tokenCounter = getTokenCounter(encoding);
+  const tokenCount = tokenCounter.countTokens(file.content, file.path);
+
   return { path: file.path, charCount, tokenCount };
 };
diff --git a/src/core/metrics/workers/outputMetricsWorker.ts b/src/core/metrics/workers/outputMetricsWorker.ts
new file mode 100644
index 00000000..7ed5a6fe
--- /dev/null
+++ b/src/core/metrics/workers/outputMetricsWorker.ts
@@ -0,0 +1,50 @@
+// src/core/metrics/workers/tokenCountWorker.ts
+
+import type { TiktokenEncoding } from 'tiktoken';
+import { logger } from '../../../shared/logger.js';
+import { TokenCounter } from '../../tokenCount/tokenCount.js';
+
+export interface OutputMetricsTask {
+  content: string;
+  encoding: TiktokenEncoding;
+  path?: string;
+}
+
+// Worker-level singleton for TokenCounter
+let tokenCounter: TokenCounter | null = null;
+
+/**
+ * Get or create TokenCounter instance
+ */
+const getTokenCounter = (encoding: TiktokenEncoding): TokenCounter => {
+  if (!tokenCounter) {
+    tokenCounter = new TokenCounter(encoding);
+  }
+  return tokenCounter;
+};
+
+/**
+ * Worker thread function that counts tokens for content
+ */
+export default async ({ content, encoding, path }: OutputMetricsTask): Promise<number> => {
+  const processStartAt = process.hrtime.bigint();
+  const counter = getTokenCounter(encoding);
+  const tokenCount = counter.countTokens(content, path);
+
+  const processEndAt = process.hrtime.bigint();
+  if (path) {
+    logger.trace(
+      `Counted tokens for ${path}. Count: ${tokenCount}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
+    );
+  }
+
+  return tokenCount;
+};
+
+// Cleanup when worker is terminated
+process.on('exit', () => {
+  if (tokenCounter) {
+    tokenCounter.free();
+    tokenCounter = null;
+  }
+});
diff --git a/src/core/metrics/workers/types.ts b/src/core/metrics/workers/types.ts
new file mode 100644
index 00000000..60f2d759
--- /dev/null
+++ b/src/core/metrics/workers/types.ts
@@ -0,0 +1,5 @@
+export interface FileMetrics {
+  path: string;
+  charCount: number;
+  tokenCount: number;
+}
diff --git a/src/core/packager.ts b/src/core/packager.ts
index 3c031ada..ce929b8d 100644
--- a/src/core/packager.ts
+++ b/src/core/packager.ts
@@ -38,7 +38,7 @@ export const pack = async (
   const { filePaths } = await deps.searchFiles(rootDir, config);
 
   progressCallback('Collecting files...');
-  const rawFiles = await deps.collectFiles(filePaths, rootDir);
+  const rawFiles = await deps.collectFiles(filePaths, rootDir, progressCallback);
 
   const { safeFilePaths, safeRawFiles, suspiciousFilesResults } = await deps.validateFileSafety(
     rawFiles,
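With `pack` now forwarding its callback into `collectFiles`, one caller-supplied callback observes every phase: collection, security check, processing, and metrics. A usage sketch; the `config` binding is assumed to be built elsewhere (for example via `loadFileConfig`/`mergeConfigs` as in the integration test below):

```typescript
import type { RepomixConfigMerged } from './config/configSchema.js';
import { pack } from './core/packager.js';

declare const config: RepomixConfigMerged; // assumed to be prepared by the caller

// Messages arrive preformatted, e.g. "Collect file... (12/345) src/index.ts".
const result = await pack('/path/to/repo', config, (message) => {
  process.stderr.write(`\r${message}`);
});
```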
diff --git a/src/core/security/runSecurityCheckIfEnabled.ts b/src/core/security/runSecurityCheckIfEnabled.ts
deleted file mode 100644
index 2b997159..00000000
--- a/src/core/security/runSecurityCheckIfEnabled.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import type { RepomixConfigMerged } from '../../config/configSchema.js';
-import type { RepomixProgressCallback } from '../../shared/types.js';
-import type { RawFile } from '../file/fileTypes.js';
-import { type SuspiciousFileResult, runSecurityCheck } from './securityCheck.js';
-
-export const runSecurityCheckIfEnabled = async (
-  rawFiles: RawFile[],
-  config: RepomixConfigMerged,
-  progressCallback: RepomixProgressCallback,
-  deps = {
-    runSecurityCheck,
-  },
-): Promise<SuspiciousFileResult[]> => {
-  if (config.security.enableSecurityCheck) {
-    progressCallback('Running security check...');
-    return await deps.runSecurityCheck(rawFiles, progressCallback);
-  }
-  return [];
-};
diff --git a/src/core/security/securityCheck.ts b/src/core/security/securityCheck.ts
index 36690b2d..119deaa4 100644
--- a/src/core/security/securityCheck.ts
+++ b/src/core/security/securityCheck.ts
@@ -6,106 +6,65 @@ import { logger } from '../../shared/logger.js';
 import { getWorkerThreadCount } from '../../shared/processConcurrency.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { RawFile } from '../file/fileTypes.js';
+import type { SecurityCheckTask } from './workers/securityCheckWorker.js';
 
 export interface SuspiciousFileResult {
   filePath: string;
   messages: string[];
 }
 
-// Worker pool singleton
-let workerPool: Piscina | null = null;
+const initTaskRunner = (numOfTasks: number) => {
+  const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks);
+  logger.trace(`Initializing worker pool with min=${minThreads}, max=${maxThreads} threads`);
 
-/**
- * Initialize the worker pool
- */
-const initializeWorkerPool = (): Piscina => {
-  if (workerPool) {
-    return workerPool;
-  }
-
-  const { minThreads, maxThreads } = getWorkerThreadCount();
-  logger.trace(`Initializing security check worker pool with min=${minThreads}, max=${maxThreads} threads`);
-
-  workerPool = new Piscina({
-    filename: path.resolve(path.dirname(fileURLToPath(import.meta.url)), './workers/securityCheckWorker.js'),
+  const pool = new Piscina({
+    filename: new URL('./workers/securityCheckWorker.js', import.meta.url).href,
     minThreads,
     maxThreads,
     idleTimeout: 5000,
   });
 
-  return workerPool;
+  return (task: SecurityCheckTask) => pool.run(task);
 };
 
-/**
- * Cleanup worker pool resources
- */
-export const cleanupWorkerPool = async (): Promise<void> => {
-  if (workerPool) {
-    logger.trace('Cleaning up security check worker pool');
-    await workerPool.destroy();
-    workerPool = null;
-  }
-};
-
-/**
- * Process files in chunks to maintain progress visibility
- */
-async function processFileChunks(
-  pool: Piscina,
-  tasks: Array<{ filePath: string; content: string }>,
-  progressCallback: RepomixProgressCallback,
-  chunkSize = 100,
-): Promise<SuspiciousFileResult[]> {
-  const results: SuspiciousFileResult[] = [];
-  let completedTasks = 0;
-  const totalTasks = tasks.length;
-
-  // Process files in chunks
-  for (let i = 0; i < tasks.length; i += chunkSize) {
-    const chunk = tasks.slice(i, i + chunkSize);
-    const chunkPromises = chunk.map((task) => {
-      return pool.run(task).then((result) => {
-        completedTasks++;
-        progressCallback(`Running security check... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
-        return result;
-      });
-    });
-
-    const chunkResults = await Promise.all(chunkPromises);
-    results.push(...chunkResults.filter((result): result is SuspiciousFileResult => result !== null));
-
-    // Allow event loop to process other tasks
-    await new Promise((resolve) => setTimeout(resolve, 0));
-  }
-
-  return results;
-}
-
-/**
- * Run security checks on multiple files in parallel using worker threads
- */
 export const runSecurityCheck = async (
   rawFiles: RawFile[],
   progressCallback: RepomixProgressCallback = () => {},
+  deps = {
+    initTaskRunner,
+  },
 ): Promise<SuspiciousFileResult[]> => {
-  const pool = initializeWorkerPool();
-  const tasks = rawFiles.map((file) => ({
-    filePath: file.path,
-    content: file.content,
-  }));
+  const runTask = deps.initTaskRunner(rawFiles.length);
+  const tasks = rawFiles.map(
+    (file) =>
+      ({
+        filePath: file.path,
+        content: file.content,
+      }) satisfies SecurityCheckTask,
+  );
 
   try {
     logger.trace(`Starting security check for ${tasks.length} files`);
     const startTime = process.hrtime.bigint();
 
-    // Process files in chunks
-    const results = await processFileChunks(pool, tasks, progressCallback);
+    let completedTasks = 0;
+    const totalTasks = tasks.length;
+
+    const results = await Promise.all(
+      tasks.map((task) =>
+        runTask(task).then((result) => {
+          completedTasks++;
+          progressCallback(`Running security check... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
+          return result;
+        }),
+      ),
+    );
 
     const endTime = process.hrtime.bigint();
     const duration = Number(endTime - startTime) / 1e6;
     logger.trace(`Security check completed in ${duration.toFixed(2)}ms`);
 
-    return results;
+    return results.filter((result): result is SuspiciousFileResult => result !== null);
   } catch (error) {
     logger.error('Error during security check:', error);
     throw error;
diff --git a/src/core/security/validateFileSafety.ts b/src/core/security/validateFileSafety.ts
index 8c0a345b..ec678134 100644
--- a/src/core/security/validateFileSafety.ts
+++ b/src/core/security/validateFileSafety.ts
@@ -3,7 +3,7 @@ import { logger } from '../../shared/logger.js';
 import type { RepomixProgressCallback } from '../../shared/types.js';
 import type { RawFile } from '../file/fileTypes.js';
 import { filterOutUntrustedFiles } from './filterOutUntrustedFiles.js';
-import { runSecurityCheckIfEnabled } from './runSecurityCheckIfEnabled.js';
+import { type SuspiciousFileResult, runSecurityCheck } from './securityCheck.js';
 
 // marks which files are suspicious and which are safe
 export const validateFileSafety = async (
@@ -11,11 +11,17 @@ export const validateFileSafety = async (
   progressCallback: RepomixProgressCallback,
   config: RepomixConfigMerged,
   deps = {
-    runSecurityCheckIfEnabled,
+    runSecurityCheck,
     filterOutUntrustedFiles,
   },
 ) => {
-  const suspiciousFilesResults = await deps.runSecurityCheckIfEnabled(rawFiles, config, progressCallback);
+  let suspiciousFilesResults: SuspiciousFileResult[] = [];
+
+  if (config.security.enableSecurityCheck) {
+    progressCallback('Running security check...');
+    suspiciousFilesResults = await deps.runSecurityCheck(rawFiles, progressCallback);
+  }
+
   const safeRawFiles = deps.filterOutUntrustedFiles(rawFiles, suspiciousFilesResults);
   const safeFilePaths = safeRawFiles.map((file) => file.path);
   logger.trace('Safe files count:', safeRawFiles.length);
diff --git a/src/core/security/workers/securityCheckWorker.ts b/src/core/security/workers/securityCheckWorker.ts
index 086af9ae..4b3183d8 100644
--- a/src/core/security/workers/securityCheckWorker.ts
+++ b/src/core/security/workers/securityCheckWorker.ts
@@ -3,47 +3,7 @@ import { creator } from '@secretlint/secretlint-rule-preset-recommend';
 import type { SecretLintCoreConfig, SecretLintCoreResult } from '@secretlint/types';
 import { logger } from '../../../shared/logger.js';
 
-/**
- * Create SecretLint configuration for the worker
- */
-export const createSecretLintConfig = (): SecretLintCoreConfig => ({
-  rules: [
-    {
-      id: '@secretlint/secretlint-rule-preset-recommend',
-      rule: creator,
-    },
-  ],
-});
-
-/**
- * Run SecretLint check on a single file
- */
-export const runSecretLint = async (
-  filePath: string,
-  content: string,
-  config: SecretLintCoreConfig,
-): Promise<SecretLintCoreResult> => {
-  const result = await lintSource({
-    source: {
-      filePath: filePath,
-      content: content,
-      ext: filePath.split('.').pop() || '',
-      contentType: 'text',
-    },
-    options: {
-      config: config,
-    },
-  });
-
-  if (result.messages.length > 0) {
-    logger.trace(`Found ${result.messages.length} issues in ${filePath}`);
-    logger.trace(result.messages.map((message) => `  - ${message.message}`).join('\n'));
-  }
-
-  return result;
-};
-
-interface SecurityCheckWorkerInput {
+export interface SecurityCheckTask {
   filePath: string;
   content: string;
 }
@@ -51,7 +11,7 @@ interface SecurityCheckWorkerInput {
 /**
  * Worker thread function that checks a single file for security issues
  */
-export default async ({ filePath, content }: SecurityCheckWorkerInput) => {
+export default async ({ filePath, content }: SecurityCheckTask) => {
   const config = createSecretLintConfig();
   const processStartAt = process.hrtime.bigint();
 
@@ -76,3 +36,37 @@ export default async ({ filePath, content }: SecurityCheckWorkerInput) => {
     throw error;
   }
 };
+
+export const runSecretLint = async (
+  filePath: string,
+  content: string,
+  config: SecretLintCoreConfig,
+): Promise<SecretLintCoreResult> => {
+  const result = await lintSource({
+    source: {
+      filePath: filePath,
+      content: content,
+      ext: filePath.split('.').pop() || '',
+      contentType: 'text',
+    },
+    options: {
+      config: config,
+    },
+  });
+
+  if (result.messages.length > 0) {
+    logger.trace(`Found ${result.messages.length} issues in ${filePath}`);
+    logger.trace(result.messages.map((message) => `  - ${message.message}`).join('\n'));
+  }
+
+  return result;
+};
+
+export const createSecretLintConfig = (): SecretLintCoreConfig => ({
+  rules: [
+    {
+      id: '@secretlint/secretlint-rule-preset-recommend',
+      rule: creator,
+    },
+  ],
+});
diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts
index 272de726..d4c88521 100644
--- a/src/shared/processConcurrency.ts
+++ b/src/shared/processConcurrency.ts
@@ -10,10 +10,16 @@ export const getProcessConcurrency = (): number => {
 /**
  * Get the minimum and maximum number of threads for worker pools
 */
-export const getWorkerThreadCount = (): { minThreads: number; maxThreads: number } => {
+export const getWorkerThreadCount = (numOfTasks: number): { minThreads: number; maxThreads: number } => {
   const processConcurrency = getProcessConcurrency();
+
+  const minThreads = 1;
+
+  // Limit max threads based on number of tasks
+  const maxThreads = Math.max(minThreads, Math.min(processConcurrency, Math.floor(numOfTasks / 100)));
+
   return {
-    minThreads: Math.max(1, Math.floor(processConcurrency / 2)),
-    maxThreads: processConcurrency,
+    minThreads,
+    maxThreads,
   };
 };
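The new heuristic starts every pool at a single thread and grants roughly one thread per 100 tasks, capped by the CPU count, so small runs no longer spin up `processConcurrency / 2` workers just to read a handful of files. A worked example, assuming an 8-core machine where `getProcessConcurrency()` returns 8:

```typescript
import { getWorkerThreadCount } from './processConcurrency.js';

// maxThreads = max(1, min(8, floor(numOfTasks / 100)))
console.log(getWorkerThreadCount(50)); // { minThreads: 1, maxThreads: 1 } (floor(50/100) = 0, clamped to 1)
console.log(getWorkerThreadCount(250)); // { minThreads: 1, maxThreads: 2 }
console.log(getWorkerThreadCount(2000)); // { minThreads: 1, maxThreads: 8 } (capped at the core count)
```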
diff --git a/tests/cli/actions/remoteAction.test.ts b/tests/cli/actions/remoteAction.test.ts
index 5185bfb7..88f644f7 100644
--- a/tests/cli/actions/remoteAction.test.ts
+++ b/tests/cli/actions/remoteAction.test.ts
@@ -1,12 +1,15 @@
 import * as fs from 'node:fs/promises';
 import path from 'node:path';
 import { beforeEach, describe, expect, test, vi } from 'vitest';
+import type { DefaultActionRunnerResult } from '../../../src/cli/actions/defaultAction.js';
 import {
   copyOutputToCurrentDirectory,
   formatRemoteValueToUrl,
   isValidRemoteValue,
   runRemoteAction,
 } from '../../../src/cli/actions/remoteAction.js';
+import type { SuspiciousFileResult } from '../../../src/core/security/securityCheck.js';
+import { createMockConfig } from '../../testing/testUtils.js';
 
 vi.mock('node:fs/promises', async (importOriginal) => {
   const actual = await importOriginal();
@@ -33,6 +36,19 @@ describe('remoteAction functions', () => {
         execGitShallowClone: async (url: string, directory: string) => {
           await fs.writeFile(path.join(directory, 'README.md'), 'Hello, world!');
         },
+        runDefaultAction: async () => {
+          return {
+            packResult: {
+              totalFiles: 1,
+              totalCharacters: 1,
+              totalTokens: 1,
+              fileCharCounts: {},
+              fileTokenCounts: {},
+              suspiciousFilesResults: [],
+            },
+            config: createMockConfig(),
+          } satisfies DefaultActionRunnerResult;
+        },
       },
     );
   });
diff --git a/tests/core/file/fileCollect.test.ts b/tests/core/file/fileCollect.test.ts
index 0719b384..8c7536ed 100644
--- a/tests/core/file/fileCollect.test.ts
+++ b/tests/core/file/fileCollect.test.ts
@@ -5,7 +5,10 @@ import iconv from 'iconv-lite';
 import { isBinary } from 'istextorbinary';
 import jschardet from 'jschardet';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
-import { MAX_FILE_SIZE, collectFiles } from '../../../src/core/file/fileCollect.js';
+import { collectFiles } from '../../../src/core/file/fileCollect.js';
+import type { FileCollectTask } from '../../../src/core/file/workers/fileCollectWorker.js';
+import { MAX_FILE_SIZE } from '../../../src/core/file/workers/fileCollectWorker.js';
+import fileCollectWorker from '../../../src/core/file/workers/fileCollectWorker.js';
 import { logger } from '../../../src/shared/logger.js';
 
 vi.mock('node:fs/promises');
@@ -14,6 +17,12 @@ vi.mock('jschardet');
 vi.mock('iconv-lite');
 vi.mock('../../../src/shared/logger');
 
+const mockInitTaskRunner = () => {
+  return async (task: FileCollectTask) => {
+    return await fileCollectWorker(task);
+  };
+};
+
 describe('fileCollect', () => {
   beforeEach(() => {
     vi.resetAllMocks();
@@ -38,7 +47,9 @@ describe('fileCollect', () => {
     vi.mocked(jschardet.detect).mockReturnValue({ encoding: 'utf-8', confidence: 0.99 });
     vi.mocked(iconv.decode).mockReturnValue('decoded content');
 
-    const result = await collectFiles(mockFilePaths, mockRootDir);
+    const result = await collectFiles(mockFilePaths, mockRootDir, () => {}, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
     expect(result).toEqual([
      { path: 'file1.txt', content: 'decoded content' },
@@ -57,7 +68,9 @@ describe('fileCollect', () => {
     vi.mocked(jschardet.detect).mockReturnValue({ encoding: 'utf-8', confidence: 0.99 });
     vi.mocked(iconv.decode).mockReturnValue('decoded content');
 
-    const result = await collectFiles(mockFilePaths, mockRootDir);
+    const result = await collectFiles(mockFilePaths, mockRootDir, () => {}, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
     expect(result).toEqual([{ path: 'text.txt', content: 'decoded content' }]);
     expect(logger.debug).toHaveBeenCalledWith(`Skipping binary file: ${path.resolve('/root/binary.bin')}`);
@@ -84,7 +97,9 @@ describe('fileCollect', () => {
     vi.mocked(jschardet.detect).mockReturnValue({ encoding: 'utf-8', confidence: 0.99 });
     vi.mocked(iconv.decode).mockReturnValue('decoded content');
 
-    const result = await collectFiles(mockFilePaths, mockRootDir);
+    const result = await collectFiles(mockFilePaths, mockRootDir, () => {}, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
     expect(result).toEqual([{ path: 'normal.txt', content: 'decoded content' }]);
     expect(logger.log).toHaveBeenCalledWith('⚠️ Large File Warning:');
@@ -105,7 +120,9 @@ describe('fileCollect', () => {
     vi.mocked(isBinary).mockReturnValue(false);
     vi.mocked(fs.readFile).mockRejectedValue(new Error('Read error'));
 
-    const result = await collectFiles(mockFilePaths, mockRootDir);
+    const result = await collectFiles(mockFilePaths, mockRootDir, () => {}, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
     expect(result).toEqual([]);
     expect(logger.warn).toHaveBeenCalledWith(
diff --git a/tests/core/file/fileProcess.test.ts b/tests/core/file/fileProcess.test.ts
index 0ced59ef..7bce5c8a 100644
--- a/tests/core/file/fileProcess.test.ts
+++ b/tests/core/file/fileProcess.test.ts
@@ -1,11 +1,19 @@
 import { describe, expect, it, vi } from 'vitest';
 import { getFileManipulator } from '../../../src/core/file/fileManipulate.js';
-import { processContent, processFiles } from '../../../src/core/file/fileProcess.js';
+import { processFiles } from '../../../src/core/file/fileProcess.js';
 import type { RawFile } from '../../../src/core/file/fileTypes.js';
+import { type FileProcessTask, processContent } from '../../../src/core/file/workers/fileProcessWorker.js';
+import fileProcessWorker from '../../../src/core/file/workers/fileProcessWorker.js';
 import { createMockConfig } from '../../testing/testUtils.js';
 
 vi.mock('../../../src/core/file/fileManipulate');
 
+const mockInitTaskRunner = (numOfTasks: number) => {
+  return async (task: FileProcessTask) => {
+    return await fileProcessWorker(task);
+  };
+};
+
 describe('fileProcess', () => {
   describe('processFiles', () => {
     it('should process multiple files', async () => {
@@ -25,7 +33,9 @@ describe('fileProcess', () => {
       removeEmptyLines: (content: string) => content.replace(/^\s*[\r\n]/gm, ''),
     });
 
-    const result = await processFiles(mockRawFiles, config, () => {});
+    const result = await processFiles(mockRawFiles, config, () => {}, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
     expect(result).toEqual([
       { path: 'file1.js', content: 'const a = 1;' },
@@ -50,7 +60,7 @@
       removeEmptyLines: (content: string) => content.replace(/^\s*[\r\n]/gm, ''),
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe('const a = 1;\nconst b = 2;');
   });
@@ -65,7 +75,7 @@
       },
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe(content.trim());
   });
@@ -82,7 +92,7 @@
 
     vi.mocked(getFileManipulator).mockReturnValue(null);
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe(content);
   });
@@ -98,7 +108,7 @@
       },
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe('1: Line 1\n2: Line 2\n3: Line 3');
   });
@@ -114,7 +124,7 @@
       },
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe('Line 1\nLine 2\nLine 3');
   });
@@ -130,7 +140,7 @@
       },
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     expect(result).toBe('1: ');
   });
@@ -146,7 +156,7 @@
       },
     });
 
-    const result = await processContent(content, filePath, config);
+    const result = await processContent({ path: filePath, content }, config);
 
     const lines = result.split('\n');
     expect(lines[0]).toBe(' 1: Line');
diff --git a/tests/core/metrics/aggregateMetrics.test.ts b/tests/core/metrics/aggregateMetrics.test.ts
deleted file mode 100644
index 7ed7dd90..00000000
--- a/tests/core/metrics/aggregateMetrics.test.ts
+++ /dev/null
@@ -1,57 +0,0 @@
-import { describe, expect, it } from 'vitest';
-import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
-import { aggregateMetrics } from '../../../src/core/metrics/aggregateMetrics.js';
-import type { FileMetrics } from '../../../src/core/metrics/calculateIndividualFileMetrics.js';
-import type { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';
-
-describe('aggregateMetrics', () => {
-  it('should aggregate metrics correctly', () => {
-    const fileMetrics: FileMetrics[] = [
-      { path: 'file1.txt', charCount: 100, tokenCount: 10 },
-      { path: 'file2.txt', charCount: 200, tokenCount: 20 },
-    ];
-    const processedFiles: ProcessedFile[] = [
-      { path: 'file1.txt', content: 'a' },
-      { path: 'file2.txt', content: 'b'.repeat(200) },
-    ];
-    const output = 'a'.repeat(300);
-    const tokenCounter = {
-      countTokens: (content: string) => content.length / 10,
-    } as TokenCounter;
-
-    const result = aggregateMetrics(fileMetrics, processedFiles, output, tokenCounter);
-
-    expect(result).toEqual({
-      totalFiles: 2,
-      totalCharacters: 300,
-      totalTokens: 30,
-      fileCharCounts: {
-        'file1.txt': 100,
-        'file2.txt': 200,
-      },
-      fileTokenCounts: {
-        'file1.txt': 10,
-        'file2.txt': 20,
-      },
-    });
-  });
-
-  it('should handle empty file metrics', () => {
-    const fileMetrics: FileMetrics[] = [];
-    const processedFiles: ProcessedFile[] = [];
-    const output = '';
-    const tokenCounter = {
-      countTokens: (content: string) => content.length / 10,
-    } as TokenCounter;
-
-    const result = aggregateMetrics(fileMetrics, processedFiles, output, tokenCounter);
-
-    expect(result).toEqual({
-      totalFiles: 0,
-      totalCharacters: 0,
-      totalTokens: 0,
-      fileCharCounts: {},
-      fileTokenCounts: {},
-    });
-  });
-});
diff --git a/tests/core/metrics/calculateAllFileMetrics.test.ts b/tests/core/metrics/calculateAllFileMetrics.test.ts
index cbf3fed7..6d46505a 100644
--- a/tests/core/metrics/calculateAllFileMetrics.test.ts
+++ b/tests/core/metrics/calculateAllFileMetrics.test.ts
@@ -1,40 +1,37 @@
-import { type Mock, describe, expect, it, vi } from 'vitest';
+import { describe, expect, it, vi } from 'vitest';
 import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
 import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
-import { calculateIndividualFileMetrics } from '../../../src/core/metrics/calculateIndividualFileMetrics.js';
-import type { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';
+import {
+  type FileMetricsTask,
+  calculateIndividualFileMetrics,
+} from '../../../src/core/metrics/workers/fileMetricsWorker.js';
 import type { RepomixProgressCallback } from '../../../src/shared/types.js';
 
-vi.mock('../../../src/core/metrics/calculateIndividualFileMetrics.js');
 vi.mock('../../shared/processConcurrency', () => ({
   getProcessConcurrency: () => 1,
 }));
 
+const mockInitTaskRunner = (numOfTasks: number) => {
+  return async (task: FileMetricsTask) => {
+    return await calculateIndividualFileMetrics(task.file, task.encoding);
+  };
+};
+
 describe('calculateAllFileMetrics', () => {
   it('should calculate metrics for all files', async () => {
     const processedFiles: ProcessedFile[] = [
       { path: 'file1.txt', content: 'a'.repeat(100) },
       { path: 'file2.txt', content: 'b'.repeat(200) },
     ];
-    const tokenCounter = {} as TokenCounter;
     const progressCallback: RepomixProgressCallback = vi.fn();
 
-    (calculateIndividualFileMetrics as Mock).mockImplementation(
-      (file, _index, _totalFiles, _tokenCounter, _progressCallback) => {
-        return {
-          path: file.path,
-          charCount: file.content.length,
-          tokenCount: file.content.length / 10,
-        };
-      },
-    );
-
-    const result = await calculateAllFileMetrics(processedFiles, tokenCounter, progressCallback);
+    const result = await calculateAllFileMetrics(processedFiles, 'o200k_base', progressCallback, {
+      initTaskRunner: mockInitTaskRunner,
+    });
 
-    expect(calculateIndividualFileMetrics).toHaveBeenCalledTimes(2);
     expect(result).toEqual([
-      { path: 'file1.txt', charCount: 100, tokenCount: 10 },
-      { path: 'file2.txt', charCount: 200, tokenCount: 20 },
+      { path: 'file1.txt', charCount: 100, tokenCount: 13 },
+      { path: 'file2.txt', charCount: 200, tokenCount: 50 },
     ]);
   });
 });
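The expected token counts above change from the mocked `length / 10` values (10 and 20) to 13 and 50 because the test now routes through the real worker via `mockInitTaskRunner`, so `'a'.repeat(100)` and `'b'.repeat(200)` are tokenized with the actual `o200k_base` encoding. A sketch that should reproduce the figures with the project's `TokenCounter` (the counts are taken from the updated assertions, not recomputed here):

```typescript
import { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';

const counter = new TokenCounter('o200k_base');
console.log(counter.countTokens('a'.repeat(100))); // 13 per the updated test
console.log(counter.countTokens('b'.repeat(200))); // 50 per the updated test
counter.free(); // release the underlying tiktoken encoder
```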
diff --git a/tests/core/metrics/calculateIndividualFileMetrics.test.ts b/tests/core/metrics/calculateIndividualFileMetrics.test.ts
deleted file mode 100644
index be192f06..00000000
--- a/tests/core/metrics/calculateIndividualFileMetrics.test.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-import pc from 'picocolors';
-import { describe, expect, it, vi } from 'vitest';
-import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
-import { calculateIndividualFileMetrics } from '../../../src/core/metrics/calculateIndividualFileMetrics.js';
-import type { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';
-import type { RepomixProgressCallback } from '../../../src/shared/types.js';
-
-describe('calculateIndividualFileMetrics', () => {
-  it('should calculate file metrics and report progress', async () => {
-    const file: ProcessedFile = { path: 'file1.txt', content: 'a'.repeat(100) };
-    const index = 0;
-    const totalFiles = 1;
-    const tokenCounter = {
-      countTokens: vi.fn().mockReturnValue(10),
-    } as unknown as TokenCounter;
-    const progressCallback: RepomixProgressCallback = vi.fn();
-
-    const result = await calculateIndividualFileMetrics(file, index, totalFiles, tokenCounter, progressCallback);
-
-    expect(tokenCounter.countTokens).toHaveBeenCalledWith(file.content, file.path);
-    expect(progressCallback).toHaveBeenCalledWith(`Calculating metrics... (1/1) ${pc.dim('file1.txt')}`);
-    expect(result).toEqual({
-      path: 'file1.txt',
-      charCount: 100,
-      tokenCount: 10,
-    });
-  });
-});
diff --git a/tests/core/metrics/calculateMetrics.test.ts b/tests/core/metrics/calculateMetrics.test.ts
index 52bb5a4f..3bbd3168 100644
--- a/tests/core/metrics/calculateMetrics.test.ts
+++ b/tests/core/metrics/calculateMetrics.test.ts
@@ -1,6 +1,5 @@
 import { type Mock, describe, expect, it, vi } from 'vitest';
 import type { ProcessedFile } from '../../../src/core/file/fileTypes.js';
-import { aggregateMetrics } from '../../../src/core/metrics/aggregateMetrics.js';
 import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllFileMetrics.js';
 import { calculateMetrics } from '../../../src/core/metrics/calculateMetrics.js';
 import { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';
@@ -45,16 +44,16 @@ describe('calculateMetrics', () => {
         'file2.txt': 20,
       },
     };
-    (aggregateMetrics as unknown as Mock).mockReturnValue(aggregatedResult);
 
     const config = createMockConfig();
-    const result = await calculateMetrics(processedFiles, output, progressCallback, config);
+    const result = await calculateMetrics(processedFiles, output, progressCallback, config, {
+      calculateAllFileMetrics,
+      calculateOutputMetrics: () => Promise.resolve(30),
+    });
 
     expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...');
-    expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, mockTokenCounter, progressCallback);
-    expect(aggregateMetrics).toHaveBeenCalledWith(fileMetrics, processedFiles, output, mockTokenCounter);
-    expect(mockTokenCounter.free).toHaveBeenCalled();
+    expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, 'o200k_base', progressCallback);
 
     expect(result).toEqual(aggregatedResult);
   });
 });
diff --git a/tests/core/packager.test.ts b/tests/core/packager.test.ts
index f35c7ae6..c0705381 100644
--- a/tests/core/packager.test.ts
+++ b/tests/core/packager.test.ts
@@ -72,7 +72,7 @@ describe('packager', () => {
     const result = await pack('root', mockConfig, progressCallback, mockDeps);
 
     expect(mockDeps.searchFiles).toHaveBeenCalledWith('root', mockConfig);
-    expect(mockDeps.collectFiles).toHaveBeenCalledWith(mockFilePaths, 'root');
+    expect(mockDeps.collectFiles).toHaveBeenCalledWith(mockFilePaths, 'root', progressCallback);
     expect(mockDeps.validateFileSafety).toHaveBeenCalled();
     expect(mockDeps.processFiles).toHaveBeenCalled();
     expect(mockDeps.writeOutputToDisk).toHaveBeenCalled();
diff --git a/tests/core/security/runSecurityCheckIfEnabled.test.ts b/tests/core/security/runSecurityCheckIfEnabled.test.ts
deleted file mode 100644
index e6092e5b..00000000
--- a/tests/core/security/runSecurityCheckIfEnabled.test.ts
+++ /dev/null
@@ -1,48 +0,0 @@
-import { describe, expect, it, vi } from 'vitest';
-import type { RepomixConfigMerged } from '../../../src/config/configSchema.js';
-import type { RawFile } from '../../../src/core/file/fileTypes.js';
-import { runSecurityCheckIfEnabled } from '../../../src/core/security/runSecurityCheckIfEnabled.js';
-import type { SuspiciousFileResult } from '../../../src/core/security/securityCheck.js';
-import type { RepomixProgressCallback } from '../../../src/shared/types.js';
-
-describe('runSecurityCheckIfEnabled', () => {
-  it('should run security check if enabled in config', async () => {
-    const rawFiles: RawFile[] = [
-      { path: 'file1.txt', content: 'contents1' },
-      { path: 'file2.txt', content: 'contents2' },
-    ];
-    const config: RepomixConfigMerged = {
-      security: { enableSecurityCheck: true },
-    } as RepomixConfigMerged;
-    const progressCallback: RepomixProgressCallback = vi.fn();
-    const checkSecurity = vi.fn().mockResolvedValue([{ filePath: 'file1.txt' }] as SuspiciousFileResult[]);
-
-    const result = await runSecurityCheckIfEnabled(rawFiles, config, progressCallback, {
-      runSecurityCheck: checkSecurity,
-    });
-
-    expect(progressCallback).toHaveBeenCalledWith('Running security check...');
-    expect(checkSecurity).toHaveBeenCalledWith(rawFiles, progressCallback);
-    expect(result).toEqual([{ filePath: 'file1.txt' }]);
-  });
-
-  it('should not run security check if disabled in config', async () => {
-    const rawFiles: RawFile[] = [
-      { path: 'file1.txt', content: 'contents1' },
-      { path: 'file2.txt', content: 'contents2' },
-    ];
-    const config: RepomixConfigMerged = {
-      security: { enableSecurityCheck: false },
-    } as RepomixConfigMerged;
-    const progressCallback: RepomixProgressCallback = vi.fn();
-    const checkSecurity = vi.fn();
-
-    const result = await runSecurityCheckIfEnabled(rawFiles, config, progressCallback, {
-      runSecurityCheck: checkSecurity,
-    });
-
-    expect(progressCallback).not.toHaveBeenCalled();
-    expect(checkSecurity).not.toHaveBeenCalled();
-    expect(result).toEqual([]);
-  });
-});
diff --git a/tests/core/security/validateFileSafety.test.ts b/tests/core/security/validateFileSafety.test.ts
index 3edce75f..3ee23c40 100644
--- a/tests/core/security/validateFileSafety.test.ts
+++ b/tests/core/security/validateFileSafety.test.ts
@@ -21,13 +21,13 @@ describe('validateFileSafety', () => {
       { filePath: 'file2.txt', messages: ['something suspicious.'] },
     ];
     const deps = {
-      runSecurityCheckIfEnabled: vi.fn().mockResolvedValue(suspiciousFilesResults),
+      runSecurityCheck: vi.fn().mockResolvedValue(suspiciousFilesResults),
       filterOutUntrustedFiles: vi.fn().mockReturnValue(safeRawFiles),
     };
 
     const result = await validateFileSafety(rawFiles, progressCallback, config, deps);
 
-    expect(deps.runSecurityCheckIfEnabled).toHaveBeenCalledWith(rawFiles, config, progressCallback);
+    expect(deps.runSecurityCheck).toHaveBeenCalledWith(rawFiles, progressCallback);
     expect(deps.filterOutUntrustedFiles).toHaveBeenCalledWith(rawFiles, suspiciousFilesResults);
     expect(result).toEqual({
       safeRawFiles,
diff --git a/tests/integration-tests/packager.test.ts b/tests/integration-tests/packager.test.ts
index e361f14d..4c33ed49 100644
--- a/tests/integration-tests/packager.test.ts
+++ b/tests/integration-tests/packager.test.ts
@@ -5,13 +5,30 @@ import process from 'node:process';
 import { afterEach, beforeEach, describe, expect, test } from 'vitest';
 import { loadFileConfig, mergeConfigs } from '../../src/config/configLoad.js';
 import type { RepomixConfigFile, RepomixConfigMerged, RepomixOutputStyle } from '../../src/config/configSchema.js';
+import { collectFiles } from '../../src/core/file/fileCollect.js';
+import { searchFiles } from '../../src/core/file/fileSearch.js';
+import type { ProcessedFile } from '../../src/core/file/fileTypes.js';
+import type { FileCollectTask } from '../../src/core/file/workers/fileCollectWorker.js';
+import fileCollectWorker from '../../src/core/file/workers/fileCollectWorker.js';
+import fileProcessWorker from '../../src/core/file/workers/fileProcessWorker.js';
+import { generateOutput } from '../../src/core/output/outputGenerate.js';
 import { pack } from '../../src/core/packager.js';
+import { copyToClipboardIfEnabled } from '../../src/core/packager/copyToClipboardIfEnabled.js';
+import { writeOutputToDisk } from '../../src/core/packager/writeOutputToDisk.js';
+import { filterOutUntrustedFiles } from '../../src/core/security/filterOutUntrustedFiles.js';
+import { validateFileSafety } from '../../src/core/security/validateFileSafety.js';
 import { isWindows } from '../testing/testUtils.js';
 
 const fixturesDir = path.join(__dirname, 'fixtures', 'packager');
 const inputsDir = path.join(fixturesDir, 'inputs');
 const outputsDir = path.join(fixturesDir, 'outputs');
 
+const mockCollectFileInitTaskRunner = () => {
+  return async (task: FileCollectTask) => {
+    return await fileCollectWorker(task);
+  };
+};
+
 describe.runIf(!isWindows)('packager integration', () => {
   const testCases = [
     { desc: 'simple plain style', input: 'simple-project', output: 'simple-project-output.txt', config: {} },
@@ -50,7 +67,51 @@ describe.runIf(!isWindows)('packager integration', () => {
       });
 
       // Run the pack function
-      await pack(inputDir, mergedConfig);
+      await pack(inputDir, mergedConfig, () => {}, {
+        searchFiles,
+        collectFiles: (filePaths, rootDir, progressCallback) => {
+          return collectFiles(filePaths, rootDir, progressCallback, {
+            initTaskRunner: mockCollectFileInitTaskRunner,
+          });
+        },
+        processFiles: async (rawFiles, config, progressCallback) => {
+          const processedFiles: ProcessedFile[] = [];
+          for (const rawFile of rawFiles) {
+            processedFiles.push(await fileProcessWorker({ rawFile, config }));
+          }
+          return processedFiles;
+        },
+        generateOutput,
+        validateFileSafety: (rawFiles, progressCallback, config) => {
+          return validateFileSafety(rawFiles, progressCallback, config, {
+            runSecurityCheck: async () => [],
+            filterOutUntrustedFiles,
+          });
+        },
+        writeOutputToDisk,
+        copyToClipboardIfEnabled,
+        calculateMetrics: async (processedFiles, output, progressCallback, config) => {
+          return {
+            totalFiles: processedFiles.length,
+            totalCharacters: processedFiles.reduce((acc, file) => acc + file.content.length, 0),
+            totalTokens: processedFiles.reduce((acc, file) => acc + file.content.split(/\s+/).length, 0),
+            fileCharCounts: processedFiles.reduce(
+              (acc, file) => {
+                acc[file.path] = file.content.length;
+                return acc;
+              },
+              {} as Record<string, number>,
+            ),
+            fileTokenCounts: processedFiles.reduce(
+              (acc, file) => {
+                acc[file.path] = file.content.split(/\s+/).length;
+                return acc;
+              },
+              {} as Record<string, number>,
+            ),
+          };
+        },
+      });
 
       // Read the actual and expected outputs
       let actualOutput = await fs.readFile(actualOutputPath, 'utf-8');