Skip to content

Commit 23c5271

Browse files
authored
Merge pull request #4 from yamadashy/feature/comment-remove-option
feat: Add comment remove feature
2 parents 52948c5 + dbbfce5 commit 23c5271

13 files changed

+592
-11
lines changed

README.md

+44-2
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@
66

77
Repopack is a powerful tool that packs your entire repository into a single, AI-friendly file. Perfect for when you need to feed your codebase to Large Language Models (LLMs) or other AI tools.
88

9+
10+
911
## 🚀 Features
1012

1113
- **AI-Optimized**: Formats your codebase in a way that's easy for AI to understand and process.
1214
- **Simple to Use**: Just one command to pack your entire repository.
1315
- **Customizable**: Easily configure what to include or exclude.
1416
- **Git-Aware**: Automatically respects your .gitignore files.
1517

18+
19+
1620
## 🛠 Installation
1721

1822
You can install Repopack globally using npm:
@@ -33,6 +37,8 @@ Alternatively, you can use npx to run Repopack without installing it:
3337
npx repopack
3438
```
3539

40+
41+
3642
## 📊 Usage
3743

3844
To pack your entire repository:
@@ -81,6 +87,8 @@ repopack -c ./custom-config.json
8187
npx repopack src
8288
```
8389

90+
91+
8492
## ⚙️ Configuration
8593

8694
Create a `repopack.config.json` file in your project root for custom configurations. Here's an explanation of the configuration options:
@@ -89,6 +97,7 @@ Create a `repopack.config.json` file in your project root for custom configurati
8997
|--------|-------------|---------|
9098
|`output.filePath`| The name of the output file | `"repopack-output.txt"` |
9199
|`output.headerText`| Custom text to include in the file header |`null`|
100+
|`output.removeComments`| Whether to remove comments from supported file types (e.g. Python; see the Comment Removal section below for the full list) | `false` |
92101
|`ignore.useDefaultPatterns`| Whether to use default ignore patterns |`true`|
93102
|`ignore.customPatterns`| Additional patterns to ignore |`[]`|
94103

@@ -97,8 +106,9 @@ Example configuration:
97106
```json
98107
{
99108
"output": {
100-
"filePath": "custom-output.txt",
101-
"headerText": "Custom header information for the packed file."
109+
"filePath": "repopack-output.txt",
110+
"headerText": "Custom header information for the packed file.",
111+
"removeComments": true
102112
},
103113
"ignore": {
104114
"useDefaultPatterns": true,
@@ -119,6 +129,36 @@ Repopack automatically ignores certain files and directories by default:
119129

120130
This ensures that only relevant source code is included in the packed file. You can add additional ignore patterns using the `ignore.customPatterns` configuration option or the `-i` command line flag.
121131

132+
### Comment Removal
133+
134+
When `output.removeComments` is set to `true`, Repopack will attempt to remove comments from supported file types. This feature can help reduce the size of the output file and focus on the essential code content.
135+
136+
Currently supported file types for comment removal:
137+
138+
- HTML (.html)
139+
- CSS (.css, .scss, .sass)
140+
- JavaScript, React (.js, .jsx)
141+
- TypeScript (.ts, .tsx)
142+
- Vue (.vue)
143+
- Svelte (.svelte)
144+
- Python (.py)
145+
- PHP (.php)
146+
- Ruby (.rb)
147+
- C (.c)
148+
- C# (.cs)
149+
- Java (.java)
150+
- Go (.go)
151+
- Rust (.rs)
152+
- Swift (.swift)
153+
- Kotlin (.kt)
154+
- Dart (.dart)
155+
- Shell (.sh)
156+
- YAML (.yml, .yaml)
157+
158+
Note: The comment removal process is designed to be conservative to avoid accidentally removing code. In some complex cases, especially with nested comments or language-specific peculiarities, some comments might be retained.
159+
160+
161+
122162
## 📄 Output Format
123163

124164
Repopack generates a single file with clear separators between different parts of your codebase:
@@ -146,5 +186,7 @@ File: src/utils.js
146186

147187
This format ensures that AI tools can easily distinguish between different files in your codebase.
148188

189+
190+
149191
## 📜 License
150192
MIT

package-lock.json

+17-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,16 @@
5656
"is-binary-path": "^2.1.0",
5757
"jschardet": "^3.1.3",
5858
"log-update": "^6.0.0",
59-
"picocolors": "^1.0.1"
59+
"picocolors": "^1.0.1",
60+
"strip-comments": "^2.0.1"
6061
},
6162
"devDependencies": {
6263
"@eslint/js": "^9.7.0",
6364
"@types/eslint": "~8.56.10",
6465
"@types/eslint__js": "~8.42.3",
6566
"@types/eslint-config-prettier": "~6.11.3",
6667
"@types/node": "^20.14.10",
68+
"@types/strip-comments": "^2.0.4",
6769
"@typescript-eslint/eslint-plugin": "^7.16.0",
6870
"@typescript-eslint/parser": "^7.16.0",
6971
"@vitest/coverage-v8": "^2.0.2",

repopack.config.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
{
22
"output": {
33
"filePath": "repopack-output.txt",
4-
"headerText": "This repository contains the source code for the Repopack tool.\nRepopack is designed to pack repository contents into a single file,\nmaking it easier for AI systems to analyze and process the codebase.\n\nKey Features:\n- Configurable ignore patterns\n- Custom header text support\n- Efficient file processing and packing\n\nPlease refer to the README.md file for more detailed information on usage and configuration.\n"
4+
"headerText": "This repository contains the source code for the Repopack tool.\nRepopack is designed to pack repository contents into a single file,\nmaking it easier for AI systems to analyze and process the codebase.\n\nKey Features:\n- Configurable ignore patterns\n- Custom header text support\n- Efficient file processing and packing\n\nPlease refer to the README.md file for more detailed information on usage and configuration.\n",
5+
"removeComments": false
56
},
67
"ignore": {
78
"useDefaultPatterns": true,

src/config/defaultConfig.ts

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { RepopackConfigDefault } from '../types/index.js';
33
export const defaultConfig: RepopackConfigDefault = {
44
output: {
55
filePath: 'repopack-output.txt',
6+
removeComments: false,
67
},
78
ignore: {
89
useDefaultPatterns: true,

src/core/outputGenerator.ts

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Notes:
6868
- Some files may have been excluded based on .gitignore rules and Repopack's
6969
configuration.
7070
- Binary files are not included in this packed representation.
71+
${config.output.removeComments ? '- Code comments have been removed.\n' : ''}
7172
7273
For more information about Repopack, visit: https://github.com/yamadashy/repopack
7374
`;

src/core/packager.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ async function packDirectory(
8181
const subDirFiles = await packDirectory(fullPath, entryRelativePath, config, ignoreFilter, deps);
8282
packedFiles.push(...subDirFiles);
8383
} else {
84-
const content = await deps.processFile(fullPath);
84+
const content = await deps.processFile(fullPath, config);
8585
if (content) {
8686
packedFiles.push({ path: entryRelativePath, content });
8787
}

src/types/index.ts

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ interface RepopackConfigBase {
22
output?: {
33
filePath?: string;
44
headerText?: string;
5+
removeComments?: boolean;
56
};
67
ignore?: {
78
useDefaultPatterns?: boolean;
@@ -13,6 +14,7 @@ export type RepopackConfigDefault = RepopackConfigBase & {
1314
output: {
1415
filePath: string;
1516
headerText?: string;
17+
removeComments?: boolean;
1618
};
1719
ignore: {
1820
useDefaultPatterns: boolean;
@@ -24,6 +26,7 @@ export type RepopackConfigFile = RepopackConfigBase & {
2426
output?: {
2527
filePath?: string;
2628
headerText?: string;
29+
removeComments?: boolean;
2730
};
2831
ignore?: {
2932
useDefaultPatterns?: boolean;
@@ -35,6 +38,7 @@ export type RepopackConfigCli = RepopackConfigBase & {
3538
output?: {
3639
filePath?: string;
3740
headerText?: string;
41+
removeComments?: boolean;
3842
};
3943
ignore?: {
4044
useDefaultPatterns?: boolean;

src/utils/fileHandler.ts

+18-3
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,14 @@ import * as fs from 'fs/promises';
22
import isBinaryPath from 'is-binary-path';
33
import jschardet from 'jschardet';
44
import iconv from 'iconv-lite';
5+
import { RepopackConfigMerged } from '../types/index.js';
6+
import { getFileManipulator } from './fileManipulator.js';
57

6-
export async function processFile(filePath: string, fsModule = fs): Promise<string | null> {
8+
export async function processFile(
9+
filePath: string,
10+
config: RepopackConfigMerged,
11+
fsModule = fs,
12+
): Promise<string | null> {
713
// Skip binary files
814
if (isBinaryPath(filePath)) {
915
return null;
@@ -12,13 +18,22 @@ export async function processFile(filePath: string, fsModule = fs): Promise<stri
1218
try {
1319
const buffer = await fsModule.readFile(filePath);
1420
const encoding = jschardet.detect(buffer).encoding || 'utf-8';
15-
const content = iconv.decode(buffer, encoding);
21+
let content = iconv.decode(buffer, encoding);
1622

1723
if (!isValidTextContent(content)) {
1824
return null;
1925
}
2026

21-
return preprocessContent(content);
27+
content = preprocessContent(content);
28+
29+
if (config.output.removeComments) {
30+
const manipulator = getFileManipulator(filePath);
31+
if (manipulator) {
32+
content = manipulator.removeComments(content);
33+
}
34+
}
35+
36+
return content;
2237
} catch (error) {
2338
console.warn(`Error processing file ${filePath}:`, error);
2439
return null;

src/utils/fileManipulator.ts

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import path from 'path';
2+
import strip from 'strip-comments';
3+
4+
/**
 * Contract for a per-file-type content transformer.
 *
 * Implementations receive the full text of a file and return a new string;
 * the input string itself is never modified.
 */
interface FileManipulator {
  /**
   * Produce a copy of `content` with comments removed.
   *
   * @param content - Full text of the file to process.
   * @returns The processed text.
   */
  removeComments(content: string): string;
}
7+
8+
/**
 * Strip trailing horizontal whitespace from every line of `content`.
 *
 * Line terminators are deliberately excluded from the character class: a
 * plain `\s+$` with the `m` flag also matches newlines, which swallows blank
 * lines (and the final newline) instead of merely right-trimming each line.
 * Keeping the terminators intact preserves the line structure that the
 * comment strippers are asked to keep via `preserveNewlines`.
 *
 * @param content - Text whose lines should be right-trimmed.
 * @returns The text with the same line breaks but no trailing spaces/tabs.
 */
function rtrimLines(content: string): string {
  // `[^\S\r\n]` = any whitespace character except CR and LF.
  return content.replace(/[^\S\r\n]+$/gm, '');
}
11+
12+
/**
 * Comment remover backed by the `strip-comments` package for a single,
 * fixed language identifier.
 */
class StripCommentsManipulator implements FileManipulator {
  /**
   * @param language - Language identifier forwarded to `strip-comments`
   *   through its `language` option.
   */
  constructor(private readonly language: string) {}

  /**
   * Run `strip-comments` over `content` (asking it to preserve newlines) and
   * then pass the result through `rtrimLines`.
   *
   * @param content - Full text of the file to process.
   * @returns The text after comment stripping and line trimming.
   */
  removeComments(content: string): string {
    const stripped = strip(content, { language: this.language, preserveNewlines: true });
    return rtrimLines(stripped);
  }
}
24+
25+
/**
 * Comment remover for Python sources.
 *
 * Works in two passes: `strip-comments` handles the comments it knows about
 * for the `python` language, then a small scanner drops triple-quoted strings
 * that stand alone as a statement (docstrings and "block comment" strings).
 *
 * Triple-quoted strings that are part of an expression — assigned to a
 * variable, passed as an argument, placed inside brackets, prefixed
 * (`r"""`, `f"""`, ...) or sharing a line with other code — are kept, because
 * deleting them would remove data rather than comments.
 */
class PythonManipulator implements FileManipulator {
  /**
   * @param content - Full text of the Python file.
   * @returns The text with comments and standalone triple-quoted strings
   *   removed, passed through `rtrimLines`.
   */
  removeComments(content: string): string {
    const withoutComments = strip(content, { language: 'python', preserveNewlines: true });
    return rtrimLines(PythonManipulator.dropStandaloneTripleQuotedStrings(withoutComments));
  }

  /**
   * Copy `source` to the output, omitting every triple-quoted string literal
   * that is the only thing on its line(s) and is not nested in brackets.
   */
  private static dropStandaloneTripleQuotedStrings(source: string): string {
    let output = '';
    let bracketDepth = 0;
    // True while only indentation has been seen since the last line break.
    let onlyIndentSoFar = true;
    let i = 0;

    while (i < source.length) {
      const ch = source.charAt(i);

      if (ch === '#') {
        // Leftover line comment: copy verbatim so quotes inside it are not
        // mistaken for string delimiters.
        const newline = source.indexOf('\n', i);
        const stop = newline === -1 ? source.length : newline;
        output += source.slice(i, stop);
        i = stop;
        onlyIndentSoFar = false;
        continue;
      }

      if (ch === '"' || ch === "'") {
        const isTriple = source.startsWith(ch.repeat(3), i);
        const delimiter = isTriple ? ch.repeat(3) : ch;
        const end = PythonManipulator.findStringEnd(source, i + delimiter.length, delimiter);
        const standalone =
          isTriple &&
          bracketDepth === 0 &&
          onlyIndentSoFar &&
          PythonManipulator.restOfLineIsBlank(source, end);
        if (!standalone) {
          output += source.slice(i, end);
        }
        i = end;
        onlyIndentSoFar = false;
        continue;
      }

      if (ch === '\n') {
        // A trailing backslash continues the statement, so the next line is
        // not the start of a new statement.
        const before = source.charAt(i - 1) === '\r' ? source.charAt(i - 2) : source.charAt(i - 1);
        onlyIndentSoFar = before !== '\\';
      } else if (ch !== ' ' && ch !== '\t' && ch !== '\r') {
        onlyIndentSoFar = false;
        if (ch === '(' || ch === '[' || ch === '{') {
          bracketDepth += 1;
        } else if (ch === ')' || ch === ']' || ch === '}') {
          bracketDepth = Math.max(0, bracketDepth - 1);
        }
      }

      output += ch;
      i += 1;
    }

    return output;
  }

  /**
   * Return the index just past the closing `delimiter` of a string whose body
   * starts at `from`. Backslash escapes are skipped. An unterminated
   * single-quoted string ends at the line break; an unterminated
   * triple-quoted string runs to the end of the input.
   */
  private static findStringEnd(source: string, from: number, delimiter: string): number {
    const singleLine = delimiter.length === 1;
    let j = from;
    while (j < source.length) {
      const ch = source.charAt(j);
      if (ch === '\\') {
        j += 2;
        continue;
      }
      if (singleLine && ch === '\n') {
        return j;
      }
      if (source.startsWith(delimiter, j)) {
        return j + delimiter.length;
      }
      j += 1;
    }
    return source.length;
  }

  /** True when only whitespace separates `from` from the end of its line. */
  private static restOfLineIsBlank(source: string, from: number): boolean {
    const newline = source.indexOf('\n', from);
    const rest = source.slice(from, newline === -1 ? source.length : newline);
    return rest.trim() === '';
  }
}
37+
38+
/**
 * Chains several manipulators: each one receives the output of the previous
 * one, in the order they were passed to the constructor.
 */
class CompositeManipulator implements FileManipulator {
  private readonly stages: FileManipulator[];

  /**
   * @param manipulators - Manipulators to apply, first to last.
   */
  constructor(...manipulators: FileManipulator[]) {
    this.stages = manipulators;
  }

  /**
   * @param content - Full text of the file to process.
   * @returns The text after every stage has run; `content` unchanged when
   *   there are no stages.
   */
  removeComments(content: string): string {
    let current = content;
    for (const stage of this.stages) {
      current = stage.removeComments(current);
    }
    return current;
  }
}
49+
50+
/**
 * File extensions handled by a plain `StripCommentsManipulator`, grouped by
 * the language identifier handed to `strip-comments`.
 *
 * NOTE(review): several extensions borrow another language's identifier
 * (e.g. `.go`/`.rs`/`.kt`/`.dart` use `c`, `.sh`/`.yml`/`.yaml` use `perl`),
 * presumably because their comment syntax matches — confirm against the
 * strip-comments language list before adding more.
 */
const STRIP_LANGUAGE_EXTENSIONS: ReadonlyArray<readonly [string, readonly string[]]> = [
  ['c', ['.c', '.dart', '.go', '.kt', '.rs']],
  ['csharp', ['.cs']],
  ['css', ['.css']],
  ['html', ['.html']],
  ['java', ['.java']],
  ['javascript', ['.js', '.jsx', '.ts', '.tsx']],
  ['less', ['.less']],
  ['perl', ['.sh', '.yaml', '.yml']],
  ['php', ['.php']],
  ['ruby', ['.rb']],
  ['sass', ['.sass', '.scss']],
  ['sql', ['.sql']],
  ['swift', ['.swift']],
  ['xml', ['.xml']],
];

/** Build one `StripCommentsManipulator` per extension listed above. */
function buildStripManipulators(): Record<string, FileManipulator> {
  const table: Record<string, FileManipulator> = {};
  for (const [language, extensions] of STRIP_LANGUAGE_EXTENSIONS) {
    for (const extension of extensions) {
      table[extension] = new StripCommentsManipulator(language);
    }
  }
  return table;
}

/**
 * Manipulator for single-file components: applies the HTML, CSS and
 * JavaScript strippers one after another to the whole file.
 */
function createMarkupComponentManipulator(): FileManipulator {
  return new CompositeManipulator(
    new StripCommentsManipulator('html'),
    new StripCommentsManipulator('css'),
    new StripCommentsManipulator('javascript'),
  );
}

/** Lookup table from file extension (with leading dot) to its manipulator. */
const manipulators: Record<string, FileManipulator> = {
  ...buildStripManipulators(),

  '.py': new PythonManipulator(),

  '.vue': createMarkupComponentManipulator(),
  '.svelte': createMarkupComponentManipulator(),
};
89+
90+
/**
 * Look up the comment-removal manipulator for a file based on its extension.
 *
 * The extension is lowercased before the lookup so that files such as
 * `Foo.PY` or `INDEX.HTML` are handled the same way as their lowercase
 * counterparts; the table keys are all lowercase.
 *
 * @param filePath - Path (or bare file name) of the file being processed.
 * @returns The matching manipulator, or `null` when the extension is not in
 *   the table (including files with no extension).
 */
export function getFileManipulator(filePath: string): FileManipulator | null {
  const ext = path.extname(filePath).toLowerCase();
  return manipulators[ext] ?? null;
}

src/utils/gitignoreUtils.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import * as fs from 'fs/promises';
22
import path from 'path';
33
import ignore from 'ignore';
4+
import { logger } from './logger.js';
45

56
export async function getGitignorePatterns(rootDir: string, fsModule = fs): Promise<string[]> {
67
const gitignorePath = path.join(rootDir, '.gitignore');
78
try {
89
const gitignoreContent = await fsModule.readFile(gitignorePath, 'utf-8');
910
return parseGitignoreContent(gitignoreContent);
1011
} catch (error) {
11-
console.warn('No .gitignore file found or unable to read it.');
12+
logger.warn('No .gitignore file found or unable to read it.');
1213
return [];
1314
}
1415
}

0 commit comments

Comments
 (0)