Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,12 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.vscode/settings.json
history.txt
.DS_Store

# ignore default output directory
# Project specific
history.txt
cleanup.py
Caddyfile

# ignore default output directory
tmp/*
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,59 @@ Create a pull request or open an Issue about anything you'd like to see in gitin
- Repository cloning is limited to public GitHub repositories only
- Repositories that are too large will probably time out (if cloning takes longer than 20 seconds)


## Command Line Global Installation

```bash
pip install -e .
```

## Command Line Usage

The `gitingest` command line tool allows you to analyze any directory and create a text dump of its contents.

### Basic Usage

```bash
gitingest /path/to/directory
```

This will create a text file named after your directory (e.g., `directory.txt`) in your current working directory.

### Specifying Output Location

```bash
gitingest /path/to/directory -o /path/to/output.txt
```

### Options

- `-o, --output`: Specify the output file path (default: `<directory_name>.txt` in current directory)
- `-s, --max-size`: Maximum file size to process in bytes (default: 10MB)
- `-i, --ignore-pattern`: Additional patterns to ignore (can be used multiple times)

### Examples

```bash
# Basic usage
gitingest ~/projects/myproject

# Custom output location
gitingest ~/projects/myproject -o ~/Desktop/analysis.txt

# Ignore specific file patterns
gitingest ~/projects/myproject -i "*.csv" -i "*.json"

# Set maximum file size (e.g., 5MB)
gitingest ~/projects/myproject -s 5000000
```

### Output Format

The generated text file contains:
1. Summary statistics (file count, directory count, content size)
2. Directory tree structure
3. Contents of all text files

Files and directories that are commonly ignored (like `.git`, `node_modules`, cache directories, etc.) are automatically excluded. You can add additional patterns to ignore using the `-i` flag.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ slowapi
tokencost
pytest
pytest-asyncio
pytest-cov
pytest-cov
click>=8.0.0
21 changes: 21 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from setuptools import setup, find_packages

# Read the long description up front with an explicit encoding and a context
# manager: a bare open().read() leaks the file handle and decodes with the
# platform default, which can fail on non-UTF-8 locales.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

# Package metadata and the `gitingest` console-script entry point.
setup(
    name="gitingest",
    version="0.1.0",
    packages=find_packages(),
    include_package_data=True,
    install_requires=[
        "click",
    ],
    entry_points={
        "console_scripts": [
            # Exposes the click command defined in src/cli.py as `gitingest`.
            "gitingest=src.cli:main",
        ],
    },
    python_requires=">=3.6",
    author="Your Name",
    description="A tool to analyze and create text dumps of git repositories",
    long_description=long_description,
    long_description_content_type="text/markdown",
)
60 changes: 60 additions & 0 deletions src/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import click
import os
from .ingest import analyze_codebase, DEFAULT_IGNORE_PATTERNS, MAX_FILE_SIZE

@click.command()
@click.argument('path', type=click.Path(exists=True))
@click.option('--output', '-o', default=None, help='Output file path (default: <repo_name>.txt in current directory)')
@click.option('--max-size', '-s', default=MAX_FILE_SIZE, help='Maximum file size to process in bytes')
@click.option('--ignore-pattern', '-i', multiple=True, help='Additional patterns to ignore')
def main(path, output, max_size, ignore_pattern):
    """Analyze a directory and create a text dump of its contents.
    \f

    Everything after the form feed above is hidden from ``--help`` output.

    Args:
        path: Existing filesystem path to analyze (validated by click).
        output: Destination file path; when ``None`` the file is named
            ``<basename of path>.txt`` and written to the current directory.
        max_size: Maximum file size in bytes, forwarded to the analyzer as
            ``max_file_size``.
        ignore_pattern: Extra ignore patterns appended to
            ``DEFAULT_IGNORE_PATTERNS``.

    Raises:
        click.Abort: If analysis or writing the output fails; the error
            message is echoed to stderr first.
    """
    try:
        # Normalise to an absolute path so the basename is meaningful even
        # for inputs such as "." or "some/dir/".
        abs_path = os.path.abspath(path)
        repo_name = os.path.basename(abs_path)

        # Defaults first, then any user-supplied patterns.
        ignore_patterns = [*DEFAULT_IGNORE_PATTERNS, *ignore_pattern]

        # If no output file specified, use repo name in current directory.
        if output is None:
            output = f"{repo_name}.txt"

        # Query dict in the shape analyze_codebase is called with.
        query = {
            'local_path': abs_path,
            'subpath': '/',
            'user_name': os.path.basename(os.path.dirname(abs_path)),
            'repo_name': repo_name,
            'ignore_patterns': ignore_patterns,
            'include_patterns': [],
            'pattern_type': 'exclude',
            'max_file_size': max_size,
            'branch': None,
            'commit': None,
            'type': 'tree',
            'slug': repo_name,
        }

        # Run analysis; the result is unpacked as (summary, tree, content).
        summary, tree, content = analyze_codebase(query)

        # Write with an explicit UTF-8 encoding: the dump contains arbitrary
        # source text, and the platform default encoding may not be able to
        # represent every character.
        with open(output, 'w', encoding='utf-8') as f:
            f.write(f"Summary:\n{summary}\n\n")
            f.write(f"{tree}\n")
            f.write(content)

        click.echo(f"Analysis complete! Output written to: {output}")
        click.echo("\nSummary:")
        click.echo(summary)

    except Exception as e:
        # Top-level CLI boundary: report the failure and abort, keeping the
        # original exception chained for debugging.
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e

# Invoke the CLI command when this module is executed rather than imported.
if __name__ == "__main__":
    main()
Loading