Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Implement Custom Directory Tree Method #53

Merged
merged 3 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ https://github.com/eli64s/readme-ai/assets/43382407/e8260e78-b684-4e72-941c-b304
<tr>
<h3>Directory Tree and File Summaries</h3>
<p>
‣ Your project's directory structure is visualized using the <i>tree</i> command.
‣ Your project's directory structure is visualized using a custom tree function.
</p>
<p>
‣ Each file in the codebase is summarized by OpenAI's <i>GPT</i> model.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "readmeai"
version = "0.3.077"
version = "0.3.079"
description = "🚀 Generate beautiful README.md files from the terminal. Powered by OpenAI's GPT LLMs 💫"
authors = ["Eli <0x.eli.64s@gmail.com>"]
license = "MIT"
Expand Down
64 changes: 45 additions & 19 deletions readmeai/builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Builds the README Markdown file for your codebase."""

import os
import subprocess
import urllib.parse
from pathlib import Path
from typing import List, Tuple
Expand Down Expand Up @@ -229,23 +228,50 @@ def generate_code_summary_table(base_url: str, directory: Path, level=0) -> str:
return markdown


def create_directory_tree(repo_path: Path) -> str:
    """Return a fenced ``bash`` code block showing the project's directory tree.

    The listing is obtained from ``run_tree_command`` and placed under a
    ``.`` root line. This is deliberately best-effort: if anything goes
    wrong while producing the listing, a warning is logged and a
    placeholder code block is returned instead of raising.
    """
    try:
        listing = run_tree_command(repo_path)
    except Exception as excinfo:
        logger.warning(f"Error running tree command: {excinfo}")
        return "```bash\n # Error generating directory tree.\n```"
    return f"```bash\n.\n{listing}```"
def generate_tree(
    directory: Path,
    repo_url: str,
    prefix: str = "",
    is_last: bool = True,
    parent_prefix: str = "",
) -> str:
    """Recursively render ``directory`` as a box-drawing tree.

    Args:
        directory: File or directory to render.
        repo_url: Repository location given by the caller. When it names
            the same path as ``directory`` the entry is labelled ``.``
            instead of the directory's own name.
        prefix: Unused; kept so existing positional callers keep working.
        is_last: Whether this entry is the last child of its parent, which
            selects the ``└── `` or ``├── `` connector.
        parent_prefix: Indentation accumulated from the ancestors.

    Returns:
        One newline-terminated line per entry. Directories carry a trailing
        ``/`` and are followed by their children, sorted by path, with any
        ``.git`` entry left out.
    """
    # Compare as paths: a Path never equals a plain str, so comparing the
    # raw arguments would make the "." label unreachable.
    if directory == Path(str(repo_url)):
        display_name = "."
    else:
        display_name = directory.name

    box_branch = "└── " if is_last else "├── "
    entry = f"{parent_prefix}{box_branch}{display_name}"

    if not directory.is_dir():
        return f"{entry}\n"

    lines = [f"{entry}/\n"]

    # List symlinked directories but do not descend into them, so a link
    # that points back at one of its ancestors cannot recurse forever.
    if directory.is_symlink():
        return lines[0]

    # Continuation columns are four characters wide to line up with the
    # four-character branch connectors above.
    child_prefix = parent_prefix + ("    " if is_last else "│   ")
    children = sorted(
        child for child in directory.iterdir() if child.name != ".git"
    )
    last_index = len(children) - 1
    for index, child in enumerate(children):
        lines.append(
            generate_tree(
                child,
                repo_url,
                box_branch,
                index == last_index,
                child_prefix,
            )
        )
    return "".join(lines)


def run_tree_command(repo_path: Path) -> str:
    """Run the external ``tree -n`` command and return its listing.

    The first line of the command output is dropped before the remaining
    lines are joined back together.

    Raises:
        Exception: If ``tree`` exits with a non-zero status. Other failures
            (for example the executable being missing) are not wrapped and
            propagate as raised by ``subprocess``.
    """
    try:
        tree_bytes = subprocess.check_output(["tree", "-n", repo_path])
        tree_str = tree_bytes.decode("utf-8")
        tree_lines = tree_str.split("\n")[1:]
        return "\n".join(tree_lines)
    except subprocess.CalledProcessError as excinfo:
        raise Exception(f"Error running tree command: {excinfo}") from excinfo


def format_tree(name: str, tree_str: str) -> str:
    """Swap the tree's root line for the project name and fence the result.

    Args:
        name: Project name to show as the root entry.
        tree_str: Rendered tree whose first line is the root entry to be
            replaced (for a cloned repository this is the temporary
            directory's name).

    Returns:
        A fenced ``sh`` code block whose first line is ``└── {name}/``
        followed by every line of ``tree_str`` after its first.
    """
    # Everything up to the first newline is the old root line; discard it.
    _old_root, _sep, remainder = tree_str.partition("\n")
    listing = f"└── {name}/\n{remainder}"
    # The closing fence must start on its own line, even when the input was
    # empty, a single line, or lacked a trailing newline.
    if not listing.endswith("\n"):
        listing += "\n"
    return f"```sh\n{listing}```"
5 changes: 3 additions & 2 deletions readmeai/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ async def generate_readme(llm: model.OpenAIHandler, offline: bool) -> None:

try:
temp_dir = utils.clone_repo_to_temp_dir(repository)
tree = builder.create_directory_tree(temp_dir)
tree_str = builder.generate_tree(temp_dir, repository)
tree = builder.format_tree(name, tree_str)
config.md.tree = config.md.tree.format(tree)
logger.info(f"Directory tree: {config.md.tree}")

scanner = preprocess.RepositoryParserWrapper(config, config_helper)
scanner = preprocess.RepositoryHandler(config, config_helper)
dependencies, file_text = scanner.get_dependencies(temp_dir)
logger.info(f"Dependencies: {dependencies}")
logger.info(f"Total files: {len(file_text)}")
Expand Down
71 changes: 30 additions & 41 deletions readmeai/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,17 @@
logger = logger.Logger(__name__)


class RepositoryParserWrapper:
    """Convenience facade that drives a ``RepositoryParser`` instance."""

    def __init__(self, config: conf.AppConfig, conf_helper: conf.ConfigHelper):
        # The parser takes the language tables separately, so pull them
        # off the helper here.
        names = conf_helper.language_names
        setup = conf_helper.language_setup
        self.parser = RepositoryParser(config, names, setup)

    def get_unique_contents(self, contents: Dict, keys: List[str]) -> List[str]:
        """Collect the distinct values found under ``keys`` across all records."""
        distinct = set()
        for key in keys:
            for record in contents:
                distinct.add(record[key])
        return list(distinct)

    def get_file_contents(self, contents: Dict) -> Dict[str, str]:
        """Map each record's ``path`` to its ``content``."""
        by_path = {}
        for record in contents:
            by_path[record["path"]] = record["content"]
        return by_path

    def get_dependencies(
        self, temp_dir: str = None
    ) -> Tuple[List[str], Dict[str, str]]:
        """Return the de-duplicated dependency list and the path-to-content map.

        The dependency list combines what the parser extracts from
        dependency files with the distinct ``extension``, ``language`` and
        ``name`` values of every analysed record.
        """
        records = self.parser.analyze(temp_dir)
        dependencies = self.parser.get_dependency_file_contents(records)
        extra = self.get_unique_contents(
            records, ["extension", "language", "name"]
        )
        dependencies.extend(extra)
        unique_dependencies = list(set(dependencies))
        return unique_dependencies, self.get_file_contents(records)


class RepositoryParser:
class RepositoryHandler:
"""Analyzes a local or remote git repository."""

def __init__(
self,
config: conf.AppConfig,
language_names: Dict[str, str],
language_setup: Dict[str, str],
conf_helper: conf.ConfigHelper,
):
self.config = config
self.language_names = language_names
self.language_setup = language_setup
self.language_names = conf_helper.language_names
self.language_setup = conf_helper.language_setup
self.encoding_name = config.api.encoding

def analyze(self, repo_path: str) -> List[Dict]:
Expand Down Expand Up @@ -82,7 +53,6 @@ def get_dependency_file_contents(self, contents: List[Dict]) -> List[str]:
parsed_contents = []
for content in dependency_files:
logger.info(f"Dependency file found: {content['name']}")
# logger.info(f"Dependency file content: {content['content']}")
parser = file_parsers[content["name"]]
parsed_content = parser(content=content["content"])
parsed_contents.append(parsed_content)
Expand All @@ -107,13 +77,24 @@ def generate_file_info(
except UnicodeDecodeError:
continue

def tokenize_content(self, contents: List[Dict]) -> List[Dict]:
    """Store a token count under ``"tokens"`` on every record, in place.

    The count comes from ``utils.get_token_count`` applied to the record's
    ``"content"`` with this instance's ``encoding_name``. The same list
    that was passed in is returned.
    """
    for record in contents:
        token_count = utils.get_token_count(
            record["content"], self.encoding_name
        )
        record["tokens"] = token_count
    return contents
def get_file_contents(self, contents: Dict) -> Dict[str, str]:
    """Map each record's ``path`` to its ``content``.

    ``contents`` is iterated as a sequence of dict records; when two
    records share a path, the later one wins.
    """
    by_path = {}
    for record in contents:
        by_path[record["path"]] = record["content"]
    return by_path

def get_unique_contents(self, contents: Dict, keys: List[str]) -> List[str]:
    """Collect the distinct values found under ``keys`` across all records.

    ``contents`` is iterated as a sequence of dict records. The values are
    gathered in a set, so the order of the returned list is unspecified.
    """
    distinct = set()
    for key in keys:
        for record in contents:
            distinct.add(record[key])
    return list(distinct)

def get_dependencies(
    self, temp_dir: str = None
) -> Tuple[List[str], Dict[str, str]]:
    """Return the de-duplicated dependency list and the path-to-content map.

    The records produced by ``analyze(temp_dir)`` feed both results. The
    dependency list combines what ``get_dependency_file_contents`` returns
    with the distinct ``extension``, ``language`` and ``name`` values of
    every record; duplicates are removed through a set, so its order is
    unspecified.
    """
    records = self.analyze(temp_dir)
    dependencies = self.get_dependency_file_contents(records)
    extra = self.get_unique_contents(
        records, ["extension", "language", "name"]
    )
    dependencies.extend(extra)
    unique_dependencies = list(set(dependencies))
    return unique_dependencies, self.get_file_contents(records)

def process_language_mapping(self, contents: List[Dict]) -> List[Dict]:
"""Maps file extensions to their programming languages."""
Expand All @@ -128,6 +109,14 @@ def process_language_mapping(self, contents: List[Dict]) -> List[Dict]:
content["install"], content["run"], content["test"] = setup
return contents

def tokenize_content(self, contents: List[Dict]) -> List[Dict]:
    """Store a token count under ``"tokens"`` on every record, in place.

    The count comes from ``utils.get_token_count`` applied to the record's
    ``"content"`` with this instance's ``encoding_name``. The same list
    that was passed in is returned.
    """
    for record in contents:
        token_count = utils.get_token_count(
            record["content"], self.encoding_name
        )
        record["tokens"] = token_count
    return contents

@staticmethod
def _get_file_parsers() -> Dict[str, callable]:
"""Returns a dictionary of callable file parser methods."""
Expand Down
14 changes: 10 additions & 4 deletions readmeai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import platform
import re
import shutil
import tempfile
from pathlib import Path
from typing import List, Optional
Expand All @@ -16,7 +17,7 @@


def clone_repo_to_temp_dir(repo_path: str) -> Path:
"""Clone a repository to a temporary directory."""
"""Clone a repository to a temporary directory and remove the .git directory."""
git_exec_path = find_git_executable()
validate_git_executable(git_exec_path)

Expand All @@ -25,15 +26,20 @@ def clone_repo_to_temp_dir(repo_path: str) -> Path:

temp_dir = tempfile.mkdtemp()
try:
git.Repo.clone_from(repo_path, temp_dir, depth=1, env=env)
logger.info(f"Successfully cloned {repo_path} to {temp_dir}.")
git.Repo.clone_from(repo_path, temp_dir, env=env)
git_dir = Path(temp_dir) / ".git"
if git_dir.exists():
shutil.rmtree(git_dir)

logger.info(f"Cloned codebase {repo_path} to {temp_dir}.")

return Path(temp_dir)

except git.GitCommandError as excinfo:
raise ValueError(f"Git clone error: {excinfo}") from excinfo

except Exception as excinfo:
raise (f"Error cloning git repository: {excinfo}")
raise ValueError(f"Error cloning git repository: {excinfo}") from excinfo


def find_git_executable() -> Optional[Path]:
Expand Down
2 changes: 1 addition & 1 deletion scripts/clean.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ find . -type d \( -name "__pycache__" \
-o -name ".vscode" \) -execdir rm -rf {} +

# Remove build artifacts, pytest cache, and benchmarks
rm -rf build/ dist/ *.egg-info/ .pytest_cache/ .benchmarks/
rm -rf build/ dist/ -- *.egg-info/ .pytest_cache/ .benchmarks/

# Remove specific files
rm -rf docs/raw_data.csv *.log *.out *.rdb