diff --git a/README.md b/README.md index 733e6a38..9d7e425b 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ https://github.com/eli64s/readme-ai/assets/43382407/e8260e78-b684-4e72-941c-b304
- ‣ Your project's directory structure is visualized using the tree command. + ‣ Your project's directory structure is visualized using a custom tree function.
def generate_tree(
    directory: Path,
    repo_url: str,
    prefix: str = "",
    is_last: bool = True,
    parent_prefix: str = "",
) -> str:
    """Recursively render ``directory`` as a box-drawing tree.

    Args:
        directory: File or directory to render.
        repo_url: Location the tree was requested for. When it names the
            same filesystem path as ``directory``, the entry is shown as
            ``.`` instead of the directory name.
        prefix: Not used by the rendering; kept so existing positional
            callers keep working.
        is_last: Whether this entry is the final child of its parent,
            which selects the ``└──`` or ``├──`` connector.
        parent_prefix: Indentation accumulated from the ancestors.

    Returns:
        One newline-terminated line per entry. Directories carry a
        trailing ``/`` and entries named ``.git`` are left out.
    """
    # Compare Path with Path: a Path never equals a plain string, so the
    # string has to be converted before the root check can ever succeed.
    is_root = isinstance(repo_url, (str, Path)) and directory == Path(repo_url)
    display_name = "." if is_root else directory.name

    connector = "└── " if is_last else "├── "
    header = f"{parent_prefix}{connector}{display_name}"

    if not directory.is_dir():
        return f"{header}\n"

    lines = [f"{header}/\n"]

    # is_dir() follows symlinks; list a symlinked directory but do not walk
    # it, so a link to a parent or to a path outside the repository cannot
    # cause runaway recursion.
    if directory.is_symlink():
        return lines[0]

    children = sorted(
        child for child in directory.iterdir() if child.name != ".git"
    )
    # Every child shares the same indentation: a blank column when this
    # entry closes its parent's list, a vertical bar otherwise. Both
    # segments are four columns wide, matching the connector width.
    child_parent_prefix = parent_prefix + ("    " if is_last else "│   ")
    last_index = len(children) - 1
    lines.extend(
        generate_tree(
            child,
            repo_url,
            connector,
            index == last_index,
            child_parent_prefix,
        )
        for index, child in enumerate(children)
    )
    return "".join(lines)


def format_tree(name: str, tree_str: str) -> str:
    """Swap the tree's root line for the project name and fence the result.

    Args:
        name: Project name shown as the root entry.
        tree_str: Tree text whose first line is the root entry to replace.

    Returns:
        The tree wrapped in a ``sh`` Markdown code fence, with
        ``└── {name}/`` as its first line.
    """
    # Drop the first line (the root entry) and keep everything after it.
    _, _, remainder = tree_str.partition("\n")
    body = f"└── {name}/\n{remainder}"
    # The closing fence must start on its own line, even when the input
    # was empty or did not end with a newline.
    if not body.endswith("\n"):
        body += "\n"
    return f"```sh\n{body}```"
logger.info(f"Dependencies: {dependencies}") logger.info(f"Total files: {len(file_text)}") diff --git a/readmeai/preprocess.py b/readmeai/preprocess.py index 3a1c2aea..64470d0e 100644 --- a/readmeai/preprocess.py +++ b/readmeai/preprocess.py @@ -8,46 +8,17 @@ logger = logger.Logger(__name__) -class RepositoryParserWrapper: - """Wrapper class for the RepositoryParser.""" - - def __init__(self, config: conf.AppConfig, conf_helper: conf.ConfigHelper): - self.parser = RepositoryParser( - config, conf_helper.language_names, conf_helper.language_setup - ) - - def get_unique_contents(self, contents: Dict, keys: List[str]) -> List[str]: - """Extracts the unique contents from the list of dicts.""" - unique_contents = {data[key] for key in keys for data in contents} - return list(unique_contents) - - def get_file_contents(self, contents: Dict) -> Dict[str, str]: - """Extracts the file contents from the list of dicts.""" - return {content["path"]: content["content"] for content in contents} - - def get_dependencies( - self, temp_dir: str = None - ) -> Tuple[List[str], Dict[str, str]]: - """Extracts the dependencies of the user's repository.""" - contents = self.parser.analyze(temp_dir) - dependencies = self.parser.get_dependency_file_contents(contents) - attributes = ["extension", "language", "name"] - dependencies.extend(self.get_unique_contents(contents, attributes)) - return list(set(dependencies)), self.get_file_contents(contents) - - -class RepositoryParser: +class RepositoryHandler: """Analyzes a local or remote git repository.""" def __init__( self, config: conf.AppConfig, - language_names: Dict[str, str], - language_setup: Dict[str, str], + conf_helper: conf.ConfigHelper, ): self.config = config - self.language_names = language_names - self.language_setup = language_setup + self.language_names = conf_helper.language_names + self.language_setup = conf_helper.language_setup self.encoding_name = config.api.encoding def analyze(self, repo_path: str) -> List[Dict]: @@ 
-82,7 +53,6 @@ def get_dependency_file_contents(self, contents: List[Dict]) -> List[str]: parsed_contents = [] for content in dependency_files: logger.info(f"Dependency file found: {content['name']}") - # logger.info(f"Dependency file content: {content['content']}") parser = file_parsers[content["name"]] parsed_content = parser(content=content["content"]) parsed_contents.append(parsed_content) @@ -107,13 +77,24 @@ def generate_file_info( except UnicodeDecodeError: continue - def tokenize_content(self, contents: List[Dict]) -> List[Dict]: - """Tokenizes the content of each file.""" - for content in contents: - content["tokens"] = utils.get_token_count( - content["content"], self.encoding_name - ) - return contents + def get_file_contents(self, contents: Dict) -> Dict[str, str]: + """Extracts the file contents from the list of dicts.""" + return {content["path"]: content["content"] for content in contents} + + def get_unique_contents(self, contents: Dict, keys: List[str]) -> List[str]: + """Extracts the unique contents from the list of dicts.""" + unique_contents = {data[key] for key in keys for data in contents} + return list(unique_contents) + + def get_dependencies( + self, temp_dir: str = None + ) -> Tuple[List[str], Dict[str, str]]: + """Extracts the dependencies of the user's repository.""" + contents = self.analyze(temp_dir) + dependencies = self.get_dependency_file_contents(contents) + attributes = ["extension", "language", "name"] + dependencies.extend(self.get_unique_contents(contents, attributes)) + return list(set(dependencies)), self.get_file_contents(contents) def process_language_mapping(self, contents: List[Dict]) -> List[Dict]: """Maps file extensions to their programming languages.""" @@ -128,6 +109,14 @@ def process_language_mapping(self, contents: List[Dict]) -> List[Dict]: content["install"], content["run"], content["test"] = setup return contents + def tokenize_content(self, contents: List[Dict]) -> List[Dict]: + """Tokenizes the content of each 
file.""" + for content in contents: + content["tokens"] = utils.get_token_count( + content["content"], self.encoding_name + ) + return contents + @staticmethod def _get_file_parsers() -> Dict[str, callable]: """Returns a dictionary of callable file parser methods.""" diff --git a/readmeai/utils.py b/readmeai/utils.py index d8425a85..1d578592 100644 --- a/readmeai/utils.py +++ b/readmeai/utils.py @@ -3,6 +3,7 @@ import os import platform import re +import shutil import tempfile from pathlib import Path from typing import List, Optional @@ -16,7 +17,7 @@ def clone_repo_to_temp_dir(repo_path: str) -> Path: - """Clone a repository to a temporary directory.""" + """Clone a repository to a temporary directory and remove the .git directory.""" git_exec_path = find_git_executable() validate_git_executable(git_exec_path) @@ -25,15 +26,20 @@ def clone_repo_to_temp_dir(repo_path: str) -> Path: temp_dir = tempfile.mkdtemp() try: - git.Repo.clone_from(repo_path, temp_dir, depth=1, env=env) - logger.info(f"Successfully cloned {repo_path} to {temp_dir}.") + git.Repo.clone_from(repo_path, temp_dir, env=env) + git_dir = Path(temp_dir) / ".git" + if git_dir.exists(): + shutil.rmtree(git_dir) + + logger.info(f"Cloned codebase {repo_path} to {temp_dir}.") + return Path(temp_dir) except git.GitCommandError as excinfo: raise ValueError(f"Git clone error: {excinfo}") from excinfo except Exception as excinfo: - raise (f"Error cloning git repository: {excinfo}") + raise ValueError(f"Error cloning git repository: {excinfo}") from excinfo def find_git_executable() -> Optional[Path]: diff --git a/scripts/clean.sh b/scripts/clean.sh index 817350fc..6614704c 100644 --- a/scripts/clean.sh +++ b/scripts/clean.sh @@ -12,7 +12,7 @@ find . 
-type d \( -name "__pycache__" \ -o -name ".vscode" \) -execdir rm -rf {} + # Remove build artifacts, pytest cache, and benchmarks -rm -rf build/ dist/ *.egg-info/ .pytest_cache/ .benchmarks/ +rm -rf build/ dist/ -- *.egg-info/ .pytest_cache/ .benchmarks/ # Remove specific files rm -rf docs/raw_data.csv *.log *.out *.rdb