From f340a3109c00edc84eaccd4aa5ce5f7eb40b4e3c Mon Sep 17 00:00:00 2001 From: Alberto Gaona Date: Sun, 10 Sep 2023 21:32:47 -0600 Subject: [PATCH] Add support for Go language (#42) * fix saving structure * allow processing go files * remove debugging lines * add go agreggator --- cli.py | 11 ++++- devtale/aggregators/__init__.py | 3 +- devtale/aggregators/go.py | 87 +++++++++++++++++++++++++++++++++ devtale/constants.py | 4 +- 4 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 devtale/aggregators/go.py diff --git a/cli.py b/cli.py index 8511fdc..89028c0 100644 --- a/cli.py +++ b/cli.py @@ -7,7 +7,7 @@ import click from dotenv import load_dotenv -from devtale.aggregators import PHPAggregator, PythonAggregator +from devtale.aggregators import GoAggregator, PHPAggregator, PythonAggregator from devtale.constants import ALLOWED_EXTENSIONS, LANGUAGES from devtale.utils import ( build_project_tree, @@ -59,6 +59,8 @@ def process_repository( for folder_path in folders: try: + if folder_path == root_path: + folder_path += "/" folder_tale = process_folder(folder_path, output_path, model_name, fuse) except Exception as e: folder_name = os.path.basename(folder_path) @@ -69,7 +71,10 @@ def process_repository( if folder_tale is not None: # add root folder summary information - if folder_path == root_path: + if ( + os.path.basename(folder_path) == os.path.basename(root_path) + or folder_path == "" + ): folder_tales["folders"].append( { "folder_name": os.path.basename(os.path.abspath(root_path)), @@ -247,6 +252,8 @@ def process_file( aggregator = PythonAggregator() elif file_ext == ".php": aggregator = PHPAggregator() + elif file_ext == ".go": + aggregator = GoAggregator() fused_tale = aggregator.document(code=code, documentation=tale) with open(save_path, "w") as file: diff --git a/devtale/aggregators/__init__.py b/devtale/aggregators/__init__.py index 1ba2b87..ee2a514 100644 --- a/devtale/aggregators/__init__.py +++ b/devtale/aggregators/__init__.py @@ -1,4 +1,5 
import re


class GoAggregator:
    """Insert generated documentation into Go source code as ``//`` comments.

    The ``documentation`` mapping consumed by this class is read through the
    keys ``file_docstring``, ``methods`` (items with ``method_name`` and
    ``method_docstring``) and ``classes`` (items with ``class_name`` and
    ``class_docstring``).
    """

    # Matches a method with a receiver (group 1 = name) or a plain named
    # function (group 2 = name), up to the last "{" on the same line.
    _METHOD_PATTERN = re.compile(
        r"func \([\w\s\*]+\) (\w+)[^\n]*{|\bfunc (\w+)[^\n]*{"
    )
    # Matches capitalized struct/interface type declarations (group 1 = name).
    _TYPE_PATTERN = re.compile(
        r"type\s+([A-Z][a-zA-Z0-9_]*)\s+(struct|interface)\s*\{"
    )

    def __init__(self):
        pass

    def document(self, documentation, code):
        """Return ``code`` with file, function and type comments added.

        Args:
            documentation: Mapping with the keys described on the class.
            code: Go source text to annotate.

        Returns:
            The annotated source text. ``code`` itself is not modified.
        """
        documented_code = self._document_file(documentation, code)
        documented_code = self._add_docstrings(
            documentation, documented_code, type="method"
        )
        documented_code = self._add_docstrings(
            documentation, documented_code, type="class"
        )
        return documented_code

    def _add_docstrings(self, documentation, code, type="method"):
        """Insert a comment above each matched declaration that lacks one.

        Args:
            documentation: Mapping holding ``methods`` or ``classes`` entries.
            code: Go source text to annotate.
            type: ``"method"`` to document functions/methods; any other value
                documents struct and interface types. (The name shadows the
                builtin but is kept because callers pass it by keyword.)

        Returns:
            The source text with the comments inserted.

        Raises:
            KeyError: If the required ``methods``/``classes`` key or one of
                the per-item keys is missing.
        """
        if type == "method":
            pattern = self._METHOD_PATTERN
            docstrings = {
                item["method_name"]: item["method_docstring"]
                for item in documentation["methods"]
            }
        else:
            pattern = self._TYPE_PATTERN
            docstrings = {
                item["class_name"]: item["class_docstring"]
                for item in documentation["classes"]
            }

        pieces = []
        last_end = 0

        for match in pattern.finditer(code):
            name = match.group(1) or match.group(2)
            docstring = docstrings.get(name)
            if not docstring:
                # Unknown declaration or empty documentation: nothing to add.
                continue

            index = match.start()
            line_start = code.rfind("\n", 0, index) + 1
            indent = code[line_start:index]
            if indent.strip():
                # Something other than whitespace precedes the declaration on
                # its own line (e.g. it sits inside a "//" comment); a comment
                # cannot be attached there safely.
                continue
            if self._has_doc_comment(code, line_start):
                continue

            comment = self._break_large_strings(docstring)
            if not comment:
                # Whitespace-only documentation wraps to nothing.
                continue

            # Repeat the declaration's indentation on every inserted line so
            # indented declarations stay aligned with their comment.
            comment = comment.replace("\n", "\n" + indent)
            pieces.append(code[last_end:index])
            pieces.append(f"{comment}\n{indent}")
            last_end = index

        # append any remaining code
        pieces.append(code[last_end:])
        return "".join(pieces)

    @staticmethod
    def _has_doc_comment(code, line_start):
        """Tell whether the line right above ``line_start`` is a comment.

        Only the immediately preceding line is inspected: a comment separated
        from the declaration by a blank line is not its doc comment in Go.
        """
        if line_start == 0:
            return False
        # code[line_start - 1] is the newline that ends the previous line.
        previous_start = code.rfind("\n", 0, line_start - 1) + 1
        previous_line = code[previous_start : line_start - 1].strip()
        return previous_line.startswith("//") or "*/" in previous_line

    def _break_large_strings(self, string, max_lenght=90):
        """Wrap ``string`` into ``// ``-prefixed lines.

        Words are packed greedily so that the text after the prefix is at most
        ``max_lenght`` characters long. A single word longer than the limit is
        kept whole on its own line.

        Args:
            string: Text to wrap; runs of whitespace are collapsed.
            max_lenght: Maximum length of each line, excluding the prefix.

        Returns:
            The wrapped comment, or ``""`` when ``string`` has no words.
        """
        lines = []
        current_line = ""
        for word in string.split():
            if not current_line:
                # Always start a line with the word, even when it is too
                # long, so no empty comment line is ever emitted.
                current_line = word
            elif len(current_line) + len(word) + 1 <= max_lenght:
                current_line += " " + word
            else:
                lines.append(current_line)
                current_line = word
        if current_line:
            lines.append(current_line)

        return "\n".join("// " + line for line in lines)

    def _document_file(self, documentation, code):
        """Prepend the file-level description to ``code`` as a comment.

        Args:
            documentation: Mapping providing ``file_docstring``.
            code: Go source text.

        Returns:
            ``code`` preceded by the wrapped description, or ``code``
            unchanged when the description is empty.

        Raises:
            KeyError: If ``file_docstring`` is missing.
        """
        file_docstring = documentation["file_docstring"]
        if not file_docstring:
            return code

        file_description = self._break_large_strings(file_docstring)
        if not file_description:
            return code

        return file_description + "\n" + code