From 4906b2f712a73771abf344e25bd5b46e06c10081 Mon Sep 17 00:00:00 2001 From: jpotw Date: Fri, 7 Mar 2025 15:22:25 +0900 Subject: [PATCH] feat: made --recurse-submodules optional --- src/gitingest/cli.py | 11 ++++-- src/gitingest/cloning.py | 7 +++- src/gitingest/query_parsing.py | 6 ++++ src/gitingest/repository_ingest.py | 8 +++++ src/server/query_processor.py | 5 +++ src/server/routers/dynamic.py | 4 +++ src/server/routers/index.py | 4 +++ .../templates/components/git_form.jinja | 35 +++++++++++++++++++ tests/test_flow_integration.py | 6 ++++ 9 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/gitingest/cli.py b/src/gitingest/cli.py index 73b49b67..89af9101 100644 --- a/src/gitingest/cli.py +++ b/src/gitingest/cli.py @@ -18,6 +18,7 @@ @click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude") @click.option("--include-pattern", "-i", multiple=True, help="Patterns to include") @click.option("--branch", "-b", default=None, help="Branch to clone and ingest") +@click.option("--include-submodules", is_flag=True, help="Include git submodules in the analysis") def main( source: str, output: Optional[str], @@ -25,6 +26,7 @@ def main( exclude_pattern: Tuple[str, ...], include_pattern: Tuple[str, ...], branch: Optional[str], + include_submodules: bool, ): """ Main entry point for the CLI. This function is called when the CLI is run as a script. @@ -46,9 +48,11 @@ def main( A tuple of patterns to include during the analysis. Only files matching these patterns will be processed. branch : str, optional The branch to clone (optional). + include_submodules : bool + Whether to include git submodules in the analysis. """ # Main entry point for the CLI. This function is called when the CLI is run as a script. 
- asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch)) + asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch, include_submodules)) async def _async_main( @@ -58,6 +62,7 @@ async def _async_main( exclude_pattern: Tuple[str, ...], include_pattern: Tuple[str, ...], branch: Optional[str], + include_submodules: bool, ) -> None: """ Analyze a directory or repository and create a text dump of its contents. @@ -80,6 +85,8 @@ async def _async_main( A tuple of patterns to include during the analysis. Only files matching these patterns will be processed. branch : str, optional The branch to clone (optional). + include_submodules : bool + Whether to include git submodules in the analysis. Raises ------ @@ -93,7 +100,7 @@ async def _async_main( if not output: output = OUTPUT_FILE_NAME - summary, _, _ = await ingest_async(source, max_size, include_patterns, exclude_patterns, branch, output=output) + summary, _, _ = await ingest_async(source, max_size, include_patterns, exclude_patterns, branch, include_submodules, output=output) click.echo(f"Analysis complete! Output written to: {output}") click.echo("\nSummary:") diff --git a/src/gitingest/cloning.py b/src/gitingest/cloning.py index ffd933c1..49ceda08 100644 --- a/src/gitingest/cloning.py +++ b/src/gitingest/cloning.py @@ -29,6 +29,8 @@ class CloneConfig: The specific commit hash to check out after cloning (default is None). branch : str, optional The branch to clone (default is None). + include_submodules : bool + Whether to include submodules when cloning (default is False). subpath : str The subpath to clone from the repository (default is "/"). 
""" @@ -37,6 +39,7 @@ class CloneConfig: local_path: str commit: Optional[str] = None branch: Optional[str] = None + include_submodules: bool = False subpath: str = "/" blob: bool = False @@ -81,7 +84,9 @@ async def clone_repo(config: CloneConfig) -> None: raise ValueError("Repository not found, make sure it is public") clone_cmd = ["git", "clone", "--single-branch"] - # TODO re-enable --recurse-submodules + + if config.include_submodules: + clone_cmd.append("--recurse-submodules") if partial_clone: clone_cmd += ["--filter=blob:none", "--sparse"] diff --git a/src/gitingest/query_parsing.py b/src/gitingest/query_parsing.py index e2b0e0cf..4bc2f0d2 100644 --- a/src/gitingest/query_parsing.py +++ b/src/gitingest/query_parsing.py @@ -43,6 +43,7 @@ class ParsedQuery: # pylint: disable=too-many-instance-attributes ignore_patterns: Optional[Set[str]] = None include_patterns: Optional[Set[str]] = None pattern_type: Optional[str] = None + include_submodules: bool = False def extact_clone_config(self) -> CloneConfig: """ @@ -68,6 +69,7 @@ def extact_clone_config(self) -> CloneConfig: branch=self.branch, subpath=self.subpath, blob=self.type == "blob", + include_submodules=self.include_submodules, ) @@ -77,6 +79,7 @@ async def parse_query( from_web: bool, include_patterns: Optional[Union[str, Set[str]]] = None, ignore_patterns: Optional[Union[str, Set[str]]] = None, + include_submodules: bool = False, ) -> ParsedQuery: """ Parse the input source (URL or path) to extract relevant details for the query. @@ -97,6 +100,8 @@ async def parse_query( Patterns to include, by default None. Can be a set of strings or a single string. ignore_patterns : Union[str, Set[str]], optional Patterns to ignore, by default None. Can be a set of strings or a single string. + include_submodules : bool + The flag whether to include git submodules in the analysis. Defaults to False. 
Returns ------- @@ -139,6 +144,7 @@ async def parse_query( max_file_size=max_file_size, ignore_patterns=ignore_patterns_set, include_patterns=parsed_include, + include_submodules=include_submodules, ) diff --git a/src/gitingest/repository_ingest.py b/src/gitingest/repository_ingest.py index f30d6001..6fd12e7c 100644 --- a/src/gitingest/repository_ingest.py +++ b/src/gitingest/repository_ingest.py @@ -17,6 +17,7 @@ async def ingest_async( include_patterns: Optional[Union[str, Set[str]]] = None, exclude_patterns: Optional[Union[str, Set[str]]] = None, branch: Optional[str] = None, + include_submodules: bool = False, output: Optional[str] = None, ) -> Tuple[str, str, str]: """ @@ -39,6 +40,8 @@ async def ingest_async( Pattern or set of patterns specifying which files to exclude. If `None`, no files are excluded. branch : str, optional The branch to clone and ingest. If `None`, the default branch is used. + include_submodules : bool + Whether to include git submodules in the analysis. Defaults to False. output : str, optional File path where the summary and content should be written. If `None`, the results are not written to a file. @@ -64,6 +67,7 @@ async def ingest_async( from_web=False, include_patterns=include_patterns, ignore_patterns=exclude_patterns, + include_submodules=include_submodules, ) if parsed_query.url: @@ -102,6 +106,7 @@ def ingest( include_patterns: Optional[Union[str, Set[str]]] = None, exclude_patterns: Optional[Union[str, Set[str]]] = None, branch: Optional[str] = None, + include_submodules: bool = False, output: Optional[str] = None, ) -> Tuple[str, str, str]: """ @@ -124,6 +129,8 @@ def ingest( Pattern or set of patterns specifying which files to exclude. If `None`, no files are excluded. branch : str, optional The branch to clone and ingest. If `None`, the default branch is used. + include_submodules : bool + Whether to include git submodules in the analysis. Defaults to False. 
output : str, optional File path where the summary and content should be written. If `None`, the results are not written to a file. @@ -146,6 +153,7 @@ def ingest( include_patterns=include_patterns, exclude_patterns=exclude_patterns, branch=branch, + include_submodules=include_submodules, output=output, ) ) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index f6cdcea2..b32a0653 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -19,6 +19,7 @@ async def process_query( pattern_type: str = "exclude", pattern: str = "", is_index: bool = False, + include_submodules: bool = False, ) -> _TemplateResponse: """ Process a query by parsing input, cloning a repository, and generating a summary. @@ -40,6 +41,8 @@ async def process_query( Pattern to include or exclude in the query, depending on the pattern type. is_index : bool Flag indicating whether the request is for the index page (default is False). + include_submodules : bool + Flag indicating whether to include submodules in the query (default is False). Returns ------- @@ -71,6 +74,7 @@ async def process_query( "default_file_size": slider_position, "pattern_type": pattern_type, "pattern": pattern, + "include_submodules": include_submodules, } try: @@ -80,6 +84,7 @@ async def process_query( from_web=True, include_patterns=include_patterns, ignore_patterns=exclude_patterns, + include_submodules=include_submodules, ) if not parsed_query.url: raise ValueError("The 'url' parameter is required.") diff --git a/src/server/routers/dynamic.py b/src/server/routers/dynamic.py index 74febf8d..db89d0ea 100644 --- a/src/server/routers/dynamic.py +++ b/src/server/routers/dynamic.py @@ -50,6 +50,7 @@ async def process_catch_all( max_file_size: int = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), + include_submodules: bool = Form(...), ) -> HTMLResponse: """ Process the form submission with user input for query parameters. 
@@ -69,6 +70,8 @@ async def process_catch_all( The type of pattern used for the query, specified by the user. pattern : str The pattern string used in the query, specified by the user. + include_submodules : bool + The flag indicating whether to include submodules in the query, specified by the user. Returns ------- @@ -83,4 +86,5 @@ async def process_catch_all( pattern_type, pattern, is_index=False, + include_submodules=include_submodules, ) diff --git a/src/server/routers/index.py b/src/server/routers/index.py index 5b08a244..e352ad48 100644 --- a/src/server/routers/index.py +++ b/src/server/routers/index.py @@ -47,6 +47,7 @@ async def index_post( max_file_size: int = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), + include_submodules: bool = Form(...), ) -> HTMLResponse: """ Process the form submission with user input for query parameters. @@ -67,6 +68,8 @@ async def index_post( The type of pattern used for the query, specified by the user. pattern : str The pattern string used in the query, specified by the user. + include_submodules : bool + The flag indicating whether to include submodules in the query, specified by the user. 
Returns ------- @@ -81,4 +84,5 @@ async def index_post( pattern_type, pattern, is_index=True, + include_submodules=include_submodules, ) diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja index 764fff70..f93e3155 100644 --- a/src/server/templates/components/git_form.jinja +++ b/src/server/templates/components/git_form.jinja @@ -17,6 +17,15 @@ element.classList.toggle('hover:text-gray-500'); }); } + function updateSubmodulesValue(checkbox) { + const hiddenInput = document.querySelector('input[name="include_submodules"]'); + hiddenInput.value = checkbox.checked.toString(); + } + document.addEventListener('DOMContentLoaded', () => { + const checkbox = document.getElementById("include_submodules"); + const hiddenInput = document.querySelector('input[name="include_submodules"]'); + checkbox.checked = hiddenInput.value === "true"; + });
@@ -45,6 +54,7 @@
+
@@ -95,6 +105,31 @@ required value="{{ default_file_size }}" class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000] "> +
+
+ +
+
+
+ +
+
+ + +
{% if show_examples %} diff --git a/tests/test_flow_integration.py b/tests/test_flow_integration.py index 99ea35af..dc78649d 100644 --- a/tests/test_flow_integration.py +++ b/tests/test_flow_integration.py @@ -66,6 +66,7 @@ async def test_remote_repository_analysis(request): "max_file_size": "243", "pattern_type": "exclude", "pattern": "", + "include_submodules": "false", } response = client.post("/", data=form_data) @@ -82,6 +83,7 @@ async def test_invalid_repository_url(request): "max_file_size": "243", "pattern_type": "exclude", "pattern": "", + "include_submodules": "false", } response = client.post("/", data=form_data) @@ -98,6 +100,7 @@ async def test_large_repository(request): "max_file_size": "243", "pattern_type": "exclude", "pattern": "", + "include_submodules": "false", } response = client.post("/", data=form_data) @@ -116,6 +119,7 @@ def make_request(): "max_file_size": "243", "pattern_type": "exclude", "pattern": "", + "include_submodules": "false", } response = client.post("/", data=form_data) assert response.status_code == 200, f"Request failed: {response.text}" @@ -136,6 +140,7 @@ async def test_large_file_handling(request): "max_file_size": "1", "pattern_type": "exclude", "pattern": "", + "include_submodules": "false", } response = client.post("/", data=form_data) @@ -152,6 +157,7 @@ async def test_repository_with_patterns(request): "max_file_size": "243", "pattern_type": "include", "pattern": "*.md", + "include_submodules": "false", } response = client.post("/", data=form_data)