Skip to content

Commit deeaf5d

Browse files
cyclotruc authored and filipchristiansen committed
Refactor/pydantic (#226)
Signed-off-by: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com>
1 parent c62c211 commit deeaf5d

15 files changed

+281
-268
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,5 +76,6 @@ pythonpath = ["src"]
7676
testpaths = ["tests/"]
7777
python_files = "test_*.py"
7878
asyncio_mode = "auto"
79+
asyncio_default_fixture_loop_scope = "function"
7980
python_classes = "Test*"
8081
python_functions = "test_*"

src/gitingest/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
""" Gitingest: A package for ingesting data from Git repositories. """
22

3-
from gitingest.cloning import clone_repo
3+
from gitingest.cloning import clone
4+
from gitingest.entrypoint import ingest, ingest_async
45
from gitingest.ingestion import ingest_query
56
from gitingest.query_parsing import parse_query
6-
from gitingest.repository_ingest import ingest, ingest_async
77

8-
__all__ = ["ingest_query", "clone_repo", "parse_query", "ingest", "ingest_async"]
8+
__all__ = ["ingest_query", "clone", "parse_query", "ingest", "ingest_async"]

src/gitingest/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import click
99

1010
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
11-
from gitingest.repository_ingest import ingest_async
11+
from gitingest.entrypoint import ingest_async
1212

1313

1414
@click.command()

src/gitingest/cloning.py

Lines changed: 2 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -2,47 +2,17 @@
22

33
import asyncio
44
import os
5-
from dataclasses import dataclass
65
from pathlib import Path
76
from typing import List, Optional, Tuple
87

8+
from gitingest.ingestion_schema import CloneConfig
99
from gitingest.utils.timeout_wrapper import async_timeout
1010

1111
TIMEOUT: int = 60
1212

1313

14-
@dataclass
15-
class CloneConfig:
16-
"""
17-
Configuration for cloning a Git repository.
18-
19-
This class holds the necessary parameters for cloning a repository to a local path, including
20-
the repository's URL, the target local path, and optional parameters for a specific commit or branch.
21-
22-
Attributes
23-
----------
24-
url : str
25-
The URL of the Git repository to clone.
26-
local_path : str
27-
The local directory where the repository will be cloned.
28-
commit : str, optional
29-
The specific commit hash to check out after cloning (default is None).
30-
branch : str, optional
31-
The branch to clone (default is None).
32-
subpath : str
33-
The subpath to clone from the repository (default is "/").
34-
"""
35-
36-
url: str
37-
local_path: str
38-
commit: Optional[str] = None
39-
branch: Optional[str] = None
40-
subpath: str = "/"
41-
blob: bool = False
42-
43-
4414
@async_timeout(TIMEOUT)
45-
async def clone_repo(config: CloneConfig) -> None:
15+
async def clone(config: CloneConfig) -> None:
4616
"""
4717
Clone a repository to a local path based on the provided configuration.
4818

src/gitingest/repository_ingest.py renamed to src/gitingest/entrypoint.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,10 @@
55
import shutil
66
from typing import Optional, Set, Tuple, Union
77

8-
from gitingest.cloning import clone_repo
8+
from gitingest.cloning import clone
99
from gitingest.config import TMP_BASE_PATH
1010
from gitingest.ingestion import ingest_query
11-
from gitingest.query_parsing import ParsedQuery, parse_query
11+
from gitingest.query_parsing import IngestionQuery, parse_query
1212

1313

1414
async def ingest_async(
@@ -53,37 +53,37 @@ async def ingest_async(
5353
Raises
5454
------
5555
TypeError
56-
If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type.
56+
If `clone` does not return a coroutine, or if the `source` is of an unsupported type.
5757
"""
5858
repo_cloned = False
5959

6060
try:
61-
parsed_query: ParsedQuery = await parse_query(
61+
query: IngestionQuery = await parse_query(
6262
source=source,
6363
max_file_size=max_file_size,
6464
from_web=False,
6565
include_patterns=include_patterns,
6666
ignore_patterns=exclude_patterns,
6767
)
6868

69-
if parsed_query.url:
70-
selected_branch = branch if branch else parsed_query.branch # prioritize branch argument
71-
parsed_query.branch = selected_branch
69+
if query.url:
70+
selected_branch = branch if branch else query.branch # prioritize branch argument
71+
query.branch = selected_branch
7272

73-
clone_config = parsed_query.extact_clone_config()
74-
clone_coroutine = clone_repo(clone_config)
73+
clone_config = query.extract_clone_config()
74+
clone_coroutine = clone(clone_config)
7575

7676
if inspect.iscoroutine(clone_coroutine):
7777
if asyncio.get_event_loop().is_running():
7878
await clone_coroutine
7979
else:
8080
asyncio.run(clone_coroutine)
8181
else:
82-
raise TypeError("clone_repo did not return a coroutine as expected.")
82+
raise TypeError("clone did not return a coroutine as expected.")
8383

8484
repo_cloned = True
8585

86-
summary, tree, content = ingest_query(parsed_query)
86+
summary, tree, content = ingest_query(query)
8787

8888
if output is not None:
8989
with open(output, "w", encoding="utf-8") as f:

src/gitingest/ingestion.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
88
from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType, FileSystemStats
99
from gitingest.output_formatters import format_node
10-
from gitingest.query_parsing import ParsedQuery
10+
from gitingest.query_parsing import IngestionQuery
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1212
from gitingest.utils.path_utils import _is_safe_symlink
1313

@@ -17,7 +17,7 @@
1717
import tomli as tomllib
1818

1919

20-
def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
20+
def ingest_query(query: IngestionQuery) -> Tuple[str, str, str]:
2121
"""
2222
Run the ingestion process for a parsed query.
2323
@@ -27,7 +27,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
2727
2828
Parameters
2929
----------
30-
query : ParsedQuery
30+
query : IngestionQuery
3131
The parsed query object containing information about the repository and query parameters.
3232
3333
Returns
@@ -87,7 +87,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
8787
return format_node(root_node, query)
8888

8989

90-
def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
90+
def apply_gitingest_file(path: Path, query: IngestionQuery) -> None:
9191
"""
9292
Apply the .gitingest file to the query object.
9393
@@ -98,7 +98,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
9898
----------
9999
path : Path
100100
The path of the directory to ingest.
101-
query : ParsedQuery
101+
query : IngestionQuery
102102
The parsed query object containing information about the repository and query parameters.
103103
It should have an attribute `ignore_patterns` which is either None or a set of strings.
104104
"""
@@ -154,7 +154,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
154154

155155
def _process_node(
156156
node: FileSystemNode,
157-
query: ParsedQuery,
157+
query: IngestionQuery,
158158
stats: FileSystemStats,
159159
) -> None:
160160
"""
@@ -167,7 +167,7 @@ def _process_node(
167167
----------
168168
node : FileSystemNode
169169
The current directory or file node being processed.
170-
query : ParsedQuery
170+
query : IngestionQuery
171171
The parsed query object containing information about the repository and query parameters.
172172
stats : FileSystemStats
173173
Statistics tracking object for the total file count and size.

src/gitingest/ingestion_schema.py

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,90 @@
1+
""" This module contains the dataclasses for the ingestion process. """
2+
3+
from dataclasses import dataclass
4+
from pathlib import Path
5+
from typing import Optional, Set
6+
7+
from pydantic import BaseModel, Field
8+
9+
from gitingest.config import MAX_FILE_SIZE
10+
11+
12+
@dataclass
13+
class CloneConfig:
14+
"""
15+
Configuration for cloning a Git repository.
16+
17+
This class holds the necessary parameters for cloning a repository to a local path, including
18+
the repository's URL, the target local path, and optional parameters for a specific commit or branch.
19+
20+
Attributes
21+
----------
22+
url : str
23+
The URL of the Git repository to clone.
24+
local_path : str
25+
The local directory where the repository will be cloned.
26+
commit : str, optional
27+
The specific commit hash to check out after cloning (default is None).
28+
branch : str, optional
29+
The branch to clone (default is None).
30+
subpath : str
31+
The subpath to clone from the repository (default is "/").
32+
"""
33+
34+
url: str
35+
local_path: str
36+
commit: Optional[str] = None
37+
branch: Optional[str] = None
38+
subpath: str = "/"
39+
blob: bool = False
40+
41+
42+
class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
43+
"""
44+
Pydantic model to store the parsed details of the repository or file path.
45+
"""
46+
47+
user_name: Optional[str] = None
48+
repo_name: Optional[str] = None
49+
local_path: Path
50+
url: Optional[str] = None
51+
slug: str
52+
id: str
53+
subpath: str = "/"
54+
type: Optional[str] = None
55+
branch: Optional[str] = None
56+
commit: Optional[str] = None
57+
max_file_size: int = Field(default=MAX_FILE_SIZE)
58+
ignore_patterns: Optional[Set[str]] = None
59+
include_patterns: Optional[Set[str]] = None
60+
61+
class Config:
62+
"""Pydantic model configuration."""
63+
64+
arbitrary_types_allowed = True
65+
66+
def extract_clone_config(self) -> CloneConfig:
67+
"""
68+
Extract the relevant fields for the CloneConfig object.
69+
70+
Returns
71+
-------
72+
CloneConfig
73+
A CloneConfig object containing the relevant fields.
74+
75+
Raises
76+
------
77+
ValueError
78+
If the 'url' parameter is not provided.
79+
"""
80+
if not self.url:
81+
raise ValueError("The 'url' parameter is required.")
82+
83+
return CloneConfig(
84+
url=self.url,
85+
local_path=str(self.local_path),
86+
commit=self.commit,
87+
branch=self.branch,
88+
subpath=self.subpath,
89+
blob=self.type == "blob",
90+
)

src/gitingest/output_formatters.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,10 @@
55
import tiktoken
66

77
from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType
8-
from gitingest.query_parsing import ParsedQuery
8+
from gitingest.query_parsing import IngestionQuery
99

1010

11-
def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str]:
11+
def format_node(node: FileSystemNode, query: IngestionQuery) -> Tuple[str, str, str]:
1212
"""
1313
Generate a summary, directory structure, and file contents for a given file system node.
1414
@@ -18,7 +18,7 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
1818
----------
1919
node : FileSystemNode
2020
The file system node to be summarized.
21-
query : ParsedQuery
21+
query : IngestionQuery
2222
The parsed query object containing information about the repository and query parameters.
2323
2424
Returns
@@ -47,15 +47,15 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
4747
return summary, tree, content
4848

4949

50-
def _create_summary_prefix(query: ParsedQuery, single_file: bool = False) -> str:
50+
def _create_summary_prefix(query: IngestionQuery, single_file: bool = False) -> str:
5151
"""
5252
Create a prefix string for summarizing a repository or local directory.
5353
5454
Includes repository name (if provided), commit/branch details, and subpath if relevant.
5555
5656
Parameters
5757
----------
58-
query : ParsedQuery
58+
query : IngestionQuery
5959
The parsed query object containing information about the repository and query parameters.
6060
single_file : bool
6161
A flag indicating whether the summary is for a single file, by default False.
@@ -108,7 +108,7 @@ def _gather_file_contents(node: FileSystemNode) -> str:
108108
return "\n".join(_gather_file_contents(child) for child in node.children)
109109

110110

111-
def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
111+
def _create_tree_structure(query: IngestionQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
112112
"""
113113
Generate a tree-like string representation of the file structure.
114114
@@ -117,7 +117,7 @@ def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str
117117
118118
Parameters
119119
----------
120-
query : ParsedQuery
120+
query : IngestionQuery
121121
The parsed query object containing information about the repository and query parameters.
122122
node : FileSystemNode
123123
The current directory or file node being processed.

0 commit comments

Comments
 (0)