Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from server.metrics_server import start_metrics_server
from server.routers import dynamic, index, ingest
from server.server_config import templates
from server.server_utils import lifespan, limiter, rate_limit_exception_handler
from server.server_utils import limiter, rate_limit_exception_handler

# Load environment variables from .env file
load_dotenv()
Expand Down Expand Up @@ -55,8 +55,8 @@
environment=sentry_environment,
)

# Initialize the FastAPI application with lifespan
app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
# Initialize the FastAPI application
app = FastAPI(docs_url=None, redoc_url=None)
app.state.limiter = limiter

# Register the custom exception handler for rate limits
Expand Down
17 changes: 17 additions & 0 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import shutil
from pathlib import Path
from typing import TYPE_CHECKING, cast

Expand Down Expand Up @@ -31,6 +32,17 @@
from gitingest.schemas.ingestion import IngestionQuery


def _cleanup_repository(clone_config: CloneConfig) -> None:
    """Remove the local clone described by ``clone_config``, if it is still on disk.

    Deletion is best-effort: a filesystem error is logged and swallowed so that cleanup never
    changes the outcome of the request that triggered it.

    Parameters
    ----------
    clone_config : CloneConfig
        Clone configuration whose ``local_path`` points at the directory to delete.

    """
    try:
        repo_dir = Path(clone_config.local_path)
        if not repo_dir.exists():
            return  # nothing on disk, nothing to report

        shutil.rmtree(repo_dir)
        logger.info("Successfully cleaned up repository", extra={"local_path": str(repo_dir)})
    except OSError:  # PermissionError is a subclass of OSError
        logger.exception("Could not delete repository", extra={"local_path": str(clone_config.local_path)})


async def _check_s3_cache(
query: IngestionQuery,
input_text: str,
Expand Down Expand Up @@ -292,6 +304,8 @@ async def process_query(
_store_digest_content(query, clone_config, digest_content, summary, tree, content)
except Exception as exc:
_print_error(query.url, exc, max_file_size, pattern_type, pattern)
# Clean up repository even if processing failed
_cleanup_repository(clone_config)
return IngestErrorResponse(error=str(exc))

if len(content) > MAX_DISPLAY_SIZE:
Expand All @@ -310,6 +324,9 @@ async def process_query(

digest_url = _generate_digest_url(query)

# Clean up the repository after successful processing
_cleanup_repository(clone_config)

return IngestSuccessResponse(
repo_url=input_text,
short_repo_url=short_repo_url,
Expand Down
1 change: 0 additions & 1 deletion src/server/server_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from fastapi.templating import Jinja2Templates

MAX_DISPLAY_SIZE: int = 300_000
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)

# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 mb
Expand Down
123 changes: 1 addition & 122 deletions src/server/server_utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
"""Utility functions for the server."""

import asyncio
import shutil
import time
from contextlib import asynccontextmanager, suppress
from pathlib import Path
from typing import AsyncGenerator

from fastapi import FastAPI, Request
from fastapi import Request
from fastapi.responses import Response
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

from gitingest.config import TMP_BASE_PATH
from gitingest.utils.logging_config import get_logger
from server.server_config import DELETE_REPO_AFTER

# Initialize logger for this module
logger = get_logger(__name__)
Expand Down Expand Up @@ -52,118 +43,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
raise exc


@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
    """Keep the repository-cleanup worker running for as long as the FastAPI app is up.

    The worker task is created before control is handed to the application and is cancelled
    once the application hands control back on shutdown.

    Yields
    ------
    None
        Control is yielded to the FastAPI application while the background task runs.

    """
    cleanup_worker = asyncio.create_task(_remove_old_repositories())

    # The application serves requests while suspended here.
    yield

    # Shutdown: request cancellation, then wait for the worker to unwind.
    cleanup_worker.cancel()
    with suppress(asyncio.CancelledError):  # the cancellation we just requested is expected
        await cleanup_worker


async def _remove_old_repositories(
    base_path: Path = TMP_BASE_PATH,
    scan_interval: int = 60,
    delete_after: int = DELETE_REPO_AFTER,
) -> None:
    """Scan ``base_path`` forever and hand stale entries to ``_process_folder``.

    One pass runs every ``scan_interval`` seconds. During a pass, each entry of ``base_path`` whose
    ``st_ctime`` lies more than ``delete_after`` seconds before the start of the pass is passed to
    ``_process_folder``. A filesystem error ends the current pass, is logged, and the loop carries on
    with the next pass. A missing ``base_path`` simply means there is nothing to do for that pass.

    Parameters
    ----------
    base_path : Path
        The directory whose entries are inspected (default: ``TMP_BASE_PATH``).
    scan_interval : int
        The number of seconds to sleep between passes (default: 60).
    delete_after : int
        The age in seconds beyond which an entry is treated as old (default: ``DELETE_REPO_AFTER``).

    """
    while True:
        if base_path.exists():
            # One timestamp per pass so every entry is measured against the same instant.
            pass_started = time.time()
            try:
                for entry in base_path.iterdir():
                    age = pass_started - entry.stat().st_ctime
                    if age > delete_after:
                        await _process_folder(entry)
            except OSError:  # PermissionError is a subclass of OSError
                logger.exception("Error in repository cleanup", extra={"base_path": str(base_path)})

        await asyncio.sleep(scan_interval)


async def _process_folder(folder: Path) -> None:
    """Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``.

    The repository name is taken from the first ``.txt`` file found directly inside ``folder``; its stem is
    expected to look like ``owner-repo`` and is recorded as ``owner/repo``. A directory without such a file
    is still deleted, only the history entry is skipped. Entries that are not directories are left alone.
    Filesystem errors (``OSError``) are logged rather than raised.

    Parameters
    ----------
    folder : Path
        The path to the folder to be processed.

    """
    # Stray files inside the base directory are not repositories; rmtree would only fail on them.
    if not folder.is_dir():
        return

    history_file = Path("history.txt")
    loop = asyncio.get_running_loop()

    # Append owner/repo to history.txt when a digest file is present.
    try:
        first_txt_file = next(folder.glob("*.txt"), None)
        if first_txt_file is not None:
            owner, separator, repo = first_txt_file.stem.partition("-")  # "owner-repo"
            if separator:
                repo_url = f"{owner}/{repo}"
                await loop.run_in_executor(None, _append_line, history_file, repo_url)
    except OSError:  # PermissionError is a subclass of OSError
        logger.exception("Error logging repository URL", extra={"folder": str(folder)})

    # Delete the cloned repo whether or not a history entry could be written; returning early
    # here would leave directories without a ``.txt`` file on disk forever.
    try:
        await loop.run_in_executor(None, shutil.rmtree, folder)
    except PermissionError:
        logger.exception("No permission to delete folder", extra={"folder": str(folder)})
    except OSError:
        logger.exception("Could not delete folder", extra={"folder": str(folder)})


def _append_line(path: Path, line: str) -> None:
    """Write ``line`` plus a trailing newline to the end of the file at ``path``.

    The file is opened in append mode with UTF-8 encoding, so existing content is preserved.

    Parameters
    ----------
    path : Path
        The file that receives the new line.
    line : str
        The text to append, without its terminating newline.

    """
    record = f"{line}\n"
    with path.open(mode="a", encoding="utf-8") as handle:
        handle.write(record)


## Color printing utility
class Colors:
"""ANSI color codes."""
Expand Down
Loading