diff --git a/src/server/main.py b/src/server/main.py index e2d68c37..d973c387 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -19,7 +19,7 @@ from server.metrics_server import start_metrics_server from server.routers import dynamic, index, ingest from server.server_config import templates -from server.server_utils import lifespan, limiter, rate_limit_exception_handler +from server.server_utils import limiter, rate_limit_exception_handler # Load environment variables from .env file load_dotenv() @@ -55,8 +55,8 @@ environment=sentry_environment, ) -# Initialize the FastAPI application with lifespan -app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None) +# Initialize the FastAPI application +app = FastAPI(docs_url=None, redoc_url=None) app.state.limiter = limiter # Register the custom exception handler for rate limits diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 5cb22d27..d568a21f 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import shutil from pathlib import Path from typing import TYPE_CHECKING, cast @@ -31,6 +32,17 @@ from gitingest.schemas.ingestion import IngestionQuery +def _cleanup_repository(clone_config: CloneConfig) -> None: + """Clean up the cloned repository after processing.""" + try: + local_path = Path(clone_config.local_path) + if local_path.exists(): + shutil.rmtree(local_path) + logger.info("Successfully cleaned up repository", extra={"local_path": str(local_path)}) + except (PermissionError, OSError): + logger.exception("Could not delete repository", extra={"local_path": str(clone_config.local_path)}) + + async def _check_s3_cache( query: IngestionQuery, input_text: str, @@ -292,6 +304,8 @@ async def process_query( _store_digest_content(query, clone_config, digest_content, summary, tree, content) except Exception as exc: _print_error(query.url, exc, max_file_size, pattern_type, pattern) + # Clean up repository even if processing failed + _cleanup_repository(clone_config) return IngestErrorResponse(error=str(exc)) if len(content) > MAX_DISPLAY_SIZE: @@ -310,6 +324,9 @@ async def process_query( digest_url = _generate_digest_url(query) + # Clean up the repository after successful processing + _cleanup_repository(clone_config) + return IngestSuccessResponse( repo_url=input_text, short_repo_url=short_repo_url, diff --git a/src/server/server_config.py b/src/server/server_config.py index d0b51c4d..6918bf24 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -7,7 +7,6 @@ from fastapi.templating import Jinja2Templates MAX_DISPLAY_SIZE: int = 300_000 -DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour) # Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js) DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 mb diff --git a/src/server/server_utils.py b/src/server/server_utils.py index ffeaad51..9dec3ce2 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -1,21 +1,12 @@ """Utility functions for the server.""" -import asyncio -import shutil -import time -from contextlib import asynccontextmanager, suppress -from pathlib import Path -from typing import AsyncGenerator - -from fastapi import FastAPI, Request +from fastapi import Request from fastapi.responses import Response from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi.errors import RateLimitExceeded from slowapi.util import get_remote_address -from gitingest.config import TMP_BASE_PATH from gitingest.utils.logging_config import get_logger -from server.server_config import DELETE_REPO_AFTER # Initialize logger for this module logger = get_logger(__name__) @@ -52,118 +43,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp raise exc -@asynccontextmanager -async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]: - """Manage startup & graceful-shutdown tasks for the FastAPI app. - - Returns - ------- - AsyncGenerator[None, None] - Yields control back to the FastAPI application while the background task runs. - - """ - task = asyncio.create_task(_remove_old_repositories()) - - yield # app runs while the background task is alive - - task.cancel() # ask the worker to stop - with suppress(asyncio.CancelledError): - await task # swallow the cancellation signal - - -async def _remove_old_repositories( - base_path: Path = TMP_BASE_PATH, - scan_interval: int = 60, - delete_after: int = DELETE_REPO_AFTER, -) -> None: - """Periodically delete old repositories/directories. - - Every ``scan_interval`` seconds the coroutine scans ``base_path`` and deletes directories older than - ``delete_after`` seconds. The repository URL is extracted from the first ``.txt`` file in each directory - and appended to ``history.txt``, assuming the filename format: "owner-repository.txt". Filesystem errors are - logged and the loop continues. - - Parameters - ---------- - base_path : Path - The path to the base directory where repositories are stored (default: ``TMP_BASE_PATH``). - scan_interval : int - The number of seconds between scans (default: 60). - delete_after : int - The number of seconds after which a repository is considered old and will be deleted - (default: ``DELETE_REPO_AFTER``). - - """ - while True: - if not base_path.exists(): - await asyncio.sleep(scan_interval) - continue - - now = time.time() - try: - for folder in base_path.iterdir(): - if now - folder.stat().st_ctime <= delete_after: # Not old enough - continue - - await _process_folder(folder) - - except (OSError, PermissionError): - logger.exception("Error in repository cleanup", extra={"base_path": str(base_path)}) - - await asyncio.sleep(scan_interval) - - -async def _process_folder(folder: Path) -> None: - """Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``. - - Parameters - ---------- - folder : Path - The path to the folder to be processed. - - """ - history_file = Path("history.txt") - loop = asyncio.get_running_loop() - - try: - first_txt_file = next(folder.glob("*.txt")) - except StopIteration: # No .txt file found - return - - # Append owner/repo to history.txt - try: - filename = first_txt_file.stem # "owner-repo" - if "-" in filename: - owner, repo = filename.split("-", 1) - repo_url = f"{owner}/{repo}" - await loop.run_in_executor(None, _append_line, history_file, repo_url) - except (OSError, PermissionError): - logger.exception("Error logging repository URL", extra={"folder": str(folder)}) - - # Delete the cloned repo - try: - await loop.run_in_executor(None, shutil.rmtree, folder) - except PermissionError: - logger.exception("No permission to delete folder", extra={"folder": str(folder)}) - except OSError: - logger.exception("Could not delete folder", extra={"folder": str(folder)}) - - -def _append_line(path: Path, line: str) -> None: - """Append a line to a file. - - Parameters - ---------- - path : Path - The path to the file to append the line to. - line : str - The line to append to the file. - - """ - with path.open("a", encoding="utf-8") as fp: - fp.write(f"{line}\n") - - ## Color printing utility class Colors: """ANSI color codes."""