From 579894d36aad22231f914c96d3de81df81748715 Mon Sep 17 00:00:00 2001 From: mickael Date: Mon, 28 Jul 2025 17:46:38 +0200 Subject: [PATCH 1/5] feat: serve cached digest if available --- src/server/query_processor.py | 254 +++++++++++++++++++++++++++++----- src/server/s3_utils.py | 186 +++++++++++++++++++++++++ 2 files changed, 404 insertions(+), 36 deletions(-) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 172330ac..476dd9bf 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -2,19 +2,219 @@ from __future__ import annotations +import logging from pathlib import Path -from typing import cast +from typing import TYPE_CHECKING, cast from gitingest.clone import clone_repo from gitingest.ingestion import ingest_query from gitingest.query_parser import parse_remote_repo -from gitingest.utils.git_utils import validate_github_token +from gitingest.utils.git_utils import resolve_commit, validate_github_token from gitingest.utils.pattern_utils import process_patterns from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType -from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3 +from server.s3_utils import ( + _build_s3_url, + check_s3_object_exists, + generate_s3_file_path, + get_metadata_from_s3, + is_s3_enabled, + upload_metadata_to_s3, + upload_to_s3, +) from server.server_config import MAX_DISPLAY_SIZE from server.server_utils import Colors +if TYPE_CHECKING: + from gitingest.schemas.cloning import CloneConfig + from gitingest.schemas.ingestion import IngestionQuery + +logger = logging.getLogger(__name__) + + +async def _check_s3_cache( + query: IngestionQuery, + input_text: str, + max_file_size: int, + pattern_type: str, + pattern: str, + token: str | None, +) -> IngestSuccessResponse | None: + """Check if digest already exists on S3 and return response if found. + + Parameters + ---------- + query : IngestionQuery + The parsed query object. + input_text : str + Original input text. + max_file_size : int + Maximum file size in KB. + pattern_type : str + Pattern type (include/exclude). + pattern : str + Pattern string. + token : str | None + GitHub token. + + Returns + ------- + IngestSuccessResponse | None + Response if file exists on S3, None otherwise. + + """ + if not is_s3_enabled(): + return None + + try: + # Use git ls-remote to get commit SHA without cloning + clone_config = query.extract_clone_config() + commit_sha = await resolve_commit(clone_config, token=token) + query.commit = commit_sha + + # Generate S3 file path using the resolved commit + s3_file_path = generate_s3_file_path( + source=query.url, + user_name=cast("str", query.user_name), + repo_name=cast("str", query.repo_name), + commit=commit_sha, + include_patterns=query.include_patterns, + ignore_patterns=query.ignore_patterns, + ) + + # Check if file exists on S3 + if check_s3_object_exists(s3_file_path): + # File exists on S3, serve it directly without cloning + s3_url = _build_s3_url(s3_file_path) + query.s3_url = s3_url + + short_repo_url = f"{query.user_name}/{query.repo_name}" + + # Try to get cached metadata + metadata = get_metadata_from_s3(s3_file_path) + + if metadata: + # Use cached metadata if available + summary = metadata.get( + "summary", + "Digest served from cache (S3). Download the full digest to see content details.", + ) + tree = metadata.get("tree", "Digest served from cache. 
Download the full digest to see the file tree.") + content = metadata.get( + "content", + "Digest served from cache. Download the full digest to see the content.", + ) + else: + # Fallback to placeholder messages if metadata not available + summary = "Digest served from cache (S3). Download the full digest to see content details." + tree = "Digest served from cache. Download the full digest to see the file tree." + content = "Digest served from cache. Download the full digest to see the content." + + return IngestSuccessResponse( + repo_url=input_text, + short_repo_url=short_repo_url, + summary=summary, + digest_url=s3_url, + tree=tree, + content=content, + default_max_file_size=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + ) + except Exception as exc: + # Log the exception but don't fail the entire request + logger.warning("S3 cache check failed, falling back to normal cloning: %s", exc) + + return None + + +def _store_digest_content( + query: IngestionQuery, + clone_config: CloneConfig, + digest_content: str, + summary: str, + tree: str, + content: str, +) -> None: + """Store digest content either to S3 or locally based on configuration. + + Parameters + ---------- + query : IngestionQuery + The query object containing repository information. + clone_config : CloneConfig + The clone configuration object. + digest_content : str + The complete digest content to store. + summary : str + The summary content for metadata. + tree : str + The tree content for metadata. + content : str + The file content for metadata. + + """ + if is_s3_enabled(): + # Upload to S3 instead of storing locally + s3_file_path = generate_s3_file_path( + source=query.url, + user_name=cast("str", query.user_name), + repo_name=cast("str", query.repo_name), + commit=query.commit, + include_patterns=query.include_patterns, + ignore_patterns=query.ignore_patterns, + ) + s3_url = upload_to_s3(content=digest_content, s3_file_path=s3_file_path, ingest_id=query.id) + + # Also upload metadata JSON for caching + metadata = { + "summary": summary, + "tree": tree, + "content": content, + } + try: + upload_metadata_to_s3(metadata=metadata, s3_file_path=s3_file_path, ingest_id=query.id) + logger.debug("Successfully uploaded metadata to S3") + except Exception as metadata_exc: + # Log the error but don't fail the entire request + logger.warning("Failed to upload metadata to S3: %s", metadata_exc) + + # Store S3 URL in query for later use + query.s3_url = s3_url + else: + # Store locally + local_txt_file = Path(clone_config.local_path).with_suffix(".txt") + with local_txt_file.open("w", encoding="utf-8") as f: + f.write(digest_content) + + +def _generate_digest_url(query: IngestionQuery) -> str: + """Generate the digest URL based on S3 configuration. + + Parameters + ---------- + query : IngestionQuery + The query object containing repository information. + + Returns + ------- + str + The digest URL. + + Raises + ------ + RuntimeError + If S3 is enabled but no S3 URL was generated. 
+ + """ + if is_s3_enabled(): + digest_url = getattr(query, "s3_url", None) + if not digest_url: + # This should not happen if S3 upload was successful + msg = "S3 is enabled but no S3 URL was generated" + raise RuntimeError(msg) + return digest_url + return f"/api/download/file/{query.id}" + async def process_query( input_text: str, @@ -69,10 +269,22 @@ async def process_query( include_patterns=pattern if pattern_type == PatternType.INCLUDE else None, ) + # Check if digest already exists on S3 before cloning + s3_response = await _check_s3_cache( + query=query, + input_text=input_text, + max_file_size=max_file_size, + pattern_type=pattern_type.value, + pattern=pattern, + token=token, + ) + if s3_response: + return s3_response + clone_config = query.extract_clone_config() await clone_repo(clone_config, token=token) - short_repo_url = f"{query.user_name}/{query.repo_name}" # Sets the "/" for the page title + short_repo_url = f"{query.user_name}/{query.repo_name}" # The commit hash should always be available at this point if not query.commit: @@ -81,30 +293,8 @@ async def process_query( try: summary, tree, content = ingest_query(query) - - # Prepare the digest content (tree + content) digest_content = tree + "\n" + content - - # Store digest based on S3 configuration - if is_s3_enabled(): - # Upload to S3 instead of storing locally - s3_file_path = generate_s3_file_path( - source=query.url, - user_name=cast("str", query.user_name), - repo_name=cast("str", query.repo_name), - commit=query.commit, - include_patterns=query.include_patterns, - ignore_patterns=query.ignore_patterns, - ) - s3_url = upload_to_s3(content=digest_content, s3_file_path=s3_file_path, ingest_id=query.id) - # Store S3 URL in query for later use - query.s3_url = s3_url - else: - # Store locally - local_txt_file = Path(clone_config.local_path).with_suffix(".txt") - with local_txt_file.open("w", encoding="utf-8") as f: - f.write(digest_content) - + _store_digest_content(query, clone_config, digest_content, summary, tree, content) except Exception as exc: _print_error(query.url, exc, max_file_size, pattern_type, pattern) return IngestErrorResponse(error=str(exc)) @@ -123,15 +313,7 @@ async def process_query( summary=summary, ) - # Generate digest_url based on S3 configuration - if is_s3_enabled(): - digest_url = getattr(query, "s3_url", None) - if not digest_url: - # This should not happen if S3 upload was successful - msg = "S3 is enabled but no S3 URL was generated" - raise RuntimeError(msg) - else: - digest_url = f"/api/download/file/{query.id}" + digest_url = _generate_digest_url(query) return IngestSuccessResponse( repo_url=input_text, diff --git a/src/server/s3_utils.py b/src/server/s3_utils.py index a30a957f..26fb32e0 100644 --- a/src/server/s3_utils.py +++ b/src/server/s3_utils.py @@ -3,6 +3,7 @@ from __future__ import annotations import hashlib +import json import logging import os from typing import TYPE_CHECKING @@ -231,6 +232,149 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str: return public_url +def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> str: + """Upload metadata JSON to S3 alongside the digest file. + + Parameters + ---------- + metadata : dict + The metadata dictionary containing summary, tree, and content. + s3_file_path : str + The S3 file path for the digest (metadata will use .json extension). + ingest_id : UUID + The ingest ID to store as an S3 object tag. + + Returns + ------- + str + Public URL to access the uploaded metadata file. 
+ + Raises + ------ + ValueError + If S3 is not enabled. + S3UploadError + If the upload to S3 fails. + + """ + if not is_s3_enabled(): + msg = "S3 is not enabled" + logger.error(msg) + raise ValueError(msg) + + # Generate metadata file path by replacing .txt with .json + metadata_file_path = s3_file_path.replace(".txt", ".json") + + s3_client = create_s3_client() + bucket_name = get_s3_bucket_name() + + extra_fields = { + "bucket_name": bucket_name, + "metadata_file_path": metadata_file_path, + "ingest_id": str(ingest_id), + "metadata_size": len(json.dumps(metadata)), + } + + # Log upload attempt + logger.debug("Starting S3 metadata upload", extra=extra_fields) + + try: + # Upload the metadata with ingest_id as tag + s3_client.put_object( + Bucket=bucket_name, + Key=metadata_file_path, + Body=json.dumps(metadata, indent=2).encode("utf-8"), + ContentType="application/json", + Tagging=f"ingest_id={ingest_id!s}", + ) + except ClientError as err: + # Log upload failure + logger.exception( + "S3 metadata upload failed", + extra={ + "bucket_name": bucket_name, + "metadata_file_path": metadata_file_path, + "ingest_id": str(ingest_id), + "error_code": err.response.get("Error", {}).get("Code"), + "error_message": str(err), + }, + ) + msg = f"Failed to upload metadata to S3: {err}" + raise S3UploadError(msg) from err + + # Generate public URL + alias_host = get_s3_alias_host() + if alias_host: + # Use alias host if configured + public_url = f"{alias_host.rstrip('/')}/{metadata_file_path}" + else: + # Fallback to direct S3 URL + endpoint = get_s3_config().get("endpoint_url") + if endpoint: + public_url = f"{endpoint.rstrip('/')}/{bucket_name}/{metadata_file_path}" + else: + public_url = ( + f"https://{bucket_name}.s3.{get_s3_config()['region_name']}.amazonaws.com/{metadata_file_path}" + ) + + # Log successful upload + logger.debug( + "S3 metadata upload completed successfully", + extra={ + "bucket_name": bucket_name, + "metadata_file_path": metadata_file_path, + "ingest_id": str(ingest_id), + "public_url": public_url, + }, + ) + + return public_url + + +def get_metadata_from_s3(s3_file_path: str) -> dict | None: + """Retrieve metadata JSON from S3. + + Parameters + ---------- + s3_file_path : str + The S3 file path for the digest (metadata will use .json extension). + + Returns + ------- + dict | None + The metadata dictionary if found, None otherwise. 
+ + """ + if not is_s3_enabled(): + return None + + # Generate metadata file path by replacing .txt with .json + metadata_file_path = s3_file_path.replace(".txt", ".json") + + try: + s3_client = create_s3_client() + bucket_name = get_s3_bucket_name() + + # Get the metadata object + response = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_path) + metadata_content = response["Body"].read().decode("utf-8") + + return json.loads(metadata_content) + except ClientError as err: + # Object doesn't exist if we get a 404 error + error_code = err.response.get("Error", {}).get("Code") + if error_code == "404": + logger.debug("Metadata file not found: %s", metadata_file_path) + return None + # Log other errors but don't fail + logger.warning("Failed to retrieve metadata from S3: %s", err) + return None + except Exception as exc: + # For any other exception, log and return None + logger.warning("Unexpected error retrieving metadata from S3: %s", exc) + return None + + def _build_s3_url(key: str) -> str: """Build S3 URL for a given key.""" alias_host = get_s3_alias_host() @@ -257,6 +401,48 @@ def _check_object_tags(s3_client: BaseClient, bucket_name: str, key: str, target return False +def check_s3_object_exists(s3_file_path: str) -> bool: + """Check if an S3 object exists at the given path. + + Parameters + ---------- + s3_file_path : str + The S3 file path to check. + + Returns + ------- + bool + True if the object exists, False otherwise. + + Raises + ------ + ClientError + If there's an S3 error other than 404 (not found). + + """ + if not is_s3_enabled(): + return False + + try: + s3_client = create_s3_client() + bucket_name = get_s3_bucket_name() + + # Use head_object to check if the object exists without downloading it + s3_client.head_object(Bucket=bucket_name, Key=s3_file_path) + except ClientError as err: + # Object doesn't exist if we get a 404 error + error_code = err.response.get("Error", {}).get("Code") + if error_code == "404": + return False + # Re-raise other errors (permissions, etc.) + raise + except Exception: + # For any other exception, assume object doesn't exist + return False + else: + return True + + def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None: """Get S3 URL for a given ingest ID if it exists. From 9af0dcc180ba93505338f0e9183eb60ec7b8e71e Mon Sep 17 00:00:00 2001 From: mickael Date: Mon, 28 Jul 2025 19:23:13 +0200 Subject: [PATCH 2/5] feat: add S3Metadata model for structured metadata handling --- src/server/models.py | 19 +++++++++++++++++++ src/server/query_processor.py | 24 +++++++++--------------- src/server/s3_utils.py | 22 ++++++++++++---------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/server/models.py b/src/server/models.py index 533da611..97739416 100644 --- a/src/server/models.py +++ b/src/server/models.py @@ -116,6 +116,25 @@ class IngestErrorResponse(BaseModel): IngestResponse = Union[IngestSuccessResponse, IngestErrorResponse] +class S3Metadata(BaseModel): + """Model for S3 metadata structure. + + Attributes + ---------- + summary : str + Summary of the ingestion process including token estimates. + tree : str + File tree structure of the repository. + content : str + Processed content from the repository files. + + """ + + summary: str = Field(..., description="Ingestion summary with token estimates") + tree: str = Field(..., description="File tree structure") + content: str = Field(..., description="Processed file content") + + class QueryForm(BaseModel): """Form data for the query. 
diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 476dd9bf..12924d2f 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -11,7 +11,7 @@ from gitingest.query_parser import parse_remote_repo from gitingest.utils.git_utils import resolve_commit, validate_github_token from gitingest.utils.pattern_utils import process_patterns -from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType +from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata from server.s3_utils import ( _build_s3_url, check_s3_object_exists, @@ -94,15 +94,9 @@ async def _check_s3_cache( if metadata: # Use cached metadata if available - summary = metadata.get( - "summary", - "Digest served from cache (S3). Download the full digest to see content details.", - ) - tree = metadata.get("tree", "Digest served from cache. Download the full digest to see the file tree.") - content = metadata.get( - "content", - "Digest served from cache. Download the full digest to see the content.", - ) + summary = metadata.summary + tree = metadata.tree + content = metadata.content else: # Fallback to placeholder messages if metadata not available summary = "Digest served from cache (S3). Download the full digest to see content details." @@ -166,11 +160,11 @@ def _store_digest_content( s3_url = upload_to_s3(content=digest_content, s3_file_path=s3_file_path, ingest_id=query.id) # Also upload metadata JSON for caching - metadata = { - "summary": summary, - "tree": tree, - "content": content, - } + metadata = S3Metadata( + summary=summary, + tree=tree, + content=content, + ) try: upload_metadata_to_s3(metadata=metadata, s3_file_path=s3_file_path, ingest_id=query.id) logger.debug("Successfully uploaded metadata to S3") diff --git a/src/server/s3_utils.py b/src/server/s3_utils.py index 26fb32e0..e1d38cfa 100644 --- a/src/server/s3_utils.py +++ b/src/server/s3_utils.py @@ -3,7 +3,6 @@ from __future__ import annotations import hashlib -import json import logging import os from typing import TYPE_CHECKING @@ -13,9 +12,12 @@ import boto3 from botocore.exceptions import ClientError +from server.models import S3Metadata + if TYPE_CHECKING: from botocore.client import BaseClient + # Initialize logger for this module logger = logging.getLogger(__name__) @@ -232,13 +234,13 @@ def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str: return public_url -def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> str: +def upload_metadata_to_s3(metadata: S3Metadata, s3_file_path: str, ingest_id: UUID) -> str: """Upload metadata JSON to S3 alongside the digest file. Parameters ---------- - metadata : dict - The metadata dictionary containing summary, tree, and content. + metadata : S3Metadata + The metadata struct containing summary, tree, and content. s3_file_path : str The S3 file path for the digest (metadata will use .json extension). 
ingest_id : UUID @@ -272,7 +274,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> "bucket_name": bucket_name, "metadata_file_path": metadata_file_path, "ingest_id": str(ingest_id), - "metadata_size": len(json.dumps(metadata)), + "metadata_size": len(metadata.model_dump_json()), } # Log upload attempt @@ -283,7 +285,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> s3_client.put_object( Bucket=bucket_name, Key=metadata_file_path, - Body=json.dumps(metadata, indent=2).encode("utf-8"), + Body=metadata.model_dump_json(indent=2).encode("utf-8"), ContentType="application/json", Tagging=f"ingest_id={ingest_id!s}", ) @@ -331,7 +333,7 @@ def upload_metadata_to_s3(metadata: dict, s3_file_path: str, ingest_id: UUID) -> return public_url -def get_metadata_from_s3(s3_file_path: str) -> dict | None: +def get_metadata_from_s3(s3_file_path: str) -> S3Metadata | None: """Retrieve metadata JSON from S3. Parameters @@ -341,8 +343,8 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None: Returns ------- - dict | None - The metadata dictionary if found, None otherwise. + S3Metadata | None + The metadata struct if found, None otherwise. """ if not is_s3_enabled(): @@ -359,7 +361,7 @@ def get_metadata_from_s3(s3_file_path: str) -> dict | None: response = s3_client.get_object(Bucket=bucket_name, Key=metadata_file_path) metadata_content = response["Body"].read().decode("utf-8") - return json.loads(metadata_content) + return S3Metadata.model_validate_json(metadata_content) except ClientError as err: # Object doesn't exist if we get a 404 error error_code = err.response.get("Error", {}).get("Code") From 88f0754e2c861121f74648965487f85120307f1d Mon Sep 17 00:00:00 2001 From: Nicolas IRAGNE Date: Mon, 28 Jul 2025 22:43:37 +0200 Subject: [PATCH 3/5] feat: add prometheus metrics for s3 cache --- src/server/s3_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/server/s3_utils.py b/src/server/s3_utils.py index e1d38cfa..f7f14ad0 100644 --- a/src/server/s3_utils.py +++ b/src/server/s3_utils.py @@ -11,6 +11,7 @@ import boto3 from botocore.exceptions import ClientError +from prometheus_client import Counter from server.models import S3Metadata @@ -21,6 +22,10 @@ # Initialize logger for this module logger = logging.getLogger(__name__) +_cache_lookup_counter = Counter("gitingest_cache_lookup", "Number of cache lookups", ["url"]) +_cache_hit_counter = Counter("gitingest_cache_hit", "Number of cache hits", ["url"]) +_cache_miss_counter = Counter("gitingest_cache_miss", "Number of cache misses", ["url"]) + class S3UploadError(Exception): """Custom exception for S3 upload failures.""" @@ -424,7 +429,7 @@ def check_s3_object_exists(s3_file_path: str) -> bool: """ if not is_s3_enabled(): return False - + _cache_lookup_counter.labels(url=s3_file_path).inc() try: s3_client = create_s3_client() bucket_name = get_s3_bucket_name() @@ -435,13 +440,16 @@ def check_s3_object_exists(s3_file_path: str) -> bool: # Object doesn't exist if we get a 404 error error_code = err.response.get("Error", {}).get("Code") if error_code == "404": + _cache_miss_counter.labels(url=s3_file_path).inc() return False # Re-raise other errors (permissions, etc.) 
raise except Exception: # For any other exception, assume object doesn't exist + _cache_miss_counter.labels(url=s3_file_path).inc() return False else: + _cache_hit_counter.labels(url=s3_file_path).inc() return True From 34fe40a608dd9fd8d75b5053850fde9773bd41a9 Mon Sep 17 00:00:00 2001 From: mickael Date: Tue, 29 Jul 2025 16:45:17 +0200 Subject: [PATCH 4/5] refactor: simplify query commit resolution Combine steps to resolve and assign commit in query_processor. --- src/server/query_processor.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 12924d2f..7a55bfd1 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -68,15 +68,13 @@ async def _check_s3_cache( try: # Use git ls-remote to get commit SHA without cloning clone_config = query.extract_clone_config() - commit_sha = await resolve_commit(clone_config, token=token) - query.commit = commit_sha - + query.commit = await resolve_commit(clone_config, token=token) # Generate S3 file path using the resolved commit s3_file_path = generate_s3_file_path( source=query.url, user_name=cast("str", query.user_name), repo_name=cast("str", query.repo_name), - commit=commit_sha, + commit=query.commit, include_patterns=query.include_patterns, ignore_patterns=query.ignore_patterns, ) From 48fc139bb850b8f3c243908a2e372620f79567cf Mon Sep 17 00:00:00 2001 From: mickael Date: Tue, 29 Jul 2025 16:57:52 +0200 Subject: [PATCH 5/5] ci: disable fail-fast in matrix strategy --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17e6628a..ed498934 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: test: runs-on: ${{ matrix.os }} strategy: - fail-fast: true + fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
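
A quick way to sanity-check the metadata round-trip introduced in patches 1-3 (upload_metadata_to_s3 / get_metadata_from_s3 / check_s3_object_exists) is a small moto-backed test. The sketch below is illustrative only and is not part of the series: the test module, bucket name, and object key are made up, the monkeypatched config helpers (is_s3_enabled, get_s3_bucket_name, create_s3_client, get_s3_alias_host, get_s3_config) are assumptions about how the surrounding test setup would isolate S3 configuration, and it presumes moto >= 5 and pydantic v2 model equality. In the real flow the key would come from generate_s3_file_path; a ".txt" suffix is assumed because the patch derives the metadata key via .replace(".txt", ".json").

# tests/server/test_s3_metadata_roundtrip.py  (hypothetical location)
import uuid

import boto3
import pytest
from moto import mock_aws

from server import s3_utils
from server.models import S3Metadata


@mock_aws
def test_metadata_round_trip(monkeypatch: pytest.MonkeyPatch) -> None:
    bucket = "gitingest-test"  # assumed bucket name for the mocked backend
    boto3.client("s3", region_name="us-east-1").create_bucket(Bucket=bucket)

    # Force the module to treat S3 as enabled and point it at the moto-mocked bucket,
    # so no real environment variables or credentials are needed.
    monkeypatch.setattr(s3_utils, "is_s3_enabled", lambda: True)
    monkeypatch.setattr(s3_utils, "get_s3_bucket_name", lambda: bucket)
    monkeypatch.setattr(s3_utils, "create_s3_client", lambda: boto3.client("s3", region_name="us-east-1"))
    monkeypatch.setattr(s3_utils, "get_s3_alias_host", lambda: None)
    monkeypatch.setattr(s3_utils, "get_s3_config", lambda: {"region_name": "us-east-1", "endpoint_url": None})

    # Hypothetical digest key; in production this is produced by generate_s3_file_path().
    key = "ingest/user/repo/abc123/digest.txt"
    meta = S3Metadata(summary="s", tree="t", content="c")

    # Upload writes the sidecar .json next to the digest key and tags it with the ingest id.
    s3_utils.upload_metadata_to_s3(metadata=meta, s3_file_path=key, ingest_id=uuid.uuid4())

    # Reading back through the same digest key should reconstruct the pydantic model.
    assert s3_utils.get_metadata_from_s3(key) == meta

    # Only the .json sidecar was uploaded, so the digest itself still reads as a cache miss
    # (this path also exercises the lookup/miss Prometheus counters from patch 3).
    assert not s3_utils.check_s3_object_exists(key)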