Skip to content

Commit

Permalink
chore: use exception to signal intent
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesGuthrie committed Dec 5, 2024
1 parent 768ed78 commit 3b4f56d
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
44 changes: 28 additions & 16 deletions projects/pgai/pgai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import time
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any

import click
Expand All @@ -26,6 +27,23 @@
log = structlog.get_logger()


@dataclass
class VectorizerNotFound(Exception):
    """Raised when no vectorizer row exists for the requested id.

    Derives from ``Exception`` (not ``BaseException``) because it is an
    ordinary application error: ``BaseException`` is reserved for
    interpreter-level exits such as ``SystemExit`` and ``KeyboardInterrupt``,
    and subclasses of it slip past generic ``except Exception`` boundaries.

    Attributes:
        vectorizer_id: Id of the vectorizer that could not be found.
    """

    vectorizer_id: int

    def __post_init__(self) -> None:
        # The dataclass-generated __init__ never calls Exception.__init__, so
        # ``args`` would stay empty when the error is built with keyword
        # arguments. Populating it keeps copy/pickle reconstruction working.
        super().__init__(self.vectorizer_id)

    def __str__(self) -> str:
        """Return the human-readable message used in log output."""
        return f"Vectorizer not found vectorizer_id={self.vectorizer_id}"


@dataclass
class APIKeyNotFound(Exception):
    """Raised when the API key a vectorizer needs is not available.

    Derives from ``Exception`` (not ``BaseException``) because it is an
    ordinary application error: ``BaseException`` is reserved for
    interpreter-level exits such as ``SystemExit`` and ``KeyboardInterrupt``,
    and subclasses of it slip past generic ``except Exception`` boundaries.

    Attributes:
        vectorizer_id: Id of the vectorizer whose key lookup failed.
        api_key_name: Name under which the API key was looked up.
    """

    vectorizer_id: int
    api_key_name: str

    def __post_init__(self) -> None:
        # The dataclass-generated __init__ never calls Exception.__init__, so
        # ``args`` would stay empty when the error is built with keyword
        # arguments. Populating it keeps copy/pickle reconstruction working.
        super().__init__(self.vectorizer_id, self.api_key_name)

    def __str__(self) -> str:
        """Return the human-readable message used in log output."""
        return (
            f"API key not found vectorizer_id={self.vectorizer_id} "
            f'api_key_name="{self.api_key_name}"'
        )


def asbool(value: str | None):
"""Convert the given String to a boolean object.
Expand Down Expand Up @@ -76,7 +94,7 @@ def get_vectorizer_ids(
return valid_vectorizer_ids


def get_vectorizer(db_url: str, vectorizer_id: int) -> Vectorizer | None:
def get_vectorizer(db_url: str, vectorizer_id: int) -> Vectorizer:
with (
psycopg.Connection.connect(db_url) as con,
con.cursor(row_factory=dict_row) as cur,
Expand All @@ -87,8 +105,7 @@ def get_vectorizer(db_url: str, vectorizer_id: int) -> Vectorizer | None:
)
row = cur.fetchone()
if row is None:
log.warning("vectorizer not found", vectorizer_id=vectorizer_id)
return None
raise VectorizerNotFound(vectorizer_id=vectorizer_id)
vectorizer = row["vectorizer"]
embedding = vectorizer["config"]["embedding"]
vectorizer = Vectorizer(**vectorizer)
Expand All @@ -97,12 +114,9 @@ def get_vectorizer(db_url: str, vectorizer_id: int) -> Vectorizer | None:
api_key_name = embedding["api_key_name"]
api_key = os.getenv(api_key_name, None)
if api_key is None:
log.error(
"API key not found",
api_key_name=api_key_name,
vectorizer_id=vectorizer_id,
raise APIKeyNotFound(
api_key_name=api_key_name, vectorizer_id=vectorizer_id
)
return None
secrets: dict[str, str | None] = {api_key_name: api_key}
# The Ollama API doesn't need a key, so doesn't support `set_api_key`
set_api_key = getattr(vectorizer.config.embedding, "set_api_key", None)
Expand Down Expand Up @@ -284,16 +298,14 @@ def vectorizer_worker(
log.warning("no vectorizers found")

for vectorizer_id in valid_vectorizer_ids:
vectorizer = get_vectorizer(db_url, vectorizer_id)
if vectorizer is None:
log.error(
"error fetching vectorizer", vectorizer_id=vectorizer_id
)
try:
vectorizer = get_vectorizer(db_url, vectorizer_id)
log.info("running vectorizer", vectorizer_id=vectorizer_id)
run_vectorizer(db_url, vectorizer, concurrency)
except (VectorizerNotFound, APIKeyNotFound) as e:
log.error(f"error getting vectorizer: {e}")
if exit_on_error:
sys.exit(1)
continue
log.info("running vectorizer", vectorizer_id=vectorizer_id)
run_vectorizer(db_url, vectorizer, concurrency)
except psycopg.OperationalError as e:
if "connection failed" in str(e):
log.error(f"unable to connect to database: {str(e)}")
Expand Down
1 change: 1 addition & 0 deletions projects/pgai/tests/vectorizer/test_vectorizer_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ def test_voyageai_vectorizer_fails_when_api_key_is_not_set(
)

assert result.exit_code == 1
assert "API key not found" in result.stdout


def test_voyageai_vectorizer_handles_chunk_failure_correctly(
Expand Down

0 comments on commit 3b4f56d

Please sign in to comment.