diff --git a/.github/workflows/python-CI.yml b/.github/workflows/python-CI.yml
index 984014cf7e..a201434795 100644
--- a/.github/workflows/python-CI.yml
+++ b/.github/workflows/python-CI.yml
@@ -57,7 +57,11 @@ jobs:
           python-version: |
             3.8
             3.12
-      - run: pip install tox-uv==1.11.2
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
       - run: tox run-parallel --parallel-no-spinner -e py38-ci-pkg-phoenix_evals,py312-ci-pkg-phoenix_evals
         if: ${{ needs.changes.outputs.phoenix_evals == 'true' }}
 
@@ -136,3 +140,23 @@ jobs:
         if: runner.os == 'Windows'
         run: |
           hatch run test:tests
+
+  integration-test:
+    runs-on: ${{ matrix.os }}
+    needs: changes
+    if: ${{ needs.changes.outputs.phoenix == 'true' }}
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, windows-latest, windows-2019, macos-12 ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.8
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
+      - run: tox run -e ci-integration_tests -- server
+        timeout-minutes: 5
diff --git a/integration-tests/evals/test_multiple_classify_calls_anthropic.py b/integration_tests/evals/test_multiple_classify_calls_anthropic.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_anthropic.py
rename to integration_tests/evals/test_multiple_classify_calls_anthropic.py
index 8b8f38c855..41cfa4eb8a 100644
--- a/integration-tests/evals/test_multiple_classify_calls_anthropic.py
+++ b/integration_tests/evals/test_multiple_classify_calls_anthropic.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import AnthropicModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration-tests/evals/test_multiple_classify_calls_openai.py b/integration_tests/evals/test_multiple_classify_calls_openai.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_openai.py
rename to integration_tests/evals/test_multiple_classify_calls_openai.py
index 01e6cd7d53..450d0e1767 100644
--- a/integration-tests/evals/test_multiple_classify_calls_openai.py
+++ b/integration_tests/evals/test_multiple_classify_calls_openai.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import OpenAIModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration-tests/evals/test_multiple_classify_calls_vertex.py b/integration_tests/evals/test_multiple_classify_calls_vertex.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_vertex.py
rename to integration_tests/evals/test_multiple_classify_calls_vertex.py
index e21e52d246..90729f2952 100644
--- a/integration-tests/evals/test_multiple_classify_calls_vertex.py
+++ b/integration_tests/evals/test_multiple_classify_calls_vertex.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import GeminiModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration_tests/mypy.ini b/integration_tests/mypy.ini
new file mode 100644
index 0000000000..3529cc191f
--- /dev/null
+++ b/integration_tests/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+strict = true
+explicit_package_bases = true
diff --git a/integration-tests/eval_query_testing.ipynb b/integration_tests/notebooks/eval_query_testing.ipynb
similarity index 99%
rename from integration-tests/eval_query_testing.ipynb
rename to integration_tests/notebooks/eval_query_testing.ipynb
index 1a7bae1c4d..c76ac7c903 100644
--- a/integration-tests/eval_query_testing.ipynb
+++ b/integration_tests/notebooks/eval_query_testing.ipynb
@@ -6,10 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import phoenix as px\n",
     "from sqlalchemy import create_engine\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
-    "import phoenix as px"
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
diff --git a/integration-tests/graphql_query_performance.ipynb b/integration_tests/notebooks/graphql_query_performance.ipynb
similarity index 100%
rename from integration-tests/graphql_query_performance.ipynb
rename to integration_tests/notebooks/graphql_query_performance.ipynb
diff --git a/integration-tests/pagination_queries.ipynb b/integration_tests/notebooks/pagination_queries.ipynb
similarity index 98%
rename from integration-tests/pagination_queries.ipynb
rename to integration_tests/notebooks/pagination_queries.ipynb
index d74ceead64..20cad1ce70 100644
--- a/integration-tests/pagination_queries.ipynb
+++ b/integration_tests/notebooks/pagination_queries.ipynb
@@ -13,10 +13,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from phoenix.db import models\n",
     "from sqlalchemy import and_, create_engine, select\n",
-    "from sqlalchemy.orm import aliased, sessionmaker\n",
-    "\n",
-    "from phoenix.db import models"
+    "from sqlalchemy.orm import aliased, sessionmaker"
    ]
   },
   {
diff --git a/integration-tests/pagination_query_testing.ipynb b/integration_tests/notebooks/pagination_query_testing.ipynb
similarity index 99%
rename from integration-tests/pagination_query_testing.ipynb
rename to integration_tests/notebooks/pagination_query_testing.ipynb
index 80467e66b3..e251ac5bd2 100644
--- a/integration-tests/pagination_query_testing.ipynb
+++ b/integration_tests/notebooks/pagination_query_testing.ipynb
@@ -8,7 +8,6 @@
    "source": [
     "from gql import Client, gql\n",
     "from gql.transport.requests import RequestsHTTPTransport\n",
-    "\n",
     "from phoenix.server.api.types.pagination import (\n",
     "    Cursor,\n",
     "    CursorSortColumn,\n",
diff --git a/integration-tests/span_query_testing.ipynb b/integration_tests/notebooks/span_query_testing.ipynb
similarity index 99%
rename from integration-tests/span_query_testing.ipynb
rename to integration_tests/notebooks/span_query_testing.ipynb
index 81bdb9c5c1..41b88ad032 100644
--- a/integration-tests/span_query_testing.ipynb
+++ b/integration_tests/notebooks/span_query_testing.ipynb
@@ -19,13 +19,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sqlalchemy import create_engine, select\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
     "import phoenix as px\n",
     "from phoenix.db import models\n",
     "from phoenix.trace.dsl.helpers import get_qa_with_reference, get_retrieved_documents\n",
-    "from phoenix.trace.dsl.query import SpanQuery"
+    "from phoenix.trace.dsl.query import SpanQuery\n",
+    "from sqlalchemy import create_engine, select\n",
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
diff --git a/integration_tests/pytest.ini b/integration_tests/pytest.ini
new file mode 100644
index 0000000000..1df10d788b
--- /dev/null
+++ b/integration_tests/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = -raP -l
diff --git a/integration_tests/requirements.txt b/integration_tests/requirements.txt
new file mode 100644
index 0000000000..b13c91dd70
--- /dev/null
+++ b/integration_tests/requirements.txt
@@ -0,0 +1,7 @@
+faker
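+# faker supplies the random project and span names used by the server tests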
+openinference-semantic-conventions
+opentelemetry-sdk
+portpicker
+psutil
+types-psutil
diff --git a/integration_tests/ruff.toml b/integration_tests/ruff.toml
new file mode 100644
index 0000000000..25e88a14bb
--- /dev/null
+++ b/integration_tests/ruff.toml
@@ -0,0 +1,14 @@
+line-length = 100
+target-version = "py38"
+
+[lint]
+select = ["E", "F", "W", "I", "NPY201"]
+
+[lint.isort]
+force-single-line = false
+
+[lint.per-file-ignores]
+"*.ipynb" = ["E402", "E501"]
+
+[format]
+line-ending = "native"
diff --git a/integration_tests/server/conftest.py b/integration_tests/server/conftest.py
new file mode 100644
index 0000000000..4ab5e4fe02
--- /dev/null
+++ b/integration_tests/server/conftest.py
@@ -0,0 +1,75 @@
+import os
+import tempfile
+from typing import Iterator, List
+from unittest import mock
+from urllib.parse import urljoin
+
+import pytest
+from _pytest.monkeypatch import MonkeyPatch
+from faker import Faker
+from openinference.semconv.resource import ResourceAttributes
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.trace import Tracer
+from phoenix.config import (
+    ENV_PHOENIX_GRPC_PORT,
+    ENV_PHOENIX_PORT,
+    ENV_PHOENIX_WORKING_DIR,
+    get_base_url,
+    get_env_grpc_port,
+    get_env_host,
+)
+from portpicker import pick_unused_port  # type: ignore[import-untyped]
+
+
+@pytest.fixture(autouse=True)
+def set_env_var(monkeypatch: MonkeyPatch) -> Iterator[None]:
+    tmp = tempfile.TemporaryDirectory()
+    values = (
+        (ENV_PHOENIX_PORT, str(pick_unused_port())),
+        (ENV_PHOENIX_GRPC_PORT, str(pick_unused_port())),
+        (ENV_PHOENIX_WORKING_DIR, tmp.name),
+    )
+    try:
+        with mock.patch.dict(os.environ, values):
+            yield
+    finally:
+        try:
+            # This is for Windows. In Python 3.10+, it's cleaner to use
+            # `TemporaryDirectory(ignore_cleanup_errors=True)` instead.
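+            # (Cleanup may fail on Windows while the server process still holds files open.)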
+            tmp.cleanup()
+        except BaseException:
+            pass
+
+
+@pytest.fixture
+def tracers(
+    project_name: str,
+    fake: Faker,
+) -> List[Tracer]:
+    host = get_env_host()
+    if host == "0.0.0.0":
+        host = "127.0.0.1"
+    grpc_endpoint = f"http://{host}:{get_env_grpc_port()}"
+    http_endpoint = urljoin(get_base_url(), "v1/traces")
+    tracers = []
+    resource = Resource({ResourceAttributes.PROJECT_NAME: project_name})
+    for exporter in (GRPCExporter(grpc_endpoint), HTTPExporter(http_endpoint)):
+        tracer_provider = TracerProvider(resource=resource)
+        tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+        tracers.append(tracer_provider.get_tracer(__name__))
+    return tracers
+
+
+@pytest.fixture
+def fake() -> Faker:
+    return Faker()
+
+
+@pytest.fixture
+def project_name(fake: Faker) -> str:
+    return fake.pystr()
diff --git a/integration_tests/server/test_launch_app.py b/integration_tests/server/test_launch_app.py
new file mode 100644
index 0000000000..8759bb37fc
--- /dev/null
+++ b/integration_tests/server/test_launch_app.py
@@ -0,0 +1,99 @@
+import json
+import os
+import sys
+from contextlib import contextmanager
+from queue import SimpleQueue
+from subprocess import PIPE, STDOUT
+from threading import Thread
+from time import sleep, time
+from typing import Iterator, List, Set
+from urllib.parse import urljoin
+from urllib.request import Request, urlopen
+
+import pytest
+from faker import Faker
+from opentelemetry.trace import Tracer
+from phoenix.config import get_base_url
+from psutil import STATUS_ZOMBIE, Popen
+
+
+@pytest.fixture
+def req() -> Request:
+    query = dict(query="query{projects{edges{node{name spans{edges{node{name}}}}}}}")
+    return Request(
+        method="POST",
+        url=urljoin(get_base_url(), "graphql"),
+        data=json.dumps(query).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+    )
+
+
+def test_launch_app(
+    tracers: List[Tracer],
+    project_name: str,
+    req: Request,
+    fake: Faker,
+) -> None:
+    span_names: Set[str] = set()
+    for i in range(2):
+        with launch():
+            for t, tracer in enumerate(tracers):
+                name = f"{i}_{t}_{fake.pystr()}"
+                span_names.add(name)
+                tracer.start_span(name).end()
+            sleep(2)
+            response = urlopen(req)
+            response_dict = json.loads(response.read().decode("utf-8"))
+            assert response_dict
+            assert not response_dict.get("errors")
+            assert {
+                span["node"]["name"]
+                for project in response_dict["data"]["projects"]["edges"]
+                for span in project["node"]["spans"]["edges"]
+                if project["node"]["name"] == project_name
+            } == span_names
+            print(f"{response_dict=}")
+
+
+@contextmanager
+def launch() -> Iterator[None]:
+    command = f"{sys.executable} -m phoenix.server.main --no-ui serve"
+    process = Popen(command.split(), stdout=PIPE, stderr=STDOUT, text=True, env=os.environ)
+    log: "SimpleQueue[str]" = SimpleQueue()
+    Thread(target=capture_stdout, args=(process, log), daemon=True).start()
+    t = 60
+    time_limit = time() + t
+    timed_out = False
+    url = urljoin(get_base_url(), "healthz")
+    while not timed_out and is_alive(process):
+        sleep(0.1)
+        try:
+            urlopen(url)
+            break
+        except BaseException:
+            timed_out = time() > time_limit
+    try:
+        if timed_out:
+            raise TimeoutError(f"Server did not start within {t} seconds.")
+        assert is_alive(process)
+        yield
+        process.terminate()
+        process.wait(10)
+    finally:
+        logs = []
+        while not log.empty():
+            # For unknown reasons, this hangs if we try to print immediately
+            # after `get()`, so we collect the lines and print them later.
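+            # get() cannot block here: empty() was just checked and this is the sole consumer.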
+            logs.append(log.get())
+        for line in logs:
+            print(line, end="")
+
+
+def is_alive(process: Popen) -> bool:
+    return process.is_running() and process.status() != STATUS_ZOMBIE
+
+
+def capture_stdout(process: Popen, log: "SimpleQueue[str]") -> None:
+    while True:
+        log.put(process.stdout.readline())
diff --git a/pyproject.toml b/pyproject.toml
index 1958e0fec8..3392b640ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,6 +97,7 @@ dev = [
   "prometheus_client",
   "asgi-lifespan",
   "Faker>=26.0.0",
+  "portpicker",
   "uvloop; platform_system != 'Windows'",
 ]
 evals = []
@@ -253,6 +254,7 @@ addopts = [
   "--import-mode=importlib",
   "--doctest-modules",
   "--new-first",
+  "--showlocals",
 ]
 testpaths = [
   "tests",
@@ -430,4 +432,4 @@ select = ["E", "F", "W", "I", "NPY201"]
 force-single-line = false
 
 [tool.ruff.format]
-line-ending = "lf"
+line-ending = "native"
diff --git a/src/phoenix/db/engines.py b/src/phoenix/db/engines.py
index af7913ad67..e39dc7ddd0 100644
--- a/src/phoenix/db/engines.py
+++ b/src/phoenix/db/engines.py
@@ -7,6 +7,7 @@
 
 import aiosqlite
 import numpy as np
+import sqlalchemy
 import sqlean
 from sqlalchemy import URL, StaticPool, event, make_url
 from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
@@ -15,6 +16,7 @@
 from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.migrate import migrate_in_thread
 from phoenix.db.models import init_models
+from phoenix.settings import Settings
 
 sqlean.extensions.enable("text", "stats")
 
@@ -118,7 +120,13 @@ def async_creator() -> aiosqlite.Connection:
         else:
             asyncio.create_task(init_models(engine))
     else:
-        migrate_in_thread(engine.url)
+        sync_engine = sqlalchemy.create_engine(
+            url=url.set(drivername="sqlite"),
+            echo=Settings.log_migrations,
+            json_serializer=_dumps,
+            creator=lambda: sqlean.connect(f"file:{database}", uri=True),
+        )
+        migrate_in_thread(sync_engine)
     return engine
 
 
@@ -130,7 +138,12 @@ def aio_postgresql_engine(
     engine = create_async_engine(url=url, echo=echo, json_serializer=_dumps)
     if not migrate:
         return engine
-    migrate_in_thread(engine.url)
+    sync_engine = sqlalchemy.create_engine(
+        url=url.set(drivername="postgresql"),
+        echo=Settings.log_migrations,
+        json_serializer=_dumps,
+    )
+    migrate_in_thread(sync_engine)
     return engine
 
 
diff --git a/src/phoenix/db/insertion/dataset.py b/src/phoenix/db/insertion/dataset.py
index 17285b581b..636ca3636b 100644
--- a/src/phoenix/db/insertion/dataset.py
+++ b/src/phoenix/db/insertion/dataset.py
@@ -24,6 +24,7 @@
 from phoenix.db.insertion.helpers import DataManipulationEvent
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 DatasetId: TypeAlias = int
 DatasetVersionId: TypeAlias = int
diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py
index 20262fc051..b5ba84458c 100644
--- a/src/phoenix/db/migrate.py
+++ b/src/phoenix/db/migrate.py
@@ -1,25 +1,34 @@
+import codecs
 import logging
+import sys
 from pathlib import Path
-from queue import Empty, Queue
+from queue import Empty, SimpleQueue
 from threading import Thread
 from typing import Optional
 
 from alembic import command
 from alembic.config import Config
-from sqlalchemy import URL
+from sqlalchemy import Engine
 
 from phoenix.exceptions import PhoenixMigrationError
 from phoenix.settings import Settings
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 def printif(condition: bool, text: str) -> None:
-    if condition:
-        print(text)
+    if not condition:
+        return
+    if sys.platform.startswith("win"):
+        text = codecs.encode(text, "ascii", errors="ignore").decode("ascii").strip()
+    print(text)
 
 
-def migrate(url: URL, error_queue: Optional["Queue[Exception]"] = None) -> None:
+def migrate(
+    engine: Engine,
+    error_queue: Optional["SimpleQueue[BaseException]"] = None,
+) -> None:
     """
     Runs migrations on the database.
     NB: Migrate only works on non-memory databases.
@@ -37,24 +46,26 @@ def migrate(url: URL, error_queue: Optional["Queue[Exception]"] = None) -> None:
         # Explicitly set the migration directory
         scripts_location = str(Path(__file__).parent.resolve() / "migrations")
         alembic_cfg.set_main_option("script_location", scripts_location)
-        alembic_cfg.set_main_option("sqlalchemy.url", str(url).replace("%", "%%"))
+        url = str(engine.url).replace("%", "%%")
+        alembic_cfg.set_main_option("sqlalchemy.url", url)
+        alembic_cfg.attributes["connection"] = engine.connect()
         command.upgrade(alembic_cfg, "head")
         printif(log_migrations, "---------------------------")
         printif(log_migrations, "✅ Migrations complete.")
-    except Exception as e:
+    except BaseException as e:
         if error_queue:
             error_queue.put(e)
         raise e
 
 
-def migrate_in_thread(url: URL) -> None:
+def migrate_in_thread(engine: Engine) -> None:
     """
     Runs migrations on the database in a separate thread.
     This is needed because depending on the context (notebook)
     the migration process can fail to execute in the main thread.
     """
-    error_queue: Queue[Exception] = Queue()
-    t = Thread(target=migrate, args=(url, error_queue))
+    error_queue: SimpleQueue[BaseException] = SimpleQueue()
+    t = Thread(target=migrate, args=(engine, error_queue))
     t.start()
     t.join()
 
diff --git a/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py b/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
index 6b957350bc..141a378335 100644
--- a/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
+++ b/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
@@ -19,7 +19,6 @@
     update,
 )
 from sqlalchemy.dialects import postgresql
-from sqlalchemy.ext.asyncio.engine import AsyncConnection
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.orm import (
     DeclarativeBase,
@@ -111,11 +110,10 @@ class Span(Base):
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION.split(".")
 
 
-async def get_token_counts_from_attributes(connection: AsyncConnection) -> None:
-    """
-    Gets token counts from attributes if present.
- """ - await connection.execute( +def upgrade() -> None: + op.add_column("spans", sa.Column("llm_token_count_prompt", sa.Integer, nullable=True)) + op.add_column("spans", sa.Column("llm_token_count_completion", sa.Integer, nullable=True)) + op.execute( update(Span).values( llm_token_count_prompt=Span.attributes[LLM_TOKEN_COUNT_PROMPT].as_float(), llm_token_count_completion=Span.attributes[LLM_TOKEN_COUNT_COMPLETION].as_float(), @@ -123,12 +121,6 @@ async def get_token_counts_from_attributes(connection: AsyncConnection) -> None: ) -def upgrade() -> None: - op.add_column("spans", sa.Column("llm_token_count_prompt", sa.Integer, nullable=True)) - op.add_column("spans", sa.Column("llm_token_count_completion", sa.Integer, nullable=True)) - op.run_async(get_token_counts_from_attributes) - - def downgrade() -> None: op.drop_column("spans", "llm_token_count_completion") op.drop_column("spans", "llm_token_count_prompt") diff --git a/src/phoenix/inferences/fixtures.py b/src/phoenix/inferences/fixtures.py index e54dcd2473..049a6ad942 100644 --- a/src/phoenix/inferences/fixtures.py +++ b/src/phoenix/inferences/fixtures.py @@ -18,6 +18,7 @@ ) logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) class InferencesRole(Enum): diff --git a/src/phoenix/inferences/inferences.py b/src/phoenix/inferences/inferences.py index d585cb40ac..3cfa69b3d4 100644 --- a/src/phoenix/inferences/inferences.py +++ b/src/phoenix/inferences/inferences.py @@ -34,6 +34,7 @@ from .validation import validate_inferences_inputs logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) # A schema like object. Not recommended to use this directly SchemaLike: TypeAlias = Any diff --git a/src/phoenix/metrics/__init__.py b/src/phoenix/metrics/__init__.py index 374cbeaee6..476c1de4df 100644 --- a/src/phoenix/metrics/__init__.py +++ b/src/phoenix/metrics/__init__.py @@ -10,6 +10,7 @@ from phoenix.core.model_schema import Column logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) @dataclass(frozen=True) diff --git a/src/phoenix/server/api/routers/v1/datasets.py b/src/phoenix/server/api/routers/v1/datasets.py index 961a370423..950939d140 100644 --- a/src/phoenix/server/api/routers/v1/datasets.py +++ b/src/phoenix/server/api/routers/v1/datasets.py @@ -71,6 +71,7 @@ ) logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) DATASET_NODE_NAME = DatasetNodeType.__name__ DATASET_VERSION_NODE_NAME = DatasetVersionNodeType.__name__ diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index f9187caf1e..32c3fd4afd 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -100,6 +100,7 @@ from opentelemetry.trace import TracerProvider logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) router = APIRouter(include_in_schema=False) @@ -229,7 +230,8 @@ def _lifespan( dml_event_handler: DmlEventHandler, tracer_provider: Optional["TracerProvider"] = None, enable_prometheus: bool = False, - clean_ups: Iterable[Callable[[], None]] = (), + startup_callbacks: Iterable[Callable[[], None]] = (), + shutdown_callbacks: Iterable[Callable[[], None]] = (), read_only: bool = False, ) -> StatefulLifespan[FastAPI]: @contextlib.asynccontextmanager @@ -247,6 +249,8 @@ async def lifespan(_: FastAPI) -> AsyncIterator[Dict[str, Any]]: tracer_provider=tracer_provider, enable_prometheus=enable_prometheus, ), dml_event_handler: + for callback in startup_callbacks: + callback() yield { "event_queue": dml_event_handler, "enqueue": 
enqueue, @@ -254,8 +258,8 @@ async def lifespan(_: FastAPI) -> AsyncIterator[Dict[str, Any]]: "queue_evaluation_for_bulk_insert": queue_evaluation, "enqueue_operation": enqueue_operation, } - for clean_up in clean_ups: - clean_up() + for callback in shutdown_callbacks: + callback() return lifespan @@ -428,10 +432,12 @@ def create_app( initial_spans: Optional[Iterable[Union[Span, Tuple[Span, str]]]] = None, initial_evaluations: Optional[Iterable[pb.Evaluation]] = None, serve_ui: bool = True, - clean_up_callbacks: List[Callable[[], None]] = [], + startup_callbacks: Iterable[Callable[[], None]] = (), + shutdown_callbacks: Iterable[Callable[[], None]] = (), secret: Optional[str] = None, ) -> FastAPI: - clean_ups: List[Callable[[], None]] = clean_up_callbacks # To be called at app shutdown. + startup_callbacks_list: List[Callable[[], None]] = list(startup_callbacks) + shutdown_callbacks_list: List[Callable[[], None]] = list(shutdown_callbacks) initial_batch_of_spans: Iterable[Tuple[Span, str]] = ( () if initial_spans is None @@ -511,7 +517,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: dml_event_handler=dml_event_handler, tracer_provider=tracer_provider, enable_prometheus=enable_prometheus, - clean_ups=clean_ups, + shutdown_callbacks=shutdown_callbacks_list, + startup_callbacks=startup_callbacks_list, ), middleware=[ Middleware(HeadersMiddleware), @@ -554,5 +561,5 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: FastAPIInstrumentor().instrument(tracer_provider=tracer_provider) FastAPIInstrumentor.instrument_app(app, tracer_provider=tracer_provider) - clean_ups.append(FastAPIInstrumentor().uninstrument) + shutdown_callbacks_list.append(FastAPIInstrumentor().uninstrument) return app diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index 906aa54213..67bf6cd54d 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -1,13 +1,16 @@ import atexit +import codecs import logging import os +import sys from argparse import ArgumentParser -from pathlib import Path, PosixPath +from importlib.metadata import version +from pathlib import Path from threading import Thread from time import sleep, time from typing import List, Optional +from urllib.parse import urljoin -import pkg_resources from uvicorn import Config, Server import phoenix.trace.v1 as pb @@ -53,6 +56,7 @@ from phoenix.trace.schemas import Span logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) _WELCOME_MESSAGE = """ @@ -137,6 +141,7 @@ def _get_pid_file() -> Path: parser.add_argument("--debug", action="store_true") # Whether the app is running in a development environment parser.add_argument("--dev", action="store_true") + parser.add_argument("--no-ui", action="store_true") subparsers = parser.add_subparsers(dest="command", required=True) serve_parser = subparsers.add_parser("serve") datasets_parser = subparsers.add_parser("datasets") @@ -255,6 +260,18 @@ def _get_pid_file() -> Path: engine = create_engine_and_run_migrations(db_connection_str) instrumentation_cleanups = instrument_engine_if_enabled(engine) factory = DbSessionFactory(db=_db(engine), dialect=engine.dialect.name) + # Print information about the server + msg = _WELCOME_MESSAGE.format( + version=version("arize-phoenix"), + ui_path=urljoin(f"http://{host}:{port}", host_root_path), + grpc_path=f"http://{host}:{get_env_grpc_port()}", + http_path=urljoin(urljoin(f"http://{host}:{port}", host_root_path), "v1/traces"), + storage=get_printable_db_url(db_connection_str), + ) + if authentication_enabled: 
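+        # _EXPERIMENTAL_WARNING notes that auth support is still experimental.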
+        msg += _EXPERIMENTAL_WARNING.format(auth_enabled=True)
+    if sys.platform.startswith("win"):
+        msg = codecs.encode(msg, "ascii", errors="ignore").decode("ascii").strip()
     app = create_app(
         db=factory,
         export_path=export_path,
@@ -266,30 +283,17 @@ def _get_pid_file() -> Path:
         else create_model_from_inferences(corpus_inferences),
         debug=args.debug,
         dev=args.dev,
+        serve_ui=not args.no_ui,
         read_only=read_only,
         enable_prometheus=enable_prometheus,
         initial_spans=fixture_spans,
         initial_evaluations=fixture_evals,
-        clean_up_callbacks=instrumentation_cleanups,
+        startup_callbacks=[lambda: print(msg)],
+        shutdown_callbacks=instrumentation_cleanups,
         secret=secret,
     )
     server = Server(config=Config(app, host=host, port=port, root_path=host_root_path))  # type: ignore
     Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start()
 
-    # Print information about the server
-    phoenix_version = pkg_resources.get_distribution("arize-phoenix").version
-    print(
-        _WELCOME_MESSAGE.format(
-            version=phoenix_version,
-            ui_path=PosixPath(f"http://{host}:{port}", host_root_path),
-            grpc_path=f"http://{host}:{get_env_grpc_port()}",
-            http_path=PosixPath(f"http://{host}:{port}", host_root_path, "v1/traces"),
-            storage=get_printable_db_url(db_connection_str),
-        )
-    )
-
-    if authentication_enabled:
-        print(_EXPERIMENTAL_WARNING.format(auth_enabled=authentication_enabled))
-
     # Start the server
     server.run()
diff --git a/src/phoenix/services.py b/src/phoenix/services.py
index 9a2662196f..65ac8a689d 100644
--- a/src/phoenix/services.py
+++ b/src/phoenix/services.py
@@ -12,6 +12,7 @@
 from phoenix.config import SERVER_DIR, get_pids_path, get_running_pid
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class Service:
diff --git a/src/phoenix/session/client.py b/src/phoenix/session/client.py
index 7b64ba20f9..df0635db2c 100644
--- a/src/phoenix/session/client.py
+++ b/src/phoenix/session/client.py
@@ -52,7 +52,7 @@
 from phoenix.utilities.client import VersionedClient
 
 logger = logging.getLogger(__name__)
-
+logger.addHandler(logging.NullHandler())
 
 DEFAULT_TIMEOUT_IN_SECONDS = 5
 
diff --git a/src/phoenix/session/evaluation.py b/src/phoenix/session/evaluation.py
index c4bf506a9c..afd4de3a05 100644
--- a/src/phoenix/session/evaluation.py
+++ b/src/phoenix/session/evaluation.py
@@ -35,6 +35,7 @@
 ]
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py
index 9d0049fa7f..72e5cad184 100644
--- a/src/phoenix/session/session.py
+++ b/src/phoenix/session/session.py
@@ -63,6 +63,7 @@
     pass
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 # type workaround
 # https://github.com/python/mypy/issues/5264#issuecomment-399407428
@@ -392,7 +393,7 @@ def __init__(
                 if (trace_dataset and (initial_evaluations := trace_dataset.evaluations))
                 else None
             ),
-            clean_up_callbacks=instrumentation_cleanups,
+            shutdown_callbacks=instrumentation_cleanups,
         )
         self.server = ThreadServer(
             app=self.app,
diff --git a/src/phoenix/trace/fixtures.py b/src/phoenix/trace/fixtures.py
index fc17e2a417..e944ffe571 100644
--- a/src/phoenix/trace/fixtures.py
+++ b/src/phoenix/trace/fixtures.py
@@ -22,6 +22,7 @@
 from phoenix.trace.utils import download_json_traces_fixture, is_jsonl_file, json_lines_to_df
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class EvaluationResultSchema(NamedTuple):
diff --git a/src/phoenix/trace/langchain/instrumentor.py b/src/phoenix/trace/langchain/instrumentor.py
index 56c6191aad..89b0a95d70 100644
--- a/src/phoenix/trace/langchain/instrumentor.py
+++ b/src/phoenix/trace/langchain/instrumentor.py
@@ -13,7 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
-
+logger.addHandler(logging.NullHandler())
 
 __all__ = ("LangChainInstrumentor",)
 
diff --git a/src/phoenix/trace/llama_index/callback.py b/src/phoenix/trace/llama_index/callback.py
index 1adc46485d..1db8bbaa66 100644
--- a/src/phoenix/trace/llama_index/callback.py
+++ b/src/phoenix/trace/llama_index/callback.py
@@ -13,6 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 LLAMA_INDEX_MODERN_VERSION = (0, 10, 0)
 INSTRUMENTATION_MODERN_VERSION = (1, 0, 0)
diff --git a/src/phoenix/trace/openai/instrumentor.py b/src/phoenix/trace/openai/instrumentor.py
index cadb4e82df..e77021171c 100644
--- a/src/phoenix/trace/openai/instrumentor.py
+++ b/src/phoenix/trace/openai/instrumentor.py
@@ -13,6 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class OpenAIInstrumentor(Instrumentor):
diff --git a/tox.ini b/tox.ini
index 4ae2f8b605..fb32d4f640 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,16 +7,19 @@ package = wheel
 wheel_build_env = .pkg
 deps =
     -r dev-requirements.txt
+    integration_tests: .
 changedir =
     phoenix_evals: packages/phoenix-evals/
+    integration_tests: integration_tests/
 commands_pre =
     pkg: uv pip install .[test]
+    integration_tests: uv pip install -r requirements.txt
 commands =
     ruff: ruff format {posargs:.}
     ruff: ruff check --fix {posargs:.}
     mypy: mypy {posargs:.}
-    test: pytest -n auto {posargs:.}
+    test: pytest -n auto --nf {posargs:.}
     ci: ruff format --diff {posargs:.}
     ci: ruff check --no-fix {posargs:.}
     ci: mypy {posargs:.}
-    ci: pytest -n auto -x -ra {posargs:.}
+    ci: pytest -n auto -x {posargs:.}