fix(python): application launch on Windows (#4276)
RogerHYang authored Aug 21, 2024
1 parent c8de7f6 commit 9ede0a3
Showing 35 changed files with 327 additions and 70 deletions.
26 changes: 25 additions & 1 deletion .github/workflows/python-CI.yml
@@ -57,7 +57,11 @@ jobs:
           python-version: |
             3.8
             3.12
-      - run: pip install tox-uv==1.11.2
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
       - run: tox run-parallel --parallel-no-spinner -e py38-ci-pkg-phoenix_evals,py312-ci-pkg-phoenix_evals
         if: ${{ needs.changes.outputs.phoenix_evals == 'true' }}

@@ -136,3 +140,23 @@ jobs:
         if: runner.os == 'Windows'
         run: |
           hatch run test:tests
+
+  integration-test:
+    runs-on: ${{ matrix.os }}
+    needs: changes
+    if: ${{ needs.changes.outputs.phoenix == 'true' }}
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, windows-latest, windows-2019, macos-12 ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.8
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
+      - run: tox run -e ci-integration_tests -- server
+        timeout-minutes: 5
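
As an aside (not part of the commit), the job's final two steps can be approximated locally. A rough Python sketch, assuming the pinned tool versions above and a checkout of the repository root:

import subprocess
import sys

# Install the pinned tox/tox-uv versions, then run the integration-test
# environment against the "server" target, mirroring the two CI steps above.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "tox==4.18.0", "tox-uv==1.11.2"],
    check=True,
)
subprocess.run(["tox", "run", "-e", "ci-integration_tests", "--", "server"], check=True)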
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import AnthropicModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import OpenAIModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import GeminiModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
3 changes: 3 additions & 0 deletions integration_tests/mypy.ini
@@ -0,0 +1,3 @@
[mypy]
strict = true
explicit_package_bases = true
@@ -6,10 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import phoenix as px\n",
     "from sqlalchemy import create_engine\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
-    "import phoenix as px"
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
@@ -13,10 +13,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from phoenix.db import models\n",
     "from sqlalchemy import and_, create_engine, select\n",
-    "from sqlalchemy.orm import aliased, sessionmaker\n",
-    "\n",
-    "from phoenix.db import models"
+    "from sqlalchemy.orm import aliased, sessionmaker"
    ]
   },
   {
@@ -8,7 +8,6 @@
    "source": [
     "from gql import Client, gql\n",
     "from gql.transport.requests import RequestsHTTPTransport\n",
-    "\n",
     "from phoenix.server.api.types.pagination import (\n",
     "    Cursor,\n",
     "    CursorSortColumn,\n",
@@ -19,13 +19,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sqlalchemy import create_engine, select\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
     "import phoenix as px\n",
     "from phoenix.db import models\n",
     "from phoenix.trace.dsl.helpers import get_qa_with_reference, get_retrieved_documents\n",
-    "from phoenix.trace.dsl.query import SpanQuery"
+    "from phoenix.trace.dsl.query import SpanQuery\n",
+    "from sqlalchemy import create_engine, select\n",
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
2 changes: 2 additions & 0 deletions integration_tests/pytest.ini
@@ -0,0 +1,2 @@
[pytest]
addopts = -raP -l
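
For context (an aside, not part of the diff): `-r aP` widens pytest's end-of-run summary to every outcome except bare passes, plus passed tests with captured output, and `-l` (`--showlocals`) prints local variables in failing tracebacks. The same options can be passed through pytest's Python API:

import pytest

# Illustrative equivalent of the addopts above; the test path is an
# assumption, not something this commit specifies.
pytest.main(["-raP", "-l", "integration_tests/server"])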
6 changes: 6 additions & 0 deletions integration_tests/requirements.txt
@@ -0,0 +1,6 @@
faker
openinference-semantic-conventions
opentelemetry-sdk
portpicker
psutil
types-psutil
14 changes: 14 additions & 0 deletions integration_tests/ruff.toml
@@ -0,0 +1,14 @@
line-length = 100
target-version = "py38"

[lint]
select = ["E", "F", "W", "I", "NPY201"]

[lint.isort]
force-single-line = false

[lint.per-file-ignores]
"*.ipynb" = ["E402", "E501"]

[format]
line-ending = "native"
74 changes: 74 additions & 0 deletions integration_tests/server/conftest.py
@@ -0,0 +1,74 @@
import os
import tempfile
from typing import Iterator, List
from unittest import mock
from urllib.parse import urljoin

import pytest
from _pytest.monkeypatch import MonkeyPatch
from faker import Faker
from openinference.semconv.resource import ResourceAttributes
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.trace import Tracer
from phoenix.config import (
    ENV_PHOENIX_GRPC_PORT,
    ENV_PHOENIX_PORT,
    ENV_PHOENIX_WORKING_DIR,
    get_base_url,
    get_env_grpc_port,
    get_env_host,
)
from portpicker import pick_unused_port  # type: ignore[import-untyped]


@pytest.fixture(autouse=True)
def set_env_var(monkeypatch: Iterator[MonkeyPatch]) -> Iterator[None]:
    tmp = tempfile.TemporaryDirectory()
    values = (
        (ENV_PHOENIX_PORT, str(pick_unused_port())),
        (ENV_PHOENIX_GRPC_PORT, str(pick_unused_port())),
        (ENV_PHOENIX_WORKING_DIR, tmp.name),
    )
    try:
        with mock.patch.dict(os.environ, values):
            yield
    finally:
        try:
            # This is for Windows. In Python 3.10+, it's cleaner to use
            # `TemporaryDirectory(ignore_cleanup_errors=True)` instead.
            tmp.cleanup()
        except BaseException:
            pass


@pytest.fixture
def tracers(
    project_name: str,
    fake: Faker,
) -> List[Tracer]:
    host = get_env_host()
    if host == "0.0.0.0":
        host = "127.0.0.1"
    grpc_endpoint = f"http://{host}:{get_env_grpc_port()}"
    http_endpoint = urljoin(get_base_url(), "v1/traces")
    tracers = []
    resource = Resource({ResourceAttributes.PROJECT_NAME: project_name})
    for exporter in (GRPCExporter(grpc_endpoint), HTTPExporter(http_endpoint)):
        tracer_provider = TracerProvider(resource=resource)
        tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
        tracers.append(tracer_provider.get_tracer(__name__))
    return tracers


@pytest.fixture
def fake() -> Faker:
    return Faker()


@pytest.fixture
def project_name(fake: Faker) -> str:
    return fake.pystr()
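
How these fixtures compose (an illustration, not part of the commit): `set_env_var` is autouse, so every test runs against per-test ports and a throwaway working directory, and `mock.patch.dict` restores `os.environ` on exit, which keeps consecutive server launches from colliding. A minimal sketch of that mechanic:

import os
from unittest import mock

# The overrides apply only inside the block and the previous environment is
# restored afterwards, mirroring set_env_var above. "PHOENIX_PORT" is used
# here for illustration; the fixture goes through the ENV_PHOENIX_* constants
# from phoenix.config.
with mock.patch.dict(os.environ, {"PHOENIX_PORT": "54321"}):
    assert os.environ["PHOENIX_PORT"] == "54321"
# Restored on exit (assuming the variable was not already set to that value):
assert os.environ.get("PHOENIX_PORT") != "54321"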
98 changes: 98 additions & 0 deletions integration_tests/server/test_launch_app.py
@@ -0,0 +1,98 @@
import json
import os
import sys
from contextlib import contextmanager
from queue import SimpleQueue
from subprocess import PIPE, STDOUT
from threading import Thread
from time import sleep, time
from typing import Iterator, List, Set
from urllib.parse import urljoin
from urllib.request import Request, urlopen

import pytest
from faker import Faker
from opentelemetry.trace import Tracer
from phoenix.config import get_base_url
from psutil import STATUS_ZOMBIE, Popen


@pytest.fixture
def req() -> Request:
    query = dict(query="query{projects{edges{node{name spans{edges{node{name}}}}}}}")
    return Request(
        method="POST",
        url=urljoin(get_base_url(), "graphql"),
        data=json.dumps(query).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )


def test_launch_app(
    tracers: List[Tracer],
    project_name: str,
    req: Request,
    fake: Faker,
) -> None:
    span_names: Set[str] = set()
    for i in range(2):
        with launch():
            for t, tracer in enumerate(tracers):
                name = f"{i}_{t}_{fake.pystr()}"
                span_names.add(name)
                tracer.start_span(name).end()
            sleep(2)
            response = urlopen(req)
            response_dict = json.loads(response.read().decode("utf-8"))
            assert response_dict
            assert not response_dict.get("errors")
            assert {
                span["node"]["name"]
                for project in response_dict["data"]["projects"]["edges"]
                for span in project["node"]["spans"]["edges"]
                if project["node"]["name"] == project_name
            } == span_names
            print(f"{response_dict=}")


@contextmanager
def launch() -> Iterator[None]:
    command = f"{sys.executable} -m phoenix.server.main --no-ui serve"
    process = Popen(command.split(), stdout=PIPE, stderr=STDOUT, text=True, env=os.environ)
    log: "SimpleQueue[str]" = SimpleQueue()
    Thread(target=capture_stdout, args=(process, log), daemon=True).start()
    t = 60
    time_limit = time() + t
    timed_out = False
    url = urljoin(get_base_url(), "healthz")
    while not timed_out and is_alive(process):
        sleep(0.1)
        try:
            urlopen(url)
            break
        except BaseException:
            timed_out = time() > time_limit
    try:
        if timed_out:
            raise TimeoutError(f"Server did not start within {t} seconds.")
        assert is_alive(process)
        yield
        process.terminate()
        process.wait(10)
    finally:
        logs = []
        while not log.empty():
            # For unknown reasons, this hangs if we try to print immediately
            # after `get()`, so we collect the lines and print them later.
            logs.append(log.get())
        for line in logs:
            print(line, end="")


def is_alive(process: Popen) -> bool:
    return process.is_running() and process.status() != STATUS_ZOMBIE


def capture_stdout(process: Popen, log: "SimpleQueue[str]") -> None:
    while True:
        log.put(process.stdout.readline())
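
One detail worth calling out (context, not part of the diff): `psutil.Popen` wraps `subprocess.Popen` with the `psutil.Process` API, which is what `is_alive` relies on — `is_running()` and `status()` come from psutil, while `terminate()` and `wait()` keep their subprocess semantics. A minimal sketch:

import sys

from psutil import STATUS_ZOMBIE, Popen

# One object, two APIs: psutil process inspection plus subprocess control.
proc = Popen([sys.executable, "-c", "print('ok')"])
alive = proc.is_running() and proc.status() != STATUS_ZOMBIE  # psutil side
proc.wait(5)                                                  # subprocess side
print(alive)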
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -97,6 +97,7 @@ dev = [
   "prometheus_client",
   "asgi-lifespan",
   "Faker>=26.0.0",
+  "portpicker",
   "uvloop; platform_system != 'Windows'",
 ]
 evals = []
@@ -253,6 +254,7 @@ addopts = [
   "--import-mode=importlib",
   "--doctest-modules",
   "--new-first",
+  "--showlocals",
 ]
 testpaths = [
   "tests",
@@ -430,4 +432,4 @@ select = ["E", "F", "W", "I", "NPY201"]
 force-single-line = false
 
 [tool.ruff.format]
-line-ending = "lf"
+line-ending = "native"
17 changes: 15 additions & 2 deletions src/phoenix/db/engines.py
@@ -7,6 +7,7 @@
 
 import aiosqlite
 import numpy as np
+import sqlalchemy
 import sqlean
 from sqlalchemy import URL, StaticPool, event, make_url
 from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
@@ -15,6 +16,7 @@
 from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.migrate import migrate_in_thread
 from phoenix.db.models import init_models
+from phoenix.settings import Settings
 
 sqlean.extensions.enable("text", "stats")
 
@@ -118,7 +120,13 @@ def async_creator() -> aiosqlite.Connection:
         else:
             asyncio.create_task(init_models(engine))
     else:
-        migrate_in_thread(engine.url)
+        sync_engine = sqlalchemy.create_engine(
+            url=url.set(drivername="sqlite"),
+            echo=Settings.log_migrations,
+            json_serializer=_dumps,
+            creator=lambda: sqlean.connect(f"file:{database}", uri=True),
+        )
+        migrate_in_thread(sync_engine)
     return engine


@@ -130,7 +138,12 @@ def aio_postgresql_engine(
     engine = create_async_engine(url=url, echo=echo, json_serializer=_dumps)
     if not migrate:
         return engine
-    migrate_in_thread(engine.url)
+    sync_engine = sqlalchemy.create_engine(
+        url=url.set(drivername="postgresql"),
+        echo=Settings.log_migrations,
+        json_serializer=_dumps,
+    )
+    migrate_in_thread(sync_engine)
     return engine
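
These two hunks are the heart of the Windows fix: `migrate_in_thread` previously received only `engine.url` and built its own engine internally, whereas now the caller constructs the synchronous engine itself — for SQLite that reuses the `sqlean.connect(f"file:{database}", uri=True)` creator, presumably sidestepping path-to-URL mishandling on Windows. A plausible sketch of the receiving side (assumed, not shown in this diff), with the migration runner elided:

from threading import Thread

from sqlalchemy import Engine


def migrate_in_thread(engine: Engine) -> None:
    # Run migrations on a worker thread so server startup is not blocked;
    # the caller-supplied engine carries the correct driver and creator.
    def run() -> None:
        with engine.connect() as conn:
            ...  # hand `conn` to the migration runner (e.g. Alembic)

    thread = Thread(target=run)
    thread.start()
    thread.join()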


1 change: 1 addition & 0 deletions src/phoenix/db/insertion/dataset.py
@@ -24,6 +24,7 @@
 from phoenix.db.insertion.helpers import DataManipulationEvent
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 DatasetId: TypeAlias = int
 DatasetVersionId: TypeAlias = int
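
The added `NullHandler` follows the standard convention for library loggers (context, not part of the diff): with it attached, records are silently discarded unless the application configures logging itself, instead of triggering Python's last-resort stderr handler. A minimal illustration:

import logging

lib_logger = logging.getLogger("phoenix.db.insertion.dataset")
lib_logger.addHandler(logging.NullHandler())  # what this commit adds

lib_logger.warning("swallowed")           # no handler output, no fallback
logging.basicConfig(level=logging.INFO)   # the application opts in
lib_logger.warning("now visible")         # delivered via the root handler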