diff --git a/.github/workflows/python-CI.yml b/.github/workflows/python-CI.yml
index 984014cf7e..a201434795 100644
--- a/.github/workflows/python-CI.yml
+++ b/.github/workflows/python-CI.yml
@@ -57,7 +57,11 @@ jobs:
           python-version: |
             3.8
             3.12
-      - run: pip install tox-uv==1.11.2
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
       - run: tox run-parallel --parallel-no-spinner -e py38-ci-pkg-phoenix_evals,py312-ci-pkg-phoenix_evals
         if: ${{ needs.changes.outputs.phoenix_evals == 'true' }}
 
@@ -136,3 +140,23 @@ jobs:
         if: runner.os == 'Windows'
         run: |
           hatch run test:tests
+
+  integration-test:
+    runs-on: ${{ matrix.os }}
+    needs: changes
+    if: ${{ needs.changes.outputs.phoenix == 'true' }}
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, windows-latest, windows-2019, macos-12 ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.8
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.3.0
+          uv-venv: ${{ github.job }}-${{ github.run_number }}
+      - run: uv pip install tox==4.18.0 tox-uv==1.11.2
+      - run: tox run -e ci-integration_tests -- server
+        timeout-minutes: 5
diff --git a/integration-tests/evals/test_multiple_classify_calls_anthropic.py b/integration_tests/evals/test_multiple_classify_calls_anthropic.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_anthropic.py
rename to integration_tests/evals/test_multiple_classify_calls_anthropic.py
index 8b8f38c855..41cfa4eb8a 100644
--- a/integration-tests/evals/test_multiple_classify_calls_anthropic.py
+++ b/integration_tests/evals/test_multiple_classify_calls_anthropic.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import AnthropicModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration-tests/evals/test_multiple_classify_calls_openai.py b/integration_tests/evals/test_multiple_classify_calls_openai.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_openai.py
rename to integration_tests/evals/test_multiple_classify_calls_openai.py
index 01e6cd7d53..450d0e1767 100644
--- a/integration-tests/evals/test_multiple_classify_calls_openai.py
+++ b/integration_tests/evals/test_multiple_classify_calls_openai.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import OpenAIModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration-tests/evals/test_multiple_classify_calls_vertex.py b/integration_tests/evals/test_multiple_classify_calls_vertex.py
similarity index 99%
rename from integration-tests/evals/test_multiple_classify_calls_vertex.py
rename to integration_tests/evals/test_multiple_classify_calls_vertex.py
index e21e52d246..90729f2952 100644
--- a/integration-tests/evals/test_multiple_classify_calls_vertex.py
+++ b/integration_tests/evals/test_multiple_classify_calls_vertex.py
@@ -1,5 +1,4 @@
 import pandas as pd
-
 from phoenix.evals import GeminiModel, llm_classify
 from phoenix.evals.default_templates import (
     RAG_RELEVANCY_PROMPT_TEMPLATE,
diff --git a/integration_tests/mypy.ini b/integration_tests/mypy.ini
new file mode 100644
index 0000000000..3529cc191f
--- /dev/null
+++ b/integration_tests/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+strict = true
+explicit_package_bases = true
diff --git a/integration-tests/eval_query_testing.ipynb b/integration_tests/notebooks/eval_query_testing.ipynb
similarity index 99%
rename from integration-tests/eval_query_testing.ipynb
rename to integration_tests/notebooks/eval_query_testing.ipynb
index 1a7bae1c4d..c76ac7c903 100644
--- a/integration-tests/eval_query_testing.ipynb
+++ b/integration_tests/notebooks/eval_query_testing.ipynb
@@ -6,10 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import phoenix as px\n",
     "from sqlalchemy import create_engine\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
-    "import phoenix as px"
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
diff --git a/integration-tests/graphql_query_performance.ipynb b/integration_tests/notebooks/graphql_query_performance.ipynb
similarity index 100%
rename from integration-tests/graphql_query_performance.ipynb
rename to integration_tests/notebooks/graphql_query_performance.ipynb
diff --git a/integration-tests/pagination_queries.ipynb b/integration_tests/notebooks/pagination_queries.ipynb
similarity index 98%
rename from integration-tests/pagination_queries.ipynb
rename to integration_tests/notebooks/pagination_queries.ipynb
index d74ceead64..20cad1ce70 100644
--- a/integration-tests/pagination_queries.ipynb
+++ b/integration_tests/notebooks/pagination_queries.ipynb
@@ -13,10 +13,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from phoenix.db import models\n",
     "from sqlalchemy import and_, create_engine, select\n",
-    "from sqlalchemy.orm import aliased, sessionmaker\n",
-    "\n",
-    "from phoenix.db import models"
+    "from sqlalchemy.orm import aliased, sessionmaker"
    ]
   },
   {
diff --git a/integration-tests/pagination_query_testing.ipynb b/integration_tests/notebooks/pagination_query_testing.ipynb
similarity index 99%
rename from integration-tests/pagination_query_testing.ipynb
rename to integration_tests/notebooks/pagination_query_testing.ipynb
index 80467e66b3..e251ac5bd2 100644
--- a/integration-tests/pagination_query_testing.ipynb
+++ b/integration_tests/notebooks/pagination_query_testing.ipynb
@@ -8,7 +8,6 @@
    "source": [
     "from gql import Client, gql\n",
     "from gql.transport.requests import RequestsHTTPTransport\n",
-    "\n",
     "from phoenix.server.api.types.pagination import (\n",
     "    Cursor,\n",
     "    CursorSortColumn,\n",
diff --git a/integration-tests/span_query_testing.ipynb b/integration_tests/notebooks/span_query_testing.ipynb
similarity index 99%
rename from integration-tests/span_query_testing.ipynb
rename to integration_tests/notebooks/span_query_testing.ipynb
index 81bdb9c5c1..41b88ad032 100644
--- a/integration-tests/span_query_testing.ipynb
+++ b/integration_tests/notebooks/span_query_testing.ipynb
@@ -19,13 +19,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sqlalchemy import create_engine, select\n",
-    "from sqlalchemy.orm import sessionmaker\n",
-    "\n",
     "import phoenix as px\n",
     "from phoenix.db import models\n",
     "from phoenix.trace.dsl.helpers import get_qa_with_reference, get_retrieved_documents\n",
-    "from phoenix.trace.dsl.query import SpanQuery"
+    "from phoenix.trace.dsl.query import SpanQuery\n",
+    "from sqlalchemy import create_engine, select\n",
+    "from sqlalchemy.orm import sessionmaker"
    ]
   },
   {
diff --git a/integration_tests/pytest.ini b/integration_tests/pytest.ini
new file mode 100644
index 0000000000..1df10d788b
--- /dev/null
+++ b/integration_tests/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = -raP -l
diff --git a/integration_tests/requirements.txt b/integration_tests/requirements.txt
new file mode 100644
index 0000000000..b13c91dd70
--- /dev/null
+++ b/integration_tests/requirements.txt
@@ -0,0 +1,7 @@
+faker
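+# faker supplies the random project and span names used by the server tests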
+openinference-semantic-conventions
+opentelemetry-sdk
+portpicker
+psutil
+types-psutil
diff --git a/integration_tests/ruff.toml b/integration_tests/ruff.toml
new file mode 100644
index 0000000000..25e88a14bb
--- /dev/null
+++ b/integration_tests/ruff.toml
@@ -0,0 +1,14 @@
+line-length = 100
+target-version = "py38"
+
+[lint]
+select = ["E", "F", "W", "I", "NPY201"]
+
+[lint.isort]
+force-single-line = false
+
+[lint.per-file-ignores]
+"*.ipynb" = ["E402", "E501"]
+
+[format]
+line-ending = "native"
diff --git a/integration_tests/server/conftest.py b/integration_tests/server/conftest.py
new file mode 100644
index 0000000000..4ab5e4fe02
--- /dev/null
+++ b/integration_tests/server/conftest.py
@@ -0,0 +1,75 @@
+import os
+import tempfile
+from typing import Iterator, List
+from unittest import mock
+from urllib.parse import urljoin
+
+import pytest
+from _pytest.monkeypatch import MonkeyPatch
+from faker import Faker
+from openinference.semconv.resource import ResourceAttributes
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.trace import Tracer
+from phoenix.config import (
+    ENV_PHOENIX_GRPC_PORT,
+    ENV_PHOENIX_PORT,
+    ENV_PHOENIX_WORKING_DIR,
+    get_base_url,
+    get_env_grpc_port,
+    get_env_host,
+)
+from portpicker import pick_unused_port  # type: ignore[import-untyped]
+
+
+@pytest.fixture(autouse=True)
+def set_env_var(monkeypatch: MonkeyPatch) -> Iterator[None]:
+    tmp = tempfile.TemporaryDirectory()
+    values = (
+        (ENV_PHOENIX_PORT, str(pick_unused_port())),
+        (ENV_PHOENIX_GRPC_PORT, str(pick_unused_port())),
+        (ENV_PHOENIX_WORKING_DIR, tmp.name),
+    )
+    try:
+        with mock.patch.dict(os.environ, values):
+            yield
+    finally:
+        try:
+            # This is for Windows. In Python 3.10+, it's cleaner to use
+            # `TemporaryDirectory(ignore_cleanup_errors=True)` instead.
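+            # (Cleanup may fail on Windows while the server process still holds files open.)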
+            tmp.cleanup()
+        except BaseException:
+            pass
+
+
+@pytest.fixture
+def tracers(
+    project_name: str,
+    fake: Faker,
+) -> List[Tracer]:
+    host = get_env_host()
+    if host == "0.0.0.0":
+        host = "127.0.0.1"
+    grpc_endpoint = f"http://{host}:{get_env_grpc_port()}"
+    http_endpoint = urljoin(get_base_url(), "v1/traces")
+    tracers = []
+    resource = Resource({ResourceAttributes.PROJECT_NAME: project_name})
+    for exporter in (GRPCExporter(grpc_endpoint), HTTPExporter(http_endpoint)):
+        tracer_provider = TracerProvider(resource=resource)
+        tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+        tracers.append(tracer_provider.get_tracer(__name__))
+    return tracers
+
+
+@pytest.fixture
+def fake() -> Faker:
+    return Faker()
+
+
+@pytest.fixture
+def project_name(fake: Faker) -> str:
+    return fake.pystr()
diff --git a/integration_tests/server/test_launch_app.py b/integration_tests/server/test_launch_app.py
new file mode 100644
index 0000000000..8759bb37fc
--- /dev/null
+++ b/integration_tests/server/test_launch_app.py
@@ -0,0 +1,99 @@
+import json
+import os
+import sys
+from contextlib import contextmanager
+from queue import SimpleQueue
+from subprocess import PIPE, STDOUT
+from threading import Thread
+from time import sleep, time
+from typing import Iterator, List, Set
+from urllib.parse import urljoin
+from urllib.request import Request, urlopen
+
+import pytest
+from faker import Faker
+from opentelemetry.trace import Tracer
+from phoenix.config import get_base_url
+from psutil import STATUS_ZOMBIE, Popen
+
+
+@pytest.fixture
+def req() -> Request:
+    query = dict(query="query{projects{edges{node{name spans{edges{node{name}}}}}}}")
+    return Request(
+        method="POST",
+        url=urljoin(get_base_url(), "graphql"),
+        data=json.dumps(query).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+    )
+
+
+def test_launch_app(
+    tracers: List[Tracer],
+    project_name: str,
+    req: Request,
+    fake: Faker,
+) -> None:
+    span_names: Set[str] = set()
+    for i in range(2):
+        with launch():
+            for t, tracer in enumerate(tracers):
+                name = f"{i}_{t}_{fake.pystr()}"
+                span_names.add(name)
+                tracer.start_span(name).end()
+            sleep(2)
+            response = urlopen(req)
+            response_dict = json.loads(response.read().decode("utf-8"))
+            assert response_dict
+            assert not response_dict.get("errors")
+            assert {
+                span["node"]["name"]
+                for project in response_dict["data"]["projects"]["edges"]
+                for span in project["node"]["spans"]["edges"]
+                if project["node"]["name"] == project_name
+            } == span_names
+            print(f"{response_dict=}")
+
+
+@contextmanager
+def launch() -> Iterator[None]:
+    command = f"{sys.executable} -m phoenix.server.main --no-ui serve"
+    process = Popen(command.split(), stdout=PIPE, stderr=STDOUT, text=True, env=os.environ)
+    log: "SimpleQueue[str]" = SimpleQueue()
+    Thread(target=capture_stdout, args=(process, log), daemon=True).start()
+    t = 60
+    time_limit = time() + t
+    timed_out = False
+    url = urljoin(get_base_url(), "healthz")
+    while not timed_out and is_alive(process):
+        sleep(0.1)
+        try:
+            urlopen(url)
+            break
+        except BaseException:
+            timed_out = time() > time_limit
+    try:
+        if timed_out:
+            raise TimeoutError(f"Server did not start within {t} seconds.")
+        assert is_alive(process)
+        yield
+        process.terminate()
+        process.wait(10)
+    finally:
+        logs = []
+        while not log.empty():
+            # For unknown reasons, this hangs if we try to print immediately
+            # after `get()`, so we collect the lines and print them later.
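+            # get() cannot block here: empty() was just checked and this is the sole consumer.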
+            logs.append(log.get())
+        for line in logs:
+            print(line, end="")
+
+
+def is_alive(process: Popen) -> bool:
+    return process.is_running() and process.status() != STATUS_ZOMBIE
+
+
+def capture_stdout(process: Popen, log: "SimpleQueue[str]") -> None:
+    while True:
+        log.put(process.stdout.readline())
diff --git a/pyproject.toml b/pyproject.toml
index 1958e0fec8..3392b640ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,6 +97,7 @@ dev = [
   "prometheus_client",
   "asgi-lifespan",
   "Faker>=26.0.0",
+  "portpicker",
   "uvloop; platform_system != 'Windows'",
 ]
 evals = []
@@ -253,6 +254,7 @@ addopts = [
   "--import-mode=importlib",
   "--doctest-modules",
   "--new-first",
+  "--showlocals",
 ]
 testpaths = [
   "tests",
@@ -430,4 +432,4 @@ select = ["E", "F", "W", "I", "NPY201"]
 force-single-line = false
 
 [tool.ruff.format]
-line-ending = "lf"
+line-ending = "native"
diff --git a/src/phoenix/db/engines.py b/src/phoenix/db/engines.py
index af7913ad67..e39dc7ddd0 100644
--- a/src/phoenix/db/engines.py
+++ b/src/phoenix/db/engines.py
@@ -7,6 +7,7 @@
 
 import aiosqlite
 import numpy as np
+import sqlalchemy
 import sqlean
 from sqlalchemy import URL, StaticPool, event, make_url
 from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
@@ -15,6 +16,7 @@
 from phoenix.db.helpers import SupportedSQLDialect
 from phoenix.db.migrate import migrate_in_thread
 from phoenix.db.models import init_models
+from phoenix.settings import Settings
 
 sqlean.extensions.enable("text", "stats")
 
@@ -118,7 +120,13 @@ def async_creator() -> aiosqlite.Connection:
         else:
             asyncio.create_task(init_models(engine))
     else:
-        migrate_in_thread(engine.url)
+        sync_engine = sqlalchemy.create_engine(
+            url=url.set(drivername="sqlite"),
+            echo=Settings.log_migrations,
+            json_serializer=_dumps,
+            creator=lambda: sqlean.connect(f"file:{database}", uri=True),
+        )
+        migrate_in_thread(sync_engine)
     return engine
 
 
@@ -130,7 +138,12 @@ def aio_postgresql_engine(
     engine = create_async_engine(url=url, echo=echo, json_serializer=_dumps)
     if not migrate:
         return engine
-    migrate_in_thread(engine.url)
+    sync_engine = sqlalchemy.create_engine(
+        url=url.set(drivername="postgresql"),
+        echo=Settings.log_migrations,
+        json_serializer=_dumps,
+    )
+    migrate_in_thread(sync_engine)
     return engine
 
 
diff --git a/src/phoenix/db/insertion/dataset.py b/src/phoenix/db/insertion/dataset.py
index 17285b581b..636ca3636b 100644
--- a/src/phoenix/db/insertion/dataset.py
+++ b/src/phoenix/db/insertion/dataset.py
@@ -24,6 +24,7 @@
 from phoenix.db.insertion.helpers import DataManipulationEvent
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 DatasetId: TypeAlias = int
 DatasetVersionId: TypeAlias = int
diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py
index 20262fc051..b5ba84458c 100644
--- a/src/phoenix/db/migrate.py
+++ b/src/phoenix/db/migrate.py
@@ -1,25 +1,34 @@
+import codecs
 import logging
+import sys
 from pathlib import Path
-from queue import Empty, Queue
+from queue import Empty, SimpleQueue
 from threading import Thread
 from typing import Optional
 
 from alembic import command
 from alembic.config import Config
-from sqlalchemy import URL
+from sqlalchemy import Engine
 
 from phoenix.exceptions import PhoenixMigrationError
 from phoenix.settings import Settings
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 def printif(condition: bool, text: str) -> None:
-    if condition:
-        print(text)
+    if not condition:
+        return
+    if sys.platform.startswith("win"):
+        text = codecs.encode(text, "ascii", errors="ignore").decode("ascii").strip()
+    print(text)
 
 
-def migrate(url: URL, error_queue: Optional["Queue[Exception]"] = None) -> None:
+def migrate(
+    engine: Engine,
+    error_queue: Optional["SimpleQueue[BaseException]"] = None,
+) -> None:
     """
     Runs migrations on the database.
     NB: Migrate only works on non-memory databases.
@@ -37,24 +46,26 @@ def migrate(url: URL, error_queue: Optional["Queue[Exception]"] = None) -> None:
         # Explicitly set the migration directory
         scripts_location = str(Path(__file__).parent.resolve() / "migrations")
         alembic_cfg.set_main_option("script_location", scripts_location)
-        alembic_cfg.set_main_option("sqlalchemy.url", str(url).replace("%", "%%"))
+        url = str(engine.url).replace("%", "%%")
+        alembic_cfg.set_main_option("sqlalchemy.url", url)
+        alembic_cfg.attributes["connection"] = engine.connect()
         command.upgrade(alembic_cfg, "head")
         printif(log_migrations, "---------------------------")
         printif(log_migrations, "✅ Migrations complete.")
-    except Exception as e:
+    except BaseException as e:
         if error_queue:
             error_queue.put(e)
         raise e
 
 
-def migrate_in_thread(url: URL) -> None:
+def migrate_in_thread(engine: Engine) -> None:
     """
     Runs migrations on the database in a separate thread.
     This is needed because depending on the context (notebook)
     the migration process can fail to execute in the main thread.
     """
-    error_queue: Queue[Exception] = Queue()
-    t = Thread(target=migrate, args=(url, error_queue))
+    error_queue: SimpleQueue[BaseException] = SimpleQueue()
+    t = Thread(target=migrate, args=(engine, error_queue))
     t.start()
     t.join()
 
diff --git a/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py b/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
index 6b957350bc..141a378335 100644
--- a/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
+++ b/src/phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py
@@ -19,7 +19,6 @@
     update,
 )
 from sqlalchemy.dialects import postgresql
-from sqlalchemy.ext.asyncio.engine import AsyncConnection
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.orm import (
     DeclarativeBase,
@@ -111,11 +110,10 @@ class Span(Base):
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION.split(".")
 
 
-async def get_token_counts_from_attributes(connection: AsyncConnection) -> None:
-    """
-    Gets token counts from attributes if present.
- """ - await connection.execute( +def upgrade() -> None: + op.add_column("spans", sa.Column("llm_token_count_prompt", sa.Integer, nullable=True)) + op.add_column("spans", sa.Column("llm_token_count_completion", sa.Integer, nullable=True)) + op.execute( update(Span).values( llm_token_count_prompt=Span.attributes[LLM_TOKEN_COUNT_PROMPT].as_float(), llm_token_count_completion=Span.attributes[LLM_TOKEN_COUNT_COMPLETION].as_float(), @@ -123,12 +121,6 @@ async def get_token_counts_from_attributes(connection: AsyncConnection) -> None: ) -def upgrade() -> None: - op.add_column("spans", sa.Column("llm_token_count_prompt", sa.Integer, nullable=True)) - op.add_column("spans", sa.Column("llm_token_count_completion", sa.Integer, nullable=True)) - op.run_async(get_token_counts_from_attributes) - - def downgrade() -> None: op.drop_column("spans", "llm_token_count_completion") op.drop_column("spans", "llm_token_count_prompt") diff --git a/src/phoenix/inferences/fixtures.py b/src/phoenix/inferences/fixtures.py index e54dcd2473..049a6ad942 100644 --- a/src/phoenix/inferences/fixtures.py +++ b/src/phoenix/inferences/fixtures.py @@ -18,6 +18,7 @@ ) logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) class InferencesRole(Enum): diff --git a/src/phoenix/inferences/inferences.py b/src/phoenix/inferences/inferences.py index d585cb40ac..3cfa69b3d4 100644 --- a/src/phoenix/inferences/inferences.py +++ b/src/phoenix/inferences/inferences.py @@ -34,6 +34,7 @@ from .validation import validate_inferences_inputs logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) # A schema like object. Not recommended to use this directly SchemaLike: TypeAlias = Any diff --git a/src/phoenix/metrics/__init__.py b/src/phoenix/metrics/__init__.py index 374cbeaee6..476c1de4df 100644 --- a/src/phoenix/metrics/__init__.py +++ b/src/phoenix/metrics/__init__.py @@ -10,6 +10,7 @@ from phoenix.core.model_schema import Column logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) @dataclass(frozen=True) diff --git a/src/phoenix/server/api/routers/v1/datasets.py b/src/phoenix/server/api/routers/v1/datasets.py index 961a370423..950939d140 100644 --- a/src/phoenix/server/api/routers/v1/datasets.py +++ b/src/phoenix/server/api/routers/v1/datasets.py @@ -71,6 +71,7 @@ ) logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) DATASET_NODE_NAME = DatasetNodeType.__name__ DATASET_VERSION_NODE_NAME = DatasetVersionNodeType.__name__ diff --git a/src/phoenix/server/app.py b/src/phoenix/server/app.py index f9187caf1e..32c3fd4afd 100644 --- a/src/phoenix/server/app.py +++ b/src/phoenix/server/app.py @@ -100,6 +100,7 @@ from opentelemetry.trace import TracerProvider logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) router = APIRouter(include_in_schema=False) @@ -229,7 +230,8 @@ def _lifespan( dml_event_handler: DmlEventHandler, tracer_provider: Optional["TracerProvider"] = None, enable_prometheus: bool = False, - clean_ups: Iterable[Callable[[], None]] = (), + startup_callbacks: Iterable[Callable[[], None]] = (), + shutdown_callbacks: Iterable[Callable[[], None]] = (), read_only: bool = False, ) -> StatefulLifespan[FastAPI]: @contextlib.asynccontextmanager @@ -247,6 +249,8 @@ async def lifespan(_: FastAPI) -> AsyncIterator[Dict[str, Any]]: tracer_provider=tracer_provider, enable_prometheus=enable_prometheus, ), dml_event_handler: + for callback in startup_callbacks: + callback() yield { "event_queue": dml_event_handler, "enqueue": 
enqueue, @@ -254,8 +258,8 @@ async def lifespan(_: FastAPI) -> AsyncIterator[Dict[str, Any]]: "queue_evaluation_for_bulk_insert": queue_evaluation, "enqueue_operation": enqueue_operation, } - for clean_up in clean_ups: - clean_up() + for callback in shutdown_callbacks: + callback() return lifespan @@ -428,10 +432,12 @@ def create_app( initial_spans: Optional[Iterable[Union[Span, Tuple[Span, str]]]] = None, initial_evaluations: Optional[Iterable[pb.Evaluation]] = None, serve_ui: bool = True, - clean_up_callbacks: List[Callable[[], None]] = [], + startup_callbacks: Iterable[Callable[[], None]] = (), + shutdown_callbacks: Iterable[Callable[[], None]] = (), secret: Optional[str] = None, ) -> FastAPI: - clean_ups: List[Callable[[], None]] = clean_up_callbacks # To be called at app shutdown. + startup_callbacks_list: List[Callable[[], None]] = list(startup_callbacks) + shutdown_callbacks_list: List[Callable[[], None]] = list(shutdown_callbacks) initial_batch_of_spans: Iterable[Tuple[Span, str]] = ( () if initial_spans is None @@ -511,7 +517,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: dml_event_handler=dml_event_handler, tracer_provider=tracer_provider, enable_prometheus=enable_prometheus, - clean_ups=clean_ups, + shutdown_callbacks=shutdown_callbacks_list, + startup_callbacks=startup_callbacks_list, ), middleware=[ Middleware(HeadersMiddleware), @@ -554,5 +561,5 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: FastAPIInstrumentor().instrument(tracer_provider=tracer_provider) FastAPIInstrumentor.instrument_app(app, tracer_provider=tracer_provider) - clean_ups.append(FastAPIInstrumentor().uninstrument) + shutdown_callbacks_list.append(FastAPIInstrumentor().uninstrument) return app diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index 906aa54213..67bf6cd54d 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -1,13 +1,16 @@ import atexit +import codecs import logging import os +import sys from argparse import ArgumentParser -from pathlib import Path, PosixPath +from importlib.metadata import version +from pathlib import Path from threading import Thread from time import sleep, time from typing import List, Optional +from urllib.parse import urljoin -import pkg_resources from uvicorn import Config, Server import phoenix.trace.v1 as pb @@ -53,6 +56,7 @@ from phoenix.trace.schemas import Span logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) _WELCOME_MESSAGE = """ @@ -137,6 +141,7 @@ def _get_pid_file() -> Path: parser.add_argument("--debug", action="store_true") # Whether the app is running in a development environment parser.add_argument("--dev", action="store_true") + parser.add_argument("--no-ui", action="store_true") subparsers = parser.add_subparsers(dest="command", required=True) serve_parser = subparsers.add_parser("serve") datasets_parser = subparsers.add_parser("datasets") @@ -255,6 +260,18 @@ def _get_pid_file() -> Path: engine = create_engine_and_run_migrations(db_connection_str) instrumentation_cleanups = instrument_engine_if_enabled(engine) factory = DbSessionFactory(db=_db(engine), dialect=engine.dialect.name) + # Print information about the server + msg = _WELCOME_MESSAGE.format( + version=version("arize-phoenix"), + ui_path=urljoin(f"http://{host}:{port}", host_root_path), + grpc_path=f"http://{host}:{get_env_grpc_port()}", + http_path=urljoin(urljoin(f"http://{host}:{port}", host_root_path), "v1/traces"), + storage=get_printable_db_url(db_connection_str), + ) + if authentication_enabled: 
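+        # _EXPERIMENTAL_WARNING notes that auth support is still experimental.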
+        msg += _EXPERIMENTAL_WARNING.format(auth_enabled=True)
+    if sys.platform.startswith("win"):
+        msg = codecs.encode(msg, "ascii", errors="ignore").decode("ascii").strip()
     app = create_app(
         db=factory,
         export_path=export_path,
@@ -266,30 +283,17 @@ def _get_pid_file() -> Path:
         else create_model_from_inferences(corpus_inferences),
         debug=args.debug,
         dev=args.dev,
+        serve_ui=not args.no_ui,
         read_only=read_only,
         enable_prometheus=enable_prometheus,
         initial_spans=fixture_spans,
         initial_evaluations=fixture_evals,
-        clean_up_callbacks=instrumentation_cleanups,
+        startup_callbacks=[lambda: print(msg)],
+        shutdown_callbacks=instrumentation_cleanups,
         secret=secret,
     )
     server = Server(config=Config(app, host=host, port=port, root_path=host_root_path))  # type: ignore
     Thread(target=_write_pid_file_when_ready, args=(server,), daemon=True).start()
 
-    # Print information about the server
-    phoenix_version = pkg_resources.get_distribution("arize-phoenix").version
-    print(
-        _WELCOME_MESSAGE.format(
-            version=phoenix_version,
-            ui_path=PosixPath(f"http://{host}:{port}", host_root_path),
-            grpc_path=f"http://{host}:{get_env_grpc_port()}",
-            http_path=PosixPath(f"http://{host}:{port}", host_root_path, "v1/traces"),
-            storage=get_printable_db_url(db_connection_str),
-        )
-    )
-
-    if authentication_enabled:
-        print(_EXPERIMENTAL_WARNING.format(auth_enabled=authentication_enabled))
-
     # Start the server
     server.run()
diff --git a/src/phoenix/services.py b/src/phoenix/services.py
index 9a2662196f..65ac8a689d 100644
--- a/src/phoenix/services.py
+++ b/src/phoenix/services.py
@@ -12,6 +12,7 @@
 from phoenix.config import SERVER_DIR, get_pids_path, get_running_pid
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class Service:
diff --git a/src/phoenix/session/client.py b/src/phoenix/session/client.py
index 7b64ba20f9..df0635db2c 100644
--- a/src/phoenix/session/client.py
+++ b/src/phoenix/session/client.py
@@ -52,7 +52,7 @@
 from phoenix.utilities.client import VersionedClient
 
 logger = logging.getLogger(__name__)
-
+logger.addHandler(logging.NullHandler())
 
 DEFAULT_TIMEOUT_IN_SECONDS = 5
 
diff --git a/src/phoenix/session/evaluation.py b/src/phoenix/session/evaluation.py
index c4bf506a9c..afd4de3a05 100644
--- a/src/phoenix/session/evaluation.py
+++ b/src/phoenix/session/evaluation.py
@@ -35,6 +35,7 @@
 ]
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
diff --git a/src/phoenix/session/session.py b/src/phoenix/session/session.py
index 9d0049fa7f..72e5cad184 100644
--- a/src/phoenix/session/session.py
+++ b/src/phoenix/session/session.py
@@ -63,6 +63,7 @@
     pass
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 # type workaround
 # https://github.com/python/mypy/issues/5264#issuecomment-399407428
@@ -392,7 +393,7 @@ def __init__(
                 if (trace_dataset and (initial_evaluations := trace_dataset.evaluations))
                 else None
             ),
-            clean_up_callbacks=instrumentation_cleanups,
+            shutdown_callbacks=instrumentation_cleanups,
         )
         self.server = ThreadServer(
             app=self.app,
diff --git a/src/phoenix/trace/fixtures.py b/src/phoenix/trace/fixtures.py
index fc17e2a417..e944ffe571 100644
--- a/src/phoenix/trace/fixtures.py
+++ b/src/phoenix/trace/fixtures.py
@@ -22,6 +22,7 @@
 from phoenix.trace.utils import download_json_traces_fixture, is_jsonl_file, json_lines_to_df
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class EvaluationResultSchema(NamedTuple):
diff --git a/src/phoenix/trace/langchain/instrumentor.py b/src/phoenix/trace/langchain/instrumentor.py
index 56c6191aad..89b0a95d70 100644
--- a/src/phoenix/trace/langchain/instrumentor.py
+++ b/src/phoenix/trace/langchain/instrumentor.py
@@ -13,7 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
-
+logger.addHandler(logging.NullHandler())
 
 __all__ = ("LangChainInstrumentor",)
 
diff --git a/src/phoenix/trace/llama_index/callback.py b/src/phoenix/trace/llama_index/callback.py
index 1adc46485d..1db8bbaa66 100644
--- a/src/phoenix/trace/llama_index/callback.py
+++ b/src/phoenix/trace/llama_index/callback.py
@@ -13,6 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 LLAMA_INDEX_MODERN_VERSION = (0, 10, 0)
 INSTRUMENTATION_MODERN_VERSION = (1, 0, 0)
diff --git a/src/phoenix/trace/openai/instrumentor.py b/src/phoenix/trace/openai/instrumentor.py
index cadb4e82df..e77021171c 100644
--- a/src/phoenix/trace/openai/instrumentor.py
+++ b/src/phoenix/trace/openai/instrumentor.py
@@ -13,6 +13,7 @@
 from phoenix.trace.exporter import _OpenInferenceExporter
 
 logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
 
 
 class OpenAIInstrumentor(Instrumentor):
diff --git a/tox.ini b/tox.ini
index 4ae2f8b605..fb32d4f640 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,16 +7,19 @@ package = wheel
 wheel_build_env = .pkg
 deps =
     -r dev-requirements.txt
+    integration_tests: .
 changedir =
     phoenix_evals: packages/phoenix-evals/
+    integration_tests: integration_tests/
 commands_pre =
     pkg: uv pip install .[test]
+    integration_tests: uv pip install -r requirements.txt
 commands =
     ruff: ruff format {posargs:.}
     ruff: ruff check --fix {posargs:.}
     mypy: mypy {posargs:.}
-    test: pytest -n auto {posargs:.}
+    test: pytest -n auto --nf {posargs:.}
     ci: ruff format --diff {posargs:.}
     ci: ruff check --no-fix {posargs:.}
     ci: mypy {posargs:.}
-    ci: pytest -n auto -x -ra {posargs:.}
+    ci: pytest -n auto -x {posargs:.}