Arize-ai · mikeldking · Apr 4, 2024 · Apr 4, 2024 · Apr 4, 2024 · Apr 4, 2024
diff --git a/cspell.json b/cspell.json
@@ -37,6 +37,7 @@
         "respx",
         "rgba",
         "seafoam",
+        "sqlalchemy",
         "templating",
         "tensorboard",
         "testset",

diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,7 @@ dependencies = [
   "openinference-instrumentation-llama-index>=1.2.0",
   "openinference-instrumentation-openai>=0.1.4",
   "sqlalchemy>=2, <3",
+  "alembic>=1.3.0, <2",
 ]
 dynamic = ["version"]
 

diff --git a/src/phoenix/db/__init__.py b/src/phoenix/db/__init__.py
@@ -0,0 +1,3 @@
+from .migrate import migrate
+
+__all__ = ["migrate"]
diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini
@@ -0,0 +1,119 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+# Note this is overridden in .migrate during programatic migrations
+script_location = migrations
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to migrations/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# NB: This is commented out intentionally as it is dynamic
+# See migrations/env.py
+# sqlalchemy.url = driver://user:pass@localhost/dbname
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = DEBUG
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py
@@ -8,10 +8,10 @@
 
 import numpy as np
 from openinference.semconv.trace import SpanAttributes
-from sqlalchemy import Engine, create_engine, event
+from sqlalchemy import Engine, create_engine, event, insert
 from sqlalchemy.orm import sessionmaker
 
-from phoenix.db.models import Base, Trace
+from phoenix.db.models import Base, Project, Trace
 from phoenix.trace.schemas import (
     ComputedValues,
     Span,
@@ -21,6 +21,8 @@
     SpanStatusCode,
 )
 
+from .migrate import migrate
+
 _CONFIG = """
 PRAGMA foreign_keys = ON;
 PRAGMA journal_mode = WAL;
@@ -29,50 +31,6 @@
 PRAGMA busy_timeout = 10000;
 """
 
-_INIT_DB = """
-BEGIN;
-CREATE TABLE projects (
-    id INTEGER PRIMARY KEY,
-    name TEXT NOT NULL UNIQUE,
-    description TEXT,
-    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
-);
-INSERT INTO projects(name) VALUES('default');
-CREATE TABLE traces (
-    id INTEGER PRIMARY KEY,
-    trace_id TEXT UNIQUE NOT NULL,
-    project_rowid INTEGER NOT NULL,
-    session_id TEXT,
-    start_time TIMESTAMP NOT NULL,
-    end_time TIMESTAMP NOT NULL,
-    FOREIGN KEY(project_rowid) REFERENCES projects(id)
-);
-CREATE INDEX idx_trace_start_time ON traces(start_time);
-CREATE TABLE spans (
-    id INTEGER PRIMARY KEY,
-    span_id TEXT UNIQUE NOT NULL,
-    trace_rowid INTEGER NOT NULL,
-    parent_span_id TEXT,
-    kind TEXT NOT NULL,
-    name TEXT NOT NULL,
-    start_time TIMESTAMP NOT NULL,
-    end_time TIMESTAMP NOT NULL,
-    attributes JSON,
-    events JSON,
-    status TEXT CHECK(status IN ('UNSET','OK','ERROR')) NOT NULL DEFAULT('UNSET'),
-    status_message TEXT,
-    latency_ms REAL,
-    cumulative_error_count INTEGER NOT NULL DEFAULT 0,
-    cumulative_llm_token_count_prompt INTEGER NOT NULL DEFAULT 0,
-    cumulative_llm_token_count_completion INTEGER NOT NULL DEFAULT 0,
-    FOREIGN KEY(trace_rowid) REFERENCES traces(id)
-);
-CREATE INDEX idx_parent_span_id ON spans(parent_span_id);
-PRAGMA user_version = 1;
-COMMIT;
-"""
-
 
 _MEM_DB_STR = "file::memory:?cache=shared"
 
@@ -105,19 +63,28 @@ def __init__(self, db_path: Optional[Path] = None) -> None:
         # self.con.set_trace_callback(print)
         cur = self.con.cursor()
         cur.executescript(_CONFIG)
-        if int(cur.execute("PRAGMA user_version;").fetchone()[0]) < 1:
-            cur.executescript(_INIT_DB)
 
+        db_url = f"sqlite:///{db_path}" if db_path else "sqlite:///:memory:"
         engine = (
-            create_engine(f"sqlite:///{db_path}", echo=True)
+            create_engine(db_url, echo=True)
             if db_path
             else create_engine(
-                "sqlite:///:memory:",
+                db_url,
                 echo=True,
                 creator=_mem_db_creator,
             )
         )
-        Base.metadata.create_all(engine)
+
+        # TODO this should be moved out
+        if db_path:
+            migrate(db_url)
+        else:
+            Base.metadata.create_all(engine)
+            # Create the default project
+            with engine.connect() as conn:
+                conn.execute(insert(Project).values(name="default", description="default project"))
+                conn.commit()
+
         self.Session = sessionmaker(bind=engine)
 
     def insert_span(self, span: Span, project_name: str) -> None:

diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py
@@ -0,0 +1,30 @@
+import logging
+import os
+from pathlib import Path
+
+from alembic import command
+from alembic.config import Config
+
+logger = logging.getLogger(__name__)
+
+
+def migrate(url: str) -> None:
+    """
+    Runs migrations on the database.
+    NB: Migrate only works on non-memory databases.
+
+    Args:
+        url: The database URL.
+    """
+    logger.warning("Running migrations on the database")
+    config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini")
+    alembic_cfg = Config(config_path)
+
+    # Explicitly set the migration directory
+    scripts_location = os.path.normpath(
+        str(Path(__file__).parent.resolve()) + os.sep + "migrations"
+    )
+    alembic_cfg.set_main_option("script_location", scripts_location)
+    alembic_cfg.set_main_option("sqlalchemy.url", url)
+
+    command.upgrade(alembic_cfg, "head")
diff --git a/src/phoenix/db/migrations/README b/src/phoenix/db/migrations/README
@@ -0,0 +1 @@
+Generic single-database configuration.
diff --git a/src/phoenix/db/migrations/env.py b/src/phoenix/db/migrations/env.py
@@ -0,0 +1,74 @@
+from logging.config import fileConfig
+
+from alembic import context
+from phoenix.db.models import Base
+from sqlalchemy import engine_from_config, pool
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata)
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/src/phoenix/db/migrations/script.py.mako b/src/phoenix/db/migrations/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .migrate import migrate

		__all__ = ["migrate"]