From 8f79e290eb79e73101a05175f5de4b8f8e747c6f Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 3 Apr 2024 19:37:00 -0600 Subject: [PATCH 01/11] initialize alembic refactor --- pyproject.toml | 1 + src/phoenix/db/alembic.ini | 116 +++++++++++++++++++++++ src/phoenix/db/migrations/README | 1 + src/phoenix/db/migrations/env.py | 74 +++++++++++++++ src/phoenix/db/migrations/script.py.mako | 26 +++++ 5 files changed, 218 insertions(+) create mode 100644 src/phoenix/db/alembic.ini create mode 100644 src/phoenix/db/migrations/README create mode 100644 src/phoenix/db/migrations/env.py create mode 100644 src/phoenix/db/migrations/script.py.mako diff --git a/pyproject.toml b/pyproject.toml index 513b7b4438..3c7b1cea88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dependencies = [ "openinference-instrumentation-llama-index>=1.2.0", "openinference-instrumentation-openai>=0.1.4", "sqlalchemy>=2, <3", + "alembic>=1.3.0, <2", ] dynamic = ["version"] diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini new file mode 100644 index 0000000000..cd413a2606 --- /dev/null +++ b/src/phoenix/db/alembic.ini @@ -0,0 +1,116 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = migrations + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/src/phoenix/db/migrations/README b/src/phoenix/db/migrations/README new file mode 100644 index 0000000000..98e4f9c44e --- /dev/null +++ b/src/phoenix/db/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/src/phoenix/db/migrations/env.py b/src/phoenix/db/migrations/env.py new file mode 100644 index 0000000000..9dd6c6ceca --- /dev/null +++ b/src/phoenix/db/migrations/env.py @@ -0,0 +1,74 @@ +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/src/phoenix/db/migrations/script.py.mako b/src/phoenix/db/migrations/script.py.mako new file mode 100644 index 0000000000..fbc4b07dce --- /dev/null +++ b/src/phoenix/db/migrations/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} From 4272f5cd13c14e4ba276a7dbe71a8072ca66876b Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 3 Apr 2024 19:48:39 -0600 Subject: [PATCH 02/11] make alembic path to DB dynamic --- src/phoenix/db/alembic.ini | 4 +++- src/phoenix/db/migrations/env.py | 14 +++++------ .../migrations/versions/cf03bd6bae1d_init.py | 23 +++++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index cd413a2606..5aeabbcc9d 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -60,7 +60,9 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # are written from script.py.mako # output_encoding = utf-8 -sqlalchemy.url = driver://user:pass@localhost/dbname +# NB: This is commented out intentionally as it is dynamic +# See migrations/env.py +# sqlalchemy.url = driver://user:pass@localhost/dbname [post_write_hooks] diff --git a/src/phoenix/db/migrations/env.py b/src/phoenix/db/migrations/env.py index 9dd6c6ceca..ad1f2e3887 100644 --- a/src/phoenix/db/migrations/env.py +++ b/src/phoenix/db/migrations/env.py @@ -1,7 +1,8 @@ from logging.config import fileConfig from alembic import context -from sqlalchemy import engine_from_config, pool +from phoenix.config import get_working_dir +from sqlalchemy import create_engine # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -23,6 +24,10 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. +# Since the working directory is dynamic, we need to get it from the config +working_dir = get_working_dir() +url = f"sqlite:///{working_dir}/phoenix.db" + def run_migrations_offline() -> None: """Run migrations in 'offline' mode. @@ -36,7 +41,6 @@ def run_migrations_offline() -> None: script output. """ - url = config.get_main_option("sqlalchemy.url") context.configure( url=url, target_metadata=target_metadata, @@ -55,11 +59,7 @@ def run_migrations_online() -> None: and associate a connection with the context. """ - connectable = engine_from_config( - config.get_section(config.config_ini_section, {}), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) + connectable = create_engine(url) with connectable.connect() as connection: context.configure(connection=connection, target_metadata=target_metadata) diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py new file mode 100644 index 0000000000..6c7f693dc2 --- /dev/null +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -0,0 +1,23 @@ +"""init + +Revision ID: cf03bd6bae1d +Revises: +Create Date: 2024-04-03 19:41:48.871555 + +""" + +from typing import Sequence, Union + +# revision identifiers, used by Alembic. +revision: str = "cf03bd6bae1d" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + pass + + +def downgrade() -> None: + pass From 2dbe17fb6b249632a4d6d8853baf4a72b98459d6 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 3 Apr 2024 22:15:33 -0600 Subject: [PATCH 03/11] move migrations over --- cspell.json | 1 + src/phoenix/db/__init__.py | 3 + src/phoenix/db/alembic.ini | 4 +- src/phoenix/db/database.py | 2 - src/phoenix/db/migrate.py | 22 +++++++ .../migrations/versions/cf03bd6bae1d_init.py | 59 ++++++++++++++++++- src/phoenix/db/models.py | 2 + src/phoenix/server/main.py | 4 ++ 8 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 src/phoenix/db/migrate.py diff --git a/cspell.json b/cspell.json index af4d274d9a..67bedffd61 100644 --- a/cspell.json +++ b/cspell.json @@ -37,6 +37,7 @@ "respx", "rgba", "seafoam", + "sqlalchemy", "templating", "tensorboard", "testset", diff --git a/src/phoenix/db/__init__.py b/src/phoenix/db/__init__.py index e69de29bb2..2848bf384b 100644 --- a/src/phoenix/db/__init__.py +++ b/src/phoenix/db/__init__.py @@ -0,0 +1,3 @@ +from .migrate import migrate + +__all__ = ["migrate"] diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index 5aeabbcc9d..70e3c84046 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -93,12 +93,12 @@ keys = console keys = generic [logger_root] -level = WARN +level = DEBUG handlers = console qualname = [logger_sqlalchemy] -level = WARN +level = DEBUG handlers = qualname = sqlalchemy.engine diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py index f97ca07b69..2269a31538 100644 --- a/src/phoenix/db/database.py +++ b/src/phoenix/db/database.py @@ -105,8 +105,6 @@ def __init__(self, db_path: Optional[Path] = None) -> None: # self.con.set_trace_callback(print) cur = self.con.cursor() cur.executescript(_CONFIG) - if int(cur.execute("PRAGMA user_version;").fetchone()[0]) < 1: - cur.executescript(_INIT_DB) engine = ( create_engine(f"sqlite:///{db_path}", echo=True) diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py new file mode 100644 index 0000000000..9f189f8d75 --- /dev/null +++ b/src/phoenix/db/migrate.py @@ -0,0 +1,22 @@ +import logging +import os +from pathlib import Path + +import alembic.config + +logger = logging.getLogger(__name__) + + +def migrate() -> None: + """ + Runs migrations on the database. + """ + config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini") + alembicArgs = [ + "--config", + config_path, + "--raiseerr", + "upgrade", + "head", + ] + alembic.config.main(argv=alembicArgs) diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py index 6c7f693dc2..26163ae0c6 100644 --- a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -8,6 +8,9 @@ from typing import Sequence, Union +import sqlalchemy as sa +from alembic import op + # revision identifiers, used by Alembic. revision: str = "cf03bd6bae1d" down_revision: Union[str, None] = None @@ -16,8 +19,60 @@ def upgrade() -> None: - pass + projects_table = op.create_table( + "projects", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("name", sa.String, nullable=False), + sa.Column("description", sa.String, nullable=True), + # TODO(mikeldking): is timezone=True necessary? + sa.Column( + "created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() + ), + sa.Column( + "updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() + ), + ) + op.create_table( + "traces", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("project_rowid", sa.Integer, sa.ForeignKey("projects.id"), nullable=False), + sa.Column("session_id", sa.String, nullable=True), + sa.Column("trace_id", sa.String, nullable=False), + ) + + op.create_table( + "spans", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("trace_rowid", sa.Integer, sa.ForeignKey("traces.id"), nullable=False), + sa.Column("span_id", sa.String, nullable=False), + sa.Column("parent_span_id", sa.String, nullable=True), + sa.Column("name", sa.String, nullable=False), + sa.Column("kind", sa.String, nullable=False), + sa.Column("start_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("attributes", sa.JSON, nullable=False), + sa.Column("events", sa.JSON, nullable=False), + sa.Column("status", sa.String, nullable=False), + sa.Column("latency_ms", sa.Float, nullable=False), + sa.Column("cumulative_error_count", sa.Integer, nullable=False), + sa.Column("cumulative_llm_token_count_prompt", sa.Integer, nullable=False), + sa.Column("cumulative_llm_token_count_completion", sa.Integer, nullable=False), + ) + + op.create_index("idx_trace_start_time", "traces", ["start_time"]) + op.create_index("idx_parent_span_id", "spans", ["parent_span_id"]) + op.bulk_insert( + projects_table, + [ + {"name": "default", "description": "Default project"}, + ], + ) def downgrade() -> None: - pass + op.drop_index("idx_trace_start_time") + op.drop_index("idx_parent_span_id", "spans") + + op.drop_table("projects") + op.drop_table("traces") + op.drop_table("spans") diff --git a/src/phoenix/db/models.py b/src/phoenix/db/models.py index 62bc032347..44b98f3b08 100644 --- a/src/phoenix/db/models.py +++ b/src/phoenix/db/models.py @@ -51,6 +51,7 @@ class Trace(Base): project_rowid: Mapped[int] = mapped_column(ForeignKey("projects.id")) session_id: Mapped[Optional[str]] trace_id: Mapped[str] + # TODO(mikeldking): why is the start and end time necessary? just filtering? start_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) end_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) @@ -87,6 +88,7 @@ class Span(Base): status: Mapped[str] status_message: Mapped[str] + # TODO(mikeldking): is computed columns possible here latency_ms: Mapped[float] cumulative_error_count: Mapped[int] cumulative_llm_token_count_prompt: Mapped[int] diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index 1b43921264..b5add55a0a 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -22,6 +22,7 @@ from phoenix.core.traces import Traces from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.datasets.fixtures import FIXTURES, get_datasets +from phoenix.db import migrate from phoenix.db.database import SqliteDatabase from phoenix.pointcloud.umap_parameters import ( DEFAULT_MIN_DIST, @@ -197,6 +198,9 @@ def _load_items( ) working_dir = get_working_dir() db = SqliteDatabase(working_dir / "phoenix.db") + # Run migrations + migrate() + traces = Traces(db) if span_store := get_span_store(): Thread(target=load_traces_data_from_store, args=(traces, span_store), daemon=True).start() From 595400f981679c46edb2a1d08e4da865e9b3bdcc Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Wed, 3 Apr 2024 22:50:15 -0600 Subject: [PATCH 04/11] remove init_db --- src/phoenix/db/database.py | 45 -------------------------------------- 1 file changed, 45 deletions(-) diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py index 2269a31538..89f9a1a97b 100644 --- a/src/phoenix/db/database.py +++ b/src/phoenix/db/database.py @@ -29,51 +29,6 @@ PRAGMA busy_timeout = 10000; """ -_INIT_DB = """ -BEGIN; -CREATE TABLE projects ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - description TEXT, - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -); -INSERT INTO projects(name) VALUES('default'); -CREATE TABLE traces ( - id INTEGER PRIMARY KEY, - trace_id TEXT UNIQUE NOT NULL, - project_rowid INTEGER NOT NULL, - session_id TEXT, - start_time TIMESTAMP NOT NULL, - end_time TIMESTAMP NOT NULL, - FOREIGN KEY(project_rowid) REFERENCES projects(id) -); -CREATE INDEX idx_trace_start_time ON traces(start_time); -CREATE TABLE spans ( - id INTEGER PRIMARY KEY, - span_id TEXT UNIQUE NOT NULL, - trace_rowid INTEGER NOT NULL, - parent_span_id TEXT, - kind TEXT NOT NULL, - name TEXT NOT NULL, - start_time TIMESTAMP NOT NULL, - end_time TIMESTAMP NOT NULL, - attributes JSON, - events JSON, - status TEXT CHECK(status IN ('UNSET','OK','ERROR')) NOT NULL DEFAULT('UNSET'), - status_message TEXT, - latency_ms REAL, - cumulative_error_count INTEGER NOT NULL DEFAULT 0, - cumulative_llm_token_count_prompt INTEGER NOT NULL DEFAULT 0, - cumulative_llm_token_count_completion INTEGER NOT NULL DEFAULT 0, - FOREIGN KEY(trace_rowid) REFERENCES traces(id) -); -CREATE INDEX idx_parent_span_id ON spans(parent_span_id); -PRAGMA user_version = 1; -COMMIT; -""" - - _MEM_DB_STR = "file::memory:?cache=shared" From 3ef525de76119c03e929a4f35b09e54b29d4397a Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 00:47:09 -0600 Subject: [PATCH 05/11] finish out the migration --- src/phoenix/db/alembic.ini | 1 + src/phoenix/db/database.py | 56 ++++++++++++++++++- src/phoenix/db/migrate.py | 20 ++++--- src/phoenix/db/migrations/env.py | 7 ++- .../migrations/versions/cf03bd6bae1d_init.py | 18 +++++- src/phoenix/server/main.py | 3 - 6 files changed, 86 insertions(+), 19 deletions(-) diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index 70e3c84046..f584199f26 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -2,6 +2,7 @@ [alembic] # path to migration scripts +# Note this is overridden in migrations/env.py for programmatic use script_location = migrations # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py index 89f9a1a97b..49291090af 100644 --- a/src/phoenix/db/database.py +++ b/src/phoenix/db/database.py @@ -21,6 +21,8 @@ SpanStatusCode, ) +from .migrate import migrate + _CONFIG = """ PRAGMA foreign_keys = ON; PRAGMA journal_mode = WAL; @@ -29,6 +31,51 @@ PRAGMA busy_timeout = 10000; """ +_INIT_DB = """ +BEGIN; +CREATE TABLE projects ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + description TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); +INSERT INTO projects(name) VALUES('default'); +CREATE TABLE traces ( + id INTEGER PRIMARY KEY, + trace_id TEXT UNIQUE NOT NULL, + project_rowid INTEGER NOT NULL, + session_id TEXT, + start_time TIMESTAMP NOT NULL, + end_time TIMESTAMP NOT NULL, + FOREIGN KEY(project_rowid) REFERENCES projects(id) +); +CREATE INDEX idx_trace_start_time ON traces(start_time); +CREATE TABLE spans ( + id INTEGER PRIMARY KEY, + span_id TEXT UNIQUE NOT NULL, + trace_rowid INTEGER NOT NULL, + parent_span_id TEXT, + kind TEXT NOT NULL, + name TEXT NOT NULL, + start_time TIMESTAMP NOT NULL, + end_time TIMESTAMP NOT NULL, + attributes JSON, + events JSON, + status TEXT CHECK(status IN ('UNSET','OK','ERROR')) NOT NULL DEFAULT('UNSET'), + status_message TEXT, + latency_ms REAL, + cumulative_error_count INTEGER NOT NULL DEFAULT 0, + cumulative_llm_token_count_prompt INTEGER NOT NULL DEFAULT 0, + cumulative_llm_token_count_completion INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY(trace_rowid) REFERENCES traces(id) +); +CREATE INDEX idx_parent_span_id ON spans(parent_span_id); +PRAGMA user_version = 1; +COMMIT; +""" + + _MEM_DB_STR = "file::memory:?cache=shared" @@ -70,7 +117,14 @@ def __init__(self, db_path: Optional[Path] = None) -> None: creator=_mem_db_creator, ) ) - Base.metadata.create_all(engine) + + # TODO this should be moved out + # Migrate the database if a path is provided + if db_path: + migrate() + else: + Base.metadata.create_all(engine) + self.Session = sessionmaker(bind=engine) def insert_span(self, span: Span, project_name: str) -> None: diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py index 9f189f8d75..d9a25fc10d 100644 --- a/src/phoenix/db/migrate.py +++ b/src/phoenix/db/migrate.py @@ -2,7 +2,8 @@ import os from pathlib import Path -import alembic.config +from alembic import command +from alembic.config import Config logger = logging.getLogger(__name__) @@ -12,11 +13,12 @@ def migrate() -> None: Runs migrations on the database. """ config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini") - alembicArgs = [ - "--config", - config_path, - "--raiseerr", - "upgrade", - "head", - ] - alembic.config.main(argv=alembicArgs) + alembic_cfg = Config(config_path) + + # Explicitly set the migration directory + scripts_location = os.path.normpath( + str(Path(__file__).parent.resolve()) + os.sep + "migrations" + ) + alembic_cfg.set_main_option("script_location", scripts_location) + + command.upgrade(alembic_cfg, "head") diff --git a/src/phoenix/db/migrations/env.py b/src/phoenix/db/migrations/env.py index ad1f2e3887..3b929bdcd8 100644 --- a/src/phoenix/db/migrations/env.py +++ b/src/phoenix/db/migrations/env.py @@ -2,6 +2,7 @@ from alembic import context from phoenix.config import get_working_dir +from phoenix.db.models import Base from sqlalchemy import create_engine # this is the Alembic Config object, which provides @@ -15,9 +16,8 @@ # add your model's MetaData object here # for 'autogenerate' support -# from myapp import mymodel -# target_metadata = mymodel.Base.metadata -target_metadata = None + +target_metadata = Base.metadata # other values from the config, defined by the needs of env.py, # can be acquired: @@ -59,6 +59,7 @@ def run_migrations_online() -> None: and associate a connection with the context. """ + print(config) connectable = create_engine(url) with connectable.connect() as connection: diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py index 26163ae0c6..cf3a7740d7 100644 --- a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -36,8 +36,11 @@ def upgrade() -> None: "traces", sa.Column("id", sa.Integer, primary_key=True), sa.Column("project_rowid", sa.Integer, sa.ForeignKey("projects.id"), nullable=False), + # TODO(mikeldking): might not be the right place for this sa.Column("session_id", sa.String, nullable=True), - sa.Column("trace_id", sa.String, nullable=False), + sa.Column("trace_id", sa.String, nullable=False, unique=True), + sa.Column("start_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), ) op.create_table( @@ -52,8 +55,17 @@ def upgrade() -> None: sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), sa.Column("attributes", sa.JSON, nullable=False), sa.Column("events", sa.JSON, nullable=False), - sa.Column("status", sa.String, nullable=False), - sa.Column("latency_ms", sa.Float, nullable=False), + sa.Column( + "status", + sa.String, + # TODO(mikeldking): this doesn't seem to work... + sa.CheckConstraint("status IN ('OK', 'ERROR', 'UNSET')"), + nullable=False, + default="UNSET", + server_default="UNSET", + ), + sa.Column("status_message", sa.String, nullable=False), + sa.Column("latency_ms", sa.REAL, nullable=False), sa.Column("cumulative_error_count", sa.Integer, nullable=False), sa.Column("cumulative_llm_token_count_prompt", sa.Integer, nullable=False), sa.Column("cumulative_llm_token_count_completion", sa.Integer, nullable=False), diff --git a/src/phoenix/server/main.py b/src/phoenix/server/main.py index b5add55a0a..ae78c49e15 100644 --- a/src/phoenix/server/main.py +++ b/src/phoenix/server/main.py @@ -22,7 +22,6 @@ from phoenix.core.traces import Traces from phoenix.datasets.dataset import EMPTY_DATASET, Dataset from phoenix.datasets.fixtures import FIXTURES, get_datasets -from phoenix.db import migrate from phoenix.db.database import SqliteDatabase from phoenix.pointcloud.umap_parameters import ( DEFAULT_MIN_DIST, @@ -198,8 +197,6 @@ def _load_items( ) working_dir = get_working_dir() db = SqliteDatabase(working_dir / "phoenix.db") - # Run migrations - migrate() traces = Traces(db) if span_store := get_span_store(): From 8c10e4677ad244a75a8e266631584e6076133e14 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 00:49:31 -0600 Subject: [PATCH 06/11] WIP' --- src/phoenix/db/alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index f584199f26..07950f4c0a 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -2,7 +2,7 @@ [alembic] # path to migration scripts -# Note this is overridden in migrations/env.py for programmatic use +# Note this is overridden in .migrate during programatic migrations script_location = migrations # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s From ca5a23d904a771210e5e586f94a0a0d765f8e690 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 02:15:46 -0600 Subject: [PATCH 07/11] make it work --- src/phoenix/db/alembic.ini | 6 +- src/phoenix/db/database.py | 60 ++++--------------- src/phoenix/db/migrate.py | 8 ++- src/phoenix/db/migrations/env.py | 15 +++-- .../migrations/versions/cf03bd6bae1d_init.py | 6 +- src/phoenix/db/models.py | 7 ++- 6 files changed, 37 insertions(+), 65 deletions(-) diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index 07950f4c0a..27aff4ddf9 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -94,17 +94,17 @@ keys = console keys = generic [logger_root] -level = DEBUG +level = WARN handlers = console qualname = [logger_sqlalchemy] -level = DEBUG +level = WARN handlers = qualname = sqlalchemy.engine [logger_alembic] -level = INFO +level = DEBUG handlers = qualname = alembic diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py index 49291090af..86d9f761dc 100644 --- a/src/phoenix/db/database.py +++ b/src/phoenix/db/database.py @@ -8,10 +8,10 @@ import numpy as np from openinference.semconv.trace import SpanAttributes -from sqlalchemy import Engine, create_engine, event +from sqlalchemy import Engine, create_engine, event, insert from sqlalchemy.orm import sessionmaker -from phoenix.db.models import Base, Trace +from phoenix.db.models import Base, Project, Trace from phoenix.trace.schemas import ( ComputedValues, Span, @@ -31,50 +31,6 @@ PRAGMA busy_timeout = 10000; """ -_INIT_DB = """ -BEGIN; -CREATE TABLE projects ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - description TEXT, - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -); -INSERT INTO projects(name) VALUES('default'); -CREATE TABLE traces ( - id INTEGER PRIMARY KEY, - trace_id TEXT UNIQUE NOT NULL, - project_rowid INTEGER NOT NULL, - session_id TEXT, - start_time TIMESTAMP NOT NULL, - end_time TIMESTAMP NOT NULL, - FOREIGN KEY(project_rowid) REFERENCES projects(id) -); -CREATE INDEX idx_trace_start_time ON traces(start_time); -CREATE TABLE spans ( - id INTEGER PRIMARY KEY, - span_id TEXT UNIQUE NOT NULL, - trace_rowid INTEGER NOT NULL, - parent_span_id TEXT, - kind TEXT NOT NULL, - name TEXT NOT NULL, - start_time TIMESTAMP NOT NULL, - end_time TIMESTAMP NOT NULL, - attributes JSON, - events JSON, - status TEXT CHECK(status IN ('UNSET','OK','ERROR')) NOT NULL DEFAULT('UNSET'), - status_message TEXT, - latency_ms REAL, - cumulative_error_count INTEGER NOT NULL DEFAULT 0, - cumulative_llm_token_count_prompt INTEGER NOT NULL DEFAULT 0, - cumulative_llm_token_count_completion INTEGER NOT NULL DEFAULT 0, - FOREIGN KEY(trace_rowid) REFERENCES traces(id) -); -CREATE INDEX idx_parent_span_id ON spans(parent_span_id); -PRAGMA user_version = 1; -COMMIT; -""" - _MEM_DB_STR = "file::memory:?cache=shared" @@ -108,22 +64,26 @@ def __init__(self, db_path: Optional[Path] = None) -> None: cur = self.con.cursor() cur.executescript(_CONFIG) + db_url = f"sqlite:///{db_path}" if db_path else "sqlite:///:memory:" engine = ( - create_engine(f"sqlite:///{db_path}", echo=True) + create_engine(db_url, echo=True) if db_path else create_engine( - "sqlite:///:memory:", + db_url, echo=True, creator=_mem_db_creator, ) ) # TODO this should be moved out - # Migrate the database if a path is provided if db_path: - migrate() + migrate(db_url) else: Base.metadata.create_all(engine) + # Create the default project + with engine.connect() as conn: + conn.execute(insert(Project).values(name="default", description="default project")) + conn.commit() self.Session = sessionmaker(bind=engine) diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py index d9a25fc10d..4a1bf52450 100644 --- a/src/phoenix/db/migrate.py +++ b/src/phoenix/db/migrate.py @@ -8,10 +8,15 @@ logger = logging.getLogger(__name__) -def migrate() -> None: +def migrate(url: str) -> None: """ Runs migrations on the database. + NB: Migrate only works on non-memory databases. + + Args: + url: The database URL. """ + logger.warning("Running migrations on the database") config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini") alembic_cfg = Config(config_path) @@ -20,5 +25,6 @@ def migrate() -> None: str(Path(__file__).parent.resolve()) + os.sep + "migrations" ) alembic_cfg.set_main_option("script_location", scripts_location) + alembic_cfg.set_main_option("sqlalchemy.url", url) command.upgrade(alembic_cfg, "head") diff --git a/src/phoenix/db/migrations/env.py b/src/phoenix/db/migrations/env.py index 3b929bdcd8..6e3c4d0d3c 100644 --- a/src/phoenix/db/migrations/env.py +++ b/src/phoenix/db/migrations/env.py @@ -1,9 +1,8 @@ from logging.config import fileConfig from alembic import context -from phoenix.config import get_working_dir from phoenix.db.models import Base -from sqlalchemy import create_engine +from sqlalchemy import engine_from_config, pool # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -24,10 +23,6 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. -# Since the working directory is dynamic, we need to get it from the config -working_dir = get_working_dir() -url = f"sqlite:///{working_dir}/phoenix.db" - def run_migrations_offline() -> None: """Run migrations in 'offline' mode. @@ -41,6 +36,7 @@ def run_migrations_offline() -> None: script output. """ + url = config.get_main_option("sqlalchemy.url") context.configure( url=url, target_metadata=target_metadata, @@ -59,8 +55,11 @@ def run_migrations_online() -> None: and associate a connection with the context. """ - print(config) - connectable = create_engine(url) + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) with connectable.connect() as connection: context.configure(connection=connection, target_metadata=target_metadata) diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py index cf3a7740d7..181d791306 100644 --- a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -29,7 +29,11 @@ def upgrade() -> None: "created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() ), sa.Column( - "updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + onupdate=sa.func.now(), ), ) op.create_table( diff --git a/src/phoenix/db/models.py b/src/phoenix/db/models.py index 44b98f3b08..e457406782 100644 --- a/src/phoenix/db/models.py +++ b/src/phoenix/db/models.py @@ -6,6 +6,7 @@ DateTime, ForeignKey, UniqueConstraint, + func, ) from sqlalchemy.orm import ( DeclarativeBase, @@ -28,8 +29,10 @@ class Project(Base): id: Mapped[int] = mapped_column(primary_key=True) name: Mapped[str] description: Mapped[Optional[str]] - updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) - created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) traces: WriteOnlyMapped["Trace"] = relationship( "Trace", From fdf2507e1935f940b9d549bc2672d84f280b2f15 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 09:25:24 -0600 Subject: [PATCH 08/11] add contraints naming conventions --- src/phoenix/db/alembic.ini | 4 ++-- .../migrations/versions/cf03bd6bae1d_init.py | 18 +++++++-------- src/phoenix/db/models.py | 23 +++++++++++++++---- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index 27aff4ddf9..c8b09b0273 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -94,12 +94,12 @@ keys = console keys = generic [logger_root] -level = WARN +level = DEBUG handlers = console qualname = [logger_sqlalchemy] -level = WARN +level = DEBUG handlers = qualname = sqlalchemy.engine diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py index 181d791306..217bfaf2f6 100644 --- a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -22,15 +22,13 @@ def upgrade() -> None: projects_table = op.create_table( "projects", sa.Column("id", sa.Integer, primary_key=True), - sa.Column("name", sa.String, nullable=False), + # TODO does the uniqueness constraint need to be named + sa.Column("name", sa.String, nullable=False, unique=True), sa.Column("description", sa.String, nullable=True), - # TODO(mikeldking): is timezone=True necessary? - sa.Column( - "created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() - ), + sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()), sa.Column( "updated_at", - sa.DateTime(timezone=True), + sa.DateTime(), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now(), @@ -43,15 +41,15 @@ def upgrade() -> None: # TODO(mikeldking): might not be the right place for this sa.Column("session_id", sa.String, nullable=True), sa.Column("trace_id", sa.String, nullable=False, unique=True), - sa.Column("start_time", sa.DateTime(timezone=True), nullable=False), - sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("start_time", sa.DateTime(), nullable=False), + sa.Column("end_time", sa.DateTime(), nullable=False), ) op.create_table( "spans", sa.Column("id", sa.Integer, primary_key=True), sa.Column("trace_rowid", sa.Integer, sa.ForeignKey("traces.id"), nullable=False), - sa.Column("span_id", sa.String, nullable=False), + sa.Column("span_id", sa.String, nullable=False, unique=True), sa.Column("parent_span_id", sa.String, nullable=True), sa.Column("name", sa.String, nullable=False), sa.Column("kind", sa.String, nullable=False), @@ -63,7 +61,7 @@ def upgrade() -> None: "status", sa.String, # TODO(mikeldking): this doesn't seem to work... - sa.CheckConstraint("status IN ('OK', 'ERROR', 'UNSET')"), + sa.CheckConstraint("status IN ('OK', 'ERROR', 'UNSET')", "valid_status"), nullable=False, default="UNSET", server_default="UNSET", diff --git a/src/phoenix/db/models.py b/src/phoenix/db/models.py index e457406782..8971b7d940 100644 --- a/src/phoenix/db/models.py +++ b/src/phoenix/db/models.py @@ -3,8 +3,10 @@ from sqlalchemy import ( JSON, + CheckConstraint, DateTime, ForeignKey, + MetaData, UniqueConstraint, func, ) @@ -18,6 +20,17 @@ class Base(DeclarativeBase): + # Enforce best practices for naming constraints + # https://alembic.sqlalchemy.org/en/latest/naming.html#integration-of-naming-conventions-into-operations-autogenerate + metadata = MetaData( + naming_convention={ + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_`%(constraint_name)s`", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", + } + ) type_annotation_map = { Dict[str, Any]: JSON, List[Dict[str, Any]]: JSON, @@ -42,7 +55,7 @@ class Project(Base): __table_args__ = ( UniqueConstraint( "name", - name="project_name_unique", + name="uq_projects_name", sqlite_on_conflict="IGNORE", ), ) @@ -70,7 +83,7 @@ class Trace(Base): __table_args__ = ( UniqueConstraint( "trace_id", - name="trace_id_unique", + name="uq_traces_trace_id", sqlite_on_conflict="IGNORE", ), ) @@ -88,7 +101,9 @@ class Span(Base): end_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) attributes: Mapped[Dict[str, Any]] events: Mapped[List[Dict[str, Any]]] - status: Mapped[str] + status: Mapped[str] = mapped_column( + CheckConstraint("status IN ('OK', 'ERROR', 'UNSET')", "valid_status") + ) status_message: Mapped[str] # TODO(mikeldking): is computed columns possible here @@ -102,7 +117,7 @@ class Span(Base): __table_args__ = ( UniqueConstraint( "span_id", - name="trace_id_unique", + name="uq_spans_trace_id", sqlite_on_conflict="IGNORE", ), ) From 27a87e8d00b84145f69bcdb6e590f11429750b58 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 09:26:39 -0600 Subject: [PATCH 09/11] Update src/phoenix/db/migrate.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/db/migrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py index 4a1bf52450..8746cfb694 100644 --- a/src/phoenix/db/migrate.py +++ b/src/phoenix/db/migrate.py @@ -17,7 +17,7 @@ def migrate(url: str) -> None: url: The database URL. """ logger.warning("Running migrations on the database") - config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini") + config_path = str(Path(__file__).parent.resolve() / "alembic.ini") alembic_cfg = Config(config_path) # Explicitly set the migration directory From 5f2630240cddfcf783aa476863dbb34dd972b5a7 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 09:26:44 -0600 Subject: [PATCH 10/11] Update src/phoenix/db/migrate.py Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com> --- src/phoenix/db/migrate.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py index 8746cfb694..7e1f942ee4 100644 --- a/src/phoenix/db/migrate.py +++ b/src/phoenix/db/migrate.py @@ -21,9 +21,7 @@ def migrate(url: str) -> None: alembic_cfg = Config(config_path) # Explicitly set the migration directory - scripts_location = os.path.normpath( - str(Path(__file__).parent.resolve()) + os.sep + "migrations" - ) + scripts_location = str(Path(__file__).parent.resolve()) / "migrations") alembic_cfg.set_main_option("script_location", scripts_location) alembic_cfg.set_main_option("sqlalchemy.url", url) From a4d4c8827250d04074f1b42885ab813455a4df96 Mon Sep 17 00:00:00 2001 From: Mikyo King Date: Thu, 4 Apr 2024 09:46:29 -0600 Subject: [PATCH 11/11] restore log level --- src/phoenix/db/alembic.ini | 6 +++--- src/phoenix/db/database.py | 2 +- src/phoenix/db/migrate.py | 3 +-- .../db/migrations/versions/cf03bd6bae1d_init.py | 14 ++++---------- src/phoenix/db/models.py | 11 +++++------ 5 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/phoenix/db/alembic.ini b/src/phoenix/db/alembic.ini index c8b09b0273..402ff9e576 100644 --- a/src/phoenix/db/alembic.ini +++ b/src/phoenix/db/alembic.ini @@ -94,17 +94,17 @@ keys = console keys = generic [logger_root] -level = DEBUG +level = WARN handlers = console qualname = [logger_sqlalchemy] -level = DEBUG +level = WARN handlers = qualname = sqlalchemy.engine [logger_alembic] -level = DEBUG +level = WARN handlers = qualname = alembic diff --git a/src/phoenix/db/database.py b/src/phoenix/db/database.py index 86d9f761dc..be23975d6e 100644 --- a/src/phoenix/db/database.py +++ b/src/phoenix/db/database.py @@ -80,7 +80,7 @@ def __init__(self, db_path: Optional[Path] = None) -> None: migrate(db_url) else: Base.metadata.create_all(engine) - # Create the default project + # Create the default project and setup indexes with engine.connect() as conn: conn.execute(insert(Project).values(name="default", description="default project")) conn.commit() diff --git a/src/phoenix/db/migrate.py b/src/phoenix/db/migrate.py index 7e1f942ee4..35d640300a 100644 --- a/src/phoenix/db/migrate.py +++ b/src/phoenix/db/migrate.py @@ -1,5 +1,4 @@ import logging -import os from pathlib import Path from alembic import command @@ -21,7 +20,7 @@ def migrate(url: str) -> None: alembic_cfg = Config(config_path) # Explicitly set the migration directory - scripts_location = str(Path(__file__).parent.resolve()) / "migrations") + scripts_location = str(Path(__file__).parent.resolve() / "migrations") alembic_cfg.set_main_option("script_location", scripts_location) alembic_cfg.set_main_option("sqlalchemy.url", url) diff --git a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py index 217bfaf2f6..e0a997531c 100644 --- a/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +++ b/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py @@ -41,7 +41,7 @@ def upgrade() -> None: # TODO(mikeldking): might not be the right place for this sa.Column("session_id", sa.String, nullable=True), sa.Column("trace_id", sa.String, nullable=False, unique=True), - sa.Column("start_time", sa.DateTime(), nullable=False), + sa.Column("start_time", sa.DateTime(), nullable=False, index=True), sa.Column("end_time", sa.DateTime(), nullable=False), ) @@ -50,11 +50,11 @@ def upgrade() -> None: sa.Column("id", sa.Integer, primary_key=True), sa.Column("trace_rowid", sa.Integer, sa.ForeignKey("traces.id"), nullable=False), sa.Column("span_id", sa.String, nullable=False, unique=True), - sa.Column("parent_span_id", sa.String, nullable=True), + sa.Column("parent_span_id", sa.String, nullable=True, index=True), sa.Column("name", sa.String, nullable=False), sa.Column("kind", sa.String, nullable=False), - sa.Column("start_time", sa.DateTime(timezone=True), nullable=False), - sa.Column("end_time", sa.DateTime(timezone=True), nullable=False), + sa.Column("start_time", sa.DateTime(), nullable=False), + sa.Column("end_time", sa.DateTime(), nullable=False), sa.Column("attributes", sa.JSON, nullable=False), sa.Column("events", sa.JSON, nullable=False), sa.Column( @@ -72,9 +72,6 @@ def upgrade() -> None: sa.Column("cumulative_llm_token_count_prompt", sa.Integer, nullable=False), sa.Column("cumulative_llm_token_count_completion", sa.Integer, nullable=False), ) - - op.create_index("idx_trace_start_time", "traces", ["start_time"]) - op.create_index("idx_parent_span_id", "spans", ["parent_span_id"]) op.bulk_insert( projects_table, [ @@ -84,9 +81,6 @@ def upgrade() -> None: def downgrade() -> None: - op.drop_index("idx_trace_start_time") - op.drop_index("idx_parent_span_id", "spans") - op.drop_table("projects") op.drop_table("traces") op.drop_table("spans") diff --git a/src/phoenix/db/models.py b/src/phoenix/db/models.py index 8971b7d940..b52428f1f8 100644 --- a/src/phoenix/db/models.py +++ b/src/phoenix/db/models.py @@ -67,9 +67,8 @@ class Trace(Base): project_rowid: Mapped[int] = mapped_column(ForeignKey("projects.id")) session_id: Mapped[Optional[str]] trace_id: Mapped[str] - # TODO(mikeldking): why is the start and end time necessary? just filtering? - start_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) - end_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + start_time: Mapped[datetime] = mapped_column(DateTime(), index=True) + end_time: Mapped[datetime] = mapped_column(DateTime()) project: Mapped["Project"] = relationship( "Project", @@ -94,11 +93,11 @@ class Span(Base): id: Mapped[int] = mapped_column(primary_key=True) trace_rowid: Mapped[int] = mapped_column(ForeignKey("traces.id")) span_id: Mapped[str] - parent_span_id: Mapped[Optional[str]] + parent_span_id: Mapped[Optional[str]] = mapped_column(index=True) name: Mapped[str] kind: Mapped[str] - start_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) - end_time: Mapped[datetime] = mapped_column(DateTime(timezone=True)) + start_time: Mapped[datetime] = mapped_column(DateTime()) + end_time: Mapped[datetime] = mapped_column(DateTime()) attributes: Mapped[Dict[str, Any]] events: Mapped[List[Dict[str, Any]]] status: Mapped[str] = mapped_column(