Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(persistence): migrations support #2748

Merged
merged 11 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"respx",
"rgba",
"seafoam",
"sqlalchemy",
"templating",
"tensorboard",
"testset",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies = [
"openinference-instrumentation-llama-index>=1.2.0",
"openinference-instrumentation-openai>=0.1.4",
"sqlalchemy>=2, <3",
"alembic>=1.3.0, <2",
]
dynamic = ["version"]

Expand Down
3 changes: 3 additions & 0 deletions src/phoenix/db/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .migrate import migrate

__all__ = ["migrate"]
119 changes: 119 additions & 0 deletions src/phoenix/db/alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
# Note this is overridden in .migrate during programatic migrations
script_location = migrations

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to migrations/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# NB: This is commented out intentionally as it is dynamic
# See migrations/env.py
# sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = DEBUG
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
69 changes: 18 additions & 51 deletions src/phoenix/db/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import numpy as np
from openinference.semconv.trace import SpanAttributes
from sqlalchemy import Engine, create_engine, event
from sqlalchemy import Engine, create_engine, event, insert
from sqlalchemy.orm import sessionmaker

from phoenix.db.models import Base, Trace
from phoenix.db.models import Base, Project, Trace
from phoenix.trace.schemas import (
ComputedValues,
Span,
Expand All @@ -21,6 +21,8 @@
SpanStatusCode,
)

from .migrate import migrate

_CONFIG = """
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
Expand All @@ -29,50 +31,6 @@
PRAGMA busy_timeout = 10000;
"""

_INIT_DB = """
BEGIN;
CREATE TABLE projects (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
description TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO projects(name) VALUES('default');
CREATE TABLE traces (
id INTEGER PRIMARY KEY,
trace_id TEXT UNIQUE NOT NULL,
project_rowid INTEGER NOT NULL,
session_id TEXT,
start_time TIMESTAMP NOT NULL,
end_time TIMESTAMP NOT NULL,
FOREIGN KEY(project_rowid) REFERENCES projects(id)
);
CREATE INDEX idx_trace_start_time ON traces(start_time);
CREATE TABLE spans (
id INTEGER PRIMARY KEY,
span_id TEXT UNIQUE NOT NULL,
trace_rowid INTEGER NOT NULL,
parent_span_id TEXT,
kind TEXT NOT NULL,
name TEXT NOT NULL,
start_time TIMESTAMP NOT NULL,
end_time TIMESTAMP NOT NULL,
attributes JSON,
events JSON,
status TEXT CHECK(status IN ('UNSET','OK','ERROR')) NOT NULL DEFAULT('UNSET'),
status_message TEXT,
latency_ms REAL,
cumulative_error_count INTEGER NOT NULL DEFAULT 0,
cumulative_llm_token_count_prompt INTEGER NOT NULL DEFAULT 0,
cumulative_llm_token_count_completion INTEGER NOT NULL DEFAULT 0,
FOREIGN KEY(trace_rowid) REFERENCES traces(id)
);
CREATE INDEX idx_parent_span_id ON spans(parent_span_id);
PRAGMA user_version = 1;
COMMIT;
"""


_MEM_DB_STR = "file::memory:?cache=shared"

Expand Down Expand Up @@ -105,19 +63,28 @@ def __init__(self, db_path: Optional[Path] = None) -> None:
# self.con.set_trace_callback(print)
cur = self.con.cursor()
cur.executescript(_CONFIG)
if int(cur.execute("PRAGMA user_version;").fetchone()[0]) < 1:
cur.executescript(_INIT_DB)

db_url = f"sqlite:///{db_path}" if db_path else "sqlite:///:memory:"
engine = (
create_engine(f"sqlite:///{db_path}", echo=True)
create_engine(db_url, echo=True)
if db_path
else create_engine(
"sqlite:///:memory:",
db_url,
echo=True,
creator=_mem_db_creator,
)
)
Base.metadata.create_all(engine)

# TODO this should be moved out
if db_path:
migrate(db_url)
else:
Base.metadata.create_all(engine)
# Create the default project
with engine.connect() as conn:
conn.execute(insert(Project).values(name="default", description="default project"))
conn.commit()

self.Session = sessionmaker(bind=engine)

def insert_span(self, span: Span, project_name: str) -> None:
Expand Down
30 changes: 30 additions & 0 deletions src/phoenix/db/migrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging
import os
from pathlib import Path

from alembic import command
from alembic.config import Config

logger = logging.getLogger(__name__)


def migrate(url: str) -> None:
"""
Runs migrations on the database.
NB: Migrate only works on non-memory databases.

Args:
url: The database URL.
"""
logger.warning("Running migrations on the database")
config_path = os.path.normpath(str(Path(__file__).parent.resolve()) + os.sep + "alembic.ini")
mikeldking marked this conversation as resolved.
Show resolved Hide resolved
alembic_cfg = Config(config_path)

# Explicitly set the migration directory
scripts_location = os.path.normpath(
str(Path(__file__).parent.resolve()) + os.sep + "migrations"
)
mikeldking marked this conversation as resolved.
Show resolved Hide resolved
alembic_cfg.set_main_option("script_location", scripts_location)
alembic_cfg.set_main_option("sqlalchemy.url", url)

command.upgrade(alembic_cfg, "head")
1 change: 1 addition & 0 deletions src/phoenix/db/migrations/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Generic single-database configuration.
74 changes: 74 additions & 0 deletions src/phoenix/db/migrations/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from logging.config import fileConfig

from alembic import context
from phoenix.db.models import Base
from sqlalchemy import engine_from_config, pool

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support

target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.

This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.

Calls to context.execute() here emit the given string to the
script output.

"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)

with context.begin_transaction():
context.run_migrations()


def run_migrations_online() -> None:
"""Run migrations in 'online' mode.

In this scenario we need to create an Engine
and associate a connection with the context.

"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)

with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=target_metadata)

with context.begin_transaction():
context.run_migrations()


if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
26 changes: 26 additions & 0 deletions src/phoenix/db/migrations/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
${upgrades if upgrades else "pass"}


def downgrade() -> None:
${downgrades if downgrades else "pass"}
Loading
Loading