-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/move fixtures to migrations #73
Changes from all commits
1ce7654
35a028e
1550f42
1726b2f
ffc1108
fde1f41
61692d8
814d150
5a72a22
b4f102b
fe2f66b
d861c16
56d90cd
99c8277
c13739a
2af13e5
d09cf62
0501fd7
7b1a830
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,23 @@ | ||
from macrostrat.database import Database | ||
|
||
from .._legacy import get_db | ||
from .._legacy import get_db, refresh_db | ||
from rich import print | ||
from .base import Migration | ||
from .base import Migration, ApplicationStatus | ||
from typing import ClassVar | ||
from pathlib import Path | ||
from .partition_maps import PartitionMapsMigration | ||
from .partition_carto import PartitionCartoMigration | ||
from .update_macrostrat import MacrostratCoreMigration | ||
|
||
from graphlib import TopologicalSorter | ||
from . import ( | ||
baseline, macrostrat_mariadb, partition_carto, partition_maps, update_macrostrat, map_source_slugs, map_sources, | ||
column_builder, api_v3, points, maps_source_operations | ||
) | ||
__dir__ = Path(__file__).parent | ||
|
||
|
||
class StorageSchemeMigration(Migration): | ||
name = "storage-scheme" | ||
|
||
depends_on = ['api-v3'] | ||
|
||
def apply(self, db: Database): | ||
db.run_sql( | ||
""" | ||
|
@@ -37,7 +40,10 @@ def apply(self, db: Database): | |
) | ||
|
||
def should_apply(self, db: Database): | ||
return has_enum(db, "schemeenum", schema="macrostrat") | ||
if has_enum(db, "schemeenum", schema="macrostrat"): | ||
return ApplicationStatus.CAN_APPLY | ||
else: | ||
return ApplicationStatus.APPLIED | ||
|
||
|
||
def has_enum(db: Database, name: str, schema: str = None): | ||
|
@@ -52,7 +58,7 @@ def has_enum(db: Database, name: str, schema: str = None): | |
).scalar() | ||
|
||
|
||
def run_migrations(apply: bool = False, name: str = None, force: bool = False): | ||
def run_migrations(apply: bool = False, name: str = None, force: bool = False, data_changes: bool = False): | ||
"""Apply database migrations""" | ||
db = get_db() | ||
|
||
|
@@ -61,24 +67,55 @@ def run_migrations(apply: bool = False, name: str = None, force: bool = False): | |
if force and not name: | ||
raise ValueError("--force can only be applied with --name") | ||
|
||
migrations: list[ClassVar[Migration]] = [ | ||
PartitionMapsMigration, | ||
PartitionCartoMigration, | ||
StorageSchemeMigration, | ||
MacrostratCoreMigration, | ||
] | ||
# Find all subclasses of Migration among imported modules | ||
migrations = Migration.__subclasses__() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am into this but maybe it's a bit too implicit? Should there be a
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given the |
||
|
||
# Instantiate each migration, then sort topologically according to dependency order | ||
instances = [cls() for cls in migrations] | ||
graph = {inst.name: inst.depends_on for inst in instances} | ||
order = list(TopologicalSorter(graph).static_order()) | ||
instances.sort(key=lambda i: order.index(i.name)) | ||
|
||
# While iterating over migrations, keep track of which have already applied | ||
completed_migrations = [] | ||
|
||
for cls in migrations: | ||
# Initialize migration | ||
_migration = cls() | ||
for _migration in instances: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One pattern we could eventually add here, which I have successfully used in the past, is to have a "dry run" migration automatically run first, based on a schema-only clone of the database. |
||
_name = _migration.name | ||
|
||
# Check whether the migration is capable of applying, or has already applied | ||
apply_status = _migration.should_apply(db) | ||
if apply_status == ApplicationStatus.APPLIED: | ||
completed_migrations.append(_migration.name) | ||
|
||
# If --name is specified, only run the migration with the matching name | ||
if name is not None and name != _name: | ||
continue | ||
|
||
# By default, don't run migrations that depend on other non-applied migrations | ||
dependencies_met = all(d in completed_migrations for d in _migration.depends_on) | ||
if not dependencies_met and not force: | ||
print(f"Dependencies not met for migration [cyan]{_name}[/cyan]") | ||
continue | ||
|
||
if _migration.should_apply(db) or force: | ||
if force or apply_status == ApplicationStatus.CAN_APPLY: | ||
if not apply: | ||
print(f"Would apply migration [cyan]{_name}[/cyan]") | ||
else: | ||
if _migration.destructive and not data_changes and not force: | ||
print(f"Migration [cyan]{_name}[/cyan] would alter data in the database. Run with --force or --data-changes") | ||
return | ||
|
||
print(f"Applying migration [cyan]{_name}[/cyan]") | ||
_migration.apply(db) | ||
# After running migration, reload the database and confirm that application was sucessful | ||
db = refresh_db() | ||
if _migration.should_apply(db) == ApplicationStatus.APPLIED: | ||
completed_migrations.append(_migration.name) | ||
elif apply_status == ApplicationStatus.APPLIED: | ||
print(f"Migration [cyan]{_name}[/cyan] already applied") | ||
else: | ||
print(f"Migration [cyan]{_name}[/cyan] not required") | ||
print(f"Migration [cyan]{_name}[/cyan] cannot apply") | ||
|
||
# Short circuit after applying the migration specified by --name | ||
if name is not None and name == _name: | ||
break |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from ..base import Migration, exists | ||
|
||
class BaselineMigration(Migration): | ||
name = "api-v3" | ||
subsystem = "core" | ||
description = """ | ||
Apply the schema changes from https://github.com/UW-Macrostrat/api-v3 to the database | ||
""" | ||
|
||
depends_on = ['map-source-slug'] | ||
|
||
# Confirm that the tables created by the API v3 migrations are present | ||
postconditions = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Postcondition checks are a fantastic addition. Looks like you went for higher-order functions simply to enhance composability, yeah? |
||
exists("storage","object_group","object"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. might want to use a pattern like |
||
exists("maps_metadata","ingest_process","ingest_process_tag"), | ||
exists("macrostrat_auth","user","group"), | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,88 @@ | ||
from macrostrat.database import Database | ||
from pathlib import Path | ||
import inspect | ||
from typing import Callable | ||
from enum import Enum | ||
|
||
""" Higher-order functions that return a function that evaluates whether a condition is met on the database """ | ||
DbEvaluator = Callable[[Database], bool] | ||
|
||
|
||
def exists(schema: str, *table_names: str) -> DbEvaluator: | ||
""" Return a function that evaluates to true when every given table in the given schema exists """ | ||
return lambda db: all(db.inspector.has_table(t, schema=schema) for t in table_names) | ||
|
||
def not_exists(schema: str, *table_names: str) -> DbEvaluator: | ||
""" Return a function that evaluates to true when every given table in the given schema doesn't exist """ | ||
return lambda db: all(not db.inspector.has_table(t, schema=schema) for t in table_names) | ||
|
||
def schema_exists(schema: str) -> DbEvaluator: | ||
""" Return a function that evaluates to true when the given schema exists """ | ||
return lambda db: db.inspector.has_schema(schema) | ||
|
||
def view_exists(schema: str, *view_names: str) -> DbEvaluator: | ||
""" Return a function that evaluates to true when every given view in the given schema exists """ | ||
return lambda db: all(v in db.inspector.get_view_names(schema) for v in view_names) | ||
|
||
def has_fks(schema: str, *table_names: str) -> DbEvaluator: | ||
""" Return a function that evaluates to true when every given table in the given schema has at least one foreign key """ | ||
return lambda db: all( | ||
db.inspector.has_table(t, schema=schema) and | ||
len(db.inspector.get_foreign_keys(t, schema=schema)) for t in table_names) | ||
|
||
class ApplicationStatus(Enum): | ||
""" Enum for the possible """ | ||
|
||
# The preconditions for this migration aren't met, so it can't be applied | ||
CANT_APPLY = "cant_apply" | ||
|
||
# The preconditions for this migration are met but the postconditions aren't met, so it can be applied | ||
CAN_APPLY = "can_apply" | ||
|
||
# The postconditions for this migration are met, so it doesn't need to be applied | ||
APPLIED = "applied" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would there be any value in having a state Put another way, is there value to having the difference between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the current implementation of checking for whether a migration is applied is sophisticated enough to handle that use case, since it just checks the current state of tables in the database and assigns one of the 3 labels based on that state. We might be able to get at this by looking further down the dependency graph, eg. checking whether any dependents of a |
||
|
||
class Migration: | ||
"""This will eventually be merged with the migration system in macrostrat.dinosaur""" | ||
""" Class defining a set of SQL changes to be applied to the database, as well as checks for | ||
whether the migration can be applied to the current state of the database | ||
""" | ||
|
||
# Unique name for the migration | ||
name: str | ||
|
||
# Short description for the migration | ||
description: str | ||
|
||
# Portion of the database to which this migration applies | ||
subsystem: str | ||
|
||
def should_apply(self, database: Database): | ||
raise NotImplementedError | ||
# List of migration names that must | ||
depends_on: list[str] = [] | ||
|
||
# List of checks on the database that must all evaluate to true before the migration can be run | ||
preconditions: list[DbEvaluator] = [] | ||
|
||
def apply(self, database: Database): | ||
raise NotImplementedError | ||
# List of checks on the database that should all evaluate to true after the migration has run successfully | ||
postconditions: list[DbEvaluator] = [] | ||
|
||
# Flag for whether running this migration will cause data changes in the database in addition to | ||
# schema changes | ||
destructive: bool = False | ||
|
||
def is_satisfied(self, database: Database): | ||
"""In some cases, we may want to note that a migration does not need to be run | ||
(e.g. if the database is already in the correct state) without actually running it. | ||
""" | ||
return not self.should_apply(database) | ||
def should_apply(self, database: Database) -> ApplicationStatus: | ||
""" Determine whether this migration can run, or has already run. """ | ||
# If all post-conditions are met, the migration is already applied | ||
if all([cond(database) for cond in self.postconditions]): | ||
return ApplicationStatus.APPLIED | ||
# Else if all pre-conditions are met, the migration can be applied | ||
elif all([cond(database) for cond in self.preconditions]): | ||
return ApplicationStatus.CAN_APPLY | ||
# Else, can't apply | ||
else: | ||
return ApplicationStatus.CANT_APPLY | ||
|
||
def apply(self, database: Database): | ||
""" Apply the migrations defined by this class. By default, run every sql file | ||
in the same directory as the class definition. """ | ||
child_cls_dir = Path(inspect.getfile(self.__class__)).parent | ||
database.run_fixtures(child_cls_dir) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should add this method to the
macrostrat.database
module.