UW-Macrostrat · davenquinn · Jul 29, 2024 · Jul 23, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/cli/macrostrat/cli/database/_legacy.py b/cli/macrostrat/cli/database/_legacy.py
@@ -49,3 +49,13 @@ def get_db():
     if db is None:
         db = Database(PG_DATABASE)
     return db
+
+def refresh_db():
+    """Close the session of the cached Database, if any, then create, cache and return a new one."""
+    from macrostrat.database import Database
+    global db
+    if db is not None:
+        db.session.flush()
+        db.session.close()
+    db = Database(PG_DATABASE)
+    return db
diff --git a/cli/macrostrat/cli/database/migrations/__init__.py b/cli/macrostrat/cli/database/migrations/__init__.py
@@ -1,20 +1,23 @@
 from macrostrat.database import Database
 
-from .._legacy import get_db
+from .._legacy import get_db, refresh_db
 from rich import print
-from .base import Migration
+from .base import Migration, ApplicationStatus
 from typing import ClassVar
 from pathlib import Path
-from .partition_maps import PartitionMapsMigration
-from .partition_carto import PartitionCartoMigration
-from .update_macrostrat import MacrostratCoreMigration
-
+from graphlib import TopologicalSorter
+from . import (
+    baseline, macrostrat_mariadb, partition_carto, partition_maps, update_macrostrat, map_source_slugs, map_sources, 
+    column_builder, api_v3, points, maps_source_operations
+)
 __dir__ = Path(__file__).parent
 
 
 class StorageSchemeMigration(Migration):
     name = "storage-scheme"
 
+    depends_on = ['api-v3']
+
     def apply(self, db: Database):
         db.run_sql(
             """
@@ -37,7 +40,10 @@ def apply(self, db: Database):
         )
 
     def should_apply(self, db: Database):
-        return has_enum(db, "schemeenum", schema="macrostrat")
+        if has_enum(db, "schemeenum", schema="macrostrat"):
+            return ApplicationStatus.CAN_APPLY
+        else:
+            return ApplicationStatus.APPLIED
 
 
 def has_enum(db: Database, name: str, schema: str = None):
@@ -52,7 +58,7 @@ def has_enum(db: Database, name: str, schema: str = None):
     ).scalar()
 
 
-def run_migrations(apply: bool = False, name: str = None, force: bool = False):
+def run_migrations(apply: bool = False, name: str = None, force: bool = False, data_changes: bool = False):
     """Apply database migrations"""
     db = get_db()
 
@@ -61,24 +67,55 @@ def run_migrations(apply: bool = False, name: str = None, force: bool = False):
     if force and not name:
         raise ValueError("--force can only be applied with --name")
 
-    migrations: list[ClassVar[Migration]] = [
-        PartitionMapsMigration,
-        PartitionCartoMigration,
-        StorageSchemeMigration,
-        MacrostratCoreMigration,
-    ]
+    # Find all subclasses of Migration among imported modules
+    migrations = Migration.__subclasses__() 
+
+    # Instantiate each migration, then sort topologically according to dependency order
+    instances = [cls() for cls in migrations]
+    graph = {inst.name: inst.depends_on for inst in instances}
+    order = list(TopologicalSorter(graph).static_order())
+    instances.sort(key=lambda i: order.index(i.name))
+
+    # While iterating over migrations, keep track of which have already applied
+    completed_migrations = []
 
-    for cls in migrations:
-        # Initialize migration
-        _migration = cls()
+    for _migration in instances:
         _name = _migration.name
+
+        # Check whether the migration is capable of applying, or has already applied
+        apply_status = _migration.should_apply(db)
+        if apply_status == ApplicationStatus.APPLIED:
+            completed_migrations.append(_migration.name)
+
+        # If --name is specified, only run the migration with the matching name
         if name is not None and name != _name:
             continue
+
+        # By default, don't run migrations that depend on other non-applied migrations
+        dependencies_met = all(d in completed_migrations for d in _migration.depends_on)
+        if not dependencies_met and not force:
+            print(f"Dependencies not met for migration [cyan]{_name}[/cyan]")
+            continue
 
-        if _migration.should_apply(db) or force:
+        if force or apply_status == ApplicationStatus.CAN_APPLY:
             if not apply:
                 print(f"Would apply migration [cyan]{_name}[/cyan]")
             else:
+                if _migration.destructive and not data_changes and not force:
+                    print(f"Migration [cyan]{_name}[/cyan] would alter data in the database. Run with --force or --data-changes")
+                    return
+
+                print(f"Applying migration [cyan]{_name}[/cyan]")
                 _migration.apply(db)
+                # After running migration, reload the database and confirm that application was successful
+                db = refresh_db()
+                if _migration.should_apply(db) == ApplicationStatus.APPLIED:
+                    completed_migrations.append(_migration.name)
+        elif apply_status == ApplicationStatus.APPLIED:
+            print(f"Migration [cyan]{_name}[/cyan] already applied")
         else:
-            print(f"Migration [cyan]{_name}[/cyan] not required")
+            print(f"Migration [cyan]{_name}[/cyan] cannot apply")
+
+        # Short circuit after applying the migration specified by --name
+        if name is not None and name == _name:
+            break
diff --git a/cli/macrostrat/cli/fixtures/05-users.sql → ...i/database/migrations/api_v3/05-users.sql b/cli/macrostrat/cli/fixtures/05-users.sql → ...i/database/migrations/api_v3/05-users.sql
diff --git a/cli/macrostrat/cli/fixtures/08-storage.sql → ...database/migrations/api_v3/08-storage.sql b/cli/macrostrat/cli/fixtures/08-storage.sql → ...database/migrations/api_v3/08-storage.sql
diff --git a/...ostrat/cli/fixtures/09-ingest-process.sql → ...e/migrations/api_v3/09-ingest-process.sql b/...ostrat/cli/fixtures/09-ingest-process.sql → ...e/migrations/api_v3/09-ingest-process.sql
@@ -20,7 +20,7 @@ CREATE TABLE maps_metadata.ingest_process
     map_id            text
 );
 
-ALTER TABLE ingest_process
+ALTER TABLE maps_metadata.ingest_process
     owner to macrostrat;
 
 CREATE TABLE maps_metadata.ingest_process_tag (

diff --git a/cli/macrostrat/cli/fixtures/10-auth.sql → ...li/database/migrations/api_v3/10-auth.sql b/cli/macrostrat/cli/fixtures/10-auth.sql → ...li/database/migrations/api_v3/10-auth.sql
diff --git a/cli/macrostrat/cli/database/migrations/api_v3/__init__.py b/cli/macrostrat/cli/database/migrations/api_v3/__init__.py
@@ -0,0 +1,17 @@
+from ..base import Migration, exists
+
+class BaselineMigration(Migration):
+    name = "api-v3"
+    subsystem = "core"
+    description = """
+    Apply the schema changes from https://github.com/UW-Macrostrat/api-v3 to the database
+    """
+    # NOTE(review): presumably the `name` declared in the map_source_slugs module; confirm the spelling
+    depends_on = ['map-source-slug']
+
+    # Confirm that the tables created by the API v3 migrations are present
+    postconditions = [
+        exists("storage","object_group","object"),
+        exists("maps_metadata","ingest_process","ingest_process_tag"),
+        exists("macrostrat_auth","user","group"),
+    ]
diff --git a/cli/macrostrat/cli/database/migrations/base.py b/cli/macrostrat/cli/database/migrations/base.py
@@ -1,21 +1,88 @@
 from macrostrat.database import Database
+from pathlib import Path
+import inspect
+from typing import Callable
+from enum import Enum
 
+""" Higher-order functions that return a function that evaluates whether a condition is met on the database """
+DbEvaluator = Callable[[Database], bool]
+
+
+def exists(schema: str, *table_names: str) -> DbEvaluator:
+    """ Build a check that passes only if each of the named tables is present in the schema """
+    return lambda db: all(db.inspector.has_table(name, schema=schema) for name in table_names)
+
+def not_exists(schema: str, *table_names: str) -> DbEvaluator:
+    """ Build a check that passes only if none of the named tables is present in the schema """
+    return lambda db: not any(db.inspector.has_table(name, schema=schema) for name in table_names)
+
+def schema_exists(schema: str) -> DbEvaluator:
+    """ Return a function that evaluates to true when the database inspector reports that the given schema exists """
+    return lambda db: db.inspector.has_schema(schema)
+
+def view_exists(schema: str, *view_names: str) -> DbEvaluator:
+    """ Return a function that is true when every given view exists in the schema; views are listed once per call """
+    return lambda db: set(view_names).issubset(db.inspector.get_view_names(schema))
+
+def has_fks(schema: str, *table_names: str) -> DbEvaluator:
+    """ Build a check that passes when each named table exists in the schema and has one or more foreign keys """
+    def _table_has_fk(db, table):
+        return db.inspector.has_table(table, schema=schema) and len(db.inspector.get_foreign_keys(table, schema=schema)) > 0
+    return lambda db: all(_table_has_fk(db, table) for table in table_names)
+
+class ApplicationStatus(Enum):
+    """ Enum for the possible results of checking whether a migration should be applied """
+
+    # The preconditions for this migration aren't met, so it can't be applied
+    CANT_APPLY = "cant_apply"
+
+    # The preconditions for this migration are met but the postconditions aren't met, so it can be applied
+    CAN_APPLY = "can_apply"
+
+    # The postconditions for this migration are met, so it doesn't need to be applied
+    APPLIED = "applied"
 
 class Migration:
-    """This will eventually be merged with the migration system in macrostrat.dinosaur"""
+    """ Class defining a set of SQL changes to be applied to the database, as well as checks for 
+    whether the migration can be applied to the current state of the database
+    """
 
+    # Unique name for the migration
     name: str
+
+    # Short description for the migration
     description: str
+
+    # Portion of the database to which this migration applies
     subsystem: str
 
-    def should_apply(self, database: Database):
-        raise NotImplementedError
+    # List of names of migrations that must already be applied before this one can run
+    depends_on: list[str] = []
+
+    # List of checks on the database that must all evaluate to true before the migration can be run
+    preconditions: list[DbEvaluator] = []
 
-    def apply(self, database: Database):
-        raise NotImplementedError
+    # List of checks on the database that should all evaluate to true after the migration has run successfully
+    postconditions: list[DbEvaluator] = []
+
+    # Flag for whether running this migration will cause data changes in the database in addition to
+    # schema changes
+    destructive: bool = False
 
-    def is_satisfied(self, database: Database):
-        """In some cases, we may want to note that a migration does not need to be run
-        (e.g. if the database is already in the correct state) without actually running it.
-        """
-        return not self.should_apply(database)
+    def should_apply(self, database: Database) -> ApplicationStatus:
+        """ Return APPLIED, CAN_APPLY or CANT_APPLY from this migration's post- and preconditions. """
+        # NOTE: all() is true for an empty iterable, so a migration with no postconditions reports APPLIED
+        if all(cond(database) for cond in self.postconditions):
+            return ApplicationStatus.APPLIED
+        # Generators short-circuit, so no further database checks run after the first failing one
+        elif all(cond(database) for cond in self.preconditions):
+            return ApplicationStatus.CAN_APPLY
+        # Some postcondition and some precondition failed, so the migration can't apply
+        else:
+            return ApplicationStatus.CANT_APPLY
+
+    def apply(self, database: Database):
+        """ Apply this migration. By default, hand the directory that holds the
+        source file defining this class to the database's run_fixtures method. """
+        class_file = Path(inspect.getfile(self.__class__))
+        database.run_fixtures(class_file.parent)