From d4b5ad8cdbd1905dd775b9d71c4f81386801d9d1 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 13:13:18 -0600 Subject: [PATCH 1/9] Disable some files --- .../migrations/update_macrostrat/{ => __disabled}/00-types.sql | 0 .../update_macrostrat/{ => __disabled}/02-remove-unused.sql | 0 .../update_macrostrat/{ => __disabled}/03-primary-key-seq.sql | 0 .../update_macrostrat/{ => __disabled}/04-notes-columns.sql | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename cli/macrostrat/cli/database/migrations/update_macrostrat/{ => __disabled}/00-types.sql (100%) rename cli/macrostrat/cli/database/migrations/update_macrostrat/{ => __disabled}/02-remove-unused.sql (100%) rename cli/macrostrat/cli/database/migrations/update_macrostrat/{ => __disabled}/03-primary-key-seq.sql (100%) rename cli/macrostrat/cli/database/migrations/update_macrostrat/{ => __disabled}/04-notes-columns.sql (100%) diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/00-types.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/00-types.sql similarity index 100% rename from cli/macrostrat/cli/database/migrations/update_macrostrat/00-types.sql rename to cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/00-types.sql diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/02-remove-unused.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/02-remove-unused.sql similarity index 100% rename from cli/macrostrat/cli/database/migrations/update_macrostrat/02-remove-unused.sql rename to cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/02-remove-unused.sql diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/03-primary-key-seq.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/03-primary-key-seq.sql similarity index 100% rename from cli/macrostrat/cli/database/migrations/update_macrostrat/03-primary-key-seq.sql rename to cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/03-primary-key-seq.sql diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/04-notes-columns.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/04-notes-columns.sql similarity index 100% rename from cli/macrostrat/cli/database/migrations/update_macrostrat/04-notes-columns.sql rename to cli/macrostrat/cli/database/migrations/update_macrostrat/__disabled/04-notes-columns.sql From 43aabcbb66390a2a39e293f92934a9b9a93b7917 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 14:04:29 -0600 Subject: [PATCH 2/9] Remove some NOT NULL constraints in order to set fks to NULL --- .../update_macrostrat/01-add-foreign-keys.sql | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql index 8dd356d8..984cdbbc 100644 --- a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql +++ b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql @@ -30,8 +30,7 @@ ALTER TABLE macrostrat.col_refs ADD CONSTRAINT col_refs_ref_fk FOREIGN KEY (ref_id) REFERENCES macrostrat.refs(id) ON DELETE CASCADE; /* I had to make the id the primary key of intervals first before adding the foreign key */ -ALTER TABLE macrostrat.intervals - ADD PRIMARY KEY (id); +ALTER TABLE macrostrat.intervals ADD PRIMARY KEY (id); /* no issues col_notes were not perserved, in mariaDB this is a separte table. @@ -59,6 +58,8 @@ ALTER TABLE macrostrat.unit_econs /* 23769 rows updated to have null ref_ids instead of 0... */ +-- pg_loader apparently adds NOT NULL constraints in some cases +ALTER TABLE macrostrat.unit_environs ALTER COLUMN ref_id DROP NOT NULL; UPDATE macrostrat.unit_environs SET ref_id = NULL WHERE ref_id = 0; @@ -77,7 +78,8 @@ ALTER TABLE macrostrat.unit_liths ADD CONSTRAINT unit_liths_units_fk FOREIGN KEY (unit_id) REFERENCES macrostrat.units(id) ON DELETE CASCADE; /* no issues */ -ALTER TABLE macrostrat.unit_lith_atts +-- now, we have renamed the table unit_lith_atts -> unit_liths_atts +ALTER TABLE macrostrat.unit_liths_atts ADD CONSTRAINT unit_liths_atts_unit_liths_fk FOREIGN KEY (unit_lith_id) REFERENCES macrostrat.unit_liths(id) ON DELETE CASCADE, ADD CONSTRAINT unit_liths_atts_lith_atts_fk FOREIGN KEY (lith_att_id) REFERENCES macrostrat.lith_atts(id) ON DELETE CASCADE; @@ -108,11 +110,15 @@ ALTER TABLE macrostrat.unit_strat_names ADD CONSTRAINT unit_strat_names_units_fk FOREIGN KEY (unit_id) REFERENCES macrostrat.units(id) ON DELETE CASCADE, ADD CONSTRAINT unit_strat_names_strat_names_fk FOREIGN KEY (strat_name_id) REFERENCES macrostrat.strat_names(id) ON DELETE CASCADE; +-- Remove the NOT NULL constraint on concept_id +ALTER TABLE macrostrat.strat_names ALTER COLUMN concept_id DROP NOT NULL; UPDATE macrostrat.strat_names SET concept_id = NULL WHERE concept_id = 0; /* BREAKS!!! there is a non-null constraint on ref_id.. but 0 means none so. */ +-- Remove the NOT NULL constraint on ref_id +ALTER TABLE macrostrat.strat_names ALTER COLUMN ref_id DROP NOT NULL; UPDATE macrostrat.strat_names SET ref_id = NULL WHERE ref_id = 0; @@ -123,6 +129,13 @@ WHERE sn.concept_id NOT IN (SELECT concept_id FROM macrostrat.strat_names_meta); ALTER TABLE macrostrat.strat_names ADD CONSTRAINT strat_names_strat_names_meta_fk FOREIGN KEY (concept_id) REFERENCES macrostrat.strat_names_meta(concept_id) ON DELETE CASCADE; +-- There are 6,000+ strat_name_meta entries with interval_id = 0 + +ALTER TABLE macrostrat.strat_names_meta ALTER COLUMN interval_id DROP NOT NULL; +UPDATE macrostrat.strat_names_meta + SET interval_id = NULL + WHERE interval_id = 0; + ALTER TABLE macrostrat.strat_names_meta ADD CONSTRAINT strat_names_meta_intervals_fk FOREIGN KEY(interval_id) REFERENCES macrostrat.intervals(id) ON DELETE CASCADE, ADD CONSTRAINT strat_names_meta_refs_fk FOREIGN KEY(ref_id) REFERENCES macrostrat.refs(id) ON DELETE CASCADE; @@ -141,20 +154,25 @@ ALTER TABLE macrostrat.strat_names_places DELETE FROM macrostrat.timescales_intervals WHERE interval_id NOT IN (SELECT id from macrostrat.intervals); -/* I had to make the id the primary key of intervals first before adding the foreign key */ -ALTER TABLE macrostrat.intervals - ADD PRIMARY KEY (id); - ALTER TABLE macrostrat.timescales_intervals ADD CONSTRAINT timescales_intervals_timescales_fk FOREIGN KEY (timescale_id) REFERENCES macrostrat.timescales(id) ON DELETE CASCADE, ADD CONSTRAINT timescales_intervals_intervals_fk FOREIGN KEY (interval_id) REFERENCES macrostrat.intervals(id) ON DELETE CASCADE; /* 2 rows deleted for a col_id of 0, one was a `test_delete_me` -the other was Lane Shale, unit_id 42143 -*/ +the other was Lane Shale, unit_id 42143 */ DELETE FROM macrostrat.units - WHERE col_id NOT IN (SELECT id FROM macrostrat.cols); + WHERE col_id NOT IN (SELECT id FROM macrostrat.cols) + -- Ensure that a maximum of two units are deleted, just for sanity. + AND ((SELECT count(*) FROM macrostrat.units WHERE col_id = 0) <= 2); + +/* Some units are not present in an existing section... */ + +-- 17 units exist with section_id = 0 +-- 941 units exist with a section_id that is not a valid section + +SELECT count(*) FROM macrostrat.units +WHERE section_id NOT IN (select id from macrostrat.sections); UPDATE macrostrat.units set section_id = NULL @@ -198,6 +216,7 @@ SET project_id = c.project_id FROM macrostrat.cols c WHERE c.col_group_id = cg.id; + -- -- /* unit_boundaries table, needs a unit_id and ref_id fk -- lots of 0's in the unit_id row... not sure why From 785749fb92a180d6627c7bf88c1d0c9c7acd2224 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 15:58:47 -0600 Subject: [PATCH 3/9] Deal with a few units that have missing section information, or inconsistent links to section information --- .../update_macrostrat/01-add-foreign-keys.sql | 78 +++++++++++++++++-- .../scripts/find-data-variances.sql | 12 +++ 2 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql index 984cdbbc..2ca64760 100644 --- a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql +++ b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql @@ -166,17 +166,79 @@ DELETE FROM macrostrat.units -- Ensure that a maximum of two units are deleted, just for sanity. AND ((SELECT count(*) FROM macrostrat.units WHERE col_id = 0) <= 2); -/* Some units are not present in an existing section... */ --- 17 units exist with section_id = 0 --- 941 units exist with a section_id that is not a valid section +-- Create fk constraints on units_sections table +-- Delete bad units_sections record +DELETE FROM macrostrat.units_sections WHERE unit_id NOT IN (SELECT id FROM macrostrat.units); -SELECT count(*) FROM macrostrat.units -WHERE section_id NOT IN (select id from macrostrat.sections); +ALTER TABLE macrostrat.units_sections + ADD CONSTRAINT units_sections_units_fk FOREIGN KEY (unit_id) REFERENCES macrostrat.units(id) ON DELETE CASCADE, + ADD CONSTRAINT units_sections_sections_fk FOREIGN KEY (section_id) REFERENCES macrostrat.sections(id) ON DELETE CASCADE; + + +/* Some units are not tied to the correct sections... */ +/** New addition 2024-11-06 + We want to keep the NOT NULL constraint on section_id (at least for now) + so we have to reconstruct sections in a few cases. + +- 17 units exist with section_id = 0 +- 941 units exist with a section_id that is not a valid section, + however all but 5 of those (all from New Zealand) + have a valid section_id in the units_sections table. + + SELECT * FROM macrostrat.units + WHERE section_id NOT IN (select id from macrostrat.sections) + AND section_id NOT IN (SELECT id FROM macrostrat.sections); +*/ + +/** Add sections to contain a few stray units from New Zealand */ +WITH units AS ( + SELECT id, col_id, fo, lo, fo_h, lo_h + FROM macrostrat.units + WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) + AND section_id NOT IN (SELECT id FROM macrostrat.sections) + LIMIT 5 -- there should be only 5 units that match this criteria +), new_sections AS ( + INSERT INTO macrostrat.sections (col_id, fo, lo, fo_h, lo_h) + SELECT + col_id, fo, lo, fo_h, lo_h + FROM units + GROUP BY col_id, fo, lo, fo_h, lo_h + RETURNING col_id, id section_id +), new_units_sections AS ( + INSERT INTO macrostrat.units_sections (unit_id, section_id, col_id) + SELECT u.id, ns.section_id, ns.col_id + FROM units u + JOIN new_sections ns + ON u.col_id = ns.col_id + RETURNING unit_id, section_id, col_id +) + -- Update legacy unit.section_id relationships to mirror the macrostrat.units_sections links +UPDATE macrostrat.units u +SET section_id = us.section_id +FROM new_units_sections us +WHERE u.id = us.unit_id + AND u.col_id = us.col_id; + + +/** Only a few units that are totally unlinked to sections + SELECT * FROM macrostrat.units + WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) + AND section_id NOT IN (SELECT id FROM macrostrat.sections); + + -- fo, lo, fo_h and lo_h seem to match in all cases + */ + + +/* Reconstruct section_id field from units_sections table. + TODO: the col_id and section_id fields in the "units_columns" table + are the 'master' version of the link, and the others are around for + legacy purposes. +*/ +UPDATE macrostrat.units u +SET section_id = (SELECT unit_id FROM macrostrat.units_sections WHERE unit_id = u.id) +WHERE section_id not in (select id from macrostrat.sections); -UPDATE macrostrat.units -set section_id = NULL -where section_id not in (select id from macrostrat.sections); ALTER TABLE macrostrat.units ADD CONSTRAINT units_cols_fk FOREIGN KEY (col_id) REFERENCES macrostrat.cols(id) ON DELETE CASCADE, diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql new file mode 100644 index 00000000..e034e27d --- /dev/null +++ b/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql @@ -0,0 +1,12 @@ +/** Useful queries to assess database structure for the Macrostrat schema */ + +/** Find units that are replicated elsewhere... */ +SELECT * FROM macrostrat.units +WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) + AND section_id NOT IN (SELECT id FROM macrostrat.sections); + +/** Find units_sections entries correlated to units direct links to sections */ +SELECT u.id, u.section_id, us.section_id, u.col_id, us.col_id +FROM macrostrat.units u +JOIN macrostrat.units_sections us + ON u.id = us.unit_id; From 7a3c77a21e732e9a7fe7918fdcc616e1b404a401 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 16:19:25 -0600 Subject: [PATCH 4/9] Successfully ran all migration commands --- .../update_macrostrat/01-add-foreign-keys.sql | 22 +++++++++--- .../scripts/assess-data-variances.sql | 36 +++++++++++++++++++ .../scripts/find-data-variances.sql | 12 ------- 3 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/assess-data-variances.sql delete mode 100644 cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql index 2ca64760..0e520d04 100644 --- a/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql +++ b/cli/macrostrat/cli/database/migrations/update_macrostrat/01-add-foreign-keys.sql @@ -221,6 +221,16 @@ WHERE u.id = us.unit_id AND u.col_id = us.col_id; +/** Update legacy section_id field for cases where it references a non-existent section. +TODO: we may want to delete this legacy field if it isn't needed. +*/ +UPDATE macrostrat.units u +SET section_id = us.section_id +FROM macrostrat.units_sections us +WHERE u.id = us.unit_id + AND u.col_id = us.col_id + AND u.section_id NOT IN (SELECT id FROM macrostrat.sections); + /** Only a few units that are totally unlinked to sections SELECT * FROM macrostrat.units WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) @@ -236,15 +246,17 @@ WHERE u.id = us.unit_id legacy purposes. */ UPDATE macrostrat.units u -SET section_id = (SELECT unit_id FROM macrostrat.units_sections WHERE unit_id = u.id) -WHERE section_id not in (select id from macrostrat.sections); - +SET section_id = us.section_id, + col_id = us.col_id +FROM macrostrat.units_sections us +WHERE u.id = us.unit_id + AND u.section_id NOT IN (SELECT id FROM macrostrat.sections); ALTER TABLE macrostrat.units ADD CONSTRAINT units_cols_fk FOREIGN KEY (col_id) REFERENCES macrostrat.cols(id) ON DELETE CASCADE, ADD CONSTRAINT units_sections_fk FOREIGN KEY (section_id) REFERENCES macrostrat.sections(id) ON DELETE CASCADE, - ADD CONSTRAINT units_intervals_fo_fk FOREIGN KEY (fo) REFERENCES macrostrat.intervals(id) ON DELETE CASCADE, - ADD CONSTRAINT units_intervals_lo_fk FOREIGN KEY (lo) REFERENCES macrostrat.intervals(id) ON DELETE CASCADE; + ADD CONSTRAINT units_intervals_fo_fk FOREIGN KEY (fo) REFERENCES macrostrat.intervals(id) ON DELETE RESTRICT, + ADD CONSTRAINT units_intervals_lo_fk FOREIGN KEY (lo) REFERENCES macrostrat.intervals(id) ON DELETE RESTRICT; ALTER TABLE macrostrat.sections ADD CONSTRAINT sections_cols_fk FOREIGN KEY (col_id) REFERENCES macrostrat.cols(id) ON DELETE CASCADE; diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/assess-data-variances.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/assess-data-variances.sql new file mode 100644 index 00000000..a371ce13 --- /dev/null +++ b/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/assess-data-variances.sql @@ -0,0 +1,36 @@ +/** Useful queries to assess database structure for the Macrostrat schema */ + +/** Find units that are replicated elsewhere... */ +SELECT * FROM macrostrat.units +WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) + AND section_id NOT IN (SELECT id FROM macrostrat.sections); + +/** Find units in multiple sections */ +SELECT * FROM macrostrat.units +WHERE id IN (SELECT unit_id FROM macrostrat.units_sections + GROUP BY unit_id + HAVING COUNT(unit_id) > 1); + + + +/** Find units_sections entries correlated to units direct links to sections */ +SELECT u.id, u.section_id, us.section_id, u.col_id, us.col_id +FROM macrostrat.units u +JOIN macrostrat.units_sections us + ON u.id = us.unit_id +WHERE u.section_id != us.section_id + OR u.col_id != us.col_id; + +/** 258 units are in more than one section/column + TODO: we should decide if this will be allowed (i.e., do we put a unique constraint in units_sections?) + */ + +SELECT unit_id, array_agg(col_id), array_agg(section_id) FROM macrostrat.units_sections +GROUP BY unit_id +HAVING COUNT(unit_id) > 1; + +-- Find units that reference an invalid section +SELECT * FROM macrostrat.units +WHERE section_id NOT IN (SELECT id FROM macrostrat.sections); + + diff --git a/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql b/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql deleted file mode 100644 index e034e27d..00000000 --- a/cli/macrostrat/cli/database/migrations/update_macrostrat/scripts/find-data-variances.sql +++ /dev/null @@ -1,12 +0,0 @@ -/** Useful queries to assess database structure for the Macrostrat schema */ - -/** Find units that are replicated elsewhere... */ -SELECT * FROM macrostrat.units -WHERE id NOT IN (SELECT unit_id FROM macrostrat.units_sections) - AND section_id NOT IN (SELECT id FROM macrostrat.sections); - -/** Find units_sections entries correlated to units direct links to sections */ -SELECT u.id, u.section_id, us.section_id, u.col_id, us.col_id -FROM macrostrat.units u -JOIN macrostrat.units_sections us - ON u.id = us.unit_id; From 9f9bba86be2b253b6eaecdd9653b62565475055e Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 16:40:47 -0600 Subject: [PATCH 5/9] Updated column builder schema --- .../column_builder/00-view-drops.sql | 32 ++++++++ .../schema/01-column-builder-views.sql | 82 +++++++++---------- 2 files changed, 73 insertions(+), 41 deletions(-) create mode 100644 cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql diff --git a/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql b/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql new file mode 100644 index 00000000..6de8229f --- /dev/null +++ b/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql @@ -0,0 +1,32 @@ +/* +Drop all views so they can be recreated. +TODO: automate this process. +*/ +DROP VIEW IF EXISTS macrostrat_api.projects; +DROP VIEW IF EXISTS macrostrat_api.cols; +DROP VIEW IF EXISTS macrostrat_api.col_groups; +DROP VIEW IF EXISTS macrostrat_api.environs; +DROP VIEW IF EXISTS macrostrat_api.liths; +DROP VIEW IF EXISTS macrostrat_api.intervals; +DROP VIEW IF EXISTS macrostrat_api.timescales; +DROP VIEW IF EXISTS macrostrat_api.strat_tree; +DROP VIEW IF EXISTS macrostrat_api.refs; +DROP VIEW IF EXISTS macrostrat_api.units; +DROP VIEW IF EXISTS macrostrat_api.col_refs; +DROP VIEW IF EXISTS macrostrat_api.unit_environs; +DROP VIEW IF EXISTS macrostrat_api.unit_liths; +DROP VIEW IF EXISTS macrostrat_api.sections; +DROP VIEW IF EXISTS macrostrat_api.strat_names; +DROP VIEW IF EXISTS macrostrat_api.unit_strat_names; +DROP VIEW IF EXISTS macrostrat_api.units_strat_names; +DROP VIEW IF EXISTS macrostrat_api.strat_names_ref; +DROP VIEW IF EXISTS macrostrat_api.col_group_with_cols; +DROP VIEW IF EXISTS macrostrat_api.environ_unit; +DROP VIEW IF EXISTS macrostrat_api.econ_unit; +DROP VIEW IF EXISTS macrostrat_api.lith_attr_unit; +DROP VIEW IF EXISTS macrostrat_api.lith_unit; +DROP VIEW IF EXISTS macrostrat_api.unit_strat_name_expanded; +DROP VIEW IF EXISTS macrostrat_api.col_sections; +DROP VIEW IF EXISTS macrostrat_api.col_ref_expanded; +DROP VIEW IF EXISTS macrostrat_api.strat_names_meta; +DROP VIEW IF EXISTS macrostrat_api.unit_boundaries; diff --git a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql index 001d7f10..216f14f6 100644 --- a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql +++ b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql @@ -1,9 +1,9 @@ -/* +/* Macrostrat's postgrest api is expressed through the macrostrat_api schema. -Any functions, views or tables in the macrostrat_api schema can be accessed +Any functions, views or tables in the macrostrat_api schema can be accessed through the postgrest api. -Below are a multitude of views that are made from the macrostrat data schema. +Below are a multitude of views that are made from the macrostrat data schema. Many are direct copies, however some are more customized data views for the frontend. */ @@ -58,20 +58,20 @@ CREATE OR REPLACE VIEW macrostrat_api.unit_strat_names AS SELECT * FROM macrostrat.unit_strat_names; CREATE OR REPLACE VIEW macrostrat_api.strat_names_ref AS -SELECT -s.id, -s.strat_name, -s.rank, -row_to_json(r.*) ref, -row_to_json(sm.*) concept +SELECT +s.id, +s.strat_name, +s.rank, +row_to_json(r.*) ref, +row_to_json(sm.*) concept FROM macrostrat.strat_names s LEFT JOIN macrostrat.refs r ON r.id = s.ref_id LEFT JOIN macrostrat.strat_names_meta sm -ON sm.concept_id = s.concept_id; +ON sm.concept_id = s.concept_id; CREATE OR REPLACE VIEW macrostrat_api.col_group_with_cols AS -SELECT +SELECT cg.id, cg.col_group, cg.col_group_long, @@ -79,9 +79,9 @@ SELECT COALESCE(jsonb_agg( jsonb_build_object( 'col_id', c.id, - 'status_code', c.status_code, - 'col_number', c.col, - 'col_name', c.col_name)) + 'status_code', c.status_code, + 'col_number', c.col, + 'col_name', c.col_name)) FILTER (WHERE c.id IS NOT NULL), '[]') AS cols FROM macrostrat.col_groups cg @@ -100,15 +100,15 @@ JOIN macrostrat.unit_econs ue ON e.id = ue.econ_id; CREATE OR REPLACE VIEW macrostrat_api.lith_attr_unit AS -SELECT -la.id as lith_attr_id, -la.lith_att, +SELECT +la.id as lith_attr_id, +la.lith_att, la.att_type, -la.lith_att_fill, -l.*, -ul.unit_id +la.lith_att_fill, +l.*, +ul.unit_id from macrostrat.lith_atts la -JOIN macrostrat.unit_lith_atts ula +JOIN macrostrat.unit_liths_atts ula ON ula.lith_att_id = la.id JOIN macrostrat.unit_liths ul ON ul.id = ula.unit_lith_id @@ -116,26 +116,26 @@ JOIN macrostrat.liths l ON ul.lith_id = l.id; CREATE OR REPLACE VIEW macrostrat_api.lith_unit AS -SELECT +SELECT l.id, -l.lith, -l.lith_group, +l.lith, +l.lith_group, l.lith_type, -l.lith_class, +l.lith_class, l.lith_color, ul.dom, ul.prop, ul.mod_prop, -ul.comp_prop, +ul.comp_prop, ul.ref_id, -ul.unit_id +ul.unit_id from macrostrat.liths l JOIN macrostrat.unit_liths ul ON ul.lith_id = l.id; /*LO is top and FO is bottom*/ CREATE OR REPLACE VIEW macrostrat_api.unit_strat_name_expanded AS -SELECT +SELECT usn.id, usn.unit_id, usn.strat_name_id, @@ -150,7 +150,7 @@ u.max_thick, u.min_thick, u.section_id, u.col_id, -u.notes, +'' AS notes, -- TODO: notes are no longer part of the units table, need to remove this fo.interval_name AS name_fo, fo.age_bottom, lo.interval_name AS name_lo, @@ -164,7 +164,7 @@ LEFT JOIN macrostrat.intervals fo ON u.fo = fo.id LEFT JOIN macrostrat.intervals lo ON u.lo = lo.id; CREATE OR REPLACE VIEW macrostrat_api.col_sections AS -SELECT c.id col_id, c.col_name, u.section_id, u.position_top, u.position_bottom, fo.interval_name bottom, +SELECT c.id col_id, c.col_name, u.section_id, u.position_top, u.position_bottom, fo.interval_name bottom, lo.interval_name top FROM macrostrat.cols c LEFT JOIN macrostrat.units u ON u.col_id = c.id @@ -174,19 +174,19 @@ LEFT JOIN macrostrat.intervals lo ON u.lo = lo.id; CREATE OR REPLACE VIEW macrostrat_api.col_ref_expanded AS -SELECT -c.id col_id, -c.col_name, +SELECT +c.id col_id, +c.col_name, c.col col_number, -c.notes, +'' AS notes, -- TODO: notes are no longer part of the cols table, need to remove this c.lat, c.lng, -json_build_object( -'id', r.id, -'pub_year', r.pub_year, -'author', r.author, -'ref', r.ref, -'doi',r.doi, +json_build_object( +'id', r.id, +'pub_year', r.pub_year, +'author', r.author, +'ref', r.ref, +'doi',r.doi, 'url', r.url) ref FROM macrostrat.cols c LEFT JOIN macrostrat.col_refs cr @@ -198,4 +198,4 @@ CREATE OR REPLACE VIEW macrostrat_api.strat_names_meta AS SELECT * FROM macrostrat.strat_names_meta; CREATE OR REPLACE VIEW macrostrat_api.unit_boundaries AS -SELECT * FROM macrostrat.unit_boundaries; \ No newline at end of file +SELECT * FROM macrostrat.unit_boundaries; From 348e0ffbc022f152ce42eab95157bdf6e728a107 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 16:42:48 -0600 Subject: [PATCH 6/9] Integrate cache reloading into schema defs --- cli/macrostrat/cli/database/migrations/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cli/macrostrat/cli/database/migrations/__init__.py b/cli/macrostrat/cli/database/migrations/__init__.py index d4a22675..70fe9691 100644 --- a/cli/macrostrat/cli/database/migrations/__init__.py +++ b/cli/macrostrat/cli/database/migrations/__init__.py @@ -140,6 +140,9 @@ def run_migrations( if name is not None and name == _name: break + # Notify PostgREST to reload the schema cache + db.run_sql("NOTIFY pgrst, 'reload schema';") + def migration_has_been_run(*names: str): db = get_db() From cb7aeb01f57c059ad5eba04cb104247cc71b359b Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Wed, 6 Nov 2024 16:58:25 -0600 Subject: [PATCH 7/9] Updated column builder views --- .../macrostrat_api/schema/01-column-builder-views.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql index 216f14f6..9d97c8c0 100644 --- a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql +++ b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/01-column-builder-views.sql @@ -76,6 +76,7 @@ SELECT cg.col_group, cg.col_group_long, cg.project_id, + p.project, COALESCE(jsonb_agg( jsonb_build_object( 'col_id', c.id, @@ -86,7 +87,8 @@ SELECT AS cols FROM macrostrat.col_groups cg LEFT JOIN macrostrat.cols c ON c.col_group_id = cg.id -GROUP BY cg.id, c.project_id; + LEFT JOIN macrostrat.projects p ON p.id = cg.project_id +GROUP BY cg.id, c.project_id, p.project; CREATE OR REPLACE VIEW macrostrat_api.environ_unit AS From f0a9a981b482f5ca1aef07ff07fbcdee2aa067bf Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Thu, 7 Nov 2024 02:37:57 -0600 Subject: [PATCH 8/9] Added a more advanced view for col_sections --- .../column_builder/00-view-drops.sql | 1 + .../schema/02-column-builder-functions.sql | 155 +++++++++++------- 2 files changed, 97 insertions(+), 59 deletions(-) diff --git a/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql b/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql index 6de8229f..21ad195b 100644 --- a/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql +++ b/cli/macrostrat/cli/database/migrations/column_builder/00-view-drops.sql @@ -30,3 +30,4 @@ DROP VIEW IF EXISTS macrostrat_api.col_sections; DROP VIEW IF EXISTS macrostrat_api.col_ref_expanded; DROP VIEW IF EXISTS macrostrat_api.strat_names_meta; DROP VIEW IF EXISTS macrostrat_api.unit_boundaries; +DROP VIEW IF EXISTS macrostrat_api.col_section_data; diff --git a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql index 1e9148b8..5aac42dd 100644 --- a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql +++ b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql @@ -1,31 +1,32 @@ -/* - Creates the view for a col and sections, Not scaleable as a view, but runs quickly +/* + Creates the view for a col and sections, Not scaleable as a view, but runs quickly for each column. Returns the top interval and bottom interval of each section in the column. Top interval is defined as unit.lo where unit.position_bottom is smallest. - Likewise bottom interval is unit.fo where unit.position_bottom is greatest! + Likewise bottom interval is unit.fo where unit.position_bottom is greatest! */ -DROP FUNCTION IF EXISTS macrostrat_api.get_col_section_data(int); -CREATE OR REPLACE FUNCTION macrostrat_api.get_col_section_data(column_id INT) +DROP FUNCTION IF EXISTS macrostrat_api.get_col_section_data(integer); +CREATE OR REPLACE FUNCTION macrostrat_api.get_col_section_data(column_id integer) RETURNS TABLE ( - id INT, - unit_count BIGINT, - top varchar(200), - bottom varchar(200)) + id integer, + unit_count integer, + top varchar(200), + bottom varchar(200) +) AS $$ -BEGIN +BEGIN RETURN QUERY - SELECT - s.id, - COUNT(uc) as unit_count, + SELECT + s.id::integer, + count(uc)::integer as unit_count, lo.interval_name as top, fo.interval_name as bottom - FROM macrostrat.sections s - JOIN macrostrat.units uc + FROM macrostrat.sections s + JOIN macrostrat.units uc ON uc.section_id = s.id - JOIN macrostrat.units u + JOIN macrostrat.units u ON u.section_id = s.id JOIN macrostrat.units un ON un.section_id = s.id @@ -43,6 +44,42 @@ RETURN QUERY END $$ LANGUAGE plpgsql; + +/** New (2024-11-07) create a view that is more efficient and flexible than the above function */ +CREATE OR REPLACE VIEW macrostrat_api.col_section_data AS +WITH a AS (SELECT us.unit_id, + us.section_id, + us.col_id, + u.fo, + fo.age_bottom AS fo_age, + fo.interval_name AS fo_name, + u.lo, + lo.age_top AS lo_age, + lo.interval_name AS lo_name + FROM macrostrat.units_sections us + JOIN macrostrat.units u + ON us.unit_id = u.id + JOIN macrostrat.intervals fo + ON u.fo = fo.id + JOIN macrostrat.intervals lo + ON u.lo = lo.id + ) +SELECT DISTINCT ON (col_id, section_id) + col_id, + section_id, + count(*) OVER w AS unit_count, + first_value(fo) OVER w AS fo, + first_value(lo) OVER w AS lo, + first_value(fo_name) OVER w AS bottom, + first_value(fo_age) OVER w AS fo_age, + first_value(lo_name) OVER w1 AS top, + first_value(lo_age) OVER w1 AS lo_age +FROM a +WINDOW w AS (PARTITION BY col_id, section_id ORDER BY fo_age DESC), + w1 AS (PARTITION BY col_id, section_id ORDER BY lo_age) +ORDER BY col_id, section_id; + + DROP FUNCTION IF EXISTS macrostrat_api.get_units_with_collections(int); CREATE OR REPLACE FUNCTION macrostrat_api.get_units_with_collections(column_id int) RETURNS TABLE( @@ -70,10 +107,10 @@ RETURNS TABLE( $$ BEGIN RETURN QUERY - SELECT - u.*, - COALESCE(jsonb_agg(lu.*) FILTER (WHERE lu.unit_id IS NOT NULL), '[]') as lith_unit, - COALESCE(jsonb_agg(eu.*) FILTER (WHERE eu.unit_id IS NOT NULL),'[]') as environ_unit + SELECT + u.*, + COALESCE(jsonb_agg(lu.*) FILTER (WHERE lu.unit_id IS NOT NULL), '[]') as lith_unit, + COALESCE(jsonb_agg(eu.*) FILTER (WHERE eu.unit_id IS NOT NULL),'[]') as environ_unit FROM macrostrat_api.unit_strat_name_expanded u LEFT JOIN macrostrat_api.lith_unit lu ON lu.unit_id = u.id @@ -85,7 +122,7 @@ RETURN QUERY END $$ LANGUAGE plpgsql; -/* +/* Functions for Combing and Splitting Sections! */ CREATE OR REPLACE FUNCTION macrostrat_api.split_section(unit_ids int[]) @@ -98,13 +135,13 @@ BEGIN SELECT col_id FROM macrostrat.units WHERE id = unit_ids[0] INTO _col_id; INSERT INTO macrostrat.sections(col_id) VALUES (_col_id) RETURNING id INTO _section_id; UPDATE macrostrat.units - SET + SET section_id = _section_id - WHERE id = ANY(unit_ids); + WHERE id = ANY(unit_ids); END $$ language plpgsql; -/* +/* Combine 2 or more sections */ CREATE OR REPLACE FUNCTION macrostrat_api.combine_sections(section_ids int[]) @@ -125,10 +162,10 @@ BEGIN END $$ language plpgsql; -/* +/* sophisticated ways of fetching related strat_names -Returns only strat_name_records where it's connected to a -concept and that concept ref contains the point geom for the +Returns only strat_name_records where it's connected to a +concept and that concept ref contains the point geom for the column. Unions searches for strat_names in column, ones in col-group, and geographically @@ -146,12 +183,12 @@ RETURNS TABLE( ) AS $$ BEGIN - RETURN QUERY + RETURN QUERY WITH a AS( SELECT cc.*, ST_Distance( - ST_Transform(c.coordinate, 3857), + ST_Transform(c.coordinate, 3857), ST_Transform(cc.coordinate, 3857) - ) + ) as distance FROM macrostrat.cols c JOIN macrostrat.cols cc ON c.col_group_id = cc.col_group_id @@ -159,18 +196,18 @@ WHERE c.id = _col_id ), b AS( SELECT c.col_name from macrostrat.cols c WHERE c.id = _col_id ) -SELECT sn.*, r.author, b.col_name::text as source from b,macrostrat_api.units u +SELECT sn.*, r.author, b.col_name::text as source from b,macrostrat_api.units u JOIN macrostrat_api.unit_strat_names usn ON u.id = usn.unit_id JOIN macrostrat_api.strat_names sn ON usn.strat_name_id = sn.id JOIN macrostrat_api.refs r ON r.id = sn.ref_id -WHERE u.col_id = _col_id +WHERE u.col_id = _col_id AND sn.concept_id IS NULL UNION ALL -SELECT DISTINCT ON(sn.id) sn.*, r.author, a.col_name::text as source -FROM a, macrostrat_api.units u +SELECT DISTINCT ON(sn.id) sn.*, r.author, a.col_name::text as source +FROM a, macrostrat_api.units u JOIN macrostrat_api.unit_strat_names usn ON u.id = usn.unit_id JOIN macrostrat_api.strat_names sn @@ -180,27 +217,27 @@ JOIN macrostrat_api.refs r WHERE u.col_id = _col_id AND sn.concept_id IS NULL or u.col_id = a.id AND sn.concept_id IS NULL UNION ALL -SELECT DISTINCT ON(sn.id) sn.*, r.author, 'nearby' as source FROM macrostrat.strat_names sn +SELECT DISTINCT ON(sn.id) sn.*, r.author, 'nearby' as source FROM macrostrat.strat_names sn LEFT JOIN macrostrat.strat_names_meta snm ON sn.concept_id = snm.concept_id LEFT JOIN macrostrat.refs r ON r.id = snm.ref_id WHERE ST_Intersects(r.rgeom, ( - select ST_SetSrid((coordinate)::geometry, 4326) + select ST_SetSrid((coordinate)::geometry, 4326) from macrostrat.cols c where c.id = _col_id ) ) UNION ALL -SELECT DISTINCT ON(sn.id) sn.*, r.author, 'unrelated' as source -FROM macrostrat_api.units u +SELECT DISTINCT ON(sn.id) sn.*, r.author, 'unrelated' as source +FROM macrostrat_api.units u JOIN macrostrat_api.unit_strat_names usn ON u.id = usn.unit_id JOIN macrostrat_api.strat_names sn ON usn.strat_name_id = sn.id JOIN macrostrat_api.refs r ON r.id = sn.ref_id -WHERE u.col_id NOT IN (SELECT a.id FROM a) - AND sn.concept_id IS NULL +WHERE u.col_id NOT IN (SELECT a.id FROM a) + AND sn.concept_id IS NULL AND r.rgeom IS NULL ; END @@ -221,9 +258,9 @@ RETURNS TABLE( $$ BEGIN RETURN QUERY - SELECT - gc.*, - st.strat_name ||' '|| st.rank as parent + SELECT + gc.*, + st.strat_name ||' '|| st.rank as parent FROM macrostrat_api.get_col_strat_names(_col_id) gc LEFT JOIN macrostrat.strat_tree tree ON tree.child = gc.id @@ -244,13 +281,13 @@ RETURNS TABLE( $$ BEGIN RETURN QUERY - SELECT - sn.id, - sn.strat_name, + SELECT + sn.id, + sn.strat_name, sn.rank, sn.concept_id, r.author, - st.strat_name ||' '|| st.rank as parent + st.strat_name ||' '|| st.rank as parent FROM macrostrat.strat_names sn JOIN macrostrat.strat_names_meta snm ON sn.concept_id = snm.concept_id @@ -265,14 +302,14 @@ RETURN QUERY END $$ language plpgsql; -/* function that calculates proportions of lithologies based on +/* function that calculates proportions of lithologies based on subdom and dom props. happens here: https://github.com/UW-Macrostrat/utils/blob/bba082956cc611af8458cf234c160b05e1cb3794/cli/commands/rebuild_scripts/lookup_unit_attrs_api.py#L40 but as two separate queries. Makes more sense to make a function that calculates both sub and dom comp_prop for a unit and returns that for a update query */ -CREATE OR REPLACE FUNCTION macrostrat.get_lith_comp_prop(_unit_id integer) +CREATE OR REPLACE FUNCTION macrostrat.get_lith_comp_prop(_unit_id integer) RETURNS TABLE( dom_prop numeric, sub_prop numeric @@ -296,11 +333,11 @@ BEGIN WHERE dom = 'sub' and unit_id = _unit_id GROUP BY unit_id ) - SELECT + SELECT -- need at least one float to prevent truncating to 0 - ROUND((5.0 / (COALESCE(sub.count, 0) + (dom.count * 5))),4) AS dom_prop, - ROUND((1.0 / (COALESCE(sub.count, 0) + (dom.count * 5))),4) AS sub_prop - FROM sub + ROUND((5.0 / (COALESCE(sub.count, 0) + (dom.count * 5))),4) AS dom_prop, + ROUND((1.0 / (COALESCE(sub.count, 0) + (dom.count * 5))),4) AS sub_prop + FROM sub JOIN dom ON dom.unit_id = sub.unit_id; END @@ -312,25 +349,25 @@ RETURNS VOID as $$ BEGIN UPDATE macrostrat.unit_liths ul - SET + SET comp_prop = (CASE WHEN ul.dom = 'sub' THEN prop.sub_prop ELSE prop.dom_prop END) FROM (SELECT * FROM macrostrat.get_lith_comp_prop(_unit_id)) as prop WHERE ul.unit_id = _unit_id; END $$ language plpgsql; -/* - a insert or update trigger for cols. +/* + a insert or update trigger for cols. - If a lat and long are passed but NOT a wkt or coordinate, + If a lat and long are passed but NOT a wkt or coordinate, we want to create those two columns and insert them as well */ -CREATE OR REPLACE FUNCTION macrostrat.lng_lat_insert_trigger() +CREATE OR REPLACE FUNCTION macrostrat.lng_lat_insert_trigger() RETURNS TRIGGER AS $$ DECLARE BEGIN IF tg_op = 'INSERT' OR new.lat <> old.lat OR new.lng <> old.lng THEN - new.wkt := ST_AsText(ST_MakePoint(new.lng, new.lat)); + new.wkt := ST_AsText(ST_MakePoint(new.lng, new.lat)); new.coordinate := ST_SetSrid(new.wkt, 4326); END IF; RETURN new; From 253928ed9b45bcc370ac1e80fdb4822d51475935 Mon Sep 17 00:00:00 2001 From: Daven Quinn Date: Thu, 7 Nov 2024 02:47:06 -0600 Subject: [PATCH 9/9] Updated column builder views --- .../schema/02-column-builder-functions.sql | 37 ------------------- 1 file changed, 37 deletions(-) diff --git a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql index 5aac42dd..fc3579b0 100644 --- a/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql +++ b/cli/macrostrat/cli/subsystems/macrostrat_api/schema/02-column-builder-functions.sql @@ -7,43 +7,6 @@ Likewise bottom interval is unit.fo where unit.position_bottom is greatest! */ DROP FUNCTION IF EXISTS macrostrat_api.get_col_section_data(integer); -CREATE OR REPLACE FUNCTION macrostrat_api.get_col_section_data(column_id integer) -RETURNS TABLE ( - id integer, - unit_count integer, - top varchar(200), - bottom varchar(200) -) -AS -$$ -BEGIN -RETURN QUERY - SELECT - s.id::integer, - count(uc)::integer as unit_count, - lo.interval_name as top, - fo.interval_name as bottom - FROM macrostrat.sections s - JOIN macrostrat.units uc - ON uc.section_id = s.id - JOIN macrostrat.units u - ON u.section_id = s.id - JOIN macrostrat.units un - ON un.section_id = s.id - JOIN macrostrat.intervals fo - ON un.fo = fo.id - JOIN macrostrat.intervals lo - ON u.lo = lo.id - WHERE u.position_bottom = ( - SELECT MIN(position_bottom) FROM macrostrat.units WHERE section_id = u.section_id - ) AND un.position_bottom = ( - SELECT MAX(position_bottom) FROM macrostrat.units WHERE section_id = un.section_id - ) AND s.col_id = column_id - GROUP BY s.id, lo.interval_name, fo.interval_name, fo.age_bottom ORDER BY fo.age_bottom - ; -END -$$ LANGUAGE plpgsql; - /** New (2024-11-07) create a view that is more efficient and flexible than the above function */ CREATE OR REPLACE VIEW macrostrat_api.col_section_data AS