Skip to content

Commit

Permalink
(PDB-5792) Prevent duplicate catalogs
Browse files Browse the repository at this point in the history
Add a migration to make the catalogs certname index unique.

In order to prevent the constraint addition failing for users who do
have duplicate catalogs, purge any duplicate catalogs. The latest
catalog will be kept. If catalogs have the same producer_timestamp, the
id column is used as a tiebreaker.
  • Loading branch information
austb authored and rbrw committed Sep 18, 2024
1 parent adf1ed7 commit 7d47d13
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 4 deletions.
2 changes: 1 addition & 1 deletion ext/test/upgrade-and-exit
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,6 @@ psql -U puppetdb puppetdb -c 'select max(version) from schema_migrations;' \
> "$tmpdir/out"
cat "$tmpdir/out"
# This must be updated every time we add a new migration
grep -qE ' 82$' "$tmpdir/out"
grep -qE ' 88$' "$tmpdir/out"

test ! -e "$PDBBOX"/var/mq-migrated
2 changes: 1 addition & 1 deletion resources/ext/cli/delete-reports.erb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ chown "$pg_user:$pg_user" "$tmp_dir"

# Verify that the PuppetDB schema version it the expected value
# so that we do not incorrectly delete the report data.
expected_schema_ver=82
expected_schema_ver=88
su - "$pg_user" -s /bin/sh -c "$psql_cmd -p $pg_port -d $pdb_db_name -c 'COPY ( SELECT max(version) FROM schema_migrations ) TO STDOUT;' > $tmp_dir/schema_ver"
actual_schema_ver="$(cat "$tmp_dir/schema_ver")"
if test "$actual_schema_ver" -ne $expected_schema_ver; then
Expand Down
17 changes: 16 additions & 1 deletion src/puppetlabs/puppetdb/scf/migrate.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2301,6 +2301,20 @@
"CREATE INDEX reports_status_id_idx ON reports USING btree (status_id)"
"CREATE INDEX reports_tx_uuid_expr_idx ON reports USING btree (((transaction_uuid)::text))")))

(defn prevent-duplicate-catalogs
[]
(jdbc/do-commands
;; Clear any possible duplicates
["DELETE FROM catalogs c1 USING catalogs c2"
" WHERE c1.certname = c2.certname"
" AND (c1.producer_timestamp, c1.id) < (c2.producer_timestamp, c2.id)"]

;; Remove the old index
"DROP INDEX catalogs_certname_idx"

;; Create a unique constraint on the certname, which creates the unique index
"ALTER TABLE catalogs ADD CONSTRAINT catalogs_certname_idx UNIQUE (certname)"))

(def migrations
"The available migrations, as a map from migration version to migration function."
{00 require-schema-migrations-table
Expand Down Expand Up @@ -2366,7 +2380,8 @@
79 add-report-partition-indexes-on-certname-end-time
80 add-workspaces-tables
81 migrate-resource-events-to-declarative-partitioning
82 migrate-reports-to-declarative-partitioning})
82 migrate-reports-to-declarative-partitioning
88 prevent-duplicate-catalogs})
;; Make sure that if you change the structure of reports
;; or resource events, you also update the delete-reports
;; cli command.
Expand Down
63 changes: 62 additions & 1 deletion test/puppetlabs/puppetdb/scf/migrate_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
:refer [*db* clear-db-for-testing!
schema-info-map diff-schema-maps with-test-db]]
[puppetlabs.puppetdb.scf.hash :as shash]
[puppetlabs.puppetdb.time :refer [now to-timestamp]]
[puppetlabs.puppetdb.time :refer [now to-timestamp] :as t]
[puppetlabs.puppetdb.scf.partitioning :as part]
[clojure.string :as str])
(:import (java.time ZoneId ZonedDateTime)
Expand Down Expand Up @@ -2153,3 +2153,64 @@
(update :index-diff set)
(update :constraint-diff set))]
(is (= expected-diff diff)))))))

(deftest migration-88-prevent-duplicate-catalogs
(testing "reports table declarative partitioning migration"
(jdbc/with-db-connection *db*
(clear-db-for-testing!)
(fast-forward-to-migration! 82)
(let [ts1 (to-timestamp (now))
ts2 (-> (now)
(t/plus (t/hours 1))
to-timestamp)
fake-hash (sutils/munge-hash-for-storage "0001")]

(jdbc/insert-multi! :certnames
[{:certname "host-1"}
{:certname "host-2"}])

(jdbc/insert-multi! :catalogs
[{:id 1 :hash fake-hash
:certname "host-1" :producer_timestamp ts1
:api_version 1 :catalog_version "one"}
{:id 2 :hash fake-hash
:certname "host-1" :producer_timestamp ts2
:api_version 1 :catalog_version "one"}
{:id 3 :hash fake-hash
:certname "host-1" :producer_timestamp ts2
:api_version 1 :catalog_version "one"}
{:id 4 :hash fake-hash
:certname "host-2" :producer_timestamp ts1
:api_version 1 :catalog_version "one"}])
(let [before-migration (schema-info-map *db*)
_ (apply-migration-for-testing! 88)
diff (-> (diff-schema-maps before-migration (schema-info-map *db*))
(update :index-diff set)
(update :constraint-diff set))]
(is (= {:index-diff
#{{:left-only {:unique? false}
:right-only {:unique? true}
:same {:index "catalogs_certname_idx"
:user "pdb_test"
:primary? false
:is_partial false
:functional? false
:type "btree"
:index_keys ["certname"]
:table "catalogs"
:schema "public"}}}

:table-diff nil

:constraint-diff
#{{:left-only nil
:right-only {:constraint_name "catalogs_certname_idx"
:table_name "catalogs"
:constraint_type "UNIQUE"
:initially_deferred "NO"
:deferrable? "NO"}
:same nil}}}
diff))
(is (= [{:id 3 :certname "host-1" :producer_timestamp ts2}
{:id 4 :certname "host-2" :producer_timestamp ts1}]
(query-to-vec "select id, certname, producer_timestamp from catalogs"))))))))

0 comments on commit 7d47d13

Please sign in to comment.