Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

database_dump: Fix num_no_build import issue #10097

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions crates/crates_io_database_dump/src/dump-import.sql.j2
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,56 @@ BEGIN;
{%- endfor %}

-- Enable this trigger so that `crates.textsearchable_index_col` can be excluded from the export

ALTER TABLE "crates" ENABLE TRIGGER "trigger_crates_tsvector_update";

-- Drop the `NOT NULL` and uniqueness constraints on `num_no_build` column
-- while importing the data.

alter table "versions"
alter column "num_no_build" drop not null;

drop index versions_crate_id_num_no_build_uindex;

-- Import the CSV data.
{% for table in tables %}
\copy "{{table.name}}" ({{table.columns}}) FROM 'data/{{table.name}}.csv' WITH CSV HEADER
{%- endfor %}

-- Backfill the `num_no_build` column with the correct values.

update versions
set num_no_build = split_part(num, '+', 1);

with duplicates as (
-- find all versions that have the same `crate_id` and `num_no_build`
select crate_id, num_no_build, array_agg(num ORDER BY id) as nums
from versions
group by crate_id, num_no_build
having count(*) > 1
),
duplicates_to_update as (
-- for each group of duplicates, update all versions except the one that
-- doesn't have "build metadata", or the first one that was published if
-- all versions have "build metadata"
select crate_id, num_no_build, unnest(case when array_position(nums, num_no_build) IS NOT NULL then array_remove(nums, num_no_build) else nums[2:] end) as num
from duplicates
)
update versions
set num_no_build = duplicates_to_update.num
from duplicates_to_update
where versions.crate_id = duplicates_to_update.crate_id
and versions.num = duplicates_to_update.num;

-- Set the `num_no_build` column to `NOT NULL` again after the backfill is
-- done and add the unique constraint.

alter table "versions"
alter column "num_no_build" set not null;

create unique index versions_crate_id_num_no_build_uindex
on versions (crate_id, num_no_build);

-- Drop the defaults again.
{% for table in tables -%}
{% for cd in table.column_defaults %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,17 @@ BEGIN;
TRUNCATE "version_downloads" RESTART IDENTITY CASCADE;

-- Enable this trigger so that `crates.textsearchable_index_col` can be excluded from the export

ALTER TABLE "crates" ENABLE TRIGGER "trigger_crates_tsvector_update";

-- Drop the `NOT NULL` and uniqueness constraints on `num_no_build` column
-- while importing the data.

alter table "versions"
alter column "num_no_build" drop not null;

drop index versions_crate_id_num_no_build_uindex;

-- Import the CSV data.

\copy "categories" ("category", "crates_cnt", "created_at", "description", "id", "path", "slug") FROM 'data/categories.csv' WITH CSV HEADER
Expand All @@ -65,6 +74,40 @@ BEGIN;
\copy "dependencies" ("crate_id", "default_features", "explicit_name", "features", "id", "kind", "optional", "req", "target", "version_id") FROM 'data/dependencies.csv' WITH CSV HEADER
\copy "version_downloads" ("date", "downloads", "version_id") FROM 'data/version_downloads.csv' WITH CSV HEADER

-- Backfill the `num_no_build` column with the correct values.

update versions
set num_no_build = split_part(num, '+', 1);

with duplicates as (
-- find all versions that have the same `crate_id` and `num_no_build`
select crate_id, num_no_build, array_agg(num ORDER BY id) as nums
from versions
group by crate_id, num_no_build
having count(*) > 1
),
duplicates_to_update as (
-- for each group of duplicates, update all versions except the one that
-- doesn't have "build metadata", or the first one that was published if
-- all versions have "build metadata"
select crate_id, num_no_build, unnest(case when array_position(nums, num_no_build) IS NOT NULL then array_remove(nums, num_no_build) else nums[2:] end) as num
from duplicates
)
update versions
set num_no_build = duplicates_to_update.num
from duplicates_to_update
where versions.crate_id = duplicates_to_update.crate_id
and versions.num = duplicates_to_update.num;

-- Set the `num_no_build` column to `NOT NULL` again after the backfill is
-- done and add the unique constraint.

alter table "versions"
alter column "num_no_build" set not null;

create unique index versions_crate_id_num_no_build_uindex
on versions (crate_id, num_no_build);

-- Drop the defaults again.

ALTER TABLE "users" ALTER COLUMN "gh_access_token" DROP DEFAULT;
Expand Down