diff --git a/migrations/2024-04-17-135931_default-versions-table/down.sql b/migrations/2024-04-17-135931_default-versions-table/down.sql new file mode 100644 index 00000000000..2f238bc87ed --- /dev/null +++ b/migrations/2024-04-17-135931_default-versions-table/down.sql @@ -0,0 +1 @@ +drop table default_versions; diff --git a/migrations/2024-04-17-135931_default-versions-table/up.sql b/migrations/2024-04-17-135931_default-versions-table/up.sql new file mode 100644 index 00000000000..e1e9e24ce3b --- /dev/null +++ b/migrations/2024-04-17-135931_default-versions-table/up.sql @@ -0,0 +1,24 @@ +create table default_versions +( + crate_id integer not null + constraint default_versions_pk + primary key + constraint default_versions_crates_id_fk + references crates + on delete cascade, + version_id integer not null + constraint default_versions_versions_id_fk + references versions + on delete no action deferrable initially deferred +); + +create unique index default_versions_version_id_uindex + on default_versions (version_id); + +comment on table default_versions is 'A mapping from crates to the versions that the frontend will display by default.'; +comment on column default_versions.crate_id is 'Reference to the crate in the `crates` table.'; +comment on column default_versions.version_id is 'Reference to the version in the `versions` table.'; +comment on constraint default_versions_crates_id_fk on default_versions is + 'This is a `cascade` delete because we want to delete the row when the crate is deleted.'; +comment on constraint default_versions_versions_id_fk on default_versions is + 'This is a `no action` delete because we want to fail the version deletion unless the default version is updated in the same transaction.'; diff --git a/src/admin/delete_version.rs b/src/admin/delete_version.rs index 9b918a0d677..38530d963d3 100644 --- a/src/admin/delete_version.rs +++ b/src/admin/delete_version.rs @@ -1,3 +1,4 @@ +use crate::models::update_default_version; use crate::schema::crates; use crate::storage::Storage; use crate::worker::jobs; @@ -48,27 +49,36 @@ pub fn run(opts: Opts) -> anyhow::Result<()> { return Ok(()); } - info!(%crate_name, %crate_id, versions = ?opts.versions, "Deleting versions from the database"); - let result = diesel::delete( - versions::table - .filter(versions::crate_id.eq(crate_id)) - .filter(versions::num.eq_any(&opts.versions)), - ) - .execute(conn); - - match result { - Ok(num_deleted) if num_deleted == opts.versions.len() => {} - Ok(num_deleted) => { - warn!( - %crate_name, - "Deleted only {num_deleted} of {num_expected} versions from the database", - num_expected = opts.versions.len() - ); + conn.transaction(|conn| { + info!(%crate_name, %crate_id, versions = ?opts.versions, "Deleting versions from the database"); + let result = diesel::delete( + versions::table + .filter(versions::crate_id.eq(crate_id)) + .filter(versions::num.eq_any(&opts.versions)), + ) + .execute(conn); + + match result { + Ok(num_deleted) if num_deleted == opts.versions.len() => {} + Ok(num_deleted) => { + warn!( + %crate_name, + "Deleted only {num_deleted} of {num_expected} versions from the database", + num_expected = opts.versions.len() + ); + } + Err(error) => { + warn!(%crate_name, ?error, "Failed to delete versions from the database") + } } - Err(error) => { - warn!(%crate_name, ?error, "Failed to delete versions from the database") + + info!(%crate_name, %crate_id, "Updating default version in the database"); + if let Err(error) = update_default_version(crate_id, conn) { + warn!(%crate_name, %crate_id, ?error, "Failed to update default version"); } - } + + Ok::<_, anyhow::Error>(()) + })?; info!(%crate_name, "Enqueuing index sync jobs"); if let Err(error) = jobs::enqueue_sync_to_index(crate_name, conn) { diff --git a/src/admin/mod.rs b/src/admin/mod.rs index 089218d01dd..710233d809c 100644 --- a/src/admin/mod.rs +++ b/src/admin/mod.rs @@ -9,6 +9,7 @@ pub mod populate; pub mod render_readmes; pub mod test_pagerduty; pub mod transfer_crates; +pub mod update_default_versions; pub mod upload_index; pub mod verify_token; pub mod yank_version; diff --git a/src/admin/update_default_versions.rs b/src/admin/update_default_versions.rs new file mode 100644 index 00000000000..4e80858b944 --- /dev/null +++ b/src/admin/update_default_versions.rs @@ -0,0 +1,34 @@ +use crate::models::update_default_version; +use crate::{db, schema::crates}; +use anyhow::Context; +use diesel::prelude::*; +use indicatif::{ProgressBar, ProgressIterator, ProgressStyle}; + +#[derive(clap::Parser, Debug)] +#[clap( + name = "update-default-versions", + about = "Iterates over every crate ever uploaded and updates the `default_versions` table." +)] +pub struct Opts; + +pub fn run(_opts: Opts) -> anyhow::Result<()> { + let mut conn = db::oneoff_connection().context("Failed to connect to the database")?; + + let crate_ids: Vec = crates::table + .select(crates::id) + .load(&mut conn) + .context("Failed to load crates")?; + + let pb = ProgressBar::new(crate_ids.len() as u64); + pb.set_style(ProgressStyle::with_template( + "{bar:60} ({pos}/{len}, ETA {eta})", + )?); + + for crate_id in crate_ids.into_iter().progress_with(pb.clone()) { + if let Err(error) = update_default_version(crate_id, &mut conn) { + pb.suspend(|| warn!(%crate_id, %error, "Failed to update the default version")); + } + } + + Ok(()) +} diff --git a/src/admin/yank_version.rs b/src/admin/yank_version.rs index 867cfe5c668..4b050633550 100644 --- a/src/admin/yank_version.rs +++ b/src/admin/yank_version.rs @@ -3,6 +3,8 @@ use crate::db; use crate::models::{Crate, Version}; use crate::schema::versions; use crate::worker::jobs; +use crate::worker::jobs::UpdateDefaultVersion; +use crates_io_worker::BackgroundJob; use diesel::prelude::*; #[derive(clap::Parser, Debug)] @@ -59,5 +61,7 @@ fn yank(opts: Opts, conn: &mut PgConnection) -> anyhow::Result<()> { jobs::enqueue_sync_to_index(&krate.name, conn)?; + UpdateDefaultVersion::new(krate.id).enqueue(conn)?; + Ok(()) } diff --git a/src/bin/crates-admin.rs b/src/bin/crates-admin.rs index c49553f5cd1..2a9ee259c7c 100644 --- a/src/bin/crates-admin.rs +++ b/src/bin/crates-admin.rs @@ -3,7 +3,8 @@ extern crate tracing; use crates_io::admin::{ delete_crate, delete_version, enqueue_job, git_import, migrate, populate, render_readmes, - test_pagerduty, transfer_crates, upload_index, verify_token, yank_version, + test_pagerduty, transfer_crates, update_default_versions, upload_index, verify_token, + yank_version, }; #[derive(clap::Parser, Debug)] @@ -22,6 +23,7 @@ enum Command { GitImport(git_import::Opts), #[clap(subcommand)] EnqueueJob(enqueue_job::Command), + UpdateDefaultVersions(update_default_versions::Opts), } fn main() -> anyhow::Result<()> { @@ -49,6 +51,7 @@ fn main() -> anyhow::Result<()> { Command::YankVersion(opts) => yank_version::run(opts), Command::GitImport(opts) => git_import::run(opts), Command::EnqueueJob(command) => enqueue_job::run(command), + Command::UpdateDefaultVersions(opts) => update_default_versions::run(opts), } } diff --git a/src/controllers/krate/publish.rs b/src/controllers/krate/publish.rs index 76e4be6a020..c7371355ccc 100644 --- a/src/controllers/krate/publish.rs +++ b/src/controllers/krate/publish.rs @@ -1,7 +1,7 @@ //! Functionality related to publishing a new crate or version of a crate. use crate::auth::AuthCheck; -use crate::worker::jobs::{self, CheckTyposquat}; +use crate::worker::jobs::{self, CheckTyposquat, UpdateDefaultVersion}; use axum::body::Bytes; use cargo_manifest::{Dependency, DepsSet, TargetDepsSet}; use crates_io_tarball::{process_tarball, TarballError}; @@ -360,6 +360,18 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult AppResult bool { + !self.num.pre.is_empty() + } +} + +/// Updates the `default_versions` table entry for the specified crate. +/// +/// This function first loads all versions of the crate from the database, +/// then determines the default version based on the following criteria: +/// +/// 1. The highest non-prerelease version that is not yanked. +/// 2. The highest non-yanked version. +/// 3. The highest version. +/// +/// The default version is then written to the `default_versions` table. +#[instrument(skip(conn))] +pub fn update_default_version(crate_id: i32, conn: &mut PgConnection) -> QueryResult<()> { + debug!("Loading all versions for the crate…"); + let versions = versions::table + .filter(versions::crate_id.eq(crate_id)) + .select(Version::as_returning()) + .load::(conn)?; + + debug!("Found {} versions", versions.len()); + + let default_version = find_default_version(&versions).ok_or(diesel::result::Error::NotFound)?; + + debug!( + "Updating default version to {} (id: {})…", + default_version.num, default_version.id + ); + + diesel::insert_into(default_versions::table) + .values(( + default_versions::crate_id.eq(crate_id), + default_versions::version_id.eq(default_version.id), + )) + .on_conflict(default_versions::crate_id) + .do_update() + .set(default_versions::version_id.eq(default_version.id)) + .execute(conn)?; + + Ok(()) +} + +fn find_default_version(versions: &[Version]) -> Option<&Version> { + highest(versions, |v| !v.is_prerelease() && !v.yanked) + .or_else(|| highest(versions, |v| !v.yanked)) + .or_else(|| highest(versions, |_| true)) +} + +fn highest(versions: &[Version], filter: impl FnMut(&&Version) -> bool) -> Option<&Version> { + versions.iter().filter(filter).max_by_key(|v| &v.num) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::schema::crates; + use crate::test_util::test_db_connection; + + fn v(num: &str, yanked: bool) -> Version { + let num = semver::Version::parse(num).unwrap(); + Version { id: 0, num, yanked } + } + + #[test] + fn test_find_default_version() { + let check = |versions, expected| { + let default_version = assert_some!(find_default_version(versions)); + assert_eq!(default_version.num.to_string(), expected); + }; + + // No versions + let versions = vec![]; + assert_none!(find_default_version(&versions)); + + // Only a single version + let versions = vec![v("1.0.0", false)]; + check(&versions, "1.0.0"); + + // Multiple versions out of order + let versions = vec![ + v("1.0.0", false), + v("1.0.1", false), + v("1.1.0", false), + v("1.0.2", false), + ]; + check(&versions, "1.1.0"); + + // Multiple versions with one pre-release + let versions = vec![ + v("1.0.0", false), + v("1.1.0", false), + v("2.0.0-beta.1", false), + ]; + check(&versions, "1.1.0"); + + // Only pre-release versions + let versions = vec![ + v("1.0.0-beta.1", false), + v("1.0.0-beta.2", false), + v("1.0.0-beta.3", false), + ]; + check(&versions, "1.0.0-beta.3"); + + // Only pre-release versions, with highest yanked + let versions = vec![ + v("1.0.0-beta.1", false), + v("1.0.0-beta.2", false), + v("1.0.0-beta.3", true), + ]; + check(&versions, "1.0.0-beta.2"); + + // Only yanked versions + let versions = vec![ + v("1.0.0-beta.1", true), + v("1.0.0-beta.2", true), + v("1.0.0-beta.3", true), + ]; + check(&versions, "1.0.0-beta.3"); + } + + fn create_crate(name: &str, conn: &mut PgConnection) -> i32 { + diesel::insert_into(crates::table) + .values(crates::name.eq(name)) + .returning(crates::id) + .get_result(conn) + .unwrap() + } + + fn create_version(crate_id: i32, num: &str, conn: &mut PgConnection) { + diesel::insert_into(versions::table) + .values(( + versions::crate_id.eq(crate_id), + versions::num.eq(num), + versions::checksum.eq(""), + )) + .execute(conn) + .unwrap(); + } + + fn get_default_version(crate_id: i32, conn: &mut PgConnection) -> String { + default_versions::table + .inner_join(versions::table) + .select(versions::num) + .filter(default_versions::crate_id.eq(crate_id)) + .first(conn) + .unwrap() + } + + #[test] + fn test_update_default_version() { + let (_test_db, conn) = &mut test_db_connection(); + + let crate_id = create_crate("foo", conn); + create_version(crate_id, "1.0.0", conn); + + update_default_version(crate_id, conn).unwrap(); + assert_eq!(get_default_version(crate_id, conn), "1.0.0"); + + create_version(crate_id, "1.1.0", conn); + create_version(crate_id, "1.0.1", conn); + assert_eq!(get_default_version(crate_id, conn), "1.0.0"); + + update_default_version(crate_id, conn).unwrap(); + assert_eq!(get_default_version(crate_id, conn), "1.1.0"); + } +} diff --git a/src/schema.rs b/src/schema.rs index 487790c605a..3ddd3961d2f 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -406,6 +406,16 @@ diesel::table! { } } +diesel::table! { + /// A mapping from crates to the versions that the frontend will display by default. + default_versions (crate_id) { + /// Reference to the crate in the `crates` table. + crate_id -> Int4, + /// Reference to the version in the `versions` table. + version_id -> Int4, + } +} + diesel::table! { /// Representation of the `dependencies` table. /// @@ -1018,6 +1028,8 @@ diesel::joinable!(crates_categories -> categories (category_id)); diesel::joinable!(crates_categories -> crates (crate_id)); diesel::joinable!(crates_keywords -> crates (crate_id)); diesel::joinable!(crates_keywords -> keywords (keyword_id)); +diesel::joinable!(default_versions -> crates (crate_id)); +diesel::joinable!(default_versions -> versions (version_id)); diesel::joinable!(dependencies -> crates (crate_id)); diesel::joinable!(dependencies -> versions (version_id)); diesel::joinable!(emails -> users (user_id)); @@ -1045,6 +1057,7 @@ diesel::allow_tables_to_appear_in_same_query!( crates, crates_categories, crates_keywords, + default_versions, dependencies, emails, follows, diff --git a/src/sql.rs b/src/sql.rs index 4e41f6984ff..d1335a5b1d1 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -1,5 +1,9 @@ use diesel::sql_types::{Date, Double, Integer, Interval, SingleValue, Text, Timestamp}; +mod semver; + +pub use semver::SemverVersion; + sql_function!(#[aggregate] fn array_agg(x: T) -> Array); sql_function!(fn canon_crate_name(x: Text) -> Text); sql_function!(fn to_char(a: Date, b: Text) -> Text); diff --git a/src/sql/semver.rs b/src/sql/semver.rs new file mode 100644 index 00000000000..ec01133f1b1 --- /dev/null +++ b/src/sql/semver.rs @@ -0,0 +1,35 @@ +use diesel::pg::Pg; +use diesel::sql_types::Text; +use diesel::Queryable; + +/// A wrapper around `semver::Version` that implements `diesel::Queryable`. +/// +/// ## Example +/// +/// ```rust +/// # use crates_io::sql::SemverVersion; +/// # use crates_io::schema::versions; +/// # use diesel::prelude::*; +/// # +/// #[derive(Clone, Debug, Queryable, Selectable)] +/// struct Version { +/// #[diesel(deserialize_as = SemverVersion)] +/// num: semver::Version, +/// } +/// ``` +#[derive(Clone, Debug)] +pub struct SemverVersion(semver::Version); + +impl From for semver::Version { + fn from(version: SemverVersion) -> Self { + version.0 + } +} + +impl Queryable for SemverVersion { + type Row = String; + + fn build(row: Self::Row) -> diesel::deserialize::Result { + row.parse().map(SemverVersion).map_err(Into::into) + } +} diff --git a/src/tests/schema_details.rs b/src/tests/schema_details.rs index 1765d8fbcf1..67991708de5 100644 --- a/src/tests/schema_details.rs +++ b/src/tests/schema_details.rs @@ -33,6 +33,12 @@ fn all_columns_called_version_id_have_a_cascading_foreign_key() { row.table_name ), }; + + if row.table_name == "default_versions" { + // We explicitly don't want to enforce this on the default_versions table. + continue; + } + if !constraint.definition.contains("ON DELETE CASCADE") { panic!( "Foreign key {} on table {} should have `ON DELETE CASCADE` \ diff --git a/src/worker/jobs/dump_db/dump-db.toml b/src/worker/jobs/dump_db/dump-db.toml index 6199ec8eb78..a38845bf178 100644 --- a/src/worker/jobs/dump_db/dump-db.toml +++ b/src/worker/jobs/dump_db/dump-db.toml @@ -99,6 +99,12 @@ dependencies = ["crates", "keywords"] crate_id = "public" keyword_id = "public" +[default_versions] +dependencies = ["crates", "versions"] +[default_versions.columns] +crate_id = "public" +version_id = "public" + [dependencies] dependencies = ["crates", "versions"] [dependencies.columns] diff --git a/src/worker/jobs/mod.rs b/src/worker/jobs/mod.rs index f370d68115c..0419efb5113 100644 --- a/src/worker/jobs/mod.rs +++ b/src/worker/jobs/mod.rs @@ -12,6 +12,7 @@ mod git; mod readmes; mod sync_admins; mod typosquat; +mod update_default_version; pub use self::daily_db_maintenance::DailyDbMaintenance; pub use self::downloads::{ @@ -22,6 +23,7 @@ pub use self::git::{NormalizeIndex, SquashIndex, SyncToGitIndex, SyncToSparseInd pub use self::readmes::RenderAndUploadReadme; pub use self::sync_admins::SyncAdmins; pub use self::typosquat::CheckTyposquat; +pub use self::update_default_version::UpdateDefaultVersion; /// Enqueue both index sync jobs (git and sparse) for a crate, unless they /// already exist in the background job queue. diff --git a/src/worker/jobs/update_default_version.rs b/src/worker/jobs/update_default_version.rs new file mode 100644 index 00000000000..8ec464e5fe9 --- /dev/null +++ b/src/worker/jobs/update_default_version.rs @@ -0,0 +1,38 @@ +use crate::models::update_default_version; +use crate::worker::Environment; +use anyhow::anyhow; +use crates_io_worker::BackgroundJob; +use std::sync::Arc; + +#[derive(Serialize, Deserialize)] +pub struct UpdateDefaultVersion { + crate_id: i32, +} + +impl UpdateDefaultVersion { + pub fn new(crate_id: i32) -> Self { + Self { crate_id } + } +} + +impl BackgroundJob for UpdateDefaultVersion { + const JOB_NAME: &'static str = "update_default_version"; + const PRIORITY: i16 = 80; + + type Context = Arc; + + async fn run(&self, ctx: Self::Context) -> anyhow::Result<()> { + let crate_id = self.crate_id; + + info!("Updating default version for crate {crate_id}"); + let conn = ctx.deadpool.get().await?; + conn.interact::<_, anyhow::Result<_>>(move |conn| { + update_default_version(crate_id, conn)?; + Ok(()) + }) + .await + .map_err(|err| anyhow!(err.to_string()))??; + + Ok(()) + } +} diff --git a/src/worker/mod.rs b/src/worker/mod.rs index d395b57a99b..b3737f1086c 100644 --- a/src/worker/mod.rs +++ b/src/worker/mod.rs @@ -32,5 +32,6 @@ impl RunnerExt for Runner> { .register_job_type::() .register_job_type::() .register_job_type::() + .register_job_type::() } }