Skip to content

Commit

Permalink
Generate the index from the database
Browse files Browse the repository at this point in the history
When a crate is modified, rather than editing the existing index file in
git, this change re-generates the entire file from the database. This
makes the jobs idempotent and prevents the index from getting out of
sync with the database.

The `delete_version` and `delete_crate` tasks now also modify the index.

Test file changes are caused because the tests were inserting versions
in to the DB without adding them to the index.
  • Loading branch information
arlosi committed Apr 14, 2023
1 parent 2e58c24 commit 33a48ab
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 171 deletions.
14 changes: 6 additions & 8 deletions src/admin/delete_crate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::{admin::dialoguer, config, db, models::Crate, schema::crates};
use crate::{admin::dialoguer, db, models::Crate, schema::crates};

use diesel::prelude::*;
use reqwest::blocking::Client;

#[derive(clap::Parser, Debug)]
#[command(
Expand Down Expand Up @@ -30,10 +29,6 @@ pub fn run(opts: Opts) {
fn delete(opts: Opts, conn: &mut PgConnection) {
let krate: Crate = Crate::by_name(&opts.crate_name).first(conn).unwrap();

let config = config::Base::from_environment();
let uploader = config.uploader();
let client = Client::new();

if !opts.yes {
let prompt = format!(
"Are you sure you want to delete {} ({})?",
Expand All @@ -44,7 +39,8 @@ fn delete(opts: Opts, conn: &mut PgConnection) {
}
}

println!("deleting the crate");
let message = format!("Deleting crate `{}`", opts.crate_name);
println!("{message}");
let n = diesel::delete(crates::table.find(krate.id))
.execute(conn)
.unwrap();
Expand All @@ -54,5 +50,7 @@ fn delete(opts: Opts, conn: &mut PgConnection) {
panic!("aborting transaction");
}

uploader.delete_index(&client, &krate.name).unwrap();
crate::worker::sync(krate.name, message, false)
.enqueue(conn)
.unwrap();
}
7 changes: 6 additions & 1 deletion src/admin/delete_version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,17 @@ fn delete(opts: Opts, conn: &mut PgConnection) {
}
}

println!("deleting version {} ({})", v.num, v.id);
let message = format!("Deleting crate `{}#{}`", opts.crate_name, opts.version);
println!("{message}");
diesel::delete(versions::table.find(&v.id))
.execute(conn)
.unwrap();

if !opts.yes && !dialoguer::confirm("commit?") {
panic!("aborting transaction");
}

crate::worker::sync(krate.name, message, false)
.enqueue(conn)
.unwrap();
}
27 changes: 17 additions & 10 deletions src/background_jobs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub enum Job {
DumpDb(DumpDbJob),
IndexAddCrate(IndexAddCrateJob),
IndexSquash,
IndexSyncToHttp(IndexSyncToHttpJob),
IndexSync(IndexSyncJob),
IndexUpdateYanked(IndexUpdateYankedJob),
NormalizeIndex(NormalizeIndexJob),
RenderAndUploadReadme(RenderAndUploadReadmeJob),
Expand All @@ -43,7 +43,7 @@ impl Job {
const DUMP_DB: &str = "dump_db";
const INDEX_ADD_CRATE: &str = "add_crate";
const INDEX_SQUASH: &str = "squash_index";
const INDEX_SYNC_TO_HTTP: &str = "update_crate_index";
const INDEX_SYNC: &str = "update_crate_index";
const INDEX_UPDATE_YANKED: &str = "sync_yanked";
const NORMALIZE_INDEX: &str = "normalize_index";
const RENDER_AND_UPLOAD_README: &str = "render_and_upload_readme";
Expand All @@ -55,7 +55,7 @@ impl Job {
Job::DumpDb(_) => Self::DUMP_DB,
Job::IndexAddCrate(_) => Self::INDEX_ADD_CRATE,
Job::IndexSquash => Self::INDEX_SQUASH,
Job::IndexSyncToHttp(_) => Self::INDEX_SYNC_TO_HTTP,
Job::IndexSync(_) => Self::INDEX_SYNC,
Job::IndexUpdateYanked(_) => Self::INDEX_UPDATE_YANKED,
Job::NormalizeIndex(_) => Self::NORMALIZE_INDEX,
Job::RenderAndUploadReadme(_) => Self::RENDER_AND_UPLOAD_README,
Expand All @@ -69,7 +69,7 @@ impl Job {
Job::DumpDb(inner) => serde_json::to_value(inner),
Job::IndexAddCrate(inner) => serde_json::to_value(inner),
Job::IndexSquash => Ok(serde_json::Value::Null),
Job::IndexSyncToHttp(inner) => serde_json::to_value(inner),
Job::IndexSync(inner) => serde_json::to_value(inner),
Job::IndexUpdateYanked(inner) => serde_json::to_value(inner),
Job::NormalizeIndex(inner) => serde_json::to_value(inner),
Job::RenderAndUploadReadme(inner) => serde_json::to_value(inner),
Expand Down Expand Up @@ -97,7 +97,7 @@ impl Job {
Self::DUMP_DB => Job::DumpDb(from_value(value)?),
Self::INDEX_ADD_CRATE => Job::IndexAddCrate(from_value(value)?),
Self::INDEX_SQUASH => Job::IndexSquash,
Self::INDEX_SYNC_TO_HTTP => Job::IndexSyncToHttp(from_value(value)?),
Self::INDEX_SYNC => Job::IndexSync(from_value(value)?),
Self::INDEX_UPDATE_YANKED => Job::IndexUpdateYanked(from_value(value)?),
Self::NORMALIZE_INDEX => Job::NormalizeIndex(from_value(value)?),
Self::RENDER_AND_UPLOAD_README => Job::RenderAndUploadReadme(from_value(value)?),
Expand All @@ -120,9 +120,13 @@ impl Job {
worker::perform_daily_db_maintenance(&mut *fresh_connection(pool)?)
}
Job::DumpDb(args) => worker::perform_dump_db(env, args.database_url, args.target_name),
Job::IndexAddCrate(args) => worker::perform_index_add_crate(env, conn, &args.krate),
Job::IndexAddCrate(args) => {
worker::perform_index_add_crate(env, conn, &args.krate, &args.version_num)
}
Job::IndexSquash => worker::perform_index_squash(env),
Job::IndexSyncToHttp(args) => worker::perform_index_sync_to_http(env, args.crate_name),
Job::IndexSync(args) => {
worker::perform_index_sync(env, conn, &args.krate, &args.message, args.force)
}
Job::IndexUpdateYanked(args) => {
worker::perform_index_update_yanked(env, conn, &args.krate, &args.version_num)
}
Expand Down Expand Up @@ -164,12 +168,15 @@ pub struct DumpDbJob {

#[derive(Serialize, Deserialize)]
pub struct IndexAddCrateJob {
pub(super) krate: cargo_registry_index::Crate,
pub(super) krate: String,
pub(super) version_num: String,
}

#[derive(Serialize, Deserialize)]
pub struct IndexSyncToHttpJob {
pub(super) crate_name: String,
pub struct IndexSyncJob {
pub(super) krate: String,
pub(super) message: String,
pub(super) force: bool,
}

#[derive(Serialize, Deserialize)]
Expand Down
87 changes: 18 additions & 69 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ use flate2::read::GzDecoder;
use hex::ToHex;
use hyper::body::Buf;
use sha2::{Digest, Sha256};
use std::collections::BTreeMap;
use std::io::Read;
use std::path::Path;

use crate::controllers::cargo_prelude::*;
use crate::controllers::util::RequestPartsExt;
use crate::models::{
insert_version_owner_action, Category, Crate, DependencyKind, Keyword, NewCrate, NewVersion,
Rights, VersionAction,
insert_version_owner_action, Category, Crate, Keyword, NewCrate, NewVersion, Rights,
VersionAction,
};
use crate::worker;

Expand Down Expand Up @@ -199,8 +198,8 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
// to get here, and max upload sizes are way less than i32 max
content_length as i32,
user.id,
hex_cksum.clone(),
links.clone(),
hex_cksum,
links,
)?
.save(conn, &verified_email_address)?;

Expand All @@ -213,7 +212,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
)?;

// Link this new version to all dependencies
let git_deps = add_dependencies(conn, &new_crate.deps, version.id)?;
add_dependencies(conn, &new_crate.deps, version.id)?;

// Update all keywords for this crate
Keyword::update_crate(conn, &krate, &keywords)?;
Expand Down Expand Up @@ -247,31 +246,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
.uploader()
.upload_crate(app.http_client(), tarball_bytes, &krate, vers)?;

let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) =
features.into_iter().partition(|(_k, vals)| {
!vals
.iter()
.any(|v| v.starts_with("dep:") || v.contains("?/"))
});
let (features2, v) = if features2.is_empty() {
(None, None)
} else {
(Some(features2), Some(2))
};

// Register this crate in our local git repo.
let git_crate = cargo_registry_index::Crate {
name: name.0,
vers: vers.to_string(),
cksum: hex_cksum,
features,
features2,
deps: git_deps,
yanked: Some(false),
links,
v,
};
worker::add_crate(git_crate).enqueue(conn)?;
worker::add_crate(name.0, vers.to_string()).enqueue(conn)?;

// The `other` field on `PublishWarnings` was introduced to handle a temporary warning
// that is no longer needed. As such, crates.io currently does not return any `other`
Expand Down Expand Up @@ -349,11 +324,11 @@ pub fn add_dependencies(
conn: &mut PgConnection,
deps: &[EncodableCrateDependency],
target_version_id: i32,
) -> AppResult<Vec<cargo_registry_index::Dependency>> {
) -> AppResult<()> {
use self::dependencies::dsl::*;
use diesel::insert_into;

let git_and_new_dependencies = deps
let new_dependencies = deps
.iter()
.map(|dep| {
if let Some(registry) = &dep.registry {
Expand All @@ -373,51 +348,25 @@ pub fn add_dependencies(
}
}

// If this dependency has an explicit name in `Cargo.toml` that
// means that the `name` we have listed is actually the package name
// that we're depending on. The `name` listed in the index is the
// Cargo.toml-written-name which is what cargo uses for
// `--extern foo=...`
let (name, package) = match &dep.explicit_name_in_toml {
Some(explicit) => (explicit.to_string(), Some(dep.name.to_string())),
None => (dep.name.to_string(), None),
};

Ok((
cargo_registry_index::Dependency {
name,
req: dep.version_req.to_string(),
features: dep.features.iter().map(|s| s.0.to_string()).collect(),
optional: dep.optional,
default_features: dep.default_features,
target: dep.target.clone(),
kind: dep.kind.or(Some(DependencyKind::Normal)).map(|dk| dk.into()),
package,
},
(
version_id.eq(target_version_id),
crate_id.eq(krate.id),
req.eq(dep.version_req.to_string()),
dep.kind.map(|k| kind.eq(k as i32)),
optional.eq(dep.optional),
default_features.eq(dep.default_features),
features.eq(&dep.features),
target.eq(dep.target.as_deref()),
explicit_name.eq(dep.explicit_name_in_toml.as_deref())
),
version_id.eq(target_version_id),
crate_id.eq(krate.id),
req.eq(dep.version_req.to_string()),
dep.kind.map(|k| kind.eq(k as i32)),
optional.eq(dep.optional),
default_features.eq(dep.default_features),
features.eq(&dep.features),
target.eq(dep.target.as_deref()),
explicit_name.eq(dep.explicit_name_in_toml.as_deref())
))
})
.collect::<Result<Vec<_>, _>>()?;

let (mut git_deps, new_dependencies): (Vec<_>, Vec<_>) =
git_and_new_dependencies.into_iter().unzip();
git_deps.sort();

insert_into(dependencies)
.values(&new_dependencies)
.execute(conn)?;

Ok(git_deps)
Ok(())
}

fn verify_tarball(
Expand Down
10 changes: 10 additions & 0 deletions src/models/dependency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ pub enum DependencyKind {
// if you add a kind here, be sure to update `from_row` below.
}

impl From<cargo_registry_index::DependencyKind> for DependencyKind {
fn from(dk: cargo_registry_index::DependencyKind) -> Self {
match dk {
cargo_registry_index::DependencyKind::Normal => DependencyKind::Normal,
cargo_registry_index::DependencyKind::Build => DependencyKind::Build,
cargo_registry_index::DependencyKind::Dev => DependencyKind::Dev,
}
}
}

impl From<DependencyKind> for IndexDependencyKind {
fn from(dk: DependencyKind) -> Self {
match dk {
Expand Down
71 changes: 71 additions & 0 deletions src/models/krate.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::BTreeMap;

use chrono::NaiveDateTime;
use diesel::associations::Identifiable;
use diesel::pg::Pg;
Expand Down Expand Up @@ -432,6 +434,75 @@ impl Crate {

Ok(rows.records_and_total())
}

/// Serialize the crate as an index metadata file
pub fn index_metadata(&self, conn: &mut PgConnection) -> QueryResult<String> {
let mut versions: Vec<Version> = self.all_versions().load(conn)?;
versions.sort_by_cached_key(|k| {
semver::Version::parse(&k.num)
.expect("version was valid semver when inserted into the database")
});

let mut body = Vec::new();
for version in versions {
let mut deps: Vec<cargo_registry_index::Dependency> = version
.dependencies(conn)?
.into_iter()
.map(|(dep, name)| {
// If this dependency has an explicit name in `Cargo.toml` that
// means that the `name` we have listed is actually the package name
// that we're depending on. The `name` listed in the index is the
// Cargo.toml-written-name which is what cargo uses for
// `--extern foo=...`
let (name, package) = match dep.explicit_name {
Some(explicit_name) => (explicit_name, Some(name)),
None => (name, None),
};
cargo_registry_index::Dependency {
name,
req: dep.req,
features: dep.features,
optional: dep.optional,
default_features: dep.default_features,
kind: Some(dep.kind.into()),
package,
target: dep.target,
}
})
.collect();
deps.sort();

let features: BTreeMap<String, Vec<String>> =
serde_json::from_value(version.features).unwrap_or_default();
let (features, features2): (BTreeMap<_, _>, BTreeMap<_, _>) =
features.into_iter().partition(|(_k, vals)| {
!vals
.iter()
.any(|v| v.starts_with("dep:") || v.contains("?/"))
});
let (features, features2, v) = if features2.is_empty() {
(features, None, None)
} else {
(features, Some(features2), Some(2))
};

let krate = cargo_registry_index::Crate {
name: self.name.clone(),
vers: version.num.to_string(),
cksum: version.checksum,
yanked: Some(version.yanked),
deps,
features,
links: version.links,
features2,
v,
};
serde_json::to_writer(&mut body, &krate).unwrap();
body.push(b'\n');
}
let body = String::from_utf8(body).unwrap();
Ok(body)
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 33a48ab

Please sign in to comment.