diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 74ad24b05..5642e4e93 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -106,7 +106,7 @@ jobs:
       - name: run slow tests
         env:
-          DOCSRS_INCLUDE_DEFAULT_TARGETS: false
+          DOCSRS_INCLUDE_DEFAULT_TARGETS: true
         run: |
           cargo test --locked -- --ignored --test-threads=1
diff --git a/.sqlx/query-3d98a27dab09b6f74a097b8cea8077063f77fb1758c9c4a075d2dfa4e4a80822.json b/.sqlx/query-3d98a27dab09b6f74a097b8cea8077063f77fb1758c9c4a075d2dfa4e4a80822.json
new file mode 100644
index 000000000..0bd35131c
--- /dev/null
+++ b/.sqlx/query-3d98a27dab09b6f74a097b8cea8077063f77fb1758c9c4a075d2dfa4e4a80822.json
@@ -0,0 +1,22 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT path\n FROM files\n WHERE path LIKE $1\n ORDER BY path;",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "path",
+        "type_info": "Varchar"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "Text"
+      ]
+    },
+    "nullable": [
+      false
+    ]
+  },
+  "hash": "3d98a27dab09b6f74a097b8cea8077063f77fb1758c9c4a075d2dfa4e4a80822"
+}
diff --git a/Cargo.lock b/Cargo.lock
index ad3341c3e..9229a2001 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -169,6 +169,28 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "async-stream"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.51",
+]
+
 [[package]]
 name = "async-trait"
 version = "0.1.77"
@@ -1545,6 +1567,7 @@ name = "docs-rs"
 version = "0.6.0"
 dependencies = [
  "anyhow",
+ "async-stream",
  "aws-config",
  "aws-sdk-cloudfront",
  "aws-sdk-s3",
diff --git a/Cargo.toml b/Cargo.toml
index 4b66cd0f1..72db23c1e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -74,6 +74,7 @@ hex = "0.4.3"
 # Async
 tokio = { version = "1.0", features = ["rt-multi-thread", "signal", "macros"] }
 futures-util = "0.3.5"
+async-stream = "0.3.5"
 aws-config = "1.0.0"
 aws-sdk-s3 = "1.3.0"
 aws-sdk-cloudfront = "1.3.0"
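The only new functional dependency is `async-stream`; it appears below in `src/storage/s3.rs`, where `try_stream!` wraps the paginated `ListObjectsV2` loop into a `Stream`. As orientation, here is a minimal, self-contained sketch of that macro; the page counter and cutoff are made up for illustration:

```rust
// Minimal sketch (not from this PR) of how `async_stream::try_stream!` turns a
// paginated loop into a `Stream` of fallible items, mirroring the pattern the
// S3 backend adopts further down.
use async_stream::try_stream;
use futures_util::{pin_mut, Stream, StreamExt};

fn pages() -> impl Stream<Item = Result<u32, anyhow::Error>> {
    try_stream! {
        let mut page = 0;
        loop {
            // A real implementation would await an API call here and follow its
            // continuation token; we just stop after three pages.
            yield page;
            page += 1;
            if page == 3 {
                break;
            }
        }
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let stream = pages();
    pin_mut!(stream); // the stream must be pinned before iteration
    while let Some(item) = stream.next().await {
        println!("{}", item?);
    }
    Ok(())
}
```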
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index 6f58d695c..2fbe248e4 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -478,6 +478,7 @@ impl RustwideBuilder {
         }
 
         let mut algs = HashSet::new();
+        let mut target_build_logs = HashMap::new();
         if has_docs {
             debug!("adding documentation for the default target to the database");
             self.copy_docs(
@@ -493,7 +494,7 @@ impl RustwideBuilder {
             // Limit the number of targets so that no one can try to build all 200000 possible targets
             for target in other_targets.into_iter().take(limits.targets()) {
                 debug!("building package {} {} for {}", name, version, target);
-                self.build_target(
+                let target_res = self.build_target(
                     target,
                     build,
                     &limits,
@@ -501,6 +502,7 @@ impl RustwideBuilder {
                     &mut successful_targets,
                     &metadata,
                 )?;
+                target_build_logs.insert(target, target_res.build_log);
             }
             let (_, new_alg) = self.runtime.block_on(add_path_into_remote_archive(
                 &self.async_storage,
@@ -588,6 +590,10 @@ impl RustwideBuilder {
             ))?;
             let build_log_path = format!("build-logs/{build_id}/{default_target}.txt");
             self.storage.store_one(build_log_path, res.build_log)?;
+            for (target, log) in target_build_logs {
+                let build_log_path = format!("build-logs/{build_id}/{target}.txt");
+                self.storage.store_one(build_log_path, log)?;
+            }
 
             // Some crates.io crate data is mutable, so we proactively update it during a release
             if !is_local {
@@ -640,7 +646,7 @@ impl RustwideBuilder {
         local_storage: &Path,
         successful_targets: &mut Vec<String>,
         metadata: &Metadata,
-    ) -> Result<()> {
+    ) -> Result<FullBuildResult> {
         let target_res = self.execute_build(target, false, build, limits, metadata, false)?;
         if target_res.result.successful {
             // Cargo is not giving any error and not generating documentation of some crates
@@ -652,7 +658,7 @@ impl RustwideBuilder {
                 successful_targets.push(target.to_string());
             }
         }
-        Ok(())
+        Ok(target_res)
     }
 
     fn get_coverage(
@@ -981,17 +987,19 @@ mod tests {
 
         // check release record in the db (default and other targets)
         let mut conn = env.db().conn();
-        let rows = conn
-            .query(
+        let row = conn
+            .query_one(
                 "SELECT
                     r.rustdoc_status,
                     r.default_target,
                     r.doc_targets,
                     r.archive_storage,
-                    cov.total_items
+                    cov.total_items,
+                    b.id as build_id
                  FROM
                     crates as c
                     INNER JOIN releases AS r ON c.id = r.crate_id
+                    INNER JOIN builds as b ON r.id = b.rid
                     LEFT OUTER JOIN doc_coverage AS cov ON r.id = cov.release_id
                  WHERE
                     c.name = $1 AND
@@ -999,7 +1007,6 @@ mod tests {
                 &[&crate_, &version],
             )
             .unwrap();
-        let row = rows.first().unwrap();
 
         assert!(row.get::<_, bool>("rustdoc_status"));
         assert_eq!(row.get::<_, String>("default_target"), default_target);
@@ -1086,6 +1093,13 @@ mod tests {
 
                     assert!(target_docs_present);
                     assert_success(&target_url, web)?;
+
+                    assert!(storage
+                        .exists(&format!(
+                            "build-logs/{}/{target}.txt",
+                            row.get::<_, i32>("build_id")
+                        ))
+                        .unwrap());
                 }
             }
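With the change above, the builder collects one log per non-default target and uploads each one next to the default target's log. A small standalone sketch of the resulting key layout; `store_one` here is a hypothetical stand-in for the real `Storage::store_one` call, and the targets and logs are made up:

```rust
use std::collections::HashMap;

// Hypothetical stand-in for docs.rs storage; the real call is `Storage::store_one`.
fn store_one(path: String, content: String) {
    println!("storing {path} ({} bytes)", content.len());
}

fn main() {
    let build_id = 42;
    let default_target = "x86_64-unknown-linux-gnu";

    // Collected while building each extra target, as in the loop above.
    let mut target_build_logs: HashMap<&str, String> = HashMap::new();
    target_build_logs.insert("i686-pc-windows-msvc", "windows build log".into());
    target_build_logs.insert("aarch64-apple-darwin", "mac build log".into());

    // The default target's log and every other target's log share one prefix,
    // which is what lets the web handler list them with a single prefix query.
    store_one(
        format!("build-logs/{build_id}/{default_target}.txt"),
        "default log".into(),
    );
    for (target, log) in target_build_logs {
        store_one(format!("build-logs/{build_id}/{target}.txt"), log);
    }
}
```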
diff --git a/src/storage/database.rs b/src/storage/database.rs
index 0899f714d..4249a543d 100644
--- a/src/storage/database.rs
+++ b/src/storage/database.rs
@@ -1,10 +1,8 @@
+use super::{Blob, FileRange};
+use crate::{db::Pool, error::Result, InstanceMetrics};
 use chrono::{DateTime, Utc};
+use futures_util::stream::{Stream, TryStreamExt};
 use sqlx::Acquire;
-
-use super::{Blob, FileRange};
-use crate::db::Pool;
-use crate::error::Result;
-use crate::InstanceMetrics;
 use std::{convert::TryFrom, sync::Arc};
 
 pub(crate) struct DatabaseBackend {
@@ -167,6 +165,22 @@ impl DatabaseBackend {
         Ok(())
     }
 
+    pub(super) async fn list_prefix<'a>(
+        &'a self,
+        prefix: &'a str,
+    ) -> impl Stream<Item = Result<String>> + 'a {
+        sqlx::query!(
+            "SELECT path
+             FROM files
+             WHERE path LIKE $1
+             ORDER BY path;",
+            format!("{}%", prefix.replace('%', "\\%"))
+        )
+        .fetch(&self.pool)
+        .map_err(Into::into)
+        .map_ok(|row| row.path)
+    }
+
     pub(crate) async fn delete_prefix(&self, prefix: &str) -> Result<()> {
         sqlx::query!(
             "DELETE FROM files WHERE path LIKE $1;",
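`list_prefix` reuses the `LIKE`-escaping trick already present in `delete_prefix`: the `%` wildcard is escaped so a literal percent sign in a stored path cannot widen the match. A tiny sketch of just that pattern construction; note that SQL `LIKE` also treats `_` as a wildcard, which neither this sketch nor the query above escapes:

```rust
// Sketch of the LIKE-pattern construction used above. Escaping `%` keeps a
// literal percent sign in a stored path from acting as a wildcard.
fn like_pattern(prefix: &str) -> String {
    format!("{}%", prefix.replace('%', "\\%"))
}

fn main() {
    assert_eq!(like_pattern("build-logs/42/"), "build-logs/42/%");
    assert_eq!(like_pattern("odd%name/"), "odd\\%name/%");
    println!("patterns ok");
}
```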
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index ae9d369ea..48f6cbf31 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -6,18 +6,20 @@ mod s3;
 pub use self::compression::{compress, decompress, CompressionAlgorithm, CompressionAlgorithms};
 use self::database::DatabaseBackend;
 use self::s3::S3Backend;
-use crate::error::Result;
-use crate::web::metrics::RenderingTimesRecorder;
-use crate::{db::Pool, utils::spawn_blocking, Config, InstanceMetrics};
+use crate::{
+    db::Pool, error::Result, utils::spawn_blocking, web::metrics::RenderingTimesRecorder, Config,
+    InstanceMetrics,
+};
 use anyhow::{anyhow, ensure};
 use chrono::{DateTime, Utc};
 use fn_error_context::context;
+use futures_util::stream::BoxStream;
 use path_slash::PathExt;
-use std::io::BufReader;
 use std::{
     collections::{HashMap, HashSet},
     ffi::OsStr,
-    fmt, fs, io,
+    fmt, fs,
+    io::{self, BufReader},
     ops::RangeInclusive,
     path::{Path, PathBuf},
     sync::Arc,
@@ -558,6 +560,16 @@ impl AsyncStorage {
         }
     }
 
+    pub(super) async fn list_prefix<'a>(
+        &'a self,
+        prefix: &'a str,
+    ) -> BoxStream<'a, Result<String>> {
+        match &self.backend {
+            StorageBackend::Database(db) => Box::pin(db.list_prefix(prefix).await),
+            StorageBackend::S3(s3) => Box::pin(s3.list_prefix(prefix).await),
+        }
+    }
+
     pub(crate) async fn delete_prefix(&self, prefix: &str) -> Result<()> {
         match &self.backend {
             StorageBackend::Database(db) => db.delete_prefix(prefix).await,
@@ -752,6 +764,22 @@ impl Storage {
         self.runtime.block_on(self.inner.store_one(path, content))
     }
 
+    /// sync wrapper for the list_prefix function
+    /// purely for testing purposes since it collects all files into a Vec.
+    #[cfg(test)]
+    pub(crate) fn list_prefix(&self, prefix: &str) -> impl Iterator<Item = Result<String>> {
+        use futures_util::stream::StreamExt;
+        self.runtime
+            .block_on(async {
+                self.inner
+                    .list_prefix(prefix)
+                    .await
+                    .collect::<Vec<_>>()
+                    .await
+            })
+            .into_iter()
+    }
+
     pub(crate) fn delete_prefix(&self, prefix: &str) -> Result<()> {
         self.runtime.block_on(self.inner.delete_prefix(prefix))
     }
@@ -964,6 +992,37 @@ mod backend_tests {
         Ok(())
     }
 
+    fn test_list_prefix(storage: &Storage) -> Result<()> {
+        static FILENAMES: &[&str] = &["baz.txt", "some/bar.txt"];
+
+        storage.store_blobs(
+            FILENAMES
+                .iter()
+                .map(|&filename| Blob {
+                    path: filename.into(),
+                    mime: "text/plain".into(),
+                    date_updated: Utc::now(),
+                    compression: None,
+                    content: b"test content\n".to_vec(),
+                })
+                .collect(),
+        )?;
+
+        assert_eq!(
+            storage.list_prefix("").collect::<Result<Vec<_>>>()?,
+            FILENAMES
+        );
+
+        assert_eq!(
+            storage
+                .list_prefix("some/")
+                .collect::<Result<Vec<_>>>()?,
+            &["some/bar.txt"]
+        );
+
+        Ok(())
+    }
+
     fn test_too_long_filename(storage: &Storage) -> Result<()> {
         // minio returns ErrKeyTooLongError when the key is over 1024 bytes long.
         // When testing, minio just gave me `XMinioInvalidObjectName`, so I'll check that too.
@@ -1321,6 +1380,7 @@ mod backend_tests {
         test_get_range,
         test_get_too_big,
         test_too_long_filename,
+        test_list_prefix,
        test_delete_prefix,
        test_delete_prefix_without_matches,
        test_delete_percent,
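The `#[cfg(test)]` wrapper trades streaming for simplicity: it drains the whole stream into a `Vec` inside `block_on` and hands back an iterator, so tests can `collect::<Result<Vec<_>>>()` and short-circuit on the first error. A minimal sketch of that two-step collection, using an in-memory stream instead of real storage:

```rust
use futures_util::stream::{iter, StreamExt};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Fallible stream standing in for `AsyncStorage::list_prefix`.
    let listing = iter(vec![
        Ok::<_, anyhow::Error>("baz.txt".to_string()),
        Ok("some/bar.txt".to_string()),
    ]);

    // Drain into Vec<Result<_>> first (what the sync wrapper's block_on does) ...
    let items: Vec<anyhow::Result<String>> = listing.collect().await;

    // ... then re-collect; this short-circuits on the first `Err`, like iterators.
    let paths = items.into_iter().collect::<anyhow::Result<Vec<_>>>()?;
    assert_eq!(paths, ["baz.txt", "some/bar.txt"]);
    Ok(())
}
```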
diff --git a/src/storage/s3.rs b/src/storage/s3.rs
index 0d3d21eb5..16ed8be81 100644
--- a/src/storage/s3.rs
+++ b/src/storage/s3.rs
@@ -1,6 +1,7 @@
 use super::{Blob, FileRange};
 use crate::{Config, InstanceMetrics};
 use anyhow::{Context as _, Error};
+use async_stream::try_stream;
 use aws_config::BehaviorVersion;
 use aws_sdk_s3::{
     config::{retry::RetryConfig, Region},
@@ -12,7 +13,8 @@ use aws_smithy_types_convert::date_time::DateTimeExt;
 use chrono::Utc;
 use futures_util::{
     future::TryFutureExt,
-    stream::{FuturesUnordered, StreamExt},
+    pin_mut,
+    stream::{FuturesUnordered, Stream, StreamExt},
 };
 use std::{io::Write, sync::Arc};
 use tracing::{error, warn};
@@ -260,55 +262,73 @@ impl S3Backend {
         panic!("failed to upload 3 times, exiting");
     }
 
-    pub(super) async fn delete_prefix(&self, prefix: &str) -> Result<(), Error> {
-        let mut continuation_token = None;
-        loop {
-            let list = self
-                .client
-                .list_objects_v2()
-                .bucket(&self.bucket)
-                .prefix(prefix)
-                .set_continuation_token(continuation_token)
-                .send()
-                .await?;
-
-            if list.key_count().unwrap_or(0) > 0 {
-                let to_delete = Delete::builder()
-                    .set_objects(Some(
-                        list.contents
-                            .expect("didn't get content even though key_count was > 0")
-                            .into_iter()
-                            .filter_map(|obj| {
-                                obj.key()
-                                    .and_then(|k| ObjectIdentifier::builder().key(k).build().ok())
-                            })
-                            .collect(),
-                    ))
-                    .build()
-                    .context("could not build delete request")?;
-
-                let resp = self
+    pub(super) async fn list_prefix<'a>(
+        &'a self,
+        prefix: &'a str,
+    ) -> impl Stream<Item = Result<String, Error>> + 'a {
+        try_stream! {
+            let mut continuation_token = None;
+            loop {
+                let list = self
                     .client
-                    .delete_objects()
+                    .list_objects_v2()
                     .bucket(&self.bucket)
-                    .delete(to_delete)
+                    .prefix(prefix)
+                    .set_continuation_token(continuation_token)
                     .send()
                     .await?;
 
-                if let Some(errs) = resp.errors {
-                    for err in &errs {
-                        error!("error deleting file from s3: {:?}", err);
+                if let Some(contents) = list.contents {
+                    for obj in contents {
+                        if let Some(key) = obj.key() {
+                            yield key.to_owned();
+                        }
                     }
+                }
 
-                    anyhow::bail!("deleting from s3 failed");
+                continuation_token = list.next_continuation_token;
+                if continuation_token.is_none() {
+                    break;
                 }
             }
+        }
+    }
 
-            continuation_token = list.next_continuation_token;
-            if continuation_token.is_none() {
-                return Ok(());
+    pub(super) async fn delete_prefix(&self, prefix: &str) -> Result<(), Error> {
+        let stream = self.list_prefix(prefix).await;
+        pin_mut!(stream);
+        let mut chunks = stream.chunks(900); // 1000 is the limit for the delete_objects API
+
+        while let Some(batch) = chunks.next().await {
+            let batch: Vec<_> = batch.into_iter().collect::<Result<_, Error>>()?;
+
+            let to_delete = Delete::builder()
+                .set_objects(Some(
+                    batch
+                        .into_iter()
+                        .filter_map(|k| ObjectIdentifier::builder().key(k).build().ok())
+                        .collect(),
+                ))
+                .build()
+                .context("could not build delete request")?;
+
+            let resp = self
+                .client
+                .delete_objects()
+                .bucket(&self.bucket)
+                .delete(to_delete)
+                .send()
+                .await?;
+
+            if let Some(errs) = resp.errors {
+                for err in &errs {
+                    error!("error deleting file from s3: {:?}", err);
+                }
+
+                anyhow::bail!("deleting from s3 failed");
             }
         }
+
+        Ok(())
     }
 
     #[cfg(test)]
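Rebuilding `delete_prefix` on top of `list_prefix` also makes the batching explicit: `delete_objects` accepts at most 1000 keys per request, and chunking the stream at 900 leaves headroom. A sketch of the `StreamExt::chunks` batching, with fabricated keys in place of a real listing:

```rust
// Sketch of the batching idea: `StreamExt::chunks(n)` buffers a stream into
// Vec-sized batches, so each delete call stays under the API's 1000-key limit.
use futures_util::{pin_mut, stream, StreamExt};

#[tokio::main]
async fn main() {
    let keys = stream::iter((0..2500).map(|i| format!("build-logs/{i}.txt")));
    pin_mut!(keys);

    let mut chunks = keys.chunks(900);
    while let Some(batch) = chunks.next().await {
        // A real implementation would issue one delete_objects call per batch.
        println!("would delete {} keys", batch.len());
    }
}
```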
diff --git a/src/test/fakes.rs b/src/test/fakes.rs
index 4729d84bc..2b465f967 100644
--- a/src/test/fakes.rs
+++ b/src/test/fakes.rs
@@ -7,7 +7,7 @@ use crate::storage::{
     rustdoc_archive_path, source_archive_path, AsyncStorage, CompressionAlgorithms,
 };
 use crate::utils::{Dependency, MetadataPackage, Target};
-use anyhow::Context;
+use anyhow::{bail, Context};
 use base64::{engine::general_purpose::STANDARD as b64, Engine};
 use chrono::{DateTime, Utc};
 use serde_json::Value;
@@ -43,6 +43,7 @@ pub(crate) struct FakeRelease<'a> {
 
 pub(crate) struct FakeBuild {
     s3_build_log: Option<String>,
+    other_build_logs: HashMap<String, String>,
     db_build_log: Option<String>,
     result: BuildResult,
 }
@@ -561,6 +562,16 @@ impl FakeBuild {
         }
     }
 
+    pub(crate) fn build_log_for_other_target(
+        mut self,
+        target: impl Into<String>,
+        build_log: impl Into<String>,
+    ) -> Self {
+        self.other_build_logs
+            .insert(target.into(), build_log.into());
+        self
+    }
+
     pub(crate) fn db_build_log(self, build_log: impl Into<String>) -> Self {
         Self {
             db_build_log: Some(build_log.into()),
@@ -605,11 +616,21 @@ impl FakeBuild {
                 .await?;
         }
 
+        let prefix = format!("build-logs/{build_id}/");
+
         if let Some(s3_build_log) = self.s3_build_log.as_deref() {
-            let path = format!("build-logs/{build_id}/{default_target}.txt");
+            let path = format!("{prefix}{default_target}.txt");
             storage.store_one(path, s3_build_log).await?;
         }
 
+        for (target, log) in &self.other_build_logs {
+            if target == default_target {
+                bail!("build log for default target has to be set via `s3_build_log`");
+            }
+            let path = format!("{prefix}{target}.txt");
+            storage.store_one(path, log.as_str()).await?;
+        }
+
         Ok(())
     }
 }
@@ -619,6 +640,7 @@ impl Default for FakeBuild {
         Self {
             s3_build_log: Some("It works!".into()),
             db_build_log: None,
+            other_build_logs: HashMap::new(),
             result: BuildResult {
                 rustc_version: "rustc 2.0.0-nightly (000000000 1970-01-01)".into(),
                 docsrs_version: "docs.rs 1.0.0 (000000000 1970-01-01)".into(),
diff --git a/src/web/build_details.rs b/src/web/build_details.rs
index c670a04f4..9c2060e1e 100644
--- a/src/web/build_details.rs
+++ b/src/web/build_details.rs
@@ -11,8 +11,9 @@ use crate::{
 use anyhow::Context as _;
 use axum::{extract::Extension, response::IntoResponse};
 use chrono::{DateTime, Utc};
+use futures_util::TryStreamExt;
 use semver::Version;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 
 #[derive(Debug, Clone, PartialEq, Eq, Serialize)]
@@ -30,19 +31,29 @@ struct BuildDetailsPage {
     metadata: MetaData,
     build_details: BuildDetails,
     use_direct_platform_links: bool,
+    all_log_filenames: Vec<String>,
+    current_filename: Option<String>,
 }
 
 impl_axum_webpage! {
     BuildDetailsPage = "crate/build_details.html",
 }
 
+#[derive(Clone, Deserialize, Debug)]
+pub(crate) struct BuildDetailsParams {
+    pub(crate) name: String,
+    pub(crate) version: Version,
+    pub(crate) id: String,
+    pub(crate) filename: Option<String>,
+}
+
 pub(crate) async fn build_details_handler(
-    Path((name, version, id)): Path<(String, Version, String)>,
+    Path(params): Path<BuildDetailsParams>,
     mut conn: DbConnection,
     Extension(config): Extension<Arc<Config>>,
     Extension(storage): Extension<Arc<AsyncStorage>>,
 ) -> AxumResult<impl IntoResponse> {
-    let id: i32 = id.parse().map_err(|_| AxumNope::BuildNotFound)?;
+    let id: i32 = params.id.parse().map_err(|_| AxumNope::BuildNotFound)?;
 
     let row = sqlx::query!(
         "SELECT
@@ -57,23 +68,42 @@ pub(crate) async fn build_details_handler(
          INNER JOIN crates ON releases.crate_id = crates.id
          WHERE builds.id = $1 AND crates.name = $2 AND releases.version = $3",
         id,
-        name,
-        version.to_string(),
+        params.name,
+        params.version.to_string(),
     )
     .fetch_optional(&mut *conn)
    .await?
    .ok_or(AxumNope::BuildNotFound)?;
 
-    let output = if let Some(output) = row.output {
-        output
+    let (output, all_log_filenames, current_filename) = if let Some(output) = row.output {
+        (output, Vec::new(), None)
     } else {
-        let path = format!("build-logs/{id}/{}.txt", row.default_target);
+        let prefix = format!("build-logs/{}/", id);
+
+        let current_filename = params
+            .filename
+            .unwrap_or_else(|| format!("{}.txt", row.default_target));
+
+        let path = format!("{prefix}{current_filename}");
         let file = File::from_path(&storage, &path, &config).await?;
-        String::from_utf8(file.0.content).context("non utf8")?
+        (
+            String::from_utf8(file.0.content).context("non utf8")?,
+            storage
+                .list_prefix(&prefix) // the result from S3 is ordered by key
+                .await
+                .map_ok(|path| {
+                    path.strip_prefix(&prefix)
+                        .expect("since we query for the prefix, it has to be always there")
+                        .to_owned()
+                })
+                .try_collect()
+                .await?,
+            Some(current_filename),
+        )
     };
 
     Ok(BuildDetailsPage {
-        metadata: MetaData::from_crate(&mut conn, &name, &version, None).await?,
+        metadata: MetaData::from_crate(&mut conn, &params.name, &params.version, None).await?,
         build_details: BuildDetails {
             id,
             rustc_version: row.rustc_version,
@@ -83,6 +113,8 @@ pub(crate) async fn build_details_handler(
             output,
         },
         use_direct_platform_links: true,
+        all_log_filenames,
+        current_filename,
     }
     .into_response())
 }
@@ -93,6 +125,19 @@ mod tests {
     use kuchikiki::traits::TendrilSink;
     use test_case::test_case;
 
+    fn get_all_log_links(page: &kuchikiki::NodeRef) -> Vec<(String, String)> {
+        page.select("ul > li a.release")
+            .unwrap()
+            .map(|el| {
+                let attributes = el.attributes.borrow();
+                (
+                    el.text_contents().trim().to_owned(),
+                    attributes.get("href").unwrap().to_string(),
+                )
+            })
+            .collect()
+    }
+
     #[test]
     fn db_build_logs() {
         wrapper(|env| {
@@ -116,6 +161,7 @@ mod tests {
             let url = attrs.get("href").unwrap();
 
             let page = kuchikiki::parse_html().one(env.frontend().get(url).send()?.text()?);
+            assert!(get_all_log_links(&page).is_empty());
 
             let log = page.select("pre").unwrap().next().unwrap().text_contents();
 
@@ -143,15 +189,99 @@ mod tests {
 
             let node = page.select("ul > li a.release").unwrap().next().unwrap();
             let attrs = node.attributes.borrow();
-            let url = attrs.get("href").unwrap();
+            let build_url = attrs.get("href").unwrap();
 
-            let page = kuchikiki::parse_html()
-                .one(env.frontend().get(url).send()?.error_for_status()?.text()?);
+            let page = kuchikiki::parse_html().one(env.frontend().get(build_url).send()?.text()?);
 
             let log = page.select("pre").unwrap().next().unwrap().text_contents();
 
             assert!(log.contains("A build log"));
 
+            let all_log_links = get_all_log_links(&page);
+            assert_eq!(
+                all_log_links,
+                vec![(
+                    "x86_64-unknown-linux-gnu.txt".into(),
+                    format!("{build_url}/x86_64-unknown-linux-gnu.txt")
+                )]
+            );
+
+            // now get the log with the specific filename in the URL
+            let log = kuchikiki::parse_html()
+                .one(env.frontend().get(&all_log_links[0].1).send()?.text()?)
+                .select("pre")
+                .unwrap()
+                .next()
+                .unwrap()
+                .text_contents();
+
+            assert!(log.contains("A build log"));
+
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn s3_build_logs_multiple_targets() {
+        wrapper(|env| {
+            env.fake_release()
+                .name("foo")
+                .version("0.1.0")
+                .builds(vec![FakeBuild::default()
+                    .s3_build_log("A build log")
+                    .build_log_for_other_target(
+                        "other_target",
+                        "other target build log",
+                    )])
+                .create()?;
+
+            let page = kuchikiki::parse_html().one(
+                env.frontend()
+                    .get("/crate/foo/0.1.0/builds")
+                    .send()?
+                    .text()?,
+            );
+
+            let node = page.select("ul > li a.release").unwrap().next().unwrap();
+            let attrs = node.attributes.borrow();
+            let build_url = attrs.get("href").unwrap();
+
+            let page = kuchikiki::parse_html().one(env.frontend().get(build_url).send()?.text()?);
+
+            let log = page.select("pre").unwrap().next().unwrap().text_contents();
+
+            assert!(log.contains("A build log"));
+
+            let all_log_links = get_all_log_links(&page);
+            assert_eq!(
+                all_log_links,
+                vec![
+                    (
+                        "other_target.txt".into(),
+                        format!("{build_url}/other_target.txt")
+                    ),
+                    (
+                        "x86_64-unknown-linux-gnu.txt".into(),
+                        format!("{build_url}/x86_64-unknown-linux-gnu.txt"),
+                    )
+                ]
+            );
+
+            for (url, expected_content) in &[
+                (&all_log_links[0].1, "other target build log"),
+                (&all_log_links[1].1, "A build log"),
+            ] {
+                let other_log = kuchikiki::parse_html()
+                    .one(env.frontend().get(url).send()?.text()?)
+                    .select("pre")
+                    .unwrap()
+                    .next()
+                    .unwrap()
+                    .text_contents();
+
+                assert!(other_log.contains(expected_content));
+            }
+
             Ok(())
         });
     }
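The handler derives the link list by listing the storage prefix and stripping it from each returned key, so only the per-target filenames reach the template. The same `map_ok`/`try_collect` mapping over an in-memory stream (keys are made up):

```rust
// Sketch of the filename derivation in the handler above: keys come back from
// `list_prefix` as full storage paths, and only the trailing filename is kept.
use futures_util::{stream, TryStreamExt};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let prefix = "build-logs/42/";
    let keys = stream::iter(vec![
        Ok::<_, anyhow::Error>("build-logs/42/other_target.txt".to_string()),
        Ok("build-logs/42/x86_64-unknown-linux-gnu.txt".to_string()),
    ]);

    let all_log_filenames: Vec<String> = keys
        .map_ok(|path| {
            path.strip_prefix(prefix)
                .expect("since we query for the prefix, it has to be always there")
                .to_owned()
        })
        .try_collect()
        .await?;

    assert_eq!(
        all_log_filenames,
        ["other_target.txt", "x86_64-unknown-linux-gnu.txt"]
    );
    Ok(())
}
```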
diff --git a/src/web/routes.rs b/src/web/routes.rs
index f0630bf33..392515100 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -220,6 +220,10 @@ pub(super) fn build_axum_routes() -> AxumRouter {
             "/crate/:name/:version/builds/:id",
             get_internal(super::build_details::build_details_handler),
         )
+        .route_with_tsr(
+            "/crate/:name/:version/builds/:id/:filename",
+            get_internal(super::build_details::build_details_handler),
+        )
         .route_with_tsr(
             "/crate/:name/:version/features",
             get_internal(super::features::build_features_handler),
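Both routes point at the same handler; axum deserializes the captured segments into `BuildDetailsParams`, and `filename` is simply `None` when the shorter route matched. A minimal standalone sketch of that two-routes-one-handler pattern, with an illustrative struct and paths, using the axum 0.6/0.7 `:param` capture syntax:

```rust
// `Path<Params>` with an `Option<String>` field deserializes to `None`
// when the matched route has no `:filename` segment.
use axum::{extract::Path, routing::get, Router};
use serde::Deserialize;

#[derive(Deserialize)]
struct Params {
    id: String,
    filename: Option<String>,
}

async fn handler(Path(params): Path<Params>) -> String {
    match params.filename {
        Some(f) => format!("build {}, log file {f}", params.id),
        None => format!("build {}, default log", params.id),
    }
}

fn app() -> Router {
    Router::new()
        .route("/builds/:id", get(handler))
        .route("/builds/:id/:filename", get(handler))
}

#[tokio::main]
async fn main() {
    let _ = app(); // wire into a server with axum::serve as appropriate
}
```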
diff --git a/templates/crate/build_details.html b/templates/crate/build_details.html
index e5a634b91..80e23dfbd 100644
--- a/templates/crate/build_details.html
+++ b/templates/crate/build_details.html
@@ -30,6 +30,25 @@
             Build #{{ build_details.id }}
             {{ build_details.build_time | date(format="%+") }}
+
+            {%- filter dedent -%}
             # rustc version
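The template gains a list of per-target log links driven by the two new `BuildDetailsPage` fields. A rough plain-Rust equivalent of that rendering logic, purely illustrative since the real output is Tera-generated HTML:

```rust
// Rough Rust equivalent of what the amended template renders: one link per
// log filename, marking the one currently shown. Illustrative only.
fn render_log_links(
    build_url: &str,
    all_log_filenames: &[String],
    current_filename: Option<&str>,
) -> String {
    let mut out = String::new();
    for filename in all_log_filenames {
        let marker = if Some(filename.as_str()) == current_filename {
            " (current)"
        } else {
            ""
        };
        out.push_str(&format!("{build_url}/{filename}{marker}\n"));
    }
    out
}

fn main() {
    let links = render_log_links(
        "/crate/foo/0.1.0/builds/1",
        &[
            "other_target.txt".into(),
            "x86_64-unknown-linux-gnu.txt".into(),
        ],
        Some("x86_64-unknown-linux-gnu.txt"),
    );
    print!("{links}");
}
```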