Skip to content

Commit

Permalink
Create db-dump.zip file too
Browse files: browse the repository at this point in the history
Zip files compress each entry individually, which allows users to extract only the data they need, instead of needlessly unpacking the full tarball just to read the one small table they are interested in.
  • Loading branch information
Turbo87 committed May 31, 2024
1 parent 1967652 commit 181479c
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 38 deletions.
75 changes: 75 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ tracing-subscriber = { version = "=0.3.18", features = ["env-filter"] }
typomania = { version = "=0.1.2", default-features = false }
url = "=2.5.0"
unicode-xid = "=0.2.4"
zip = { version = "=2.1.1", default-features = false, features = ["deflate"] }

[dev-dependencies]
bytes = "=1.6.0"
Expand Down
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ allow = [
#"Apache-2.0 WITH LLVM-exception",
"BSD-2-Clause",
"BSD-3-Clause",
"BSL-1.0",
"ISC",
"MIT",
"MPL-2.0",
Expand Down
6 changes: 4 additions & 2 deletions src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ const PREFIX_CRATES: &str = "crates";
const PREFIX_READMES: &str = "readmes";
const DEFAULT_REGION: &str = "us-west-1";
const CONTENT_TYPE_CRATE: &str = "application/gzip";
const CONTENT_TYPE_DB_DUMP: &str = "application/gzip";
const CONTENT_TYPE_GZIP: &str = "application/gzip";
const CONTENT_TYPE_ZIP: &str = "application/zip";
const CONTENT_TYPE_INDEX: &str = "text/plain";
const CONTENT_TYPE_README: &str = "text/html";
const CACHE_CONTROL_IMMUTABLE: &str = "public,max-age=31536000,immutable";
Expand Down Expand Up @@ -126,7 +127,8 @@ impl Storage {
// The `BufWriter::new()` API currently does not allow
// specifying any file attributes, so we need to set the
// content type here instead for the database dump upload.
.with_content_type_for_suffix("gz", CONTENT_TYPE_DB_DUMP);
.with_content_type_for_suffix("gz", CONTENT_TYPE_GZIP)
.with_content_type_for_suffix("zip", CONTENT_TYPE_ZIP);

let store = build_s3(default, options);

Expand Down
37 changes: 35 additions & 2 deletions src/tests/dump_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use insta::{assert_debug_snapshot, assert_snapshot};
use once_cell::sync::Lazy;
use regex::Regex;
use secrecy::ExposeSecret;
use std::io::Read;
use std::io::{Cursor, Read};
use tar::Archive;

static PATH_DATE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap());
Expand All @@ -28,8 +28,9 @@ async fn test_dump_db_job() {
app.run_pending_background_jobs().await;

let stored_files = app.stored_files().await;
assert_eq!(stored_files.len(), 1);
assert_eq!(stored_files.len(), 2);
assert_eq!(stored_files[0], "db-dump.tar.gz");
assert_eq!(stored_files[1], "db-dump.zip");

let path = object_store::path::Path::parse("db-dump.tar.gz").unwrap();
let result = app.as_inner().storage.as_inner().get(&path).await.unwrap();
Expand Down Expand Up @@ -65,6 +66,38 @@ async fn test_dump_db_job() {
"YYYY-MM-DD-HHMMSS/data/version_downloads.csv",
]
"###);

let path = object_store::path::Path::parse("db-dump.zip").unwrap();
let result = app.as_inner().storage.as_inner().get(&path).await.unwrap();
let bytes = result.bytes().await.unwrap();

let archive = zip::ZipArchive::new(Cursor::new(bytes)).unwrap();
let zip_paths = archive.file_names().collect::<Vec<_>>();
assert_debug_snapshot!(zip_paths, @r###"
[
"README.md",
"export.sql",
"import.sql",
"metadata.json",
"schema.sql",
"data/",
"data/categories.csv",
"data/crate_downloads.csv",
"data/crates.csv",
"data/keywords.csv",
"data/metadata.csv",
"data/reserved_crate_names.csv",
"data/teams.csv",
"data/users.csv",
"data/crates_categories.csv",
"data/crates_keywords.csv",
"data/crate_owners.csv",
"data/versions.csv",
"data/default_versions.csv",
"data/dependencies.csv",
"data/version_downloads.csv",
]
"###);
}

fn tar_paths<R: Read>(archive: &mut Archive<R>) -> Vec<String> {
Expand Down
Loading

0 comments on commit 181479c

Please sign in to comment.