Skip to content

Commit

Permalink
Merge pull request #6 from spacemeshos/support-zstd
Browse files Browse the repository at this point in the history
Support zstd archives (and keep support of zip)
  • Loading branch information
brusherru authored Mar 1, 2024
2 parents 6896ca9 + b718b6b commit c73b887
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 25 deletions.
25 changes: 22 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quicksync"
version = "0.1.7"
version = "0.1.8"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -16,3 +16,4 @@ reqwest = { version = "0.11.23", features = ["json", "stream", "blocking"] }
rusqlite = { version = "0.30.0", features = ["bundled"] }
url = "2.5.0"
zip = "0.6.6"
zstd = "0.13.0"
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ cargo run -- help
- `1` - failed to download archive within max retries (any reason)
- `2` - cannot unpack archive: not enough disk space
- `3` - cannot unpack archive: any other reason
- `4` - invalid checksum
- `4` - invalid checksum of db
- `5` - cannot verify checksum for some reason
- `6` - cannot create a backup file
- `6` - cannot create a backup file
- `7` - invalid checksum of archive
- `8` - cannot validate archive checksum
32 changes: 26 additions & 6 deletions src/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,30 @@ use url::Url;

use crate::utils::strip_trailing_newline;

fn replace_sql_zip_with_md5(url: &Url) -> Result<Url> {
/// Builds the URL of the MD5 checksum for the unpacked database from the
/// archive URL.
///
/// The archive URL must end with `.sql.zip` or `.sql.zst`; that suffix is
/// swapped for `.sql.md5`.
///
/// # Errors
///
/// Returns an error if the URL has neither supported suffix or if the
/// resulting string is not a valid URL.
fn get_link_to_db_md5(url: &Url) -> Result<Url> {
    let url_str = url.as_str();
    // Strip only the trailing extension: `str::replace` would also rewrite any
    // earlier occurrence of the same text elsewhere in the URL.
    let stem = url_str
        .strip_suffix(".sql.zip")
        .or_else(|| url_str.strip_suffix(".sql.zst"));

    match stem {
        Some(stem) => Ok(Url::parse(&format!("{}.sql.md5", stem))?),
        None => anyhow::bail!("URL does not end with .sql.zip or .sql.zst"),
    }
}

pub fn download_checksum(url: &Url) -> Result<String> {
let md5_url = replace_sql_zip_with_md5(&url)?;
/// Builds the URL of the archive's own MD5 checksum by appending `.md5` to
/// the full archive URL.
///
/// # Errors
///
/// Returns an error if the resulting string is not a valid URL.
fn get_link_to_archive_md5(url: &Url) -> Result<Url> {
    let checksum_link = format!("{}.md5", url.as_str());
    let parsed = Url::parse(&checksum_link)?;
    Ok(parsed)
}

pub fn download_checksum(url: Url) -> Result<String> {
let client = Client::new();
let response: Response = client.get(md5_url).send()?;
let response: Response = client.get(url).send()?;

if response.status().is_success() {
let md5 = response.text()?;
Expand Down Expand Up @@ -56,12 +65,23 @@ pub fn calculate_checksum(file_path: &Path) -> Result<String> {
Ok(format!("{:x}", hash))
}

pub fn verify(redirect_file_path: &Path, unpacked_file_path: &Path) -> Result<bool> {
/// Compares the checksum of the downloaded archive with the one published
/// next to it.
///
/// The archive URL is read from `redirect_file_path`; the expected checksum is
/// downloaded from that URL with `.md5` appended, and the actual checksum is
/// calculated from the file at `archive_path`.
///
/// Returns `Ok(true)` when both checksums are equal and `Ok(false)` otherwise.
/// Any failure while reading, parsing, downloading or hashing is returned as
/// an error.
pub fn verify_archive(redirect_file_path: &Path, archive_path: &Path) -> Result<bool> {
    let raw_url = std::fs::read(redirect_file_path)?;
    let url_text = String::from_utf8(raw_url)?;
    let checksum_url = get_link_to_archive_md5(&Url::parse(&url_text)?)?;

    let expected = download_checksum(checksum_url)?;
    let actual = calculate_checksum(archive_path)?;

    Ok(actual == expected)
}

/// Compares the checksum of the unpacked database with the published one.
///
/// The archive URL is read from `redirect_file_path`, the link to the
/// `.sql.md5` file is derived from it, the expected checksum is downloaded and
/// compared with the checksum calculated from `unpacked_file_path`.
///
/// Returns `Ok(true)` when both checksums are equal and `Ok(false)` otherwise.
/// Any failure while reading, parsing, downloading or hashing is returned as
/// an error.
pub fn verify_db(redirect_file_path: &Path, unpacked_file_path: &Path) -> Result<bool> {
// The redirect file holds the archive URL as UTF-8 text.
let archive_url_str = String::from_utf8(std::fs::read(redirect_file_path)?)?;
let archive_url = Url::parse(&archive_url_str)?;
let md5_url = get_link_to_db_md5(&archive_url)?;

let md5_expected = download_checksum(&archive_url)?;
let md5_expected = download_checksum(md5_url)?;
let md5_actual = calculate_checksum(unpacked_file_path)?;

Ok(md5_actual == md5_expected)
}
Expand Down
53 changes: 44 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ mod checksum;
mod download;
mod go_spacemesh;
mod parsers;
mod reader_with_bytes;
mod reader_with_progress;
mod sql;
mod unpack;
mod utils;
mod zip;

use checksum::*;
use download::download_with_retries;
use go_spacemesh::get_version;
use parsers::*;
use sql::get_last_layer_from_db;
use unpack::{unpack_zip, unpack_zstd};
use utils::*;
use zip::unpack;

#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
Expand Down Expand Up @@ -159,13 +160,15 @@ fn main() -> anyhow::Result<()> {
let dir_path = node_data;
let temp_file_path = dir_path.join("state.download");
let redirect_file_path = dir_path.join("state.url");
let archive_file_path = dir_path.join("state.zip");
let archive_zip_file_path = dir_path.join("state.zip");
let archive_zstd_file_path = dir_path.join("state.zst");
let unpacked_file_path = dir_path.join("state_downloaded.sql");
let final_file_path = dir_path.join("state.sql");
let wal_file_path = dir_path.join("state.sql-wal");

// Download archive if needed
if !archive_file_path.exists() {
let archive_file_path = if !archive_zip_file_path.exists() && !archive_zstd_file_path.exists()
{
println!("Downloading the latest database...");
let url = if redirect_file_path.exists() {
std::fs::read_to_string(&redirect_file_path)?
Expand All @@ -174,7 +177,7 @@ fn main() -> anyhow::Result<()> {
let go_path_str = go_path
.to_str()
.expect("Cannot resolve path to go-spacemesh");
let path = format!("{}/state.zip", &get_version(go_path_str)?);
let path = format!("{}/state.zst", &get_version(go_path_str)?);
let url = build_url(&download_url, &path);
url.to_string()
};
Expand All @@ -189,9 +192,43 @@ fn main() -> anyhow::Result<()> {
process::exit(1);
}

// Rename `state.download` -> `state.zip`
let archive_file_path = if url.ends_with(".zip") {
archive_zip_file_path
} else {
archive_zstd_file_path
};

// Rename `state.download` -> `state.zip` or `state.zst`, matching the archive type
std::fs::rename(&temp_file_path, &archive_file_path)?;
println!("Archive downloaded!");
archive_file_path
} else if archive_zip_file_path.exists() {
archive_zip_file_path
} else {
archive_zstd_file_path
};

let archive_url = std::fs::read_to_string(&redirect_file_path)?;
let unpack = if archive_url.ends_with(".zip") {
unpack_zip
} else {
unpack_zstd
};

// Verify downloaded archive
match verify_archive(&redirect_file_path, &archive_file_path) {
Ok(true) => {
println!("Archive checksm validated");
}
Ok(false) => {
eprintln!("Archive checksum is invalid. Deleting archive");
std::fs::remove_file(&archive_file_path)?;
process::exit(7);
}
Err(e) => {
eprintln!("Cannot validate archive checksum: {}", e);
process::exit(8);
}
}

// Unzip
Expand All @@ -209,15 +246,13 @@ fn main() -> anyhow::Result<()> {
}
eprintln!("Cannot unpack archive: {}", e);
std::fs::remove_file(&unpacked_file_path)?;
std::fs::remove_file(&archive_file_path)?;
std::fs::remove_file(&redirect_file_path)?;
process::exit(3);
}
}

// Verify checksum
println!("Verifying MD5 checksum...");
match verify(&redirect_file_path, &unpacked_file_path) {
match verify_db(&redirect_file_path, &unpacked_file_path) {
Ok(true) => {
println!("Checksum is valid");
}
Expand Down
33 changes: 33 additions & 0 deletions src/reader_with_bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use std::io::{self, Read};

/// Number of bytes in one mebibyte; progress is reported in these units.
const MB: u64 = 1024 * 1024;

/// A [`Read`] adapter that counts the bytes passing through it and prints a
/// progress line each time the running total crosses a whole-megabyte mark.
pub struct ReaderWithBytes<R: Read> {
    /// The wrapped reader that actually produces the data.
    reader: R,
    /// Total number of bytes read through this adapter so far.
    bytes_read: u64,
    /// Value of `bytes_read` at the moment of the last progress message.
    last_reported: u64,
}

impl<R: Read> ReaderWithBytes<R> {
    /// Wraps `reader`, starting with both counters at zero.
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            bytes_read: 0,
            last_reported: 0,
        }
    }
}

impl<R: Read> Read for ReaderWithBytes<R> {
    /// Reads from the wrapped reader, updates the byte counter and prints a
    /// progress message when a new megabyte mark has been reached.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let count = self.reader.read(buf)?;
        self.bytes_read += count as u64;

        // Compare whole-megabyte counts so that at most one message is printed
        // per megabyte mark, regardless of how small the individual reads are.
        let extracted_mb = self.bytes_read / MB;
        let reported_mb = self.last_reported / MB;
        if extracted_mb > reported_mb {
            println!("Unpacking... {} MB extracted", extracted_mb);
            self.last_reported = self.bytes_read;
        }

        Ok(count)
    }
}
27 changes: 25 additions & 2 deletions src/zip.rs → src/unpack.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use anyhow::Result;
use std::fs::File;
use std::io::{BufReader, Error};
use std::io::{BufReader, BufWriter, Error};
use std::path::Path;
use zip::read::ZipFile;
use zip::ZipArchive;
use zstd::stream::read::Decoder;

use crate::reader_with_bytes::ReaderWithBytes;
use crate::reader_with_progress::ReaderWithProgress;

fn find_file_in_archive<'a>(
Expand All @@ -29,7 +31,28 @@ fn find_file_in_archive<'a>(
))
}

pub fn unpack(archive_path: &Path, output_path: &Path) -> Result<()> {
/// Decompresses the zstd archive at `archive_path` into `output_path`.
///
/// Missing parent directories of `output_path` are created first. The decoder
/// is wrapped in [`ReaderWithBytes`] so that progress is printed while the
/// data is copied.
///
/// # Errors
///
/// Returns an error if the archive cannot be opened or decoded, or if the
/// output file cannot be created or completely written.
pub fn unpack_zstd(archive_path: &Path, output_path: &Path) -> Result<()> {
    let archive = BufReader::new(File::open(archive_path)?);
    let mut decoder = Decoder::new(archive)?;

    // Raise the maximum accepted window log to 31.
    // NOTE(review): presumably the archives are compressed with a large
    // window; confirm against the archive producer.
    decoder.window_log_max(31)?;

    if let Some(parent) = output_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let mut writer = BufWriter::new(File::create(output_path)?);

    let mut reader = ReaderWithBytes::new(decoder);
    std::io::copy(&mut reader, &mut writer)?;

    // Flush explicitly: dropping a `BufWriter` ignores flush errors, which
    // would hide a failed final write (e.g. a full disk) and report success
    // for a truncated file.
    std::io::Write::flush(&mut writer)?;
    println!("Unpacking complete!");

    Ok(())
}

pub fn unpack_zip(archive_path: &Path, output_path: &Path) -> Result<()> {
let file = File::open(archive_path)?;
let mut zip = ZipArchive::new(file)?;

Expand Down
10 changes: 8 additions & 2 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn backup_file(original_path: &PathBuf) -> Result<PathBuf> {
}

fn extract_number_from_url(url: &Url) -> Result<u64> {
let re = Regex::new(r"/(\d+)\.sql\.zip$")?;
let re = Regex::new(r"/(\d+)\.sql\.(zip|zst)$")?;
let path = url.path();
let caps = re
.captures(path)
Expand Down Expand Up @@ -95,11 +95,17 @@ mod tests {
use url::Url;

#[test]
fn test_extract_number_valid() {
fn test_extract_number_zip_valid() {
let url = Url::parse("https://quicksync-downloads.spacemesh.network/10/61579.sql.zip").unwrap();
assert_eq!(extract_number_from_url(&url).unwrap(), 61579);
}

/// A `.sql.zst` archive URL yields the number that precedes the extension.
#[test]
fn test_extract_number_zstd_valid() {
    let archive_link = "https://quicksync-downloads.spacemesh.network/10/61579.sql.zst";
    let url = Url::parse(archive_link).unwrap();
    let number = extract_number_from_url(&url).unwrap();
    assert_eq!(number, 61579);
}

#[test]
fn test_extract_number_invalid() {
let url = Url::parse("https://quicksync.spacemesh.network/state.zip").unwrap();
Expand Down

0 comments on commit c73b887

Please sign in to comment.