diff --git a/Cargo.toml b/Cargo.toml
index 0f39a73..ea83481 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,10 +26,15 @@ include = [
 fst = "0.4.7"
 memmap2 = "0.9.9"
 once_cell = "1.21"
+reqwest = { version = "0.11", features = ["blocking"], optional = true }
 
 [build-dependencies]
 reqwest = { version = "0.11", features = ["blocking"] }
 
+[features]
+default = []
+fetch-latest = ["reqwest"]
+
 [[bin]]
 name = "fst_builder"
 path = "fst_builder/main.rs"
diff --git a/build.rs b/build.rs
index b39667c..f5206a5 100644
--- a/build.rs
+++ b/build.rs
@@ -21,14 +21,22 @@ fn main() {
 
     if !fst_path.exists() {
         println!("cargo::warning=Downloading PURL v{} FST map.", version);
-        download_purl_fst(&fst_path, version);
+        if !download_purl_fst(&fst_path, version.clone()) {
+            // Fallback to local purls.fst if download fails
+            if let Ok(content) = std::fs::read("purls.fst") {
+                let _ = write(&fst_path, &content);
+                println!("cargo::warning=Using local purls.fst file as fallback");
+            } else {
+                println!("cargo::error=Failed to download and no local purls.fst found");
+            }
+        }
     }
 
     println!("cargo::rerun-if-changed=build.rs");
     println!("cargo::rerun-if-changed=Cargo.toml");
 }
 
-fn download_purl_fst(path: &Path, version: String) {
+fn download_purl_fst(path: &Path, version: String) -> bool {
     let url = &format!(
         "https://raw.githubusercontent.com/aboutcode-org/purl-validator.rs/refs/tags/v{}/purls.fst",
         version
@@ -39,15 +47,27 @@ fn download_purl_fst(path: &Path, version: String) {
             let status = response.status();
 
             if status.is_success() {
-                let content = response.bytes().expect("Failed to read response body");
-                write(path, &content).expect("Failed to write");
+                match response.bytes() {
+                    Ok(content) => {
+                        let _ = write(path, &content);
+                        true
+                    }
+                    Err(e) => {
+                        println!("cargo::warning=Failed to read response body: {}", e);
+                        false
+                    }
+                }
             } else {
                 println!(
-                    "cargo::error=Failed to fetch purls.fst: {}",
+                    "cargo::warning=Failed to fetch purls.fst: {}",
                     response.status()
                 );
+                false
             }
         }
-        Err(e) => println!("cargo::error=Failed to request: {}", e),
+        Err(e) => {
+            println!("cargo::warning=Failed to request purls.fst: {}", e);
+            false
+        }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 20ae634..89ae668 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,13 +32,92 @@
 use fst::Set;
 use once_cell::sync::Lazy;
 use std::env;
+use std::path::PathBuf;
 
 static FST_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/purls.fst"));
 
-static VALIDATOR: Lazy<Set<&[u8]>> =
-    Lazy::new(|| Set::new(FST_DATA).expect("Failed to load FST from embedded bytes"));
+fn load_purl_fst_data() -> &'static [u8] {
+    FST_DATA
+}
+
+fn should_fetch_latest() -> bool {
+    env::var("PURL_VALIDATOR_FETCH_LATEST")
+        .map(|v| v.to_lowercase() == "true")
+        .unwrap_or(false)
+}
+
+#[allow(dead_code)]
+fn cache_path() -> PathBuf {
+    env::temp_dir().join("purl-validator-cache.fst")
+}
+
+#[cfg(feature = "fetch-latest")]
+fn load_cached_fst() -> Option<Vec<u8>> {
+    let path = cache_path();
+    std::fs::read(path).ok()
+}
+
+#[cfg(not(feature = "fetch-latest"))]
+fn load_cached_fst() -> Option<Vec<u8>> {
+    None
+}
 
-fn strip_and_check_purl(packageurl: &str, fst_map: &Set<&[u8]>) -> bool {
+#[cfg(feature = "fetch-latest")]
+fn fetch_and_cache_fst() -> Option<Vec<u8>> {
+    let version = env!("CARGO_PKG_VERSION");
+    let url = format!(
+        "https://raw.githubusercontent.com/aboutcode-org/purl-validator.rs/refs/tags/v{}/purls.fst",
+        version
+    );
+
+    match reqwest::blocking::get(&url) {
+        Ok(response) => {
+            if response.status().is_success() {
+                match response.bytes() {
+                    Ok(bytes) => {
+                        let data = bytes.to_vec();
+                        if let Err(_) = std::fs::write(cache_path(), &data) {
+                            // Cache write failed, but we still have the data in memory
+                        }
+                        return Some(data);
+                    }
+                    Err(_) => {
+                        return None;
+                    }
+                }
+            }
+            None
+        }
+        Err(_) => None,
+    }
+}
+
+#[cfg(not(feature = "fetch-latest"))]
+fn fetch_and_cache_fst() -> Option<Vec<u8>> {
+    None
+}
+
+static VALIDATOR: Lazy<Set<Vec<u8>>> = Lazy::new(|| {
+    let fst_data = if should_fetch_latest() {
+        // Try to load from cache first
+        if let Some(cached_data) = load_cached_fst() {
+            cached_data
+        } else if let Some(fetched_data) = fetch_and_cache_fst() {
+            // Fetch and cache succeeded
+            fetched_data
+        } else {
+            // Fallback to embedded data
+            load_purl_fst_data().to_vec()
+        }
+    } else {
+        // Use embedded data by default
+        load_purl_fst_data().to_vec()
+    };
+
+    Set::new(fst_data).expect("Failed to load FST")
+});
+
+fn strip_and_check_purl(packageurl: &str, fst_map: &Set<Vec<u8>>) -> bool {
     let trimmed_packageurl = packageurl.trim_end_matches("/");
     fst_map.contains(trimmed_packageurl)
 }
@@ -50,6 +129,10 @@ fn strip_and_check_purl(packageurl: &str, fst_map: &Set<&[u8]>) -> bool {
 ///
 /// Use pre-built FST (Finite State Transducer) to perform lookups and confirm whether
 /// the **base PURL** exists.
+///
+/// By default, uses the embedded (offline) PURL database. To enable runtime fetching of the
+/// latest PURL data, set the `PURL_VALIDATOR_FETCH_LATEST` environment variable to `true`
+/// (only works if the `fetch-latest` feature is enabled).
 pub fn validate(packageurl: &str) -> bool {
     strip_and_check_purl(packageurl, &VALIDATOR)
 }
diff --git a/src/validate_tests.rs b/src/validate_tests.rs
index 141658b..0b3d851 100644
--- a/src/validate_tests.rs
+++ b/src/validate_tests.rs
@@ -18,8 +18,7 @@ use std::path::Path;
 fn test_validate_with_custom_file() {
     let test_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/test_purls.fst");
     let data: Vec<u8> = fs::read(test_path).unwrap();
-    let data_slice: &[u8] = &data;
-    let validator = Set::new(data_slice).unwrap();
+    let validator = Set::new(data).unwrap();
     assert!(strip_and_check_purl(
         "pkg:nuget/FluentUtils.EnumExtensions",
         &validator
@@ -31,8 +30,7 @@ fn test_validate_with_custom_file() {
 fn test_validate_with_packageurl_trailing_slash() {
     let test_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/test_purls.fst");
     let data: Vec<u8> = fs::read(test_path).unwrap();
-    let data_slice: &[u8] = &data;
-    let validator = Set::new(data_slice).unwrap();
+    let validator = Set::new(data).unwrap();
 
     assert!(validator.contains("pkg:nuget/FluentUtils.EnumExtensions"));
     assert!(strip_and_check_purl(
@@ -40,3 +38,57 @@ fn test_validate_with_packageurl_trailing_slash() {
         &validator
     ));
 }
+
+#[test]
+fn test_default_behavior_without_fetch_env() {
+    // When PURL_VALIDATOR_FETCH_LATEST is not set, should use embedded data
+    // We can't actually unset env vars in tests without unsafe, so we just verify
+    // that validation works by default
+    let result = validate("pkg:nuget/FluentValidation");
+    // Just ensure it completes without panic
+    let _ = result;
+}
+
+#[test]
+fn test_cache_path_generation() {
+    // Test that cache_path returns a valid path
+    let path = cache_path();
+    assert!(path.to_string_lossy().contains("purl-validator-cache.fst"));
+}
+
+#[test]
+fn test_validate_defaults_to_embedded() {
+    // Test that validate always uses embedded FST when fetch-latest feature is not enabled
+    // (or when fetch fails and falls back)
+    // This should work without network access
+    let result = validate("pkg:nuget/FluentValidation");
+    // Just verify it completes without panic
+    let _ = result;
+}
+
+#[cfg(feature = "fetch-latest")]
+#[test]
+fn test_fetch_latest_feature_enabled() {
+    // This test only runs when fetch-latest feature is enabled
+    // Verify the feature-gated functions compile correctly
+    let cached = load_cached_fst();
+    // cached may be None or Some, both are valid
+    let _ = cached;
+
+    // fetch_and_cache_fst should compile when feature is enabled
+    let fetched = fetch_and_cache_fst();
+    // fetched may be None or Some, both are valid
+    let _ = fetched;
+}
+
+#[cfg(not(feature = "fetch-latest"))]
+#[test]
+fn test_fetch_latest_feature_disabled() {
+    // When feature is disabled, fetch functions should return None
+    let cached = load_cached_fst();
+    assert!(cached.is_none(), "load_cached_fst should return None when feature is disabled");
+
+    let fetched = fetch_and_cache_fst();
+    assert!(fetched.is_none(), "fetch_and_cache_fst should return None when feature is disabled");
+}
+