Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@ include = [
fst = "0.4.7"
memmap2 = "0.9.9"
once_cell = "1.21"
reqwest = { version = "0.11", features = ["blocking"], optional = true }

[build-dependencies]
reqwest = { version = "0.11", features = ["blocking"] }

[features]
default = []
fetch-latest = ["reqwest"]

[[bin]]
name = "fst_builder"
path = "fst_builder/main.rs"
32 changes: 26 additions & 6 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@ fn main() {

if !fst_path.exists() {
println!("cargo::warning=Downloading PURL v{} FST map.", version);
download_purl_fst(&fst_path, version);
if !download_purl_fst(&fst_path, version.clone()) {
// Fallback to local purls.fst if download fails
if let Ok(content) = std::fs::read("purls.fst") {
let _ = write(&fst_path, &content);
println!("cargo::warning=Using local purls.fst file as fallback");
} else {
println!("cargo::error=Failed to download and no local purls.fst found");
}
}
}

println!("cargo::rerun-if-changed=build.rs");
println!("cargo::rerun-if-changed=Cargo.toml");
}

fn download_purl_fst(path: &Path, version: String) {
fn download_purl_fst(path: &Path, version: String) -> bool {
let url = &format!(
"https://raw.githubusercontent.com/aboutcode-org/purl-validator.rs/refs/tags/v{}/purls.fst",
version
Expand All @@ -39,15 +47,27 @@ fn download_purl_fst(path: &Path, version: String) {
let status = response.status();

if status.is_success() {
let content = response.bytes().expect("Failed to read response body");
write(path, &content).expect("Failed to write");
match response.bytes() {
Ok(content) => {
let _ = write(path, &content);
true
}
Err(e) => {
println!("cargo::warning=Failed to read response body: {}", e);
false
}
}
} else {
println!(
"cargo::error=Failed to fetch purls.fst: {}",
"cargo::warning=Failed to fetch purls.fst: {}",
response.status()
);
false
}
}
Err(e) => println!("cargo::error=Failed to request: {}", e),
Err(e) => {
println!("cargo::warning=Failed to request purls.fst: {}", e);
false
}
}
}
89 changes: 86 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,92 @@ use fst::Set;

use once_cell::sync::Lazy;
use std::env;
use std::path::PathBuf;

static FST_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/purls.fst"));

static VALIDATOR: Lazy<Set<&'static [u8]>> =
Lazy::new(|| Set::new(FST_DATA).expect("Failed to load FST from embedded bytes"));
/// Returns the PURL FST bytes held in the `FST_DATA` static.
///
/// The returned slice is `'static`, so callers that need an owned buffer must copy it.
fn load_purl_fst_data() -> &'static [u8] {
FST_DATA
}

/// Reports whether runtime fetching was requested through the environment.
///
/// Returns `true` only when `PURL_VALIDATOR_FETCH_LATEST` is set to `true`, compared
/// case-insensitively. An unset variable, a non-Unicode value, or any other value
/// yields `false`.
fn should_fetch_latest() -> bool {
    env::var("PURL_VALIDATOR_FETCH_LATEST")
        // Compare in place instead of allocating a lowercased copy of the value.
        .map(|value| value.eq_ignore_ascii_case("true"))
        .unwrap_or(false)
}

/// Builds the location of the on-disk FST cache: a file named
/// `purl-validator-cache.fst` directly inside the system temporary directory.
// Outside of tests only the `fetch-latest` code paths call this, so default builds
// would otherwise warn about an unused function.
#[allow(dead_code)]
fn cache_path() -> PathBuf {
    let mut location = env::temp_dir();
    location.push("purl-validator-cache.fst");
    location
}

/// Reads the cached FST bytes from the file at `cache_path()`.
///
/// Returns `None` when the file cannot be read for any reason.
#[cfg(feature = "fetch-latest")]
fn load_cached_fst() -> Option<Vec<u8>> {
    std::fs::read(cache_path()).ok()
}

/// Stand-in used when the `fetch-latest` feature is disabled: it never reads a cache
/// and always returns `None`.
#[cfg(not(feature = "fetch-latest"))]
fn load_cached_fst() -> Option<Vec<u8>> {
    None
}

fn strip_and_check_purl(packageurl: &str, fst_map: &Set<&[u8]>) -> bool {
/// Downloads the `purls.fst` published under this crate version's Git tag and stores a
/// copy at `cache_path()`.
///
/// Returns the downloaded bytes, or `None` when the request fails, the server answers
/// with a non-success status, or the response body cannot be read. A failure to write
/// the cache file does not affect the return value.
#[cfg(feature = "fetch-latest")]
fn fetch_and_cache_fst() -> Option<Vec<u8>> {
    let version = env!("CARGO_PKG_VERSION");
    let url = format!(
        "https://raw.githubusercontent.com/aboutcode-org/purl-validator.rs/refs/tags/v{}/purls.fst",
        version
    );

    let response = reqwest::blocking::get(&url).ok()?;
    if !response.status().is_success() {
        return None;
    }
    let data = response.bytes().ok()?.to_vec();

    // Caching is best-effort: the bytes are already in memory, so a failed write only
    // means the next run has no cache file to read.
    let _ = std::fs::write(cache_path(), &data);
    Some(data)
}

/// Stand-in used when the `fetch-latest` feature is disabled: it performs no network
/// access and always returns `None`.
#[cfg(not(feature = "fetch-latest"))]
fn fetch_and_cache_fst() -> Option<Vec<u8>> {
    None
}

/// Process-wide PURL set, built on first access.
///
/// When `should_fetch_latest()` is true the sources are tried in this order: the cache
/// file, then a fresh download, then the embedded data. Otherwise the embedded data is
/// used directly.
///
/// # Panics
///
/// Panics on first access if the embedded data itself is not a valid FST.
static VALIDATOR: Lazy<Set<Vec<u8>>> = Lazy::new(|| {
    if should_fetch_latest() {
        // Bytes that do not parse as an FST (for example a truncated cache file) are
        // treated like a missing source, so the next source is tried instead of
        // panicking on every validation call.
        let runtime_set = load_cached_fst()
            .and_then(|bytes| Set::new(bytes).ok())
            .or_else(|| fetch_and_cache_fst().and_then(|bytes| Set::new(bytes).ok()));

        if let Some(set) = runtime_set {
            return set;
        }
    }

    // Default path, and the fallback when no runtime source produced a usable set.
    Set::new(load_purl_fst_data().to_vec()).expect("Failed to load FST")
});

/// Checks whether `packageurl` is a member of `fst_map`.
///
/// Every trailing `/` is removed from `packageurl` before the lookup, so
/// `pkg:type/name` and `pkg:type/name/` give the same answer.
fn strip_and_check_purl(packageurl: &str, fst_map: &Set<Vec<u8>>) -> bool {
    fst_map.contains(packageurl.trim_end_matches('/'))
}
Expand All @@ -50,6 +129,10 @@ fn strip_and_check_purl(packageurl: &str, fst_map: &Set<&[u8]>) -> bool {
///
/// Use pre-built FST (Finite State Transducer) to perform lookups and confirm whether
/// the **base PURL** exists.
///
/// By default, uses the embedded (offline) PURL database. To enable runtime fetching of the
/// latest PURL data, set the `PURL_VALIDATOR_FETCH_LATEST` environment variable to `true`
/// (only works if the `fetch-latest` feature is enabled).
pub fn validate(packageurl: &str) -> bool {
strip_and_check_purl(packageurl, &VALIDATOR)
}
Expand Down
60 changes: 56 additions & 4 deletions src/validate_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ use std::path::Path;
fn test_validate_with_custom_file() {
let test_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/test_purls.fst");
let data: Vec<u8> = fs::read(test_path).unwrap();
let data_slice: &[u8] = &data;
let validator = Set::new(data_slice).unwrap();
let validator = Set::new(data).unwrap();
assert!(strip_and_check_purl(
"pkg:nuget/FluentUtils.EnumExtensions",
&validator
Expand All @@ -31,12 +30,65 @@ fn test_validate_with_custom_file() {
fn test_validate_with_packageurl_trailing_slash() {
let test_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/data/test_purls.fst");
let data: Vec<u8> = fs::read(test_path).unwrap();
let data_slice: &[u8] = &data;
let validator = Set::new(data_slice).unwrap();
let validator = Set::new(data).unwrap();

assert!(validator.contains("pkg:nuget/FluentUtils.EnumExtensions"));
assert!(strip_and_check_purl(
"pkg:nuget/FluentUtils.EnumExtensions/",
&validator
));
}

#[test]
fn test_default_behavior_without_fetch_env() {
    // Smoke test: the environment is left untouched and the result is deliberately
    // ignored; the only requirement is that `validate` returns without panicking.
    let _ = validate("pkg:nuget/FluentValidation");
}

#[test]
fn test_cache_path_generation() {
    // Pin the final path component. A substring search over the lossy string form
    // would also pass if the name appeared anywhere else in the path.
    let path = cache_path();
    assert!(path.ends_with("purl-validator-cache.fst"));
}

#[test]
fn test_validate_defaults_to_embedded() {
    // Smoke test: `validate` must return without panicking. The outcome of the lookup
    // is intentionally not asserted.
    let _ = validate("pkg:nuget/FluentValidation");
}

#[cfg(feature = "fetch-latest")]
#[test]
fn test_fetch_latest_feature_enabled() {
    // Built only with the `fetch-latest` feature. Each helper may return `Some` or
    // `None`; the test only requires that both can be called without panicking.
    let _ = load_cached_fst();
    let _ = fetch_and_cache_fst();
}

#[cfg(not(feature = "fetch-latest"))]
#[test]
fn test_fetch_latest_feature_disabled() {
    // Without the feature both helpers are stubs that must report "nothing available".
    assert!(
        load_cached_fst().is_none(),
        "load_cached_fst should return None when feature is disabled"
    );
    assert!(
        fetch_and_cache_fst().is_none(),
        "fetch_and_cache_fst should return None when feature is disabled"
    );
}