diff --git a/src/lib.rs b/src/lib.rs
index 20ae634..9aa69c4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,10 +33,51 @@
 use fst::Set;
 use once_cell::sync::Lazy;
 use std::env;
 
+mod runtime;
+
 static FST_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/purls.fst"));
-static VALIDATOR: Lazy<Set<&'static [u8]>> =
-    Lazy::new(|| Set::new(FST_DATA).expect("Failed to load FST from embedded bytes"));
+
+/// Report whether runtime mode is enabled.
+///
+/// Controlled by the `PURL_VALIDATOR_FETCH_LATEST` environment variable: the
+/// values `1` and `true` (ASCII case-insensitive) enable it. An unset variable,
+/// a non-Unicode value, or any other value leaves runtime mode off.
+fn runtime_mode_enabled() -> bool {
+    env::var("PURL_VALIDATOR_FETCH_LATEST")
+        .map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
+        .unwrap_or(false)
+}
+
+/// Pick the FST bytes to try first.
+///
+/// Policy:
+/// - Default: bundled FST.
+/// - If runtime mode is enabled: the runtime FST from disk when it can be
+///   opened and mapped, otherwise the bundled FST.
+///
+/// The returned bytes are not validated here; `VALIDATOR` falls back to the
+/// bundled data when they do not parse as an FST.
+fn load_fst_bytes() -> &'static [u8] {
+    if runtime_mode_enabled() {
+        if let Some(bytes) = runtime::try_load_runtime_fst_bytes() {
+            return bytes;
+        }
+    }
+    FST_DATA
+}
+
+/// Process-wide set used for lookups, built on first access.
+///
+/// A runtime FST that fails to parse (for example a truncated or corrupt cache
+/// file) is discarded in favor of the bundled FST, so a bad cache cannot make
+/// validation panic. The bundled FST is produced by the build, so failing to
+/// parse it is a bug and still panics.
+static VALIDATOR: Lazy<Set<&'static [u8]>> = Lazy::new(|| {
+    Set::new(load_fst_bytes())
+        .or_else(|_| Set::new(FST_DATA))
+        .expect("Failed to load FST from embedded bytes")
+});
 
 fn strip_and_check_purl(packageurl: &str, fst_map: &Set<&[u8]>) -> bool {
     let trimmed_packageurl = packageurl.trim_end_matches("/");
diff --git a/src/runtime.rs b/src/runtime.rs
new file mode 100644
index 0000000..1551650
--- /dev/null
+++ b/src/runtime.rs
@@ -0,0 +1,54 @@
+use memmap2::Mmap;
+use once_cell::sync::OnceCell;
+use std::fs::File;
+use std::path::PathBuf;
+
+/// Global storage for the memory-mapped runtime FST.
+///
+/// Keeping the mapping in a `static` makes its bytes borrowable as `'static`,
+/// which `try_load_runtime_fst_bytes` relies on for its return type.
+static RUNTIME_FST: OnceCell<Mmap> = OnceCell::new();
+
+/// Try to load runtime FST bytes from disk.
+///
+/// The file is memory-mapped on the first successful call and the mapping is
+/// reused afterwards. A failed attempt stores nothing, so a later call tries
+/// again.
+///
+/// Returns:
+/// - `Some(&'static [u8])` if the runtime FST file could be opened and mapped
+/// - `None` if it is missing or any I/O error occurs
+///
+/// The bytes are not checked for being a valid FST.
+pub fn try_load_runtime_fst_bytes() -> Option<&'static [u8]> {
+    RUNTIME_FST
+        .get_or_try_init(|| -> std::io::Result<Mmap> {
+            // Opening reports a missing file as an error, so no separate
+            // existence check is needed.
+            let file = File::open(runtime_fst_path())?;
+
+            // SAFETY: the mapping is only read, and it is stored in a `static`
+            // so it is never unmapped while borrowed. Soundness still depends
+            // on no other process truncating or rewriting the cache file in
+            // place while it is mapped; writers should replace it atomically
+            // (write a temporary file, then rename).
+            unsafe { Mmap::map(&file) }
+        })
+        .ok()
+        .map(|mmap| &mmap[..])
+}
+
+/// Compute the path of the runtime FST cache file.
+///
+/// Current design:
+///   `$HOME/.cache/purl-validator/purls.fst`
+///
+/// When `HOME` is unset the path is relative to the current directory.
+fn runtime_fst_path() -> PathBuf {
+    std::env::var_os("HOME")
+        .map(PathBuf::from)
+        .unwrap_or_else(|| PathBuf::from("."))
+        .join(".cache")
+        .join("purl-validator")
+        .join("purls.fst")
+}