From 2c7d5ea41e8efed71a4e84f58ddd589b14180675 Mon Sep 17 00:00:00 2001 From: Douwe Osinga Date: Sun, 1 Feb 2026 14:32:09 +0100 Subject: [PATCH 1/3] Clean up build canonical warnings --- .../canonical/build_canonical_models.rs | 357 ++++++++---------- 1 file changed, 159 insertions(+), 198 deletions(-) diff --git a/crates/goose/src/providers/canonical/build_canonical_models.rs b/crates/goose/src/providers/canonical/build_canonical_models.rs index b683e8f3279d..54d4f0ee3b22 100644 --- a/crates/goose/src/providers/canonical/build_canonical_models.rs +++ b/crates/goose/src/providers/canonical/build_canonical_models.rs @@ -19,8 +19,10 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::path::PathBuf; const MODELS_DEV_API_URL: &str = "https://models.dev/api.json"; +const DEFAULT_CONTEXT_LIMIT: usize = 128_000; +const SEPARATOR: &str = "================================================================================"; +const SUBSEPARATOR: &str = "--------------------------------------------------------------------------------"; -// Providers to include in canonical models const ALLOWED_PROVIDERS: &[&str] = &[ "anthropic", "google", @@ -37,7 +39,6 @@ const ALLOWED_PROVIDERS: &[&str] = &[ "google-vertex", ]; -// Normalize provider names from models.dev to our canonical format fn normalize_provider_name(provider: &str) -> &str { match provider { "llama" => "meta-llama", @@ -71,23 +72,11 @@ struct MappingEntry { #[derive(Debug, Clone, Serialize, Deserialize)] struct MappingReport { - /// Timestamp of this report timestamp: String, - - /// Models that are NOT mapped to canonical models unmapped_models: Vec, - - /// All mappings: (provider, model) -> canonical model - /// Stored per provider for backward compatibility all_mappings: BTreeMap>, - - /// Flat list of all mappings for easier comparison (lock file format) mapped_models: Vec, - - /// Total models checked per provider model_counts: BTreeMap, - - /// Canonical models referenced canonical_models_used: 
BTreeSet, } @@ -110,16 +99,16 @@ impl MappingReport { mappings: Vec, recommended_models: Vec, ) { - let mapping_map: HashMap = mappings + let mapping_map: HashMap<&str, &str> = mappings .iter() - .map(|m| (m.provider_model.clone(), m.canonical_model.clone())) + .map(|m| (m.provider_model.as_str(), m.canonical_model.as_str())) .collect(); - let recommended_set: std::collections::HashSet = - recommended_models.into_iter().collect(); + let recommended_set: std::collections::HashSet<&str> = + recommended_models.iter().map(|s| s.as_str()).collect(); for model in &fetched_models { - if !mapping_map.contains_key(model) { + if !mapping_map.contains_key(model.as_str()) { self.unmapped_models.push(ProviderModelPair { provider: provider_name.to_string(), model: model.clone(), @@ -127,13 +116,14 @@ impl MappingReport { } } - for (model, canonical) in &mapping_map { - self.canonical_models_used.insert(canonical.clone()); + for mapping in &mappings { + self.canonical_models_used + .insert(mapping.canonical_model.clone()); self.mapped_models.push(MappingEntry { provider: provider_name.to_string(), - model: model.clone(), - canonical: canonical.clone(), - recommended: recommended_set.contains(model), + model: mapping.provider_model.clone(), + canonical: mapping.canonical_model.clone(), + recommended: recommended_set.contains(mapping.provider_model.as_str()), }); } @@ -144,13 +134,13 @@ impl MappingReport { } fn print_summary(&self) { - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); println!("CANONICAL MODEL MAPPING REPORT"); - println!("{}", "=".repeat(80)); + println!("{SEPARATOR}"); println!("\nGenerated: {}\n", self.timestamp); println!("Models Checked Per Provider:"); - println!("{}", "-".repeat(80)); + println!("{SUBSEPARATOR}"); let mut providers: Vec<_> = self.model_counts.iter().collect(); providers.sort_by_key(|(name, _)| *name); for (provider, count) in providers { @@ -166,9 +156,9 @@ impl MappingReport { ); } - println!("\n{}", "=".repeat(80)); + 
println!("\n{SEPARATOR}"); println!("UNMAPPED MODELS ({})", self.unmapped_models.len()); - println!("{}", "=".repeat(80)); + println!("{SEPARATOR}"); if self.unmapped_models.is_empty() { println!("✓ All models are mapped to canonical models!"); @@ -194,12 +184,12 @@ impl MappingReport { } } - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); println!( "CANONICAL MODELS REFERENCED ({})", self.canonical_models_used.len() ); - println!("{}", "=".repeat(80)); + println!("{SEPARATOR}"); if self.canonical_models_used.is_empty() { println!(" (none yet)"); } else { @@ -210,56 +200,46 @@ impl MappingReport { } } - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); } fn compare_with_previous(&self, previous: &MappingReport) { - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); println!("CHANGES SINCE PREVIOUS RUN"); - println!("{}", "=".repeat(80)); + println!("{SEPARATOR}"); - let mut prev_map: HashMap<(String, String), String> = HashMap::new(); - for entry in &previous.mapped_models { - prev_map.insert( - (entry.provider.clone(), entry.model.clone()), - entry.canonical.clone(), - ); - } + // Build maps using references to avoid cloning unless necessary + let prev_map: HashMap<(&str, &str), &str> = previous + .mapped_models + .iter() + .map(|e| ((e.provider.as_str(), e.model.as_str()), e.canonical.as_str())) + .collect(); - let mut curr_map: HashMap<(String, String), String> = HashMap::new(); - for entry in &self.mapped_models { - curr_map.insert( - (entry.provider.clone(), entry.model.clone()), - entry.canonical.clone(), - ); - } + let curr_map: HashMap<(&str, &str), &str> = self + .mapped_models + .iter() + .map(|e| ((e.provider.as_str(), e.model.as_str()), e.canonical.as_str())) + .collect(); let mut changed_mappings = Vec::new(); let mut added_mappings = Vec::new(); let mut removed_mappings = Vec::new(); - for (key @ (provider, model), canonical) in &curr_map { - match prev_map.get(key) { - Some(prev_canonical) if prev_canonical 
!= canonical => { - changed_mappings.push(( - provider.clone(), - model.clone(), - prev_canonical.clone(), - canonical.clone(), - )); + for (&key @ (provider, model), &canonical) in &curr_map { + match prev_map.get(&key) { + Some(&prev_canonical) if prev_canonical != canonical => { + changed_mappings.push((provider, model, prev_canonical, canonical)); } None => { - added_mappings.push((provider.clone(), model.clone(), canonical.clone())); - } - _ => { - // No change + added_mappings.push((provider, model, canonical)); } + _ => {} } } - for (key @ (provider, model), canonical) in &prev_map { - if !curr_map.contains_key(key) { - removed_mappings.push((provider.clone(), model.clone(), canonical.clone())); + for (&key @ (provider, model), &canonical) in &prev_map { + if !curr_map.contains_key(&key) { + removed_mappings.push((provider, model, canonical)); } } @@ -293,7 +273,7 @@ impl MappingReport { } } - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); } fn save_to_file(&self, path: &PathBuf) -> Result<()> { @@ -328,7 +308,14 @@ impl MappingReport { } } -async fn build_canonical_models() -> Result<()> { +/// Get the path to a data file in the canonical models directory +fn data_file_path(filename: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("src/providers/canonical/data") + .join(filename) +} + +async fn fetch_models_dev() -> Result { println!("Fetching models from models.dev API..."); let client = reqwest::Client::new(); @@ -339,10 +326,104 @@ async fn build_canonical_models() -> Result<()> { .await .context("Failed to fetch from models.dev API")?; - let json: Value = response + response .json() .await - .context("Failed to parse models.dev response")?; + .context("Failed to parse models.dev response") +} + +/// Extract optional string field from JSON +fn get_string(value: &Value, field: &str) -> Option { + value.get(field).and_then(|v| v.as_str()).map(String::from) +} + +fn parse_modalities(model_data: &Value, field: &str) -> 
Vec { + model_data + .get("modalities") + .and_then(|m| m.get(field)) + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str()) + .filter_map(|s| { + serde_json::from_value(serde_json::Value::String(s.to_string())).ok() + }) + .collect() + }) + .unwrap_or_else(|| vec![Modality::Text]) +} + +fn process_model( + model_id: &str, + model_data: &Value, + normalized_provider: &str, +) -> Result> { + let cost_data = match model_data.get("cost") { + Some(c) if !c.is_null() => c, + _ => return Ok(None), + }; + + let name = model_data["name"] + .as_str() + .with_context(|| format!("Model {} missing name", model_id))?; + + let canonical_id = canonical_name(normalized_provider, model_id); + + let modalities = Modalities { + input: parse_modalities(model_data, "input"), + output: parse_modalities(model_data, "output"), + }; + + let cost = Pricing { + input: cost_data.get("input").and_then(|v| v.as_f64()), + output: cost_data.get("output").and_then(|v| v.as_f64()), + cache_read: cost_data.get("cache_read").and_then(|v| v.as_f64()), + cache_write: cost_data.get("cache_write").and_then(|v| v.as_f64()), + }; + + let limit = Limit { + context: model_data + .get("limit") + .and_then(|l| l.get("context")) + .and_then(|v| v.as_u64()) + .unwrap_or(DEFAULT_CONTEXT_LIMIT as u64) as usize, + output: model_data + .get("limit") + .and_then(|l| l.get("output")) + .and_then(|v| v.as_u64()) + .map(|v| v as usize), + }; + + let canonical_model = CanonicalModel { + id: canonical_id.clone(), + name: name.to_string(), + family: get_string(model_data, "family"), + attachment: model_data.get("attachment").and_then(|v| v.as_bool()), + reasoning: model_data.get("reasoning").and_then(|v| v.as_bool()), + tool_call: model_data + .get("tool_call") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + temperature: model_data.get("temperature").and_then(|v| v.as_bool()), + knowledge: get_string(model_data, "knowledge"), + release_date: get_string(model_data, "release_date"), + 
last_updated: get_string(model_data, "last_updated"), + modalities, + open_weights: model_data.get("open_weights").and_then(|v| v.as_bool()), + cost, + limit, + }; + + let model_name = canonical_id + .strip_prefix(&format!("{}/", normalized_provider)) + .unwrap_or(model_id) + .to_string(); + + Ok(Some((model_name, canonical_model))) +} + +async fn build_canonical_models() -> Result<()> { + let json = fetch_models_dev().await?; let providers_obj = json .as_object() @@ -355,7 +436,7 @@ async fn build_canonical_models() -> Result<()> { if let Some(provider_data) = providers_obj.get(*provider_key) { let models = provider_data["models"] .as_object() - .context(format!("Provider {} missing models object", provider_key))?; + .with_context(|| format!("Provider {} missing models object", provider_key))?; let normalized_provider = normalize_provider_name(provider_key); @@ -366,133 +447,17 @@ async fn build_canonical_models() -> Result<()> { ); for (model_id, model_data) in models { - // Skip models without pricing information - let cost_data = match model_data.get("cost") { - Some(c) if !c.is_null() => c, - _ => continue, - }; - - let name = model_data["name"] - .as_str() - .context(format!("Model {} missing name", model_id))?; - - // Use canonical_name to normalize the model ID (strips date stamps, etc.) 
- // This deduplicates different versions of the same model - let canonical_id = canonical_name(normalized_provider, model_id); - - let family = model_data - .get("family") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let attachment = model_data.get("attachment").and_then(|v| v.as_bool()); - - let reasoning = model_data.get("reasoning").and_then(|v| v.as_bool()); - - let tool_call = model_data - .get("tool_call") - .and_then(|v| v.as_bool()) - .unwrap_or(false); - - let temperature = model_data.get("temperature").and_then(|v| v.as_bool()); - - let knowledge = model_data - .get("knowledge") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let release_date = model_data - .get("release_date") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let last_updated = model_data - .get("last_updated") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let modalities = Modalities { - input: model_data - .get("modalities") - .and_then(|m| m.get("input")) - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .filter_map(|s| { - serde_json::from_value(serde_json::Value::String(s.to_string())) - .ok() - }) - .collect() - }) - .unwrap_or_else(|| vec![Modality::Text]), - output: model_data - .get("modalities") - .and_then(|m| m.get("output")) - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .filter_map(|s| { - serde_json::from_value(serde_json::Value::String(s.to_string())) - .ok() - }) - .collect() - }) - .unwrap_or_else(|| vec![Modality::Text]), - }; - - let open_weights = model_data.get("open_weights").and_then(|v| v.as_bool()); - - let cost = Pricing { - input: cost_data.get("input").and_then(|v| v.as_f64()), - output: cost_data.get("output").and_then(|v| v.as_f64()), - cache_read: cost_data.get("cache_read").and_then(|v| v.as_f64()), - cache_write: cost_data.get("cache_write").and_then(|v| v.as_f64()), - }; - - let limit = Limit { - context: model_data - 
.get("limit") - .and_then(|l| l.get("context")) - .and_then(|v| v.as_u64()) - .unwrap_or(128_000) as usize, - output: model_data - .get("limit") - .and_then(|l| l.get("output")) - .and_then(|v| v.as_u64()) - .map(|v| v as usize), - }; - - let canonical_model = CanonicalModel { - id: canonical_id.clone(), - name: name.to_string(), - family, - attachment, - reasoning, - tool_call, - temperature, - knowledge, - release_date, - last_updated, - modalities, - open_weights, - cost, - limit, - }; - - // Extract the normalized model name (everything after "provider/") - let model_name = canonical_id - .strip_prefix(&format!("{}/", normalized_provider)) - .unwrap_or(model_id); - registry.register(normalized_provider, model_name, canonical_model); - total_models += 1; + if let Some((model_name, canonical_model)) = + process_model(model_id, model_data, normalized_provider)? + { + registry.register(normalized_provider, &model_name, canonical_model); + total_models += 1; + } } } } - let output_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("src/providers/canonical/data/canonical_models.json"); + let output_path = data_file_path("canonical_models.json"); registry.to_file(&output_path)?; println!( "\n✓ Wrote {} models to {}", @@ -566,11 +531,10 @@ async fn check_provider( } async fn check_canonical_mappings() -> Result<()> { - println!("\n{}", "=".repeat(80)); + println!("\n{SEPARATOR}"); println!("Canonical Model Checker"); println!("Checking model mappings for top providers...\n"); - // Define providers to check with their default models let providers = vec![ ("anthropic", "claude-3-5-sonnet-20241022"), ("openai", "gpt-4"), @@ -595,8 +559,7 @@ async fn check_canonical_mappings() -> Result<()> { report.print_summary(); - let output_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("src/providers/canonical/data/canonical_mapping_report.json"); + let output_path = data_file_path("canonical_mapping_report.json"); if output_path.exists() { if let Ok(previous) = 
MappingReport::load_from_file(&output_path) { @@ -614,10 +577,8 @@ async fn check_canonical_mappings() -> Result<()> { async fn main() -> Result<()> { let args = Args::parse(); - // Build canonical models build_canonical_models().await?; - // Run the checker unless --no-check is passed if !args.no_check { check_canonical_mappings().await?; } From 5477cea4b05301cc2538228225e791487c3420fc Mon Sep 17 00:00:00 2001 From: Douwe Osinga Date: Sun, 1 Feb 2026 14:35:53 +0100 Subject: [PATCH 2/3] remove comments --- crates/goose/src/providers/canonical/build_canonical_models.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/goose/src/providers/canonical/build_canonical_models.rs b/crates/goose/src/providers/canonical/build_canonical_models.rs index 54d4f0ee3b22..1fb81268ba44 100644 --- a/crates/goose/src/providers/canonical/build_canonical_models.rs +++ b/crates/goose/src/providers/canonical/build_canonical_models.rs @@ -208,7 +208,6 @@ impl MappingReport { println!("CHANGES SINCE PREVIOUS RUN"); println!("{SEPARATOR}"); - // Build maps using references to avoid cloning unless necessary let prev_map: HashMap<(&str, &str), &str> = previous .mapped_models .iter() @@ -308,7 +307,6 @@ impl MappingReport { } } -/// Get the path to a data file in the canonical models directory fn data_file_path(filename: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("src/providers/canonical/data") @@ -332,7 +330,6 @@ async fn fetch_models_dev() -> Result { .context("Failed to parse models.dev response") } -/// Extract optional string field from JSON fn get_string(value: &Value, field: &str) -> Option { value.get(field).and_then(|v| v.as_str()).map(String::from) } From 7b9e8bad8d4478dd75c0e9b83840f363d66125c6 Mon Sep 17 00:00:00 2001 From: Douwe Osinga Date: Mon, 2 Feb 2026 18:21:37 +0100 Subject: [PATCH 3/3] fmt --- .../canonical/build_canonical_models.rs | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git 
a/crates/goose/src/providers/canonical/build_canonical_models.rs b/crates/goose/src/providers/canonical/build_canonical_models.rs index 1fb81268ba44..e53cd7310a9c 100644 --- a/crates/goose/src/providers/canonical/build_canonical_models.rs +++ b/crates/goose/src/providers/canonical/build_canonical_models.rs @@ -20,8 +20,10 @@ use std::path::PathBuf; const MODELS_DEV_API_URL: &str = "https://models.dev/api.json"; const DEFAULT_CONTEXT_LIMIT: usize = 128_000; -const SEPARATOR: &str = "================================================================================"; -const SUBSEPARATOR: &str = "--------------------------------------------------------------------------------"; +const SEPARATOR: &str = + "================================================================================"; +const SUBSEPARATOR: &str = + "--------------------------------------------------------------------------------"; const ALLOWED_PROVIDERS: &[&str] = &[ "anthropic", @@ -211,13 +213,23 @@ impl MappingReport { let prev_map: HashMap<(&str, &str), &str> = previous .mapped_models .iter() - .map(|e| ((e.provider.as_str(), e.model.as_str()), e.canonical.as_str())) + .map(|e| { + ( + (e.provider.as_str(), e.model.as_str()), + e.canonical.as_str(), + ) + }) .collect(); let curr_map: HashMap<(&str, &str), &str> = self .mapped_models .iter() - .map(|e| ((e.provider.as_str(), e.model.as_str()), e.canonical.as_str())) + .map(|e| { + ( + (e.provider.as_str(), e.model.as_str()), + e.canonical.as_str(), + ) + }) .collect(); let mut changed_mappings = Vec::new();