41 changes: 21 additions & 20 deletions crates/goose-server/src/routes/config_management.rs
@@ -13,7 +13,9 @@ use goose::config::{extensions::name_to_key, PermissionManager};
use goose::config::{ExtensionConfigManager, ExtensionEntry};
use goose::model::ModelConfig;
use goose::providers::base::ProviderMetadata;
use goose::providers::pricing::{get_all_pricing, get_model_pricing, refresh_pricing};
use goose::providers::pricing::{
get_all_pricing, get_model_pricing, parse_model_id, refresh_pricing,
};
use goose::providers::providers as get_providers;
use goose::{agents::ExtensionConfig, config::permission::PermissionLevel};
use http::{HeaderMap, StatusCode};
@@ -390,8 +392,22 @@ pub async fn get_pricing(
}

for model_info in &metadata.known_models {
// Try to get pricing from cache
if let Some(pricing) = get_model_pricing(&metadata.name, &model_info.name).await {
// Handle OpenRouter models specially - they store full provider/model names
let (lookup_provider, lookup_model) = if metadata.name == "openrouter" {
// For OpenRouter, parse the model name to extract real provider/model
if let Some((provider, model)) = parse_model_id(&model_info.name) {
(provider, model)
} else {
// Fallback if parsing fails
(metadata.name.clone(), model_info.name.clone())
}
} else {
// For other providers, use names as-is
(metadata.name.clone(), model_info.name.clone())
};

// Only get pricing from OpenRouter cache
if let Some(pricing) = get_model_pricing(&lookup_provider, &lookup_model).await {
pricing_data.push(PricingData {
provider: metadata.name.clone(),
model: model_info.name.clone(),
@@ -401,27 +417,12 @@
context_length: pricing.context_length,
});
}
// Check if the model has embedded pricing data
else if let (Some(input_cost), Some(output_cost)) =
(model_info.input_token_cost, model_info.output_token_cost)
{
pricing_data.push(PricingData {
provider: metadata.name.clone(),
model: model_info.name.clone(),
input_token_cost: input_cost,
output_token_cost: output_cost,
currency: model_info
.currency
.clone()
.unwrap_or_else(|| "$".to_string()),
context_length: Some(model_info.context_limit as u32),
});
}
// No fallback to hardcoded prices
}
}
}

tracing::info!(
tracing::debug!(
"Returning pricing for {} models{}",
pricing_data.len(),
if configured_only {
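Reviewer note: the new lookup above depends on `parse_model_id` splitting an OpenRouter-style `provider/model` identifier into its two halves before hitting the pricing cache. A minimal sketch of that behaviour, assuming the real function (in `goose::providers::pricing`) simply partitions on the first `/` and returns `None` for plain model names; the `parse_model_id_sketch` below is illustrative, not the actual implementation:

```rust
/// Hypothetical stand-in for `parse_model_id`: split an OpenRouter-style
/// "provider/model" id into its provider and model parts.
fn parse_model_id_sketch(model_id: &str) -> Option<(String, String)> {
    let (provider, model) = model_id.split_once('/')?;
    if provider.is_empty() || model.is_empty() {
        return None;
    }
    Some((provider.to_string(), model.to_string()))
}

fn main() {
    // "anthropic/claude-3.5-sonnet" is looked up as ("anthropic", "claude-3.5-sonnet").
    assert_eq!(
        parse_model_id_sketch("anthropic/claude-3.5-sonnet"),
        Some(("anthropic".to_string(), "claude-3.5-sonnet".to_string()))
    );
    // A plain model name has no provider prefix, so the route falls back to
    // (metadata.name, model_info.name) unchanged.
    assert_eq!(parse_model_id_sketch("gpt-4o"), None);
}
```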
41 changes: 41 additions & 0 deletions crates/goose-server/tests/pricing_api_test.rs
@@ -0,0 +1,41 @@
use axum::http::StatusCode;
use axum::Router;
use axum::{body::Body, http::Request};
use etcetera::AppStrategy;
use serde_json::json;
use std::sync::Arc;
use tower::ServiceExt;

async fn create_test_app() -> Router {
let agent = Arc::new(goose::agents::Agent::default());
let state = goose_server::AppState::new(agent, "test".to_string()).await;

// Set up the scheduler as in the existing tests
let sched_storage_path = etcetera::choose_app_strategy(goose::config::APP_STRATEGY.clone())
.unwrap()
.data_dir()
.join("schedules.json");
let sched = goose::scheduler_factory::SchedulerFactory::create_legacy(sched_storage_path)
.await
.unwrap();
state.set_scheduler(sched).await;

goose_server::routes::config_management::routes(state)
}

#[tokio::test]
async fn test_pricing_endpoint_basic() {
// Basic test to ensure the pricing endpoint responds correctly
let app = create_test_app().await;

let request = Request::builder()
.uri("/config/pricing")
.method("POST")
.header("content-type", "application/json")
.header("x-secret-key", "test")
.body(Body::from(json!({"configured_only": true}).to_string()))
.unwrap();

let response = app.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
}
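A possible follow-up to the status assertion would be to pull the body out and make sure it parses as JSON. The sketch below assumes axum 0.7, where `axum::body::to_bytes` takes an explicit size limit; the concrete shape of the pricing payload is deliberately left unasserted, since this test does not pin it down:

```rust
// Continuation of test_pricing_endpoint_basic, after the status assertion.
let body_bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
    .await
    .unwrap();
// Parsing is the assertion: the endpoint must return valid JSON. No
// field-level checks are made because the payload shape is not fixed here.
let _body_json: serde_json::Value =
    serde_json::from_slice(&body_bytes).expect("pricing endpoint should return JSON");
```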
22 changes: 11 additions & 11 deletions crates/goose/src/providers/anthropic.rs
@@ -130,17 +130,17 @@ impl Provider for AnthropicProvider {
"Claude and other models from Anthropic",
ANTHROPIC_DEFAULT_MODEL,
vec![
ModelInfo::with_cost("claude-sonnet-4-latest", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-sonnet-4-20250514", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-opus-4-latest", 200000, 0.000025, 0.000125),
ModelInfo::with_cost("claude-opus-4-20250514", 200000, 0.000025, 0.000125),
ModelInfo::with_cost("claude-3-7-sonnet-latest", 200000, 0.000008, 0.000024),
ModelInfo::with_cost("claude-3-7-sonnet-20250219", 200000, 0.000008, 0.000024),
ModelInfo::with_cost("claude-3-5-sonnet-20241022", 200000, 0.000003, 0.000015),
ModelInfo::with_cost("claude-3-5-haiku-20241022", 200000, 0.000001, 0.000005),
ModelInfo::with_cost("claude-3-opus-20240229", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-3-sonnet-20240229", 200000, 0.000003, 0.000015),
ModelInfo::with_cost("claude-3-haiku-20240307", 200000, 0.00000025, 0.00000125),
ModelInfo::new("claude-sonnet-4-latest", 200000),
ModelInfo::new("claude-sonnet-4-20250514", 200000),
ModelInfo::new("claude-opus-4-latest", 200000),
ModelInfo::new("claude-opus-4-20250514", 200000),
ModelInfo::new("claude-3-7-sonnet-latest", 200000),
ModelInfo::new("claude-3-7-sonnet-20250219", 200000),
ModelInfo::new("claude-3-5-sonnet-20241022", 200000),
ModelInfo::new("claude-3-5-haiku-20241022", 200000),
ModelInfo::new("claude-3-opus-20240229", 200000),
ModelInfo::new("claude-3-sonnet-20240229", 200000),
ModelInfo::new("claude-3-haiku-20240307", 200000),
],
ANTHROPIC_DOC_URL,
vec![
111 changes: 86 additions & 25 deletions crates/goose/src/providers/formats/anthropic.rs
@@ -237,33 +237,61 @@ pub fn response_to_message(response: Value) -> Result<Message> {
pub fn get_usage(data: &Value) -> Result<Usage> {
// Extract usage data if available
if let Some(usage) = data.get("usage") {
// Sum up all input token types:
// - input_tokens (fresh/uncached)
// - cache_creation_input_tokens (being written to cache)
// - cache_read_input_tokens (read from cache)
let total_input_tokens = usage
// Get all token fields for analysis
let input_tokens = usage
.get("input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0)
+ usage
.get("cache_creation_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0)
+ usage
.get("cache_read_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

let input_tokens = Some(total_input_tokens as i32);
.unwrap_or(0);

let output_tokens = usage
.get("output_tokens")
let cache_creation_tokens = usage
.get("cache_creation_input_tokens")
.and_then(|v| v.as_u64())
.map(|v| v as i32);
.unwrap_or(0);

let total_tokens = output_tokens.map(|o| total_input_tokens as i32 + o);
let cache_read_tokens = usage
.get("cache_read_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

Ok(Usage::new(input_tokens, output_tokens, total_tokens))
let output_tokens = usage
.get("output_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

// When prompt caching is used, input_tokens covers ONLY the new/fresh tokens
// (which can be very small, e.g. 7), while cache_creation_input_tokens and
// cache_read_input_tokens cover the cached content. Anthropic charges these
// at different rates relative to the regular input price:
//   * Fresh input tokens: 100% (1.0x)
//   * Cache creation tokens: 125% (1.25x)
//   * Cache read tokens: 10% (0.10x)
//
// The effective input tokens are the cost-equivalent token count: multiplying
// them by the regular input price yields the correct blended cost, keeping the
// frontend cost calculation accurate.
let effective_input_tokens = input_tokens as f64 * 1.0
+ cache_creation_tokens as f64 * 1.25
+ cache_read_tokens as f64 * 0.10;

// Actual raw token count, kept for reference but not currently surfaced
let _total_actual_tokens = input_tokens + cache_creation_tokens + cache_read_tokens;

// Clamp to i32 and return the effective input tokens for cost calculation
let effective_input_i32 = effective_input_tokens.round().clamp(0.0, i32::MAX as f64) as i32;
let output_tokens_i32 = output_tokens.min(i32::MAX as u64) as i32;
let total_tokens_i32 =
(effective_input_i32 as i64 + output_tokens_i32 as i64).min(i32::MAX as i64) as i32;

Ok(Usage::new(
Some(effective_input_i32),
Some(output_tokens_i32),
Some(total_tokens_i32),
))
} else {
tracing::debug!(
"Failed to get usage data: {}",
@@ -387,9 +415,9 @@ mod tests {
panic!("Expected Text content");
}

assert_eq!(usage.input_tokens, Some(24)); // 12 + 12 + 0
assert_eq!(usage.input_tokens, Some(27)); // 12 * 1.0 + 12 * 1.25 = 27 effective tokens
assert_eq!(usage.output_tokens, Some(15));
assert_eq!(usage.total_tokens, Some(39)); // 24 + 15
assert_eq!(usage.total_tokens, Some(42)); // 27 + 15

Ok(())
}
@@ -430,9 +458,9 @@
panic!("Expected ToolRequest content");
}

assert_eq!(usage.input_tokens, Some(30)); // 15 + 15 + 0
assert_eq!(usage.input_tokens, Some(34)); // 15 * 1.0 + 15 * 1.25 = 33.75 → 34 effective tokens
assert_eq!(usage.output_tokens, Some(20));
assert_eq!(usage.total_tokens, Some(50)); // 30 + 20
assert_eq!(usage.total_tokens, Some(54)); // 34 + 20

Ok(())
}
@@ -631,4 +659,37 @@
// Return the test result
result
}

#[test]
fn test_cache_pricing_calculation() -> Result<()> {
// Test realistic cache scenario: small fresh input, large cached content
let response = json!({
"id": "msg_cache_test",
"type": "message",
"role": "assistant",
"content": [{
"type": "text",
"text": "Based on the cached context, here's my response."
}],
"model": "claude-3-5-sonnet-latest",
"stop_reason": "end_turn",
"stop_sequence": null,
"usage": {
"input_tokens": 7, // Small fresh input
"output_tokens": 50, // Output tokens
"cache_creation_input_tokens": 10000, // Large cache creation
"cache_read_input_tokens": 5000 // Large cache read
}
});

let usage = get_usage(&response)?;

// Effective input tokens should be:
// 7 * 1.0 + 10000 * 1.25 + 5000 * 0.10 = 7 + 12500 + 500 = 13007
assert_eq!(usage.input_tokens, Some(13007));
assert_eq!(usage.output_tokens, Some(50));
assert_eq!(usage.total_tokens, Some(13057)); // 13007 + 50

Ok(())
}
}
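To make the intent of the effective-token count concrete: multiplying it by the regular per-token input price should reproduce the blended, cache-aware cost, which is exactly what the frontend's simple `tokens * price` computation relies on. A small sketch using the numbers from the cache test above; the $3-per-million input price is an illustrative figure, not something taken from this diff:

```rust
// Cache billing multipliers relative to the regular input price
// (fresh 1.0x, cache writes 1.25x, cache reads 0.10x).
fn effective_input_tokens(fresh: u64, cache_creation: u64, cache_read: u64) -> f64 {
    fresh as f64 * 1.0 + cache_creation as f64 * 1.25 + cache_read as f64 * 0.10
}

fn main() {
    // Same usage as test_cache_pricing_calculation: 7 fresh, 10_000 cache-creation,
    // 5_000 cache-read tokens -> 7 + 12_500 + 500 = 13_007 effective tokens.
    let effective = effective_input_tokens(7, 10_000, 5_000);
    assert!((effective - 13_007.0).abs() < 1e-6);

    // With an illustrative regular input price of $3 per million tokens, the
    // frontend's plain `tokens * price` multiplication yields the blended cost.
    let price_per_token = 3.0 / 1_000_000.0;
    let cost = effective * price_per_token;
    assert!((cost - 0.039021).abs() < 1e-9); // ~$0.039
}
```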
14 changes: 7 additions & 7 deletions crates/goose/src/providers/openai.rs
@@ -132,13 +132,13 @@ impl Provider for OpenAiProvider {
"GPT-4 and other OpenAI models, including OpenAI compatible ones",
OPEN_AI_DEFAULT_MODEL,
vec![
ModelInfo::with_cost("gpt-4o", 128000, 0.0000025, 0.00001),
ModelInfo::with_cost("gpt-4o-mini", 128000, 0.00000015, 0.0000006),
ModelInfo::with_cost("gpt-4-turbo", 128000, 0.00001, 0.00003),
ModelInfo::with_cost("gpt-3.5-turbo", 16385, 0.0000005, 0.0000015),
ModelInfo::with_cost("o1", 200000, 0.000015, 0.00006),
ModelInfo::with_cost("o3", 200000, 0.000015, 0.00006), // Using o1 pricing as placeholder
ModelInfo::with_cost("o4-mini", 128000, 0.000003, 0.000012), // Using o1-mini pricing as placeholder
ModelInfo::new("gpt-4o", 128000),
ModelInfo::new("gpt-4o-mini", 128000),
ModelInfo::new("gpt-4-turbo", 128000),
ModelInfo::new("gpt-3.5-turbo", 16385),
ModelInfo::new("o1", 200000),
ModelInfo::new("o3", 200000),
ModelInfo::new("o4-mini", 128000),
],
OPEN_AI_DOC_URL,
vec![