41 changes: 21 additions & 20 deletions crates/goose-server/src/routes/config_management.rs
@@ -13,7 +13,9 @@ use goose::config::{extensions::name_to_key, PermissionManager};
use goose::config::{ExtensionConfigManager, ExtensionEntry};
use goose::model::ModelConfig;
use goose::providers::base::ProviderMetadata;
use goose::providers::pricing::{get_all_pricing, get_model_pricing, refresh_pricing};
use goose::providers::pricing::{
get_all_pricing, get_model_pricing, parse_model_id, refresh_pricing,
};
use goose::providers::providers as get_providers;
use goose::{agents::ExtensionConfig, config::permission::PermissionLevel};
use http::{HeaderMap, StatusCode};
@@ -390,8 +392,22 @@ pub async fn get_pricing(
}

for model_info in &metadata.known_models {
// Try to get pricing from cache
if let Some(pricing) = get_model_pricing(&metadata.name, &model_info.name).await {
// Handle OpenRouter models specially - they store full provider/model names
let (lookup_provider, lookup_model) = if metadata.name == "openrouter" {
// For OpenRouter, parse the model name to extract real provider/model
if let Some((provider, model)) = parse_model_id(&model_info.name) {
(provider, model)
} else {
// Fallback if parsing fails
(metadata.name.clone(), model_info.name.clone())
}
} else {
// For other providers, use names as-is
(metadata.name.clone(), model_info.name.clone())
};

// Only get pricing from OpenRouter cache
if let Some(pricing) = get_model_pricing(&lookup_provider, &lookup_model).await {
pricing_data.push(PricingData {
provider: metadata.name.clone(),
model: model_info.name.clone(),
@@ -401,27 +417,12 @@
context_length: pricing.context_length,
});
}
// Check if the model has embedded pricing data
else if let (Some(input_cost), Some(output_cost)) =
(model_info.input_token_cost, model_info.output_token_cost)
{
pricing_data.push(PricingData {
provider: metadata.name.clone(),
model: model_info.name.clone(),
input_token_cost: input_cost,
output_token_cost: output_cost,
currency: model_info
.currency
.clone()
.unwrap_or_else(|| "$".to_string()),
context_length: Some(model_info.context_limit as u32),
});
}
// No fallback to hardcoded prices
}
}
}

tracing::info!(
tracing::debug!(
"Returning pricing for {} models{}",
pricing_data.len(),
if configured_only {
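Reviewer note: the new lookup above depends on `parse_model_id` splitting an OpenRouter-style `provider/model` identifier into its two halves before hitting the pricing cache. A minimal sketch of that behaviour, assuming the real function (in `goose::providers::pricing`) simply partitions on the first `/` and returns `None` for plain model names; the `parse_model_id_sketch` below is illustrative, not the actual implementation:

```rust
/// Hypothetical stand-in for `parse_model_id`: split an OpenRouter-style
/// "provider/model" id into its provider and model parts.
fn parse_model_id_sketch(model_id: &str) -> Option<(String, String)> {
    let (provider, model) = model_id.split_once('/')?;
    if provider.is_empty() || model.is_empty() {
        return None;
    }
    Some((provider.to_string(), model.to_string()))
}

fn main() {
    // "anthropic/claude-3.5-sonnet" is looked up as ("anthropic", "claude-3.5-sonnet").
    assert_eq!(
        parse_model_id_sketch("anthropic/claude-3.5-sonnet"),
        Some(("anthropic".to_string(), "claude-3.5-sonnet".to_string()))
    );
    // A plain model name has no provider prefix, so the route falls back to
    // (metadata.name, model_info.name) unchanged.
    assert_eq!(parse_model_id_sketch("gpt-4o"), None);
}
```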
41 changes: 41 additions & 0 deletions crates/goose-server/tests/pricing_api_test.rs
@@ -0,0 +1,41 @@
use axum::http::StatusCode;
use axum::Router;
use axum::{body::Body, http::Request};
use etcetera::AppStrategy;
use serde_json::json;
use std::sync::Arc;
use tower::ServiceExt;

async fn create_test_app() -> Router {
let agent = Arc::new(goose::agents::Agent::default());
let state = goose_server::AppState::new(agent, "test".to_string()).await;

// Set up the scheduler as in the existing tests
let sched_storage_path = etcetera::choose_app_strategy(goose::config::APP_STRATEGY.clone())
.unwrap()
.data_dir()
.join("schedules.json");
let sched = goose::scheduler_factory::SchedulerFactory::create_legacy(sched_storage_path)
.await
.unwrap();
state.set_scheduler(sched).await;

goose_server::routes::config_management::routes(state)
}

#[tokio::test]
async fn test_pricing_endpoint_basic() {
// Basic test to ensure the pricing endpoint responds correctly
let app = create_test_app().await;

let request = Request::builder()
.uri("/config/pricing")
.method("POST")
.header("content-type", "application/json")
.header("x-secret-key", "test")
.body(Body::from(json!({"configured_only": true}).to_string()))
.unwrap();

let response = app.oneshot(request).await.unwrap();
assert_eq!(response.status(), StatusCode::OK);
}
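A possible follow-up to the status assertion would be to pull the body out and make sure it parses as JSON. The sketch below assumes axum 0.7, where `axum::body::to_bytes` takes an explicit size limit; the concrete shape of the pricing payload is deliberately left unasserted, since this test does not pin it down:

```rust
// Continuation of test_pricing_endpoint_basic, after the status assertion.
let body_bytes = axum::body::to_bytes(response.into_body(), usize::MAX)
    .await
    .unwrap();
// Parsing is the assertion: the endpoint must return valid JSON. No
// field-level checks are made because the payload shape is not fixed here.
let _body_json: serde_json::Value =
    serde_json::from_slice(&body_bytes).expect("pricing endpoint should return JSON");
```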
22 changes: 11 additions & 11 deletions crates/goose/src/providers/anthropic.rs
@@ -130,17 +130,17 @@ impl Provider for AnthropicProvider {
"Claude and other models from Anthropic",
ANTHROPIC_DEFAULT_MODEL,
vec![
ModelInfo::with_cost("claude-sonnet-4-latest", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-sonnet-4-20250514", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-opus-4-latest", 200000, 0.000025, 0.000125),
ModelInfo::with_cost("claude-opus-4-20250514", 200000, 0.000025, 0.000125),
ModelInfo::with_cost("claude-3-7-sonnet-latest", 200000, 0.000008, 0.000024),
ModelInfo::with_cost("claude-3-7-sonnet-20250219", 200000, 0.000008, 0.000024),
ModelInfo::with_cost("claude-3-5-sonnet-20241022", 200000, 0.000003, 0.000015),
ModelInfo::with_cost("claude-3-5-haiku-20241022", 200000, 0.000001, 0.000005),
ModelInfo::with_cost("claude-3-opus-20240229", 200000, 0.000015, 0.000075),
ModelInfo::with_cost("claude-3-sonnet-20240229", 200000, 0.000003, 0.000015),
ModelInfo::with_cost("claude-3-haiku-20240307", 200000, 0.00000025, 0.00000125),
ModelInfo::new("claude-sonnet-4-latest", 200000),
ModelInfo::new("claude-sonnet-4-20250514", 200000),
ModelInfo::new("claude-opus-4-latest", 200000),
ModelInfo::new("claude-opus-4-20250514", 200000),
ModelInfo::new("claude-3-7-sonnet-latest", 200000),
ModelInfo::new("claude-3-7-sonnet-20250219", 200000),
ModelInfo::new("claude-3-5-sonnet-20241022", 200000),
ModelInfo::new("claude-3-5-haiku-20241022", 200000),
ModelInfo::new("claude-3-opus-20240229", 200000),
ModelInfo::new("claude-3-sonnet-20240229", 200000),
ModelInfo::new("claude-3-haiku-20240307", 200000),
],
ANTHROPIC_DOC_URL,
vec![
111 changes: 86 additions & 25 deletions crates/goose/src/providers/formats/anthropic.rs
@@ -237,33 +237,61 @@ pub fn response_to_message(response: Value) -> Result<Message> {
pub fn get_usage(data: &Value) -> Result<Usage> {
// Extract usage data if available
if let Some(usage) = data.get("usage") {
// Sum up all input token types:
// - input_tokens (fresh/uncached)
// - cache_creation_input_tokens (being written to cache)
// - cache_read_input_tokens (read from cache)
let total_input_tokens = usage
// Get all token fields for analysis
let input_tokens = usage
.get("input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0)
+ usage
.get("cache_creation_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0)
+ usage
.get("cache_read_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

let input_tokens = Some(total_input_tokens as i32);
.unwrap_or(0);

let output_tokens = usage
.get("output_tokens")
let cache_creation_tokens = usage
.get("cache_creation_input_tokens")
.and_then(|v| v.as_u64())
.map(|v| v as i32);
.unwrap_or(0);

let total_tokens = output_tokens.map(|o| total_input_tokens as i32 + o);
let cache_read_tokens = usage
.get("cache_read_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

Ok(Usage::new(input_tokens, output_tokens, total_tokens))
let output_tokens = usage
.get("output_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0);

// When prompt caching is used, input_tokens covers ONLY the new/fresh tokens
// (which can be very small, e.g. 7), while cache_creation_input_tokens and
// cache_read_input_tokens cover the cached content. Anthropic charges these
// at different rates relative to the regular input price:
//   * Fresh input tokens: 100% (1.0x)
//   * Cache creation tokens: 125% (1.25x)
//   * Cache read tokens: 10% (0.10x)
//
// The effective input tokens are the cost-equivalent token count: multiplying
// them by the regular input price yields the correct blended cost, keeping the
// frontend cost calculation accurate.
let effective_input_tokens = input_tokens as f64 * 1.0
+ cache_creation_tokens as f64 * 1.25
+ cache_read_tokens as f64 * 0.10;

// Actual raw token count, kept for reference but not currently surfaced
let _total_actual_tokens = input_tokens + cache_creation_tokens + cache_read_tokens;

// Clamp to i32 and return the effective input tokens for cost calculation
let effective_input_i32 = effective_input_tokens.round().clamp(0.0, i32::MAX as f64) as i32;
let output_tokens_i32 = output_tokens.min(i32::MAX as u64) as i32;
let total_tokens_i32 =
(effective_input_i32 as i64 + output_tokens_i32 as i64).min(i32::MAX as i64) as i32;

Ok(Usage::new(
Some(effective_input_i32),
Some(output_tokens_i32),
Some(total_tokens_i32),
))
} else {
tracing::debug!(
"Failed to get usage data: {}",
@@ -387,9 +415,9 @@ mod tests {
panic!("Expected Text content");
}

assert_eq!(usage.input_tokens, Some(24)); // 12 + 12 + 0
assert_eq!(usage.input_tokens, Some(27)); // 12 * 1.0 + 12 * 1.25 = 27 effective tokens
assert_eq!(usage.output_tokens, Some(15));
assert_eq!(usage.total_tokens, Some(39)); // 24 + 15
assert_eq!(usage.total_tokens, Some(42)); // 27 + 15

Ok(())
}
@@ -430,9 +458,9 @@
panic!("Expected ToolRequest content");
}

assert_eq!(usage.input_tokens, Some(30)); // 15 + 15 + 0
assert_eq!(usage.input_tokens, Some(34)); // 15 * 1.0 + 15 * 1.25 = 33.75 → 34 effective tokens
assert_eq!(usage.output_tokens, Some(20));
assert_eq!(usage.total_tokens, Some(50)); // 30 + 20
assert_eq!(usage.total_tokens, Some(54)); // 34 + 20

Ok(())
}
@@ -631,4 +659,37 @@
// Return the test result
result
}

#[test]
fn test_cache_pricing_calculation() -> Result<()> {
// Test realistic cache scenario: small fresh input, large cached content
let response = json!({
"id": "msg_cache_test",
"type": "message",
"role": "assistant",
"content": [{
"type": "text",
"text": "Based on the cached context, here's my response."
}],
"model": "claude-3-5-sonnet-latest",
"stop_reason": "end_turn",
"stop_sequence": null,
"usage": {
"input_tokens": 7, // Small fresh input
"output_tokens": 50, // Output tokens
"cache_creation_input_tokens": 10000, // Large cache creation
"cache_read_input_tokens": 5000 // Large cache read
}
});

let usage = get_usage(&response)?;

// Effective input tokens should be:
// 7 * 1.0 + 10000 * 1.25 + 5000 * 0.10 = 7 + 12500 + 500 = 13007
assert_eq!(usage.input_tokens, Some(13007));
assert_eq!(usage.output_tokens, Some(50));
assert_eq!(usage.total_tokens, Some(13057)); // 13007 + 50

Ok(())
}
}
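To make the intent of the effective-token count concrete: multiplying it by the regular per-token input price should reproduce the blended, cache-aware cost, which is exactly what the frontend's simple `tokens * price` computation relies on. A small sketch using the numbers from the cache test above; the $3-per-million input price is an illustrative figure, not something taken from this diff:

```rust
// Cache billing multipliers relative to the regular input price
// (fresh 1.0x, cache writes 1.25x, cache reads 0.10x).
fn effective_input_tokens(fresh: u64, cache_creation: u64, cache_read: u64) -> f64 {
    fresh as f64 * 1.0 + cache_creation as f64 * 1.25 + cache_read as f64 * 0.10
}

fn main() {
    // Same usage as test_cache_pricing_calculation: 7 fresh, 10_000 cache-creation,
    // 5_000 cache-read tokens -> 7 + 12_500 + 500 = 13_007 effective tokens.
    let effective = effective_input_tokens(7, 10_000, 5_000);
    assert!((effective - 13_007.0).abs() < 1e-6);

    // With an illustrative regular input price of $3 per million tokens, the
    // frontend's plain `tokens * price` multiplication yields the blended cost.
    let price_per_token = 3.0 / 1_000_000.0;
    let cost = effective * price_per_token;
    assert!((cost - 0.039021).abs() < 1e-9); // ~$0.039
}
```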
14 changes: 7 additions & 7 deletions crates/goose/src/providers/openai.rs
@@ -132,13 +132,13 @@ impl Provider for OpenAiProvider {
"GPT-4 and other OpenAI models, including OpenAI compatible ones",
OPEN_AI_DEFAULT_MODEL,
vec![
ModelInfo::with_cost("gpt-4o", 128000, 0.0000025, 0.00001),
ModelInfo::with_cost("gpt-4o-mini", 128000, 0.00000015, 0.0000006),
ModelInfo::with_cost("gpt-4-turbo", 128000, 0.00001, 0.00003),
ModelInfo::with_cost("gpt-3.5-turbo", 16385, 0.0000005, 0.0000015),
ModelInfo::with_cost("o1", 200000, 0.000015, 0.00006),
ModelInfo::with_cost("o3", 200000, 0.000015, 0.00006), // Using o1 pricing as placeholder
ModelInfo::with_cost("o4-mini", 128000, 0.000003, 0.000012), // Using o1-mini pricing as placeholder
ModelInfo::new("gpt-4o", 128000),
ModelInfo::new("gpt-4o-mini", 128000),
ModelInfo::new("gpt-4-turbo", 128000),
ModelInfo::new("gpt-3.5-turbo", 16385),
ModelInfo::new("o1", 200000),
ModelInfo::new("o3", 200000),
ModelInfo::new("o4-mini", 128000),
],
OPEN_AI_DOC_URL,
vec![