Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 36 additions & 58 deletions src/rotator_library/providers/nanogpt_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ class NanoGptProvider(NanoGptQuotaTracker, ProviderInterface):
}
default_tier_priority = 3

# Quota groups for tracking daily and monthly limits
# Quota groups for tracking monthly requests and weekly input tokens
# These are virtual models used to track subscription-level quota
model_quota_groups = {
"daily": ["_daily"],
"monthly": ["_monthly"],
"weekly_tokens": ["_weekly_tokens"],
}

def __init__(self):
Expand Down Expand Up @@ -135,11 +135,11 @@ def get_model_quota_group(self, model: str) -> Optional[str]:
Get the quota group for a model.

NanoGPT has two quota types:
- Daily: Soft limit (2000/day) - display only, does NOT block
- Monthly: Hard limit (60000/month) - BLOCKS when exhausted
- Monthly requests: Hard limit (60,000/month) - BLOCKS when exhausted
- Weekly input tokens: Soft limit (60M tokens/week) - display + soft enforcement

Real models belong to "monthly" so they're only blocked by the
hard limit. The "daily" group is just for display.
Real models belong to "monthly" so they're blocked by the hard request limit.
The "weekly_tokens" group tracks the token quota separately.

Args:
model: Model name
Expand All @@ -150,11 +150,11 @@ def get_model_quota_group(self, model: str) -> Optional[str]:
# Strip provider prefix if present
clean_model = model.split("/")[-1] if "/" in model else model

# _daily is for soft limit display only
if clean_model == "_daily":
return "daily"
# _weekly_tokens is the virtual tracker for the weekly token quota
if clean_model == "_weekly_tokens":
return "weekly_tokens"

# Real models + _monthly belong to monthly (hard limit)
# Real models + _monthly belong to monthly (hard request limit)
return "monthly"

def get_models_in_quota_group(self, group: str) -> List[str]:
Expand All @@ -170,11 +170,11 @@ def get_models_in_quota_group(self, group: str) -> List[str]:
Returns:
List of model names in the group
"""
if group == "daily":
# Daily is soft limit - only virtual tracker for display
return ["_daily"]
if group == "weekly_tokens":
# Weekly token quota - only virtual tracker for display/soft enforcement
return ["_weekly_tokens"]
elif group == "monthly":
# Monthly is hard limit - include subscription models for sync
# Monthly is hard request limit - include subscription models for sync
models = ["_monthly"]
models.extend(list(self._subscription_models))
return models
Expand All @@ -187,7 +187,7 @@ def get_quota_groups(self) -> List[str]:
Returns:
List of quota group names
"""
return ["daily", "monthly"]
return ["monthly", "weekly_tokens"]

# =========================================================================
# MODEL DISCOVERY
Expand Down Expand Up @@ -397,66 +397,44 @@ async def refresh_single_credential(
tier = self.get_tier_from_state(state)
self._tier_cache[api_key] = tier

# Extract quota data for daily and monthly limits
daily_data = usage_data.get("daily", {})
# Extract quota data for monthly requests and weekly tokens
monthly_data = usage_data.get("monthly", {})
weekly_token_data = usage_data.get("weekly_input_tokens")
limits = usage_data.get("limits", {})

daily_limit = limits.get("daily", 0)
monthly_limit = limits.get("monthly", 0)
daily_remaining = daily_data.get("remaining", 0)
monthly_remaining = monthly_data.get("remaining", 0)

# Calculate remaining fractions
daily_fraction = (
daily_remaining / daily_limit if daily_limit > 0 else 1.0
)
monthly_fraction = (
monthly_remaining / monthly_limit
if monthly_limit > 0
else 1.0
)

# Get reset timestamps
daily_reset_ts = daily_data.get("reset_at", 0)
monthly_reset_ts = monthly_data.get("reset_at", 0)

# Store daily quota baseline
daily_used = (
int((1.0 - daily_fraction) * daily_limit)
if daily_limit > 0
else 0
)
await usage_manager.update_quota_baseline(
api_key,
"nanogpt/_daily",
quota_max_requests=daily_limit,
quota_reset_ts=daily_reset_ts
if daily_reset_ts > 0
else None,
quota_used=daily_used,
)

# Store monthly quota baseline
monthly_used = (
int((1.0 - monthly_fraction) * monthly_limit)
if monthly_limit > 0
else 0
)
# Store monthly request quota baseline (hard limit)
monthly_used = monthly_limit - monthly_remaining if monthly_limit > 0 else 0
await usage_manager.update_quota_baseline(
api_key,
"nanogpt/_monthly",
quota_max_requests=monthly_limit,
quota_reset_ts=monthly_reset_ts
if monthly_reset_ts > 0
else None,
quota_reset_ts=monthly_reset_ts if monthly_reset_ts > 0 else None,
quota_used=monthly_used,
)

# Store weekly token quota baseline (if present)
if weekly_token_data is not None:
weekly_token_limit = limits.get("weekly_input_tokens", 0)
weekly_token_remaining = weekly_token_data.get("remaining", 0)
weekly_token_reset_ts = weekly_token_data.get("reset_at", 0)
weekly_token_used = weekly_token_limit - weekly_token_remaining if weekly_token_limit > 0 else 0
await usage_manager.update_quota_baseline(
api_key,
"nanogpt/_weekly_tokens",
quota_max_requests=weekly_token_limit,
quota_reset_ts=weekly_token_reset_ts if weekly_token_reset_ts > 0 else None,
quota_used=weekly_token_used,
)

lib_logger.debug(
f"Updated NanoGPT quota baselines: "
f"daily={daily_remaining}/{daily_limit}, "
f"monthly={monthly_remaining}/{monthly_limit}"
+ (f", weekly_tokens={weekly_token_remaining}/{weekly_token_limit}"
if weekly_token_data is not None else "")
)

except Exception as e:
Expand Down
87 changes: 73 additions & 14 deletions src/rotator_library/providers/utilities/nanogpt_quota_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ async def fetch_subscription_usage(
"status": "success" | "error",
"error": str | None,
"active": bool,
"allow_overage": bool,
"state": str, # "active" | "grace" | "inactive"
"limits": {"daily": int, "monthly": int},
"enforce_daily_limit": bool,
"limits": {"daily": int, "monthly": int, "weekly_input_tokens": int},
"daily": {
"used": int,
"remaining": int,
Expand All @@ -89,6 +91,12 @@ async def fetch_subscription_usage(
"percent_used": float,
"reset_at": float,
},
"weekly_input_tokens": {
"used": int,
"remaining": int,
"percent_used": float,
"reset_at": float, # Unix timestamp (seconds)
} | None,
"fetched_at": float,
}
"""
Expand All @@ -114,16 +122,30 @@ async def fetch_subscription_usage(
daily = data.get("daily", {})
monthly = data.get("monthly", {})
limits = data.get("limits", {})
weekly_tokens_raw = data.get("weeklyInputTokens")

# Parse weekly token quota if present
weekly_input_tokens = None
if weekly_tokens_raw is not None:
weekly_input_tokens = {
"used": weekly_tokens_raw.get("used", 0),
"remaining": weekly_tokens_raw.get("remaining", 0),
"percent_used": weekly_tokens_raw.get("percentUsed", 0.0),
# Convert epoch ms to seconds
"reset_at": weekly_tokens_raw.get("resetAt", 0) / 1000.0,
}

return {
"status": "success",
"error": None,
"active": data.get("active", False),
"allow_overage": data.get("allowOverage", False),
"state": data.get("state", "inactive"),
"enforce_daily_limit": data.get("enforceDailyLimit", False),
"limits": {
"daily": limits.get("daily", 0),
"monthly": limits.get("monthly", 0),
"weekly_input_tokens": limits.get("weeklyInputTokens", 0),
},
"daily": {
"used": daily.get("used", 0),
Expand All @@ -138,6 +160,7 @@ async def fetch_subscription_usage(
"percent_used": monthly.get("percentUsed", 0.0),
"reset_at": monthly.get("resetAt", 0) / 1000.0,
},
"weekly_input_tokens": weekly_input_tokens,
"fetched_at": time.time(),
}

Expand All @@ -154,10 +177,13 @@ async def fetch_subscription_usage(
"status": "error",
"error": error_msg,
"active": False,
"allow_overage": False,
"state": "unknown",
"limits": {"daily": 0, "monthly": 0},
"enforce_daily_limit": False,
"limits": {"daily": 0, "monthly": 0, "weekly_input_tokens": 0},
"daily": {"used": 0, "remaining": 0, "percent_used": 0.0, "reset_at": 0},
"monthly": {"used": 0, "remaining": 0, "percent_used": 0.0, "reset_at": 0},
"weekly_input_tokens": None,
"fetched_at": time.time(),
}
except Exception as e:
Expand All @@ -166,10 +192,13 @@ async def fetch_subscription_usage(
"status": "error",
"error": str(e),
"active": False,
"allow_overage": False,
"state": "unknown",
"limits": {"daily": 0, "monthly": 0},
"enforce_daily_limit": False,
"limits": {"daily": 0, "monthly": 0, "weekly_input_tokens": 0},
"daily": {"used": 0, "remaining": 0, "percent_used": 0.0, "reset_at": 0},
"monthly": {"used": 0, "remaining": 0, "percent_used": 0.0, "reset_at": 0},
"weekly_input_tokens": None,
"fetched_at": time.time(),
}

def get_remaining_fraction(self, usage_data: Dict[str, Any]) -> float:
    """
    Calculate remaining quota fraction from usage data.

    The monthly limit is always considered. The daily limit is only
    considered when ``usage_data["enforce_daily_limit"]`` is truthy.

    Args:
        usage_data: Response from fetch_subscription_usage()

    Returns:
        Remaining fraction clamped to the range 0.0 to 1.0, taken as the
        minimum across the limits that are considered. Returns 1.0 when
        none of those limits has a positive configured value.
    """
    limits = usage_data.get("limits", {})
    monthly = usage_data.get("monthly", {})
    daily = usage_data.get("daily", {})
    enforce_daily = usage_data.get("enforce_daily_limit", False)

    fractions = []

    # Monthly is always the primary hard limit; a non-positive limit means
    # "not configured" and must not be divided by.
    monthly_limit = limits.get("monthly", 0)
    if monthly_limit > 0:
        fractions.append(monthly.get("remaining", 0) / monthly_limit)

    # Daily only participates when the subscription enforces it.
    if enforce_daily:
        daily_limit = limits.get("daily", 0)
        if daily_limit > 0:
            fractions.append(daily.get("remaining", 0) / daily_limit)

    if not fractions:
        return 1.0  # No limits configured

    # Clamp: "remaining" may exceed the limit or go negative.
    return min(1.0, max(0.0, min(fractions)))

def get_reset_timestamp(self, usage_data: Dict[str, Any]) -> Optional[float]:
    """
    Get the next reset timestamp from usage data (monthly window).

    Args:
        usage_data: Response from fetch_subscription_usage()

    Returns:
        The ``reset_at`` value of the ``"monthly"`` entry when it is
        greater than zero, otherwise None.
    """
    monthly = usage_data.get("monthly", {})
    # `or 0` folds an explicit None into the "no reset known" case instead
    # of letting the comparison below raise TypeError.
    reset_at = monthly.get("reset_at") or 0
    return reset_at if reset_at > 0 else None

def get_weekly_token_reset_timestamp(self, usage_data: Dict[str, Any]) -> Optional[float]:
    """
    Get the weekly token quota reset timestamp from usage data.

    Args:
        usage_data: Response from fetch_subscription_usage()

    Returns:
        The ``reset_at`` value of the ``"weekly_input_tokens"`` entry when
        that entry is present, non-empty, and the value is greater than
        zero; otherwise None.
    """
    weekly = usage_data.get("weekly_input_tokens")
    if not weekly:
        # Entry is missing, None, or empty.
        return None
    # `or 0` folds an explicit None into the "no reset known" case instead
    # of letting the comparison below raise TypeError.
    reset_at = weekly.get("reset_at") or 0
    return reset_at if reset_at > 0 else None

# =========================================================================
Expand Down