2f8d4c412f
Closes R-4 and R-5 of the routing-defaults plan.
R-4: Strengths + CostWeight defaults for closed frontier models.
Cloud entries land in the same knownFamilyDefaults table as local
ones, with MaxComplexity intentionally left zero (cloud arms get
no complexity ceiling). CostWeight tuned per the plan's rationale:
claude-opus-4-7 → Planning/SecurityReview/Debug/Refactor, 0.3
claude-sonnet-4-6 → Generation/Refactor/Review, 0.7
gpt-5.5 → Planning/SecurityReview/Generation, 0.3
gpt-5.3-codex → Generation/Refactor/Debug/UnitTest, 0.6
gpt-5.2 → Orchestration/Review, 0.8
gemini-3.1-pro → Planning/Review/Orchestration, 0.5
gemini-3.5-flash → Boilerplate/Explain/Orchestration, 1.2
The 0.3 weight on frontier arms keeps them competitive on
SecurityReview / Planning despite $4+/Mtok; 1.2 on Gemini Flash
penalizes cost more so it only wins when cost is genuinely
decisive (boilerplate, explain).
Mechanism: extracted applyFamilyDefaults into defaults.go and call
it from Router.RegisterArm. Single source of truth — both local
discovery and the primary-provider path in cmd/gnoma/main.go now
flow through the same defaults application. Removed the duplicate
apply block from RegisterDiscoveredModels.
Legacy model IDs (claude-opus-4-20250514, gpt-4o, o3, gemini-2.5-pro,
etc.) intentionally do not match any table entry — keeps users on
pinned older models safe from imposed 2026 Strengths.
R-5: gpt-5.3-codex registration.
- internal/provider/openai/provider.go: added to fallbackModels
and inferOpenAIModelCapabilities (400K context, 32K output).
- internal/provider/ratelimits.go: gpt-5.3-codex and its dated
alias gpt-5.3-codex-2026-02-15 added with the same Tier 1
quotas as gpt-5.2.
Gemini 3.x (3.1-pro-preview, 3.5-flash, 3.1-flash-lite) was already
registered in both google/provider.go and ratelimits.go — no change
needed for that part of R-5.
Test coverage:
- ResolveFamilyDefaults table-driven across all 7 cloud entries
including prefix-sharing (gpt-5.5-pro → gpt-5.5 defaults,
gemini-3.1-pro-preview → gemini-3.1-pro defaults).
- Legacy IDs return !ok.
- RegisterArm applies cloud defaults end-to-end.
- User-supplied Strengths and CostWeight are not overridden.
- ID.Model() fallback works when ModelName is empty (test code
often constructs arms this way).
Refs: docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
181 lines
6.5 KiB
Go
181 lines
6.5 KiB
Go
package provider
|
|
|
|
import "math"
|
|
|
|
// RateLimits captures the quota knobs that apply to one provider+model pair.
// A field left at its zero value means that dimension is unlimited or unknown.
type RateLimits struct {
	RPS         float64 // requests per second (Mistral global)
	RPM         int     // requests per minute
	RPD         int     // requests per day
	TPM         int     // tokens per minute (combined input+output)
	ITPM        int     // input tokens per minute (Anthropic)
	OTPM        int     // output tokens per minute (Anthropic)
	TokensMonth int64   // tokens per month
	SpendCap    float64 // monthly spend cap in provider currency
}

// MaxConcurrent derives a cap on simultaneously in-flight requests from the
// request-rate fields.
//
// A positive RPS takes precedence and yields ceil(RPS), never less than 1.
// Otherwise a positive RPM yields one slot per 30 RPM, clamped to [1, 16].
// When neither field is positive the result is 0, meaning no meaningful
// concurrency constraint could be derived.
func (rl RateLimits) MaxConcurrent() int {
	switch {
	case rl.RPS > 0:
		slots := int(math.Ceil(rl.RPS))
		// Ceil of a positive rate is normally >= 1 already; the floor is kept
		// as a defensive guard for values the int conversion cannot represent.
		if slots < 1 {
			return 1
		}
		return slots

	case rl.RPM > 0:
		const (
			rpmPerSlot = 30 // conservative heuristic: one slot per 30 RPM
			slotCap    = 16
		)
		slots := rl.RPM / rpmPerSlot
		switch {
		case slots < 1:
			return 1
		case slots > slotCap:
			return slotCap
		}
		return slots

	default:
		return 0
	}
}
|
|
|
|
// ProviderDefaults holds default rate limits keyed by model glob.
|
|
// The special key "*" matches any model not explicitly listed.
|
|
type ProviderDefaults struct {
|
|
Provider string
|
|
Tier string // "free", "tier1", "tier2", etc.
|
|
Models map[string]RateLimits
|
|
}
|
|
|
|
// DefaultRateLimits returns conservative defaults for known providers.
|
|
// These are "starter tier" limits — users should override via config.
|
|
func DefaultRateLimits(providerName string) ProviderDefaults {
|
|
switch providerName {
|
|
case "mistral":
|
|
return mistralDefaults()
|
|
case "anthropic":
|
|
return anthropicDefaults()
|
|
case "openai":
|
|
return openaiDefaults()
|
|
case "google":
|
|
return googleDefaults()
|
|
default:
|
|
return ProviderDefaults{Provider: providerName}
|
|
}
|
|
}
|
|
|
|
// LookupModel finds rate limits for a specific model, falling back to "*".
|
|
func (pd ProviderDefaults) LookupModel(model string) (RateLimits, bool) {
|
|
if rl, ok := pd.Models[model]; ok {
|
|
return rl, true
|
|
}
|
|
if rl, ok := pd.Models["*"]; ok {
|
|
return rl, true
|
|
}
|
|
return RateLimits{}, false
|
|
}
|
|
|
|
func mistralDefaults() ProviderDefaults {
|
|
// Starter tier from Mistral dashboard. Spend cap is variable — not hardcoded.
|
|
base := RateLimits{RPS: 1, TPM: 50_000, TokensMonth: 4_000_000}
|
|
return ProviderDefaults{
|
|
Provider: "mistral",
|
|
Tier: "starter",
|
|
Models: map[string]RateLimits{
|
|
"*": base,
|
|
// Mistral 3 (released Dec 2025) — flagship.
|
|
"mistral-large-3": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-large-2512": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
"mistral-medium-3.5": {RPS: 1, TPM: 375_000},
|
|
"mistral-medium-2511": {RPS: 1, TPM: 375_000},
|
|
// Magistral models get higher limits
|
|
"magistral-medium-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
|
|
"magistral-small-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
|
|
// Older Large/medium
|
|
"mistral-large-2411": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
|
|
"mistral-medium-2505": {RPS: 1, TPM: 375_000},
|
|
"mistral-medium-2508": {RPS: 1, TPM: 375_000},
|
|
"mistral-small-2603": {RPS: 1, TPM: 375_000},
|
|
// Codestral
|
|
"codestral-2508": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
// Pixtral
|
|
"pixtral-large-2411": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func anthropicDefaults() ProviderDefaults {
|
|
// Tier 1 (lowest paid tier, $5 deposit). Users on higher tiers override via config.
|
|
return ProviderDefaults{
|
|
Provider: "anthropic",
|
|
Tier: "tier1",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
// Claude 4.6 / 4.7 generation — dateless IDs.
|
|
"claude-opus-4-7": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-opus-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-haiku-4-5": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
"claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
// Legacy dated 4.0 IDs.
|
|
"claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
|
|
"claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func openaiDefaults() ProviderDefaults {
|
|
// Tier 1 ($5 paid). Higher tiers have dramatically higher limits.
|
|
return ProviderDefaults{
|
|
Provider: "openai",
|
|
Tier: "tier1",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
// GPT-5.5 generation.
|
|
"gpt-5.5": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-5.5-pro": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
// GPT-5.3 Codex (coding-specialist branch).
|
|
"gpt-5.3-codex": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
"gpt-5.3-codex-2026-02-15": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
// GPT-5.2 generation.
|
|
"gpt-5.2": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
"gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
// Legacy.
|
|
"gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
|
"gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
|
"o1": {RPM: 500, TPM: 30_000},
|
|
"o3": {RPM: 500, TPM: 30_000},
|
|
"o3-mini": {RPM: 500, TPM: 200_000},
|
|
"o4-mini": {RPM: 500, TPM: 200_000},
|
|
},
|
|
}
|
|
}
|
|
|
|
func googleDefaults() ProviderDefaults {
|
|
// Free tier. Pay-as-you-go Tier 1 is significantly higher.
|
|
return ProviderDefaults{
|
|
Provider: "google",
|
|
Tier: "free",
|
|
Models: map[string]RateLimits{
|
|
"*": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
// Gemini 3.x generation.
|
|
"gemini-3.1-pro-preview": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-3.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-3.1-flash-lite": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-3.1-flash-image-preview": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
// Legacy.
|
|
"gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100},
|
|
"gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-2.5-flash-preview-04-17": {RPM: 15, TPM: 250_000, RPD: 250},
|
|
"gemini-2.0-flash": {RPM: 10, RPD: 1_500},
|
|
},
|
|
}
|
|
}
|