refactor(providers): refresh defaults to current 2026 model lineup

Bump hard-coded provider defaults to the May 2026 lineup:

- Anthropic: claude-sonnet-4-6 (default); Opus 4.7 and Haiku 4.5 in
  the fallback list. The 4.6/4.7 generation has a 1M context window as
  standard; Haiku 4.5 stays at 200k.
- OpenAI: gpt-5.5 (default); 5.5-pro / 5.2 / 5.2-chat-latest in
  fallback. ThinkingModes are now part of the baseline capabilities
  inferred for GPT-5.x models.
- Google: gemini-3.5-flash (default); 3.1 Pro / Flash Lite in fallback.
- Mistral: mistral-large-latest unchanged (Mistral Large 3); add
  mistral-large-3, mistral-large-2512, mistral-medium-3.5, and
  mistral-medium-2511 to the rate-limit map.

Legacy model IDs are retained in the fallback lists and rate-limit maps
so that configs pinned to claude-sonnet-4-20250514 / gpt-4o /
gemini-2.5-flash keep resolving. Capability tables (ContextWindow,
MaxOutput, ThinkingModes) are updated to match each generation. The CLI
help text in cmd/gnoma/main.go is also updated.
This commit is contained in:
2026-05-20 03:13:21 +02:00
parent c4fde583f5
commit 99fa0ff08e
5 changed files with 214 additions and 83 deletions
+36 -36
View File
@@ -16,28 +16,28 @@ import (
"sync"
"time"
"somegit.dev/Owlibou/gnoma/internal/engine"
"somegit.dev/Owlibou/gnoma/internal/hook"
"somegit.dev/Owlibou/gnoma/internal/skill"
"somegit.dev/Owlibou/gnoma/internal/slm"
"somegit.dev/Owlibou/gnoma/internal/tool/persist"
gnomacfg "somegit.dev/Owlibou/gnoma/internal/config"
gnomactx "somegit.dev/Owlibou/gnoma/internal/context"
"somegit.dev/Owlibou/gnoma/internal/engine"
"somegit.dev/Owlibou/gnoma/internal/hook"
"somegit.dev/Owlibou/gnoma/internal/message"
"somegit.dev/Owlibou/gnoma/internal/permission"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/router"
"somegit.dev/Owlibou/gnoma/internal/security"
"somegit.dev/Owlibou/gnoma/internal/tokenizer"
anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic"
"somegit.dev/Owlibou/gnoma/internal/provider/mistral"
googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google"
"somegit.dev/Owlibou/gnoma/internal/provider/mistral"
oaiprov "somegit.dev/Owlibou/gnoma/internal/provider/openai"
"somegit.dev/Owlibou/gnoma/internal/provider/openaicompat"
subprocprov "somegit.dev/Owlibou/gnoma/internal/provider/subprocess"
"somegit.dev/Owlibou/gnoma/internal/router"
"somegit.dev/Owlibou/gnoma/internal/security"
"somegit.dev/Owlibou/gnoma/internal/session"
"somegit.dev/Owlibou/gnoma/internal/skill"
"somegit.dev/Owlibou/gnoma/internal/slm"
"somegit.dev/Owlibou/gnoma/internal/stream"
"somegit.dev/Owlibou/gnoma/internal/tokenizer"
"somegit.dev/Owlibou/gnoma/internal/tool"
"somegit.dev/Owlibou/gnoma/internal/tool/persist"
"somegit.dev/Owlibou/gnoma/internal/tui"
tea "charm.land/bubbletea/v2"
@@ -413,10 +413,10 @@ func main() {
armID = router.NewArmID(*providerName, armModel)
armProvider := security.WrapProvider(limitedProvider(prov, *providerName, armModel, cfg), fwRef)
arm := &router.Arm{
ID: armID,
Provider: armProvider,
ModelName: armModel,
IsLocal: localProviders[*providerName],
ID: armID,
Provider: armProvider,
ModelName: armModel,
IsLocal: localProviders[*providerName],
Capabilities: provider.Capabilities{ToolUse: true},
}
arm.Pools = resolveRateLimitPools(armID, *providerName, armModel, cfg)
@@ -698,10 +698,10 @@ func main() {
// Build skill registry: bundled → user → plugins → project (precedence order).
skillReg := skill.NewRegistry()
skillReg.LoadBundled() //nolint:errcheck
skillReg.LoadDir(filepath.Join(gnomacfg.GlobalConfigDir(), "skills"), "user") //nolint:errcheck
skillReg.LoadBundled() //nolint:errcheck
skillReg.LoadDir(filepath.Join(gnomacfg.GlobalConfigDir(), "skills"), "user") //nolint:errcheck
for _, ps := range pluginResult.Skills {
skillReg.LoadDir(ps.Dir, ps.Source) //nolint:errcheck
skillReg.LoadDir(ps.Dir, ps.Source) //nolint:errcheck
}
skillReg.LoadDir(filepath.Join(gnomacfg.ProjectRoot(), ".gnoma", "skills"), "project") //nolint:errcheck
@@ -838,17 +838,17 @@ func main() {
// Wrap even though the engine's own buildRequest scans inline —
// belt-and-suspenders so a future engine path that bypasses
// buildRequest still routes through the firewall.
Provider: security.WrapProvider(prov, fwRef),
Router: rtr,
Classifier: engineClassifier,
Tools: reg,
Firewall: fw,
Permissions: permChecker,
Context: ctxWindow,
System: systemPrompt,
Model: *model,
Temperature: cfg.Provider.Temperature,
MaxTurns: *maxTurns,
Provider: security.WrapProvider(prov, fwRef),
Router: rtr,
Classifier: engineClassifier,
Tools: reg,
Firewall: fw,
Permissions: permChecker,
Context: ctxWindow,
System: systemPrompt,
Model: *model,
Temperature: cfg.Provider.Temperature,
MaxTurns: *maxTurns,
Store: store,
Hooks: dispatcher,
Logger: logger,
@@ -916,7 +916,7 @@ func main() {
mode = "pipe"
}
dispatcher.Fire(hook.SessionStart, hook.MarshalSessionStartPayload(sessionID, mode)) //nolint:errcheck
defer dispatcher.Fire(hook.SessionEnd, hook.MarshalSessionEndPayload(sessionID, 0)) //nolint:errcheck
defer dispatcher.Fire(hook.SessionEnd, hook.MarshalSessionEndPayload(sessionID, 0)) //nolint:errcheck
if input != "" {
// Pipe mode: single input → stream to stdout
@@ -974,8 +974,8 @@ func main() {
}
} else {
// TUI mode: permission prompts via channels
permCh := make(chan bool) // TUI → engine: y/n response
permReqCh := make(chan tui.PermReqMsg, 1) // engine → TUI: tool requesting permission
permCh := make(chan bool) // TUI → engine: y/n response
permReqCh := make(chan tui.PermReqMsg, 1) // engine → TUI: tool requesting permission
permChecker.SetPromptFunc(func(ctx context.Context, toolName string, args json.RawMessage) (bool, error) {
// Notify TUI that a permission prompt is needed
select {
@@ -1312,10 +1312,10 @@ func buildPluginInfos(plugins []plugin.Plugin, enabledSet map[string]bool) []tui
// Logs the first deferred-fallback at INFO so operators can tell when the
// SLM was not yet ready vs. unconfigured.
type lazyClassifier struct {
mu sync.Mutex
inner router.TaskClassifier
deferredLogged bool
logger *slog.Logger
mu sync.Mutex
inner router.TaskClassifier
deferredLogged bool
logger *slog.Logger
}
func (l *lazyClassifier) set(c router.TaskClassifier) {
@@ -1349,7 +1349,7 @@ type stubProvider struct{ reason string }
func newStubProvider(reason string) provider.Provider { return &stubProvider{reason: reason} }
func (s *stubProvider) Name() string { return "none" }
func (s *stubProvider) Name() string { return "none" }
func (s *stubProvider) DefaultModel() string { return "none" }
func (s *stubProvider) Models(_ context.Context) ([]provider.ModelInfo, error) {
return nil, fmt.Errorf("%s", s.reason)
@@ -1430,7 +1430,7 @@ func runProvidersCommand(cfg *gnomacfg.Config, logger *slog.Logger) int {
}
fmt.Println("\nTo set a provider:")
fmt.Println(" gnoma --provider anthropic --model claude-opus-4-5 (one-off)")
fmt.Println(" gnoma --provider anthropic --model claude-opus-4-7 (one-off)")
fmt.Println(" gnoma config set provider.default anthropic (permanent)")
return 0
+37 -11
View File
@@ -11,7 +11,7 @@ import (
"github.com/anthropics/anthropic-sdk-go/option"
)
const defaultModel = "claude-sonnet-4-20250514"
const defaultModel = "claude-sonnet-4-6"
// Provider implements provider.Provider for the Anthropic API.
type Provider struct {
@@ -79,7 +79,7 @@ func (p *Provider) DefaultModel() string {
// Models returns available Anthropic models with capabilities by querying the API.
func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
pager := p.client.Models.ListAutoPaging(ctx, anthropic.ModelListParams{})
var models []provider.ModelInfo
for pager.Next() {
m := pager.Current()
@@ -108,24 +108,24 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
func (p *Provider) fallbackModels() []provider.ModelInfo {
return []provider.ModelInfo{
{
ID: "claude-opus-4-20250514", Name: "Claude Opus 4", Provider: p.name,
ID: "claude-opus-4-7", Name: "Claude Opus 4.7", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
Vision: true,
ContextWindow: 200000,
ContextWindow: 1_000_000,
MaxOutput: 32000,
},
},
{
ID: "claude-sonnet-4-20250514", Name: "Claude Sonnet 4", Provider: p.name,
ID: "claude-sonnet-4-6", Name: "Claude Sonnet 4.6", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
Vision: true,
ContextWindow: 200000,
ContextWindow: 1_000_000,
MaxOutput: 16000,
},
},
@@ -136,31 +136,57 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
ContextWindow: 200000, MaxOutput: 8192,
},
},
// Legacy 4.0 IDs retained so user-pinned models continue to surface.
{
ID: "claude-opus-4-20250514", Name: "Claude Opus 4 (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 200000, MaxOutput: 32000,
},
},
{
ID: "claude-sonnet-4-20250514", Name: "Claude Sonnet 4 (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 200000, MaxOutput: 16000,
},
},
}
}
// inferAnthropicModelCapabilities infers capabilities from model ID.
func inferAnthropicModelCapabilities(modelID string) provider.Capabilities {
// Default capabilities for most modern Claude models
// Default capabilities for most modern Claude models (4.6/4.7 baseline).
caps := provider.Capabilities{
ToolUse: true,
JSONOutput: true,
Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 200000,
ContextWindow: 1_000_000,
MaxOutput: 16000,
}
// Model-specific overrides
switch modelID {
case "claude-opus-4-20250514", "claude-opus-4-20250612":
case "claude-opus-4-7", "claude-opus-4-6":
caps.MaxOutput = 32000
case "claude-sonnet-4-6":
caps.MaxOutput = 16000
case "claude-haiku-4-5-20251001", "claude-haiku-4-5":
caps.ContextWindow = 200000
caps.MaxOutput = 8192
case "claude-opus-4-20250514", "claude-opus-4-20250612":
caps.ContextWindow = 200000
caps.MaxOutput = 32000
case "claude-sonnet-4-20250514":
caps.ContextWindow = 200000
caps.MaxOutput = 16000
case "claude-3-opus-20240229", "claude-3-sonnet-20240229":
caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}
caps.ContextWindow = 200000
caps.MaxOutput = 4096
case "claude-3-haiku-20240307":
caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}
caps.ContextWindow = 200000
caps.MaxOutput = 4096
case "claude-2", "claude-2:1", "claude-instant-1":
+38 -5
View File
@@ -10,7 +10,7 @@ import (
"google.golang.org/genai"
)
const defaultModel = "gemini-2.5-flash"
const defaultModel = "gemini-3.5-flash"
// Provider implements provider.Provider for Google's Gemini API.
type Provider struct {
@@ -74,7 +74,7 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
// Fallback to hardcoded list if API call fails
return p.fallbackModels(), nil
}
caps := inferGoogleModelCapabilities(model)
models = append(models, provider.ModelInfo{
ID: model.Name,
@@ -96,7 +96,7 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
func (p *Provider) fallbackModels() []provider.ModelInfo {
return []provider.ModelInfo{
{
ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Provider: p.name,
ID: "gemini-3.1-pro-preview", Name: "Gemini 3.1 Pro", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
@@ -107,7 +107,7 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
},
},
{
ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Provider: p.name,
ID: "gemini-3.5-flash", Name: "Gemini 3.5 Flash", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
@@ -118,7 +118,37 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
},
},
{
ID: "gemini-2.0-flash", Name: "Gemini 2.0 Flash", Provider: p.name,
ID: "gemini-3.1-flash-lite", Name: "Gemini 3.1 Flash Lite", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ContextWindow: 1048576, MaxOutput: 65536,
},
},
// Legacy IDs retained for users pinned to older models.
{
ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
Vision: true,
ContextWindow: 1048576,
MaxOutput: 65536,
},
},
{
ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
Vision: true,
ContextWindow: 1048576,
MaxOutput: 65536,
},
},
{
ID: "gemini-2.0-flash", Name: "Gemini 2.0 Flash (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ContextWindow: 1048576, MaxOutput: 8192,
@@ -141,6 +171,9 @@ func inferGoogleModelCapabilities(m *genai.Model) provider.Capabilities {
// Model-specific overrides based on model name
switch m.Name {
case "gemini-3.1-pro-preview", "gemini-3.5-flash", "gemini-3.1-flash-lite":
caps.ContextWindow = 1048576
caps.MaxOutput = 65536
case "gemini-2.5-pro", "gemini-2.5-flash":
caps.ContextWindow = 1048576
caps.MaxOutput = 65536
+58 -8
View File
@@ -11,7 +11,7 @@ import (
"github.com/openai/openai-go/option"
)
const defaultModel = "gpt-4o"
const defaultModel = "gpt-5.5"
// Provider implements provider.Provider for the OpenAI API.
type Provider struct {
@@ -82,7 +82,7 @@ func (p *Provider) DefaultModel() string { return p.model }
// Models returns available OpenAI models with capabilities by querying the API.
func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
pager := p.client.Models.ListAutoPaging(ctx)
var models []provider.ModelInfo
for pager.Next() {
m := pager.Current()
@@ -111,14 +111,55 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
func (p *Provider) fallbackModels() []provider.ModelInfo {
return []provider.ModelInfo{
{
ID: "gpt-4o", Name: "GPT-4o", Provider: p.name,
ID: "gpt-5.5", Name: "GPT-5.5", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 1_000_000,
MaxOutput: 32000,
},
},
{
ID: "gpt-5.5-pro", Name: "GPT-5.5 Pro", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 1_000_000,
MaxOutput: 32000,
},
},
{
ID: "gpt-5.2", Name: "GPT-5.2 Thinking", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true,
JSONOutput: true,
Vision: true,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 400000,
MaxOutput: 32000,
},
},
{
ID: "gpt-5.2-chat-latest", Name: "GPT-5.2 Instant", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ContextWindow: 400000, MaxOutput: 32000,
},
},
// Legacy IDs retained for users pinned to older models.
{
ID: "gpt-4o", Name: "GPT-4o (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ContextWindow: 128000, MaxOutput: 16384,
},
},
{
ID: "gpt-4o-mini", Name: "GPT-4o Mini", Provider: p.name,
ID: "gpt-4o-mini", Name: "GPT-4o Mini (legacy)", Provider: p.name,
Capabilities: provider.Capabilities{
ToolUse: true, JSONOutput: true, Vision: true,
ContextWindow: 128000, MaxOutput: 16384,
@@ -149,29 +190,38 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
// inferOpenAIModelCapabilities infers capabilities from model ID.
func inferOpenAIModelCapabilities(modelID string) provider.Capabilities {
// Default capabilities for most modern OpenAI models
// Default capabilities for most modern OpenAI models (GPT-5.x baseline).
caps := provider.Capabilities{
ToolUse: true,
JSONOutput: true,
Vision: true,
ContextWindow: 128000,
MaxOutput: 16384,
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
ContextWindow: 400000,
MaxOutput: 32000,
}
// Model-specific overrides
switch modelID {
case "gpt-5.5", "gpt-5.5-pro":
caps.ContextWindow = 1_000_000
caps.MaxOutput = 32000
case "gpt-5.2", "gpt-5.2-chat-latest":
caps.ContextWindow = 400000
caps.MaxOutput = 32000
case "gpt-4o", "gpt-4o-mini":
caps.ThinkingModes = nil
caps.ContextWindow = 128000
caps.MaxOutput = 16384
case "o3", "o3-mini":
caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}
caps.ContextWindow = 200000
caps.MaxOutput = 100000
case "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613":
caps.ThinkingModes = nil
caps.Vision = false
caps.ContextWindow = 8192
caps.MaxOutput = 8192
case "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613":
caps.ThinkingModes = nil
caps.Vision = false
caps.ToolUse = false
caps.ContextWindow = 16384
+45 -23
View File
@@ -84,12 +84,17 @@ func mistralDefaults() ProviderDefaults {
Tier: "starter",
Models: map[string]RateLimits{
"*": base,
// Mistral 3 (released Dec 2025) — flagship.
"mistral-large-3": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
"mistral-large-2512": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
"mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
"mistral-medium-3.5": {RPS: 1, TPM: 375_000},
"mistral-medium-2511": {RPS: 1, TPM: 375_000},
// Magistral models get higher limits
"magistral-medium-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
"magistral-small-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000},
// Large/medium get higher TPM
// Older Large/medium
"mistral-large-2411": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000},
"mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000},
"mistral-medium-2505": {RPS: 1, TPM: 375_000},
"mistral-medium-2508": {RPS: 1, TPM: 375_000},
"mistral-small-2603": {RPS: 1, TPM: 375_000},
@@ -108,15 +113,18 @@ func anthropicDefaults() ProviderDefaults {
Tier: "tier1",
Models: map[string]RateLimits{
"*": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
// Claude 4.x Opus (shared across 4, 4.1, 4.5, 4.6)
"claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
// Claude 4.x Sonnet (shared across 4, 4.5, 4.6)
"claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
// Haiku
"claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
"claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
// Claude 4.6 / 4.7 generation — dateless IDs.
"claude-opus-4-7": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-opus-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-sonnet-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-haiku-4-5": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
"claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
// Legacy dated 4.0 IDs.
"claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000},
"claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000},
},
}
}
@@ -127,13 +135,21 @@ func openaiDefaults() ProviderDefaults {
Provider: "openai",
Tier: "tier1",
Models: map[string]RateLimits{
"*": {RPM: 500, TPM: 30_000, RPD: 10_000},
"gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000},
"gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000},
"o1": {RPM: 500, TPM: 30_000},
"o3": {RPM: 500, TPM: 30_000},
"o3-mini": {RPM: 500, TPM: 200_000},
"o4-mini": {RPM: 500, TPM: 200_000},
"*": {RPM: 500, TPM: 30_000, RPD: 10_000},
// GPT-5.5 generation.
"gpt-5.5": {RPM: 500, TPM: 30_000, RPD: 10_000},
"gpt-5.5-pro": {RPM: 500, TPM: 30_000, RPD: 10_000},
"gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000},
// GPT-5.2 generation.
"gpt-5.2": {RPM: 500, TPM: 200_000, RPD: 10_000},
"gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000},
// Legacy.
"gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000},
"gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000},
"o1": {RPM: 500, TPM: 30_000},
"o3": {RPM: 500, TPM: 30_000},
"o3-mini": {RPM: 500, TPM: 200_000},
"o4-mini": {RPM: 500, TPM: 200_000},
},
}
}
@@ -144,12 +160,18 @@ func googleDefaults() ProviderDefaults {
Provider: "google",
Tier: "free",
Models: map[string]RateLimits{
"*": {RPM: 15, TPM: 250_000, RPD: 250},
"gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100},
"gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100},
"gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
"*": {RPM: 15, TPM: 250_000, RPD: 250},
// Gemini 3.x generation.
"gemini-3.1-pro-preview": {RPM: 5, TPM: 250_000, RPD: 100},
"gemini-3.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
"gemini-3.1-flash-lite": {RPM: 15, TPM: 250_000, RPD: 250},
"gemini-3.1-flash-image-preview": {RPM: 15, TPM: 250_000, RPD: 250},
// Legacy.
"gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100},
"gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100},
"gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250},
"gemini-2.5-flash-preview-04-17": {RPM: 15, TPM: 250_000, RPD: 250},
"gemini-2.0-flash": {RPM: 10, RPD: 1_500},
"gemini-2.0-flash": {RPM: 10, RPD: 1_500},
},
}
}