diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go index 52f50f8..809f6fa 100644 --- a/cmd/gnoma/main.go +++ b/cmd/gnoma/main.go @@ -16,28 +16,28 @@ import ( "sync" "time" - "somegit.dev/Owlibou/gnoma/internal/engine" - "somegit.dev/Owlibou/gnoma/internal/hook" - "somegit.dev/Owlibou/gnoma/internal/skill" - "somegit.dev/Owlibou/gnoma/internal/slm" - "somegit.dev/Owlibou/gnoma/internal/tool/persist" gnomacfg "somegit.dev/Owlibou/gnoma/internal/config" gnomactx "somegit.dev/Owlibou/gnoma/internal/context" + "somegit.dev/Owlibou/gnoma/internal/engine" + "somegit.dev/Owlibou/gnoma/internal/hook" "somegit.dev/Owlibou/gnoma/internal/message" "somegit.dev/Owlibou/gnoma/internal/permission" "somegit.dev/Owlibou/gnoma/internal/provider" - "somegit.dev/Owlibou/gnoma/internal/router" - "somegit.dev/Owlibou/gnoma/internal/security" - "somegit.dev/Owlibou/gnoma/internal/tokenizer" anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic" - "somegit.dev/Owlibou/gnoma/internal/provider/mistral" googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google" + "somegit.dev/Owlibou/gnoma/internal/provider/mistral" oaiprov "somegit.dev/Owlibou/gnoma/internal/provider/openai" "somegit.dev/Owlibou/gnoma/internal/provider/openaicompat" subprocprov "somegit.dev/Owlibou/gnoma/internal/provider/subprocess" + "somegit.dev/Owlibou/gnoma/internal/router" + "somegit.dev/Owlibou/gnoma/internal/security" "somegit.dev/Owlibou/gnoma/internal/session" + "somegit.dev/Owlibou/gnoma/internal/skill" + "somegit.dev/Owlibou/gnoma/internal/slm" "somegit.dev/Owlibou/gnoma/internal/stream" + "somegit.dev/Owlibou/gnoma/internal/tokenizer" "somegit.dev/Owlibou/gnoma/internal/tool" + "somegit.dev/Owlibou/gnoma/internal/tool/persist" "somegit.dev/Owlibou/gnoma/internal/tui" tea "charm.land/bubbletea/v2" @@ -413,10 +413,10 @@ func main() { armID = router.NewArmID(*providerName, armModel) armProvider := security.WrapProvider(limitedProvider(prov, *providerName, armModel, cfg), fwRef) arm := &router.Arm{ - ID: armID, - Provider: armProvider, - ModelName: armModel, - IsLocal: localProviders[*providerName], + ID: armID, + Provider: armProvider, + ModelName: armModel, + IsLocal: localProviders[*providerName], Capabilities: provider.Capabilities{ToolUse: true}, } arm.Pools = resolveRateLimitPools(armID, *providerName, armModel, cfg) @@ -698,10 +698,10 @@ func main() { // Build skill registry: bundled → user → plugins → project (precedence order). skillReg := skill.NewRegistry() - skillReg.LoadBundled() //nolint:errcheck - skillReg.LoadDir(filepath.Join(gnomacfg.GlobalConfigDir(), "skills"), "user") //nolint:errcheck + skillReg.LoadBundled() //nolint:errcheck + skillReg.LoadDir(filepath.Join(gnomacfg.GlobalConfigDir(), "skills"), "user") //nolint:errcheck for _, ps := range pluginResult.Skills { - skillReg.LoadDir(ps.Dir, ps.Source) //nolint:errcheck + skillReg.LoadDir(ps.Dir, ps.Source) //nolint:errcheck } skillReg.LoadDir(filepath.Join(gnomacfg.ProjectRoot(), ".gnoma", "skills"), "project") //nolint:errcheck @@ -838,17 +838,17 @@ func main() { // Wrap even though the engine's own buildRequest scans inline — // belt-and-suspenders so a future engine path that bypasses // buildRequest still routes through the firewall. - Provider: security.WrapProvider(prov, fwRef), - Router: rtr, - Classifier: engineClassifier, - Tools: reg, - Firewall: fw, - Permissions: permChecker, - Context: ctxWindow, - System: systemPrompt, - Model: *model, - Temperature: cfg.Provider.Temperature, - MaxTurns: *maxTurns, + Provider: security.WrapProvider(prov, fwRef), + Router: rtr, + Classifier: engineClassifier, + Tools: reg, + Firewall: fw, + Permissions: permChecker, + Context: ctxWindow, + System: systemPrompt, + Model: *model, + Temperature: cfg.Provider.Temperature, + MaxTurns: *maxTurns, Store: store, Hooks: dispatcher, Logger: logger, @@ -916,7 +916,7 @@ func main() { mode = "pipe" } dispatcher.Fire(hook.SessionStart, hook.MarshalSessionStartPayload(sessionID, mode)) //nolint:errcheck - defer dispatcher.Fire(hook.SessionEnd, hook.MarshalSessionEndPayload(sessionID, 0)) //nolint:errcheck + defer dispatcher.Fire(hook.SessionEnd, hook.MarshalSessionEndPayload(sessionID, 0)) //nolint:errcheck if input != "" { // Pipe mode: single input → stream to stdout @@ -974,8 +974,8 @@ func main() { } } else { // TUI mode: permission prompts via channels - permCh := make(chan bool) // TUI → engine: y/n response - permReqCh := make(chan tui.PermReqMsg, 1) // engine → TUI: tool requesting permission + permCh := make(chan bool) // TUI → engine: y/n response + permReqCh := make(chan tui.PermReqMsg, 1) // engine → TUI: tool requesting permission permChecker.SetPromptFunc(func(ctx context.Context, toolName string, args json.RawMessage) (bool, error) { // Notify TUI that a permission prompt is needed select { @@ -1312,10 +1312,10 @@ func buildPluginInfos(plugins []plugin.Plugin, enabledSet map[string]bool) []tui // Logs the first deferred-fallback at INFO so operators can tell when the // SLM was not yet ready vs. unconfigured. type lazyClassifier struct { - mu sync.Mutex - inner router.TaskClassifier - deferredLogged bool - logger *slog.Logger + mu sync.Mutex + inner router.TaskClassifier + deferredLogged bool + logger *slog.Logger } func (l *lazyClassifier) set(c router.TaskClassifier) { @@ -1349,7 +1349,7 @@ type stubProvider struct{ reason string } func newStubProvider(reason string) provider.Provider { return &stubProvider{reason: reason} } -func (s *stubProvider) Name() string { return "none" } +func (s *stubProvider) Name() string { return "none" } func (s *stubProvider) DefaultModel() string { return "none" } func (s *stubProvider) Models(_ context.Context) ([]provider.ModelInfo, error) { return nil, fmt.Errorf("%s", s.reason) @@ -1430,7 +1430,7 @@ func runProvidersCommand(cfg *gnomacfg.Config, logger *slog.Logger) int { } fmt.Println("\nTo set a provider:") - fmt.Println(" gnoma --provider anthropic --model claude-opus-4-5 (one-off)") + fmt.Println(" gnoma --provider anthropic --model claude-opus-4-7 (one-off)") fmt.Println(" gnoma config set provider.default anthropic (permanent)") return 0 diff --git a/internal/provider/anthropic/provider.go b/internal/provider/anthropic/provider.go index cc200e3..c71763e 100644 --- a/internal/provider/anthropic/provider.go +++ b/internal/provider/anthropic/provider.go @@ -11,7 +11,7 @@ import ( "github.com/anthropics/anthropic-sdk-go/option" ) -const defaultModel = "claude-sonnet-4-20250514" +const defaultModel = "claude-sonnet-4-6" // Provider implements provider.Provider for the Anthropic API. type Provider struct { @@ -79,7 +79,7 @@ func (p *Provider) DefaultModel() string { // Models returns available Anthropic models with capabilities by querying the API. func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { pager := p.client.Models.ListAutoPaging(ctx, anthropic.ModelListParams{}) - + var models []provider.ModelInfo for pager.Next() { m := pager.Current() @@ -108,24 +108,24 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { func (p *Provider) fallbackModels() []provider.ModelInfo { return []provider.ModelInfo{ { - ID: "claude-opus-4-20250514", Name: "Claude Opus 4", Provider: p.name, + ID: "claude-opus-4-7", Name: "Claude Opus 4.7", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, Vision: true, - ContextWindow: 200000, + ContextWindow: 1_000_000, MaxOutput: 32000, }, }, { - ID: "claude-sonnet-4-20250514", Name: "Claude Sonnet 4", Provider: p.name, + ID: "claude-sonnet-4-6", Name: "Claude Sonnet 4.6", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, Vision: true, - ContextWindow: 200000, + ContextWindow: 1_000_000, MaxOutput: 16000, }, }, @@ -136,31 +136,57 @@ func (p *Provider) fallbackModels() []provider.ModelInfo { ContextWindow: 200000, MaxOutput: 8192, }, }, + // Legacy 4.0 IDs retained so user-pinned models continue to surface. + { + ID: "claude-opus-4-20250514", Name: "Claude Opus 4 (legacy)", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, JSONOutput: true, Vision: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 200000, MaxOutput: 32000, + }, + }, + { + ID: "claude-sonnet-4-20250514", Name: "Claude Sonnet 4 (legacy)", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, JSONOutput: true, Vision: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 200000, MaxOutput: 16000, + }, + }, } } // inferAnthropicModelCapabilities infers capabilities from model ID. func inferAnthropicModelCapabilities(modelID string) provider.Capabilities { - // Default capabilities for most modern Claude models + // Default capabilities for most modern Claude models (4.6/4.7 baseline). caps := provider.Capabilities{ ToolUse: true, JSONOutput: true, Vision: true, ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, - ContextWindow: 200000, + ContextWindow: 1_000_000, MaxOutput: 16000, } // Model-specific overrides switch modelID { - case "claude-opus-4-20250514", "claude-opus-4-20250612": + case "claude-opus-4-7", "claude-opus-4-6": caps.MaxOutput = 32000 + case "claude-sonnet-4-6": + caps.MaxOutput = 16000 + case "claude-haiku-4-5-20251001", "claude-haiku-4-5": + caps.ContextWindow = 200000 + caps.MaxOutput = 8192 + case "claude-opus-4-20250514", "claude-opus-4-20250612": + caps.ContextWindow = 200000 + caps.MaxOutput = 32000 + case "claude-sonnet-4-20250514": + caps.ContextWindow = 200000 + caps.MaxOutput = 16000 case "claude-3-opus-20240229", "claude-3-sonnet-20240229": - caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh} caps.ContextWindow = 200000 caps.MaxOutput = 4096 case "claude-3-haiku-20240307": - caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh} caps.ContextWindow = 200000 caps.MaxOutput = 4096 case "claude-2", "claude-2:1", "claude-instant-1": diff --git a/internal/provider/google/provider.go b/internal/provider/google/provider.go index 10259d1..ad15d0a 100644 --- a/internal/provider/google/provider.go +++ b/internal/provider/google/provider.go @@ -10,7 +10,7 @@ import ( "google.golang.org/genai" ) -const defaultModel = "gemini-2.5-flash" +const defaultModel = "gemini-3.5-flash" // Provider implements provider.Provider for Google's Gemini API. type Provider struct { @@ -74,7 +74,7 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { // Fallback to hardcoded list if API call fails return p.fallbackModels(), nil } - + caps := inferGoogleModelCapabilities(model) models = append(models, provider.ModelInfo{ ID: model.Name, @@ -96,7 +96,7 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { func (p *Provider) fallbackModels() []provider.ModelInfo { return []provider.ModelInfo{ { - ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro", Provider: p.name, + ID: "gemini-3.1-pro-preview", Name: "Gemini 3.1 Pro", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, @@ -107,7 +107,7 @@ func (p *Provider) fallbackModels() []provider.ModelInfo { }, }, { - ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash", Provider: p.name, + ID: "gemini-3.5-flash", Name: "Gemini 3.5 Flash", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, @@ -118,7 +118,37 @@ func (p *Provider) fallbackModels() []provider.ModelInfo { }, }, { - ID: "gemini-2.0-flash", Name: "Gemini 2.0 Flash", Provider: p.name, + ID: "gemini-3.1-flash-lite", Name: "Gemini 3.1 Flash Lite", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, JSONOutput: true, Vision: true, + ContextWindow: 1048576, MaxOutput: 65536, + }, + }, + // Legacy IDs retained for users pinned to older models. + { + ID: "gemini-2.5-pro", Name: "Gemini 2.5 Pro (legacy)", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, + JSONOutput: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + Vision: true, + ContextWindow: 1048576, + MaxOutput: 65536, + }, + }, + { + ID: "gemini-2.5-flash", Name: "Gemini 2.5 Flash (legacy)", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, + JSONOutput: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + Vision: true, + ContextWindow: 1048576, + MaxOutput: 65536, + }, + }, + { + ID: "gemini-2.0-flash", Name: "Gemini 2.0 Flash (legacy)", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, Vision: true, ContextWindow: 1048576, MaxOutput: 8192, @@ -141,6 +171,9 @@ func inferGoogleModelCapabilities(m *genai.Model) provider.Capabilities { // Model-specific overrides based on model name switch m.Name { + case "gemini-3.1-pro-preview", "gemini-3.5-flash", "gemini-3.1-flash-lite": + caps.ContextWindow = 1048576 + caps.MaxOutput = 65536 case "gemini-2.5-pro", "gemini-2.5-flash": caps.ContextWindow = 1048576 caps.MaxOutput = 65536 diff --git a/internal/provider/openai/provider.go b/internal/provider/openai/provider.go index a868ca7..17ba1c0 100644 --- a/internal/provider/openai/provider.go +++ b/internal/provider/openai/provider.go @@ -11,7 +11,7 @@ import ( "github.com/openai/openai-go/option" ) -const defaultModel = "gpt-4o" +const defaultModel = "gpt-5.5" // Provider implements provider.Provider for the OpenAI API. type Provider struct { @@ -82,7 +82,7 @@ func (p *Provider) DefaultModel() string { return p.model } // Models returns available OpenAI models with capabilities by querying the API. func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { pager := p.client.Models.ListAutoPaging(ctx) - + var models []provider.ModelInfo for pager.Next() { m := pager.Current() @@ -111,14 +111,55 @@ func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) { func (p *Provider) fallbackModels() []provider.ModelInfo { return []provider.ModelInfo{ { - ID: "gpt-4o", Name: "GPT-4o", Provider: p.name, + ID: "gpt-5.5", Name: "GPT-5.5", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, + JSONOutput: true, + Vision: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 1_000_000, + MaxOutput: 32000, + }, + }, + { + ID: "gpt-5.5-pro", Name: "GPT-5.5 Pro", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, + JSONOutput: true, + Vision: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 1_000_000, + MaxOutput: 32000, + }, + }, + { + ID: "gpt-5.2", Name: "GPT-5.2 Thinking", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, + JSONOutput: true, + Vision: true, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 400000, + MaxOutput: 32000, + }, + }, + { + ID: "gpt-5.2-chat-latest", Name: "GPT-5.2 Instant", Provider: p.name, + Capabilities: provider.Capabilities{ + ToolUse: true, JSONOutput: true, Vision: true, + ContextWindow: 400000, MaxOutput: 32000, + }, + }, + // Legacy IDs retained for users pinned to older models. + { + ID: "gpt-4o", Name: "GPT-4o (legacy)", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, Vision: true, ContextWindow: 128000, MaxOutput: 16384, }, }, { - ID: "gpt-4o-mini", Name: "GPT-4o Mini", Provider: p.name, + ID: "gpt-4o-mini", Name: "GPT-4o Mini (legacy)", Provider: p.name, Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, Vision: true, ContextWindow: 128000, MaxOutput: 16384, @@ -149,29 +190,38 @@ func (p *Provider) fallbackModels() []provider.ModelInfo { // inferOpenAIModelCapabilities infers capabilities from model ID. func inferOpenAIModelCapabilities(modelID string) provider.Capabilities { - // Default capabilities for most modern OpenAI models + // Default capabilities for most modern OpenAI models (GPT-5.x baseline). caps := provider.Capabilities{ ToolUse: true, JSONOutput: true, Vision: true, - ContextWindow: 128000, - MaxOutput: 16384, + ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}, + ContextWindow: 400000, + MaxOutput: 32000, } // Model-specific overrides switch modelID { + case "gpt-5.5", "gpt-5.5-pro": + caps.ContextWindow = 1_000_000 + caps.MaxOutput = 32000 + case "gpt-5.2", "gpt-5.2-chat-latest": + caps.ContextWindow = 400000 + caps.MaxOutput = 32000 case "gpt-4o", "gpt-4o-mini": + caps.ThinkingModes = nil caps.ContextWindow = 128000 caps.MaxOutput = 16384 case "o3", "o3-mini": - caps.ThinkingModes = []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh} caps.ContextWindow = 200000 caps.MaxOutput = 100000 case "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613": + caps.ThinkingModes = nil caps.Vision = false caps.ContextWindow = 8192 caps.MaxOutput = 8192 case "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613": + caps.ThinkingModes = nil caps.Vision = false caps.ToolUse = false caps.ContextWindow = 16384 diff --git a/internal/provider/ratelimits.go b/internal/provider/ratelimits.go index 0fade82..1ea8088 100644 --- a/internal/provider/ratelimits.go +++ b/internal/provider/ratelimits.go @@ -84,12 +84,17 @@ func mistralDefaults() ProviderDefaults { Tier: "starter", Models: map[string]RateLimits{ "*": base, + // Mistral 3 (released Dec 2025) — flagship. + "mistral-large-3": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000}, + "mistral-large-2512": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000}, + "mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000}, + "mistral-medium-3.5": {RPS: 1, TPM: 375_000}, + "mistral-medium-2511": {RPS: 1, TPM: 375_000}, // Magistral models get higher limits "magistral-medium-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000}, "magistral-small-2509": {RPS: 1, TPM: 75_000, TokensMonth: 1_000_000_000}, - // Large/medium get higher TPM + // Older Large/medium "mistral-large-2411": {RPS: 1, TPM: 600_000, TokensMonth: 200_000_000_000}, - "mistral-large-latest": {RPS: 1, TPM: 50_000, TokensMonth: 4_000_000}, "mistral-medium-2505": {RPS: 1, TPM: 375_000}, "mistral-medium-2508": {RPS: 1, TPM: 375_000}, "mistral-small-2603": {RPS: 1, TPM: 375_000}, @@ -108,15 +113,18 @@ func anthropicDefaults() ProviderDefaults { Tier: "tier1", Models: map[string]RateLimits{ "*": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, - // Claude 4.x Opus (shared across 4, 4.1, 4.5, 4.6) - "claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, - "claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, - // Claude 4.x Sonnet (shared across 4, 4.5, 4.6) - "claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, - "claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, - // Haiku - "claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000}, - "claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000}, + // Claude 4.6 / 4.7 generation — dateless IDs. + "claude-opus-4-7": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-opus-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-sonnet-4-6": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-haiku-4-5": {RPM: 50, ITPM: 50_000, OTPM: 10_000}, + "claude-haiku-4-5-20251001": {RPM: 50, ITPM: 50_000, OTPM: 10_000}, + // Legacy dated 4.0 IDs. + "claude-opus-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-opus-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-sonnet-4-20250514": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-sonnet-4-0": {RPM: 50, ITPM: 30_000, OTPM: 8_000}, + "claude-3-5-haiku-20241022": {RPM: 50, ITPM: 50_000, OTPM: 10_000}, }, } } @@ -127,13 +135,21 @@ func openaiDefaults() ProviderDefaults { Provider: "openai", Tier: "tier1", Models: map[string]RateLimits{ - "*": {RPM: 500, TPM: 30_000, RPD: 10_000}, - "gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000}, - "gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000}, - "o1": {RPM: 500, TPM: 30_000}, - "o3": {RPM: 500, TPM: 30_000}, - "o3-mini": {RPM: 500, TPM: 200_000}, - "o4-mini": {RPM: 500, TPM: 200_000}, + "*": {RPM: 500, TPM: 30_000, RPD: 10_000}, + // GPT-5.5 generation. + "gpt-5.5": {RPM: 500, TPM: 30_000, RPD: 10_000}, + "gpt-5.5-pro": {RPM: 500, TPM: 30_000, RPD: 10_000}, + "gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000}, + // GPT-5.2 generation. + "gpt-5.2": {RPM: 500, TPM: 200_000, RPD: 10_000}, + "gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000}, + // Legacy. + "gpt-4o": {RPM: 500, TPM: 30_000, RPD: 10_000}, + "gpt-4o-mini": {RPM: 500, TPM: 200_000, RPD: 10_000}, + "o1": {RPM: 500, TPM: 30_000}, + "o3": {RPM: 500, TPM: 30_000}, + "o3-mini": {RPM: 500, TPM: 200_000}, + "o4-mini": {RPM: 500, TPM: 200_000}, }, } } @@ -144,12 +160,18 @@ func googleDefaults() ProviderDefaults { Provider: "google", Tier: "free", Models: map[string]RateLimits{ - "*": {RPM: 15, TPM: 250_000, RPD: 250}, - "gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100}, - "gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100}, - "gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250}, + "*": {RPM: 15, TPM: 250_000, RPD: 250}, + // Gemini 3.x generation. + "gemini-3.1-pro-preview": {RPM: 5, TPM: 250_000, RPD: 100}, + "gemini-3.5-flash": {RPM: 15, TPM: 250_000, RPD: 250}, + "gemini-3.1-flash-lite": {RPM: 15, TPM: 250_000, RPD: 250}, + "gemini-3.1-flash-image-preview": {RPM: 15, TPM: 250_000, RPD: 250}, + // Legacy. + "gemini-2.5-pro": {RPM: 5, TPM: 250_000, RPD: 100}, + "gemini-2.5-pro-preview-05-06": {RPM: 5, TPM: 250_000, RPD: 100}, + "gemini-2.5-flash": {RPM: 15, TPM: 250_000, RPD: 250}, "gemini-2.5-flash-preview-04-17": {RPM: 15, TPM: 250_000, RPD: 250}, - "gemini-2.0-flash": {RPM: 10, RPD: 1_500}, + "gemini-2.0-flash": {RPM: 10, RPD: 1_500}, }, } }