diff --git a/internal/provider/openai/provider.go b/internal/provider/openai/provider.go
index 17ba1c0..37b6914 100644
--- a/internal/provider/openai/provider.go
+++ b/internal/provider/openai/provider.go
@@ -132,6 +132,17 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
 				MaxOutput:     32000,
 			},
 		},
+		{
+			ID: "gpt-5.3-codex", Name: "GPT-5.3 Codex", Provider: p.name,
+			Capabilities: provider.Capabilities{
+				ToolUse:       true,
+				JSONOutput:    true,
+				Vision:        true,
+				ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
+				ContextWindow: 400000,
+				MaxOutput:     32000,
+			},
+		},
 		{
 			ID: "gpt-5.2", Name: "GPT-5.2 Thinking", Provider: p.name,
 			Capabilities: provider.Capabilities{
@@ -205,6 +216,9 @@ func inferOpenAIModelCapabilities(modelID string) provider.Capabilities {
 	case "gpt-5.5", "gpt-5.5-pro":
 		caps.ContextWindow = 1_000_000
 		caps.MaxOutput = 32000
+	case "gpt-5.3-codex":
+		caps.ContextWindow = 400000
+		caps.MaxOutput = 32000
 	case "gpt-5.2", "gpt-5.2-chat-latest":
 		caps.ContextWindow = 400000
 		caps.MaxOutput = 32000
diff --git a/internal/provider/ratelimits.go b/internal/provider/ratelimits.go
index 1ea8088..2be8e0a 100644
--- a/internal/provider/ratelimits.go
+++ b/internal/provider/ratelimits.go
@@ -140,6 +140,9 @@ func openaiDefaults() ProviderDefaults {
 			"gpt-5.5":            {RPM: 500, TPM: 30_000, RPD: 10_000},
 			"gpt-5.5-pro":        {RPM: 500, TPM: 30_000, RPD: 10_000},
 			"gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000},
+			// GPT-5.3 Codex (coding-specialist branch).
+			"gpt-5.3-codex":            {RPM: 500, TPM: 200_000, RPD: 10_000},
+			"gpt-5.3-codex-2026-02-15": {RPM: 500, TPM: 200_000, RPD: 10_000},
 			// GPT-5.2 generation.
 			"gpt-5.2":             {RPM: 500, TPM: 200_000, RPD: 10_000},
 			"gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000},
diff --git a/internal/router/defaults.go b/internal/router/defaults.go
index a5edca1..434e133 100644
--- a/internal/router/defaults.go
+++ b/internal/router/defaults.go
@@ -200,6 +200,44 @@ var knownFamilyDefaults = map[string]FamilyDefaults{
 		MaxComplexity: 0.45,
 	},
 
+	// --- Closed-source frontier (cloud arms) --------------------------------
+	// Cloud entries set Strengths and CostWeight but leave MaxComplexity
+	// zero — cloud arms shouldn't have a complexity ceiling. CostWeight
+	// rationale per the 2026-05-23 plan:
+	//   - 0.3 on frontier arms (Opus 4.7, GPT-5.5): keep them competitive
+	//     for high-stakes tasks (SecurityReview, Planning) despite $4+/Mtok.
+	//   - 0.5-0.7 on mid-tier coding specialists: standard cost influence.
+	//   - 1.2 on cheap fast arms (Gemini 3.5 Flash): penalize cost more
+	//     so they win only when cost is genuinely decisive.
+	"claude-opus-4-7": {
+		Strengths:  []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor},
+		CostWeight: 0.3,
+	},
+	"claude-sonnet-4-6": {
+		Strengths:  []TaskType{TaskGeneration, TaskRefactor, TaskReview},
+		CostWeight: 0.7,
+	},
+	"gpt-5.5": {
+		Strengths:  []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration},
+		CostWeight: 0.3,
+	},
+	"gpt-5.3-codex": {
+		Strengths:  []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest},
+		CostWeight: 0.6,
+	},
+	"gpt-5.2": {
+		Strengths:  []TaskType{TaskOrchestration, TaskReview},
+		CostWeight: 0.8,
+	},
+	"gemini-3.1-pro": {
+		Strengths:  []TaskType{TaskPlanning, TaskReview, TaskOrchestration},
+		CostWeight: 0.5,
+	},
+	"gemini-3.5-flash": {
+		Strengths:  []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration},
+		CostWeight: 1.2,
+	},
+
 	// --- Tool-router specialist (reserved, not auto-routed) -----------------
 	// functiongemma is Google's 270M function-calling specialist. It is
 	// not a chat model — it emits structured tool calls, not prose. We
@@ -278,6 +316,48 @@ func ResolveMaxComplexity(modelID string) (float64, bool) {
 	return 0, false
 }
 
+// applyFamilyDefaults populates zero-valued routing fields on an Arm from
+// the family-defaults table. Strengths, MaxComplexity, and CostWeight are
+// filled only while still at their zero value, so non-zero user-supplied
+// values are never overwritten. Disabled is one-way: a family entry marked
+// Disabled switches the arm off, and nothing here ever re-enables an arm.
+// Returns true when at least one family entry matched, false when arm is
+// nil or the model is unknown.
+//
+// Strengths is copied, not aliased, so in-place edits to one arm's slice
+// cannot leak into the shared family table or into other arms.
+//
+// Looks up by arm.ModelName first; falls back to arm.ID.Model() when
+// ModelName is empty (which test code commonly omits).
+func applyFamilyDefaults(arm *Arm) bool {
+	if arm == nil {
+		return false
+	}
+	modelKey := arm.ModelName
+	if modelKey == "" {
+		modelKey = arm.ID.Model()
+	}
+	defaults, ok := ResolveFamilyDefaults(modelKey)
+	if !ok {
+		return false
+	}
+	if len(arm.Strengths) == 0 && len(defaults.Strengths) > 0 {
+		arm.Strengths = append([]TaskType(nil), defaults.Strengths...)
+	}
+	if arm.MaxComplexity == 0 {
+		if ceiling, found := ResolveMaxComplexity(modelKey); found {
+			arm.MaxComplexity = ceiling
+		}
+	}
+	if arm.CostWeight == 0 && defaults.CostWeight > 0 {
+		arm.CostWeight = defaults.CostWeight
+	}
+	if defaults.Disabled {
+		arm.Disabled = true
+	}
+	return true
+}
+
 // pureSizeToken matches a token consisting of digits (optionally with a
 // single decimal point) followed by 'b' or 'm' — and nothing else. Used
 // after splitting the model ID on `:`, `-`, `_`, `/` to extract a pure
diff --git a/internal/router/defaults_test.go b/internal/router/defaults_test.go
index 2f99dc7..8bc7e81 100644
--- a/internal/router/defaults_test.go
+++ b/internal/router/defaults_test.go
@@ -186,6 +186,129 @@ func TestKnownFamilyDefaults_NoDualSpec(t *testing.T) {
 	}
 }
 
+// --- Cloud defaults --------------------------------------------------------
+
+func TestResolveFamilyDefaults_CloudArms(t *testing.T) {
+	cases := []struct {
+		modelID        string
+		wantStrengths  []TaskType
+		wantCostWeight float64
+	}{
+		{"claude-opus-4-7", []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor}, 0.3},
+		{"claude-sonnet-4-6", []TaskType{TaskGeneration, TaskRefactor, TaskReview}, 0.7},
+		{"gpt-5.5", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3},
+		{"gpt-5.5-pro", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3}, // shares prefix with gpt-5.5
+		{"gpt-5.3-codex", []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}, 0.6},
+		{"gpt-5.2", []TaskType{TaskOrchestration, TaskReview}, 0.8},
+		{"gpt-5.2-chat-latest", []TaskType{TaskOrchestration, TaskReview}, 0.8},
+		{"gemini-3.1-pro", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
+		{"gemini-3.1-pro-preview", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
+		{"gemini-3.5-flash", []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}, 1.2},
+	}
+	for _, tc := range cases {
+		t.Run(tc.modelID, func(t *testing.T) {
+			got, ok := ResolveFamilyDefaults(tc.modelID)
+			if !ok {
+				t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID)
+			}
+			if !reflect.DeepEqual(got.Strengths, tc.wantStrengths) {
+				t.Errorf("%q Strengths = %v, want %v", tc.modelID, got.Strengths, tc.wantStrengths)
+			}
+			if got.CostWeight != tc.wantCostWeight {
+				t.Errorf("%q CostWeight = %v, want %v", tc.modelID, got.CostWeight, tc.wantCostWeight)
+			}
+			if got.MaxComplexity != 0 {
+				t.Errorf("%q MaxComplexity = %v, want 0 (cloud arms have no ceiling)", tc.modelID, got.MaxComplexity)
+			}
+		})
+	}
+}
+
+func TestResolveFamilyDefaults_CloudLegacyUnaffected(t *testing.T) {
+	// Legacy / unrelated cloud IDs must NOT pick up defaults — keeping
+	// users on older pinned models safe from imposed Strengths.
+	noMatch := []string{
+		"claude-opus-4-20250514",
+		"claude-sonnet-4-20250514",
+		"claude-haiku-4-5-20251001",
+		"gpt-4o",
+		"gpt-4o-mini",
+		"o3",
+		"o3-mini",
+		"gemini-2.5-pro",
+		"gemini-2.0-flash",
+	}
+	for _, id := range noMatch {
+		if _, ok := ResolveFamilyDefaults(id); ok {
+			t.Errorf("ResolveFamilyDefaults(%q) should not match (legacy model)", id)
+		}
+	}
+}
+
+func TestRegisterArm_AppliesCloudDefaults(t *testing.T) {
+	r := New(Config{})
+	r.RegisterArm(&Arm{
+		ID:        NewArmID("openai", "gpt-5.3-codex"),
+		ModelName: "gpt-5.3-codex",
+		Capabilities: provider.Capabilities{
+			ToolUse: true, JSONOutput: true,
+			ContextWindow: 400000,
+		},
+	})
+	arm, ok := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
+	if !ok {
+		t.Fatal("gpt-5.3-codex arm should be registered")
+	}
+	wantStrengths := []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}
+	if !reflect.DeepEqual(arm.Strengths, wantStrengths) {
+		t.Errorf("Strengths = %v, want %v", arm.Strengths, wantStrengths)
+	}
+	if arm.CostWeight != 0.6 {
+		t.Errorf("CostWeight = %v, want 0.6", arm.CostWeight)
+	}
+	if arm.MaxComplexity != 0 {
+		t.Errorf("MaxComplexity = %v, want 0 (cloud arm)", arm.MaxComplexity)
+	}
+}
+
+func TestRegisterArm_DoesNotOverrideUserStrengths(t *testing.T) {
+	r := New(Config{})
+	r.RegisterArm(&Arm{
+		ID:         NewArmID("anthropic", "claude-opus-4-7"),
+		ModelName:  "claude-opus-4-7",
+		Strengths:  []TaskType{TaskUnitTest}, // user-supplied; defaults should not overwrite
+		CostWeight: 0.5,                      // user-supplied
+	})
+	arm, ok := r.LookupArm(NewArmID("anthropic", "claude-opus-4-7"))
+	if !ok {
+		t.Fatal("claude-opus-4-7 arm should be registered")
+	}
+	if !reflect.DeepEqual(arm.Strengths, []TaskType{TaskUnitTest}) {
+		t.Errorf("user-supplied Strengths overridden by defaults: got %v", arm.Strengths)
+	}
+	if arm.CostWeight != 0.5 {
+		t.Errorf("user-supplied CostWeight overridden: got %v", arm.CostWeight)
+	}
+}
+
+func TestRegisterArm_FallsBackToIDWhenModelNameMissing(t *testing.T) {
+	// Some test code constructs arms with ID but no ModelName.
+	// applyFamilyDefaults should fall back to ID.Model() so defaults
+	// still flow through.
+	r := New(Config{})
+	r.RegisterArm(&Arm{
+		ID: NewArmID("openai", "gpt-5.3-codex"),
+		// ModelName intentionally empty
+	})
+	arm, ok := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
+	if !ok {
+		t.Fatal("gpt-5.3-codex arm should be registered")
+	}
+	if arm.CostWeight != 0.6 {
+		t.Errorf("CostWeight = %v, want 0.6 (defaults should resolve via ID.Model() fallback)", arm.CostWeight)
+	}
+}
+
 // --- Integration: routing-payoff scenario --------------------------------
 
 // TestRoutingDefaults_PayoffScenario is the user-facing demonstration that
diff --git a/internal/router/discovery.go b/internal/router/discovery.go
index 064787c..6f1a740 100644
--- a/internal/router/discovery.go
+++ b/internal/router/discovery.go
@@ -497,7 +497,12 @@ func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFacto
 			continue
 		}
 
-		arm := &Arm{
+		// Family-keyed defaults (Strengths, MaxComplexity, CostWeight,
+		// Disabled) are applied inside Router.RegisterArm — single source
+		// of truth so cloud-arm and local-arm registration paths agree.
+		// User-supplied [[arms]] config in TOML overrides defaults later
+		// via ApplyArmOverrides.
+		r.RegisterArm(&Arm{
 			ID:        armID,
 			Provider:  prov,
 			ModelName: m.ID,
@@ -513,28 +518,6 @@ func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFacto
 				Vision:        m.SupportsVision,
 				ContextWindow: m.ContextSize,
 			},
-		}
-
-		// Apply family-keyed defaults (Strengths, MaxComplexity, CostWeight,
-		// Disabled) for known model families. User-supplied [[arms]] config
-		// in TOML overrides these later via ApplyArmOverrides.
-		if defaults, ok := ResolveFamilyDefaults(m.ID); ok {
-			if len(arm.Strengths) == 0 && len(defaults.Strengths) > 0 {
-				arm.Strengths = defaults.Strengths
-			}
-			if arm.MaxComplexity == 0 {
-				if cap, ok := ResolveMaxComplexity(m.ID); ok {
-					arm.MaxComplexity = cap
-				}
-			}
-			if arm.CostWeight == 0 && defaults.CostWeight > 0 {
-				arm.CostWeight = defaults.CostWeight
-			}
-			if defaults.Disabled {
-				arm.Disabled = true
-			}
-		}
-
-		r.RegisterArm(arm)
+		})
 	}
 }
diff --git a/internal/router/router.go b/internal/router/router.go
index 6c617da..b219cda 100644
--- a/internal/router/router.go
+++ b/internal/router/router.go
@@ -42,8 +42,13 @@ func New(cfg Config) *Router {
 	}
 }
 
-// RegisterArm adds an arm to the router.
+// RegisterArm adds an arm to the router. Before insertion the arm is
+// mutated in place: family-keyed defaults (Strengths, MaxComplexity,
+// CostWeight, Disabled) fill any fields still at their zero value, so
+// user-supplied values are never overwritten. See defaults.go.
 func (r *Router) RegisterArm(arm *Arm) {
+	applyFamilyDefaults(arm)
+
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	r.arms[arm.ID] = arm