feat(router): cloud-arm defaults, gpt-5.3-codex registration
Closes R-4 and R-5 of the routing-defaults plan.
R-4: Strengths + CostWeight defaults for closed frontier models.
Cloud entries land in the same knownFamilyDefaults table as local
ones, with MaxComplexity intentionally left zero (cloud arms get
no complexity ceiling). CostWeight tuned per the plan's rationale:
claude-opus-4-7 → Planning/SecurityReview/Debug/Refactor, 0.3
claude-sonnet-4-6 → Generation/Refactor/Review, 0.7
gpt-5.5 → Planning/SecurityReview/Generation, 0.3
gpt-5.3-codex → Generation/Refactor/Debug/UnitTest, 0.6
gpt-5.2 → Orchestration/Review, 0.8
gemini-3.1-pro → Planning/Review/Orchestration, 0.5
gemini-3.5-flash → Boilerplate/Explain/Orchestration, 1.2
The 0.3 weight on frontier arms keeps them competitive on
SecurityReview / Planning despite $4+/Mtok; 1.2 on Gemini Flash
penalizes cost more so it only wins when cost is genuinely
decisive (boilerplate, explain).
Mechanism: extracted applyFamilyDefaults into defaults.go and now call
it from Router.RegisterArm. Single source of truth — both local
discovery and the primary-provider path in cmd/gnoma/main.go now
flow through the same defaults application. Removed the duplicate
apply block from RegisterDiscoveredModels.
Legacy model IDs (claude-opus-4-20250514, gpt-4o, o3, gemini-2.5-pro,
etc.) intentionally do not match any table entry — keeps users on
pinned older models safe from imposed 2026 Strengths.
R-5: gpt-5.3-codex registration.
- internal/provider/openai/provider.go: added to fallbackModels
and inferOpenAIModelCapabilities (400K context, 32K output).
- internal/provider/ratelimits.go: gpt-5.3-codex and its dated
alias gpt-5.3-codex-2026-02-15 added with the same Tier 1
quotas as gpt-5.2.
Gemini 3.x (3.1-pro-preview, 3.5-flash, 3.1-flash-lite) was already
registered in both google/provider.go and ratelimits.go — no change
needed for that part of R-5.
Test coverage:
- ResolveFamilyDefaults table-driven across all 7 cloud entries
including prefix-sharing (gpt-5.5-pro → gpt-5.5 defaults,
gemini-3.1-pro-preview → gemini-3.1-pro defaults).
- Legacy IDs return !ok.
- RegisterArm applies cloud defaults end-to-end.
- User-supplied Strengths and CostWeight are not overridden.
- ID.Model() fallback works when ModelName is empty (test code
often constructs arms this way).
Refs: docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
This commit is contained in:
@@ -132,6 +132,17 @@ func (p *Provider) fallbackModels() []provider.ModelInfo {
|
||||
MaxOutput: 32000,
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.3-codex", Name: "GPT-5.3 Codex", Provider: p.name,
|
||||
Capabilities: provider.Capabilities{
|
||||
ToolUse: true,
|
||||
JSONOutput: true,
|
||||
Vision: true,
|
||||
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
||||
ContextWindow: 400000,
|
||||
MaxOutput: 32000,
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.2", Name: "GPT-5.2 Thinking", Provider: p.name,
|
||||
Capabilities: provider.Capabilities{
|
||||
@@ -205,6 +216,9 @@ func inferOpenAIModelCapabilities(modelID string) provider.Capabilities {
|
||||
case "gpt-5.5", "gpt-5.5-pro":
|
||||
caps.ContextWindow = 1_000_000
|
||||
caps.MaxOutput = 32000
|
||||
case "gpt-5.3-codex":
|
||||
caps.ContextWindow = 400000
|
||||
caps.MaxOutput = 32000
|
||||
case "gpt-5.2", "gpt-5.2-chat-latest":
|
||||
caps.ContextWindow = 400000
|
||||
caps.MaxOutput = 32000
|
||||
|
||||
@@ -140,6 +140,9 @@ func openaiDefaults() ProviderDefaults {
|
||||
"gpt-5.5": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
||||
"gpt-5.5-pro": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
||||
"gpt-5.5-2026-04-23": {RPM: 500, TPM: 30_000, RPD: 10_000},
|
||||
// GPT-5.3 Codex (coding-specialist branch).
|
||||
"gpt-5.3-codex": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
||||
"gpt-5.3-codex-2026-02-15": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
||||
// GPT-5.2 generation.
|
||||
"gpt-5.2": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
||||
"gpt-5.2-chat-latest": {RPM: 500, TPM: 200_000, RPD: 10_000},
|
||||
|
||||
@@ -200,6 +200,44 @@ var knownFamilyDefaults = map[string]FamilyDefaults{
|
||||
MaxComplexity: 0.45,
|
||||
},
|
||||
|
||||
// --- Closed-source frontier (cloud arms) --------------------------------
|
||||
// Cloud entries set Strengths and CostWeight but leave MaxComplexity
|
||||
// zero — cloud arms shouldn't have a complexity ceiling. CostWeight
|
||||
// rationale per the 2026-05-23 plan:
|
||||
// - 0.3 on frontier arms (Opus 4.7, GPT-5.5): keep them competitive
|
||||
// for high-stakes tasks (SecurityReview, Planning) despite $4+/Mtok.
|
||||
// - 0.5-0.7 on mid-tier coding specialists: standard cost influence.
|
||||
// - 1.2 on cheap fast arms (Gemini 3.5 Flash): penalize cost more
|
||||
// so they win only when cost is genuinely decisive.
|
||||
"claude-opus-4-7": {
|
||||
Strengths: []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor},
|
||||
CostWeight: 0.3,
|
||||
},
|
||||
"claude-sonnet-4-6": {
|
||||
Strengths: []TaskType{TaskGeneration, TaskRefactor, TaskReview},
|
||||
CostWeight: 0.7,
|
||||
},
|
||||
"gpt-5.5": {
|
||||
Strengths: []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration},
|
||||
CostWeight: 0.3,
|
||||
},
|
||||
"gpt-5.3-codex": {
|
||||
Strengths: []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest},
|
||||
CostWeight: 0.6,
|
||||
},
|
||||
"gpt-5.2": {
|
||||
Strengths: []TaskType{TaskOrchestration, TaskReview},
|
||||
CostWeight: 0.8,
|
||||
},
|
||||
"gemini-3.1-pro": {
|
||||
Strengths: []TaskType{TaskPlanning, TaskReview, TaskOrchestration},
|
||||
CostWeight: 0.5,
|
||||
},
|
||||
"gemini-3.5-flash": {
|
||||
Strengths: []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration},
|
||||
CostWeight: 1.2,
|
||||
},
|
||||
|
||||
// --- Tool-router specialist (reserved, not auto-routed) -----------------
|
||||
// functiongemma is Google's 270M function-calling specialist. It is
|
||||
// not a chat model — it emits structured tool calls, not prose. We
|
||||
@@ -278,6 +316,43 @@ func ResolveMaxComplexity(modelID string) (float64, bool) {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// applyFamilyDefaults populates zero-valued routing fields on an Arm from
|
||||
// the family-defaults table. Only fields that are still at their zero
|
||||
// value get filled — user-supplied Strengths, MaxComplexity, CostWeight,
|
||||
// or Disabled are never overwritten. Returns true when at least one
|
||||
// family entry matched, false when the model is unknown.
|
||||
//
|
||||
// Looks up by arm.ModelName first; falls back to arm.ID.Model() when
|
||||
// ModelName is empty (which test code commonly omits).
|
||||
func applyFamilyDefaults(arm *Arm) bool {
|
||||
if arm == nil {
|
||||
return false
|
||||
}
|
||||
modelKey := arm.ModelName
|
||||
if modelKey == "" {
|
||||
modelKey = arm.ID.Model()
|
||||
}
|
||||
defaults, ok := ResolveFamilyDefaults(modelKey)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if len(arm.Strengths) == 0 && len(defaults.Strengths) > 0 {
|
||||
arm.Strengths = defaults.Strengths
|
||||
}
|
||||
if arm.MaxComplexity == 0 {
|
||||
if cap, capOK := ResolveMaxComplexity(modelKey); capOK {
|
||||
arm.MaxComplexity = cap
|
||||
}
|
||||
}
|
||||
if arm.CostWeight == 0 && defaults.CostWeight > 0 {
|
||||
arm.CostWeight = defaults.CostWeight
|
||||
}
|
||||
if defaults.Disabled {
|
||||
arm.Disabled = true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// pureSizeToken matches a token consisting of digits (optionally with a
|
||||
// single decimal point) followed by 'b' or 'm' — and nothing else. Used
|
||||
// after splitting the model ID on `:`, `-`, `_`, `/` to extract a pure
|
||||
|
||||
@@ -186,6 +186,123 @@ func TestKnownFamilyDefaults_NoDualSpec(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Cloud defaults --------------------------------------------------------
|
||||
|
||||
func TestResolveFamilyDefaults_CloudArms(t *testing.T) {
|
||||
cases := []struct {
|
||||
modelID string
|
||||
wantStrengths []TaskType
|
||||
wantCostWeight float64
|
||||
}{
|
||||
{"claude-opus-4-7", []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor}, 0.3},
|
||||
{"claude-sonnet-4-6", []TaskType{TaskGeneration, TaskRefactor, TaskReview}, 0.7},
|
||||
{"gpt-5.5", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3},
|
||||
{"gpt-5.5-pro", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3}, // shares prefix with gpt-5.5
|
||||
{"gpt-5.3-codex", []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}, 0.6},
|
||||
{"gpt-5.2", []TaskType{TaskOrchestration, TaskReview}, 0.8},
|
||||
{"gpt-5.2-chat-latest", []TaskType{TaskOrchestration, TaskReview}, 0.8},
|
||||
{"gemini-3.1-pro", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
|
||||
{"gemini-3.1-pro-preview", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5},
|
||||
{"gemini-3.5-flash", []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}, 1.2},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.modelID, func(t *testing.T) {
|
||||
got, ok := ResolveFamilyDefaults(tc.modelID)
|
||||
if !ok {
|
||||
t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Strengths, tc.wantStrengths) {
|
||||
t.Errorf("%q Strengths = %v, want %v", tc.modelID, got.Strengths, tc.wantStrengths)
|
||||
}
|
||||
if got.CostWeight != tc.wantCostWeight {
|
||||
t.Errorf("%q CostWeight = %v, want %v", tc.modelID, got.CostWeight, tc.wantCostWeight)
|
||||
}
|
||||
if got.MaxComplexity != 0 {
|
||||
t.Errorf("%q MaxComplexity = %v, want 0 (cloud arms have no ceiling)", tc.modelID, got.MaxComplexity)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveFamilyDefaults_CloudLegacyUnaffected(t *testing.T) {
|
||||
// Legacy / unrelated cloud IDs must NOT pick up defaults — keeping
|
||||
// users on older pinned models safe from imposed Strengths.
|
||||
noMatch := []string{
|
||||
"claude-opus-4-20250514",
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-haiku-4-5-20251001",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"o3",
|
||||
"o3-mini",
|
||||
"gemini-2.5-pro",
|
||||
"gemini-2.0-flash",
|
||||
}
|
||||
for _, id := range noMatch {
|
||||
if _, ok := ResolveFamilyDefaults(id); ok {
|
||||
t.Errorf("ResolveFamilyDefaults(%q) should not match (legacy model)", id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterArm_AppliesCloudDefaults(t *testing.T) {
|
||||
r := New(Config{})
|
||||
r.RegisterArm(&Arm{
|
||||
ID: NewArmID("openai", "gpt-5.3-codex"),
|
||||
ModelName: "gpt-5.3-codex",
|
||||
Capabilities: provider.Capabilities{
|
||||
ToolUse: true, JSONOutput: true,
|
||||
ContextWindow: 400000,
|
||||
},
|
||||
})
|
||||
arm, ok := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
|
||||
if !ok {
|
||||
t.Fatal("gpt-5.3-codex arm should be registered")
|
||||
}
|
||||
wantStrengths := []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}
|
||||
if !reflect.DeepEqual(arm.Strengths, wantStrengths) {
|
||||
t.Errorf("Strengths = %v, want %v", arm.Strengths, wantStrengths)
|
||||
}
|
||||
if arm.CostWeight != 0.6 {
|
||||
t.Errorf("CostWeight = %v, want 0.6", arm.CostWeight)
|
||||
}
|
||||
if arm.MaxComplexity != 0 {
|
||||
t.Errorf("MaxComplexity = %v, want 0 (cloud arm)", arm.MaxComplexity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterArm_DoesNotOverrideUserStrengths(t *testing.T) {
|
||||
r := New(Config{})
|
||||
r.RegisterArm(&Arm{
|
||||
ID: NewArmID("anthropic", "claude-opus-4-7"),
|
||||
ModelName: "claude-opus-4-7",
|
||||
Strengths: []TaskType{TaskUnitTest}, // user-supplied; defaults should not overwrite
|
||||
CostWeight: 0.5, // user-supplied
|
||||
})
|
||||
arm, _ := r.LookupArm(NewArmID("anthropic", "claude-opus-4-7"))
|
||||
if !reflect.DeepEqual(arm.Strengths, []TaskType{TaskUnitTest}) {
|
||||
t.Errorf("user-supplied Strengths overridden by defaults: got %v", arm.Strengths)
|
||||
}
|
||||
if arm.CostWeight != 0.5 {
|
||||
t.Errorf("user-supplied CostWeight overridden: got %v", arm.CostWeight)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterArm_FallsBackToIDWhenModelNameMissing(t *testing.T) {
|
||||
// Some test code constructs arms with ID but no ModelName.
|
||||
// applyFamilyDefaults should fall back to ID.Model() so defaults
|
||||
// still flow through.
|
||||
r := New(Config{})
|
||||
r.RegisterArm(&Arm{
|
||||
ID: NewArmID("openai", "gpt-5.3-codex"),
|
||||
// ModelName intentionally empty
|
||||
})
|
||||
arm, _ := r.LookupArm(NewArmID("openai", "gpt-5.3-codex"))
|
||||
if arm.CostWeight != 0.6 {
|
||||
t.Errorf("CostWeight = %v, want 0.6 (defaults should resolve via ID.Model() fallback)", arm.CostWeight)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Integration: routing-payoff scenario --------------------------------
|
||||
|
||||
// TestRoutingDefaults_PayoffScenario is the user-facing demonstration that
|
||||
|
||||
@@ -497,7 +497,12 @@ func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFacto
|
||||
continue
|
||||
}
|
||||
|
||||
arm := &Arm{
|
||||
// Family-keyed defaults (Strengths, MaxComplexity, CostWeight,
|
||||
// Disabled) are applied inside Router.RegisterArm — single source
|
||||
// of truth so cloud-arm and local-arm registration paths agree.
|
||||
// User-supplied [[arms]] config in TOML overrides defaults later
|
||||
// via ApplyArmOverrides.
|
||||
r.RegisterArm(&Arm{
|
||||
ID: armID,
|
||||
Provider: prov,
|
||||
ModelName: m.ID,
|
||||
@@ -513,28 +518,6 @@ func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFacto
|
||||
Vision: m.SupportsVision,
|
||||
ContextWindow: m.ContextSize,
|
||||
},
|
||||
}
|
||||
|
||||
// Apply family-keyed defaults (Strengths, MaxComplexity, CostWeight,
|
||||
// Disabled) for known model families. User-supplied [[arms]] config
|
||||
// in TOML overrides these later via ApplyArmOverrides.
|
||||
if defaults, ok := ResolveFamilyDefaults(m.ID); ok {
|
||||
if len(arm.Strengths) == 0 && len(defaults.Strengths) > 0 {
|
||||
arm.Strengths = defaults.Strengths
|
||||
}
|
||||
if arm.MaxComplexity == 0 {
|
||||
if cap, ok := ResolveMaxComplexity(m.ID); ok {
|
||||
arm.MaxComplexity = cap
|
||||
}
|
||||
}
|
||||
if arm.CostWeight == 0 && defaults.CostWeight > 0 {
|
||||
arm.CostWeight = defaults.CostWeight
|
||||
}
|
||||
if defaults.Disabled {
|
||||
arm.Disabled = true
|
||||
}
|
||||
}
|
||||
|
||||
r.RegisterArm(arm)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,8 +42,13 @@ func New(cfg Config) *Router {
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterArm adds an arm to the router.
|
||||
// RegisterArm adds an arm to the router. Family-keyed defaults
|
||||
// (Strengths, MaxComplexity, CostWeight, Disabled) are applied to any
|
||||
// fields still at their zero value — user-supplied values are never
|
||||
// overwritten. See defaults.go for the family table.
|
||||
func (r *Router) RegisterArm(arm *Arm) {
|
||||
applyFamilyDefaults(arm)
|
||||
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.arms[arm.ID] = arm
|
||||
|
||||
Reference in New Issue
Block a user