package router import ( "reflect" "sort" "testing" "somegit.dev/Owlibou/gnoma/internal/provider" "somegit.dev/Owlibou/gnoma/internal/security" ) // --- parseSizeFromModelID ------------------------------------------------- func TestParseSizeFromModelID(t *testing.T) { cases := []struct { name string id string want float64 wantOK bool }{ {"ollama colon", "qwen3:14b", 14, true}, {"ollama colon decimal", "tiny3.5:1.5b", 1.5, true}, {"ollama colon millions", "reecdev/tiny3.5:500m", 0.5, true}, {"hyphen middle", "qwen3.5-9b-glm5.1-distill-v1", 9, true}, {"moe total wins over active", "qwen3-coder:30b-a3b-q4_K_M", 30, true}, {"namespace stripped", "google/functiongemma-270m-it", 0.27, true}, {"no size tag", "phi-4", 0, false}, {"plain version no b", "qwen3.5", 0, false}, {"gemma e-tag not pure size", "gemma-4-e2b-it", 0, false}, {"starcoder digit-only family", "starcoder2", 0, false}, {"large MoE", "qwen3-coder:480b", 480, true}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { got, ok := parseSizeFromModelID(tc.id) if ok != tc.wantOK { t.Fatalf("parseSizeFromModelID(%q) ok=%v, want %v (got value %v)", tc.id, ok, tc.wantOK, got) } if ok && got != tc.want { t.Errorf("parseSizeFromModelID(%q) = %v, want %v", tc.id, got, tc.want) } }) } } // --- ResolveFamilyDefaults: longest-prefix discipline --------------------- func TestResolveFamilyDefaults_LongestPrefixWins(t *testing.T) { cases := []struct { modelID string wantFamily string // expected family key (longest matching) }{ {"qwen3-coder:30b", "qwen3-coder"}, {"qwen3:14b", "qwen3"}, {"qwen3.5:4b", "qwen3.5"}, {"qwen3.5-9b-glm5.1-distill-v1", "qwen3.5"}, {"qwen2.5-coder:14b", "qwen2.5-coder"}, {"qwen2.5:7b", "qwen2.5"}, {"qwen-novel:7b", "qwen"}, {"mistral-small-3:24b", "mistral-small-3"}, {"mistral-7b-instruct-v0.3", "mistral"}, {"ministral-3:14b", "ministral-3"}, {"gemma4:latest", "gemma4"}, {"gemma4-e4b-uc:latest", "gemma4-e"}, {"gemma-4-e2b-it", "gemma-4-e"}, {"phi-4-mini", "phi-4-mini"}, {"phi-4:14b", "phi-4"}, {"tiny3.5:1.5b", "tiny3.5"}, {"reecdev/tiny3.5:500m", "tiny3.5"}, {"google/functiongemma-270m-it", "functiongemma"}, {"glm-ocr", "glm-ocr"}, {"glm-5.1", "glm"}, } for _, tc := range cases { t.Run(tc.modelID, func(t *testing.T) { defaults, ok := ResolveFamilyDefaults(tc.modelID) if !ok { t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID) } expected, ok := knownFamilyDefaults[tc.wantFamily] if !ok { t.Fatalf("test bug: %q not in knownFamilyDefaults", tc.wantFamily) } if !reflect.DeepEqual(defaults.Strengths, expected.Strengths) || defaults.MaxComplexity != expected.MaxComplexity || defaults.Disabled != expected.Disabled { t.Errorf("%q resolved to wrong family — got Strengths=%v MaxComplexity=%v Disabled=%v, want family %q Strengths=%v MaxComplexity=%v Disabled=%v", tc.modelID, defaults.Strengths, defaults.MaxComplexity, defaults.Disabled, tc.wantFamily, expected.Strengths, expected.MaxComplexity, expected.Disabled) } }) } } func TestResolveFamilyDefaults_Unknown(t *testing.T) { for _, id := range []string{ "some-novel-model:1.5b", "falcon:7b", "command-r:35b", } { if _, ok := ResolveFamilyDefaults(id); ok { t.Errorf("ResolveFamilyDefaults(%q) should not match anything in the table", id) } } } // --- ResolveMaxComplexity: size-keyed lookup ----------------------------- func TestResolveMaxComplexity_SizeKeyed(t *testing.T) { cases := []struct { id string want float64 }{ // ministral-3 ladder: 14b → 0.70, 8b → 0.55, 3b → 0.35 {"ministral-3:14b", 0.70}, {"ministral-3:8b", 0.55}, {"ministral-3:3b", 0.35}, // qwen3 ladder: 14b → 0.75, 7-13b → 0.65, <7b → 0.50 {"qwen3:14b", 0.75}, {"qwen3:7b", 0.65}, {"qwen3:4b", 0.50}, // qwen3.5 ladder: 9b → 0.65, 4-8b → 0.50, <4b → 0.40 {"qwen3.5-9b-glm5.1-distill-v1", 0.65}, {"qwen3.5:4b", 0.50}, // tiny3.5 ladder: 1.5b → 0.30, 0.5b → 0.20 {"reecdev/tiny3.5:1.5b", 0.30}, {"reecdev/tiny3.5:500m", 0.20}, // flat caps still resolve correctly {"qwen3-coder:30b", 0.85}, {"phi-4:14b", 0.65}, {"gemma4-e4b-uc:latest", 0.45}, } for _, tc := range cases { t.Run(tc.id, func(t *testing.T) { got, ok := ResolveMaxComplexity(tc.id) if !ok { t.Fatalf("ResolveMaxComplexity(%q) returned !ok", tc.id) } if got != tc.want { t.Errorf("ResolveMaxComplexity(%q) = %v, want %v", tc.id, got, tc.want) } }) } } func TestResolveMaxComplexity_SizeParseFailsFallsBack(t *testing.T) { // "qwen3" with no size tag — uses smallest SizeCap as conservative fallback. got, ok := ResolveMaxComplexity("qwen3") if !ok { t.Fatal("ResolveMaxComplexity should resolve unsized qwen3 via fallback") } if got != 0.50 { t.Errorf("ResolveMaxComplexity(\"qwen3\") = %v, want 0.50 (smallest SizeCap fallback)", got) } } // --- Table integrity ------------------------------------------------------ // TestKnownFamilyDefaults_SizeCapsOrdered confirms SizeCaps entries are // stored largest-first, since ResolveMaxComplexity iterates and stops at // the first match. func TestKnownFamilyDefaults_SizeCapsOrdered(t *testing.T) { for key, fd := range knownFamilyDefaults { if len(fd.SizeCaps) < 2 { continue } thresholds := make([]float64, len(fd.SizeCaps)) for i, sc := range fd.SizeCaps { thresholds[i] = sc.MinSizeB } sorted := append([]float64(nil), thresholds...) sort.Sort(sort.Reverse(sort.Float64Slice(sorted))) if !reflect.DeepEqual(thresholds, sorted) { t.Errorf("family %q SizeCaps not ordered largest-first: %v", key, thresholds) } } } // TestKnownFamilyDefaults_NoDualSpec confirms entries don't declare both // SizeCaps and MaxComplexity — they're mutually exclusive in the lookup. func TestKnownFamilyDefaults_NoDualSpec(t *testing.T) { for key, fd := range knownFamilyDefaults { if len(fd.SizeCaps) > 0 && fd.MaxComplexity > 0 { t.Errorf("family %q declares both SizeCaps and MaxComplexity; pick one", key) } } } // --- Cloud defaults -------------------------------------------------------- func TestResolveFamilyDefaults_CloudArms(t *testing.T) { cases := []struct { modelID string wantStrengths []TaskType wantCostWeight float64 }{ {"claude-opus-4-7", []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor}, 0.3}, {"claude-sonnet-4-6", []TaskType{TaskGeneration, TaskRefactor, TaskReview}, 0.7}, {"gpt-5.5", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3}, {"gpt-5.5-pro", []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, 0.3}, // shares prefix with gpt-5.5 {"gpt-5.3-codex", []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}, 0.6}, {"gpt-5.2", []TaskType{TaskOrchestration, TaskReview}, 0.8}, {"gpt-5.2-chat-latest", []TaskType{TaskOrchestration, TaskReview}, 0.8}, {"gemini-3.1-pro", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5}, {"gemini-3.1-pro-preview", []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, 0.5}, {"gemini-3.5-flash", []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}, 1.2}, } for _, tc := range cases { t.Run(tc.modelID, func(t *testing.T) { got, ok := ResolveFamilyDefaults(tc.modelID) if !ok { t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", tc.modelID) } if !reflect.DeepEqual(got.Strengths, tc.wantStrengths) { t.Errorf("%q Strengths = %v, want %v", tc.modelID, got.Strengths, tc.wantStrengths) } if got.CostWeight != tc.wantCostWeight { t.Errorf("%q CostWeight = %v, want %v", tc.modelID, got.CostWeight, tc.wantCostWeight) } if got.MaxComplexity != 0 { t.Errorf("%q MaxComplexity = %v, want 0 (cloud arms have no ceiling)", tc.modelID, got.MaxComplexity) } }) } } func TestResolveFamilyDefaults_CloudLegacyUnaffected(t *testing.T) { // Legacy / unrelated cloud IDs must NOT pick up defaults — keeping // users on older pinned models safe from imposed Strengths. noMatch := []string{ "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-haiku-4-5-20251001", "gpt-4o", "gpt-4o-mini", "o3", "o3-mini", "gemini-2.5-pro", "gemini-2.0-flash", } for _, id := range noMatch { if _, ok := ResolveFamilyDefaults(id); ok { t.Errorf("ResolveFamilyDefaults(%q) should not match (legacy model)", id) } } } func TestRegisterArm_AppliesCloudDefaults(t *testing.T) { r := New(Config{}) r.RegisterArm(&Arm{ ID: NewArmID("openai", "gpt-5.3-codex"), ModelName: "gpt-5.3-codex", Capabilities: provider.Capabilities{ ToolUse: true, JSONOutput: true, ContextWindow: 400000, }, }) arm, ok := r.LookupArm(NewArmID("openai", "gpt-5.3-codex")) if !ok { t.Fatal("gpt-5.3-codex arm should be registered") } wantStrengths := []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest} if !reflect.DeepEqual(arm.Strengths, wantStrengths) { t.Errorf("Strengths = %v, want %v", arm.Strengths, wantStrengths) } if arm.CostWeight != 0.6 { t.Errorf("CostWeight = %v, want 0.6", arm.CostWeight) } if arm.MaxComplexity != 0 { t.Errorf("MaxComplexity = %v, want 0 (cloud arm)", arm.MaxComplexity) } } func TestRegisterArm_DoesNotOverrideUserStrengths(t *testing.T) { r := New(Config{}) r.RegisterArm(&Arm{ ID: NewArmID("anthropic", "claude-opus-4-7"), ModelName: "claude-opus-4-7", Strengths: []TaskType{TaskUnitTest}, // user-supplied; defaults should not overwrite CostWeight: 0.5, // user-supplied }) arm, _ := r.LookupArm(NewArmID("anthropic", "claude-opus-4-7")) if !reflect.DeepEqual(arm.Strengths, []TaskType{TaskUnitTest}) { t.Errorf("user-supplied Strengths overridden by defaults: got %v", arm.Strengths) } if arm.CostWeight != 0.5 { t.Errorf("user-supplied CostWeight overridden: got %v", arm.CostWeight) } } func TestRegisterArm_FallsBackToIDWhenModelNameMissing(t *testing.T) { // Some test code constructs arms with ID but no ModelName. // applyFamilyDefaults should fall back to ID.Model() so defaults // still flow through. r := New(Config{}) r.RegisterArm(&Arm{ ID: NewArmID("openai", "gpt-5.3-codex"), // ModelName intentionally empty }) arm, _ := r.LookupArm(NewArmID("openai", "gpt-5.3-codex")) if arm.CostWeight != 0.6 { t.Errorf("CostWeight = %v, want 0.6 (defaults should resolve via ID.Model() fallback)", arm.CostWeight) } } // --- Integration: routing-payoff scenario -------------------------------- // TestRoutingDefaults_PayoffScenario is the user-facing demonstration that // out-of-the-box selection now picks sensibly across a realistic local // fleet, without any [[arms]] override. Per // docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md the // motivating goal: incognito stops feeling random. // // Note on Thinking capability: real phi-4 supports extended reasoning, // but DiscoveredModel today has no SupportsThinking field — discovery // only flips ToolUse and Vision. The selector's heuristicQuality gives // a +0.2 bump for Thinking+Planning that would otherwise push phi-4 // over the TaskPlanning quality floor (0.60). The test mutates the arm // after registration to reflect what the model actually supports; // surfacing a thinking flag in discovery is tracked separately (out of // scope for the defaults-refresh plan). func TestRoutingDefaults_PayoffScenario(t *testing.T) { r := New(Config{}) factory := func(name, model string) SecureProvider { return security.WrapProvider(&stubProvider{name: name, model: model}, nil) } models := []DiscoveredModel{ {ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "phi-4:14b", Provider: "ollama", SupportsTools: true, ContextSize: 16384}, {ID: "qwen3-coder:30b", Provider: "ollama", SupportsTools: true, ContextSize: 262144}, } RegisterDiscoveredModels(r, models, factory) // Reflect phi-4's real Thinking capability — see test comment. if arm, ok := r.LookupArm("ollama/phi-4:14b"); ok { arm.Capabilities.ThinkingModes = []provider.EffortLevel{provider.EffortMedium} } cases := []struct { name string task Task wantArmID ArmID reason string }{ { name: "Generation picks qwen3-coder", task: Task{Type: TaskGeneration, RequiresTools: true, ComplexityScore: 0.7, Priority: PriorityNormal, EstimatedTokens: 2000}, wantArmID: "ollama/qwen3-coder:30b", reason: "qwen3-coder is Strengths-promoted for TaskGeneration and has the highest MaxComplexity (0.85)", }, { name: "Planning picks phi-4", task: Task{Type: TaskPlanning, RequiresTools: true, ComplexityScore: 0.5, Priority: PriorityNormal, EstimatedTokens: 1500}, wantArmID: "ollama/phi-4:14b", reason: "phi-4 is Strengths-promoted for TaskPlanning; qwen3-coder's strengths don't include Planning", }, { name: "Boilerplate picks tiny3.5", task: Task{Type: TaskBoilerplate, RequiresTools: true, ComplexityScore: 0.1, Priority: PriorityLow, EstimatedTokens: 200}, wantArmID: "ollama/reecdev/tiny3.5:1.5b", reason: "tiny3.5 Strengths include TaskBoilerplate; it's the cheapest viable arm for a trivial task", }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { decision := r.Select(tc.task) if decision.Error != nil { t.Fatalf("Select returned error: %v", decision.Error) } if decision.Arm == nil { t.Fatal("Select returned nil arm") } if decision.Arm.ID != tc.wantArmID { t.Errorf("got arm %q, want %q\n reason: %s", decision.Arm.ID, tc.wantArmID, tc.reason) } decision.Rollback() }) } } // TestRoutingDefaults_LocalFleetVisibility makes sure the maintainer's // actual Ollama inventory all register correctly (none accidentally // excluded by the non-chat filter, all get sensible defaults). func TestRoutingDefaults_LocalFleetVisibility(t *testing.T) { r := New(Config{}) factory := func(name, model string) SecureProvider { return security.WrapProvider(&stubProvider{name: name, model: model}, nil) } // Models from the maintainer's `ollama ls` output (2026-05-23 session). models := []DiscoveredModel{ {ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "reecdev/tiny3.5:500m", Provider: "ollama", ContextSize: 32768}, {ID: "ministral-3:3b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "qwen3.5:4b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "gemma4-e4b-uc:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "gemma4:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "qwen3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "devstral-small-2:24b", Provider: "ollama", SupportsTools: true, ContextSize: 131072}, {ID: "qwen2.5-coder:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "embeddinggemma:latest", Provider: "ollama", ContextSize: 8192}, {ID: "functiongemma:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "ministral-3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, {ID: "ministral-3:8b", Provider: "ollama", SupportsTools: true, ContextSize: 32768}, } RegisterDiscoveredModels(r, models, factory) registered := make(map[ArmID]*Arm) for _, a := range r.Arms() { registered[a.ID] = a } // embeddinggemma must be skipped entirely. if _, ok := registered["ollama/embeddinggemma:latest"]; ok { t.Error("embeddinggemma should be skipped by non-chat filter") } // Every other model must be registered. wantRegistered := []ArmID{ "ollama/reecdev/tiny3.5:1.5b", "ollama/reecdev/tiny3.5:500m", "ollama/ministral-3:3b", "ollama/qwen3.5:4b", "ollama/gemma4-e4b-uc:latest", "ollama/gemma4:latest", "ollama/qwen3:14b", "ollama/devstral-small-2:24b", "ollama/qwen2.5-coder:14b", "ollama/functiongemma:latest", "ollama/ministral-3:14b", "ollama/ministral-3:8b", } for _, id := range wantRegistered { if _, ok := registered[id]; !ok { t.Errorf("expected %q to be registered", id) } } // Spot-check that defaults flowed through to the arms. checks := []struct { id ArmID wantMaxComp float64 wantDisabled bool wantStrengths []TaskType }{ {"ollama/qwen3-coder:30b", 0, false, nil}, // not in fleet, sanity skip {"ollama/devstral-small-2:24b", 0.85, false, []TaskType{TaskGeneration, TaskRefactor, TaskDebug}}, {"ollama/qwen3:14b", 0.75, false, []TaskType{TaskGeneration, TaskRefactor, TaskDebug}}, {"ollama/ministral-3:14b", 0.70, false, []TaskType{TaskOrchestration, TaskPlanning}}, {"ollama/ministral-3:8b", 0.55, false, []TaskType{TaskOrchestration, TaskPlanning}}, {"ollama/ministral-3:3b", 0.35, false, []TaskType{TaskOrchestration, TaskPlanning}}, {"ollama/reecdev/tiny3.5:1.5b", 0.30, false, []TaskType{TaskBoilerplate, TaskExplain}}, {"ollama/reecdev/tiny3.5:500m", 0.20, false, []TaskType{TaskBoilerplate, TaskExplain}}, {"ollama/functiongemma:latest", 0.40, true, []TaskType{TaskOrchestration}}, {"ollama/gemma4-e4b-uc:latest", 0.45, false, []TaskType{TaskExplain, TaskBoilerplate}}, {"ollama/qwen3.5:4b", 0.50, false, []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}}, } for _, c := range checks { arm, ok := registered[c.id] if !ok { continue // already reported above } if arm.MaxComplexity != c.wantMaxComp { t.Errorf("%s MaxComplexity = %v, want %v", c.id, arm.MaxComplexity, c.wantMaxComp) } if arm.Disabled != c.wantDisabled { t.Errorf("%s Disabled = %v, want %v", c.id, arm.Disabled, c.wantDisabled) } if c.wantStrengths != nil && !reflect.DeepEqual(arm.Strengths, c.wantStrengths) { t.Errorf("%s Strengths = %v, want %v", c.id, arm.Strengths, c.wantStrengths) } } }