Files
vikingowl a23eb6b92c style: gofmt drift from prior commits
Pure whitespace cleanup surfaced when 'make check' ran gofmt over the
tree. Mostly struct-field column alignment in internal/safety/banner.go
(SessionInfo) and the var(...) flag block in cmd/gnoma/main.go after
--dangerously-allow-anywhere was added without realignment. Verified
zero substantive changes via 'git diff --ignore-all-space
--ignore-blank-lines'.
2026-05-24 16:33:17 +02:00

475 lines
18 KiB
Go

package router
import (
"reflect"
"sort"
"testing"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/security"
)
// --- parseSizeFromModelID -------------------------------------------------
// TestParseSizeFromModelID is a table test for parseSizeFromModelID. Each
// row gives a model ID, whether a size tag is expected to be found, and the
// expected numeric size. Judging by the rows, sizes are expressed in
// billions ("500m" is expected as 0.5, "14b" as 14).
func TestParseSizeFromModelID(t *testing.T) {
	type sizeCase struct {
		label   string
		modelID string
		sizeB   float64
		parses  bool
	}
	// Rows that must NOT parse leave sizeB/parses at their zero values.
	table := []sizeCase{
		{label: "ollama colon", modelID: "qwen3:14b", sizeB: 14, parses: true},
		{label: "ollama colon decimal", modelID: "tiny3.5:1.5b", sizeB: 1.5, parses: true},
		{label: "ollama colon millions", modelID: "reecdev/tiny3.5:500m", sizeB: 0.5, parses: true},
		{label: "hyphen middle", modelID: "qwen3.5-9b-glm5.1-distill-v1", sizeB: 9, parses: true},
		{label: "moe total wins over active", modelID: "qwen3-coder:30b-a3b-q4_K_M", sizeB: 30, parses: true},
		{label: "namespace stripped", modelID: "google/functiongemma-270m-it", sizeB: 0.27, parses: true},
		{label: "no size tag", modelID: "phi-4"},
		{label: "plain version no b", modelID: "qwen3.5"},
		{label: "gemma e-tag not pure size", modelID: "gemma-4-e2b-it"},
		{label: "starcoder digit-only family", modelID: "starcoder2"},
		{label: "large MoE", modelID: "qwen3-coder:480b", sizeB: 480, parses: true},
	}

	for i := range table {
		row := table[i]
		t.Run(row.label, func(t *testing.T) {
			size, found := parseSizeFromModelID(row.modelID)
			if found != row.parses {
				t.Fatalf("parseSizeFromModelID(%q) ok=%v, want %v (got value %v)", row.modelID, found, row.parses, size)
			}
			// The numeric value is only meaningful when a size was found.
			if !found {
				return
			}
			if size != row.sizeB {
				t.Errorf("parseSizeFromModelID(%q) = %v, want %v", row.modelID, size, row.sizeB)
			}
		})
	}
}
// --- ResolveFamilyDefaults: longest-prefix discipline ---------------------
// TestResolveFamilyDefaults_LongestPrefixWins checks that each model ID
// resolves to the defaults stored under the expected family key in
// knownFamilyDefaults. The resolved value carries no family name, so the
// match is established by comparing Strengths, MaxComplexity and Disabled
// against the expected table entry.
func TestResolveFamilyDefaults_LongestPrefixWins(t *testing.T) {
	type prefixCase struct {
		modelID    string
		wantFamily string // key in knownFamilyDefaults that should win
	}
	table := []prefixCase{
		{modelID: "qwen3-coder:30b", wantFamily: "qwen3-coder"},
		{modelID: "qwen3:14b", wantFamily: "qwen3"},
		{modelID: "qwen3.5:4b", wantFamily: "qwen3.5"},
		{modelID: "qwen3.5-9b-glm5.1-distill-v1", wantFamily: "qwen3.5"},
		{modelID: "qwen2.5-coder:14b", wantFamily: "qwen2.5-coder"},
		{modelID: "qwen2.5:7b", wantFamily: "qwen2.5"},
		{modelID: "qwen-novel:7b", wantFamily: "qwen"},
		{modelID: "mistral-small-3:24b", wantFamily: "mistral-small-3"},
		{modelID: "mistral-7b-instruct-v0.3", wantFamily: "mistral"},
		{modelID: "ministral-3:14b", wantFamily: "ministral-3"},
		{modelID: "gemma4:latest", wantFamily: "gemma4"},
		{modelID: "gemma4-e4b-uc:latest", wantFamily: "gemma4-e"},
		{modelID: "gemma-4-e2b-it", wantFamily: "gemma-4-e"},
		{modelID: "phi-4-mini", wantFamily: "phi-4-mini"},
		{modelID: "phi-4:14b", wantFamily: "phi-4"},
		{modelID: "tiny3.5:1.5b", wantFamily: "tiny3.5"},
		{modelID: "reecdev/tiny3.5:500m", wantFamily: "tiny3.5"},
		{modelID: "google/functiongemma-270m-it", wantFamily: "functiongemma"},
		{modelID: "glm-ocr", wantFamily: "glm-ocr"},
		{modelID: "glm-5.1", wantFamily: "glm"},
	}

	for _, row := range table {
		t.Run(row.modelID, func(t *testing.T) {
			got, found := ResolveFamilyDefaults(row.modelID)
			if !found {
				t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", row.modelID)
			}
			// A missing key here means the table above is wrong, not the resolver.
			want, known := knownFamilyDefaults[row.wantFamily]
			if !known {
				t.Fatalf("test bug: %q not in knownFamilyDefaults", row.wantFamily)
			}

			sameFamily := reflect.DeepEqual(got.Strengths, want.Strengths) &&
				got.MaxComplexity == want.MaxComplexity &&
				got.Disabled == want.Disabled
			if sameFamily {
				return
			}
			t.Errorf("%q resolved to wrong family — got Strengths=%v MaxComplexity=%v Disabled=%v, want family %q Strengths=%v MaxComplexity=%v Disabled=%v",
				row.modelID, got.Strengths, got.MaxComplexity, got.Disabled,
				row.wantFamily, want.Strengths, want.MaxComplexity, want.Disabled)
		})
	}
}
// TestResolveFamilyDefaults_Unknown asserts that model IDs with no entry in
// the family table are reported as unresolved by ResolveFamilyDefaults.
func TestResolveFamilyDefaults_Unknown(t *testing.T) {
	unknownIDs := []string{
		"some-novel-model:1.5b",
		"falcon:7b",
		"command-r:35b",
	}
	for _, modelID := range unknownIDs {
		_, matched := ResolveFamilyDefaults(modelID)
		if !matched {
			continue
		}
		t.Errorf("ResolveFamilyDefaults(%q) should not match anything in the table", modelID)
	}
}
// --- ResolveMaxComplexity: size-keyed lookup -----------------------------
// TestResolveMaxComplexity_SizeKeyed verifies the complexity cap returned by
// ResolveMaxComplexity for sized model IDs, covering families whose cap
// depends on the parsed size as well as families with a single flat cap.
func TestResolveMaxComplexity_SizeKeyed(t *testing.T) {
	type capCase struct {
		modelID string
		wantCap float64
	}
	table := []capCase{
		// ministral-3 ladder: 14b → 0.70, 8b → 0.55, 3b → 0.35
		{modelID: "ministral-3:14b", wantCap: 0.70},
		{modelID: "ministral-3:8b", wantCap: 0.55},
		{modelID: "ministral-3:3b", wantCap: 0.35},
		// qwen3 ladder: 14b → 0.75, 7-13b → 0.65, <7b → 0.50
		{modelID: "qwen3:14b", wantCap: 0.75},
		{modelID: "qwen3:7b", wantCap: 0.65},
		{modelID: "qwen3:4b", wantCap: 0.50},
		// qwen3.5 ladder: 9b → 0.65, 4-8b → 0.50, <4b → 0.40
		{modelID: "qwen3.5-9b-glm5.1-distill-v1", wantCap: 0.65},
		{modelID: "qwen3.5:4b", wantCap: 0.50},
		// tiny3.5 ladder: 1.5b → 0.30, 0.5b → 0.20
		{modelID: "reecdev/tiny3.5:1.5b", wantCap: 0.30},
		{modelID: "reecdev/tiny3.5:500m", wantCap: 0.20},
		// flat caps still resolve correctly
		{modelID: "qwen3-coder:30b", wantCap: 0.85},
		{modelID: "phi-4:14b", wantCap: 0.65},
		{modelID: "gemma4-e4b-uc:latest", wantCap: 0.45},
	}

	for i := range table {
		row := table[i]
		t.Run(row.modelID, func(t *testing.T) {
			ceiling, resolved := ResolveMaxComplexity(row.modelID)
			switch {
			case !resolved:
				t.Fatalf("ResolveMaxComplexity(%q) returned !ok", row.modelID)
			case ceiling != row.wantCap:
				t.Errorf("ResolveMaxComplexity(%q) = %v, want %v", row.modelID, ceiling, row.wantCap)
			}
		})
	}
}
// TestResolveMaxComplexity_SizeParseFailsFallsBack covers a model ID that
// matches a size-keyed family but carries no size tag ("qwen3"). The lookup
// is expected to still resolve, returning 0.50 — the smallest qwen3 cap —
// as the conservative fallback.
func TestResolveMaxComplexity_SizeParseFailsFallsBack(t *testing.T) {
	ceiling, resolved := ResolveMaxComplexity("qwen3")
	switch {
	case !resolved:
		t.Fatal("ResolveMaxComplexity should resolve unsized qwen3 via fallback")
	case ceiling != 0.50:
		t.Errorf("ResolveMaxComplexity(\"qwen3\") = %v, want 0.50 (smallest SizeCap fallback)", ceiling)
	}
}
// --- Table integrity ------------------------------------------------------
// TestKnownFamilyDefaults_SizeCapsOrdered is a table-integrity check: for
// every family with two or more SizeCaps, the MinSizeB thresholds must
// already be in descending order. ResolveMaxComplexity walks the list and
// stops at the first match, so a mis-ordered list would shadow entries.
func TestKnownFamilyDefaults_SizeCapsOrdered(t *testing.T) {
	for family, defaults := range knownFamilyDefaults {
		n := len(defaults.SizeCaps)
		if n < 2 {
			// Zero or one threshold is trivially ordered.
			continue
		}

		stored := make([]float64, 0, n)
		for _, entry := range defaults.SizeCaps {
			stored = append(stored, entry.MinSizeB)
		}

		// Sort a copy largest-first and require the stored order to match it.
		descending := make([]float64, n)
		copy(descending, stored)
		sort.Sort(sort.Reverse(sort.Float64Slice(descending)))

		if reflect.DeepEqual(stored, descending) {
			continue
		}
		t.Errorf("family %q SizeCaps not ordered largest-first: %v", family, stored)
	}
}
// TestKnownFamilyDefaults_NoDualSpec is a table-integrity check: a family
// entry may carry either a SizeCaps ladder or a flat MaxComplexity, never
// both, because the lookup treats the two as mutually exclusive.
func TestKnownFamilyDefaults_NoDualSpec(t *testing.T) {
	for family, defaults := range knownFamilyDefaults {
		hasLadder := len(defaults.SizeCaps) > 0
		hasFlatCap := defaults.MaxComplexity > 0
		if !hasLadder || !hasFlatCap {
			continue
		}
		t.Errorf("family %q declares both SizeCaps and MaxComplexity; pick one", family)
	}
}
// --- Cloud defaults --------------------------------------------------------
// TestResolveFamilyDefaults_CloudArms checks the defaults resolved for
// current cloud model IDs: the expected Strengths and CostWeight, and a
// zero MaxComplexity (cloud arms are expected to have no ceiling). Several
// rows are suffixed variants that should resolve like their base ID.
func TestResolveFamilyDefaults_CloudArms(t *testing.T) {
	type cloudCase struct {
		modelID    string
		strengths  []TaskType
		costWeight float64
	}
	table := []cloudCase{
		{modelID: "claude-opus-4-7", strengths: []TaskType{TaskPlanning, TaskSecurityReview, TaskDebug, TaskRefactor}, costWeight: 0.3},
		{modelID: "claude-sonnet-4-6", strengths: []TaskType{TaskGeneration, TaskRefactor, TaskReview}, costWeight: 0.7},
		{modelID: "gpt-5.5", strengths: []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, costWeight: 0.3},
		{modelID: "gpt-5.5-pro", strengths: []TaskType{TaskPlanning, TaskSecurityReview, TaskGeneration}, costWeight: 0.3}, // shares prefix with gpt-5.5
		{modelID: "gpt-5.3-codex", strengths: []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}, costWeight: 0.6},
		{modelID: "gpt-5.2", strengths: []TaskType{TaskOrchestration, TaskReview}, costWeight: 0.8},
		{modelID: "gpt-5.2-chat-latest", strengths: []TaskType{TaskOrchestration, TaskReview}, costWeight: 0.8},
		{modelID: "gemini-3.1-pro", strengths: []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, costWeight: 0.5},
		{modelID: "gemini-3.1-pro-preview", strengths: []TaskType{TaskPlanning, TaskReview, TaskOrchestration}, costWeight: 0.5},
		{modelID: "gemini-3.5-flash", strengths: []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}, costWeight: 1.2},
	}

	for i := range table {
		row := table[i]
		t.Run(row.modelID, func(t *testing.T) {
			defaults, found := ResolveFamilyDefaults(row.modelID)
			if !found {
				t.Fatalf("ResolveFamilyDefaults(%q) returned !ok", row.modelID)
			}
			// The three properties are reported independently so a single run
			// surfaces every mismatch for the row.
			if gotStrengths := defaults.Strengths; !reflect.DeepEqual(gotStrengths, row.strengths) {
				t.Errorf("%q Strengths = %v, want %v", row.modelID, gotStrengths, row.strengths)
			}
			if gotWeight := defaults.CostWeight; gotWeight != row.costWeight {
				t.Errorf("%q CostWeight = %v, want %v", row.modelID, gotWeight, row.costWeight)
			}
			if gotCap := defaults.MaxComplexity; gotCap != 0 {
				t.Errorf("%q MaxComplexity = %v, want 0 (cloud arms have no ceiling)", row.modelID, gotCap)
			}
		})
	}
}
// TestResolveFamilyDefaults_CloudLegacyUnaffected guards against the cloud
// defaults leaking onto older or unrelated cloud model IDs: none of the IDs
// below may resolve, so users pinned to them do not get Strengths imposed.
func TestResolveFamilyDefaults_CloudLegacyUnaffected(t *testing.T) {
	legacyIDs := []string{
		"claude-opus-4-20250514",
		"claude-sonnet-4-20250514",
		"claude-haiku-4-5-20251001",
		"gpt-4o",
		"gpt-4o-mini",
		"o3",
		"o3-mini",
		"gemini-2.5-pro",
		"gemini-2.0-flash",
	}
	for _, modelID := range legacyIDs {
		_, matched := ResolveFamilyDefaults(modelID)
		if !matched {
			continue
		}
		t.Errorf("ResolveFamilyDefaults(%q) should not match (legacy model)", modelID)
	}
}
// TestRegisterArm_AppliesCloudDefaults registers a gpt-5.3-codex arm that
// sets no Strengths or CostWeight of its own and expects the registered arm
// to come back with the family defaults filled in and MaxComplexity left
// at zero.
func TestRegisterArm_AppliesCloudDefaults(t *testing.T) {
	rt := New(Config{})
	armID := NewArmID("openai", "gpt-5.3-codex")
	caps := provider.Capabilities{
		ToolUse:       true,
		JSONOutput:    true,
		ContextWindow: 400000,
	}
	rt.RegisterArm(&Arm{ID: armID, ModelName: "gpt-5.3-codex", Capabilities: caps})

	registered, found := rt.LookupArm(armID)
	if !found {
		t.Fatal("gpt-5.3-codex arm should be registered")
	}

	if want := []TaskType{TaskGeneration, TaskRefactor, TaskDebug, TaskUnitTest}; !reflect.DeepEqual(registered.Strengths, want) {
		t.Errorf("Strengths = %v, want %v", registered.Strengths, want)
	}
	if got := registered.CostWeight; got != 0.6 {
		t.Errorf("CostWeight = %v, want 0.6", got)
	}
	if got := registered.MaxComplexity; got != 0 {
		t.Errorf("MaxComplexity = %v, want 0 (cloud arm)", got)
	}
}
// TestRegisterArm_DoesNotOverrideUserStrengths registers an arm whose model
// has family defaults but which already carries user-supplied Strengths and
// CostWeight, and asserts that registration leaves both untouched.
func TestRegisterArm_DoesNotOverrideUserStrengths(t *testing.T) {
	r := New(Config{})
	id := NewArmID("anthropic", "claude-opus-4-7")
	r.RegisterArm(&Arm{
		ID:         id,
		ModelName:  "claude-opus-4-7",
		Strengths:  []TaskType{TaskUnitTest}, // user-supplied; defaults should not overwrite
		CostWeight: 0.5,                      // user-supplied
	})

	// Fail cleanly if registration did not take effect, rather than
	// dereferencing whatever LookupArm hands back on a miss.
	arm, ok := r.LookupArm(id)
	if !ok {
		t.Fatal("claude-opus-4-7 arm should be registered")
	}
	if !reflect.DeepEqual(arm.Strengths, []TaskType{TaskUnitTest}) {
		t.Errorf("user-supplied Strengths overridden by defaults: got %v", arm.Strengths)
	}
	if arm.CostWeight != 0.5 {
		t.Errorf("user-supplied CostWeight overridden: got %v", arm.CostWeight)
	}
}
// TestRegisterArm_FallsBackToIDWhenModelNameMissing covers arms built with
// an ID but no ModelName (as some test code does). Defaults are expected to
// be resolved from the model part of the ID instead, which is observed here
// through the gpt-5.3-codex CostWeight of 0.6.
func TestRegisterArm_FallsBackToIDWhenModelNameMissing(t *testing.T) {
	r := New(Config{})
	id := NewArmID("openai", "gpt-5.3-codex")
	r.RegisterArm(&Arm{
		ID: id,
		// ModelName intentionally empty
	})

	// Fail cleanly if registration did not take effect, rather than
	// dereferencing whatever LookupArm hands back on a miss.
	arm, ok := r.LookupArm(id)
	if !ok {
		t.Fatal("gpt-5.3-codex arm should be registered")
	}
	if arm.CostWeight != 0.6 {
		t.Errorf("CostWeight = %v, want 0.6 (defaults should resolve via ID.Model() fallback)", arm.CostWeight)
	}
}
// --- Integration: routing-payoff scenario --------------------------------
// TestRoutingDefaults_PayoffScenario is the user-facing demonstration that
// out-of-the-box selection now picks sensibly across a realistic local
// fleet, without any [[arms]] override. Per
// docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md the
// motivating goal: incognito stops feeling random.
//
// Note on Thinking capability: real phi-4 supports extended reasoning,
// but DiscoveredModel today has no SupportsThinking field — discovery
// only flips ToolUse and Vision. The selector's heuristicQuality gives
// a +0.2 bump for Thinking+Planning that would otherwise push phi-4
// over the TaskPlanning quality floor (0.60). The test mutates the arm
// after registration to reflect what the model actually supports;
// surfacing a thinking flag in discovery is tracked separately (out of
// scope for the defaults-refresh plan).
//
// The phi-4 arm lookup is a hard precondition: if the arm is missing the
// test stops immediately instead of running the selection cases against a
// fleet that was never patched.
func TestRoutingDefaults_PayoffScenario(t *testing.T) {
	r := New(Config{})
	factory := func(name, model string) SecureProvider {
		return security.WrapProvider(&stubProvider{name: name, model: model}, nil)
	}
	models := []DiscoveredModel{
		{ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "phi-4:14b", Provider: "ollama", SupportsTools: true, ContextSize: 16384},
		{ID: "qwen3-coder:30b", Provider: "ollama", SupportsTools: true, ContextSize: 262144},
	}
	RegisterDiscoveredModels(r, models, factory)

	// Reflect phi-4's real Thinking capability — see test comment. A missing
	// arm would otherwise only show up later as a confusing wrong-arm
	// failure in the Planning case.
	phi4, ok := r.LookupArm("ollama/phi-4:14b")
	if !ok {
		t.Fatal("ollama/phi-4:14b arm should be registered by RegisterDiscoveredModels")
	}
	phi4.Capabilities.ThinkingModes = []provider.EffortLevel{provider.EffortMedium}

	cases := []struct {
		name      string
		task      Task
		wantArmID ArmID
		reason    string // printed on mismatch to explain the expectation
	}{
		{
			name:      "Generation picks qwen3-coder",
			task:      Task{Type: TaskGeneration, RequiresTools: true, ComplexityScore: 0.7, Priority: PriorityNormal, EstimatedTokens: 2000},
			wantArmID: "ollama/qwen3-coder:30b",
			reason:    "qwen3-coder is Strengths-promoted for TaskGeneration and has the highest MaxComplexity (0.85)",
		},
		{
			name:      "Planning picks phi-4",
			task:      Task{Type: TaskPlanning, RequiresTools: true, ComplexityScore: 0.5, Priority: PriorityNormal, EstimatedTokens: 1500},
			wantArmID: "ollama/phi-4:14b",
			reason:    "phi-4 is Strengths-promoted for TaskPlanning; qwen3-coder's strengths don't include Planning",
		},
		{
			name:      "Boilerplate picks tiny3.5",
			task:      Task{Type: TaskBoilerplate, RequiresTools: true, ComplexityScore: 0.1, Priority: PriorityLow, EstimatedTokens: 200},
			wantArmID: "ollama/reecdev/tiny3.5:1.5b",
			reason:    "tiny3.5 Strengths include TaskBoilerplate; it's the cheapest viable arm for a trivial task",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			decision := r.Select(tc.task)
			if decision.Error != nil {
				t.Fatalf("Select returned error: %v", decision.Error)
			}
			if decision.Arm == nil {
				t.Fatal("Select returned nil arm")
			}
			if decision.Arm.ID != tc.wantArmID {
				t.Errorf("got arm %q, want %q\n reason: %s", decision.Arm.ID, tc.wantArmID, tc.reason)
			}
			// Undo the selection so the cases, which share one router, do
			// not affect each other.
			decision.Rollback()
		})
	}
}
// TestRoutingDefaults_LocalFleetVisibility feeds the maintainer's real
// Ollama inventory through RegisterDiscoveredModels and verifies three
// things: the embedding model is filtered out, every other model ends up
// registered, and a sample of arms carries the expected MaxComplexity,
// Disabled flag and Strengths.
func TestRoutingDefaults_LocalFleetVisibility(t *testing.T) {
	rt := New(Config{})
	newProvider := func(name, model string) SecureProvider {
		return security.WrapProvider(&stubProvider{name: name, model: model}, nil)
	}

	// Models from the maintainer's `ollama ls` output (2026-05-23 session).
	fleet := []DiscoveredModel{
		{ID: "reecdev/tiny3.5:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "reecdev/tiny3.5:500m", Provider: "ollama", ContextSize: 32768},
		{ID: "ministral-3:3b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "qwen3.5:4b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "gemma4-e4b-uc:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "gemma4:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "qwen3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "devstral-small-2:24b", Provider: "ollama", SupportsTools: true, ContextSize: 131072},
		{ID: "qwen2.5-coder:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "embeddinggemma:latest", Provider: "ollama", ContextSize: 8192},
		{ID: "functiongemma:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "ministral-3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "ministral-3:8b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
	}
	RegisterDiscoveredModels(rt, fleet, newProvider)

	// Index the registered arms by ID for the lookups below.
	byID := map[ArmID]*Arm{}
	for _, arm := range rt.Arms() {
		byID[arm.ID] = arm
	}

	// embeddinggemma must be skipped entirely.
	if _, present := byID["ollama/embeddinggemma:latest"]; present {
		t.Error("embeddinggemma should be skipped by non-chat filter")
	}

	// Every other model must be registered.
	mustExist := []ArmID{
		"ollama/reecdev/tiny3.5:1.5b",
		"ollama/reecdev/tiny3.5:500m",
		"ollama/ministral-3:3b",
		"ollama/qwen3.5:4b",
		"ollama/gemma4-e4b-uc:latest",
		"ollama/gemma4:latest",
		"ollama/qwen3:14b",
		"ollama/devstral-small-2:24b",
		"ollama/qwen2.5-coder:14b",
		"ollama/functiongemma:latest",
		"ollama/ministral-3:14b",
		"ollama/ministral-3:8b",
	}
	for _, armID := range mustExist {
		if _, present := byID[armID]; present {
			continue
		}
		t.Errorf("expected %q to be registered", armID)
	}

	// Spot-check that defaults flowed through to the arms. A nil strengths
	// field means "do not check Strengths"; omitted fields are zero/false.
	type armExpectation struct {
		id        ArmID
		maxComp   float64
		disabled  bool
		strengths []TaskType
	}
	expectations := []armExpectation{
		{id: "ollama/qwen3-coder:30b"}, // not in fleet, sanity skip
		{id: "ollama/devstral-small-2:24b", maxComp: 0.85, strengths: []TaskType{TaskGeneration, TaskRefactor, TaskDebug}},
		{id: "ollama/qwen3:14b", maxComp: 0.75, strengths: []TaskType{TaskGeneration, TaskRefactor, TaskDebug}},
		{id: "ollama/ministral-3:14b", maxComp: 0.70, strengths: []TaskType{TaskOrchestration, TaskPlanning}},
		{id: "ollama/ministral-3:8b", maxComp: 0.55, strengths: []TaskType{TaskOrchestration, TaskPlanning}},
		{id: "ollama/ministral-3:3b", maxComp: 0.35, strengths: []TaskType{TaskOrchestration, TaskPlanning}},
		{id: "ollama/reecdev/tiny3.5:1.5b", maxComp: 0.30, strengths: []TaskType{TaskBoilerplate, TaskExplain}},
		{id: "ollama/reecdev/tiny3.5:500m", maxComp: 0.20, strengths: []TaskType{TaskBoilerplate, TaskExplain}},
		{id: "ollama/functiongemma:latest", maxComp: 0.40, disabled: true, strengths: []TaskType{TaskOrchestration}},
		{id: "ollama/gemma4-e4b-uc:latest", maxComp: 0.45, strengths: []TaskType{TaskExplain, TaskBoilerplate}},
		{id: "ollama/qwen3.5:4b", maxComp: 0.50, strengths: []TaskType{TaskBoilerplate, TaskExplain, TaskOrchestration}},
	}
	for _, want := range expectations {
		arm, present := byID[want.id]
		if !present {
			// Fleet members were already reported by the registration loop;
			// the qwen3-coder row is not in the fleet and is skipped here.
			continue
		}
		if got := arm.MaxComplexity; got != want.maxComp {
			t.Errorf("%s MaxComplexity = %v, want %v", want.id, got, want.maxComp)
		}
		if got := arm.Disabled; got != want.disabled {
			t.Errorf("%s Disabled = %v, want %v", want.id, got, want.disabled)
		}
		if want.strengths == nil {
			continue
		}
		if !reflect.DeepEqual(arm.Strengths, want.strengths) {
			t.Errorf("%s Strengths = %v, want %v", want.id, arm.Strengths, want.strengths)
		}
	}
}