Files
vikingowl a79e99199d feat(router): non-chat exclude, vision prefixes, family-defaults scaffold
Discovery previously registered every model returned by Ollama as a
chat arm, including embeddings, ASR, TTS, audio realtime, and
rerankers — which then failed at inference time when the router
selected them. Local arms also shipped with all-zero defaults, so
selection between e.g. tiny3.5:1.5b, phi-4:14b, and qwen3-coder:30b
was effectively random.

This change covers tasks R-1, R-2, R-6 from the routing-defaults plan.

- nonChatModelPatterns + isNonChatModel substring matcher; matched
  IDs are skipped during RegisterDiscoveredModels. Covers whisper,
  moonshine, kokoros, vibevoice, -asr, -tts, -audio, -embedding,
  embeddinggemma, -reranker, lfm2.
- knownVisionModelPrefixes gains gemma4, gemma-4, glm-ocr. gemma3
  and minicpm-v entries stay for regression coverage.
- New internal/router/defaults.go with FamilyDefaults struct,
  knownFamilyDefaults map, and ResolveFamilyDefaults longest-prefix
  lookup (with org/-namespace stripping so reecdev/tiny3.5:1.5b
  resolves to "tiny3.5"). Single entry for now: functiongemma is
  registered with Disabled=true and MaxComplexity=0.40, reserved for
  the future ArmRoleToolRouter path. Table will grow in R-3.
- RegisterDiscoveredModels consults ResolveFamilyDefaults and only
  populates fields that are still zero on the arm, so user [[arms]]
  overrides keep priority.

Plans:
- docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
- docs/superpowers/plans/2026-05-23-tool-router-specialization.md

TODO.md surfaces both as in-flight items.
2026-05-23 21:24:59 +02:00

591 lines
17 KiB
Go

package router
import (
"context"
"fmt"
"log/slog"
"net/http"
"net/http/httptest"
"strings"
"testing"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/security"
"somegit.dev/Owlibou/gnoma/internal/stream"
)
// --- ArmID helpers ---
// TestArmID_Provider checks that ArmID.Provider yields the segment before the
// slash, and the whole ID when the ID contains no slash.
func TestArmID_Provider(t *testing.T) {
	cases := []struct {
		id   ArmID
		want string
	}{
		{id: "llamacpp/gemma-26b", want: "llamacpp"},
		{id: "anthropic/claude-sonnet", want: "anthropic"},
		{id: "single", want: "single"},
	}

	for i := range cases {
		tc := cases[i]
		got := tc.id.Provider()
		if got == tc.want {
			continue
		}
		t.Errorf("ArmID(%q).Provider() = %q, want %q", tc.id, got, tc.want)
	}
}
// TestArmID_Model checks that ArmID.Model yields the segment after the slash,
// and the whole ID when the ID contains no slash.
func TestArmID_Model(t *testing.T) {
	cases := []struct {
		id   ArmID
		want string
	}{
		{id: "llamacpp/gemma-26b", want: "gemma-26b"},
		{id: "anthropic/claude-sonnet", want: "claude-sonnet"},
		{id: "single", want: "single"},
	}

	for i := range cases {
		tc := cases[i]
		got := tc.id.Model()
		if got == tc.want {
			continue
		}
		t.Errorf("ArmID(%q).Model() = %q, want %q", tc.id, got, tc.want)
	}
}
// --- reconcileArms ---
// noopFactory is a provider factory for tests that never need a working
// provider: it ignores both arguments and always returns nil.
func noopFactory(name, model string) SecureProvider {
	return nil
}
// dummyArm builds a minimal Arm for tests. The model name is derived from id,
// IsLocal is set from local, and the capabilities advertise tool use with an
// 8192-token context window.
func dummyArm(id ArmID, local bool) *Arm {
	arm := new(Arm)
	arm.ID = id
	arm.ModelName = id.Model()
	arm.IsLocal = local
	arm.Capabilities = provider.Capabilities{ToolUse: true, ContextWindow: 8192}
	return arm
}
// TestReconcileArms_ForcedDefaultArm_ReconciledToDiscovered forces the
// placeholder arm "llamacpp/default", runs reconcileArms with a single
// discovered llamacpp model, and expects the forced arm, the onReconcile
// callback argument, and the subsequent Select result to all be the
// discovered arm.
func TestReconcileArms_ForcedDefaultArm_ReconciledToDiscovered(t *testing.T) {
	const wantArm = "llamacpp/gemma-26b"

	rt := New(Config{})
	rt.RegisterArm(dummyArm("llamacpp/default", true))
	rt.ForceArm("llamacpp/default")

	var notified ArmID
	reconcileArms(
		rt,
		[]DiscoveredModel{
			{ID: "gemma-26b", Provider: "llamacpp", SupportsTools: true},
		},
		noopFactory,
		slog.Default(),
		func(id ArmID) { notified = id },
	)

	if forced := rt.ForcedArm(); forced != wantArm {
		t.Errorf("ForcedArm() = %q, want %q", forced, wantArm)
	}
	if notified != wantArm {
		t.Errorf("onReconcile called with %q, want %q", notified, wantArm)
	}

	// Selection must now resolve to the reconciled arm.
	picked := rt.Select(Task{Type: TaskGeneration})
	if picked.Error != nil {
		t.Fatalf("Select after reconcile: %v", picked.Error)
	}
	if picked.Arm.ID != wantArm {
		t.Errorf("Select returned %q, want %q", picked.Arm.ID, wantArm)
	}
}
// TestReconcileArms_ForcedArm_AlreadyCorrect forces an arm whose ID already
// matches the discovered model and expects reconcileArms to leave it in place
// without invoking the onReconcile callback; Select must still succeed.
func TestReconcileArms_ForcedArm_AlreadyCorrect(t *testing.T) {
	const wantArm = "llamacpp/gemma-26b"

	rt := New(Config{})
	rt.RegisterArm(dummyArm("llamacpp/gemma-26b", true))
	rt.ForceArm("llamacpp/gemma-26b")

	notified := false
	reconcileArms(
		rt,
		[]DiscoveredModel{
			{ID: "gemma-26b", Provider: "llamacpp", SupportsTools: true},
		},
		noopFactory,
		slog.Default(),
		func(id ArmID) { notified = true },
	)

	if forced := rt.ForcedArm(); forced != wantArm {
		t.Errorf("ForcedArm() = %q, want %q", forced, wantArm)
	}
	if notified {
		t.Error("onReconcile should not be called when arm is already correct")
	}

	if picked := rt.Select(Task{Type: TaskGeneration}); picked.Error != nil {
		t.Fatalf("Select: %v", picked.Error)
	}
}
// TestReconcileArms_ForcedArm_NonLocal forces a non-local arm and expects
// reconcileArms, run with discovery results for a different provider and a
// nil callback, to leave the forced arm unchanged.
func TestReconcileArms_ForcedArm_NonLocal(t *testing.T) {
	const wantArm = "anthropic/claude"

	rt := New(Config{})
	rt.RegisterArm(dummyArm("anthropic/claude", false))
	rt.ForceArm("anthropic/claude")

	found := []DiscoveredModel{
		{ID: "gemma-26b", Provider: "llamacpp", SupportsTools: true},
	}
	reconcileArms(rt, found, noopFactory, slog.Default(), nil)

	forced := rt.ForcedArm()
	if forced == wantArm {
		return
	}
	t.Errorf("ForcedArm() = %q, want %q (non-local forced arm should be untouched)", forced, wantArm)
}
// TestReconcileArms_NoForcedArm runs reconcileArms with no forced arm and
// expects the previously registered local arm that is absent from discovery
// to be removed, and the discovered model to be registered in its place.
func TestReconcileArms_NoForcedArm(t *testing.T) {
	rt := New(Config{})
	rt.RegisterArm(dummyArm("llamacpp/old-model", true))

	// Factory that wraps a stub so newly registered arms get a non-nil provider.
	buildProvider := func(providerName, modelName string) SecureProvider {
		stub := &stubProvider{name: providerName, model: modelName}
		return security.WrapProvider(stub, nil)
	}
	found := []DiscoveredModel{
		{ID: "gemma-26b", Provider: "llamacpp", SupportsTools: true},
	}
	reconcileArms(rt, found, buildProvider, slog.Default(), nil)

	// The arm that disappeared from discovery must be gone.
	_, stillThere := rt.LookupArm("llamacpp/old-model")
	if stillThere {
		t.Error("disappeared arm should be removed")
	}

	// The freshly discovered arm must be present.
	_, added := rt.LookupArm("llamacpp/gemma-26b")
	if !added {
		t.Error("discovered arm should be registered")
	}
}
// TestReconcileArms_MultipleModelsForForcedProvider forces the placeholder
// arm and offers two discovered models for the same provider; the forced arm
// and the callback argument are both expected to be the first model listed.
func TestReconcileArms_MultipleModelsForForcedProvider(t *testing.T) {
	const wantArm = "llamacpp/gemma-26b"

	rt := New(Config{})
	rt.RegisterArm(dummyArm("llamacpp/default", true))
	rt.ForceArm("llamacpp/default")

	var notified ArmID
	reconcileArms(
		rt,
		[]DiscoveredModel{
			{ID: "gemma-26b", Provider: "llamacpp", SupportsTools: true},
			{ID: "phi-3", Provider: "llamacpp", SupportsTools: false},
		},
		noopFactory,
		slog.Default(),
		func(id ArmID) { notified = id },
	)

	// The first matching model wins.
	if forced := rt.ForcedArm(); forced != wantArm {
		t.Errorf("ForcedArm() = %q, want %q", forced, wantArm)
	}
	if notified != wantArm {
		t.Errorf("onReconcile = %q, want %q", notified, wantArm)
	}
}
// TestReconcileArms_NoModelsForForcedProvider runs reconcileArms with an
// empty discovery result (the "server down" case) and expects the forced arm
// to stay both forced and registered.
func TestReconcileArms_NoModelsForForcedProvider(t *testing.T) {
	const wantArm = "llamacpp/default"

	rt := New(Config{})
	rt.RegisterArm(dummyArm("llamacpp/default", true))
	rt.ForceArm("llamacpp/default")

	// Empty (non-nil) discovery result: nothing was found.
	reconcileArms(rt, []DiscoveredModel{}, noopFactory, slog.Default(), nil)

	// The forced arm must survive.
	forced := rt.ForcedArm()
	if forced != wantArm {
		t.Errorf("ForcedArm() = %q, want %q (forced arm should survive empty discovery)", forced, wantArm)
	}
	_, present := rt.LookupArm("llamacpp/default")
	if !present {
		t.Error("forced arm should not be removed when discovery returns no models")
	}
}
// stubProvider is an inert provider.Provider implementation for tests that
// only need a non-nil provider value. It reports the configured name and
// default model and returns nil results from Models and Stream.
type stubProvider struct {
	name, model string
}

// Name returns the configured provider name.
func (p *stubProvider) Name() string {
	return p.name
}

// DefaultModel returns the configured model name.
func (p *stubProvider) DefaultModel() string {
	return p.model
}

// Models reports no models and no error.
func (p *stubProvider) Models(_ context.Context) ([]provider.ModelInfo, error) {
	return nil, nil
}

// Stream returns a nil stream and no error; the stub never produces output.
func (p *stubProvider) Stream(_ context.Context, _ provider.Request) (stream.Stream, error) {
	return nil, nil
}
// --- DiscoverOllama / cache + default context size ---
// ollamaStub is a configurable fake of the two Ollama endpoints the discovery
// tests hit. tagsBody is written verbatim as the /api/tags response. showFunc,
// when non-nil, receives the model name parsed from each /api/show request and
// returns the template and parameters strings to report; when nil, both are
// reported as empty strings.
type ollamaStub struct {
	tagsBody string
	showFunc func(model string) (template, parameters string)
}

// server starts an httptest.Server backed by the stub. /api/tags and
// /api/show are answered as described on ollamaStub; every other path gets a
// 404. The caller is responsible for closing the returned server.
func (s *ollamaStub) server() *httptest.Server {
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/api/tags":
			_, _ = w.Write([]byte(s.tagsBody))
		case "/api/show":
			modelName := ollamaStubModelName(r)
			tmpl, params := "", ""
			if s.showFunc != nil {
				tmpl, params = s.showFunc(modelName)
			}
			_, _ = fmt.Fprintf(w, `{"template":%q,"parameters":%q}`, tmpl, params)
		default:
			w.WriteHeader(http.StatusNotFound)
		}
	}))
}

// ollamaStubModelName extracts the value of the "name" field from an
// /api/show request body, returning "" when the field is not found.
//
// The body is drained with repeated Read calls: a single Read is allowed to
// return fewer bytes than are available, and a fixed small buffer would
// silently drop a name that sits further into the payload.
func ollamaStubModelName(r *http.Request) string {
	// Generous cap so a runaway request cannot grow the buffer without bound.
	const maxBody = 1 << 20

	var req strings.Builder
	chunk := make([]byte, 512)
	for req.Len() < maxBody {
		n, err := r.Body.Read(chunk)
		req.Write(chunk[:n])
		if err != nil {
			// End of body or a transport error: either way there is nothing
			// more to read, and a best-effort parse of what arrived is fine.
			break
		}
	}

	// Crude parse: pull the value of "name" from {"name":"..."}.
	_, rest, ok := strings.Cut(req.String(), `"name":"`)
	if !ok {
		return ""
	}
	name, _, ok := strings.Cut(rest, `"`)
	if !ok {
		return ""
	}
	return name
}
// TestDiscoverOllama_AppliesDefaultContextSize serves one model whose
// /api/show reply has a ".Tool" template and empty parameters (no num_ctx),
// and expects DiscoverOllama to report it with ContextSize equal to
// defaultOllamaContextSize and SupportsTools set.
func TestDiscoverOllama_AppliesDefaultContextSize(t *testing.T) {
	stub := &ollamaStub{tagsBody: `{"models":[{"name":"llama3:8b","size":1}]}`}
	stub.showFunc = func(_ string) (string, string) {
		// Template signals tool support; parameters carry no num_ctx line.
		return ".Tool", ""
	}
	srv := stub.server()
	t.Cleanup(srv.Close)

	probeCache := make(map[string]OllamaProbeResult)
	found, err := DiscoverOllama(context.Background(), srv.URL, probeCache)
	if err != nil {
		t.Fatalf("DiscoverOllama: %v", err)
	}
	if n := len(found); n != 1 {
		t.Fatalf("got %d models, want 1", n)
	}

	only := found[0]
	if only.ContextSize != defaultOllamaContextSize {
		t.Errorf("ContextSize = %d, want %d", only.ContextSize, defaultOllamaContextSize)
	}
	if !only.SupportsTools {
		t.Error("SupportsTools should be true (template contained .Tool)")
	}
}
// TestDiscoverOllama_PrunesCacheOnDisappearance seeds the probe cache with
// one model that /api/tags still lists and two that it does not, then expects
// DiscoverOllama to keep the listed entry and delete the other two.
func TestDiscoverOllama_PrunesCacheOnDisappearance(t *testing.T) {
	srv := (&ollamaStub{
		tagsBody: `{"models":[{"name":"alive:latest","size":1}]}`,
	}).server()
	t.Cleanup(srv.Close)

	probeCache := map[string]OllamaProbeResult{
		"alive:latest":  {SupportsTools: true},
		"ghost:latest":  {SupportsTools: true}, // absent from the tags response, must be pruned
		"another-ghost": {},
	}

	_, err := DiscoverOllama(context.Background(), srv.URL, probeCache)
	if err != nil {
		t.Fatalf("DiscoverOllama: %v", err)
	}

	if _, kept := probeCache["alive:latest"]; !kept {
		t.Error("alive:latest should remain in cache")
	}
	for _, gone := range []string{"ghost:latest", "another-ghost"} {
		if _, lingering := probeCache[gone]; lingering {
			t.Error(gone + " should have been pruned from cache")
		}
	}
}
// llamaCPPStub is a configurable fake of the llama.cpp endpoints /v1/models
// and /props. An empty body for an endpoint makes that endpoint answer 404; a
// non-zero code overrides the status sent before the body (zero leaves the
// default 200).
type llamaCPPStub struct {
	modelsBody string // response body for /v1/models; empty means 404
	propsBody  string // response body for /props; empty means 404
	modelsCode int    // status override for /v1/models (0 = 200)
	propsCode  int    // status override for /props (0 = 200)
}

// server starts an httptest.Server backed by the stub. Paths other than
// /v1/models and /props get a 404. The caller is responsible for closing the
// returned server.
func (s *llamaCPPStub) server() *httptest.Server {
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Resolve the configured reply for the path; unknown paths keep the
		// zero values and therefore fall into the 404 branch below.
		var (
			payload string
			status  int
		)
		switch r.URL.Path {
		case "/v1/models":
			payload, status = s.modelsBody, s.modelsCode
		case "/props":
			payload, status = s.propsBody, s.propsCode
		}

		if payload == "" {
			w.WriteHeader(http.StatusNotFound)
			return
		}
		if status != 0 {
			w.WriteHeader(status)
		}
		_, _ = w.Write([]byte(payload))
	}
	return httptest.NewServer(http.HandlerFunc(handler))
}
// TestDiscoverLlamaCPP_EnumeratesMultipleModels serves two entries from
// /v1/models (as a multi-model server or a front-proxy such as llama-swap
// would) plus an n_ctx of 16384 from /props, and expects DiscoverLlamaCPP to
// return both entries, each with that context size and SupportsTools set,
// rather than collapsing them into one placeholder.
func TestDiscoverLlamaCPP_EnumeratesMultipleModels(t *testing.T) {
	srv := (&llamaCPPStub{
		modelsBody: `{"data":[{"id":"qwen2.5-coder:7b"},{"id":"llama-3.2:3b"}]}`,
		propsBody:  `{"default_generation_settings":{"n_ctx":16384}}`,
	}).server()
	t.Cleanup(srv.Close)

	found, err := DiscoverLlamaCPP(context.Background(), srv.URL)
	if err != nil {
		t.Fatalf("DiscoverLlamaCPP: %v", err)
	}
	if n := len(found); n != 2 {
		t.Fatalf("got %d models, want 2", n)
	}

	expected := map[string]bool{
		"qwen2.5-coder:7b": true,
		"llama-3.2:3b":     true,
	}
	for i := range found {
		entry := found[i]
		if known := expected[entry.ID]; !known {
			t.Errorf("unexpected model id %q", entry.ID)
		}
		if entry.ContextSize != 16384 {
			t.Errorf("ContextSize = %d, want 16384 (shared from /props)", entry.ContextSize)
		}
		if !entry.SupportsTools {
			t.Errorf("model %q: SupportsTools should be true", entry.ID)
		}
	}
}
// TestDiscoverLlamaCPP_PropsFailsFallsBackToDefault leaves /props answering
// 404 (as an older build or a proxy that does not expose it might) and
// expects DiscoverLlamaCPP to still return the single listed model, with
// ContextSize equal to defaultLlamaCppContextSize, instead of failing.
func TestDiscoverLlamaCPP_PropsFailsFallsBackToDefault(t *testing.T) {
	// propsBody is deliberately left empty so the stub answers /props with 404.
	srv := (&llamaCPPStub{modelsBody: `{"data":[{"id":"only-model"}]}`}).server()
	t.Cleanup(srv.Close)

	found, err := DiscoverLlamaCPP(context.Background(), srv.URL)
	if err != nil {
		t.Fatalf("DiscoverLlamaCPP: %v", err)
	}
	if !(len(found) == 1 && found[0].ID == "only-model") {
		t.Fatalf("got %+v, want one entry with id=only-model", found)
	}

	size := found[0].ContextSize
	if size != defaultLlamaCppContextSize {
		t.Errorf("ContextSize = %d, want %d (fallback)", size, defaultLlamaCppContextSize)
	}
}
// TestDiscoverLlamaCPP_NoModelsIsError serves an empty model list from
// /v1/models and expects DiscoverLlamaCPP to return an error rather than a
// result that could be registered as a phantom arm.
func TestDiscoverLlamaCPP_NoModelsIsError(t *testing.T) {
	srv := (&llamaCPPStub{
		modelsBody: `{"data":[]}`,
		propsBody:  `{"default_generation_settings":{"n_ctx":8192}}`,
	}).server()
	t.Cleanup(srv.Close)

	_, err := DiscoverLlamaCPP(context.Background(), srv.URL)
	if err != nil {
		return
	}
	t.Error("expected error when /v1/models returns no entries, got nil")
}
// --- isNonChatModel pattern matching ---
// TestIsNonChatModel checks isNonChatModel against a list of chat model IDs
// (expected false) followed by a list of speech, audio, embedding and
// reranker model IDs (expected true).
func TestIsNonChatModel(t *testing.T) {
	cases := []struct {
		model   string
		nonChat bool
	}{
		// Chat models: must not be flagged.
		{"qwen3:14b", false},
		{"qwen3-coder:30b", false},
		{"gemma4:latest", false},
		{"gemma-4-e2b-it", false},
		{"devstral-small-2:24b", false},
		{"phi-4", false},
		{"reecdev/tiny3.5:1.5b", false},
		{"ministral-3:8b", false},

		// Non-chat models: must be flagged.
		{"whisper-base", true},
		{"moonshine-tiny", true},
		{"kokoros", true},
		{"kokoros-de", true},
		{"vibevoice", true},
		{"vibevoice-cpp", true},
		{"qwen3-asr-1.7b", true},
		{"qwen3-tts-1.7b-custom-voice", true},
		{"lfm2.5-audio-1.5b-realtime", true},
		{"embeddinggemma:latest", true},
		{"qwen3-vl-embedding-2b-gguf", true},
		{"qwen3-vl-reranker-2b-i1-gguf", true},
	}

	for _, tc := range cases {
		flagged := isNonChatModel(tc.model)
		switch {
		case flagged && !tc.nonChat:
			t.Errorf("isNonChatModel(%q) = true, want false (chat model)", tc.model)
		case !flagged && tc.nonChat:
			t.Errorf("isNonChatModel(%q) = false, want true (non-chat model)", tc.model)
		}
	}
}
// --- isKnownVisionModelName covers new prefixes (R-2) ---
// TestIsKnownVisionModelName_NewFamilies checks isKnownVisionModelName
// against model IDs expected to be recognised as vision models, followed by
// IDs expected not to be.
func TestIsKnownVisionModelName_NewFamilies(t *testing.T) {
	cases := []struct {
		model  string
		vision bool
	}{
		{"gemma4:latest", true},
		{"gemma4-e4b-uc:latest", true},
		{"gemma-4-e2b-it", true},
		{"gemma-4-e4b-it", true},
		{"glm-ocr", true},
		{"gemma3:27b", true}, // pre-existing, regression guard
		{"minicpm-v-4.6-thinking-gguf", true},

		{"qwen3:14b", false},
		{"devstral-small-2:24b", false},
		{"phi-4", false},
		{"functiongemma:latest", false}, // Gemma-based but text-only function caller
	}

	for _, tc := range cases {
		recognised := isKnownVisionModelName(tc.model)
		switch {
		case tc.vision && !recognised:
			t.Errorf("isKnownVisionModelName(%q) = false, want true", tc.model)
		case !tc.vision && recognised:
			t.Errorf("isKnownVisionModelName(%q) = true, want false", tc.model)
		}
	}
}
// --- RegisterDiscoveredModels: skip non-chat, apply family defaults ---
// TestRegisterDiscoveredModels_SkipsNonChat registers a mix of chat and
// non-chat discovered models and expects only the two chat models to appear
// among the router's arms.
func TestRegisterDiscoveredModels_SkipsNonChat(t *testing.T) {
	rt := New(Config{})
	buildProvider := func(providerName, modelName string) SecureProvider {
		stub := &stubProvider{name: providerName, model: modelName}
		return security.WrapProvider(stub, nil)
	}

	RegisterDiscoveredModels(rt, []DiscoveredModel{
		{ID: "qwen3:14b", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
		{ID: "embeddinggemma:latest", Provider: "ollama", ContextSize: 8192},
		{ID: "whisper-base", Provider: "ollama", ContextSize: 4096},
		{ID: "kokoros", Provider: "ollama"},
		{ID: "qwen3-vl-reranker-2b-gguf", Provider: "ollama"},
		{ID: "gemma4:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
	}, buildProvider)

	// Index the registered arm IDs for the membership checks below.
	registered := map[ArmID]bool{}
	for _, arm := range rt.Arms() {
		registered[arm.ID] = true
	}

	for _, id := range []ArmID{"ollama/qwen3:14b", "ollama/gemma4:latest"} {
		if present := registered[id]; !present {
			t.Errorf("expected %q to be registered, got %v", id, registered)
		}
	}

	for _, id := range []ArmID{
		"ollama/embeddinggemma:latest",
		"ollama/whisper-base",
		"ollama/kokoros",
		"ollama/qwen3-vl-reranker-2b-gguf",
	} {
		if present := registered[id]; present {
			t.Errorf("expected %q to be skipped (non-chat), but it was registered", id)
		}
	}
}
// TestRegisterDiscoveredModels_AppliesFunctionGemmaDefaults registers a
// discovered functiongemma model and expects the resulting arm to exist with
// Disabled=true, MaxComplexity=0.40 and Strengths=[TaskOrchestration].
func TestRegisterDiscoveredModels_AppliesFunctionGemmaDefaults(t *testing.T) {
	rt := New(Config{})
	buildProvider := func(providerName, modelName string) SecureProvider {
		stub := &stubProvider{name: providerName, model: modelName}
		return security.WrapProvider(stub, nil)
	}

	RegisterDiscoveredModels(rt, []DiscoveredModel{
		{ID: "functiongemma:latest", Provider: "ollama", SupportsTools: true, ContextSize: 32768},
	}, buildProvider)

	arm, found := rt.LookupArm("ollama/functiongemma:latest")
	if !found {
		t.Fatal("functiongemma should be registered (Disabled, but visible)")
	}

	if !arm.Disabled {
		t.Error("functiongemma arm should have Disabled=true")
	}
	if ceiling := arm.MaxComplexity; ceiling != 0.40 {
		t.Errorf("functiongemma MaxComplexity = %v, want 0.40", ceiling)
	}
	if !(len(arm.Strengths) == 1 && arm.Strengths[0] == TaskOrchestration) {
		t.Errorf("functiongemma Strengths = %v, want [TaskOrchestration]", arm.Strengths)
	}
}
// TestRegisterDiscoveredModels_NoDefaultsForUnknownFamily registers a
// discovered model whose name matches no known family and expects the arm to
// exist, not be disabled, and keep zero MaxComplexity and empty Strengths.
func TestRegisterDiscoveredModels_NoDefaultsForUnknownFamily(t *testing.T) {
	rt := New(Config{})
	buildProvider := func(providerName, modelName string) SecureProvider {
		stub := &stubProvider{name: providerName, model: modelName}
		return security.WrapProvider(stub, nil)
	}

	RegisterDiscoveredModels(rt, []DiscoveredModel{
		{ID: "some-novel-model:1.5b", Provider: "ollama", SupportsTools: true, ContextSize: 16384},
	}, buildProvider)

	arm, found := rt.LookupArm("ollama/some-novel-model:1.5b")
	if !found {
		t.Fatal("unknown-family model should still register")
	}

	if arm.Disabled {
		t.Error("unknown-family arm should not be disabled")
	}
	if ceiling := arm.MaxComplexity; ceiling != 0 {
		t.Errorf("unknown-family MaxComplexity = %v, want 0 (no ceiling)", ceiling)
	}
	if n := len(arm.Strengths); n != 0 {
		t.Errorf("unknown-family Strengths = %v, want none", arm.Strengths)
	}
}