From eb0583f606d052b1db0f5a0713c55e1117557e07 Mon Sep 17 00:00:00 2001
From: vikingowl <26+vikingowl@noreply.somegit.dev>
Date: Tue, 19 May 2026 19:22:16 +0200
Subject: [PATCH] fix(router): unpin config-default provider + complexity floor
 by task type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two routing bugs were keeping the SLM out of every real prompt and,
once it was eligible, pulling complex tasks into it as well.

Bug 1: ForceArm was called unconditionally when a primary provider was
configured (cmd/gnoma/main.go:378). That short-circuited the entire
router — every prompt went straight to whatever was set as
[provider].default, regardless of tier, score, or feasibility. The SLM
arm appeared in `gnoma router stats` registration logs but had zero
observations after dozens of prompts.

Fix: only pin when the user passed --provider on the command line.
Config defaults register the arm but don't force it; the router picks
freely. Verified end-to-end — trivial prompts now reach slm/ollama
via the tier-0 priority.

Bug 2: A short prompt like "refactor the SLM module" classifies as
TaskRefactor with complexity 0.015 — well under the SLM arm's 0.3
ceiling. The arm became eligible despite the task being inherently
non-trivial, and once it was eligible its tier-0 priority made the
router pick it over the CLI agents.

Fix: add MinComplexityForType, applied in both ClassifyTask
(heuristic path) and slm.Classifier.Classify (SLM-overlay path). The
floor is per-task-type:

  - TaskSecurityReview, TaskOrchestration  → 0.60
  - TaskRefactor, TaskPlanning, TaskDebug  → 0.40
  - TaskUnitTest, TaskReview               → 0.35

Tasks like Explain/Generation/Boilerplate keep their organic
complexity score so trivial knowledge prompts (≤0.15) still fall to
the SLM. Tasks that imply existing code or multi-step reasoning are
clamped above the SLM's MaxComplexity, naturally routing them to a
bigger arm.

After both fixes, observed routing in a clean run:

  What is 2+2?              → slm/ollama (complexity 0.015)
  Define a closure          → slm/ollama (complexity 0.015)
  What is HTTP?             → slm/ollama (complexity 0.015)
  Refactor the SLM module   → subprocess/gemini (complexity 0.40)
  Audit for race conditions → subprocess/gemini (complexity 0.35)
  Plan a migration          → subprocess/gemini (complexity 0.40)
---
 cmd/gnoma/main.go          | 11 ++++++++++-
 internal/router/task.go    | 25 +++++++++++++++++++++++++
 internal/slm/classifier.go |  7 +++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go
index ebb8a36..b4884c1 100644
--- a/cmd/gnoma/main.go
+++ b/cmd/gnoma/main.go
@@ -375,7 +375,16 @@ func main() {
 		}
 		arm.Pools = resolveRateLimitPools(armID, *providerName, armModel, cfg)
 		rtr.RegisterArm(arm)
-		rtr.ForceArm(armID)
+		// Pin this arm only when the user passed --provider explicitly on
+		// the command line. A config-default provider is *one* registered
+		// arm among many — the router still picks by tier and score, which
+		// is what lets the SLM arm win trivial tasks. Without this guard,
+		// every gnoma launch with [provider].default set short-circuits
+		// the whole routing tree to one arm.
+		if isFlagSet("provider") {
+			rtr.ForceArm(armID)
+			logger.Info("provider pinned via --provider flag", "arm", armID)
+		}
 		if len(arm.Pools) > 0 {
 			logger.Debug("rate limit pools attached", "arm", armID, "pools", len(arm.Pools))
 		}
diff --git a/internal/router/task.go b/internal/router/task.go
index 02c2705..f50d854 100644
--- a/internal/router/task.go
+++ b/internal/router/task.go
@@ -222,6 +222,14 @@ func ClassifyTask(prompt string) Task {
 	// Estimate complexity from prompt length and keywords
 	task.ComplexityScore = estimateComplexity(lower)
 
+	// Per-task-type complexity floor. A short "refactor X" prompt looks
+	// trivial by word count, but the task itself implies existing code and
+	// non-trivial reasoning — raising the score to the floor keeps such
+	// tasks above the SLM arm's MaxComplexity ceiling.
+	if floor := MinComplexityForType(task.Type); task.ComplexityScore < floor {
+		task.ComplexityScore = floor
+	}
+
 	// Trivial-prompt override: short, knowledge-only prompts whose task
 	// type doesn't imply existing code to read or modify can run without
 	// tools — making the SLM arm (ToolUse=false) feasible for genuinely
@@ -235,6 +243,23 @@ func ClassifyTask(prompt string) Task {
 	return task
 }
 
+// MinComplexityForType returns the complexity floor for task type t;
+// callers raise any lower ComplexityScore to it. Types that imply existing
+// code or multi-step reasoning get a floor of 0.35-0.6 so a short prompt
+// cannot score under the SLM arm's MaxComplexity; other types return 0.
+func MinComplexityForType(t TaskType) float64 {
+	switch t {
+	case TaskSecurityReview, TaskOrchestration:
+		return 0.6
+	case TaskRefactor, TaskPlanning, TaskDebug:
+		return 0.4
+	case TaskUnitTest, TaskReview:
+		return 0.35
+	default:
+		return 0
+	}
+}
+
 // trivialEligibleTypes are the task types where a "no tools needed" verdict
 // is plausible from a short prompt alone. Debug / Refactor / Review / Test /
 // SecurityReview / Orchestration all imply existing code or processes to
diff --git a/internal/slm/classifier.go b/internal/slm/classifier.go
index 7aa3b69..987c0ec 100644
--- a/internal/slm/classifier.go
+++ b/internal/slm/classifier.go
@@ -80,6 +80,13 @@ func (c *Classifier) Classify(ctx context.Context, prompt string, history []mess
 	task.ComplexityScore = resp.Complexity
 	task.RequiresTools = resp.RequiresTools
 	task.ClassifierSource = router.ClassifierSLM
+	// Re-apply the per-task-type complexity floor after the SLM overlay.
+	// The SLM may have under-reported complexity for a Refactor-style
+	// task; the floor keeps the SLM arm from being selected on the
+	// strength of its own under-estimate.
+	if floor := router.MinComplexityForType(task.Type); task.ComplexityScore < floor {
+		task.ComplexityScore = floor
+	}
 	return task, nil
 }