diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go index ebb8a36..b4884c1 100644 --- a/cmd/gnoma/main.go +++ b/cmd/gnoma/main.go @@ -375,7 +375,16 @@ func main() { } arm.Pools = resolveRateLimitPools(armID, *providerName, armModel, cfg) rtr.RegisterArm(arm) - rtr.ForceArm(armID) + // Pin this arm only when the user passed --provider explicitly on + // the command line. A config-default provider is *one* registered + // arm among many — the router still picks by tier and score, which + // is what lets the SLM arm win trivial tasks. Without this guard, + // every gnoma launch with [provider].default set short-circuits + // the whole routing tree to one arm. + if isFlagSet("provider") { + rtr.ForceArm(armID) + logger.Info("provider pinned via --provider flag", "arm", armID) + } if len(arm.Pools) > 0 { logger.Debug("rate limit pools attached", "arm", armID, "pools", len(arm.Pools)) } diff --git a/internal/router/task.go b/internal/router/task.go index 02c2705..f50d854 100644 --- a/internal/router/task.go +++ b/internal/router/task.go @@ -222,6 +222,14 @@ func ClassifyTask(prompt string) Task { // Estimate complexity from prompt length and keywords task.ComplexityScore = estimateComplexity(lower) + // Per-task-type complexity floor. A short "refactor X" prompt looks + // trivial by word count, but the task itself implies existing code and + // non-trivial reasoning — raising the score to the floor is meant to + // keep such tasks above the SLM arm's MaxComplexity ceiling. + if floor := MinComplexityForType(task.Type); task.ComplexityScore < floor { + task.ComplexityScore = floor + } + // Trivial-prompt override: short, knowledge-only prompts whose task // type doesn't imply existing code to read or modify can run without // tools — making the SLM arm (ToolUse=false) feasible for genuinely @@ -235,6 +243,23 @@ func ClassifyTask(prompt string) Task { return task } +// MinComplexityForType returns the inherent complexity floor for a task +// type. 
Tasks that imply existing code or multi-step reasoning get a +// non-zero floor so short prompts don't slip under the SLM arm's +// MaxComplexity ceiling. Types not listed below have no floor (0). +func MinComplexityForType(t TaskType) float64 { + switch t { + case TaskSecurityReview, TaskOrchestration: + return 0.6 + case TaskRefactor, TaskPlanning, TaskDebug: + return 0.4 + case TaskUnitTest, TaskReview: + return 0.35 + default: + return 0 + } +} + // trivialEligibleTypes are the task types where a "no tools needed" verdict // is plausible from a short prompt alone. Debug / Refactor / Review / Test / // SecurityReview / Orchestration all imply existing code or processes to diff --git a/internal/slm/classifier.go b/internal/slm/classifier.go index 7aa3b69..987c0ec 100644 --- a/internal/slm/classifier.go +++ b/internal/slm/classifier.go @@ -80,6 +80,13 @@ func (c *Classifier) Classify(ctx context.Context, prompt string, history []mess task.ComplexityScore = resp.Complexity task.RequiresTools = resp.RequiresTools task.ClassifierSource = router.ClassifierSLM + // Re-apply the per-task-type complexity floor after the SLM overlay. + // The SLM may have under-reported complexity for a Refactor-style + // task; the floor keeps the SLM arm from being picked on the strength + // of its own under-estimate. + if floor := router.MinComplexityForType(task.Type); task.ComplexityScore < floor { + task.ComplexityScore = floor + } return task, nil }