package router import ( "log/slog" "math" ) // Strategy identifies how a task should be executed. type Strategy int const ( StrategySingleArm Strategy = iota // Future (M9): StrategyCascade, StrategyParallelEnsemble, StrategyMultiRound ) // RoutingDecision is the result of arm selection. type RoutingDecision struct { Strategy Strategy Arm *Arm // primary arm Error error reservations []*Reservation // pool reservations held until commit/rollback } // Commit finalizes the routing decision, recording actual token consumption. // Must be called when the request completes successfully. func (d RoutingDecision) Commit(actualTokens int) { for _, r := range d.reservations { r.Commit(actualTokens) } } // Rollback releases the routing decision's pool reservations without recording usage. // Must be called when the request fails before any tokens are consumed. func (d RoutingDecision) Rollback() { for _, r := range d.reservations { r.Rollback() } } // armTier returns the routing tier for an arm in the context of a task. // Lower tier = higher preference. // - 0: specialized small arm (MaxComplexity > 0) whose ceiling fits this // task — picked first so "the SLM does small stuff" actually happens. // - 1: CLI agent // - 2: local model (general purpose, no complexity ceiling) // - 3: API provider // // When prefer is PreferLocal, non-local non-CLI-agent arms (true cloud // API arms) are demoted by +2 tiers so any local or CLI-agent option // is preferred. When prefer is PreferCloud, IsLocal arms are demoted // by +2 tiers so cloud arms win the tier walk. The +2 shift is enough // to drop cloud below the locals (tier 3 → 5) and locals below cloud // (tier 2 → 4) without colliding with any normal tier value, keeping // the tier walk deterministic. // // The Strengths-promoted path in selectBest bypasses the tier walk // entirely, so prefer-policy never blocks a strongly-tagged arm from // winning the task it's tagged for. This is the intended interaction. func armTier(arm *Arm, task Task, prefer PreferPolicy) int { base := armBaseTier(arm, task) switch prefer { case PreferLocal: // Demote pure cloud arms. CLI-agent arms proxy to cloud but // remain "local" from a tooling perspective — leave them where // they are. Users who want to exclude them should use // `--provider X` or the existing exclude mechanisms. if !arm.IsLocal && !arm.IsCLIAgent { return base + 2 } case PreferCloud: if arm.IsLocal { return base + 2 } } return base } func armBaseTier(arm *Arm, task Task) int { if arm.MaxComplexity > 0 && task.ComplexityScore <= arm.MaxComplexity { return 0 } if arm.IsCLIAgent { return 1 } if arm.IsLocal { return 2 } return 3 } // selectBest picks the best arm. // // Step 1: arms whose Strengths list contains task.Type cross all tier // boundaries — Opus tagged with SecurityReview beats a CLI-agent tier-1 // arm for that task. Strengths are a preference, not a pin: if no // strength-matching arm is in the input set (filterFeasible already // removed arms in backoff, lacking tool support, or out of pool capacity), // selection falls through to the default tier order. // // Step 2 (fallback): walk tiers low→high. Within a tier, highest-scoring // arm wins. func selectBest(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm { if len(arms) == 0 { return nil } var promoted []*Arm for _, arm := range arms { if arm.HasStrength(task.Type) { promoted = append(promoted, arm) } } if len(promoted) > 0 { return bestScored(qt, params, promoted, task, prefer) } // Walk tiers low→high. armTier returns up to 5 when prefer is set // (a dispreferred tier-3 cloud arm under PreferLocal lands at 5); // the loop bound has to cover that. for tier := 0; tier <= 5; tier++ { var inTier []*Arm for _, arm := range arms { if armTier(arm, task, prefer) == tier { inTier = append(inTier, arm) } } if len(inTier) > 0 { return bestScored(qt, params, inTier, task, prefer) } } return nil } // bestScored returns the highest-scoring arm within a set. func bestScored(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm { var best *Arm bestScore := math.Inf(-1) for _, arm := range arms { score := scoreArm(qt, params, arm, task) * policyMultiplier(arm, prefer) if score > bestScore { bestScore = score best = arm } } return best } // policyMultiplier returns the prefer-policy score multiplier for an // arm. Soft bias only — does not zero out the dispreferred set, so // when only cloud arms are feasible under PreferLocal a cloud arm can // still win. Calibrated against the typical scoreArm output range // (~0.5–2.0) so a 0.3 multiplier is roughly equivalent to "non-local // arm must be ~3x better than local to win." // // CLI-agent subprocess arms count as non-local because they proxy to // cloud — the prefer knob is about the privacy/cost axis, not the // tooling-locality axis. Users who want to pin subprocess specifically // should use --provider subprocess, which bypasses the policy. func policyMultiplier(arm *Arm, p PreferPolicy) float64 { switch p { case PreferLocal: if arm.IsLocal { return 1.0 } return 0.3 case PreferCloud: if arm.IsLocal { return 0.5 } return 1.0 default: return 1.0 } } // scoreArm computes a quality/cost score for an arm. // When the quality tracker has sufficient observations, blends observed EMA // (default 70%) with heuristic (default 30%). Falls back to pure heuristic // otherwise. The blend ratio and strength bonus are tunable via // BanditParams (config: [router.bandit]); a zero-valued params falls back // to the built-in defaults. // // Strengths add a fixed bonus to quality when matching task.Type. CostWeight // dampens the cost penalty linearly: // // effectiveCost = 1 + CostWeight * (cost - 1) // // With CostWeight=1.0 (or unset → resolved to 1.0) the formula collapses to // the original effectiveCost == cost. With CostWeight=0 cost is fully // ignored (effectiveCost = 1.0). Local arms with sub-1 raw costs are not // amplified by fractional weights (the linear formula stays monotone). func scoreArm(qt *QualityTracker, params BanditParams, arm *Arm, task Task) float64 { params = resolveBanditParams(params) hq := heuristicQuality(arm, task) quality := hq if qt != nil { if observed, hasData := qt.Quality(arm.ID, task.Type); hasData { quality = params.ObservedWeight*observed + (1-params.ObservedWeight)*hq } } if arm.HasStrength(task.Type) { quality += params.StrengthBonus } value := task.ValueScore() rawCost := effectiveCost(arm, task) if rawCost <= 0 { rawCost = 0.001 } weighted := 1.0 + arm.ResolvedCostWeight()*(rawCost-1.0) if weighted <= 0 { weighted = 0.001 } return (quality * value) / weighted } // heuristicQuality estimates arm quality without historical data. func heuristicQuality(arm *Arm, task Task) float64 { score := 0.5 // base // Larger context window = better for complex tasks if arm.Capabilities.ContextWindow >= 100000 { score += 0.1 } if arm.Capabilities.ContextWindow >= 200000 { score += 0.05 } // Thinking capability valuable for planning/orchestration/security if arm.Capabilities.SupportsThinking() { switch task.Type { case TaskPlanning, TaskOrchestration, TaskSecurityReview: score += 0.2 case TaskDebug, TaskRefactor: score += 0.1 } } // Tool support required — arm without tools gets heavy penalty if task.RequiresTools && !arm.SupportsTools() { score *= 0.1 } // Local models get a small boost (no network latency, privacy) if arm.IsLocal { score += 0.05 } // Complexity adjustment — complex tasks penalize small/local models if task.ComplexityScore > 0.7 && arm.IsLocal { score *= 0.7 } // Clamp if score > 1.0 { score = 1.0 } if score < 0.0 { score = 0.0 } return score } // effectiveCost returns the base cost inflated by pool scarcity. func effectiveCost(arm *Arm, task Task) float64 { base := arm.EstimateCost(task.EstimatedTokens) if base <= 0 { base = 0.001 // local models are ~free but not zero for scoring } // Apply maximum scarcity multiplier across all pools maxMultiplier := 1.0 for _, pool := range arm.Pools { m := pool.ScarcityMultiplier() if m > maxMultiplier { maxMultiplier = m } } return base * maxMultiplier } // filterFeasible returns arms that can handle the task (tools, pool capacity, quality). // Arms that pass tool and pool checks but fall below the task's minimum quality threshold // are collected separately and used as a last resort if no arm meets the threshold. // // When the result is empty the caller surfaces a generic "no feasible arm" // error; rejection reasons are logged here at slog.Debug per-arm so users // debugging "why did the router reject everything?" with --verbose can see // the actual constraint each arm tripped instead of guessing. func filterFeasible(arms []*Arm, task Task) []*Arm { threshold := DefaultThresholds[task.Type] var feasible []*Arm var belowQuality []*Arm // passed tool+pool but scored below minimum quality reject := func(arm *Arm, reason string, fields ...any) { base := []any{ "arm", arm.ID, "task", task.Type, "complexity", task.ComplexityScore, "reason", reason, } slog.Debug("filterFeasible: rejected", append(base, fields...)...) } for _, arm := range arms { // Complexity ceiling: zero means no ceiling (preserves behavior for all existing arms). if arm.MaxComplexity > 0 && task.ComplexityScore > arm.MaxComplexity { reject(arm, "complexity_exceeds_max", "max_complexity", arm.MaxComplexity) continue } // Must support tools if task requires them if task.RequiresTools && !arm.SupportsTools() { reject(arm, "tools_required_but_unsupported", "tool_use_capability", arm.Capabilities.ToolUse) continue } // Must support vision if task carries inline image content. // No tools/quality fallback for vision: a non-vision arm physically // cannot consume the image bytes, so degrading to it would silently // drop the image and confuse the model. if task.RequiresVision && !arm.Capabilities.Vision { reject(arm, "vision_required_but_unsupported", "vision_capability", arm.Capabilities.Vision) continue } // Must support the required effort level (EffortAuto always passes) if !arm.Capabilities.SupportsEffort(task.RequiredEffort) { reject(arm, "effort_level_unsupported", "required_effort", task.RequiredEffort) continue } // Check all pools have capacity poolsOK := true for _, pool := range arm.Pools { pool.CheckReset() if !pool.CanAfford(arm.ID, task.EstimatedTokens) { reject(arm, "pool_capacity_exceeded", "estimated_tokens", task.EstimatedTokens) poolsOK = false break } } if !poolsOK { continue } // Quality floor: arms below minimum are set aside, not discarded if heuristicQuality(arm, task) < threshold.Minimum { belowQuality = append(belowQuality, arm) continue } feasible = append(feasible, arm) } if len(feasible) == 0 && len(belowQuality) == 0 { slog.Debug("filterFeasible: no arms feasible at any quality level", "task", task.Type, "complexity", task.ComplexityScore, "requires_tools", task.RequiresTools, "requires_vision", task.RequiresVision, "arms_considered", len(arms), ) } // Degrade gracefully: if no arm meets quality threshold, use below-quality ones if len(feasible) == 0 && len(belowQuality) > 0 { return belowQuality } // If still empty and task requires tools, relax pool checks (last resort) if len(feasible) == 0 && task.RequiresTools { for _, arm := range arms { if !arm.Capabilities.ToolUse { continue } // Vision requirement is hard: a non-vision arm cannot // consume image bytes, so even the last-resort fallback // must respect it. if task.RequiresVision && !arm.Capabilities.Vision { continue } poolsOK := true for _, pool := range arm.Pools { if !pool.CanAfford(arm.ID, task.EstimatedTokens) { poolsOK = false break } } if poolsOK { feasible = append(feasible, arm) } } } return feasible }