gnoma/internal/router/selector.go

package router

import (
	"log/slog"
	"math"
)

// Strategy identifies how a task should be executed.
// It is an int-backed enumeration whose zero value is StrategySingleArm.
type Strategy int

const (
	// StrategySingleArm is the only strategy defined so far (value 0 via
	// iota). NOTE(review): presumably it means the task is served by the
	// single arm in RoutingDecision.Arm — confirm against the router.
	StrategySingleArm Strategy = iota
	// Future (M9): StrategyCascade, StrategyParallelEnsemble, StrategyMultiRound
)

// RoutingDecision is the result of arm selection.
//
// Strategy says how the task is to be executed and Arm is the arm chosen
// to run it. The unexported reservations slice carries the pool
// reservations taken for the decision; they stay held until the caller
// invokes Commit or Rollback, each of which walks every reservation.
//
// NOTE(review): Error is presumably non-nil when selection failed, in
// which case Arm may be nil — confirm against the code that builds the
// decision, which is not visible in this file section.
type RoutingDecision struct {
	Strategy     Strategy
	Arm          *Arm // primary arm
	Error        error
	reservations []*Reservation // pool reservations held until commit/rollback
}

// Commit settles every pool reservation attached to the decision by
// forwarding actualTokens — the request's real token consumption — to
// each of them in turn. Call it once the request has completed
// successfully. A decision that holds no reservations makes this a no-op.
func (d RoutingDecision) Commit(actualTokens int) {
	held := d.reservations
	for i := range held {
		held[i].Commit(actualTokens)
	}
}

// Rollback hands back every pool reservation attached to the decision
// without recording any usage. Call it when the request fails before any
// tokens are consumed. A decision that holds no reservations makes this a
// no-op.
func (d RoutingDecision) Rollback() {
	held := d.reservations
	for i := range held {
		held[i].Rollback()
	}
}

// armTier returns the routing tier of an arm for a task under a prefer
// policy. Lower tier = higher preference.
//
// The starting point is armBaseTier:
//   - 0: specialized small arm (MaxComplexity > 0) whose ceiling fits this
//     task — picked first so "the SLM does small stuff" actually happens.
//   - 1: CLI agent
//   - 2: local model (general purpose, no complexity ceiling)
//   - 3: API provider
//
// A dispreferred arm is then pushed down by two tiers:
//   - PreferLocal demotes arms that are neither IsLocal nor IsCLIAgent
//     (true cloud API arms). CLI-agent arms proxy to cloud but remain
//     "local" from a tooling perspective, so they keep their tier; users
//     who want to exclude them should use `--provider X` or the existing
//     exclude mechanisms.
//   - PreferCloud demotes every IsLocal arm.
//   - Any other policy value leaves the base tier untouched.
//
// The largest value returned is therefore 5 (a tier-3 cloud arm under
// PreferLocal). A demoted tier can coincide with a regular one — a
// demoted tier-0 cloud arm lands on 2 next to general local arms, and a
// demoted local CLI agent lands on 3 next to API providers; arms that
// share a tier are ranked against each other by score in selectBest.
//
// The Strengths-promoted path in selectBest bypasses the tier walk
// entirely, so prefer-policy never blocks a strongly-tagged arm from
// winning the task it's tagged for. This is the intended interaction.
func armTier(arm *Arm, task Task, prefer PreferPolicy) int {
	// Number of tiers a dispreferred arm is pushed down.
	const demotion = 2

	tier := armBaseTier(arm, task)

	dispreferred := false
	switch prefer {
	case PreferLocal:
		dispreferred = !arm.IsLocal && !arm.IsCLIAgent
	case PreferCloud:
		dispreferred = arm.IsLocal
	}

	if dispreferred {
		tier += demotion
	}
	return tier
}

// armBaseTier classifies an arm for a task before any prefer-policy
// demotion. The first matching rule wins:
//
//   - 0: the arm declares a complexity ceiling (MaxComplexity > 0) and the
//     task's ComplexityScore does not exceed it
//   - 1: the arm is a CLI agent
//   - 2: the arm is local
//   - 3: everything else
func armBaseTier(arm *Arm, task Task) int {
	fitsCeiling := arm.MaxComplexity > 0 && task.ComplexityScore <= arm.MaxComplexity
	switch {
	case fitsCeiling:
		return 0
	case arm.IsCLIAgent:
		return 1
	case arm.IsLocal:
		return 2
	default:
		return 3
	}
}

// selectBest picks the best arm from an already-filtered candidate set.
// It returns nil when arms is empty, or when bestScored finds no arm with
// a comparable score.
//
// Step 1: arms whose Strengths list contains task.Type cross all tier
// boundaries — Opus tagged with SecurityReview beats a CLI-agent tier-1
// arm for that task. Strengths are a preference, not a pin: if no
// strength-matching arm is in the input set (filterFeasible already
// removed arms in backoff, lacking tool support, or out of pool capacity),
// selection falls through to the default tier order.
//
// Step 2 (fallback): take the arms sitting in the lowest (most preferred)
// tier reported by armTier. Within that tier, highest-scoring arm wins.
//
// The lowest tier is found in a single pass rather than by probing a fixed
// range of tier numbers, so the result does not depend on knowing how high
// armTier can go: adding a tier or changing the demotion size cannot make
// this function drop every candidate.
func selectBest(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
	if len(arms) == 0 {
		return nil
	}

	var promoted []*Arm
	for _, arm := range arms {
		if arm.HasStrength(task.Type) {
			promoted = append(promoted, arm)
		}
	}
	if len(promoted) > 0 {
		return bestScored(qt, params, promoted, task, prefer)
	}

	// Collect the arms of the lowest tier. Input order is preserved inside
	// inTier so ties in bestScored resolve exactly as a tier-by-tier scan
	// over arms would.
	var (
		lowest int
		inTier []*Arm
	)
	for _, arm := range arms {
		tier := armTier(arm, task, prefer)
		switch {
		case len(inTier) == 0 || tier < lowest:
			// First arm seen, or a strictly better tier: start over.
			lowest = tier
			inTier = append(inTier[:0], arm)
		case tier == lowest:
			inTier = append(inTier, arm)
		}
	}
	return bestScored(qt, params, inTier, task, prefer)
}

// bestScored returns the arm with the highest policy-adjusted score, i.e.
// scoreArm multiplied by policyMultiplier. When several arms tie, the one
// that appears first in arms is kept, because only a strictly greater
// score replaces the current leader. The result is nil when arms is empty
// or when no score compares greater than negative infinity (for example
// if every score is NaN).
func bestScored(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
	var leader *Arm
	top := math.Inf(-1)
	for i := range arms {
		candidate := arms[i]
		adjusted := scoreArm(qt, params, candidate, task) * policyMultiplier(candidate, prefer)
		if adjusted > top {
			leader, top = candidate, adjusted
		}
	}
	return leader
}

// policyMultiplier returns the factor bestScored applies to an arm's
// score for a given prefer policy:
//
//   - PreferLocal: 1.0 for IsLocal arms, 0.3 for everything else
//   - PreferCloud: 0.5 for IsLocal arms, 1.0 for everything else
//   - any other policy: 1.0
//
// Soft bias only — the dispreferred set is scaled down, never zeroed, so
// when only cloud arms are feasible under PreferLocal a cloud arm can
// still win. The factors were calibrated against the typical scoreArm
// output range (~0.5–2.0), so 0.3 is roughly equivalent to "non-local arm
// must be ~3x better than local to win."
//
// Only IsLocal is consulted here, so CLI-agent subprocess arms count as
// non-local because they proxy to cloud — the prefer knob is about the
// privacy/cost axis, not the tooling-locality axis. (armTier, by contrast,
// does not demote CLI-agent arms under PreferLocal.) Users who want to pin
// subprocess specifically should use --provider subprocess, which bypasses
// the policy.
func policyMultiplier(arm *Arm, p PreferPolicy) float64 {
	switch {
	case p == PreferLocal && !arm.IsLocal:
		return 0.3
	case p == PreferCloud && arm.IsLocal:
		return 0.5
	}
	// Preferred arm, or no policy in effect: leave the score untouched.
	return 1.0
}

// scoreArm computes a quality/cost score for an arm: higher is better.
//
// Quality starts from heuristicQuality. When a quality tracker is supplied
// and reports data for this arm and task type, the observed value is
// blended in:
//
//	quality = ObservedWeight*observed + (1-ObservedWeight)*heuristic
//
// The blend ratio and strength bonus are tunable via BanditParams (config:
// [router.bandit]); params is passed through resolveBanditParams first, so
// a zero-valued params falls back to the built-in defaults (documented as
// 70% observed / 30% heuristic). An arm whose Strengths match task.Type
// gets StrengthBonus added on top of the blended quality.
//
// CostWeight dampens the cost penalty linearly:
//
//	effectiveCost = 1 + CostWeight * (cost - 1)
//
// With CostWeight=1.0 (or unset → resolved to 1.0) the formula collapses to
// effectiveCost == cost. With CostWeight=0 cost is fully ignored
// (effectiveCost = 1.0). Local arms with sub-1 raw costs are not amplified
// by fractional weights (the linear formula stays monotone). Both the raw
// and the weighted cost are floored at 0.001 so the division is always by
// a positive number.
//
// The returned score is (quality * task.ValueScore()) / weightedCost.
func scoreArm(qt *QualityTracker, params BanditParams, arm *Arm, task Task) float64 {
	// Floor applied whenever a cost term would be zero or negative.
	const costFloor = 0.001

	resolved := resolveBanditParams(params)

	heuristic := heuristicQuality(arm, task)
	quality := heuristic
	if qt != nil {
		if observed, ok := qt.Quality(arm.ID, task.Type); ok {
			quality = resolved.ObservedWeight*observed + (1-resolved.ObservedWeight)*heuristic
		}
	}
	if arm.HasStrength(task.Type) {
		quality += resolved.StrengthBonus
	}

	value := task.ValueScore()

	cost := effectiveCost(arm, task)
	if cost <= 0 {
		cost = costFloor
	}
	denominator := 1.0 + arm.ResolvedCostWeight()*(cost-1.0)
	if denominator <= 0 {
		denominator = costFloor
	}

	return (quality * value) / denominator
}

// heuristicQuality estimates arm quality without historical data. The
// result is clamped to [0, 1]. Adjustments are applied in this order,
// starting from a base of 0.5:
//
//  1. +0.1 for a context window of at least 100000, and a further +0.05
//     at 200000 or more.
//  2. For arms that support thinking: +0.2 on planning, orchestration and
//     security-review tasks, +0.1 on debug and refactor tasks.
//  3. x0.1 when the task requires tools and the arm does not support them.
//  4. +0.05 for local arms (no network latency, privacy).
//  5. x0.7 for local arms when the task's ComplexityScore exceeds 0.7.
//
// The order matters: the tool penalty scales what was accumulated before
// it, while the local boost is added afterwards.
func heuristicQuality(arm *Arm, task Task) float64 {
	quality := 0.5

	// Larger context window = better for complex tasks. The second bonus
	// can only apply once the first threshold is met.
	if window := arm.Capabilities.ContextWindow; window >= 100000 {
		quality += 0.1
		if window >= 200000 {
			quality += 0.05
		}
	}

	// Thinking capability is valuable for planning/orchestration/security,
	// and somewhat less so for debugging and refactoring.
	if arm.Capabilities.SupportsThinking() {
		if kind := task.Type; kind == TaskPlanning || kind == TaskOrchestration || kind == TaskSecurityReview {
			quality += 0.2
		} else if kind == TaskDebug || kind == TaskRefactor {
			quality += 0.1
		}
	}

	// Tool support required — an arm without tools gets a heavy penalty.
	if task.RequiresTools && !arm.SupportsTools() {
		quality *= 0.1
	}

	// Local models get a small boost, but complex tasks penalize them.
	if arm.IsLocal {
		quality += 0.05
		if task.ComplexityScore > 0.7 {
			quality *= 0.7
		}
	}

	// Clamp to [0, 1].
	switch {
	case quality > 1.0:
		return 1.0
	case quality < 0.0:
		return 0.0
	}
	return quality
}

// effectiveCost returns the arm's estimated cost for the task's
// EstimatedTokens, inflated by pool scarcity. An estimate of zero or less
// is replaced by 0.001 (local models are ~free but must not be zero for
// scoring). The inflation factor is the largest ScarcityMultiplier among
// the arm's pools and never drops below 1.0, so an arm without pools, or
// whose pools all report 1.0 or less, is returned at its base cost.
func effectiveCost(arm *Arm, task Task) float64 {
	cost := arm.EstimateCost(task.EstimatedTokens)
	if cost <= 0 {
		cost = 0.001
	}

	// Only the scarcest pool matters.
	scarcity := 1.0
	for _, p := range arm.Pools {
		if m := p.ScarcityMultiplier(); m > scarcity {
			scarcity = m
		}
	}

	return cost * scarcity
}

// filterFeasible returns the arms that can handle the task.
//
// Every arm is checked, in order, against: its complexity ceiling, tool
// support, vision support, the required effort level, and the capacity of
// all of its pools. Arms that pass all of those are split by the task
// type's minimum heuristic quality (DefaultThresholds): arms at or above
// the floor are "feasible", the rest are kept aside as "below quality".
//
// The result is chosen by graceful degradation:
//
//  1. the feasible arms, if there are any;
//  2. otherwise the below-quality arms, if there are any;
//  3. otherwise, for tasks that require tools only, a last-resort set of
//     arms that have tool use (and vision when required) and whose pools
//     can afford the task. This step ignores the complexity ceiling and
//     the effort requirement; pool capacity and vision stay hard
//     constraints.
//
// When the result is empty the caller surfaces a generic "no feasible arm"
// error; hard rejections in the main pass are logged here at slog.Debug
// per-arm so users debugging "why did the router reject everything?" with
// --verbose can see the actual constraint each arm tripped instead of
// guessing. Arms set aside for quality, and arms skipped by the
// last-resort pass, are not logged individually.
func filterFeasible(arms []*Arm, task Task) []*Arm {
	threshold := DefaultThresholds[task.Type]

	var feasible []*Arm
	var belowQuality []*Arm // passed every hard check but scored below minimum quality

	// reject logs why an arm was dropped, plus any check-specific fields.
	reject := func(arm *Arm, reason string, fields ...any) {
		base := []any{
			"arm", arm.ID,
			"task", task.Type,
			"complexity", task.ComplexityScore,
			"reason", reason,
		}
		slog.Debug("filterFeasible: rejected", append(base, fields...)...)
	}

	// poolsCanAfford reports whether every pool of the arm has capacity for
	// the task. Each pool is refreshed with CheckReset before it is asked,
	// so the answer is not based on a window that has not been rolled over
	// yet. Stops at the first pool that cannot afford the task.
	poolsCanAfford := func(arm *Arm) bool {
		for _, pool := range arm.Pools {
			pool.CheckReset()
			if !pool.CanAfford(arm.ID, task.EstimatedTokens) {
				return false
			}
		}
		return true
	}

	for _, arm := range arms {
		// Complexity ceiling: zero means no ceiling (preserves behavior for all existing arms).
		if arm.MaxComplexity > 0 && task.ComplexityScore > arm.MaxComplexity {
			reject(arm, "complexity_exceeds_max",
				"max_complexity", arm.MaxComplexity)
			continue
		}

		// Must support tools if task requires them
		if task.RequiresTools && !arm.SupportsTools() {
			reject(arm, "tools_required_but_unsupported",
				"tool_use_capability", arm.Capabilities.ToolUse)
			continue
		}

		// Must support vision if task carries inline image content.
		// No tools/quality fallback for vision: a non-vision arm physically
		// cannot consume the image bytes, so degrading to it would silently
		// drop the image and confuse the model.
		if task.RequiresVision && !arm.Capabilities.Vision {
			reject(arm, "vision_required_but_unsupported",
				"vision_capability", arm.Capabilities.Vision)
			continue
		}

		// Must support the required effort level (EffortAuto always passes)
		if !arm.Capabilities.SupportsEffort(task.RequiredEffort) {
			reject(arm, "effort_level_unsupported",
				"required_effort", task.RequiredEffort)
			continue
		}

		// Check all pools have capacity
		if !poolsCanAfford(arm) {
			reject(arm, "pool_capacity_exceeded",
				"estimated_tokens", task.EstimatedTokens)
			continue
		}

		// Quality floor: arms below minimum are set aside, not discarded
		if heuristicQuality(arm, task) < threshold.Minimum {
			belowQuality = append(belowQuality, arm)
			continue
		}

		feasible = append(feasible, arm)
	}

	if len(feasible) == 0 && len(belowQuality) == 0 {
		slog.Debug("filterFeasible: no arms feasible at any quality level",
			"task", task.Type,
			"complexity", task.ComplexityScore,
			"requires_tools", task.RequiresTools,
			"requires_vision", task.RequiresVision,
			"arms_considered", len(arms),
		)
	}

	// Degrade gracefully: if no arm meets quality threshold, use below-quality ones
	if len(feasible) == 0 && len(belowQuality) > 0 {
		return belowQuality
	}

	// Last resort for tool-requiring tasks: nothing passed the main pass at
	// any quality level, so re-admit arms with tool use while ignoring the
	// complexity ceiling and the effort requirement. Pool capacity is still
	// enforced. Arms that were rejected before the pool stage above never
	// had their pools refreshed in this call; poolsCanAfford does that here
	// so they are judged on current capacity.
	// NOTE(review): this pass reads Capabilities.ToolUse directly while the
	// main pass uses arm.SupportsTools() — confirm the two always agree.
	if len(feasible) == 0 && task.RequiresTools {
		for _, arm := range arms {
			if !arm.Capabilities.ToolUse {
				continue
			}
			// Vision requirement is hard: a non-vision arm cannot
			// consume image bytes, so even the last-resort fallback
			// must respect it.
			if task.RequiresVision && !arm.Capabilities.Vision {
				continue
			}
			if poolsCanAfford(arm) {
				feasible = append(feasible, arm)
			}
		}
	}

	return feasible
}