Rate limits:
- Add PoolRPS/PoolTPM/PoolTokensMonth/PoolCostMonth pool kinds
- Provider defaults for Mistral/Anthropic/OpenAI/Google (tier-aware)
- Config override via [rate_limits.<provider>] TOML section
- Pools auto-attached to arms on registration

Elf tree view (CC-style):
- Structured elf.Progress type replaces flat string channel
- Tree with ├─/└─ branches, per-elf stats (tool uses, tokens)
- Live activity updates: tool calls, "generating… (N chars)"
- Completed elfs stay in tree with "Done (duration)" until turn ends
- Suppress raw elf output from chat (tree + LLM summary instead)
- Remove background elf mode (wait: false) — always wait
- Truncate elf results to 2000 chars for parent context
- Parallel hint in system prompt and tool description

Permission prompts:
- Show actual command in prompt: "bash wants to execute: find . -name '*.go'"
- Compact hint in separator bar: "⚠ bash: find . | wc -l [y/n]"
- PermReqMsg carries tool name + args

Other:
- Fix /model not updating status bar (session.Local.SetModel)
- Add make targets: run, check, install
- Update deps: BurntSushi/toml v1.6.0, chroma v2.23.1, x/text v0.35.0, cloud.google.com/go v0.123.0
174 lines
4.2 KiB
Go
174 lines
4.2 KiB
Go
package router
|
|
|
|
import (
|
|
"math"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// PoolKind names the category of resource that a limit pool meters.
type PoolKind int

// Supported pool kinds. Values are assigned sequentially from zero in
// declaration order.
const (
	// PoolRPM meters requests per minute.
	PoolRPM = PoolKind(iota)
	// PoolRPS meters requests per second.
	PoolRPS
	// PoolRPD meters requests per day.
	PoolRPD
	// PoolTPM meters tokens per minute.
	PoolTPM
	// PoolTPD meters tokens per day.
	PoolTPD
	// PoolTokensMonth meters tokens per month.
	PoolTokensMonth
	// PoolCostMonth caps monetary cost per month.
	PoolCostMonth
	// PoolCustom meters arbitrary, caller-defined units.
	PoolCustom
)
|
|
|
|
// LimitPool tracks a shared resource budget that arms draw from.
|
|
type LimitPool struct {
|
|
mu sync.Mutex
|
|
|
|
ID string
|
|
Kind PoolKind
|
|
TotalLimit float64
|
|
Used float64
|
|
Reserved float64 // optimistically reserved for in-flight requests
|
|
ResetPeriod time.Duration
|
|
ResetAt time.Time
|
|
|
|
// Per-arm consumption rates (units per 1k tokens or per request)
|
|
ArmRates map[ArmID]float64
|
|
|
|
// Scarcity curve aggressiveness. k=2 gentle, k=4 aggressive hoarding.
|
|
ScarcityK float64
|
|
}
|
|
|
|
// RemainingFraction returns the fraction of budget still available.
|
|
func (p *LimitPool) RemainingFraction() float64 {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
if p.TotalLimit <= 0 {
|
|
return 0
|
|
}
|
|
return 1.0 - (p.Used+p.Reserved)/p.TotalLimit
|
|
}
|
|
|
|
// ScarcityMultiplier returns a cost inflation factor based on remaining budget.
|
|
// As resources deplete, the multiplier increases, making the arm more expensive.
|
|
func (p *LimitPool) ScarcityMultiplier() float64 {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
return p.scarcityMultiplierLocked()
|
|
}
|
|
|
|
func (p *LimitPool) scarcityMultiplierLocked() float64 {
|
|
if p.TotalLimit <= 0 {
|
|
return math.Inf(1)
|
|
}
|
|
|
|
f := 1.0 - (p.Used+p.Reserved)/p.TotalLimit
|
|
if f <= 0 {
|
|
return math.Inf(1) // exhausted
|
|
}
|
|
|
|
// Use-it-or-lose-it: if reset is imminent and headroom exists, discount
|
|
hoursToReset := time.Until(p.ResetAt).Hours()
|
|
if !p.ResetAt.IsZero() && hoursToReset > 0 && hoursToReset < 1.0 && f > 0.3 {
|
|
return 0.5
|
|
}
|
|
|
|
k := p.ScarcityK
|
|
if k <= 0 {
|
|
k = 2.0 // gentle default
|
|
}
|
|
return 1.0 / math.Pow(f, k)
|
|
}
|
|
|
|
// Exhausted returns true if the pool has no remaining capacity.
|
|
func (p *LimitPool) Exhausted() bool {
|
|
return p.RemainingFraction() <= 0
|
|
}
|
|
|
|
// CanAfford returns true if the pool can cover the projected consumption.
|
|
func (p *LimitPool) CanAfford(armID ArmID, estimatedTokens int) bool {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
rate := p.ArmRates[armID]
|
|
if rate == 0 {
|
|
return true // no rate defined = no limit
|
|
}
|
|
projected := rate * float64(estimatedTokens) / 1000.0
|
|
available := p.TotalLimit - p.Used - p.Reserved
|
|
return projected <= available
|
|
}
|
|
|
|
// Reservation represents an optimistic resource reservation.
|
|
type Reservation struct {
|
|
pool *LimitPool
|
|
armID ArmID
|
|
projected float64
|
|
committed bool
|
|
}
|
|
|
|
// Reserve creates an optimistic reservation. Call Commit() with actual usage
|
|
// on completion, or Rollback() on failure.
|
|
func (p *LimitPool) Reserve(armID ArmID, estimatedTokens int) (*Reservation, bool) {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
rate := p.ArmRates[armID]
|
|
if rate == 0 {
|
|
return &Reservation{pool: p}, true // no limit
|
|
}
|
|
|
|
projected := rate * float64(estimatedTokens) / 1000.0
|
|
available := p.TotalLimit - p.Used - p.Reserved
|
|
if projected > available {
|
|
return nil, false
|
|
}
|
|
|
|
p.Reserved += projected
|
|
return &Reservation{
|
|
pool: p,
|
|
armID: armID,
|
|
projected: projected,
|
|
}, true
|
|
}
|
|
|
|
// Commit finalizes the reservation with actual consumption.
|
|
func (r *Reservation) Commit(actualTokens int) {
|
|
if r.committed || r.pool == nil {
|
|
return
|
|
}
|
|
r.committed = true
|
|
r.pool.mu.Lock()
|
|
defer r.pool.mu.Unlock()
|
|
|
|
rate := r.pool.ArmRates[r.armID]
|
|
actual := rate * float64(actualTokens) / 1000.0
|
|
|
|
r.pool.Reserved -= r.projected
|
|
r.pool.Used += actual
|
|
}
|
|
|
|
// Rollback releases the reservation without consumption.
|
|
func (r *Reservation) Rollback() {
|
|
if r.committed || r.pool == nil || r.projected == 0 {
|
|
return
|
|
}
|
|
r.committed = true
|
|
r.pool.mu.Lock()
|
|
defer r.pool.mu.Unlock()
|
|
|
|
r.pool.Reserved -= r.projected
|
|
}
|
|
|
|
// CheckReset resets usage if the reset period has elapsed.
|
|
func (p *LimitPool) CheckReset() {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
if !p.ResetAt.IsZero() && time.Now().After(p.ResetAt) {
|
|
p.Used = 0
|
|
p.Reserved = 0
|
|
p.ResetAt = p.ResetAt.Add(p.ResetPeriod)
|
|
}
|
|
}
|