Files
gnoma/internal/router/arm.go
vikingowl cb2d63d06f feat: Ollama/gemma4 compat — /init flow, stream filter, safety fixes
provider/openai:
- Fix doubled tool call args (argsComplete flag): Ollama sends complete
  args in the first streaming chunk then repeats them as delta, causing
  doubled JSON and 400 errors in elfs
- Handle fs: prefix (gemma4 uses fs:grep instead of fs.grep)
- Add Reasoning field support for Ollama thinking output

cmd/gnoma:
- Early TTY detection so logger is created with correct destination
  before any component gets a reference to it (fixes slog WARN bleed
  into TUI textarea)

permission:
- Exempt spawn_elfs and agent tools from safety scanner: elf prompt
  text may legitimately mention .env/.ssh/credentials patterns and
  should not be blocked

tui/app:
- /init retry chain: no-tool-calls → spawn_elfs nudge → write nudge
  (ask for plain text output) → TUI fallback write from streamBuf
- looksLikeAgentsMD + extractMarkdownDoc: validate and clean fallback
  content before writing (reject refusals, strip narrative preambles)
- Collapse thinking output to 3 lines; ctrl+o to expand (live stream
  and committed messages)
- Stream-level filter for model pseudo-tool-call blocks: suppresses
  <<tool_code>>...</tool_code>> and <<function_call>>...<tool_call|>
  from entering streamBuf across chunk boundaries
- sanitizeAssistantText regex covers both block formats
- Reset streamFilterClose at every turn start
2026-04-05 19:24:51 +02:00

83 lines
2.4 KiB
Go

package router
import (
"sync"
"time"
"somegit.dev/Owlibou/gnoma/internal/provider"
)
// ArmID uniquely identifies a model+provider pair. It is formatted as
// "provider/model" (see NewArmID).
type ArmID string
// Arm represents a provider+model pair available for routing.
// It bundles the provider handle, cost estimates, limit pools, and live
// performance metrics that routing decisions are based on.
type Arm struct {
ID ArmID // stable "provider/model" identifier (see NewArmID)
Provider provider.Provider // backend used to execute requests for this arm
ModelName string // model name as the provider expects it
IsLocal bool // presumably true for locally hosted models (e.g. Ollama) — confirm against call sites
Capabilities provider.Capabilities // feature flags for this model (see SupportsTools)
Pools []*LimitPool // NOTE(review): looks like rate/concurrency pools this arm draws from — LimitPool is defined elsewhere, verify
// Cost per 1k tokens (EUR, estimated)
CostPer1kInput float64
CostPer1kOutput float64
// Live performance metrics, updated after each completed request.
Perf ArmPerf
}
// NewArmID creates an arm ID from provider name and model.
// The two parts are joined with a single slash, e.g. "openai/gpt-4o".
func NewArmID(providerName, model string) ArmID {
	joined := providerName + "/" + model
	return ArmID(joined)
}
// EstimateCost returns estimated cost in EUR for a task.
// The token budget is split by a fixed heuristic — 60% input, 40% output —
// and each share is priced using the per-1k-token rates on the arm.
func (a *Arm) EstimateCost(estimatedTokens int) float64 {
	total := float64(estimatedTokens)
	inputCost := total * 0.6 / 1000 * a.CostPer1kInput
	outputCost := total * 0.4 / 1000 * a.CostPer1kOutput
	return inputCost + outputCost
}
// SupportsTools reports whether this arm's model supports function calling.
func (a *Arm) SupportsTools() bool {
	caps := a.Capabilities
	return caps.ToolUse
}
// perfAlpha is the EMA smoothing factor for ArmPerf updates (0.3 = ~3-sample memory).
const perfAlpha = 0.3

// ArmPerf tracks live performance metrics using an exponential moving average.
// Updated after each completed stream. Update is safe for concurrent use.
//
// NOTE(review): the exported fields are not protected on read — callers that
// read TTFTMs/ToksPerSec/Samples concurrently with Update race unless they
// synchronize externally. Confirm against call sites.
type ArmPerf struct {
	mu         sync.Mutex
	TTFTMs     float64 // time to first token, EMA in milliseconds
	ToksPerSec float64 // output throughput, EMA in tokens/second
	Samples    int     // total observations recorded
}

// Update records a single observation into the EMA.
// ttft: elapsed time from stream start to first text token.
// outputTokens: tokens generated in this response.
// streamDuration: total time the stream was active (first call to last event).
//
// The first sample seeds the averages directly; subsequent samples are
// blended with weight perfAlpha.
func (p *ArmPerf) Update(ttft time.Duration, outputTokens int, streamDuration time.Duration) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Keep sub-millisecond precision. Duration.Milliseconds() truncates to an
	// integer, which collapses fast local-model TTFTs (<1ms) to zero and
	// drags the EMA toward 0.
	ttftMs := float64(ttft) / float64(time.Millisecond)

	// Guard against a zero/negative duration to avoid division by zero;
	// such a sample contributes 0 tokens/sec to the average.
	var tps float64
	if streamDuration > 0 {
		tps = float64(outputTokens) / streamDuration.Seconds()
	}

	if p.Samples == 0 {
		// First observation seeds the EMA.
		p.TTFTMs = ttftMs
		p.ToksPerSec = tps
	} else {
		p.TTFTMs = perfAlpha*ttftMs + (1-perfAlpha)*p.TTFTMs
		p.ToksPerSec = perfAlpha*tps + (1-perfAlpha)*p.ToksPerSec
	}
	p.Samples++
}