Files
gnoma/internal/engine/compact.go
vikingowl 3873f90f83 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
  replaced with size markers to stay within small context windows
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
2026-04-13 02:01:01 +02:00

71 lines
1.9 KiB
Go

package engine
import (
"fmt"
"somegit.dev/Owlibou/gnoma/internal/message"
)
// compactPreviousToolResults replaces the content of tool results from
// already-processed rounds with a short size marker. The most recent tool
// results (after the last assistant message) are kept intact because the
// model hasn't responded to them yet.
//
// This dramatically reduces context usage in multi-round agentic loops,
// which is critical for local models with small context windows.
func compactPreviousToolResults(msgs []message.Message) []message.Message {
	// Locate the most recent assistant turn: results before it have
	// already been seen by the model, results after it are still pending.
	cutoff := -1
	for i, m := range msgs {
		if m.Role == message.RoleAssistant {
			cutoff = i
		}
	}
	// No assistant turn yet (or it is the very first message): nothing
	// precedes it that could be compacted, so return the input untouched.
	if cutoff <= 0 {
		return msgs
	}
	// Work on a copy; compacted elements are rebuilt, never mutated in
	// place, so the shallow copy is safe.
	compacted := make([]message.Message, len(msgs))
	copy(compacted, msgs)
	for i := 0; i < cutoff; i++ {
		if isToolResultMessage(compacted[i]) {
			compacted[i] = compactToolResultMessage(compacted[i])
		}
	}
	return compacted
}
// isToolResultMessage reports whether m is a user-role message whose
// first content item is a tool result.
func isToolResultMessage(m message.Message) bool {
	if m.Role != message.RoleUser || len(m.Content) == 0 {
		return false
	}
	return m.Content[0].Type == message.ContentToolResult
}
// compactToolResultMessage returns a copy of m in which every tool-result
// content item is replaced by a short marker recording the original size.
// All other content items are carried over unchanged; m itself is never
// mutated.
func compactToolResultMessage(m message.Message) message.Message {
	out := message.Message{
		Role:    m.Role,
		Content: make([]message.Content, len(m.Content)),
	}
	for i, item := range m.Content {
		if item.Type != message.ContentToolResult || item.ToolResult == nil {
			// Pass non-tool-result (or nil-result) content through as-is.
			out.Content[i] = item
			continue
		}
		// NOTE(review): len() counts bytes, not runes — for typical ASCII
		// tool output "chars" is accurate; for multibyte text it overcounts.
		marker := fmt.Sprintf("[prior result: %d chars]", len(item.ToolResult.Content))
		out.Content[i] = message.Content{
			Type: message.ContentToolResult,
			ToolResult: &message.ToolResult{
				ToolCallID: item.ToolResult.ToolCallID,
				Content:    marker,
				IsError:    item.ToolResult.IsError,
			},
		}
	}
	return out
}