Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
subsequent argument deltas — fix: use json.Valid to detect completeness
- Missing tool_choice default — llama.cpp needs explicit "auto" to
activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
used API format (fs_ls) — now re-sanitized in translateMessage
Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
(Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
replaced with size markers to stay within small context windows
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
71 lines
1.9 KiB
Go
package engine
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"somegit.dev/Owlibou/gnoma/internal/message"
|
|
)
|
|
|
|
// compactPreviousToolResults replaces the content of tool results from
|
|
// already-processed rounds with a short size marker. The most recent tool
|
|
// results (after the last assistant message) are kept intact because the
|
|
// model hasn't responded to them yet.
|
|
//
|
|
// This dramatically reduces context usage in multi-round agentic loops,
|
|
// which is critical for local models with small context windows.
|
|
func compactPreviousToolResults(msgs []message.Message) []message.Message {
|
|
// Find the last assistant message — tool results before it have been
|
|
// processed; those after it are pending.
|
|
lastAssistant := -1
|
|
for i := len(msgs) - 1; i >= 0; i-- {
|
|
if msgs[i].Role == message.RoleAssistant {
|
|
lastAssistant = i
|
|
break
|
|
}
|
|
}
|
|
if lastAssistant <= 0 {
|
|
return msgs
|
|
}
|
|
|
|
out := make([]message.Message, len(msgs))
|
|
copy(out, msgs)
|
|
for i := range out {
|
|
if i >= lastAssistant {
|
|
break
|
|
}
|
|
if isToolResultMessage(out[i]) {
|
|
out[i] = compactToolResultMessage(out[i])
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func isToolResultMessage(m message.Message) bool {
|
|
return m.Role == message.RoleUser &&
|
|
len(m.Content) > 0 &&
|
|
m.Content[0].Type == message.ContentToolResult
|
|
}
|
|
|
|
func compactToolResultMessage(m message.Message) message.Message {
|
|
compacted := message.Message{
|
|
Role: m.Role,
|
|
Content: make([]message.Content, len(m.Content)),
|
|
}
|
|
for i, c := range m.Content {
|
|
if c.Type == message.ContentToolResult && c.ToolResult != nil {
|
|
summary := fmt.Sprintf("[prior result: %d chars]", len(c.ToolResult.Content))
|
|
compacted.Content[i] = message.Content{
|
|
Type: message.ContentToolResult,
|
|
ToolResult: &message.ToolResult{
|
|
ToolCallID: c.ToolResult.ToolCallID,
|
|
Content: summary,
|
|
IsError: c.ToolResult.IsError,
|
|
},
|
|
}
|
|
} else {
|
|
compacted.Content[i] = c
|
|
}
|
|
}
|
|
return compacted
|
|
}
|