Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
subsequent argument deltas — fix: use json.Valid to detect completeness
(see the sketch after this list)
- Missing tool_choice default — llama.cpp needs explicit "auto" to
activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
used API format (fs_ls) — now re-sanitized in translateMessage
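
A minimal sketch of the json.Valid fix, assuming a parser that accumulates
streamed argument fragments into a buffer (the type and method names here are
illustrative, not the codebase's actual ones):

```go
package sketch

import "encoding/json"

// toolCallState is an illustrative stand-in for the stream parser's
// per-call state.
type toolCallState struct {
	args         []byte // accumulated tool-argument JSON fragments
	argsComplete bool
}

// AppendDelta buffers a streamed arguments fragment and only marks the
// arguments complete once the whole buffer is valid JSON. Previously the
// flag was set on the first fragment, so a bare "{" caused every later
// delta to be dropped.
func (s *toolCallState) AppendDelta(delta string) {
	s.args = append(s.args, delta...)
	s.argsComplete = json.Valid(s.args)
}
```

The tool_choice fix is a one-liner by comparison: when the request carries
tool definitions and the caller has not set tool_choice, default it to "auto"
so llama.cpp engages its GBNF grammar constraint.
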
Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
(Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
replaced with size markers to stay within small context windows (see the
sketch after this list)
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
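
The tool result compaction is easiest to show with a sketch (the message
shape and marker format here are assumptions, not the engine's actual types):
results from earlier tool rounds are swapped for a short size marker, so only
the latest round's full output occupies the context window.

```go
package sketch

import "fmt"

// message is an illustrative chat-history entry; the engine's real type
// differs.
type message struct {
	Role    string // "user", "assistant", or "tool"
	Content string
	Round   int // which tool-use round produced this message
}

// compactToolResults replaces tool results from rounds before the current
// one with a size marker, keeping the latest round's output verbatim.
func compactToolResults(history []message, currentRound int) []message {
	out := make([]message, len(history))
	copy(out, history)
	for i, m := range out {
		if m.Role == "tool" && m.Round < currentRound {
			out[i].Content = fmt.Sprintf("[tool result elided: %d bytes]", len(m.Content))
		}
	}
	return out
}
```

The dynamic capability probing from the list above is implemented in the
router package:
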
```go
package router

import (
	"bytes"
	"context"
	"encoding/json"
	"log/slog"
	"net/http"
	"slices"
)

// probeLlamaCppToolSupport queries the llama.cpp /props endpoint to determine
// if the loaded model supports tool calling. Returns false on any error
// (conservative: unknown = no tools).
func probeLlamaCppToolSupport(ctx context.Context, baseURL string) bool {
	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "GET", baseURL+"/props", nil)
	if err != nil {
		return false
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return false
	}

	var result struct {
		ChatTemplateCaps struct {
			SupportsTools     bool `json:"supports_tools"`
			SupportsToolCalls bool `json:"supports_tool_calls"`
		} `json:"chat_template_caps"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Debug("llamacpp /props decode failed", "error", err)
		return false
	}

	caps := result.ChatTemplateCaps
	supported := caps.SupportsTools && caps.SupportsToolCalls
	slog.Debug("llamacpp tool probe",
		"supports_tools", caps.SupportsTools,
		"supports_tool_calls", caps.SupportsToolCalls,
		"result", supported,
	)
	return supported
}

// probeOllamaToolSupport queries Ollama's /api/show endpoint to determine
// if a specific model supports tool calling. Returns false on any error.
func probeOllamaToolSupport(ctx context.Context, baseURL, modelName string) bool {
	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	body, err := json.Marshal(map[string]string{"model": modelName})
	if err != nil {
		return false
	}

	req, err := http.NewRequestWithContext(ctx, "POST", baseURL+"/api/show", bytes.NewReader(body))
	if err != nil {
		return false
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return false
	}

	var result struct {
		Capabilities []string `json:"capabilities"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Debug("ollama /api/show decode failed", "model", modelName, "error", err)
		return false
	}

	supported := slices.Contains(result.Capabilities, "tools")
	slog.Debug("ollama tool probe",
		"model", modelName,
		"capabilities", result.Capabilities,
		"supports_tools", supported,
	)
	return supported
}
```
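
A plausible call site for the probes (hypothetical wiring; the router's
actual discovery code is not shown here) records the result on the discovered
model so the engine can gate tool use later:

```go
package router

import "context"

// modelInfo and discoverOllamaModel are illustrative, not the router's
// actual types.
type modelInfo struct {
	Name          string
	SupportsTools bool
}

// discoverOllamaModel probes a single Ollama model during provider
// discovery and records whether it can use tools.
func discoverOllamaModel(ctx context.Context, baseURL, name string) modelInfo {
	return modelInfo{
		Name:          name,
		SupportsTools: probeOllamaToolSupport(ctx, baseURL, name),
	}
}
```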