gnoma/internal/router/probe.go
vikingowl 3873f90f83 feat: local model reliability — SDK retries, capability probing, init skill, context compaction
Three compounding bugs prevented tool calling with llama.cpp:
- Stream parser set argsComplete on partial JSON (e.g. "{"), dropping
  subsequent argument deltas — fix: use json.Valid to detect completeness
  (sketched below)
- Missing tool_choice default — llama.cpp needs explicit "auto" to
  activate its GBNF grammar constraint; now set when tools are present
- Tool names in history used internal format (fs.ls) while definitions
  used API format (fs_ls) — now re-sanitized in translateMessage
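
A minimal sketch of the first two fixes (the accumulator and request shapes
are hypothetical stand-ins; the real stream-parser types live elsewhere in
the repo):

	package router

	import "encoding/json"

	// toolCallAccum stands in for the stream parser's per-call state.
	type toolCallAccum struct {
		args         []byte
		argsComplete bool
	}

	// appendDelta accumulates argument deltas. The bug: flagging
	// argsComplete on a partial prefix like "{" dropped every delta
	// that followed. The fix: only a fully valid JSON buffer flips it.
	func (a *toolCallAccum) appendDelta(delta string) {
		if a.argsComplete {
			return
		}
		a.args = append(a.args, delta...)
		a.argsComplete = json.Valid(a.args)
	}

	// defaultToolChoice shows the second fix: llama.cpp only engages its
	// GBNF grammar when tool_choice is explicit, so default it to "auto"
	// whenever tools are present (body is a hypothetical request map).
	func defaultToolChoice(body map[string]any, tools []any) {
		if len(tools) == 0 {
			return
		}
		body["tools"] = tools
		if _, ok := body["tool_choice"]; !ok {
			body["tool_choice"] = "auto"
		}
	}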

Additional changes:
- Disable SDK retries for local providers (500s are deterministic)
- Dynamic capability probing via /props (llama.cpp) and /api/show
  (Ollama), replacing hardcoded model prefix list
- Engine respects forced arm ToolUse capability when router is active
- Bundled /init skill with Go template blocks, context-aware for local
  vs cloud models, deduplication rules against CLAUDE.md
- Tool result compaction for local models — previous round results
  replaced with size markers to stay within small context windows
  (sketched below)
- Text-only fallback when tool-parse errors occur on local models
- "text-only" TUI indicator when model lacks tool support
- Session ResetError for retry after stream failures
- AllowedTools per-turn filtering in engine buildRequest
2026-04-13 02:01:01 +02:00
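
Roughly how the compaction works (the message shape and round tracking are
hypothetical simplifications of the engine's history entries):

	package router

	import "fmt"

	// message is a hypothetical stand-in for an engine history entry.
	type message struct {
		role    string
		round   int // tool-call round that produced this entry
		content string
	}

	// compactToolResults replaces tool results from earlier rounds with a
	// size marker so stale output stops consuming a small context window.
	func compactToolResults(history []message, currentRound int) {
		for i := range history {
			m := &history[i]
			if m.role == "tool" && m.round < currentRound {
				m.content = fmt.Sprintf("[tool result elided: %d bytes]", len(m.content))
			}
		}
	}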

package router

import (
	"bytes"
	"context"
	"encoding/json"
	"log/slog"
	"net/http"
	"slices"
)
// probeLlamaCppToolSupport queries the llama.cpp /props endpoint to determine
// if the loaded model supports tool calling. Returns false on any error
// (conservative: unknown = no tools).
func probeLlamaCppToolSupport(ctx context.Context, baseURL string) bool {
	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/props", nil)
	if err != nil {
		return false
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return false
	}

	var result struct {
		ChatTemplateCaps struct {
			SupportsTools     bool `json:"supports_tools"`
			SupportsToolCalls bool `json:"supports_tool_calls"`
		} `json:"chat_template_caps"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Debug("llamacpp /props decode failed", "error", err)
		return false
	}

	caps := result.ChatTemplateCaps
	supported := caps.SupportsTools && caps.SupportsToolCalls
	slog.Debug("llamacpp tool probe",
		"supports_tools", caps.SupportsTools,
		"supports_tool_calls", caps.SupportsToolCalls,
		"result", supported,
	)
	return supported
}

// probeOllamaToolSupport queries Ollama's /api/show endpoint to determine
// if a specific model supports tool calling. Returns false on any error.
func probeOllamaToolSupport(ctx context.Context, baseURL, modelName string) bool {
	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	body, err := json.Marshal(map[string]string{"model": modelName})
	if err != nil {
		return false
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, baseURL+"/api/show", bytes.NewReader(body))
	if err != nil {
		return false
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return false
	}

	var result struct {
		Capabilities []string `json:"capabilities"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		slog.Debug("ollama /api/show decode failed", "model", modelName, "error", err)
		return false
	}

	supported := slices.Contains(result.Capabilities, "tools")
	slog.Debug("ollama tool probe",
		"model", modelName,
		"capabilities", result.Capabilities,
		"supports_tools", supported,
	)
	return supported
}
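
For context, a hypothetical caller showing how the two probes could slot into
capability detection (not part of probe.go; the provider names are
illustrative):

	// detectToolSupport routes to the probe matching the local provider,
	// keeping the same conservative default: unknown = no tools.
	func detectToolSupport(ctx context.Context, provider, baseURL, model string) bool {
		switch provider {
		case "llamacpp":
			return probeLlamaCppToolSupport(ctx, baseURL)
		case "ollama":
			return probeOllamaToolSupport(ctx, baseURL, model)
		default:
			return false
		}
	}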