feat: auto-discover local models from ollama + llama.cpp

At startup, polls ollama (/api/tags) and llama.cpp (/v1/models) for
available models. Registers each as an arm in the router alongside
the CLI-specified provider.

Discovered: 7 ollama models + 1 llama.cpp model, plus the CLI-specified provider = 9 total arms.
Router can now select from multiple local models based on task type.
Discovery is non-fatal — failures are logged and skipped.
This commit is contained in:
2026-04-03 17:53:11 +02:00
parent 86aabd4946
commit 76916846aa
2 changed files with 189 additions and 0 deletions
+16
View File
@@ -157,6 +157,22 @@ func main() {
})
rtr.ForceArm(armID)
// Discover local models (ollama + llama.cpp) and register as additional arms
localModels := router.DiscoverLocalModels(context.Background(), logger,
cfg.Provider.Endpoints["ollama"],
cfg.Provider.Endpoints["llamacpp"],
)
router.RegisterDiscoveredModels(rtr, localModels, func(provName, model string) provider.Provider {
p, err := createProvider(provName, "", model, cfg.Provider.Endpoints[provName])
if err != nil {
return nil
}
return p
})
if len(localModels) > 0 {
logger.Debug("local models discovered", "count", len(localModels))
}
// Create firewall
fw := security.NewFirewall(security.FirewallConfig{
ScanOutgoing: true,
+173
View File
@@ -0,0 +1,173 @@
package router
import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"net/http"
	"strings"
	"time"

	"somegit.dev/Owlibou/gnoma/internal/provider"
)
// discoveryTimeout bounds each individual discovery request; it is applied
// per call on top of whatever deadline the caller's context already carries.
const discoveryTimeout time.Duration = 5 * time.Second
// DiscoveredModel describes a single model reported by a local inference
// backend during discovery.
type DiscoveredModel struct {
	// ID and Name are both filled with the identifier the backend reports
	// for the model by the discovery functions in this file.
	ID, Name string
	// Provider names the backend the model came from: "ollama" or "llamacpp".
	Provider string
	// Size is the model size in bytes when the backend reports one; it stays
	// zero otherwise (llama.cpp discovery does not set it).
	Size int64
}
// DiscoverOllama queries an Ollama server's /api/tags endpoint and returns one
// DiscoveredModel per model the server lists.
//
// baseURL is the server root. Trailing slashes are stripped so that a value
// such as "http://host:11434/" does not produce a "//api/tags" request path;
// an empty value falls back to http://localhost:11434. The request is bounded
// by discoveryTimeout in addition to any deadline already carried by ctx.
//
// An error is returned when the request cannot be built or sent, when the
// server answers with a status other than 200, or when the body cannot be
// decoded. The returned slice is nil when the server lists no models.
func DiscoverOllama(ctx context.Context, baseURL string) ([]DiscoveredModel, error) {
	baseURL = strings.TrimRight(baseURL, "/")
	if baseURL == "" {
		baseURL = "http://localhost:11434"
	}

	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/api/tags", nil)
	if err != nil {
		return nil, fmt.Errorf("ollama request for %s: %w", baseURL, err)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("ollama not reachable at %s: %w", baseURL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ollama returned %d", resp.StatusCode)
	}

	// Only the fields that end up in DiscoveredModel are decoded; anything
	// else in the response is ignored by the JSON decoder.
	var result struct {
		Models []struct {
			Name string `json:"name"`
			Size int64  `json:"size"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("ollama response parse: %w", err)
	}

	// Keep the "nil slice when nothing was found" result for callers.
	if len(result.Models) == 0 {
		return nil, nil
	}

	models := make([]DiscoveredModel, 0, len(result.Models))
	for _, m := range result.Models {
		models = append(models, DiscoveredModel{
			ID:       m.Name,
			Name:     m.Name,
			Provider: "ollama",
			Size:     m.Size,
		})
	}
	return models, nil
}
// DiscoverLlamaCpp queries a llama.cpp server's /v1/models endpoint and
// returns one DiscoveredModel per entry in the response's "data" array.
//
// baseURL is the server root. Trailing slashes are stripped, and a base URL
// whose path already ends in "/v1" is accepted as well, so the version segment
// is not duplicated into "/v1/v1/models". An empty value falls back to
// http://localhost:8080. The request is bounded by discoveryTimeout in
// addition to any deadline already carried by ctx.
//
// An error is returned when the request cannot be built or sent, when the
// server answers with a status other than 200, or when the body cannot be
// decoded. The returned slice is nil when the server lists no models.
func DiscoverLlamaCpp(ctx context.Context, baseURL string) ([]DiscoveredModel, error) {
	baseURL = strings.TrimRight(baseURL, "/")
	// Drop a trailing "/v1" path segment. The remainder must not end in "/",
	// which would mean "v1" was the host itself (e.g. "http://v1").
	if trimmed, ok := strings.CutSuffix(baseURL, "/v1"); ok && !strings.HasSuffix(trimmed, "/") {
		baseURL = trimmed
	}
	if baseURL == "" {
		baseURL = "http://localhost:8080"
	}

	ctx, cancel := context.WithTimeout(ctx, discoveryTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/v1/models", nil)
	if err != nil {
		return nil, fmt.Errorf("llama.cpp request for %s: %w", baseURL, err)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("llama.cpp not reachable at %s: %w", baseURL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("llama.cpp returned %d", resp.StatusCode)
	}

	var result struct {
		Data []struct {
			ID string `json:"id"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("llama.cpp response parse: %w", err)
	}

	// Keep the "nil slice when nothing was found" result for callers.
	if len(result.Data) == 0 {
		return nil, nil
	}

	models := make([]DiscoveredModel, 0, len(result.Data))
	for _, m := range result.Data {
		models = append(models, DiscoveredModel{
			ID:       m.ID,
			Name:     m.ID,
			Provider: "llamacpp",
		})
	}
	return models, nil
}
// DiscoverLocalModels gathers the models offered by the local backends this
// package knows about: Ollama at ollamaURL and llama.cpp at llamacppURL.
//
// Discovery is best-effort. The two backends are queried one after the other;
// when a query fails the error is logged at debug level and that backend is
// skipped, so no error is ever returned. Ollama models come first in the
// result, followed by llama.cpp models. The result is nil when nothing was
// found.
func DiscoverLocalModels(ctx context.Context, logger *slog.Logger, ollamaURL, llamacppURL string) []DiscoveredModel {
	var found []DiscoveredModel

	ollamaModels, ollamaErr := DiscoverOllama(ctx, ollamaURL)
	if ollamaErr == nil {
		logger.Debug("discovered ollama models", "count", len(ollamaModels))
		found = append(found, ollamaModels...)
	} else {
		logger.Debug("ollama discovery failed (non-fatal)", "error", ollamaErr)
	}

	llamaModels, llamaErr := DiscoverLlamaCpp(ctx, llamacppURL)
	if llamaErr == nil {
		logger.Debug("discovered llamacpp models", "count", len(llamaModels))
		found = append(found, llamaModels...)
	} else {
		logger.Debug("llamacpp discovery failed (non-fatal)", "error", llamaErr)
	}

	return found
}
// RegisterDiscoveredModels adds one local arm to r for every discovered model
// that is not registered yet.
//
// For each model the arm ID is derived with NewArmID from the model's provider
// and ID. Models whose arm ID is already present in r.Arms() are skipped, as
// are models for which providerFactory returns nil. providerFactory receives
// the provider name and the model ID.
func RegisterDiscoveredModels(r *Router, models []DiscoveredModel, providerFactory func(name, model string) provider.Provider) {
nextModel:
	for _, model := range models {
		id := NewArmID(model.Provider, model.ID)

		// The arm list is re-read for every model, so arms registered earlier
		// in this loop are seen by later iterations.
		for _, known := range r.Arms() {
			if known.ID == id {
				continue nextModel
			}
		}

		backend := providerFactory(model.Provider, model.ID)
		if backend == nil {
			continue
		}

		// Capabilities are not probed here: tool use and a context window of
		// 32768 are assumed for every discovered model.
		arm := &Arm{
			ID:        id,
			Provider:  backend,
			ModelName: model.ID,
			IsLocal:   true,
			Capabilities: provider.Capabilities{
				ToolUse:       true,
				ContextWindow: 32768,
			},
		}
		r.RegisterArm(arm)
	}
}