feat: Ollama/gemma4 compat — /init flow, stream filter, safety fixes
provider/openai: - Fix doubled tool call args (argsComplete flag): Ollama sends complete args in the first streaming chunk then repeats them as delta, causing doubled JSON and 400 errors in elfs - Handle fs: prefix (gemma4 uses fs:grep instead of fs.grep) - Add Reasoning field support for Ollama thinking output cmd/gnoma: - Early TTY detection so logger is created with correct destination before any component gets a reference to it (fixes slog WARN bleed into TUI textarea) permission: - Exempt spawn_elfs and agent tools from safety scanner: elf prompt text may legitimately mention .env/.ssh/credentials patterns and should not be blocked tui/app: - /init retry chain: no-tool-calls → spawn_elfs nudge → write nudge (ask for plain text output) → TUI fallback write from streamBuf - looksLikeAgentsMD + extractMarkdownDoc: validate and clean fallback content before writing (reject refusals, strip narrative preambles) - Collapse thinking output to 3 lines; ctrl+o to expand (live stream and committed messages) - Stream-level filter for model pseudo-tool-call blocks: suppresses <tool_code>...</tool_code> and <function_call>...</function_call> from entering streamBuf across chunk boundaries - sanitizeAssistantText regex covers both block formats - Reset streamFilterClose at every turn start
This commit is contained in:
57
internal/provider/limiter.go
Normal file
57
internal/provider/limiter.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package provider
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"somegit.dev/Owlibou/gnoma/internal/stream"
|
||||
)
|
||||
|
||||
// ConcurrentProvider wraps a Provider with a shared semaphore that limits the
// number of in-flight Stream calls. All engines sharing the same
// ConcurrentProvider instance share the same concurrency budget.
type ConcurrentProvider struct {
	Provider // embedded inner provider; every method other than Stream passes through

	// sem is a token semaphore: WithConcurrency creates it pre-filled with
	// one token per allowed slot. Stream RECEIVES a token to acquire a slot
	// and SENDS it back to release — note this is the inverse of the more
	// common empty-channel idiom, so acquire/release direction matters.
	sem chan struct{}
}
|
||||
|
||||
// WithConcurrency wraps p so that at most max Stream calls can be in-flight
|
||||
// simultaneously. If max <= 0, p is returned unwrapped.
|
||||
func WithConcurrency(p Provider, max int) Provider {
|
||||
if max <= 0 {
|
||||
return p
|
||||
}
|
||||
sem := make(chan struct{}, max)
|
||||
for range max {
|
||||
sem <- struct{}{}
|
||||
}
|
||||
return &ConcurrentProvider{Provider: p, sem: sem}
|
||||
}
|
||||
|
||||
// Stream acquires a concurrency slot, calls the inner provider, and returns a
|
||||
// stream that releases the slot when Close is called.
|
||||
func (cp *ConcurrentProvider) Stream(ctx context.Context, req Request) (stream.Stream, error) {
|
||||
select {
|
||||
case <-cp.sem:
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
s, err := cp.Provider.Stream(ctx, req)
|
||||
if err != nil {
|
||||
cp.sem <- struct{}{}
|
||||
return nil, err
|
||||
}
|
||||
return &semStream{Stream: s, release: func() { cp.sem <- struct{}{} }}, nil
|
||||
}
|
||||
|
||||
// semStream wraps a stream.Stream to release a semaphore slot on Close.
type semStream struct {
	stream.Stream // embedded inner stream; every method other than Close passes through

	// release returns the concurrency token to the owning semaphore
	// (set by ConcurrentProvider.Stream).
	release func()
	// once guards release so that repeated Close calls free the slot
	// exactly once; sync.Once makes semStream non-copyable, so it is
	// always used as *semStream.
	once sync.Once
}
|
||||
|
||||
func (s *semStream) Close() error {
|
||||
s.once.Do(s.release)
|
||||
return s.Stream.Close()
|
||||
}
|
||||
@@ -15,13 +15,20 @@ const defaultModel = "gpt-4o"
|
||||
|
||||
// Provider implements provider.Provider for the OpenAI API.
|
||||
type Provider struct {
|
||||
client *oai.Client
|
||||
name string
|
||||
model string
|
||||
client *oai.Client
|
||||
name string
|
||||
model string
|
||||
streamOpts []option.RequestOption // injected per-request (e.g. think:false for Ollama)
|
||||
}
|
||||
|
||||
// New creates an OpenAI provider from config.
|
||||
func New(cfg provider.ProviderConfig) (provider.Provider, error) {
|
||||
return NewWithStreamOptions(cfg, nil)
|
||||
}
|
||||
|
||||
// NewWithStreamOptions creates an OpenAI provider with extra per-request stream options.
|
||||
// Use this for Ollama/llama.cpp adapters that need non-standard body fields.
|
||||
func NewWithStreamOptions(cfg provider.ProviderConfig, streamOpts []option.RequestOption) (provider.Provider, error) {
|
||||
if cfg.APIKey == "" {
|
||||
return nil, fmt.Errorf("openai: api key required")
|
||||
}
|
||||
@@ -41,9 +48,10 @@ func New(cfg provider.ProviderConfig) (provider.Provider, error) {
|
||||
}
|
||||
|
||||
return &Provider{
|
||||
client: &client,
|
||||
name: "openai",
|
||||
model: model,
|
||||
client: &client,
|
||||
name: "openai",
|
||||
model: model,
|
||||
streamOpts: streamOpts,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -57,7 +65,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request) (stream.Str
|
||||
params := translateRequest(req)
|
||||
params.Model = model
|
||||
|
||||
raw := p.client.Chat.Completions.NewStreaming(ctx, params)
|
||||
raw := p.client.Chat.Completions.NewStreaming(ctx, params, p.streamOpts...)
|
||||
|
||||
return newOpenAIStream(raw), nil
|
||||
}
|
||||
|
||||
@@ -25,9 +25,10 @@ type openaiStream struct {
|
||||
}
|
||||
|
||||
type toolCallState struct {
|
||||
id string
|
||||
name string
|
||||
args string
|
||||
id string
|
||||
name string
|
||||
args string
|
||||
argsComplete bool // true when args arrived in the initial chunk; skip subsequent deltas
|
||||
}
|
||||
|
||||
func newOpenAIStream(raw *ssestream.Stream[oai.ChatCompletionChunk]) *openaiStream {
|
||||
@@ -74,9 +75,10 @@ func (s *openaiStream) Next() bool {
|
||||
if !ok {
|
||||
// New tool call — capture initial arguments too
|
||||
existing = &toolCallState{
|
||||
id: tc.ID,
|
||||
name: tc.Function.Name,
|
||||
args: tc.Function.Arguments,
|
||||
id: tc.ID,
|
||||
name: tc.Function.Name,
|
||||
args: tc.Function.Arguments,
|
||||
argsComplete: tc.Function.Arguments != "",
|
||||
}
|
||||
s.toolCalls[tc.Index] = existing
|
||||
s.hadToolCalls = true
|
||||
@@ -91,8 +93,11 @@ func (s *openaiStream) Next() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate arguments (subsequent chunks)
|
||||
if tc.Function.Arguments != "" && ok {
|
||||
// Accumulate arguments (subsequent chunks).
|
||||
// Skip if args were already provided in the initial chunk — some providers
|
||||
// (e.g. Ollama) send complete args in the name chunk and then repeat them
|
||||
// as a delta, which would cause doubled JSON and unmarshal failures.
|
||||
if tc.Function.Arguments != "" && ok && !existing.argsComplete {
|
||||
existing.args += tc.Function.Arguments
|
||||
s.cur = stream.Event{
|
||||
Type: stream.EventToolCallDelta,
|
||||
@@ -113,6 +118,29 @@ func (s *openaiStream) Next() bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Ollama thinking content — non-standard "thinking" or "reasoning" field on the delta.
|
||||
// Ollama uses "reasoning"; some other servers use "thinking".
|
||||
// The openai-go struct drops unknown fields, so we read the raw JSON directly.
|
||||
if raw := delta.RawJSON(); raw != "" {
|
||||
var extra struct {
|
||||
Thinking string `json:"thinking"`
|
||||
Reasoning string `json:"reasoning"`
|
||||
}
|
||||
if json.Unmarshal([]byte(raw), &extra) == nil {
|
||||
text := extra.Thinking
|
||||
if text == "" {
|
||||
text = extra.Reasoning
|
||||
}
|
||||
if text != "" {
|
||||
s.cur = stream.Event{
|
||||
Type: stream.EventThinkingDelta,
|
||||
Text: text,
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stream ended — flush tool call Done events, then emit stop
|
||||
|
||||
@@ -20,6 +20,10 @@ func unsanitizeToolName(name string) string {
|
||||
if strings.HasPrefix(name, "fs_") {
|
||||
return "fs." + name[3:]
|
||||
}
|
||||
// Some models (e.g. gemma4 via Ollama) use "fs:grep" instead of "fs_grep"
|
||||
if strings.HasPrefix(name, "fs:") {
|
||||
return "fs." + name[3:]
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
@@ -127,6 +131,12 @@ func translateRequest(req provider.Request) oai.ChatCompletionNewParams {
|
||||
IncludeUsage: param.NewOpt(true),
|
||||
}
|
||||
|
||||
if req.ToolChoice != "" && len(params.Tools) > 0 {
|
||||
params.ToolChoice = oai.ChatCompletionToolChoiceOptionUnionParam{
|
||||
OfAuto: param.NewOpt(string(req.ToolChoice)),
|
||||
}
|
||||
}
|
||||
|
||||
return params
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,15 @@ import (
|
||||
"somegit.dev/Owlibou/gnoma/internal/stream"
|
||||
)
|
||||
|
||||
// ToolChoiceMode controls how the model selects tools.
|
||||
type ToolChoiceMode string
|
||||
|
||||
const (
|
||||
ToolChoiceAuto ToolChoiceMode = "auto"
|
||||
ToolChoiceRequired ToolChoiceMode = "required"
|
||||
ToolChoiceNone ToolChoiceMode = "none"
|
||||
)
|
||||
|
||||
// Request encapsulates everything needed for a single LLM API call.
|
||||
type Request struct {
|
||||
Model string
|
||||
@@ -21,6 +30,7 @@ type Request struct {
|
||||
StopSequences []string
|
||||
Thinking *ThinkingConfig
|
||||
ResponseFormat *ResponseFormat
|
||||
ToolChoice ToolChoiceMode // "" = provider default (auto)
|
||||
}
|
||||
|
||||
// ToolDefinition is the provider-agnostic tool schema.
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package provider
|
||||
|
||||
import "math"
|
||||
|
||||
// RateLimits describes the rate limits for a provider+model pair.
|
||||
// Zero values mean "no limit" or "unknown".
|
||||
type RateLimits struct {
|
||||
@@ -13,6 +15,31 @@ type RateLimits struct {
|
||||
SpendCap float64 // monthly spend cap in provider currency
|
||||
}
|
||||
|
||||
// MaxConcurrent returns the maximum number of concurrent in-flight requests
|
||||
// that this rate limit allows. Returns 0 when there is no meaningful concurrency
|
||||
// constraint (provider has high or unknown limits).
|
||||
func (rl RateLimits) MaxConcurrent() int {
|
||||
if rl.RPS > 0 {
|
||||
n := int(math.Ceil(rl.RPS))
|
||||
if n < 1 {
|
||||
n = 1
|
||||
}
|
||||
return n
|
||||
}
|
||||
if rl.RPM > 0 {
|
||||
// Allow 1 concurrent slot per 30 RPM (conservative heuristic).
|
||||
n := rl.RPM / 30
|
||||
if n < 1 {
|
||||
n = 1
|
||||
}
|
||||
if n > 16 {
|
||||
n = 16
|
||||
}
|
||||
return n
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ProviderDefaults holds default rate limits keyed by model glob.
|
||||
// The special key "*" matches any model not explicitly listed.
|
||||
type ProviderDefaults struct {
|
||||
|
||||
Reference in New Issue
Block a user