From 14b88cadcc724c6118d6858f8b7d6899b164bf07 Mon Sep 17 00:00:00 2001 From: vikingowl Date: Sat, 4 Apr 2026 20:46:50 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20M1-M7=20gap=20audit=20phase=203=20?= =?UTF-8?q?=E2=80=94=20context=20prefix,=20deferred=20tools,=20compact=20h?= =?UTF-8?q?ooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gap 11 (M6): Fixed context prefix - WindowConfig.PrefixMessages (kept in Window.prefix) stores immutable docs (CLAUDE.md, .gnoma/GNOMA.md) - Prefix is held separately from the mutable history and is never passed to the compaction strategy — survives all compaction - AllMessages() returns prefix + history for provider requests - main.go loads CLAUDE.md and .gnoma/GNOMA.md at startup as prefix Gap 12 (M6): Deferred tool loading - DeferrableTool optional interface: ShouldDefer() bool - buildRequest() skips deferred tools until activated - Tools auto-activate the first time the model calls them (activatedTools map, set in executeTools()) - agent + spawn_elfs marked as deferrable (large schemas, rarely needed early) - Saves ~800 tokens per deferred tool per request Gap 13 (M6): Pre/post compact hooks - OnPreCompact/OnPostCompact callbacks in WindowConfig - Called in doCompact() (shared by CompactIfNeeded + ForceCompact) - M8 hooks system will extend these to full protocol --- cmd/gnoma/main.go | 22 +++++- internal/context/window.go | 127 ++++++++++++++++++++--------------- internal/engine/engine.go | 9 ++- internal/engine/loop.go | 15 ++++- internal/tool/agent/agent.go | 1 + internal/tool/agent/batch.go | 1 + internal/tool/tool.go | 7 ++ 7 files changed, 121 insertions(+), 61 deletions(-) diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go index 9a73689..d13b619 100644 --- a/cmd/gnoma/main.go +++ b/cmd/gnoma/main.go @@ -15,6 +15,7 @@ import ( "encoding/json" gnomacfg "somegit.dev/Owlibou/gnoma/internal/config" gnomactx "somegit.dev/Owlibou/gnoma/internal/context" + "somegit.dev/Owlibou/gnoma/internal/message" "somegit.dev/Owlibou/gnoma/internal/permission" "somegit.dev/Owlibou/gnoma/internal/provider" 
"somegit.dev/Owlibou/gnoma/internal/router" @@ -270,13 +271,28 @@ func main() { systemPrompt = systemPrompt + "\n\n" + summary } + // Load project docs as immutable context prefix + var prefixMsgs []message.Message + for _, name := range []string{"CLAUDE.md", ".gnoma/GNOMA.md"} { + data, err := os.ReadFile(name) + if err != nil { + continue + } + prefixMsgs = append(prefixMsgs, + message.NewUserText(fmt.Sprintf("[Project docs: %s]\n\n%s", name, string(data))), + message.NewAssistantText("I've read the project documentation and will follow these guidelines."), + ) + logger.Debug("loaded project docs as context prefix", "file", name, "size", len(data)) + } + // Create context window with summarize strategy (falls back to truncation) var compactStrategy gnomactx.Strategy compactStrategy = gnomactx.NewSummarizeStrategy(prov) ctxWindow := gnomactx.NewWindow(gnomactx.WindowConfig{ - MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x - Strategy: compactStrategy, - Logger: logger, + MaxTokens: cfg.Provider.MaxTokens * 20, // rough: max_tokens is per-turn, context window ~20x + Strategy: compactStrategy, + PrefixMessages: prefixMsgs, + Logger: logger, }) // Create engine diff --git a/internal/context/window.go b/internal/context/window.go index 05f7142..b5d704c 100644 --- a/internal/context/window.go +++ b/internal/context/window.go @@ -18,18 +18,26 @@ type Strategy interface { type Window struct { tracker *Tracker strategy Strategy - messages []message.Message + prefix []message.Message // immutable prefix (project docs), never compacted + messages []message.Message // mutable conversation history logger *slog.Logger + // Compact hooks + onPreCompact func([]message.Message) + onPostCompact func([]message.Message) + // Circuit breaker: stop retrying after consecutive failures consecutiveFailures int maxFailures int } type WindowConfig struct { - MaxTokens int64 - Strategy Strategy - Logger *slog.Logger + MaxTokens int64 + Strategy 
Strategy + PrefixMessages []message.Message // immutable prefix, survives compaction + OnPreCompact func([]message.Message) + OnPostCompact func([]message.Message) + Logger *slog.Logger } func NewWindow(cfg WindowConfig) *Window { @@ -38,11 +46,14 @@ func NewWindow(cfg WindowConfig) *Window { logger = slog.Default() } return &Window{ - tracker: NewTracker(cfg.MaxTokens), - strategy: cfg.Strategy, - messages: nil, - logger: logger, - maxFailures: 3, + tracker: NewTracker(cfg.MaxTokens), + strategy: cfg.Strategy, + prefix: cfg.PrefixMessages, + messages: nil, + logger: logger, + onPreCompact: cfg.OnPreCompact, + onPostCompact: cfg.OnPostCompact, + maxFailures: 3, } } @@ -52,12 +63,23 @@ func (w *Window) Append(msg message.Message, usage message.Usage) { w.tracker.Add(usage) } -// Messages returns the current message history. +// Messages returns the mutable conversation history (without prefix). func (w *Window) Messages() []message.Message { return w.messages } -// SetMessages replaces the message history (used after compaction). +// AllMessages returns prefix + mutable history. Use this for building provider requests. +func (w *Window) AllMessages() []message.Message { + if len(w.prefix) == 0 { + return w.messages + } + all := make([]message.Message, 0, len(w.prefix)+len(w.messages)) + all = append(all, w.prefix...) + all = append(all, w.messages...) + return all +} + +// SetMessages replaces the mutable message history (used after compaction). func (w *Window) SetMessages(msgs []message.Message) { w.messages = msgs } @@ -73,13 +95,25 @@ func (w *Window) CompactIfNeeded() (bool, error) { if !w.tracker.ShouldCompact() { return false, nil } + return w.doCompact(false) +} +// ForceCompact runs compaction regardless of the token threshold. +// Used for reactive compaction (e.g., after a 413 response). 
+func (w *Window) ForceCompact() (bool, error) { + if len(w.messages) <= 2 { + return false, nil + } + return w.doCompact(true) +} + +func (w *Window) doCompact(force bool) (bool, error) { if w.strategy == nil { return false, fmt.Errorf("no compaction strategy configured") } - // Circuit breaker - if w.consecutiveFailures >= w.maxFailures { + // Circuit breaker (skip for forced) + if !force && w.consecutiveFailures >= w.maxFailures { w.logger.Warn("compaction circuit breaker open", "failures", w.consecutiveFailures, "max", w.maxFailures, @@ -87,18 +121,33 @@ func (w *Window) CompactIfNeeded() (bool, error) { return false, nil } - budget := w.tracker.Remaining() + w.tracker.Used()/2 // target: half of current usage - if budget < 0 { + var budget int64 + if force { budget = w.tracker.MaxTokens() / 2 + } else { + budget = w.tracker.Remaining() + w.tracker.Used()/2 + if budget < 0 { + budget = w.tracker.MaxTokens() / 2 + } } - w.logger.Info("compacting context", + label := "compacting" + if force { + label = "forced compacting" + } + w.logger.Info(label+" context", "messages", len(w.messages), + "prefix", len(w.prefix), "used", w.tracker.Used(), "budget", budget, - "strategy", fmt.Sprintf("%T", w.strategy), ) + // Pre-compact hook + if w.onPreCompact != nil { + w.onPreCompact(w.messages) + } + + // Compact only mutable messages — prefix is preserved separately compacted, err := w.strategy.Compact(w.messages, budget) if err != nil { w.consecutiveFailures++ @@ -113,7 +162,6 @@ func (w *Window) CompactIfNeeded() (bool, error) { originalLen := len(w.messages) w.messages = compacted - // Rough estimate: reduce tracked tokens proportionally ratio := float64(len(compacted)) / float64(originalLen+1) w.tracker.Set(int64(float64(w.tracker.Used()) * ratio)) @@ -123,46 +171,15 @@ func (w *Window) CompactIfNeeded() (bool, error) { "tokens_after", w.tracker.Used(), ) + // Post-compact hook + if w.onPostCompact != nil { + w.onPostCompact(compacted) + } + return true, nil } -// 
ForceCompact runs compaction regardless of the token threshold. -// Used for reactive compaction (e.g., after a 413 response). -func (w *Window) ForceCompact() (bool, error) { - if w.strategy == nil { - return false, fmt.Errorf("no compaction strategy configured") - } - if len(w.messages) <= 2 { - return false, nil // nothing to compact - } - - budget := w.tracker.MaxTokens() / 2 - - w.logger.Info("forced compaction", - "messages", len(w.messages), - "used", w.tracker.Used(), - "budget", budget, - ) - - compacted, err := w.strategy.Compact(w.messages, budget) - if err != nil { - return false, err - } - - originalLen := len(w.messages) - w.messages = compacted - ratio := float64(len(compacted)) / float64(originalLen+1) - w.tracker.Set(int64(float64(w.tracker.Used()) * ratio)) - - w.logger.Info("forced compaction complete", - "messages_before", originalLen, - "messages_after", len(compacted), - "tokens_after", w.tracker.Used(), - ) - return true, nil -} - -// Reset clears all messages and usage. +// Reset clears all messages and usage (prefix is preserved). func (w *Window) Reset() { w.messages = nil w.tracker.Reset() diff --git a/internal/engine/engine.go b/internal/engine/engine.go index 4b85cf8..1f52ecc 100644 --- a/internal/engine/engine.go +++ b/internal/engine/engine.go @@ -55,6 +55,10 @@ type Engine struct { // Cached model capabilities, resolved lazily modelCaps *provider.Capabilities modelCapsFor string // model ID the cached caps are for + + // Deferred tool loading: tools with ShouldDefer() are excluded until + // the model requests them. Activated on first use. + activatedTools map[string]bool } // New creates an engine. 
@@ -67,8 +71,9 @@ func New(cfg Config) (*Engine, error) { logger = slog.Default() } return &Engine{ - cfg: cfg, - logger: logger, + cfg: cfg, + logger: logger, + activatedTools: make(map[string]bool), }, nil } diff --git a/internal/engine/loop.go b/internal/engine/loop.go index 583cc91..30da198 100644 --- a/internal/engine/loop.go +++ b/internal/engine/loop.go @@ -190,8 +190,11 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) { } func (e *Engine) buildRequest(ctx context.Context) provider.Request { - // Scan messages through firewall if configured + // Use AllMessages (prefix + history) if context window manages prefix docs messages := e.history + if e.cfg.Context != nil { + messages = e.cfg.Context.AllMessages() + } systemPrompt := e.cfg.System if e.cfg.Firewall != nil { messages = e.cfg.Firewall.ScanOutgoingMessages(messages) @@ -209,6 +212,10 @@ func (e *Engine) buildRequest(ctx context.Context) provider.Request { if caps == nil || caps.ToolUse { // nil caps = unknown model, include tools optimistically for _, t := range e.cfg.Tools.All() { + // Skip deferred tools until the model requests them + if dt, ok := t.(tool.DeferrableTool); ok && dt.ShouldDefer() && !e.activatedTools[t.Name()] { + continue + } req.Tools = append(req.Tools, provider.ToolDefinition{ Name: t.Name(), Description: t.Description(), @@ -237,6 +244,12 @@ func (e *Engine) executeTools(ctx context.Context, calls []message.ToolCall, cb for _, call := range calls { t, ok := e.cfg.Tools.Get(call.Name) + if ok { + // Activate deferred tools on first use + if dt, isDeferrable := t.(tool.DeferrableTool); isDeferrable && dt.ShouldDefer() { + e.activatedTools[call.Name] = true + } + } if !ok { e.logger.Warn("unknown tool", "name", call.Name) unknownResults = append(unknownResults, message.ToolResult{ diff --git a/internal/tool/agent/agent.go b/internal/tool/agent/agent.go index c9e0756..b8e61b9 100644 --- a/internal/tool/agent/agent.go +++ b/internal/tool/agent/agent.go @@ 
-53,6 +53,7 @@ func (t *Tool) Description() string { return "Spawn a sub-agent (elf) to func (t *Tool) Parameters() json.RawMessage { return paramSchema } func (t *Tool) IsReadOnly() bool { return true } func (t *Tool) IsDestructive() bool { return false } +func (t *Tool) ShouldDefer() bool { return true } type agentArgs struct { Prompt string `json:"prompt"` diff --git a/internal/tool/agent/batch.go b/internal/tool/agent/batch.go index 4e9137a..a9bf2e9 100644 --- a/internal/tool/agent/batch.go +++ b/internal/tool/agent/batch.go @@ -64,6 +64,7 @@ func (t *BatchTool) Description() string { return "Spawn multiple elfs ( func (t *BatchTool) Parameters() json.RawMessage { return batchSchema } func (t *BatchTool) IsReadOnly() bool { return true } func (t *BatchTool) IsDestructive() bool { return false } +func (t *BatchTool) ShouldDefer() bool { return true } type batchArgs struct { Tasks []batchTask `json:"tasks"` diff --git a/internal/tool/tool.go b/internal/tool/tool.go index a80266c..48abac6 100644 --- a/internal/tool/tool.go +++ b/internal/tool/tool.go @@ -20,3 +20,10 @@ type Tool interface { // IsDestructive returns true if the tool can cause irreversible changes. IsDestructive() bool } + +// DeferrableTool is an optional interface for tools that can be excluded +// from initial requests and loaded on demand. Reduces token overhead +// for rarely-used tools with large schemas. +type DeferrableTool interface { + ShouldDefer() bool +}