Files
gnoma/internal/engine/earlystop.go
T
vikingowl 397a39250c feat(engine): early-stop detection for runaway agent loops
Adds three lightweight per-turn detectors that fire corrective user
messages back into the conversation when the model goes off the rails:

- RepetitionDetector: sliding-window scan over streamed text deltas;
  trips when a 50/80/120-char pattern repeats >= 3 times in the trailing
  200 chars. Breaks the active stream and injects a correction.
- PatchFailureTracker: per-path counter for fs.edit/fs.write failures;
  trips on the 4th consecutive failure and steers the model to fs.write
  rather than another fs.edit on the same path. Success decrements with
  a floor of 0; paths are isolated.
- DetectGreeting: narrow allowlist for "how can I help" style replies;
  only consulted after a round that used tools, so first-turn greetings
  don't false-positive.

Detector state is per-turn (declared locally in runLoop), single-
goroutine use. Corrective messages are appended as user-role text to
both engine history and the context window. Telemetry: each trigger
logs at INFO with round + path where applicable.

Covered by 12 unit tests for the primitives and 5 loop-level integration
tests that drive the full agentic loop via the existing eventStream
mock.
2026-05-19 17:39:35 +02:00

206 lines
6.2 KiB
Go

package engine
import (
"fmt"
"strings"
)
// Default tuning for the early-stop detectors. These mirror the values used
// in smallcode's reference implementation, adjusted for our streaming shape.

// Defaults for the repetition detector.
const (
	// defaultRepetitionWindow is how much of the end of the stream is
	// inspected for a repeating pattern.
	defaultRepetitionWindow = 200
	// defaultRepetitionThreshold is how many times a pattern must occur
	// before the detector fires.
	defaultRepetitionThreshold = 3
)

// defaultRepetitionSizes lists the candidate pattern lengths the repetition
// detector tries.
var defaultRepetitionSizes = []int{50, 80, 120}

// defaultMaxPatchFailures is the per-path failure count at which the patch
// failure tracker escalates.
const defaultMaxPatchFailures = 4
// RepetitionDetector watches a stream's text deltas for a fixed-size pattern
// that recurs ≥ threshold times within the trailing window. Detects the
// "model lost the plot and is now repeating itself" failure mode.
//
// Feed accumulates text into an internal buffer and scans the end of it;
// Reset discards that buffer. All lengths are measured in bytes (len of a
// string), not runes.
//
// Single-goroutine use only — the loop drives it from the stream consume path.
type RepetitionDetector struct {
// windowChars is the configured length, in bytes, of the trailing window
// Feed works with; buffer retention is derived from it as well.
windowChars int
// threshold is the number of non-overlapping occurrences of a pattern that
// Feed must find before it reports a repetition.
threshold int
// sizes holds the candidate pattern lengths, in bytes, that Feed tries in
// slice order.
sizes []int
// buf accumulates the streamed text. Feed trims it to bound memory.
buf strings.Builder
}
// NewRepetitionDetector returns a detector with an empty buffer, configured
// from the package defaults (window, threshold and candidate pattern sizes).
// The sizes slice is the shared package-level default, not a copy.
func NewRepetitionDetector() *RepetitionDetector {
	d := new(RepetitionDetector)
	d.windowChars = defaultRepetitionWindow
	d.threshold = defaultRepetitionThreshold
	d.sizes = defaultRepetitionSizes
	return d
}
// Feed appends streamed text to the buffer and returns true when a repetition
// pattern is detected. Once triggered, the caller is expected to act on the
// signal and call Reset before reusing the detector.
//
// For each configured pattern size, the first size bytes of the scanned tail
// are taken as the pattern and the tail is searched left to right for
// non-overlapping occurrences of it; reaching threshold occurrences reports a
// repetition. The scanned tail is the last windowChars bytes of the buffer,
// widened to size*threshold bytes when the window alone could not hold
// threshold copies of the pattern. Without that widening such a size could
// never fire (with the defaults, 3×80 and 3×120 both exceed the 200-byte
// window).
//
// All lengths are in bytes. Empty text is ignored and returns false.
func (d *RepetitionDetector) Feed(text string) bool {
	if text == "" {
		return false
	}
	d.buf.WriteString(text)

	// Retain enough history for the widest scan: at least twice the window,
	// and never less than the largest size*threshold span.
	keep := 2 * d.windowChars
	for _, size := range d.sizes {
		if span := size * d.threshold; span > keep {
			keep = span
		}
	}
	// Trim only once the buffer has grown to twice the retained length, so
	// the copy does not happen on every delta.
	if d.buf.Len() > 2*keep {
		s := d.buf.String()
		kept := s[len(s)-keep:]
		d.buf.Reset()
		d.buf.WriteString(kept)
	}

	s := d.buf.String()
	for _, size := range d.sizes {
		// Not enough data yet for this size to occur threshold times.
		span := size * d.threshold
		if len(s) < span {
			continue
		}

		// Scan the trailing window, widened so threshold copies can fit.
		window := d.windowChars
		if window < span {
			window = span
		}
		tail := s
		if len(tail) > window {
			tail = tail[len(tail)-window:]
		}

		pattern := tail[:size]
		count := 0
		for i := 0; i+size <= len(tail); {
			if tail[i:i+size] != pattern {
				i++
				continue
			}
			count++
			if count >= d.threshold {
				return true
			}
			// Occurrences must not overlap: resume after this match.
			i += size
		}
	}
	return false
}
// Reset discards everything accumulated so far, leaving an empty buffer.
// Call at the start of a new turn.
func (d *RepetitionDetector) Reset() {
	d.buf = strings.Builder{}
}
// PatchFailureTracker keeps a failure count per file path for write/edit
// attempts within a turn. Triggers when a single path crosses the configured
// threshold, at which point the loop should steer the model away from further
// patches against that path.
//
// RecordFailure adds one to a path's count and RecordSuccess subtracts one
// (never going below zero), so the count is a running score rather than a
// strictly consecutive streak. Counts for different paths are independent.
type PatchFailureTracker struct {
// maxFailures is the count at which RecordFailure reports that the threshold
// has been reached.
maxFailures int
// failures maps a file path to its current failure count. A path with no
// entry has a count of zero. RecordFailure writes to this map, so it must be
// non-nil.
failures map[string]int
}
// NewPatchFailureTracker returns a tracker with no recorded failures that
// uses the package default failure threshold.
func NewPatchFailureTracker() *PatchFailureTracker {
	t := new(PatchFailureTracker)
	t.maxFailures = defaultMaxPatchFailures
	t.failures = map[string]int{}
	return t
}
// RecordFailure adds one failure to path's count and reports whether that
// brought the count up to the configured maximum. When it does, the entry for
// path is removed, so the signal fires again only after the failures have
// built back up from zero. An empty path is ignored and returns false.
func (t *PatchFailureTracker) RecordFailure(path string) bool {
	if path == "" {
		return false
	}

	count := t.failures[path] + 1
	t.failures[path] = count
	if count < t.maxFailures {
		return false
	}

	// Threshold reached: start this path over from zero.
	delete(t.failures, path)
	return true
}
// RecordSuccess lowers path's failure count by one, never below zero. The
// count is deliberately not cleared outright: a path that fails three times
// and then succeeds once is still a suspicious target, so it takes a run of
// successes to recover fully. An entry that reaches zero is removed. An empty
// path, or one with no recorded failures, is a no-op.
func (t *PatchFailureTracker) RecordSuccess(path string) {
	if path == "" {
		return
	}

	switch remaining := t.failures[path] - 1; {
	case remaining > 0:
		t.failures[path] = remaining
	case remaining == 0:
		delete(t.failures, path)
	}
}
// Reset drops every per-path counter by swapping in a fresh, empty map.
// Call at the start of a new turn.
func (t *PatchFailureTracker) Reset() {
	t.failures = map[string]int{}
}
// greetingMarkers are case-folded substrings that indicate the model has
// dropped its task context and reverted to an opening-of-conversation reply.
// Kept deliberately narrow — we only want to fire on responses that look
// like the start of a new chat, not on any polite phrasing.
//
// Every entry is written in lower case because DetectGreeting lower-cases the
// text and then does a plain substring search for each marker. A marker
// therefore matches anywhere in the text, including inside a longer sentence.
var greetingMarkers = []string{
"how can i help",
"how can i assist",
"what would you like",
"what can i do for you",
// Uses an ASCII apostrophe; a typographic one in the text will not match.
"i'm ready to",
"hi there",
}
// DetectGreeting reports whether text looks like a greeting/reset response:
// it returns true when the lower-cased text contains any greeting marker.
// Text shorter than 10 bytes is never treated as a greeting.
//
// Stateless. The loop should only consult this after a round that contained
// tool calls — a greeting at the start of a turn is fine.
func DetectGreeting(text string) bool {
	const minLen = 10
	if len(text) < minLen {
		return false
	}

	folded := strings.ToLower(text)
	found := false
	for i := 0; i < len(greetingMarkers) && !found; i++ {
		found = strings.Contains(folded, greetingMarkers[i])
	}
	return found
}
// Corrective injections returned to the model when a detector fires. These
// are appended as user messages before the next round so the model sees a
// concrete instruction rather than a system reset.

// RepetitionInjection returns the corrective message used when the
// repetition detector fires. The text is fixed.
func RepetitionInjection() string {
	const message = "[system] Your output is repeating itself in a loop. Stop. " +
		"Take a different approach, or state explicitly what is blocking you " +
		"and why the current strategy is not converging."
	return message
}
// PatchSpiralInjection returns the corrective message used when a single file
// has accumulated too many failed fs.edit attempts. It steers the model toward
// reading the file and rewriting it with fs.write rather than patching again.
// path is inserted verbatim in two places.
func PatchSpiralInjection(path string) string {
	const format = "[system] You have failed to edit %s several times. Stop using fs.edit " +
		"on this file. Instead: 1) read the current file with fs.read, " +
		"2) decide what the file should contain in full, " +
		"3) rewrite it with fs.write. Do not attempt another fs.edit on %s."
	return fmt.Sprintf(format, path, path)
}
// GreetingInjection returns the corrective message used when the model emits
// a greeting mid-task (context loss). The text is fixed.
func GreetingInjection() string {
	const message = "[system] You produced a greeting instead of continuing the task. " +
		"Look at the conversation above — there is work in progress. " +
		"Resume where you left off. Do not restart the conversation."
	return message
}