Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fa65a68728 | |||
| 8b9bdc2978 | |||
| eea26a262e |
@@ -114,6 +114,8 @@ Active work, newest first.
|
||||
doesn't corrupt the file.
|
||||
|
||||
Surfaced from the v0.3.1 launch wave (2026-05-24).
|
||||
Plan:
|
||||
[`docs/superpowers/plans/2026-05-24-config-migration.md`](docs/superpowers/plans/2026-05-24-config-migration.md).
|
||||
|
||||
- **Bandit selector — design decisions deferred.** The current
|
||||
selector (`internal/router/selector.go:scoreArm`) is greedy
|
||||
@@ -199,7 +201,8 @@ Active work, newest first.
|
||||
warning when the content matches sensitive heuristics, a
|
||||
consent-gated review step, and consistent treatment across the
|
||||
three paths. Cross-cuts with Phase F entropy work and the
|
||||
outgoing-scan firewall.
|
||||
outgoing-scan firewall. Plan:
|
||||
[`docs/superpowers/plans/2026-05-24-sensitive-content-policy.md`](docs/superpowers/plans/2026-05-24-sensitive-content-policy.md).
|
||||
- **Distribution — follow-ups.** v0.1.0 shipped (archives on
|
||||
github.com/VikingOwl91/gnoma/releases, multi-arch images on
|
||||
ghcr.io/vikingowl91/gnoma). Still optional: Homebrew tap,
|
||||
|
||||
+22
-1
@@ -397,7 +397,17 @@ func main() {
|
||||
|
||||
// Create router and register the provider as a single arm
|
||||
// (M4 foundation: one provider from CLI. Multi-provider routing comes with config.)
|
||||
rtr := router.New(router.Config{Logger: logger})
|
||||
// BanditParams come from [router.bandit] config keys; zero values
|
||||
// resolve to built-in defaults inside the router package.
|
||||
rtr := router.New(router.Config{
|
||||
Logger: logger,
|
||||
Bandit: router.BanditParams{
|
||||
QualityAlpha: cfg.Router.Bandit.QualityAlpha,
|
||||
MinObservations: cfg.Router.Bandit.MinObservations,
|
||||
ObservedWeight: cfg.Router.Bandit.ObservedWeight,
|
||||
StrengthBonus: cfg.Router.Bandit.StrengthBonus,
|
||||
},
|
||||
})
|
||||
|
||||
// Apply the prefer-routing-policy from config (default: auto).
|
||||
// Invalid values are rejected here with an actionable error rather
|
||||
@@ -672,6 +682,17 @@ func main() {
|
||||
store := persist.New(sessionID, fw.Incognito())
|
||||
logger.Debug("session store initialized", "dir", store.Dir())
|
||||
|
||||
// Per-session firewall audit log: append-only JSONL at
|
||||
// <projectRoot>/.gnoma/sessions/<sessionID>/audit.jsonl. Honours
|
||||
// incognito (writes skipped when active) and tolerates fs errors —
|
||||
// scan pipeline never depends on the audit succeeding.
|
||||
auditPath := filepath.Join(gnomacfg.ProjectRoot(), ".gnoma", "sessions", sessionID, "audit.jsonl")
|
||||
fw.SetAudit(security.NewAuditLogger(security.AuditLoggerConfig{
|
||||
Path: auditPath,
|
||||
Incognito: fw.Incognito(),
|
||||
Logger: logger,
|
||||
}))
|
||||
|
||||
// Create elf manager and register agent tools.
|
||||
// Must be created after fw and permChecker so elfs inherit security layers.
|
||||
elfMgr := elf.NewManager(elf.ManagerConfig{
|
||||
|
||||
@@ -0,0 +1,356 @@
|
||||
# Config Migration — 2026-05-24
|
||||
|
||||
Fixes the silent-corruption pattern in `internal/config/write.go`
|
||||
that produces zero-spammed config files, adds reader-side telemetry
|
||||
to surface the resulting layering bugs (`gnoma doctor`), ships an
|
||||
active migration command (`gnoma upgrade-config`), wires automatic
|
||||
project-level migration on startup, and introduces a per-user
|
||||
project registry so all of the above can operate cross-project.
|
||||
|
||||
Surfaces in TODO.md as "Config write/merge — silent corruption of
|
||||
layered configs" with five sub-items; this plan promotes that entry
|
||||
out of the bullet form into a phased design.
|
||||
|
||||
---
|
||||
|
||||
## Problem
|
||||
|
||||
`setConfig()` in `internal/config/write.go` reads the existing TOML
|
||||
into a zero-valued `Config` struct, mutates one field, and writes
|
||||
the entire struct back out. The encoder doesn't skip zero values,
|
||||
so every untouched field gets serialized at its Go default — empty
|
||||
strings, zero ints, `false` bools, empty maps.
|
||||
|
||||
The next layered load (`Load()` → `toml.Decode` over multiple
|
||||
files) then **does not** treat those present-but-zero fields as
|
||||
"unset" — TOML's "present field wins" semantics mean those zeros
|
||||
overwrite higher-priority layers. Concrete failure observed
|
||||
2026-05-24:
|
||||
|
||||
- User's global `~/.config/gnoma/config.toml` has
|
||||
`[router].prefer = "cloud"`.
|
||||
- An earlier `gnoma config set ...` call generated a project-level
|
||||
`.gnoma/config.toml` containing `[router].prefer = ""`.
|
||||
- The merge collapses to `Prefer = ""`, which
|
||||
`ParsePreferPolicy("")` maps to `PreferAuto`.
|
||||
- The TUI's `/router` command reads `auto` despite the global
|
||||
config saying `cloud`. No warning, no error — purely silent.
|
||||
|
||||
Same root cause produces zero-spammed global configs
|
||||
(`max_tokens = 0`, `permission.mode = ""`, etc.) that silently
|
||||
override sensible defaults in `internal/config/defaults.go`.
|
||||
|
||||
This affects every layered field — provider, permission, tools,
|
||||
session, router, security, slm. Cannot be patched per-field;
|
||||
needs a structural fix.
|
||||
|
||||
---
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **Schema redesign.** The current `Config` struct stays as-is.
|
||||
This plan addresses how it's written and read, not what fields
|
||||
exist.
|
||||
- **Validation.** Future work; `gnoma doctor` will flag obviously
|
||||
invalid values (empty enum strings, etc.) but a full validation
|
||||
pass against the schema is out of scope here.
|
||||
- **Migration of the bandit-router quality JSON.** Unrelated file,
|
||||
unrelated format, separate concerns.
|
||||
|
||||
---
|
||||
|
||||
## Approach overview
|
||||
|
||||
Five phases, in dependency order:
|
||||
|
||||
1. **Encoder fix** — stop generating zero-spam in the first place.
|
||||
2. **Project registry** — `~/.config/gnoma/projects.json` so later
|
||||
phases can operate cross-project without filesystem walks.
|
||||
3. **`gnoma doctor`** — read-only diagnostic, scans global +
|
||||
project configs (via registry), reports zero-spam, invalid
|
||||
enums, removed keys, and the effective-merged view.
|
||||
4. **`gnoma upgrade-config`** — active migration with `.bak`
|
||||
backup + diff output; targets one file or all known projects.
|
||||
5. **Auto-migration on startup** — when launch detects a
|
||||
zero-spammed project config, run upgrade-config silently with
|
||||
a banner-line notice.
|
||||
|
||||
Phases 1 + 2 land first. 3 builds on 1 + 2. 4 builds on 3. 5
|
||||
builds on 4.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Encoder fix
|
||||
|
||||
`setConfig()` is the bug generator. The TOML library
|
||||
(`BurntSushi/toml`) supports `omitempty` on struct tags but the
|
||||
project's `Config` struct doesn't use it. Three options:
|
||||
|
||||
### Option A — `omitempty` on all fields
|
||||
|
||||
Tag every field with `,omitempty`. The encoder skips fields at
|
||||
their Go zero value. **Caveat:** conflates "unset" with
|
||||
"explicitly zero" for primitive types — a user who actually
|
||||
wants `max_keep = 0` (no session retention) loses that setting on
|
||||
the next write.
|
||||
|
||||
### Option B — `pelletier/go-toml/v2` document model
|
||||
|
||||
Switch encoder to a TOML library that exposes a document AST.
|
||||
Edit only the targeted key, preserve everything else byte-for-byte.
|
||||
Cleaner semantics, bigger refactor — also affects the decoder side.
|
||||
|
||||
### Option C (chosen) — hybrid
|
||||
|
||||
Use `omitempty` for fields where the Go zero value is never
|
||||
user-intent (strings, maps, slices). For numeric fields where 0
|
||||
is a legitimate user choice, switch the field to a pointer
|
||||
(`*int`, `*float64`) so `nil` means "unset" and a pointer to `0` means
|
||||
"explicitly zero". On decode, fall back to defaults for nil
|
||||
pointers in the resolution layer.
|
||||
|
||||
This keeps the existing BurntSushi library, preserves user intent
|
||||
across the full type space, and limits churn to the fields where
|
||||
the zero/unset ambiguity actually matters.
|
||||
|
||||
### Phase 1 task list
|
||||
|
||||
- **P1-1:** Audit every `Config`-tree field. Tag string/map/slice
|
||||
fields with `,omitempty`. List numeric/bool fields that need
|
||||
pointer conversion.
|
||||
- **P1-2:** Convert numeric/bool fields requiring zero-vs-unset
|
||||
distinction to pointers. Update construction sites and getters.
|
||||
- **P1-3:** Add a `Resolve()` method on `Config` that walks the
|
||||
struct and substitutes default values for nil pointers, called
|
||||
exactly once at the end of `Load()`. All consumer code reads
|
||||
resolved values; raw layered structs are internal.
|
||||
- **P1-4:** Tests covering: (a) write-then-read roundtrip
|
||||
preserves only user-set fields, (b) explicit zero (e.g.
|
||||
`max_keep = 0`) survives the roundtrip, (c) field absent from
|
||||
TOML resolves to default.
|
||||
- **P1-5:** Backwards-compat: when reading an existing zero-spammed
|
||||
file, the resolver must treat all-zeros-in-a-section as the
|
||||
default — see Phase 5 for the heuristic.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Project registry
|
||||
|
||||
New file at `~/.config/gnoma/projects.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"projects": [
|
||||
{
|
||||
"path": "/home/user/git/foo",
|
||||
"first_seen": "2026-04-15T10:30:00Z",
|
||||
"last_seen": "2026-05-24T19:23:00Z",
|
||||
"session_count": 47
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2 task list
|
||||
|
||||
- **P2-1:** Add `internal/config/registry.go` with `Registry`,
|
||||
`Load`, `Save`, `Record(projectRoot)`, `Prune(staleAfter time.Duration)`.
|
||||
- **P2-2:** Save uses atomic-write (temp file + `os.Rename`) so a
|
||||
crash mid-write doesn't corrupt the file.
|
||||
- **P2-3:** Call `Registry.Record(projectRoot)` from
|
||||
`cmd/gnoma/main.go` right after the startup-safety banner
|
||||
decides to proceed. Failure is logged at Warn level but never
|
||||
blocks startup.
|
||||
- **P2-4:** Add `[config].project_registry` toggle in defaults.go
|
||||
(bool, default `true`). When `false`, Record is a no-op.
|
||||
- **P2-5:** Document the file in README §Security as part of the
|
||||
no-phone-home scope note: this is purely local, never sent.
|
||||
- **P2-6:** Tests: round-trip, atomic-write under fault injection,
|
||||
toggle off path.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — `gnoma doctor`
|
||||
|
||||
New subcommand. Read-only. Scans:
|
||||
|
||||
- Global config at `GlobalConfigPath()`.
|
||||
- Every project in the registry (or filesystem-scan fallback when
|
||||
the registry is disabled or empty).
|
||||
- Active profile (when profile mode is on).
|
||||
|
||||
Reports per-file:
|
||||
|
||||
- **Zero-spam fields** — present-with-zero where higher layer or
|
||||
default has non-zero. The very thing this plan exists to fix.
|
||||
- **Invalid enum values** — `permission.mode = ""`,
|
||||
`router.prefer = "yes"`, etc. Use existing parsers to detect.
|
||||
- **Unknown keys** — fields in the TOML that don't map to any
|
||||
`Config` struct field. Decoder ignores these silently today;
|
||||
doctor surfaces them.
|
||||
- **Removed keys** — known-historical fields from older schema
|
||||
versions; suggest removal.
|
||||
|
||||
Reports per-stack:
|
||||
|
||||
- **Effective-merged values** — what gnoma will actually use after
|
||||
layering. Helps the user see whether a project file is masking
|
||||
a global setting.
|
||||
|
||||
### Phase 3 task list
|
||||
|
||||
- **P3-1:** Add `cmd/gnoma/doctor_cmd.go` with the subcommand
|
||||
scaffold.
|
||||
- **P3-2:** `internal/config/doctor.go` with the scan logic;
|
||||
exported `Diagnose(paths []string) []Finding`.
|
||||
- **P3-3:** Output: human format by default, `--json` for
|
||||
CI/script consumption.
|
||||
- **P3-4:** Exit non-zero when findings have severity ≥ Warn so
|
||||
doctor is CI-friendly.
|
||||
- **P3-5:** `--all-projects` flag (default off; uses registry).
|
||||
- **P3-6:** Tests covering each finding type.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — `gnoma upgrade-config`
|
||||
|
||||
Active migration. Writes:
|
||||
|
||||
- Original file → `<path>.bak-YYYYMMDD-HHMMSS` (deterministic
|
||||
timestamp suffix).
|
||||
- Cleaned content → original path.
|
||||
- Stdout: unified diff of what changed.
|
||||
|
||||
### Phase 4 task list
|
||||
|
||||
- **P4-1:** Add `cmd/gnoma/upgrade_config_cmd.go`.
|
||||
- **P4-2:** `internal/config/upgrade.go` with `Upgrade(path string)`
|
||||
→ reads file, applies the Phase 1 cleaning (drop fields equal to
|
||||
their resolved default, keep explicit zeros that diverge from the
|
||||
default via the pointer semantics).
|
||||
- **P4-3:** Atomic two-step write: rename original to `.bak-...`,
|
||||
then atomic-write new content to original path. Crash midway
|
||||
leaves the `.bak` intact (the original path may be missing until the next run), never the corrupted state.
|
||||
- **P4-4:** `--all-projects` flag using the registry.
|
||||
- **P4-5:** `--dry-run` prints diffs without writing.
|
||||
- **P4-6:** Tests: round-trip of zero-spammed input → cleaned
|
||||
output → identical re-read; idempotency (running twice yields
|
||||
no second `.bak`).
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Auto-migration on startup
|
||||
|
||||
When `Load()` parses a project `.gnoma/config.toml` and the
|
||||
heuristic flags it as zero-spammed (every field at the Go zero
|
||||
value, no user content), gnoma:
|
||||
|
||||
- Runs the Phase 4 upgrade in-process.
|
||||
- Writes `.gnoma/config.toml.bak-...`.
|
||||
- Emits a single line to the startup safety banner:
|
||||
`config: migrated .gnoma/config.toml (see .bak)`.
|
||||
- Continues startup with the cleaned config.
|
||||
|
||||
### Heuristic for "zero-spam"
|
||||
|
||||
A config section is zero-spam if **all** of these hold:
|
||||
|
||||
- Every primitive field present in the file is at its Go zero
|
||||
value.
|
||||
- No `[[arms]]`, `[[mcp_servers]]`, or `[[hooks]]` blocks (those
|
||||
are always user content).
|
||||
- File was last modified ≥ 24h ago (so we don't migrate a config
|
||||
the user is actively editing).
|
||||
|
||||
If only some fields are zero and some are user-set, we don't touch
|
||||
it — the user's mix of explicit zeros and meaningful values takes
|
||||
precedence.
|
||||
|
||||
### Phase 5 task list
|
||||
|
||||
- **P5-1:** Add `isZeroSpam(*Config) bool` heuristic in
|
||||
`internal/config/upgrade.go`.
|
||||
- **P5-2:** Wire from `Load()` post-merge: if project layer
|
||||
satisfies `isZeroSpam` → call Upgrade on the project file, log via banner.
|
||||
- **P5-3:** Add `[config].auto_migrate` toggle, default `true`.
|
||||
Global configs are never auto-migrated; only project-level.
|
||||
- **P5-4:** Banner integration: the existing safety banner gets
|
||||
a new optional line for "config notices" right under the
|
||||
cwd/sensitivity summary.
|
||||
- **P5-5:** Tests: zero-spam project file gets migrated; mixed
|
||||
project file is left alone; recently-modified file is left
|
||||
alone; auto_migrate=false disables.
|
||||
|
||||
---
|
||||
|
||||
## Cross-cutting: schemas and resolution
|
||||
|
||||
The pointer-field design (Phase 1) needs a clear resolution layer.
|
||||
Proposal: every Config section gets a `Resolved...Section` mirror
|
||||
that has plain (non-pointer) types. After Load, the resolver
|
||||
populates one from the other, substituting defaults for nils.
|
||||
|
||||
Examples already exist in the codebase: `ResolvedSafetySection`
|
||||
mirrors `SafetySection`. The pattern is established; we just need
|
||||
to extend it.
|
||||
|
||||
Consumer-side: code reads from `cfg.Resolved.X` not `cfg.X`.
|
||||
Loud renaming will catch any reader still using the raw layered
|
||||
struct.
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
- **Pointer-field migration is wide-scope.** Every reader of the
|
||||
affected fields needs to change. Mitigated by the
|
||||
resolver-mirror pattern (`ResolvedXSection`) — readers move from
|
||||
one struct to another, but the call sites don't change shape.
|
||||
- **Auto-migration writes silently.** Users might be surprised
|
||||
even with the banner notice. Mitigated by `.bak` preservation
|
||||
and the heuristic only firing on files that are obviously
|
||||
zero-spam.
|
||||
- **Registry becomes the same class of bug.** Documented in the
|
||||
TODO entry already; Phase 2 explicitly requires atomic-write
|
||||
and `omitempty` discipline. If we get this wrong the fix is the
|
||||
same shape as Phase 1.
|
||||
- **Privacy.** The registry is a list of directories the user has
|
||||
worked in. Local-only, opt-out toggle, README note required.
|
||||
- **Backwards compatibility for tests.** Tests that construct
|
||||
`Config` by hand with explicit zeros may need updating.
|
||||
Approach: add a `MustResolve` helper for test construction so
|
||||
tests don't need to know about the pointer/resolver split.
|
||||
|
||||
---
|
||||
|
||||
## Rollout
|
||||
|
||||
Phases 1 + 2 ship together as a single release (encoder fix
|
||||
needs the resolver, registry is independent but small). Tag as
|
||||
`v0.4.0` — schema-touching changes warrant a minor bump per
|
||||
the project's pre-1.0 semver discipline.
|
||||
|
||||
Phase 3 (`gnoma doctor`) can ship in a `v0.4.x` patch — it's
|
||||
read-only and adds no surface compatibility risk.
|
||||
|
||||
Phase 4 (`gnoma upgrade-config`) ships in a follow-up `v0.4.x`.
|
||||
|
||||
Phase 5 (auto-migration) ships once Phase 4 has been in the wild
|
||||
for at least one release cycle, so users have a way to opt in /
|
||||
inspect before it becomes implicit.
|
||||
|
||||
---
|
||||
|
||||
## Open questions
|
||||
|
||||
- Should `gnoma doctor` also check that the `quality.json` file
|
||||
is well-formed? Same dir, different concern — probably belongs
|
||||
in doctor's scope as the umbrella "diagnose my gnoma install"
|
||||
command.
|
||||
- Registry size cap? After a year of usage on a busy machine
|
||||
the file could grow to a few thousand entries. Reasonable; no
|
||||
cap planned, but `Prune(staleAfter)` exposed for users who
|
||||
want manual cleanup.
|
||||
- Profiles: how do profile configs interact with the doctor /
|
||||
upgrade flow? Default: treat each profile file as its own
|
||||
upgradeable unit. Doctor lists findings per-profile.
|
||||
@@ -0,0 +1,278 @@
|
||||
# Sensitive Content — Unified Policy — 2026-05-24
|
||||
|
||||
Promotes the "sensitive-content handling — unified policy" TODO
|
||||
entry into a phased design. Three input paths can introduce
|
||||
sensitive content into the conversation context — pasted images,
|
||||
pasted text, and tool-read files. Today each path has different
|
||||
defences; this plan unifies them behind a single policy with a
|
||||
single consent UI.
|
||||
|
||||
Sibling concerns:
|
||||
[`2026-05-19-post-slm-unlock.md`](2026-05-19-post-slm-unlock.md)
|
||||
Phase F (entropy detection) and the outgoing-scan firewall
|
||||
already cover detection in some places; this plan unifies the
|
||||
*decision* layer that sits in front of them.
|
||||
|
||||
---
|
||||
|
||||
## Problem
|
||||
|
||||
Three input paths to the engine carry distinct sensitivity
|
||||
risks; each is handled differently today.
|
||||
|
||||
### Path 1 — Pasted images (Ctrl+V in the TUI)
|
||||
|
||||
Screenshot might contain API keys, terminal output with creds,
|
||||
private repo contents, family photos, etc. Today:
|
||||
|
||||
- Image bytes land in the user cache dir.
|
||||
- The router only sends to vision-capable arms.
|
||||
- Local arms are fine; cloud arms send full image content to
|
||||
the provider.
|
||||
- Incognito skips paste entirely (per the no-persistence
|
||||
contract).
|
||||
|
||||
What's missing: at-paste preview / warning. The user often does
|
||||
not realise what the screenshot contained until after it's been
|
||||
sent.
|
||||
|
||||
### Path 2 — Pasted text
|
||||
|
||||
User pastes a chunk into the input composer. Could be a log
|
||||
snippet with credentials, an `.env` file content, an SSH key,
|
||||
or just text. Today:
|
||||
|
||||
- Goes straight into the input buffer with no scanning.
|
||||
- Outgoing firewall scans the final composed message before
|
||||
send — *after* the user has already pressed Enter, often
|
||||
redacting silently in the background.
|
||||
- The user sees `[REDACTED]` in their own message after the
|
||||
fact, no consent step.
|
||||
|
||||
What's missing: at-paste detection so the user sees the warning
|
||||
*before* committing to send.
|
||||
|
||||
### Path 3 — Tool-read files
|
||||
|
||||
`fs_read`, `bash`, etc. surface file contents to the model. Today:
|
||||
|
||||
- Outgoing firewall scans tool *results* before they reach the
|
||||
next provider turn (`ScanToolResult`).
|
||||
- Format-aware entropy detection (Phase F-1) reduces false
|
||||
positives on UUIDs / SHA / ISO timestamps.
|
||||
- The audit log (just shipped) records what got blocked /
|
||||
redacted per session.
|
||||
|
||||
What's missing: nothing structurally on this path; it's the
|
||||
most-mature of the three. Listed here only for completeness so
|
||||
the unified policy can be honest about asymmetric coverage.
|
||||
|
||||
### The unification question
|
||||
|
||||
These three paths converge into "content that joins the context
|
||||
window." A consistent policy needs to answer, for each path:
|
||||
|
||||
1. **When** does detection run? (at paste / at send / at receive)
|
||||
2. **What** does the user see? (warning / preview / redacted
|
||||
placeholder / silent)
|
||||
3. **What** is their consent gate? (approve / deny / approve-with-
|
||||
redaction / skip)
|
||||
4. **Where** is the action recorded? (audit log, banner, slog)
|
||||
|
||||
Today the answers vary per path. This plan picks one set of
|
||||
answers and applies them everywhere.
|
||||
|
||||
---
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **New detectors.** This plan reuses the existing scanner
|
||||
(regex + entropy + unicode-sanitize). Phase F-2's SLM-assisted
|
||||
detector lands separately when telemetry warrants.
|
||||
- **Egress allowlist.** Tracked in the security-boundary TODO
|
||||
entry, separate plan.
|
||||
- **Provider-side redaction.** That's the provider's problem.
|
||||
This plan is about what leaves gnoma's process.
|
||||
|
||||
---
|
||||
|
||||
## Approach
|
||||
|
||||
Single policy module: `internal/security/sensitive_policy.go`.
|
||||
Exposes one decision function:
|
||||
|
||||
```go
|
||||
type Decision int
|
||||
const (
|
||||
DecisionAllow Decision = iota
|
||||
DecisionWarn // show warning, allow on confirm
|
||||
DecisionRedactAndAllow
|
||||
DecisionBlock
|
||||
)
|
||||
|
||||
type Inspection struct {
|
||||
Path string // "paste_text", "paste_image", "tool_result"
|
||||
Content string // for text paths
|
||||
ImageBytes []byte // for image paths; nil otherwise
|
||||
Matches []scanner.Match // pre-scanned hits
|
||||
}
|
||||
|
||||
func Decide(insp Inspection, mode IncognitoMode, prefs Preferences) Decision
|
||||
```
|
||||
|
||||
All three paths route through `Decide` with their own
|
||||
`Inspection`. UI surface — the at-paste prompt, the at-send
|
||||
warning, the redacted-placeholder view — sits in the TUI and is
|
||||
driven by the Decision value.
|
||||
|
||||
### Path-specific wiring
|
||||
|
||||
| Path | When | UI | Default Decision rules |
|
||||
|---|---|---|---|
|
||||
| paste_text | Ctrl+V into composer | Inline warning under input box, with `Tab` to expand match details | Match in scanner → `Warn` (text stays, user dismisses); explicit block-tier match → `Block` (paste dropped) |
|
||||
| paste_image | Ctrl+V image | Pre-paste OCR scan (small local model) + warning before insertion | OCR finds secret pattern → `Warn`; user can choose `Redact` (image kept, warning attached) or `Cancel`. Incognito → `Block` (already the case today). |
|
||||
| tool_result | After tool runs | Banner: `firewall: redacted N items in this tool result` | Existing behaviour. `Decide` invoked just to keep the API surface consistent; matches go to audit log. |
|
||||
|
||||
### Preferences
|
||||
|
||||
New `[security.sensitive]` config section:
|
||||
|
||||
```toml
|
||||
[security.sensitive]
|
||||
warn_on_paste_text = true # default true
|
||||
warn_on_paste_image = true # default true
|
||||
ocr_image_paste = false # opt-in: requires local vision arm
|
||||
auto_redact = false # default false: ask first, redact second
|
||||
silent_tool_results = false # default false: show banner when redactions happen
|
||||
```
|
||||
|
||||
### Incognito interaction
|
||||
|
||||
When incognito is active, **every** Decision is treated as either
|
||||
`Block` or `RedactAndAllow` — never `Warn`-then-`Allow`. Incognito
|
||||
implies "I don't trust this conversation to persist"; the
|
||||
sensible default is to be strict about what flows in.
|
||||
|
||||
---
|
||||
|
||||
## Phases
|
||||
|
||||
### Phase A — Policy module + config
|
||||
|
||||
- **A-1:** Add `[security.sensitive]` section to config.go with
|
||||
the five flags above.
|
||||
- **A-2:** Add `internal/security/sensitive_policy.go` with
|
||||
`Inspection`, `Decision`, `Decide`.
|
||||
- **A-3:** Unit tests for the decision matrix.
|
||||
|
||||
### Phase B — Path 2 (pasted text)
|
||||
|
||||
Highest user-visible payoff for the smallest surface.
|
||||
|
||||
- **B-1:** TUI input composer intercepts paste, runs
|
||||
`Decide(paste_text, ...)` before the bytes enter the buffer.
|
||||
- **B-2:** Decision = Warn → status-line warning, paste still
|
||||
goes in. `Tab` expands details.
|
||||
- **B-3:** Decision = Block → paste discarded, status line
|
||||
explains why; user can override with `Ctrl+Shift+V`
|
||||
(force-paste) which bypasses but writes to audit log.
|
||||
- **B-4:** Tests: paste-of-known-secret triggers warning;
|
||||
redacted variant shows what would have been sent.
|
||||
|
||||
### Phase C — Path 3 (tool-results) banner
|
||||
|
||||
- **C-1:** When `ScanToolResult` redacts ≥1 item, the engine
|
||||
emits a system message: `firewall: redacted 2 items in
|
||||
read-file output (see audit log)`.
|
||||
- **C-2:** Gated behind `silent_tool_results` (default `false`).
|
||||
Users who already trust the firewall can flip it on.
|
||||
- **C-3:** Tests: integration test asserting the system
|
||||
message appears.
|
||||
|
||||
### Phase D — Path 1 (pasted images)
|
||||
|
||||
Most complex. Image OCR requires a local vision model; without
|
||||
one the paste falls back to today's behaviour.
|
||||
|
||||
- **D-1:** Add OCR hook: when `ocr_image_paste = true` and a
|
||||
vision-capable local arm is available, run a small OCR pass
|
||||
over the image before insertion.
|
||||
- **D-2:** Feed OCR output through the regex/entropy scanner.
|
||||
Matches → `Decide(paste_image, ...)` with the original image
|
||||
attached.
|
||||
- **D-3:** TUI shows a preview thumbnail + warning before
|
||||
insertion confirmation.
|
||||
- **D-4:** Without a vision arm: feature degrades gracefully
|
||||
(no OCR, paste proceeds as today, banner notes "image paste
|
||||
scan unavailable — no local vision arm").
|
||||
|
||||
### Phase E — Audit log integration
|
||||
|
||||
All four Decision outcomes get an audit entry. The audit log
|
||||
already has the file format from the security-boundary work;
|
||||
just need to define new Action values:
|
||||
|
||||
- `paste_warn`, `paste_block`, `paste_force_override`
|
||||
- `image_paste_warn`, `image_paste_block`, `image_paste_ocr_skip`
|
||||
- `tool_result_banner` (when redactions surfaced to user)
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
- **OCR adds latency to paste.** Bad UX if image OCR takes >300ms.
|
||||
Mitigation: hard-cap OCR time at 500ms, skip if exceeded, fall
|
||||
back to no-scan path with banner notice. Local vision models on
|
||||
consumer hardware should comfortably make this budget.
|
||||
- **False positives on text paste become annoying.** If
|
||||
`warn_on_paste_text = true` fires on every code snippet, users
|
||||
turn it off and the protection is gone. Use the same
|
||||
entropy_safelist Phase F-1 ships (uuid/sha/iso8601/url) — those
|
||||
are the high-FP categories.
|
||||
- **OCR introduces a new attack surface.** A malicious image could
|
||||
exploit the OCR model. Mitigation: only local-arm OCR (the
|
||||
attacker's input never leaves the machine); never call cloud
|
||||
vision models for OCR (would defeat the privacy purpose).
|
||||
- **Phase D depends on having a local vision model.** Users without
|
||||
one get degraded UX. Document this clearly; consider whether to
|
||||
ship a small bundled OCR-tuned model (probably no — adds 100MB+
|
||||
to install).
|
||||
|
||||
---
|
||||
|
||||
## Open questions
|
||||
|
||||
- Should there be a "trusted projects" list where the warnings
|
||||
are suppressed? Could live in the project registry (sibling
|
||||
plan). Useful for monorepos where the user explicitly trusts
|
||||
the local code.
|
||||
- The `Ctrl+Shift+V` force-paste override is a footgun. Do we
|
||||
want a confirm-second-time dialog, or just the keybind?
|
||||
- Should clipboard contents be cleared from the host clipboard
|
||||
after a sensitive paste? Cross-platform-tricky; defer.
|
||||
- Sensitive-pattern feedback loop: when a user dismisses a warning
|
||||
as "this isn't a secret", do we learn from that? Privacy concern
|
||||
— would need an explicit opt-in.
|
||||
|
||||
---
|
||||
|
||||
## Rollout
|
||||
|
||||
Phases A + B + C land together as one feature release. Phase D
|
||||
(image OCR) is opt-in (`ocr_image_paste = true`) and can land in
|
||||
a follow-up patch — its surface is large and benefits from real-
|
||||
world UX feedback. Phase E threads through all four; it lands
|
||||
incrementally per phase, not as a single batch.
|
||||
|
||||
Realistic target: Phase A/B/C in v0.5.0; Phase D in v0.5.x. All
|
||||
behaviour is gated behind the five config flags so existing users
|
||||
who don't opt in see no behavioural change.
|
||||
|
||||
---
|
||||
|
||||
## Cross-references
|
||||
|
||||
- TODO.md entry "Sensitive-content handling — unified policy"
|
||||
- [`2026-05-19-post-slm-unlock.md`](2026-05-19-post-slm-unlock.md) — Phase F entropy detection
|
||||
- [`2026-05-19-security-wave2-incognito.md`](2026-05-19-security-wave2-incognito.md) — incognito-mode contract
|
||||
- TODO.md entry "Security boundary — egress controls + session audit log" — the audit log this plan piggybacks on
|
||||
@@ -157,6 +157,40 @@ type RouterSection struct {
|
||||
// and incognito take priority over this knob. See
|
||||
// docs/superpowers/plans/2026-05-23-prefer-routing-policy.md.
|
||||
Prefer string `toml:"prefer"`
|
||||
|
||||
// Bandit exposes the selector's tuning knobs. Defaults preserve
|
||||
// previous hard-coded behaviour exactly; only set these when you
|
||||
// need to tune the EMA quality tracker for an unusual workload.
|
||||
Bandit BanditSection `toml:"bandit"`
|
||||
}
|
||||
|
||||
// BanditSection holds the scoring knobs for the EMA quality tracker
|
||||
// and the score blend used by the selector. Each field has a sentinel
|
||||
// zero value that means "use the built-in default" so an empty TOML
|
||||
// block behaves identically to the pre-config behaviour. See
|
||||
// internal/router/feedback.go and internal/router/selector.go for the
|
||||
// formulas these knobs feed into.
//
// NOTE(review): because zero is the sentinel, an explicit 0 cannot be
// expressed for any of these knobs (e.g. a strength bonus of exactly
// 0) — confirm that is acceptable, or switch to pointer fields.
|
||||
type BanditSection struct {
|
||||
// QualityAlpha is the EMA smoothing factor for arm-quality
|
||||
// observations. Larger values weight recent observations more.
|
||||
// Default: 0.3 (~3-sample memory). 0.0 here means "use default".
|
||||
QualityAlpha float64 `toml:"quality_alpha"`
|
||||
|
||||
// MinObservations is the minimum number of samples required
|
||||
// before observed EMA overrides the heuristic fallback. Default:
|
||||
// 3. 0 here means "use default".
|
||||
MinObservations int `toml:"min_observations"`
|
||||
|
||||
// ObservedWeight is the weight of the observed EMA in the
|
||||
// observed/heuristic blend inside scoreArm: the final quality is
|
||||
// `observed*W + heuristic*(1-W)`. Default: 0.7. 0.0 here means
|
||||
// "use default".
|
||||
ObservedWeight float64 `toml:"observed_weight"`
|
||||
|
||||
// StrengthBonus is the quality bonus added when an arm declares
|
||||
// the current task type in its Strengths list. Default: 0.15.
|
||||
// 0.0 here means "use default".
|
||||
StrengthBonus float64 `toml:"strength_bonus"`
|
||||
}
|
||||
|
||||
// MCPServerConfig defines an MCP server to start and connect to.
|
||||
|
||||
@@ -57,12 +57,12 @@ func benchTasks() []Task {
|
||||
func BenchmarkSelectBest(b *testing.B) {
|
||||
arms := benchArms()
|
||||
tasks := benchTasks()
|
||||
qt := NewQualityTracker()
|
||||
qt := NewQualityTracker(0, 0)
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
for _, task := range tasks {
|
||||
selectBest(qt, arms, task, PreferAuto)
|
||||
selectBest(qt, BanditParams{}, arms, task, PreferAuto)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -99,13 +99,13 @@ func BenchmarkRouterSelect(b *testing.B) {
|
||||
|
||||
func BenchmarkScoreArm(b *testing.B) {
|
||||
arms := benchArms()
|
||||
qt := NewQualityTracker()
|
||||
qt := NewQualityTracker(0, 0)
|
||||
task := Task{Type: TaskGeneration, Priority: PriorityNormal, EstimatedTokens: 2000, RequiresTools: true, ComplexityScore: 0.5}
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
for _, arm := range arms {
|
||||
scoreArm(qt, arm, task)
|
||||
scoreArm(qt, BanditParams{}, arm, task)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,9 +2,15 @@ package router
|
||||
|
||||
import "sync"
|
||||
|
||||
// Built-in defaults for the bandit knobs. Surfaced via
|
||||
// [router.bandit] config keys; see BanditParams in router.go. Kept
|
||||
// here so the QualityTracker has a sensible fallback when constructed
|
||||
// without explicit parameters (tests, ad-hoc callers).
|
||||
const (
|
||||
qualityAlpha = 0.3 // EMA smoothing factor (~3-sample memory)
|
||||
minObservations = 3 // min samples before observed score overrides heuristic
|
||||
defaultQualityAlpha = 0.3 // EMA smoothing factor (~3-sample memory)
|
||||
defaultMinObservations = 3 // min samples before observed score overrides heuristic
|
||||
defaultObservedWeight = 0.7 // weight of observed score in observed/heuristic blend
|
||||
defaultStrengthBonus = 0.15
|
||||
)
|
||||
|
||||
// EMAScore tracks an exponential moving average quality score.
|
||||
@@ -19,13 +25,27 @@ type QualityTracker struct {
|
||||
mu sync.RWMutex
|
||||
scores map[ArmID]map[TaskType]*EMAScore
|
||||
classifierCount map[ClassifierSource]int
|
||||
|
||||
// Configurable knobs — set via NewQualityTracker. Pass 0 for any
|
||||
// argument to keep the built-in default.
|
||||
alpha float64
|
||||
minObservations int
|
||||
}
|
||||
|
||||
// NewQualityTracker returns an empty QualityTracker.
|
||||
func NewQualityTracker() *QualityTracker {
|
||||
// NewQualityTracker returns an empty QualityTracker. Pass 0 for any
|
||||
// argument to keep the built-in default (alpha=0.3, minObs=3).
|
||||
func NewQualityTracker(alpha float64, minObs int) *QualityTracker {
|
||||
if alpha == 0 {
|
||||
alpha = defaultQualityAlpha
|
||||
}
|
||||
if minObs == 0 {
|
||||
minObs = defaultMinObservations
|
||||
}
|
||||
return &QualityTracker{
|
||||
scores: make(map[ArmID]map[TaskType]*EMAScore),
|
||||
classifierCount: make(map[ClassifierSource]int),
|
||||
alpha: alpha,
|
||||
minObservations: minObs,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +91,7 @@ func (qt *QualityTracker) Record(armID ArmID, taskType TaskType, success bool) {
|
||||
if s.Count == 0 {
|
||||
s.Value = observation
|
||||
} else {
|
||||
s.Value = qualityAlpha*observation + (1-qualityAlpha)*s.Value
|
||||
s.Value = qt.alpha*observation + (1-qt.alpha)*s.Value
|
||||
}
|
||||
s.Count++
|
||||
}
|
||||
@@ -86,7 +106,7 @@ func (qt *QualityTracker) Quality(armID ArmID, taskType TaskType) (score float64
|
||||
return 0, false
|
||||
}
|
||||
s, ok := m[taskType]
|
||||
if !ok || s.Count < minObservations {
|
||||
if !ok || s.Count < qt.minObservations {
|
||||
return 0, false
|
||||
}
|
||||
return s.Value, true
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
func TestQualityTracker_NoDataReturnsHeuristic(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
_, hasData := qt.Quality("arm:model", router.TaskGeneration)
|
||||
if hasData {
|
||||
t.Error("expected no data for unobserved arm")
|
||||
@@ -16,7 +16,7 @@ func TestQualityTracker_NoDataReturnsHeuristic(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_RecordUpdatesEMA(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
for i := 0; i < 3; i++ {
|
||||
qt.Record("arm:model", router.TaskGeneration, true)
|
||||
}
|
||||
@@ -30,7 +30,7 @@ func TestQualityTracker_RecordUpdatesEMA(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_AllFailuresLowScore(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
for i := 0; i < 5; i++ {
|
||||
qt.Record("arm:model", router.TaskDebug, false)
|
||||
}
|
||||
@@ -41,7 +41,7 @@ func TestQualityTracker_AllFailuresLowScore(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_ConcurrentSafe(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < 10; i++ {
|
||||
go func(success bool) {
|
||||
@@ -113,3 +113,45 @@ func TestQualityTracker_InsufficientDataFallsBackToHeuristic(t *testing.T) {
|
||||
}
|
||||
decision.Rollback()
|
||||
}
|
||||
|
||||
func TestQualityTracker_CustomAlphaShortensMemory(t *testing.T) {
|
||||
// alpha=0.9 weights the latest sample heavily; after a single
|
||||
// failure the score should drop further than with the default 0.3.
|
||||
fast := router.NewQualityTracker(0.9, 0)
|
||||
slow := router.NewQualityTracker(0.0, 0) // 0 → default 0.3
|
||||
|
||||
for _, qt := range []*router.QualityTracker{fast, slow} {
|
||||
// Build up history at the high end with 5 successes.
|
||||
for i := 0; i < 5; i++ {
|
||||
qt.Record("arm:m", router.TaskGeneration, true)
|
||||
}
|
||||
// One failure.
|
||||
qt.Record("arm:m", router.TaskGeneration, false)
|
||||
}
|
||||
|
||||
fastScore, _ := fast.Quality("arm:m", router.TaskGeneration)
|
||||
slowScore, _ := slow.Quality("arm:m", router.TaskGeneration)
|
||||
|
||||
if !(fastScore < slowScore) {
|
||||
t.Errorf("expected fast alpha (0.9) to drop quality faster than default (0.3): fast=%f slow=%f", fastScore, slowScore)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQualityTracker_CustomMinObservationsGatesScore(t *testing.T) {
|
||||
// minObs=10 means Quality should return hasData=false until 10
|
||||
// observations are recorded, even though the default would say
|
||||
// "yes" after 3.
|
||||
qt := router.NewQualityTracker(0, 10)
|
||||
for i := 0; i < 5; i++ {
|
||||
qt.Record("arm:m", router.TaskGeneration, true)
|
||||
}
|
||||
if _, hasData := qt.Quality("arm:m", router.TaskGeneration); hasData {
|
||||
t.Error("expected hasData=false at 5 observations with minObs=10")
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
qt.Record("arm:m", router.TaskGeneration, true)
|
||||
}
|
||||
if _, hasData := qt.Quality("arm:m", router.TaskGeneration); !hasData {
|
||||
t.Error("expected hasData=true after 10 observations with minObs=10")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
func TestQualityTracker_SnapshotRestore_RoundTrip(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
// Record some outcomes
|
||||
qt.Record("anthropic/claude-3-5-sonnet", router.TaskGeneration, true)
|
||||
qt.Record("anthropic/claude-3-5-sonnet", router.TaskGeneration, true)
|
||||
@@ -33,7 +33,7 @@ func TestQualityTracker_SnapshotRestore_RoundTrip(t *testing.T) {
|
||||
}
|
||||
|
||||
// Restore into a fresh tracker
|
||||
qt2 := router.NewQualityTracker()
|
||||
qt2 := router.NewQualityTracker(0, 0)
|
||||
qt2.Restore(restored)
|
||||
|
||||
// After restore, Quality() should return data (Count >= minObservations=3)
|
||||
@@ -47,7 +47,7 @@ func TestQualityTracker_SnapshotRestore_RoundTrip(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_Snapshot_Empty(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
snap := qt.Snapshot()
|
||||
if snap.Scores == nil {
|
||||
t.Error("scores map should be initialized (not nil)")
|
||||
@@ -58,7 +58,7 @@ func TestQualityTracker_Snapshot_Empty(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_ClassifierCounts_RecordAndSnapshot(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
qt.RecordClassifier(router.ClassifierHeuristic)
|
||||
qt.RecordClassifier(router.ClassifierSLM)
|
||||
qt.RecordClassifier(router.ClassifierSLM)
|
||||
@@ -92,7 +92,7 @@ func TestQualityTracker_ClassifierCounts_RecordAndSnapshot(t *testing.T) {
|
||||
if err := json.Unmarshal(data, &restored); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
qt2 := router.NewQualityTracker()
|
||||
qt2 := router.NewQualityTracker(0, 0)
|
||||
qt2.Restore(restored)
|
||||
if qt2.ClassifierCounts()[router.ClassifierSLM] != 2 {
|
||||
t.Errorf("restored slm count = %d, want 2", qt2.ClassifierCounts()[router.ClassifierSLM])
|
||||
@@ -107,7 +107,7 @@ func TestQualityTracker_Restore_BackCompat_NoClassifierCounts(t *testing.T) {
|
||||
if err := json.Unmarshal(legacy, &snap); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
qt.Restore(snap)
|
||||
if qt.ClassifierCounts() == nil {
|
||||
t.Error("ClassifierCounts() must return a non-nil map after restoring old snapshot")
|
||||
@@ -122,7 +122,7 @@ func TestQualityTracker_Restore_BackCompat_NoClassifierCounts(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestQualityTracker_Restore_Replaces(t *testing.T) {
|
||||
qt := router.NewQualityTracker()
|
||||
qt := router.NewQualityTracker(0, 0)
|
||||
qt.Record("arm-a", router.TaskDebug, true)
|
||||
qt.Record("arm-a", router.TaskDebug, true)
|
||||
qt.Record("arm-a", router.TaskDebug, true)
|
||||
|
||||
@@ -27,6 +27,7 @@ type Router struct {
|
||||
preferPolicy PreferPolicy
|
||||
|
||||
quality *QualityTracker
|
||||
bandit BanditParams
|
||||
}
|
||||
|
||||
// PreferPolicy biases the scoring step toward local or cloud arms.
|
||||
@@ -77,6 +78,41 @@ func (p PreferPolicy) String() string {
|
||||
|
||||
type Config struct {
|
||||
Logger *slog.Logger
|
||||
// Bandit tunes the selector's scoring knobs. Pass a zero value to
|
||||
// keep all pre-config behaviour byte-identical; set individual
|
||||
// fields to override the corresponding default.
|
||||
Bandit BanditParams
|
||||
}
|
||||
|
||||
// BanditParams groups the tunables of the EMA quality tracker and of
// the score blend used by the selector. Zero is the "use default"
// sentinel for every field, so a zero-valued BanditParams reproduces
// the pre-config hard-coded constants. resolveBanditParams performs the
// substitution.
type BanditParams struct {
	// QualityAlpha is the EMA smoothing factor handed to
	// NewQualityTracker.
	QualityAlpha float64
	// MinObservations is the observation gate handed to
	// NewQualityTracker.
	MinObservations int
	// ObservedWeight is the share of the observed EMA in scoreArm's
	// observed/heuristic quality blend.
	ObservedWeight float64
	// StrengthBonus is added to an arm's quality in scoreArm when the
	// arm declares the task type as a strength.
	StrengthBonus float64
}
|
||||
|
||||
// resolveBanditParams fills in the built-in defaults for any field
|
||||
// left at its zero value. Centralised so the same defaults apply
|
||||
// across NewQualityTracker, scoreArm, and any future caller.
|
||||
func resolveBanditParams(p BanditParams) BanditParams {
|
||||
if p.QualityAlpha == 0 {
|
||||
p.QualityAlpha = defaultQualityAlpha
|
||||
}
|
||||
if p.MinObservations == 0 {
|
||||
p.MinObservations = defaultMinObservations
|
||||
}
|
||||
if p.ObservedWeight == 0 {
|
||||
p.ObservedWeight = defaultObservedWeight
|
||||
}
|
||||
if p.StrengthBonus == 0 {
|
||||
p.StrengthBonus = defaultStrengthBonus
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func New(cfg Config) *Router {
|
||||
@@ -84,10 +120,12 @@ func New(cfg Config) *Router {
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
params := resolveBanditParams(cfg.Bandit)
|
||||
return &Router{
|
||||
arms: make(map[ArmID]*Arm),
|
||||
logger: logger,
|
||||
quality: NewQualityTracker(),
|
||||
quality: NewQualityTracker(params.QualityAlpha, params.MinObservations),
|
||||
bandit: params,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,7 +210,7 @@ func (r *Router) Select(task Task) RoutingDecision {
|
||||
}
|
||||
|
||||
// Select best
|
||||
best := selectBest(r.quality, feasible, task, r.preferPolicy)
|
||||
best := selectBest(r.quality, r.bandit, feasible, task, r.preferPolicy)
|
||||
if best == nil {
|
||||
return RoutingDecision{Error: fmt.Errorf("selection failed")}
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ func TestSelectBest_PrefersToolSupport(t *testing.T) {
|
||||
}
|
||||
|
||||
task := Task{Type: TaskGeneration, RequiresTools: true, Priority: PriorityNormal}
|
||||
best := selectBest(nil, []*Arm{withoutTools, withTools}, task, PreferAuto)
|
||||
best := selectBest(nil, BanditParams{}, []*Arm{withoutTools, withTools}, task, PreferAuto)
|
||||
|
||||
if best.ID != "a/with-tools" {
|
||||
t.Errorf("should prefer arm with tool support, got %s", best.ID)
|
||||
@@ -282,7 +282,7 @@ func TestSelectBest_PrefersThinkingForPlanning(t *testing.T) {
|
||||
}
|
||||
|
||||
task := Task{Type: TaskPlanning, RequiresTools: true, Priority: PriorityNormal, EstimatedTokens: 5000}
|
||||
best := selectBest(nil, []*Arm{noThinking, thinking}, task, PreferAuto)
|
||||
best := selectBest(nil, BanditParams{}, []*Arm{noThinking, thinking}, task, PreferAuto)
|
||||
|
||||
if best.ID != "a/thinking" {
|
||||
t.Errorf("should prefer thinking model for planning, got %s", best.ID)
|
||||
@@ -625,7 +625,7 @@ func TestSelectBest_SmallArmWinsTrivialTask(t *testing.T) {
|
||||
Capabilities: provider.Capabilities{ToolUse: false},
|
||||
}
|
||||
task := Task{Type: TaskExplain, ComplexityScore: 0.05, RequiresTools: false}
|
||||
got := selectBest(nil, []*Arm{cliArm, smallArm}, task, PreferAuto)
|
||||
got := selectBest(nil, BanditParams{}, []*Arm{cliArm, smallArm}, task, PreferAuto)
|
||||
if got != smallArm {
|
||||
t.Errorf("selectBest = %v, want smallArm", got)
|
||||
}
|
||||
@@ -647,7 +647,7 @@ func TestSelectBest_CLIAgentWinsComplexTask(t *testing.T) {
|
||||
Capabilities: provider.Capabilities{ToolUse: false},
|
||||
}
|
||||
task := Task{Type: TaskRefactor, ComplexityScore: 0.7, RequiresTools: true}
|
||||
got := selectBest(nil, []*Arm{cliArm, smallArm}, task, PreferAuto)
|
||||
got := selectBest(nil, BanditParams{}, []*Arm{cliArm, smallArm}, task, PreferAuto)
|
||||
if got != cliArm {
|
||||
t.Errorf("selectBest = %v, want cliArm", got)
|
||||
}
|
||||
@@ -672,21 +672,21 @@ func TestSelectBest_TierPreference(t *testing.T) {
|
||||
task := Task{Type: TaskGeneration, Priority: PriorityNormal, EstimatedTokens: 1000}
|
||||
|
||||
t.Run("CLI beats local and API", func(t *testing.T) {
|
||||
best := selectBest(nil, []*Arm{apiArm, localArm, cliArm}, task, PreferAuto)
|
||||
best := selectBest(nil, BanditParams{}, []*Arm{apiArm, localArm, cliArm}, task, PreferAuto)
|
||||
if best.ID != "subprocess/claude" {
|
||||
t.Errorf("want subprocess/claude (tier 0), got %s", best.ID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("local beats API when no CLI", func(t *testing.T) {
|
||||
best := selectBest(nil, []*Arm{apiArm, localArm}, task, PreferAuto)
|
||||
best := selectBest(nil, BanditParams{}, []*Arm{apiArm, localArm}, task, PreferAuto)
|
||||
if best.ID != "ollama/llama3" {
|
||||
t.Errorf("want ollama/llama3 (tier 1), got %s", best.ID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("API selected when only option", func(t *testing.T) {
|
||||
best := selectBest(nil, []*Arm{apiArm}, task, PreferAuto)
|
||||
best := selectBest(nil, BanditParams{}, []*Arm{apiArm}, task, PreferAuto)
|
||||
if best == nil || best.ID != "mistral/mistral-large" {
|
||||
t.Errorf("want mistral/mistral-large (tier 2), got %v", best)
|
||||
}
|
||||
|
||||
+13
-13
@@ -98,7 +98,7 @@ func armBaseTier(arm *Arm, task Task) int {
|
||||
//
|
||||
// Step 2 (fallback): walk tiers low→high. Within a tier, highest-scoring
|
||||
// arm wins.
|
||||
func selectBest(qt *QualityTracker, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
|
||||
func selectBest(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
|
||||
if len(arms) == 0 {
|
||||
return nil
|
||||
}
|
||||
@@ -110,7 +110,7 @@ func selectBest(qt *QualityTracker, arms []*Arm, task Task, prefer PreferPolicy)
|
||||
}
|
||||
}
|
||||
if len(promoted) > 0 {
|
||||
return bestScored(qt, promoted, task, prefer)
|
||||
return bestScored(qt, params, promoted, task, prefer)
|
||||
}
|
||||
|
||||
// Walk tiers low→high. armTier returns up to 5 when prefer is set
|
||||
@@ -124,18 +124,18 @@ func selectBest(qt *QualityTracker, arms []*Arm, task Task, prefer PreferPolicy)
|
||||
}
|
||||
}
|
||||
if len(inTier) > 0 {
|
||||
return bestScored(qt, inTier, task, prefer)
|
||||
return bestScored(qt, params, inTier, task, prefer)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// bestScored returns the highest-scoring arm within a set.
|
||||
func bestScored(qt *QualityTracker, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
|
||||
func bestScored(qt *QualityTracker, params BanditParams, arms []*Arm, task Task, prefer PreferPolicy) *Arm {
|
||||
var best *Arm
|
||||
bestScore := math.Inf(-1)
|
||||
for _, arm := range arms {
|
||||
score := scoreArm(qt, arm, task) * policyMultiplier(arm, prefer)
|
||||
score := scoreArm(qt, params, arm, task) * policyMultiplier(arm, prefer)
|
||||
if score > bestScore {
|
||||
bestScore = score
|
||||
best = arm
|
||||
@@ -172,13 +172,12 @@ func policyMultiplier(arm *Arm, p PreferPolicy) float64 {
|
||||
}
|
||||
}
|
||||
|
||||
// strengthScoreBonus is added to quality when an arm's Strengths list
|
||||
// matches the incoming task type. Tunable in one place.
|
||||
const strengthScoreBonus = 0.15
|
||||
|
||||
// scoreArm computes a quality/cost score for an arm.
|
||||
// When the quality tracker has sufficient observations, blends observed EMA
|
||||
// (70%) with heuristic (30%). Falls back to pure heuristic otherwise.
|
||||
// (default 70%) with heuristic (default 30%). Falls back to pure heuristic
|
||||
// otherwise. The blend ratio and strength bonus are tunable via
|
||||
// BanditParams (config: [router.bandit]); a zero-valued params falls back
|
||||
// to the built-in defaults.
|
||||
//
|
||||
// Strengths add a fixed bonus to quality when matching task.Type. CostWeight
|
||||
// dampens the cost penalty linearly:
|
||||
@@ -189,16 +188,17 @@ const strengthScoreBonus = 0.15
|
||||
// the original effectiveCost == cost. With CostWeight=0 cost is fully
|
||||
// ignored (effectiveCost = 1.0). Local arms with sub-1 raw costs are not
|
||||
// amplified by fractional weights (the linear formula stays monotone).
|
||||
func scoreArm(qt *QualityTracker, arm *Arm, task Task) float64 {
|
||||
func scoreArm(qt *QualityTracker, params BanditParams, arm *Arm, task Task) float64 {
|
||||
params = resolveBanditParams(params)
|
||||
hq := heuristicQuality(arm, task)
|
||||
quality := hq
|
||||
if qt != nil {
|
||||
if observed, hasData := qt.Quality(arm.ID, task.Type); hasData {
|
||||
quality = 0.7*observed + 0.3*hq
|
||||
quality = params.ObservedWeight*observed + (1-params.ObservedWeight)*hq
|
||||
}
|
||||
}
|
||||
if arm.HasStrength(task.Type) {
|
||||
quality += strengthScoreBonus
|
||||
quality += params.StrengthBonus
|
||||
}
|
||||
value := task.ValueScore()
|
||||
rawCost := effectiveCost(arm, task)
|
||||
|
||||
@@ -65,17 +65,17 @@ func TestScoreArm_CostWeightAffectsArmComparison(t *testing.T) {
|
||||
|
||||
// CostWeight=1.0: cost dominates, cheap arm wins.
|
||||
cheap.CostWeight, expensive.CostWeight = 1.0, 1.0
|
||||
if scoreArm(nil, cheap, task) <= scoreArm(nil, expensive, task) {
|
||||
if scoreArm(nil, BanditParams{}, cheap, task) <= scoreArm(nil, BanditParams{}, expensive, task) {
|
||||
t.Errorf("CostWeight=1.0: cheap arm should beat expensive arm; cheap=%v expensive=%v",
|
||||
scoreArm(nil, cheap, task), scoreArm(nil, expensive, task))
|
||||
scoreArm(nil, BanditParams{}, cheap, task), scoreArm(nil, BanditParams{}, expensive, task))
|
||||
}
|
||||
|
||||
// CostWeight=0.0: cost ignored, quality alone decides → expensive (better
|
||||
// context window) wins.
|
||||
cheap.CostWeight, expensive.CostWeight = 0.001, 0.001
|
||||
if scoreArm(nil, expensive, task) <= scoreArm(nil, cheap, task) {
|
||||
if scoreArm(nil, BanditParams{}, expensive, task) <= scoreArm(nil, BanditParams{}, cheap, task) {
|
||||
t.Errorf("CostWeight~0: higher-quality expensive arm should beat cheap arm; expensive=%v cheap=%v",
|
||||
scoreArm(nil, expensive, task), scoreArm(nil, cheap, task))
|
||||
scoreArm(nil, BanditParams{}, expensive, task), scoreArm(nil, BanditParams{}, cheap, task))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,8 +140,8 @@ func TestScoreArm_StrengthBonus(t *testing.T) {
|
||||
}
|
||||
task := Task{Type: TaskSecurityReview, EstimatedTokens: 5000, RequiresTools: true, Priority: PriorityNormal}
|
||||
|
||||
a := scoreArm(nil, withoutStrength, task)
|
||||
b := scoreArm(nil, withStrength, task)
|
||||
a := scoreArm(nil, BanditParams{}, withoutStrength, task)
|
||||
b := scoreArm(nil, BanditParams{}, withStrength, task)
|
||||
if !(b > a) {
|
||||
t.Errorf("strength-tagged arm score (%v) should exceed plain arm score (%v)", b, a)
|
||||
}
|
||||
@@ -160,8 +160,8 @@ func TestScoreArm_StrengthBonusDoesNotApplyToOtherTasks(t *testing.T) {
|
||||
}
|
||||
task := Task{Type: TaskDebug, EstimatedTokens: 5000, RequiresTools: true, Priority: PriorityNormal}
|
||||
|
||||
a := scoreArm(nil, plain, task)
|
||||
b := scoreArm(nil, tagged, task)
|
||||
a := scoreArm(nil, BanditParams{}, plain, task)
|
||||
b := scoreArm(nil, BanditParams{}, tagged, task)
|
||||
if math.Abs(a-b) > 1e-9 {
|
||||
t.Errorf("non-matching task should ignore Strengths: plain=%v tagged=%v", a, b)
|
||||
}
|
||||
@@ -184,7 +184,7 @@ func TestSelectBest_StrengthPromotedArmBeatsCLIAgent(t *testing.T) {
|
||||
}
|
||||
|
||||
task := Task{Type: TaskSecurityReview, EstimatedTokens: 5000, RequiresTools: true, Priority: PriorityNormal}
|
||||
got := selectBest(nil, []*Arm{cliAgent, opus}, task, PreferAuto)
|
||||
got := selectBest(nil, BanditParams{}, []*Arm{cliAgent, opus}, task, PreferAuto)
|
||||
if got == nil {
|
||||
t.Fatal("selectBest returned nil")
|
||||
}
|
||||
@@ -208,7 +208,7 @@ func TestSelectBest_EmptyStrengthsPreservesTierOrder(t *testing.T) {
|
||||
}
|
||||
|
||||
task := Task{Type: TaskSecurityReview, EstimatedTokens: 5000, RequiresTools: true, Priority: PriorityNormal}
|
||||
got := selectBest(nil, []*Arm{cliAgent, opus}, task, PreferAuto)
|
||||
got := selectBest(nil, BanditParams{}, []*Arm{cliAgent, opus}, task, PreferAuto)
|
||||
if got.ID != cliAgent.ID {
|
||||
t.Errorf("without Strengths, CLI-agent tier-1 should win; got %s", got.ID)
|
||||
}
|
||||
@@ -327,7 +327,7 @@ func TestSelectBest_MultiplePromotedArmsBestQualityWins(t *testing.T) {
|
||||
Strengths: []TaskType{TaskSecurityReview},
|
||||
}
|
||||
|
||||
qt := NewQualityTracker()
|
||||
qt := NewQualityTracker(0, 0)
|
||||
// armB has consistently succeeded — minObservations=3 is enough to flip
|
||||
// the score blend.
|
||||
for i := 0; i < 5; i++ {
|
||||
@@ -339,7 +339,7 @@ func TestSelectBest_MultiplePromotedArmsBestQualityWins(t *testing.T) {
|
||||
}
|
||||
|
||||
task := Task{Type: TaskSecurityReview, EstimatedTokens: 5000, RequiresTools: true, Priority: PriorityNormal}
|
||||
got := selectBest(qt, []*Arm{armA, armB}, task, PreferAuto)
|
||||
got := selectBest(qt, BanditParams{}, []*Arm{armA, armB}, task, PreferAuto)
|
||||
if got == nil {
|
||||
t.Fatal("selectBest returned nil")
|
||||
}
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
package security
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AuditEvent is one structured record of a firewall action (block /
// redact / sanitize), written so a session can be grepped after the
// fact.
//
// Discipline: an AuditEvent must never contain the raw bytes of a
// matched secret. Pattern identifies the matcher (e.g.
// "anthropic_api_key", "high_entropy") and TokenLen gives the length of
// the offending token, which lets the user spot it in a transcript
// without leaking it a second time.
type AuditEvent struct {
	// Timestamp is the wall-clock time of the event in UTC.
	Timestamp time.Time `json:"ts"`
	// Action is one of: "block", "redact", "warn", "unicode_sanitize".
	Action string `json:"action"`
	// Pattern is the human-readable matcher name: a regex tag, or
	// "high_entropy" / "unicode". It is never the matched bytes.
	Pattern string `json:"pattern,omitempty"`
	// Source names the point in the data flow where the event fired:
	// "message_text", "tool_result", "tool_call_args",
	// "system_prompt", etc.
	Source string `json:"source,omitempty"`
	// TokenLen is the length of the offending token, or the number of
	// characters changed for unicode_sanitize. A length only, never
	// the bytes.
	TokenLen int `json:"token_len,omitempty"`
}
|
||||
|
||||
// AuditLogger appends AuditEvent records to a per-session JSON Lines
|
||||
// file. Safe for concurrent use. Writes are skipped while incognito
|
||||
// mode is active so the no-persistence contract is honoured.
|
||||
//
|
||||
// A nil *AuditLogger is a valid no-op — callers can use the same
|
||||
// `audit.Record(...)` shape whether or not auditing is configured.
|
||||
type AuditLogger struct {
|
||||
path string
|
||||
incognito *IncognitoMode
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
// AuditLoggerConfig controls how AuditLogger is constructed.
|
||||
type AuditLoggerConfig struct {
|
||||
// Path is the full filesystem path to write JSONL events to.
|
||||
// Parent directories are created lazily on first successful Record.
|
||||
Path string
|
||||
// Incognito gates writes; when active, Record is a no-op.
|
||||
// Optional — pass nil to always persist.
|
||||
Incognito *IncognitoMode
|
||||
// Logger receives one Warn per write failure so the user sees
|
||||
// disk-full / permission errors instead of silently losing
|
||||
// audit records. Defaults to slog.Default() when nil.
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
// NewAuditLogger builds an AuditLogger. Pass a zero Path to disable
|
||||
// auditing (returns nil).
|
||||
func NewAuditLogger(cfg AuditLoggerConfig) *AuditLogger {
|
||||
if cfg.Path == "" {
|
||||
return nil
|
||||
}
|
||||
logger := cfg.Logger
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
return &AuditLogger{
|
||||
path: cfg.Path,
|
||||
incognito: cfg.Incognito,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Record appends an event to the audit log. Safe to call on a nil
|
||||
// receiver (no-op). Skipped silently when incognito is active.
|
||||
// Write failures are logged at Warn level but do not propagate to
|
||||
// the caller — auditing is best-effort and must not crash the
|
||||
// scanner pipeline.
|
||||
func (a *AuditLogger) Record(ev AuditEvent) {
|
||||
if a == nil {
|
||||
return
|
||||
}
|
||||
if a.incognito != nil && a.incognito.Active() {
|
||||
return
|
||||
}
|
||||
if ev.Timestamp.IsZero() {
|
||||
ev.Timestamp = time.Now().UTC()
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(a.path), 0o700); err != nil {
|
||||
a.logger.Warn("audit: mkdir failed", "path", a.path, "err", err)
|
||||
return
|
||||
}
|
||||
f, err := os.OpenFile(a.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
|
||||
if err != nil {
|
||||
a.logger.Warn("audit: open failed", "path", a.path, "err", err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
if err := json.NewEncoder(f).Encode(ev); err != nil {
|
||||
a.logger.Warn("audit: encode failed", "path", a.path, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Path returns the file path the logger writes to. Empty when the
|
||||
// logger is disabled (nil receiver returns "").
|
||||
func (a *AuditLogger) Path() string {
|
||||
if a == nil {
|
||||
return ""
|
||||
}
|
||||
return a.path
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
package security
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func readAuditLines(t *testing.T, path string) []AuditEvent {
|
||||
t.Helper()
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open audit log: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
var events []AuditEvent
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
var ev AuditEvent
|
||||
if err := json.Unmarshal(sc.Bytes(), &ev); err != nil {
|
||||
t.Fatalf("decode line %q: %v", sc.Text(), err)
|
||||
}
|
||||
events = append(events, ev)
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
t.Fatalf("scan audit log: %v", err)
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func TestAuditLogger_NilReceiverIsNoop(t *testing.T) {
|
||||
var a *AuditLogger
|
||||
// Must not panic.
|
||||
a.Record(AuditEvent{Action: "block"})
|
||||
}
|
||||
|
||||
func TestAuditLogger_DisabledWhenPathEmpty(t *testing.T) {
|
||||
a := NewAuditLogger(AuditLoggerConfig{})
|
||||
if a != nil {
|
||||
t.Errorf("expected nil logger for empty path, got %v", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuditLogger_AppendsJSONLines(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audit.jsonl")
|
||||
a := NewAuditLogger(AuditLoggerConfig{Path: path})
|
||||
if a == nil {
|
||||
t.Fatal("expected non-nil logger")
|
||||
}
|
||||
|
||||
a.Record(AuditEvent{Action: "block", Pattern: "anthropic_api_key", Source: "tool_result", TokenLen: 51})
|
||||
a.Record(AuditEvent{Action: "redact", Pattern: "high_entropy", Source: "message_text", TokenLen: 42})
|
||||
|
||||
events := readAuditLines(t, path)
|
||||
if len(events) != 2 {
|
||||
t.Fatalf("expected 2 events, got %d", len(events))
|
||||
}
|
||||
if events[0].Action != "block" || events[0].Pattern != "anthropic_api_key" {
|
||||
t.Errorf("event 0 = %+v", events[0])
|
||||
}
|
||||
if events[0].Timestamp.IsZero() {
|
||||
t.Error("event 0 missing timestamp")
|
||||
}
|
||||
if events[1].Action != "redact" || events[1].TokenLen != 42 {
|
||||
t.Errorf("event 1 = %+v", events[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuditLogger_SkipsUnderIncognito(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audit.jsonl")
|
||||
incog := NewIncognitoMode()
|
||||
a := NewAuditLogger(AuditLoggerConfig{Path: path, Incognito: incog})
|
||||
|
||||
incog.Activate()
|
||||
a.Record(AuditEvent{Action: "block", Pattern: "x"})
|
||||
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
t.Errorf("expected audit file to not exist under incognito, got err=%v", err)
|
||||
}
|
||||
|
||||
incog.Deactivate()
|
||||
a.Record(AuditEvent{Action: "block", Pattern: "y"})
|
||||
|
||||
events := readAuditLines(t, path)
|
||||
if len(events) != 1 {
|
||||
t.Fatalf("expected 1 event after deactivate, got %d", len(events))
|
||||
}
|
||||
if events[0].Pattern != "y" {
|
||||
t.Errorf("expected pattern=y (incognito event dropped), got %q", events[0].Pattern)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuditLogger_CreatesParentDir(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "deeply", "nested", "audit.jsonl")
|
||||
a := NewAuditLogger(AuditLoggerConfig{Path: path})
|
||||
a.Record(AuditEvent{Action: "block"})
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
t.Errorf("expected audit file at %s, got err=%v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFirewall_RecordsRedactionToAudit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
auditPath := filepath.Join(dir, "audit.jsonl")
|
||||
audit := NewAuditLogger(AuditLoggerConfig{Path: auditPath})
|
||||
|
||||
fw := NewFirewall(FirewallConfig{
|
||||
ScanOutgoing: true,
|
||||
ScanToolResults: true,
|
||||
Audit: audit,
|
||||
})
|
||||
|
||||
// Anthropic key prefix is a built-in redact pattern; emit it
|
||||
// through the tool-result scanning path.
|
||||
cleaned := fw.ScanToolResult("here is the key sk-ant-abcdef1234567890abcdef1234567890abcdef")
|
||||
if !strings.Contains(cleaned, "[REDACTED]") {
|
||||
t.Errorf("expected [REDACTED] in cleaned content, got %q", cleaned)
|
||||
}
|
||||
|
||||
events := readAuditLines(t, auditPath)
|
||||
var sawAnthropicRedact bool
|
||||
for _, ev := range events {
|
||||
if ev.Action == "redact" && ev.Pattern == "anthropic_api_key" && ev.Source == "tool_result" {
|
||||
sawAnthropicRedact = true
|
||||
if ev.TokenLen == 0 {
|
||||
t.Errorf("expected non-zero TokenLen on redact event, got %+v", ev)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !sawAnthropicRedact {
|
||||
t.Errorf("expected an anthropic_api_key redact event in audit log, got %+v", events)
|
||||
}
|
||||
}
|
||||
@@ -14,6 +14,7 @@ type Firewall struct {
|
||||
scanner *Scanner
|
||||
incognito *IncognitoMode
|
||||
logger *slog.Logger
|
||||
audit *AuditLogger // optional; nil = no per-session audit log
|
||||
|
||||
// Config
|
||||
scanOutgoing bool
|
||||
@@ -27,6 +28,11 @@ type FirewallConfig struct {
|
||||
EntropyThreshold float64
|
||||
EntropySafelist []string
|
||||
Logger *slog.Logger
|
||||
// Audit is the optional per-session audit logger. Set via
|
||||
// SetAudit after the session ID is known — the firewall is
|
||||
// typically constructed before the session ID is generated.
|
||||
// nil is safe; auditing simply turns into a no-op.
|
||||
Audit *AuditLogger
|
||||
}
|
||||
|
||||
func NewFirewall(cfg FirewallConfig) *Firewall {
|
||||
@@ -50,11 +56,20 @@ func NewFirewall(cfg FirewallConfig) *Firewall {
|
||||
scanner: scanner,
|
||||
incognito: NewIncognitoMode(),
|
||||
logger: logger,
|
||||
audit: cfg.Audit,
|
||||
scanOutgoing: cfg.ScanOutgoing,
|
||||
scanToolResults: cfg.ScanToolResults,
|
||||
}
|
||||
}
|
||||
|
||||
// SetAudit attaches an AuditLogger after construction. The firewall
|
||||
// is typically built before the session ID exists, so callers usually
|
||||
// construct the AuditLogger later and inject it via this setter.
|
||||
// Pass nil to disable auditing.
|
||||
func (f *Firewall) SetAudit(a *AuditLogger) {
|
||||
f.audit = a
|
||||
}
|
||||
|
||||
// Incognito returns the incognito mode controller.
|
||||
func (f *Firewall) Incognito() *IncognitoMode {
|
||||
return f.incognito
|
||||
@@ -131,7 +146,16 @@ func (f *Firewall) scanMessage(m message.Message) message.Message {
|
||||
|
||||
func (f *Firewall) scanAndRedact(content, source string) string {
|
||||
// Unicode sanitization first
|
||||
originalLen := len(content)
|
||||
content = SanitizeUnicode(content)
|
||||
if delta := originalLen - len(content); delta != 0 {
|
||||
f.audit.Record(AuditEvent{
|
||||
Action: "unicode_sanitize",
|
||||
Pattern: "unicode",
|
||||
Source: source,
|
||||
TokenLen: delta,
|
||||
})
|
||||
}
|
||||
|
||||
// Secret scanning
|
||||
matches := f.scanner.Scan(content)
|
||||
@@ -146,6 +170,12 @@ func (f *Firewall) scanAndRedact(content, source string) string {
|
||||
"pattern", m.Pattern,
|
||||
"source", source,
|
||||
)
|
||||
f.audit.Record(AuditEvent{
|
||||
Action: "block",
|
||||
Pattern: m.Pattern,
|
||||
Source: source,
|
||||
TokenLen: m.End - m.Start,
|
||||
})
|
||||
return "[BLOCKED: content contained a secret]"
|
||||
default:
|
||||
f.logger.Debug("secret redacted",
|
||||
@@ -153,6 +183,12 @@ func (f *Firewall) scanAndRedact(content, source string) string {
|
||||
"action", m.Action,
|
||||
"source", source,
|
||||
)
|
||||
f.audit.Record(AuditEvent{
|
||||
Action: string(m.Action),
|
||||
Pattern: m.Pattern,
|
||||
Source: source,
|
||||
TokenLen: m.End - m.Start,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user