Files
vikingowl 8b9bdc2978 feat(security): per-session firewall audit log
New AuditLogger writes one JSON line per firewall action to
<projectRoot>/.gnoma/sessions/<sessionID>/audit.jsonl so a user can
grep 'what did the firewall do this session?' after the fact.

Records 'block', 'redact', 'warn', and 'unicode_sanitize' events with
the matcher name, source (tool_result / message_text / etc.), and
token length. Discipline: never the bytes themselves — only the
matcher name and the length, matching the README's scope-note
promise about audit data.

Plumbing:
- Firewall gains an audit *AuditLogger field plus SetAudit setter.
  The firewall is constructed before the session ID exists, so the
  audit logger is wired post-hoc once main.go has the sessionID.
- Honours incognito: Record is a silent no-op when the firewall's
  IncognitoMode is active, preserving the no-persistence contract.
- Tolerant of fs errors: mkdir / open / encode failures log a Warn
  but never propagate; the scan pipeline must not depend on audit
  succeeding.
- Nil receiver is a valid no-op so callers don't need nil-guards
  around every Record.

Tracks 'Security boundary — per-session audit log' from the
v0.3.0 r/SideProject launch thread (u/Secret_Theme3192,
2026-05-24). Per-host egress allowlist remains separately tracked
pending the commenter's reply on host-level vs per-tool semantics.
2026-05-24 22:47:28 +02:00

197 lines
5.2 KiB
Go

package security
import (
"encoding/json"
"log/slog"
"somegit.dev/Owlibou/gnoma/internal/message"
)
// Firewall scans outgoing LLM requests and incoming tool results
// for secrets, sensitive data, and dangerous Unicode. Core security
// layer — not a plugin, everyone benefits by default.
//
// Build one with NewFirewall. The audit logger may be supplied at
// construction time or attached afterwards through SetAudit.
type Firewall struct {
// scanner finds secret matches in the text handed to scanAndRedact.
scanner *Scanner
// incognito is the controller exposed through Incognito().
incognito *IncognitoMode
// logger receives block (Error) and redact (Debug) messages.
// NewFirewall substitutes slog.Default() when none is configured.
logger *slog.Logger
audit *AuditLogger // optional; nil = no per-session audit log
// Config
// scanOutgoing gates ScanOutgoingMessages; when false the messages
// are returned untouched.
scanOutgoing bool
// scanToolResults gates ScanToolResult; when false the content is
// returned untouched.
scanToolResults bool
}
// FirewallConfig carries the settings NewFirewall reads when building
// a Firewall.
type FirewallConfig struct {
// ScanOutgoing enables scanning in ScanOutgoingMessages.
ScanOutgoing bool
// ScanToolResults enables scanning in ScanToolResult.
ScanToolResults bool
// RedactHighEntropy and EntropyThreshold are forwarded unchanged to
// NewScanner.
RedactHighEntropy bool
EntropyThreshold float64
// EntropySafelist lists safelist names for the scanner. Names that
// splitSafelistNames does not recognise are reported with a warning
// and left out; the warning's hint lists uuid, sha_hex, iso8601 and
// url as the valid names.
EntropySafelist []string
// Logger is used by the firewall and its scanner. nil selects
// slog.Default().
Logger *slog.Logger
// Audit is the optional per-session audit logger. Set via
// SetAudit after the session ID is known — the firewall is
// typically constructed before the session ID is generated.
// nil is safe; auditing simply turns into a no-op.
Audit *AuditLogger
}
// NewFirewall builds a Firewall from cfg.
//
// A nil cfg.Logger is replaced by slog.Default(), and the chosen logger
// is shared with the scanner. Entries in cfg.EntropySafelist that are
// not recognised are reported at Warn level and dropped; the recognised
// ones are installed on the scanner. The returned firewall always has a
// fresh IncognitoMode controller.
func NewFirewall(cfg FirewallConfig) *Firewall {
	lg := cfg.Logger
	if lg == nil {
		lg = slog.Default()
	}

	sc := NewScanner(cfg.EntropyThreshold, cfg.RedactHighEntropy)
	sc.SetLogger(lg)

	// Reject bad safelist names here, at the configuration boundary: a
	// misspelt name should be visible in the log rather than quietly
	// turning off false-positive reduction.
	accepted, rejected := splitSafelistNames(cfg.EntropySafelist)
	for _, bad := range rejected {
		lg.Warn("ignoring unknown entropy safelist name",
			"name", bad,
			"hint", "valid names: uuid, sha_hex, iso8601, url",
		)
	}
	sc.safelist = accepted

	fw := &Firewall{
		scanner:         sc,
		incognito:       NewIncognitoMode(),
		logger:          lg,
		audit:           cfg.Audit,
		scanOutgoing:    cfg.ScanOutgoing,
		scanToolResults: cfg.ScanToolResults,
	}
	return fw
}
// SetAudit installs a as the firewall's audit logger, replacing any
// logger that was configured earlier. It exists because the session ID,
// and therefore the AuditLogger, is usually not available yet when
// NewFirewall runs. Passing nil turns auditing off.
func (f *Firewall) SetAudit(a *AuditLogger) {
	f.audit = a
}
// Incognito exposes the IncognitoMode controller that NewFirewall
// created for this firewall.
func (f *Firewall) Incognito() *IncognitoMode {
	return f.incognito
}
// Scanner exposes the firewall's secret scanner so that callers can
// register additional patterns on it.
func (f *Firewall) Scanner() *Scanner {
	return f.scanner
}
// ScanOutgoingMessages runs every message through the firewall before
// it is sent to the provider and returns the scanned copies in the same
// order. When outgoing scanning is disabled the input slice itself is
// returned unchanged.
func (f *Firewall) ScanOutgoingMessages(msgs []message.Message) []message.Message {
	if !f.scanOutgoing {
		return msgs
	}

	out := make([]message.Message, 0, len(msgs))
	for _, msg := range msgs {
		out = append(out, f.scanMessage(msg))
	}
	return out
}
// ScanToolResult passes the output of a tool execution through the
// firewall, labelled "tool_result", and returns the cleaned text. With
// tool-result scanning disabled the content comes back untouched.
func (f *Firewall) ScanToolResult(content string) string {
	if f.scanToolResults {
		return f.scanAndRedact(content, "tool_result")
	}
	return content
}
// ScanSystemPrompt checks the system prompt for secrets that were
// embedded by accident and returns the cleaned prompt. Unlike the other
// entry points it is not gated by a configuration flag: the prompt is
// always scanned, labelled "system_prompt".
func (f *Firewall) ScanSystemPrompt(prompt string) string {
	cleaned := f.scanAndRedact(prompt, "system_prompt")
	return cleaned
}
// scanMessage returns a copy of m in which every scannable content item
// has been passed through scanAndRedact. The copy keeps m's Role and has
// one content item per item of m, in the same order.
//
//   - Text items are scanned with source "message_text".
//   - Tool results are copied and their Content scanned with source
//     "tool_result".
//   - Tool calls are copied and their raw Arguments scanned with source
//     "tool_call_args".
//   - Items with a nil payload, and every other content type (such as
//     thinking blocks), are passed through unchanged.
func (f *Firewall) scanMessage(m message.Message) message.Message {
	cleaned := message.Message{Role: m.Role}
	cleaned.Content = make([]message.Content, len(m.Content))
	for i, c := range m.Content {
		switch c.Type {
		case message.ContentText:
			cleaned.Content[i] = message.NewTextContent(
				f.scanAndRedact(c.Text, "message_text"),
			)
		case message.ContentToolResult:
			if c.ToolResult != nil {
				// Work on a copy so the caller's tool result is not mutated.
				tr := *c.ToolResult
				tr.Content = f.scanAndRedact(tr.Content, "tool_result")
				cleaned.Content[i] = message.NewToolResultContent(tr)
			} else {
				cleaned.Content[i] = c
			}
		case message.ContentToolCall:
			// Scan LLM-generated tool arguments for accidentally embedded secrets.
			if c.ToolCall != nil {
				tc := *c.ToolCall
				original := string(tc.Arguments)
				scanned := f.scanAndRedact(original, "tool_call_args")
				tc.Arguments = json.RawMessage(scanned)
				// The arguments are raw JSON, but scanAndRedact works on plain
				// text: its block placeholder (and possibly a redaction) is not
				// valid JSON, and encoding/json refuses to marshal an invalid
				// RawMessage. If the firewall changed the text and the result
				// no longer parses, carry it as a JSON string literal so the
				// message can still be serialized. Arguments the firewall left
				// untouched are never rewritten.
				if scanned != original && !json.Valid([]byte(scanned)) {
					if quoted, err := json.Marshal(scanned); err == nil {
						tc.Arguments = json.RawMessage(quoted)
					}
				}
				cleaned.Content[i] = message.NewToolCallContent(tc)
			} else {
				cleaned.Content[i] = c
			}
		default:
			// Thinking blocks and any other content type — pass through.
			cleaned.Content[i] = c
		}
	}
	return cleaned
}
// scanAndRedact runs the firewall pipeline over one piece of text and
// returns the text that is safe to pass on. source is a short label for
// where the text came from (for example "tool_result") and is attached
// to every log line and audit event.
//
// The pipeline is:
//
//  1. SanitizeUnicode is applied. If it changed the byte length, a
//     "unicode_sanitize" audit event records the size of the change.
//  2. The scanner looks for secrets. With no matches the sanitized text
//     is returned as is.
//  3. Matches are visited in order. The first match whose action is
//     ActionBlock is logged at Error level, audited as "block", and ends
//     the scan: a fixed placeholder is returned in place of the content.
//     Every other match is logged at Debug level and audited under its
//     own action name.
//  4. If nothing was blocked, Redact is applied with the full match list.
//
// Audit events carry the pattern name, the source and a length only,
// never the matched bytes.
func (f *Firewall) scanAndRedact(content, source string) string {
	// Step 1: Unicode sanitization, audited by byte-length difference.
	sanitized := SanitizeUnicode(content)
	if removed := len(content) - len(sanitized); removed != 0 {
		f.audit.Record(AuditEvent{
			Action:   "unicode_sanitize",
			Pattern:  "unicode",
			Source:   source,
			TokenLen: removed,
		})
	}

	// Step 2: secret scanning.
	hits := f.scanner.Scan(sanitized)
	if len(hits) == 0 {
		return sanitized
	}

	// Step 3: log and audit each match; a blocking match short-circuits.
	for _, hit := range hits {
		span := hit.End - hit.Start

		if hit.Action == ActionBlock {
			f.logger.Error("blocked: secret detected",
				"pattern", hit.Pattern,
				"source", source,
			)
			f.audit.Record(AuditEvent{
				Action:   "block",
				Pattern:  hit.Pattern,
				Source:   source,
				TokenLen: span,
			})
			return "[BLOCKED: content contained a secret]"
		}

		f.logger.Debug("secret redacted",
			"pattern", hit.Pattern,
			"action", hit.Action,
			"source", source,
		)
		f.audit.Record(AuditEvent{
			Action:   string(hit.Action),
			Pattern:  hit.Pattern,
			Source:   source,
			TokenLen: span,
		})
	}

	// Step 4: nothing blocked, so redact every matched span.
	return Redact(sanitized, hits)
}