diff --git a/cmd/gnoma/main.go b/cmd/gnoma/main.go index ce10268..8809506 100644 --- a/cmd/gnoma/main.go +++ b/cmd/gnoma/main.go @@ -12,6 +12,7 @@ import ( "somegit.dev/Owlibou/gnoma/internal/engine" "somegit.dev/Owlibou/gnoma/internal/provider" + "somegit.dev/Owlibou/gnoma/internal/security" anthropicprov "somegit.dev/Owlibou/gnoma/internal/provider/anthropic" "somegit.dev/Owlibou/gnoma/internal/provider/mistral" googleprov "somegit.dev/Owlibou/gnoma/internal/provider/google" @@ -80,10 +81,19 @@ func main() { // Re-register bash tool with aliases reg.Register(bash.New(bash.WithAliases(aliases))) + // Create firewall + fw := security.NewFirewall(security.FirewallConfig{ + ScanOutgoing: true, + ScanToolResults: true, + EntropyThreshold: 4.5, + Logger: logger, + }) + // Create engine eng, err := engine.New(engine.Config{ Provider: prov, Tools: reg, + Firewall: fw, System: *system, Model: *model, MaxTurns: *maxTurns, diff --git a/go.mod b/go.mod index de2fe5a..e0181b3 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/VikingOwl91/mistral-go-sdk v1.2.1 github.com/anthropics/anthropic-sdk-go v1.29.0 github.com/openai/openai-go v1.12.0 + golang.org/x/text v0.27.0 google.golang.org/genai v1.52.1 ) @@ -28,7 +29,6 @@ require ( golang.org/x/net v0.41.0 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.34.0 // indirect - golang.org/x/text v0.27.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect google.golang.org/grpc v1.66.2 // indirect google.golang.org/protobuf v1.34.2 // indirect diff --git a/internal/engine/engine.go b/internal/engine/engine.go index ade20ad..8899da5 100644 --- a/internal/engine/engine.go +++ b/internal/engine/engine.go @@ -7,6 +7,7 @@ import ( "somegit.dev/Owlibou/gnoma/internal/message" "somegit.dev/Owlibou/gnoma/internal/provider" + "somegit.dev/Owlibou/gnoma/internal/security" "somegit.dev/Owlibou/gnoma/internal/tool" ) @@ -14,9 +15,10 @@ import ( type Config 
struct { Provider provider.Provider Tools *tool.Registry - System string // system prompt - Model string // override model (empty = provider default) - MaxTurns int // safety limit on tool loops (0 = unlimited) + Firewall *security.Firewall // nil = no scanning + System string // system prompt + Model string // override model (empty = provider default) + MaxTurns int // safety limit on tool loops (0 = unlimited) Logger *slog.Logger } diff --git a/internal/engine/loop.go b/internal/engine/loop.go index c02e512..a9a5378 100644 --- a/internal/engine/loop.go +++ b/internal/engine/loop.go @@ -115,10 +115,18 @@ func (e *Engine) runLoop(ctx context.Context, cb Callback) (*Turn, error) { } func (e *Engine) buildRequest(ctx context.Context) provider.Request { + // Scan messages through firewall if configured + messages := e.history + systemPrompt := e.cfg.System + if e.cfg.Firewall != nil { + messages = e.cfg.Firewall.ScanOutgoingMessages(messages) + systemPrompt = e.cfg.Firewall.ScanSystemPrompt(systemPrompt) + } + req := provider.Request{ Model: e.cfg.Model, - SystemPrompt: e.cfg.System, - Messages: e.history, + SystemPrompt: systemPrompt, + Messages: messages, } // Only include tools if the model supports them @@ -169,17 +177,23 @@ func (e *Engine) executeTools(ctx context.Context, calls []message.ToolCall, cb continue } + // Scan tool result through firewall + output := result.Output + if e.cfg.Firewall != nil { + output = e.cfg.Firewall.ScanToolResult(output) + } + // Emit tool result as a text delta event so the UI can show it if cb != nil { cb(stream.Event{ Type: stream.EventTextDelta, - Text: fmt.Sprintf("\n[tool:%s] %s\n", call.Name, truncate(result.Output, 500)), + Text: fmt.Sprintf("\n[tool:%s] %s\n", call.Name, truncate(output, 500)), }) } results = append(results, message.ToolResult{ ToolCallID: call.ID, - Content: result.Output, + Content: output, }) } diff --git a/internal/security/firewall.go b/internal/security/firewall.go new file mode 100644 index 
0000000..c04bb61
--- /dev/null
+++ b/internal/security/firewall.go
@@ -0,0 +1,126 @@
+package security
+
+import (
+	"log/slog"
+
+	"somegit.dev/Owlibou/gnoma/internal/message"
+)
+
+// Firewall sits between the engine and the provider. It sanitizes Unicode
+// and redacts detected secrets in outgoing messages, the system prompt and
+// tool results. It also owns the incognito-mode controller.
+type Firewall struct {
+	scanner   *Scanner
+	incognito *IncognitoMode
+	logger    *slog.Logger
+
+	// Feature switches copied from FirewallConfig.
+	scanOutgoing    bool
+	scanToolResults bool
+}
+
+// FirewallConfig carries the options accepted by NewFirewall.
+type FirewallConfig struct {
+	ScanOutgoing     bool         // enables ScanOutgoingMessages
+	ScanToolResults  bool         // enables ScanToolResult
+	EntropyThreshold float64      // forwarded to NewScanner
+	Logger           *slog.Logger // nil falls back to slog.Default()
+}
+
+// NewFirewall builds a Firewall from cfg with a fresh Scanner and a fresh,
+// inactive IncognitoMode.
+func NewFirewall(cfg FirewallConfig) *Firewall {
+	fw := &Firewall{
+		logger:          cfg.Logger,
+		scanOutgoing:    cfg.ScanOutgoing,
+		scanToolResults: cfg.ScanToolResults,
+	}
+	if fw.logger == nil {
+		fw.logger = slog.Default()
+	}
+	fw.scanner = NewScanner(cfg.EntropyThreshold)
+	fw.incognito = NewIncognitoMode()
+	return fw
+}
+
+// Incognito exposes the incognito mode controller owned by the firewall.
+func (f *Firewall) Incognito() *IncognitoMode {
+	return f.incognito
+}
+
+// Scanner exposes the secret scanner so callers can register custom patterns.
+func (f *Firewall) Scanner() *Scanner {
+	return f.scanner
+}
+
+// ScanOutgoingMessages returns a scanned copy of msgs for sending to the
+// provider. When outgoing scanning is disabled the input slice is returned
+// as-is.
+func (f *Firewall) ScanOutgoingMessages(msgs []message.Message) []message.Message {
+	if !f.scanOutgoing {
+		return msgs
+	}
+
+	out := make([]message.Message, 0, len(msgs))
+	for _, msg := range msgs {
+		out = append(out, f.scanMessage(msg))
+	}
+	return out
+}
+
+// ScanToolResult returns content with secrets redacted, or content unchanged
+// when tool-result scanning is disabled.
+func (f *Firewall) ScanToolResult(content string) string {
+	if f.scanToolResults {
+		return f.scanAndRedact(content, "tool_result")
+	}
+	return content
+}
+
+// ScanSystemPrompt scans the system prompt for accidentally embedded secrets.
+func (f *Firewall) ScanSystemPrompt(prompt string) string {
+	// The system prompt is part of the outgoing request, so it follows the
+	// same switch as ScanOutgoingMessages.
+	if !f.scanOutgoing {
+		return prompt
+	}
+	return f.scanAndRedact(prompt, "system_prompt")
+}
+
+// scanMessage returns a copy of m in which every text part and every
+// tool-result part has been passed through scanAndRedact. All other content
+// parts (tool calls, thinking blocks, ...) are copied through untouched.
+func (f *Firewall) scanMessage(m message.Message) message.Message {
+	// Start from a value copy so that every field of the message survives,
+	// not just Role; only the Content slice is replaced, which also keeps
+	// the caller's slice unmodified.
+	cleaned := m
+	cleaned.Content = make([]message.Content, len(m.Content))
+
+	for i, c := range m.Content {
+		switch c.Type {
+		case message.ContentText:
+			cleaned.Content[i] = message.NewTextContent(
+				f.scanAndRedact(c.Text, "message_text"),
+			)
+		case message.ContentToolResult:
+			if c.ToolResult == nil {
+				// Nothing to scan; keep the part as it is.
+				cleaned.Content[i] = c
+				continue
+			}
+			// Work on a copy so the original tool result is not mutated.
+			tr := *c.ToolResult
+			tr.Content = f.scanAndRedact(tr.Content, "tool_result")
+			cleaned.Content[i] = message.NewToolResultContent(tr)
+		default:
+			// Tool calls, thinking blocks — pass through
+			cleaned.Content[i] = c
+		}
+	}
+	return cleaned
+}
+
+// scanAndRedact sanitizes content, scans the sanitized text for secrets,
+// logs one warning per match (tagged with source) and returns the text with
+// redactable matches replaced. Sanitizing first keeps the match offsets
+// valid for the string that is actually redacted.
+func (f *Firewall) scanAndRedact(content, source string) string {
+	// Unicode sanitization first
+	content = SanitizeUnicode(content)
+
+	// Secret scanning
+	matches := f.scanner.Scan(content)
+	if len(matches) == 0 {
+		return content
+	}
+
+	for _, m := range matches {
+		f.logger.Warn("secret detected",
+			"pattern", m.Pattern,
+			"action", m.Action,
+			"source", source,
+		)
+	}
+
+	return Redact(content, matches)
+}
diff --git a/internal/security/incognito.go b/internal/security/incognito.go
new file mode 100644
index 0000000..6827af5
--- /dev/null
+++ b/internal/security/incognito.go
@@ -0,0 +1,57 @@
+package security
+
+import "sync"
+
+// IncognitoMode controls privacy-sensitive behavior.
+// When active: no persistence, no learning, no content logging.
+type IncognitoMode struct {
+	mu     sync.RWMutex
+	active bool // guarded by mu
+
+	// Options
+	LocalOnly bool // only route to local arms when incognito
+}
+
+// NewIncognitoMode returns a controller that starts out inactive.
+func NewIncognitoMode() *IncognitoMode {
+	return new(IncognitoMode)
+}
+
+// set stores the new state under the write lock.
+func (m *IncognitoMode) set(on bool) {
+	m.mu.Lock()
+	m.active = on
+	m.mu.Unlock()
+}
+
+// Activate switches incognito mode on.
+func (m *IncognitoMode) Activate() {
+	m.set(true)
+}
+
+// Deactivate switches incognito mode off.
+func (m *IncognitoMode) Deactivate() {
+	m.set(false)
+}
+
+// Toggle flips the current state and reports the state after the flip.
+func (m *IncognitoMode) Toggle() bool {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	next := !m.active
+	m.active = next
+	return next
+}
+
+// Active reports whether incognito mode is currently on.
+func (m *IncognitoMode) Active() bool {
+	m.mu.RLock()
+	on := m.active
+	m.mu.RUnlock()
+	return on
+}
+
+// ShouldPersist reports whether persistence is allowed, i.e. incognito is off.
+func (m *IncognitoMode) ShouldPersist() bool {
+	return !m.Active()
+}
+
+// ShouldLearn reports whether learning (router feedback) is allowed,
+// i.e. incognito is off.
+func (m *IncognitoMode) ShouldLearn() bool {
+	return !m.Active()
+}
+
+// ShouldLogContent reports whether content may be logged, i.e. incognito is off.
+func (m *IncognitoMode) ShouldLogContent() bool {
+	return !m.Active()
+}
diff --git a/internal/security/redactor.go b/internal/security/redactor.go
new file mode 100644
index 0000000..d1e3159
--- /dev/null
+++ b/internal/security/redactor.go
@@ -0,0 +1,51 @@
+package security
+
+import "sort"
+
+const redactedPlaceholder = "[REDACTED]"
+
+// Redact replaces detected secrets in content with [REDACTED].
+// Only matches whose action is ActionRedact are replaced; text outside the
+// matched byte ranges is left untouched.
+func Redact(content string, matches []SecretMatch) string {
+	if len(matches) == 0 {
+		return content
+	}
+
+	// Keep only in-bounds, non-empty ranges that are marked for redaction.
+	spans := make([]SecretMatch, 0, len(matches))
+	for _, cand := range matches {
+		if cand.Action != ActionRedact {
+			continue
+		}
+		if cand.Start < 0 || cand.End > len(content) || cand.Start >= cand.End {
+			continue
+		}
+		spans = append(spans, cand)
+	}
+	if len(spans) == 0 {
+		return content
+	}
+
+	sort.Slice(spans, func(a, b int) bool {
+		return spans[a].Start < spans[b].Start
+	})
+
+	// Coalesce ranges that overlap or touch so that each stretch of secret
+	// text yields exactly one placeholder.
+	merged := []SecretMatch{spans[0]}
+	for _, next := range spans[1:] {
+		tail := &merged[len(merged)-1]
+		switch {
+		case next.Start > tail.End:
+			merged = append(merged, next)
+		case next.End > tail.End:
+			tail.End = next.End
+		}
+	}
+
+	// Emit the kept text and the placeholders in one forward pass.
+	out := make([]byte, 0, len(content))
+	cursor := 0
+	for _, span := range merged {
+		out = append(out, content[cursor:span.Start]...)
+		out = append(out, redactedPlaceholder...)
+		cursor = span.End
+	}
+	out = append(out, content[cursor:]...)
+	return string(out)
+}
diff --git a/internal/security/sanitize.go b/internal/security/sanitize.go
new file mode 100644
index 0000000..cecfca2
--- /dev/null
+++ b/internal/security/sanitize.go
@@ -0,0 +1,57 @@
+package security
+
+import (
+	"strings"
+	"unicode"
+
+	"golang.org/x/text/unicode/norm"
+)
+
+// SanitizeUnicode removes potentially dangerous invisible Unicode characters.
+// Applies NFKC normalization, then strips format (Cf) and private use (Co)
+// characters as well as the Tag block (U+E0000-U+E007F) and the Specials
+// range U+FFF0-U+FFFD. Prevents ASCII smuggling and hidden prompt injection
+// attacks.
+func SanitizeUnicode(s string) string {
+	// Step 1: NFKC normalization (handles composed characters)
+	s = norm.NFKC.String(s)
+
+	// Step 2: Strip dangerous Unicode categories
+	var b strings.Builder
+	b.Grow(len(s))
+	for _, r := range s {
+		if !shouldStrip(r) {
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
+
+// shouldStrip reports whether r must be removed by SanitizeUnicode.
+func shouldStrip(r rune) bool {
+	switch {
+	case r >= 0x20 && r <= 0x7E:
+		return false // ASCII printable
+	case r == '\n' || r == '\t' || r == '\r':
+		return false // common whitespace
+	case unicode.Is(unicode.Cf, r):
+		return true // format characters — invisible formatting
+	case unicode.Is(unicode.Co, r):
+		return true // private use — unregistered characters
+	case r >= 0xE0000 && r <= 0xE007F:
+		return true // Unicode Tag characters (ASCII smuggling)
+	case r >= 0xFFF0 && r <= 0xFFFD:
+		return true // Specials (interlinear annotation, etc.)
+	}
+	return false
+}
diff --git a/internal/security/scanner.go b/internal/security/scanner.go
new file mode 100644
index 0000000..8741d84
--- /dev/null
+++ b/internal/security/scanner.go
@@ -0,0 +1,215 @@
+package security
+
+import (
+	"math"
+	"regexp"
+)
+
+// ScanAction determines what to do when a secret is found.
+type ScanAction string
+
+const (
+	ActionRedact ScanAction = "redact"
+	ActionBlock  ScanAction = "block"
+	ActionWarn   ScanAction = "warn"
+)
+
+// SecretPattern defines a pattern for detecting secrets.
+type SecretPattern struct {
+	Name   string
+	Regex  *regexp.Regexp
+	Action ScanAction
+}
+
+// SecretMatch represents a detected secret in content.
+// Start and End are byte offsets into the scanned string (End exclusive).
+type SecretMatch struct {
+	Pattern string // which pattern matched
+	Action  ScanAction
+	Start   int
+	End     int
+}
+
+// Scanner detects secrets and sensitive data in content.
+type Scanner struct {
+	patterns         []SecretPattern
+	entropyThreshold float64
+}
+
+// NewScanner returns a Scanner loaded with the default patterns.
+// A non-positive entropyThreshold is replaced by 4.5.
+func NewScanner(entropyThreshold float64) *Scanner {
+	if entropyThreshold <= 0 {
+		entropyThreshold = 4.5
+	}
+	return &Scanner{
+		patterns:         defaultPatterns(),
+		entropyThreshold: entropyThreshold,
+	}
+}
+
+// AddPattern adds a custom detection pattern.
+// It returns the compile error, and adds nothing, when regex is invalid.
+func (s *Scanner) AddPattern(name, regex string, action ScanAction) error {
+	re, err := regexp.Compile(regex)
+	if err != nil {
+		return err
+	}
+	s.patterns = append(s.patterns, SecretPattern{
+		Name:   name,
+		Regex:  re,
+		Action: action,
+	})
+	return nil
+}
+
+// Scan checks content for secrets. Returns all matches found: first the
+// regex matches in pattern order, then the entropy-based matches.
+func (s *Scanner) Scan(content string) []SecretMatch {
+	// matchKey identifies a match by pattern name and exact byte range.
+	// A comparable struct is used instead of a string built from
+	// string(rune(offset)): that conversion maps every offset in the
+	// surrogate range or above U+10FFFF to U+FFFD, so distinct matches in
+	// large inputs collided and were dropped.
+	type matchKey struct {
+		pattern    string
+		start, end int
+	}
+
+	var matches []SecretMatch
+	seen := make(map[matchKey]struct{})
+
+	for _, p := range s.patterns {
+		for _, loc := range p.Regex.FindAllStringIndex(content, -1) {
+			key := matchKey{pattern: p.Name, start: loc[0], end: loc[1]}
+			if _, dup := seen[key]; dup {
+				continue
+			}
+			seen[key] = struct{}{}
+			matches = append(matches, SecretMatch{
+				Pattern: p.Name,
+				Action:  p.Action,
+				Start:   loc[0],
+				End:     loc[1],
+			})
+		}
+	}
+
+	// Entropy-based detection for unknown secret formats
+	matches = append(matches, s.scanEntropy(content)...)
+
+	return matches
+}
+
+// HasSecrets returns true if Scan reports at least one match of any action.
+func (s *Scanner) HasSecrets(content string) bool {
+	return len(s.Scan(content)) > 0
+}
+
+// scanEntropy detects high-entropy strings that might be secrets.
+func (s *Scanner) scanEntropy(content string) []SecretMatch {
+	var found []SecretMatch
+	for _, tok := range entropyTokenize(content) {
+		// Secrets are typically 20+ chars; shorter tokens are ignored.
+		if len(tok.text) < 20 {
+			continue
+		}
+		if shannonEntropy(tok.text) < s.entropyThreshold {
+			continue
+		}
+		found = append(found, SecretMatch{
+			Pattern: "high_entropy",
+			Action:  ActionWarn,
+			Start:   tok.start,
+			End:     tok.start + len(tok.text),
+		})
+	}
+	return found
+}
+
+// token is one run of token characters together with its byte offset.
+type token struct {
+	text  string
+	start int
+}
+
+// entropyTokenize splits s into maximal runs of ASCII letters, digits,
+// '_', '-' and '/'. Every other rune acts as a separator.
+func entropyTokenize(s string) []token {
+	var out []token
+	begin := -1 // byte offset of the run in progress, -1 when outside a run
+
+	// flush closes the run in progress, if any, at byte offset end.
+	flush := func(end int) {
+		if begin < 0 {
+			return
+		}
+		out = append(out, token{text: s[begin:end], start: begin})
+		begin = -1
+	}
+
+	for pos, r := range s {
+		switch {
+		case 'a' <= r && r <= 'z',
+			'A' <= r && r <= 'Z',
+			'0' <= r && r <= '9',
+			r == '_', r == '-', r == '/':
+			if begin < 0 {
+				begin = pos
+			}
+		default:
+			flush(pos)
+		}
+	}
+	flush(len(s))
+	return out
+}
+
+// shannonEntropy calculates the Shannon entropy of s in bits per rune.
+// The empty string has entropy 0.
+func shannonEntropy(s string) float64 {
+	if s == "" {
+		return 0
+	}
+	counts := make(map[rune]float64)
+	var total float64
+	for _, r := range s {
+		counts[r]++
+		total++
+	}
+	var bits float64
+	for _, c := range counts {
+		if p := c / total; p > 0 {
+			bits -= p * math.Log2(p)
+		}
+	}
+	return bits
+}
+
+// defaultPatterns returns gitleaks-derived patterns for common secret formats.
+func defaultPatterns() []SecretPattern {
+	// Every built-in pattern uses ActionRedact. Order matters to callers
+	// that look at the first match, so new entries go at the end of a group.
+	patterns := []struct {
+		name  string
+		regex string
+	}{
+		// Anthropic
+		{"anthropic_api_key", `sk-ant-(?:api)?[a-zA-Z0-9_-]{20,}`},
+		// OpenAI
+		{"openai_api_key", `sk-(?:proj-)?[a-zA-Z0-9_-]{20,}`},
+		// Google
+		{"google_api_key", `AIza[a-zA-Z0-9_-]{35}`},
+		// AWS
+		{"aws_access_key", `(?:AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}`},
+		{"aws_secret_key", `(?i)aws_secret_access_key\s*=\s*[a-zA-Z0-9/+=]{40}`},
+		// GitHub
+		{"github_pat", `gh[pousr]_[a-zA-Z0-9]{36,}`},
+		{"github_fine_grained", `github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}`},
+		// GitLab
+		{"gitlab_pat", `glpat-[a-zA-Z0-9_-]{20,}`},
+		// Slack
+		{"slack_token", `xox[bpears]-[a-zA-Z0-9-]{10,}`},
+		// Stripe
+		{"stripe_key", `(?:sk|pk)_(?:live|test)_[a-zA-Z0-9]{24,}`},
+		// Private keys: match through the END footer when one is present so
+		// the key body is redacted too, not just the BEGIN header.
+		{"private_key", `(?s)-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----(?:.*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----)?`},
+		// Generic secrets in assignments
+		{"generic_secret_assign", `(?i)(?:password|secret|token|api_key|apikey|auth)\s*[:=]\s*['"][a-zA-Z0-9_/+=\-]{8,}['"]`},
+		// Mistral: exactly 32 alphanumeric characters. The word boundaries
+		// stop it from redacting the first 32 characters of longer tokens
+		// such as commit hashes. Still broad (e.g. MD5 digests match).
+		{"mistral_api_key", `\b[a-zA-Z0-9]{32}\b`},
+		// Database URLs with credentials
+		{"database_url", `(?i)(?:postgres|mysql|mongodb|redis)://[^:]+:[^@]+@`},
+		// .env file patterns: the m flag lets ^ and $ match at every line,
+		// and [ \t] keeps a match from running onto the next line.
+		{"env_secret", `(?im)^[A-Z_]{2,}(?:_KEY|_SECRET|_TOKEN|_PASSWORD)[ \t]*=[ \t]*.{8,}$`},
+	}
+
+	result := make([]SecretPattern, 0, len(patterns))
+	for _, p := range patterns {
+		re, err := regexp.Compile(p.regex)
+		if err != nil {
+			continue // skip invalid patterns
+		}
+		result = append(result, SecretPattern{
+			Name:   p.name,
+			Regex:  re,
+			Action: ActionRedact,
+		})
+	}
+	return result
+}
diff --git a/internal/security/security_test.go b/internal/security/security_test.go
new file mode 100644
index 0000000..456db71
--- /dev/null
+++ b/internal/security/security_test.go
@@ -0,0 +1,377 @@
+package security
+
+import (
+	"strings"
+	"testing"
+
+	"somegit.dev/Owlibou/gnoma/internal/message"
+)
+
+// --- Scanner ---
+
+func TestScanner_DetectsAnthropicKey(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("my key is sk-ant-api03-abcdefghijklmnopqrstuvwxyz")
+	if len(matches) == 0 {
+		// Fatal, not Error: the index below would panic on an empty slice.
+		t.Fatal("should detect Anthropic API key")
+	}
+	if matches[0].Pattern != "anthropic_api_key" {
+		t.Errorf("pattern = %q, want anthropic_api_key", matches[0].Pattern)
+	}
+}
+
+func TestScanner_DetectsOpenAIKey(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("key: sk-proj-abcdefghijklmnopqrstuvwxyz123456")
+	if len(matches) == 0 {
+		t.Error("should detect OpenAI API key")
+	}
+}
+
+func TestScanner_DetectsAWSKey(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("AKIAIOSFODNN7EXAMPLE")
+	if len(matches) == 0 {
+		// Fatal, not Error: the index below would panic on an empty slice.
+		t.Fatal("should detect AWS access key")
+	}
+	if matches[0].Pattern != "aws_access_key" {
+		t.Errorf("pattern = %q", matches[0].Pattern)
+	}
+}
+
+func TestScanner_DetectsGitHubPAT(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("token: ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij")
+	hasGH := false
+	for _, m := range matches {
+		if m.Pattern == "github_pat" {
+			hasGH = true
+			break
+		}
+	}
+	if !hasGH {
+		t.Error("should detect GitHub PAT")
+	}
+}
+
+func TestScanner_DetectsPrivateKey(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("-----BEGIN RSA PRIVATE KEY-----\nMIIE...")
+	hasKey := false
+	for _, m := range matches {
+		if m.Pattern == "private_key" {
+			hasKey = true
+			break
+		}
+	}
+	if !hasKey {
+		t.Error("should detect private key header")
+	}
+}
+
+func TestScanner_DetectsGenericSecret(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan(`password = "supersecretpassword123"`)
+	hasGeneric := false
+	for _, m := range matches {
+		if m.Pattern == "generic_secret_assign" {
+			hasGeneric = true
+			break
+		}
+	}
+	if !hasGeneric {
+		t.Error("should detect generic secret assignment")
+	}
+}
+
+func TestScanner_DetectsDatabaseURL(t *testing.T) {
+	s := NewScanner(4.5)
+	matches := s.Scan("postgres://admin:secretpass@db.example.com:5432/mydb")
+	hasDB := false
+	for _, m := range matches {
+		if m.Pattern == "database_url" {
+			hasDB = true
+			break
+		}
+	}
+	if !hasDB {
+		t.Error("should detect database URL with credentials")
+	}
+}
+
+func TestScanner_NoFalsePositives(t *testing.T) {
+	s := NewScanner(6.0) // high entropy threshold to avoid false positives
+	safe := []string{
+		"hello world",
+		"func main() {}",
+		"https://example.com/path",
+		"go test ./...",
+		"The quick brown fox jumps over the lazy dog",
+	}
+	for _, text := range safe {
+		matches := s.Scan(text)
+		if len(matches) > 0 {
+			t.Errorf("false positive on %q: %v", text, matches[0].Pattern)
+		}
+	}
+}
+
+func TestScanner_Entropy(t *testing.T) {
+	s := NewScanner(4.0) // lower threshold for testing
+
+	// High entropy string (random-looking)
+	matches := s.Scan("token: aB3dE5fG7hI9jK1lM3nO5pQ7rS9tU1v")
+	hasEntropy := false
+	for _, m := range matches {
+		if m.Pattern == "high_entropy" {
+			hasEntropy = true
+			break
+		}
+	}
+	if !hasEntropy {
+		t.Error("should detect high entropy string")
+	}
+}
+
+func TestShannonEntropy(t *testing.T) {
+	tests := []struct {
+		input   string
+		minBits float64
+		maxBits float64
+	}{
+		{"aaaa", 0, 0.1},             // very low entropy
+		{"abcd", 1.9, 2.1},           // 4 unique chars = ~2 bits
+		{"abcdefgh", 2.9, 3.1},       // 8 unique = ~3 bits
+		{"aB3dE5fG7hI9jK", 3.5, 4.5}, // mixed case + digits
+	}
+	for _, tt := range tests {
+		e := shannonEntropy(tt.input)
+		if e < tt.minBits || e > tt.maxBits {
+			t.Errorf("shannonEntropy(%q) = %.2f, want [%.1f, %.1f]", tt.input, e, tt.minBits, tt.maxBits)
+		}
+	}
+}
+
+// --- Redactor ---
+
+func TestRedact_SingleMatch(t *testing.T) {
+	content := `AKIAIOSFODNN7EXAMPLE is my key`
+	s := NewScanner(6.0)
+	matches := s.Scan(content)
+
+	result := Redact(content, matches)
+	if strings.Contains(result, "AKIA") {
+		t.Error("should have redacted the key")
+	}
+	if !strings.Contains(result, "[REDACTED]") {
+		t.Error("should contain [REDACTED] placeholder")
+	}
+}
+
+func TestRedact_MultipleMatches(t *testing.T) {
+	content := "aws: AKIAIOSFODNN7EXAMPLE github: ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij"
+	s := NewScanner(6.0)
+	matches := s.Scan(content)
+
+	result := Redact(content, matches)
+	if strings.Contains(result, "AKIA") {
+		t.Error("should redact AWS key")
+	}
+	if strings.Contains(result, "ghp_") {
+		t.Error("should redact GitHub PAT")
+	}
+	count := strings.Count(result, "[REDACTED]")
+	if count < 2 {
+		t.Errorf("expected at least 2 redactions, got %d in: %q", count, result)
+	}
+}
+
+func TestRedact_NoMatches(t *testing.T) {
+	content := "hello world"
+	result := Redact(content, nil)
+	if result != content {
+		t.Errorf("should return unchanged content, got %q", result)
+	}
+}
+
+func TestRedact_SkipsWarnAction(t *testing.T) {
+	matches := []SecretMatch{
+		{Pattern: "test", Action: ActionWarn, Start: 0, End: 5},
+	}
+	result := Redact("hello world", matches)
+	if result != "hello world" {
+		t.Errorf("warn-only matches should not be redacted, got %q", result)
+	}
+}
+
+// --- Unicode Sanitization ---
+
+func TestSanitizeUnicode_Normal(t *testing.T) {
+	normal := "Hello, world! 123"
+	result := SanitizeUnicode(normal)
+	if result != normal {
+		t.Errorf("normal text should be unchanged, got %q", result)
+	}
+}
+
+func TestSanitizeUnicode_StripsTags(t *testing.T) {
+	// Unicode tag characters (U+E0000-U+E007F) used for ASCII smuggling
+	tagged := "Hello" + string([]rune{0xE0048, 0xE0065, 0xE006C, 0xE006C, 0xE006F}) + " world"
+	result := SanitizeUnicode(tagged)
+	if result != "Hello world" {
+		t.Errorf("should strip tag characters, got %q (len=%d)", result, len(result))
+	}
+}
+
+func TestSanitizeUnicode_StripsZeroWidth(t *testing.T) {
+	// Zero-width space (U+200B), zero-width joiner (U+200D)
+	zwsp := "Hello\u200B\u200Dworld"
+	result := SanitizeUnicode(zwsp)
+	if result != "Helloworld" {
+		t.Errorf("should strip zero-width characters, got %q", result)
+	}
+}
+
+func TestSanitizeUnicode_StripsRTL(t *testing.T) {
+	// RTL override (U+202E) used for visual spoofing
+	rtl := "Hello\u202Eworld"
+	result := SanitizeUnicode(rtl)
+	if strings.ContainsRune(result, 0x202E) {
+		t.Error("should strip RTL override character")
+	}
+}
+
+func TestSanitizeUnicode_PreservesNewlines(t *testing.T) {
+	multiline := "line1\nline2\ttab"
+	result := SanitizeUnicode(multiline)
+	if result != multiline {
+		t.Errorf("should preserve newlines and tabs, got %q", result)
+	}
+}
+
+func TestSanitizeUnicode_PreservesEmoji(t *testing.T) {
+	emoji := "Hello 😊 world"
+	result := SanitizeUnicode(emoji)
+	if result != emoji {
+		t.Errorf("should preserve emoji, got %q", result)
+	}
+}
+
+// --- Incognito ---
+
+func TestIncognito_DefaultOff(t *testing.T) {
+	m := NewIncognitoMode()
+	if m.Active() {
+		t.Error("should default to inactive")
+	}
+	if !m.ShouldPersist() {
+		t.Error("should allow persistence when not incognito")
+	}
+	if !m.ShouldLearn() {
+		t.Error("should allow learning when not incognito")
+	}
+}
+
+func TestIncognito_Activate(t *testing.T) {
+	m := NewIncognitoMode()
+	m.Activate()
+
+	if !m.Active() {
+		t.Error("should be active")
+	}
+	if m.ShouldPersist() {
+		t.Error("should not persist in incognito")
+	}
+	if m.ShouldLearn() {
+		t.Error("should not learn in incognito")
+	}
+	if m.ShouldLogContent() {
+		t.Error("should not log content in incognito")
+	}
+}
+
+func TestIncognito_Toggle(t *testing.T) {
+	m := NewIncognitoMode()
+
+	active := m.Toggle()
+	if !active {
+		t.Error("first toggle should activate")
+	}
+
+	active = m.Toggle()
+	if active {
+		t.Error("second toggle should deactivate")
+	}
+}
+
+// --- Firewall ---
+
+func TestFirewall_ScanOutgoing(t *testing.T) {
+	fw := NewFirewall(FirewallConfig{
+		ScanOutgoing:     true,
+		EntropyThreshold: 6.0,
+	})
+
+	msgs := []message.Message{
+		message.NewUserText("my key is sk-ant-api03-abcdefghijklmnopqrstuvwxyz"),
+	}
+
+	cleaned := fw.ScanOutgoingMessages(msgs)
+	text := cleaned[0].TextContent()
+
+	if strings.Contains(text, "sk-ant-") {
+		t.Error("should redact Anthropic key from outgoing message")
+	}
+	if !strings.Contains(text, "[REDACTED]") {
+		t.Errorf("should contain [REDACTED], got %q", text)
+	}
+}
+
+func TestFirewall_ScanToolResult(t *testing.T) {
+	fw := NewFirewall(FirewallConfig{
+		ScanToolResults:  true,
+		EntropyThreshold: 6.0,
+	})
+
+	result := fw.ScanToolResult("contents of .env:\nOPENAI_API_KEY=sk-proj-testkey1234567890abcdef12345")
+	if strings.Contains(result, "sk-proj-") {
+		t.Error("should redact key from tool result")
+	}
+}
+
+func TestFirewall_DisabledScanning(t *testing.T) {
+	fw := NewFirewall(FirewallConfig{
+		ScanOutgoing:    false,
+		ScanToolResults: false,
+	})
+
+	original := "sk-ant-api03-abcdefghijklmnopqrstuvwxyz"
+	msgs := []message.Message{message.NewUserText(original)}
+
+	cleaned := fw.ScanOutgoingMessages(msgs)
+	if cleaned[0].TextContent() != original {
+		t.Error("disabled scanning should pass through unchanged")
+	}
+
+	result := fw.ScanToolResult(original)
+	if result != original {
+		t.Error("disabled scanning should pass through tool results unchanged")
+	}
+}
+
+func TestFirewall_UnicodeCleanedBeforeSecretScan(t *testing.T) {
+	fw := NewFirewall(FirewallConfig{
+		ScanOutgoing:     true,
+		EntropyThreshold: 6.0,
+	})
+
+	// Unicode tags embedded in text
+	tagged := "normal text" + string([]rune{0xE0048, 0xE0065}) + " more text"
+	msgs := []message.Message{message.NewUserText(tagged)}
+
+	cleaned := fw.ScanOutgoingMessages(msgs)
+	text := cleaned[0].TextContent()
+	if strings.ContainsRune(text, 0xE0048) {
+		t.Error("unicode tags should be stripped")
+	}
+}