From c0c2e4bff572c1923f536e1abfbac564fad67abf Mon Sep 17 00:00:00 2001 From: vikingowl Date: Mon, 25 May 2026 01:19:51 +0200 Subject: [PATCH] fix(slm): enforce JSON output + strip thinking-block prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two structural fixes for the SLM classifier's 100% failure rate: (1) Pass ResponseFormat=json_object + Temperature=0 + TopP=1 + MaxTokens=128 in the classifier Request. The provider type already supports these but callSLM was leaving them unset, which meant ollama (and any other backend) ran with default sampling and free-form text output. format=json mode in particular makes ollama emit only valid JSON at decoding time — eliminates the majority of parse failures. (2) Harden extractJSON to strip common thinking-block tags before hunting for the brace. Seen in the wild: (Qwen3 distillations) and (tiny3.5). Defensive list also covers , . Unterminated thinking blocks fall back to brace-search so we still have a shot. Table-driven tests cover all variants plus the no-tag and fenced-json paths to confirm no regression. Even with format=json on a capable provider, the extractor is the safety net for backends that don't enforce format strictly — same defence-in-depth shape as the existing fence stripping. Doesn't fix the deeper architecture question (encoder + bandit preferred over decoder-SLM as classifier — see plan doc landing in the same PR); fixes the immediate bug. --- internal/slm/classifier.go | 55 ++++++++++++++++++++++++++++++++- internal/slm/classifier_test.go | 42 +++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/internal/slm/classifier.go b/internal/slm/classifier.go index 086a803..5afe423 100644 --- a/internal/slm/classifier.go +++ b/internal/slm/classifier.go @@ -102,9 +102,25 @@ func (c *Classifier) Classify(ctx context.Context, prompt string, history []mess } func (c *Classifier) callSLM(ctx context.Context, prompt string) (*classifyResponse, error) { + // Constrain the model toward valid, deterministic JSON output. Without + // these settings small models routinely ignore the JSON-only system + // prompt, emit reasoning blocks (, ) or just + // answer the user's prompt in prose. ResponseFormat=json_object asks + // the provider to enforce JSON at decoding time where supported + // (ollama 'format=json', llama.cpp grammar, OpenAI json_object). Even + // when the provider can't enforce, the explicit signal nudges the + // adapter to set the right backend flag. + temp := 0.0 + topP := 1.0 req := provider.Request{ Model: c.model, SystemPrompt: classifySystemPrompt, + Temperature: &temp, + TopP: &topP, + MaxTokens: 128, // classification output is ~50 tokens; cap to prevent runaway reasoning + ResponseFormat: &provider.ResponseFormat{ + Type: provider.ResponseJSON, + }, Messages: []message.Message{ { Role: message.RoleUser, @@ -138,10 +154,22 @@ func (c *Classifier) callSLM(ctx context.Context, prompt string) (*classifyRespo return &resp, nil } -// extractJSON pulls the first {...} substring from s, stripping markdown fences if present. +// extractJSON pulls the first {...} substring from s, stripping markdown +// fences and known thinking-block tags. Small models routinely violate +// the JSON-only system prompt by emitting reasoning tokens first, so +// the extractor must tolerate prefixes the model wasn't asked to emit. func extractJSON(s string) string { s = strings.TrimSpace(s) + // Strip known thinking-block tags. Order matters: longer/more- + // specific names first so a partial match doesn't shadow a real + // one. Seen in the wild on Qwen3 () and tiny3.5 + // (); the others are defensive against similar + // fine-tunes. + for _, tag := range []string{"Thought Process", "thinking", "reasoning", "thoughts", "think"} { + s = stripTagBlock(s, tag) + } + // Strip ```json ... ``` fences. if strings.HasPrefix(s, "```") { end := strings.LastIndex(s, "```") @@ -171,3 +199,28 @@ func extractJSON(s string) string { } return s[start:] } + +// stripTagBlock removes ... blocks (case-insensitive on the +// tag name) from the start of s. Returns the original string if the tag +// is not at the start. Idempotent; safe to call repeatedly. +func stripTagBlock(s, tag string) string { + trimmed := strings.TrimSpace(s) + open := "<" + tag + lower := strings.ToLower(trimmed) + if !strings.HasPrefix(lower, strings.ToLower(open)) { + return s + } + // Find the matching closing tag, case-insensitive. + close := "" + closeIdx := strings.Index(strings.ToLower(trimmed), strings.ToLower(close)) + if closeIdx < 0 { + // Unterminated thinking block — strip up to the first '{' + // so we still have a shot at extracting JSON that follows. + braceIdx := strings.IndexByte(trimmed, '{') + if braceIdx > 0 { + return strings.TrimSpace(trimmed[braceIdx:]) + } + return s + } + return strings.TrimSpace(trimmed[closeIdx+len(close):]) +} diff --git a/internal/slm/classifier_test.go b/internal/slm/classifier_test.go index 4311dde..977c7b5 100644 --- a/internal/slm/classifier_test.go +++ b/internal/slm/classifier_test.go @@ -215,3 +215,45 @@ func TestClassifier_ContextPassedToHistory(t *testing.T) { t.Errorf("Type = %s, want Explain", task.Type) } } + +func TestExtractJSON_StripsThinkingTags(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + { + name: "qwen-think-block", + in: `Let me decide{"task_type":"Debug","complexity":0.5,"requires_tools":true}`, + want: `{"task_type":"Debug","complexity":0.5,"requires_tools":true}`, + }, + { + name: "tiny3.5-thought-process", + in: "\nUser wants debugging help.\n\n{\"task_type\":\"Debug\",\"complexity\":0.4,\"requires_tools\":true}", + want: `{"task_type":"Debug","complexity":0.4,"requires_tools":true}`, + }, + { + name: "unterminated-think-falls-back-to-brace", + in: `incomplete reasoning {"task_type":"Explain","complexity":0.2,"requires_tools":false}`, + want: `{"task_type":"Explain","complexity":0.2,"requires_tools":false}`, + }, + { + name: "no-tags-still-works", + in: `{"task_type":"Generation","complexity":0.6,"requires_tools":false}`, + want: `{"task_type":"Generation","complexity":0.6,"requires_tools":false}`, + }, + { + name: "fenced-json-still-works", + in: "```json\n{\"task_type\":\"Refactor\",\"complexity\":0.5,\"requires_tools\":true}\n```", + want: `{"task_type":"Refactor","complexity":0.5,"requires_tools":true}`, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := extractJSON(tc.in) + if got != tc.want { + t.Errorf("extractJSON(...)\n got: %q\n want: %q", got, tc.want) + } + }) + } +}