fix(slm): enforce JSON output + strip thinking-block prefixes
Two structural fixes for the SLM classifier's 100% failure rate: (1) Pass ResponseFormat=json_object + Temperature=0 + TopP=1 + MaxTokens=128 in the classifier Request. The provider type already supports these but callSLM was leaving them unset, which meant ollama (and any other backend) ran with default sampling and free-form text output. format=json mode in particular makes ollama emit only valid JSON at decoding time — eliminates the majority of parse failures. (2) Harden extractJSON to strip common thinking-block tags before hunting for the brace. Seen in the wild: <think>…</think> (Qwen3 distillations) and <Thought Process>…</Thought Process> (tiny3.5). Defensive list also covers <reasoning>, <thoughts>. Unterminated thinking blocks fall back to brace-search so we still have a shot. Table-driven tests cover all variants plus the no-tag and fenced-json paths to confirm no regression. Even with format=json on a capable provider, the extractor is the safety net for backends that don't enforce format strictly — same defence-in-depth shape as the existing fence stripping. Doesn't fix the deeper architecture question (encoder + bandit preferred over decoder-SLM as classifier — see plan doc landing in the same PR); fixes the immediate bug.
This commit is contained in:
@@ -102,9 +102,25 @@ func (c *Classifier) Classify(ctx context.Context, prompt string, history []mess
|
||||
}
|
||||
|
||||
func (c *Classifier) callSLM(ctx context.Context, prompt string) (*classifyResponse, error) {
|
||||
// Constrain the model toward valid, deterministic JSON output. Without
|
||||
// these settings small models routinely ignore the JSON-only system
|
||||
// prompt, emit reasoning blocks (<think>, <Thought Process>) or just
|
||||
// answer the user's prompt in prose. ResponseFormat=json_object asks
|
||||
// the provider to enforce JSON at decoding time where supported
|
||||
// (ollama 'format=json', llama.cpp grammar, OpenAI json_object). Even
|
||||
// when the provider can't enforce, the explicit signal nudges the
|
||||
// adapter to set the right backend flag.
|
||||
temp := 0.0
|
||||
topP := 1.0
|
||||
req := provider.Request{
|
||||
Model: c.model,
|
||||
SystemPrompt: classifySystemPrompt,
|
||||
Temperature: &temp,
|
||||
TopP: &topP,
|
||||
MaxTokens: 128, // classification output is ~50 tokens; cap to prevent runaway reasoning
|
||||
ResponseFormat: &provider.ResponseFormat{
|
||||
Type: provider.ResponseJSON,
|
||||
},
|
||||
Messages: []message.Message{
|
||||
{
|
||||
Role: message.RoleUser,
|
||||
@@ -138,10 +154,22 @@ func (c *Classifier) callSLM(ctx context.Context, prompt string) (*classifyRespo
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
// extractJSON pulls the first {...} substring from s, stripping markdown fences if present.
|
||||
// extractJSON pulls the first {...} substring from s, stripping markdown
|
||||
// fences and known thinking-block tags. Small models routinely violate
|
||||
// the JSON-only system prompt by emitting reasoning tokens first, so
|
||||
// the extractor must tolerate prefixes the model wasn't asked to emit.
|
||||
func extractJSON(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
|
||||
// Strip known thinking-block tags. Order matters: longer/more-
|
||||
// specific names first so a partial match doesn't shadow a real
|
||||
// one. Seen in the wild on Qwen3 (<think>) and tiny3.5
|
||||
// (<Thought Process>); the others are defensive against similar
|
||||
// fine-tunes.
|
||||
for _, tag := range []string{"Thought Process", "thinking", "reasoning", "thoughts", "think"} {
|
||||
s = stripTagBlock(s, tag)
|
||||
}
|
||||
|
||||
// Strip ```json ... ``` fences.
|
||||
if strings.HasPrefix(s, "```") {
|
||||
end := strings.LastIndex(s, "```")
|
||||
@@ -171,3 +199,28 @@ func extractJSON(s string) string {
|
||||
}
|
||||
return s[start:]
|
||||
}
|
||||
|
||||
// stripTagBlock removes <tag>...</tag> blocks (case-insensitive on the
|
||||
// tag name) from the start of s. Returns the original string if the tag
|
||||
// is not at the start. Idempotent; safe to call repeatedly.
|
||||
func stripTagBlock(s, tag string) string {
|
||||
trimmed := strings.TrimSpace(s)
|
||||
open := "<" + tag
|
||||
lower := strings.ToLower(trimmed)
|
||||
if !strings.HasPrefix(lower, strings.ToLower(open)) {
|
||||
return s
|
||||
}
|
||||
// Find the matching closing tag, case-insensitive.
|
||||
close := "</" + tag + ">"
|
||||
closeIdx := strings.Index(strings.ToLower(trimmed), strings.ToLower(close))
|
||||
if closeIdx < 0 {
|
||||
// Unterminated thinking block — strip up to the first '{'
|
||||
// so we still have a shot at extracting JSON that follows.
|
||||
braceIdx := strings.IndexByte(trimmed, '{')
|
||||
if braceIdx > 0 {
|
||||
return strings.TrimSpace(trimmed[braceIdx:])
|
||||
}
|
||||
return s
|
||||
}
|
||||
return strings.TrimSpace(trimmed[closeIdx+len(close):])
|
||||
}
|
||||
|
||||
@@ -215,3 +215,45 @@ func TestClassifier_ContextPassedToHistory(t *testing.T) {
|
||||
t.Errorf("Type = %s, want Explain", task.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractJSON_StripsThinkingTags(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "qwen-think-block",
|
||||
in: `<think>Let me decide</think>{"task_type":"Debug","complexity":0.5,"requires_tools":true}`,
|
||||
want: `{"task_type":"Debug","complexity":0.5,"requires_tools":true}`,
|
||||
},
|
||||
{
|
||||
name: "tiny3.5-thought-process",
|
||||
in: "<Thought Process>\nUser wants debugging help.\n</Thought Process>\n{\"task_type\":\"Debug\",\"complexity\":0.4,\"requires_tools\":true}",
|
||||
want: `{"task_type":"Debug","complexity":0.4,"requires_tools":true}`,
|
||||
},
|
||||
{
|
||||
name: "unterminated-think-falls-back-to-brace",
|
||||
in: `<think>incomplete reasoning {"task_type":"Explain","complexity":0.2,"requires_tools":false}`,
|
||||
want: `{"task_type":"Explain","complexity":0.2,"requires_tools":false}`,
|
||||
},
|
||||
{
|
||||
name: "no-tags-still-works",
|
||||
in: `{"task_type":"Generation","complexity":0.6,"requires_tools":false}`,
|
||||
want: `{"task_type":"Generation","complexity":0.6,"requires_tools":false}`,
|
||||
},
|
||||
{
|
||||
name: "fenced-json-still-works",
|
||||
in: "```json\n{\"task_type\":\"Refactor\",\"complexity\":0.5,\"requires_tools\":true}\n```",
|
||||
want: `{"task_type":"Refactor","complexity":0.5,"requires_tools":true}`,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := extractJSON(tc.in)
|
||||
if got != tc.want {
|
||||
t.Errorf("extractJSON(...)\n got: %q\n want: %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user