1606d19366
codex 0.133.0 emits two token-accounting fields at the top level that
we previously dropped:
  - cached_input_tokens — the subset of input_tokens that hit the prompt
    cache (cheaper, but still counted in input_tokens per OpenAI
    Responses API semantics)
  - reasoning_output_tokens — separately reported billable thinking
    tokens on reasoning-capable models
Map cached_input_tokens to message.Usage.CacheReadTokens and subtract
it from InputTokens. message.Usage.Add() sums InputTokens and
CacheReadTokens as peers, so the uncached residual goes in
InputTokens — matches the anthropic provider's convention and keeps
cumulative usage tracking arithmetically correct.
Fold reasoning_output_tokens into OutputTokens for accurate cost
tracking. The top-level peer positioning (vs nested in
output_tokens_details) implies a separately counted billable
quantity, not a subset of output_tokens.
Defensive clamp at zero in case a future codex build reports
cached > input due to schema drift. Includes a verbatim regression
guard against the live 2026-05-22 codex 0.133.0 output to catch
schema changes early.
331 lines
10 KiB
Go
331 lines
10 KiB
Go
package subprocess
|
|
|
|
import (
|
|
"slices"
|
|
"testing"
|
|
|
|
"somegit.dev/Owlibou/gnoma/internal/message"
|
|
"somegit.dev/Owlibou/gnoma/internal/stream"
|
|
)
|
|
|
|
func TestCodexPromptArgs_BypassDefaultsOn(t *testing.T) {
|
|
t.Setenv("GNOMA_CODEX_BYPASS_SANDBOX", "")
|
|
args := codexPromptArgs("hi")
|
|
if !slices.Contains(args, "--dangerously-bypass-approvals-and-sandbox") {
|
|
t.Errorf("default args should include sandbox bypass; got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestCodexPromptArgs_BypassOptOut(t *testing.T) {
|
|
for _, val := range []string{"0", "false", "no", "off", "FALSE"} {
|
|
t.Run(val, func(t *testing.T) {
|
|
t.Setenv("GNOMA_CODEX_BYPASS_SANDBOX", val)
|
|
args := codexPromptArgs("hi")
|
|
if slices.Contains(args, "--dangerously-bypass-approvals-and-sandbox") {
|
|
t.Errorf("env=%q should drop bypass flag; got %v", val, args)
|
|
}
|
|
if !slices.Contains(args, "exec") || !slices.Contains(args, "--json") {
|
|
t.Errorf("required base args missing; got %v", args)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCodexPromptArgs_UnknownValueDefaultsOn(t *testing.T) {
|
|
t.Setenv("GNOMA_CODEX_BYPASS_SANDBOX", "maybe")
|
|
args := codexPromptArgs("hi")
|
|
if !slices.Contains(args, "--dangerously-bypass-approvals-and-sandbox") {
|
|
t.Errorf("non-falsy value should keep bypass on; got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_ExtractsTextDelta(t *testing.T) {
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"item.completed","item":{"type":"agent_message","text":"hello world"}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(evts) == 0 {
|
|
t.Fatal("expected at least one event")
|
|
}
|
|
if evts[0].Type != stream.EventTextDelta {
|
|
t.Errorf("got type %v, want EventTextDelta", evts[0].Type)
|
|
}
|
|
if evts[0].Text != "hello world" {
|
|
t.Errorf("got text %q, want %q", evts[0].Text, "hello world")
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_ExtractsUsageFromTurnCompleted(t *testing.T) {
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":123,"output_tokens":45}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var usageEvt *stream.Event
|
|
for i := range evts {
|
|
if evts[i].Type == stream.EventUsage {
|
|
usageEvt = &evts[i]
|
|
}
|
|
}
|
|
if usageEvt == nil {
|
|
t.Fatal("no EventUsage emitted")
|
|
}
|
|
if usageEvt.Usage.InputTokens != 123 {
|
|
t.Errorf("input_tokens: got %d, want 123", usageEvt.Usage.InputTokens)
|
|
}
|
|
if usageEvt.Usage.OutputTokens != 45 {
|
|
t.Errorf("output_tokens: got %d, want 45", usageEvt.Usage.OutputTokens)
|
|
}
|
|
if usageEvt.StopReason != message.StopEndTurn {
|
|
t.Errorf("stop_reason: got %v, want StopEndTurn", usageEvt.StopReason)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_ExtractsUsageFromPromptCompletionTokens(t *testing.T) {
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"prompt_tokens":123,"completion_tokens":45}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var usageEvt *stream.Event
|
|
for i := range evts {
|
|
if evts[i].Type == stream.EventUsage {
|
|
usageEvt = &evts[i]
|
|
}
|
|
}
|
|
if usageEvt == nil {
|
|
t.Fatal("no EventUsage emitted")
|
|
}
|
|
if usageEvt.Usage.InputTokens != 123 {
|
|
t.Errorf("input_tokens: got %d, want 123", usageEvt.Usage.InputTokens)
|
|
}
|
|
if usageEvt.Usage.OutputTokens != 45 {
|
|
t.Errorf("output_tokens: got %d, want 45", usageEvt.Usage.OutputTokens)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_IgnoresOtherItemsAndTypes(t *testing.T) {
|
|
p := newCodexParser()
|
|
lines := [][]byte{
|
|
[]byte(`{"type":"item.completed","item":{"type":"tool_call","text":"something"}}`),
|
|
[]byte(`{"type":"other_type"}`),
|
|
}
|
|
|
|
for _, line := range lines {
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Errorf("unexpected error: %v", err)
|
|
}
|
|
if len(evts) != 0 {
|
|
t.Errorf("expected 0 events, got %d", len(evts))
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_SkipsNonJSONBanners(t *testing.T) {
|
|
p := newCodexParser()
|
|
// Real codex output interleaves banner lines, blank lines, and
|
|
// human-readable warnings with the JSON event stream. None of
|
|
// these may abort the turn — only the JSON events matter.
|
|
lines := [][]byte{
|
|
[]byte(""),
|
|
[]byte(" "),
|
|
[]byte("codex v1.2.3 starting"),
|
|
[]byte(`WARNING: sandbox bypass enabled`),
|
|
[]byte(`{"type":"item.completed","item":{"type":"agent_message","text":"ok"}}`),
|
|
[]byte("trailing diagnostics: 42ms"),
|
|
}
|
|
var sawText bool
|
|
for _, line := range lines {
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Errorf("non-JSON line %q caused error: %v", string(line), err)
|
|
continue
|
|
}
|
|
for _, e := range evts {
|
|
if e.Type == stream.EventTextDelta {
|
|
sawText = true
|
|
}
|
|
}
|
|
}
|
|
if !sawText {
|
|
t.Error("legitimate JSON line was swallowed by banner-skip logic")
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_MalformedJSONSkippedNotFatal(t *testing.T) {
|
|
p := newCodexParser()
|
|
// Starts with `{` so the banner-skip heuristic doesn't filter it,
|
|
// but is not valid JSON — must skip silently, not return an error.
|
|
bad := []byte(`{"type":"item.completed",`)
|
|
evts, err := p.ParseLine(bad)
|
|
if err != nil {
|
|
t.Errorf("malformed JSON should be skipped, got error: %v", err)
|
|
}
|
|
if len(evts) != 0 {
|
|
t.Errorf("expected 0 events from malformed JSON, got %d", len(evts))
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_UsageMaxOfPaths(t *testing.T) {
|
|
// Both input_tokens and prompt_tokens present with different values
|
|
// — accounting must not silently undercount by always preferring
|
|
// one field.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":100,"prompt_tokens":120,"output_tokens":30,"completion_tokens":35}}`)
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(evts) != 1 || evts[0].Type != stream.EventUsage {
|
|
t.Fatalf("expected single EventUsage, got %+v", evts)
|
|
}
|
|
if evts[0].Usage.InputTokens != 120 {
|
|
t.Errorf("input tokens = %d, want max(100, 120) = 120", evts[0].Usage.InputTokens)
|
|
}
|
|
if evts[0].Usage.OutputTokens != 35 {
|
|
t.Errorf("output tokens = %d, want max(30, 35) = 35", evts[0].Usage.OutputTokens)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_CachedInputTokens(t *testing.T) {
|
|
// codex 0.133.0 reports input_tokens as the TOTAL input (cache hits
|
|
// + new). To keep message.Usage.Add() correct — which sums
|
|
// InputTokens and CacheReadTokens as peers, not subsets — store
|
|
// the uncached residual in InputTokens and the hits separately.
|
|
// This matches the Anthropic provider's convention.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":17712,"cached_input_tokens":4992,"output_tokens":5}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(evts) != 1 || evts[0].Type != stream.EventUsage {
|
|
t.Fatalf("expected single EventUsage, got %+v", evts)
|
|
}
|
|
got := evts[0].Usage
|
|
if got.InputTokens != 12720 {
|
|
t.Errorf("InputTokens = %d, want 17712-4992 = 12720 (uncached residual)", got.InputTokens)
|
|
}
|
|
if got.CacheReadTokens != 4992 {
|
|
t.Errorf("CacheReadTokens = %d, want 4992", got.CacheReadTokens)
|
|
}
|
|
if got.OutputTokens != 5 {
|
|
t.Errorf("OutputTokens = %d, want 5", got.OutputTokens)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_ReasoningOutputTokens(t *testing.T) {
|
|
// reasoning_output_tokens appears at top level as a peer to
|
|
// output_tokens (codex 0.133.0). The peer positioning implies a
|
|
// separate billable counter, not a subset of output_tokens — so
|
|
// fold it into OutputTokens for accurate cost tracking.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":100,"output_tokens":50,"reasoning_output_tokens":200}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(evts) != 1 || evts[0].Type != stream.EventUsage {
|
|
t.Fatalf("expected single EventUsage, got %+v", evts)
|
|
}
|
|
if got := evts[0].Usage.OutputTokens; got != 250 {
|
|
t.Errorf("OutputTokens = %d, want 50 + 200 = 250", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_ZeroReasoningIsNoOp(t *testing.T) {
|
|
// Live codex 0.133.0 sample: 0 reasoning tokens (non-thinking
|
|
// model). Folding still produces the original output count.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":100,"output_tokens":5,"reasoning_output_tokens":0}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if got := evts[0].Usage.OutputTokens; got != 5 {
|
|
t.Errorf("OutputTokens = %d, want 5", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_CachedExceedsInputDoesNotUnderflow(t *testing.T) {
|
|
// Defensive: if a future codex build reports cached > input
|
|
// (schema drift, off-by-one), don't produce negative InputTokens.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":100,"cached_input_tokens":150}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if got := evts[0].Usage.InputTokens; got < 0 {
|
|
t.Errorf("InputTokens = %d, must not be negative", got)
|
|
}
|
|
if got := evts[0].Usage.CacheReadTokens; got != 150 {
|
|
t.Errorf("CacheReadTokens = %d, want 150 (recorded verbatim)", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_LiveSampleFromV0133(t *testing.T) {
|
|
// Verbatim line from the 2026-05-22 live `codex exec ... --json`
|
|
// run on codex-cli 0.133.0 — regression guard against schema drift.
|
|
p := newCodexParser()
|
|
line := []byte(`{"type":"turn.completed","usage":{"input_tokens":17712,"cached_input_tokens":4992,"output_tokens":5,"reasoning_output_tokens":0}}`)
|
|
|
|
evts, err := p.ParseLine(line)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(evts) != 1 || evts[0].Type != stream.EventUsage {
|
|
t.Fatalf("expected single EventUsage, got %+v", evts)
|
|
}
|
|
got := evts[0].Usage
|
|
if got.InputTokens != 12720 {
|
|
t.Errorf("InputTokens = %d, want 12720", got.InputTokens)
|
|
}
|
|
if got.OutputTokens != 5 {
|
|
t.Errorf("OutputTokens = %d, want 5", got.OutputTokens)
|
|
}
|
|
if got.CacheReadTokens != 4992 {
|
|
t.Errorf("CacheReadTokens = %d, want 4992", got.CacheReadTokens)
|
|
}
|
|
}
|
|
|
|
func TestCodexParser_FixtureFile(t *testing.T) {
|
|
lines := loadFixture(t, "codex")
|
|
p := newCodexParser()
|
|
evts := collectEvents(t, p, lines)
|
|
|
|
var textEvts, usageEvts int
|
|
for _, e := range evts {
|
|
switch e.Type {
|
|
case stream.EventTextDelta:
|
|
textEvts++
|
|
if e.Text != "hello" {
|
|
t.Errorf("expected text 'hello', got %q", e.Text)
|
|
}
|
|
case stream.EventUsage:
|
|
usageEvts++
|
|
if e.Usage.InputTokens != 10 || e.Usage.OutputTokens != 5 {
|
|
t.Errorf("expected 10/5 tokens, got %d/%d", e.Usage.InputTokens, e.Usage.OutputTokens)
|
|
}
|
|
}
|
|
}
|
|
if textEvts != 1 {
|
|
t.Errorf("expected 1 EventTextDelta, got %d", textEvts)
|
|
}
|
|
if usageEvts != 1 {
|
|
t.Errorf("expected 1 EventUsage, got %d", usageEvts)
|
|
}
|
|
}
|