Files
vikingowl 2f8d4c412f feat(router): cloud-arm defaults, gpt-5.3-codex registration
Closes R-4 and R-5 of the routing-defaults plan.

R-4: Strengths + CostWeight defaults for closed frontier models.
Cloud entries land in the same knownFamilyDefaults table as local
ones, with MaxComplexity intentionally left zero (cloud arms get
no complexity ceiling). CostWeight tuned per the plan's rationale:

  claude-opus-4-7    → Planning/SecurityReview/Debug/Refactor, 0.3
  claude-sonnet-4-6  → Generation/Refactor/Review,             0.7
  gpt-5.5            → Planning/SecurityReview/Generation,     0.3
  gpt-5.3-codex      → Generation/Refactor/Debug/UnitTest,     0.6
  gpt-5.2            → Orchestration/Review,                   0.8
  gemini-3.1-pro     → Planning/Review/Orchestration,          0.5
  gemini-3.5-flash   → Boilerplate/Explain/Orchestration,      1.2

The 0.3 weight on frontier arms keeps them competitive on
SecurityReview / Planning despite $4+/Mtok; 1.2 on Gemini Flash
penalizes cost more so it only wins when cost is genuinely
decisive (boilerplate, explain).

Mechanism: extracted applyFamilyDefaults into defaults.go and call
it from Router.RegisterArm. Single source of truth — both local
discovery and the primary-provider path in cmd/gnoma/main.go now
flow through the same defaults application. Removed the duplicate
apply block from RegisterDiscoveredModels.

Legacy model IDs (claude-opus-4-20250514, gpt-4o, o3, gemini-2.5-pro,
etc.) intentionally do not match any table entry — keeps users on
pinned older models safe from imposed 2026 Strengths.

R-5: gpt-5.3-codex registration.

  - internal/provider/openai/provider.go: added to fallbackModels
    and inferOpenAIModelCapabilities (400K context, 32K output).
  - internal/provider/ratelimits.go: gpt-5.3-codex and its dated
    alias gpt-5.3-codex-2026-02-15 added with the same Tier 1
    quotas as gpt-5.2.

Gemini 3.x (3.1-pro-preview, 3.5-flash, 3.1-flash-lite) was already
registered in both google/provider.go and ratelimits.go — no change
needed for that part of R-5.

Test coverage:
- ResolveFamilyDefaults table-driven across all 7 cloud entries
  including prefix-sharing (gpt-5.5-pro → gpt-5.5 defaults,
  gemini-3.1-pro-preview → gemini-3.1-pro defaults).
- Legacy IDs return !ok.
- RegisterArm applies cloud defaults end-to-end.
- User-supplied Strengths and CostWeight are not overridden.
- ID.Model() fallback works when ModelName is empty (test code
  often constructs arms this way).

Refs: docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
2026-05-23 21:39:48 +02:00

247 lines
6.9 KiB
Go

package openai
import (
"context"
"fmt"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/stream"
oai "github.com/openai/openai-go"
"github.com/openai/openai-go/option"
)
// defaultModel is the model ID a provider falls back to when its
// configuration does not name one.
const defaultModel = "gpt-5.5"
// Provider implements provider.Provider for the OpenAI API.
//
// Instances are built by New or NewWithStreamOptions; the fields below are
// populated there and only read by the methods in this file.
type Provider struct {
// client is the openai-go API client used for chat completions and model listing.
client *oai.Client
// name is the provider identifier reported by Name and stamped on every ModelInfo.
name string
// model is the default model ID, used when a request does not specify one.
model string
streamOpts []option.RequestOption // injected per-request (e.g. think:false for Ollama)
}
// New builds an OpenAI provider from cfg with no additional per-request
// stream options. It is shorthand for NewWithStreamOptions(cfg, nil) and
// returns whatever that constructor returns, including its error.
func New(cfg provider.ProviderConfig) (provider.Provider, error) {
	var noExtraOpts []option.RequestOption // nil: nothing extra to inject
	return NewWithStreamOptions(cfg, noExtraOpts)
}
// NewWithStreamOptions builds an OpenAI provider whose streaming requests
// carry the extra options in streamOpts. It exists for adapters such as
// Ollama/llama.cpp that need non-standard request body fields.
//
// cfg.APIKey is mandatory; an empty key yields an error and a nil provider.
// cfg.BaseURL and cfg.MaxRetries are forwarded to the client only when set,
// and an empty cfg.Model is replaced by defaultModel.
func NewWithStreamOptions(cfg provider.ProviderConfig, streamOpts []option.RequestOption) (provider.Provider, error) {
	if cfg.APIKey == "" {
		return nil, fmt.Errorf("openai: api key required")
	}

	// At most three client-level options: key, base URL, retry count.
	clientOpts := make([]option.RequestOption, 0, 3)
	clientOpts = append(clientOpts, option.WithAPIKey(cfg.APIKey))
	if base := cfg.BaseURL; base != "" {
		clientOpts = append(clientOpts, option.WithBaseURL(base))
	}
	if retries := cfg.MaxRetries; retries != nil {
		clientOpts = append(clientOpts, option.WithMaxRetries(*retries))
	}

	apiClient := oai.NewClient(clientOpts...)

	// Start from the package default and let an explicit config value win.
	prov := &Provider{
		client:     &apiClient,
		name:       "openai",
		model:      defaultModel,
		streamOpts: streamOpts,
	}
	if cfg.Model != "" {
		prov.model = cfg.Model
	}
	return prov, nil
}
// Stream starts a streaming chat completion for req and wraps the raw
// SDK stream in this package's stream adapter.
//
// The model named in req takes precedence; when req.Model is empty the
// provider's configured default is used. The provider's per-request stream
// options are appended to the call. The returned error is always nil here.
func (p *Provider) Stream(ctx context.Context, req provider.Request) (stream.Stream, error) {
	chosen := p.model
	if req.Model != "" {
		chosen = req.Model
	}

	params := translateRequest(req)
	params.Model = chosen

	sdkStream := p.client.Chat.Completions.NewStreaming(ctx, params, p.streamOpts...)
	return newOpenAIStream(sdkStream), nil
}
// Name reports the provider identifier stored at construction time
// ("openai" for providers built by the constructors in this file).
func (p *Provider) Name() string {
	return p.name
}
// DefaultModel reports the model ID this provider uses when a request
// does not name one.
func (p *Provider) DefaultModel() string {
	return p.model
}
// Models lists the models available to this provider by paging through the
// API's model listing and attaching capabilities inferred from each ID.
//
// The call is best-effort: if paging ends with an error, or the API reports
// no models at all, the hardcoded fallback catalogue is returned instead.
// The error result is therefore always nil.
func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
	it := p.client.Models.ListAutoPaging(ctx)

	var listed []provider.ModelInfo
	for it.Next() {
		id := it.Current().ID
		listed = append(listed, provider.ModelInfo{
			ID:           id,
			Name:         id,
			Provider:     p.name,
			Capabilities: inferOpenAIModelCapabilities(id),
		})
	}

	// A failed listing and an empty listing are handled the same way:
	// serve the built-in catalogue rather than surfacing an error.
	if it.Err() != nil || len(listed) == 0 {
		return p.fallbackModels(), nil
	}
	return listed, nil
}
// fallbackModels returns the built-in catalogue of known OpenAI models,
// each tagged with this provider's name. Models uses it when the live
// listing fails or comes back empty.
//
// Current GPT-5.x IDs come first, followed by legacy IDs that are kept for
// users pinned to older models.
func (p *Provider) fallbackModels() []provider.ModelInfo {
	// allEfforts returns a fresh slice on every call so that no two entries
	// share a backing array.
	allEfforts := func() []provider.EffortLevel {
		return []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh}
	}
	// entry assembles one catalogue row for this provider.
	entry := func(id, label string, caps provider.Capabilities) provider.ModelInfo {
		return provider.ModelInfo{ID: id, Name: label, Provider: p.name, Capabilities: caps}
	}

	return []provider.ModelInfo{
		entry("gpt-5.5", "GPT-5.5", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 1_000_000, MaxOutput: 32000,
		}),
		entry("gpt-5.5-pro", "GPT-5.5 Pro", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 1_000_000, MaxOutput: 32000,
		}),
		entry("gpt-5.3-codex", "GPT-5.3 Codex", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 400000, MaxOutput: 32000,
		}),
		entry("gpt-5.2", "GPT-5.2 Thinking", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 400000, MaxOutput: 32000,
		}),
		// The "Instant" variant is listed without thinking modes.
		entry("gpt-5.2-chat-latest", "GPT-5.2 Instant", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ContextWindow: 400000, MaxOutput: 32000,
		}),

		// Legacy IDs retained for users pinned to older models.
		entry("gpt-4o", "GPT-4o (legacy)", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ContextWindow: 128000, MaxOutput: 16384,
		}),
		entry("gpt-4o-mini", "GPT-4o Mini (legacy)", provider.Capabilities{
			ToolUse: true, JSONOutput: true, Vision: true,
			ContextWindow: 128000, MaxOutput: 16384,
		}),
		// The o3 entries are listed with thinking modes but without Vision.
		entry("o3", "o3", provider.Capabilities{
			ToolUse: true, JSONOutput: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 200000, MaxOutput: 100000,
		}),
		entry("o3-mini", "o3 Mini", provider.Capabilities{
			ToolUse: true, JSONOutput: true,
			ThinkingModes: allEfforts(),
			ContextWindow: 200000, MaxOutput: 100000,
		}),
	}
}
// inferOpenAIModelCapabilities infers capabilities from a model ID.
//
// Every ID starts from a GPT-5.x baseline (tool use, JSON output, vision,
// all three thinking effort levels, 400K context, 32K output). Known IDs
// then override the fields that differ. IDs that match no case keep the
// baseline unchanged.
//
// A trailing "-YYYY-MM-DD" snapshot stamp is ignored when matching, so a
// dated ID such as "gpt-4o-2024-08-06" or "gpt-5.3-codex-2026-02-15"
// receives the same capabilities as its undated alias instead of silently
// falling through to the baseline.
func inferOpenAIModelCapabilities(modelID string) provider.Capabilities {
	// Default capabilities for most modern OpenAI models (GPT-5.x baseline).
	caps := provider.Capabilities{
		ToolUse:       true,
		JSONOutput:    true,
		Vision:        true,
		ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
		ContextWindow: 400000,
		MaxOutput:     32000,
	}

	// Model-specific overrides, keyed on the ID without its snapshot date.
	switch trimSnapshotDate(modelID) {
	case "gpt-5.5", "gpt-5.5-pro":
		caps.ContextWindow = 1_000_000
		caps.MaxOutput = 32000
	case "gpt-5.3-codex":
		caps.ContextWindow = 400000
		caps.MaxOutput = 32000
	case "gpt-5.2":
		caps.ContextWindow = 400000
		caps.MaxOutput = 32000
	case "gpt-5.2-chat-latest":
		// Kept consistent with fallbackModels, which lists this ID without
		// any thinking modes.
		caps.ThinkingModes = nil
		caps.ContextWindow = 400000
		caps.MaxOutput = 32000
	case "gpt-4o", "gpt-4o-mini":
		caps.ThinkingModes = nil
		caps.ContextWindow = 128000
		caps.MaxOutput = 16384
	case "o3", "o3-mini":
		// NOTE(review): fallbackModels lists o3/o3-mini without Vision, while
		// the baseline leaves Vision true here — confirm which is intended.
		caps.ContextWindow = 200000
		caps.MaxOutput = 100000
	case "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613":
		caps.ThinkingModes = nil
		caps.Vision = false
		caps.ContextWindow = 8192
		caps.MaxOutput = 8192
	case "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613":
		caps.ThinkingModes = nil
		caps.Vision = false
		caps.ToolUse = false
		caps.ContextWindow = 16384
		caps.MaxOutput = 4096
	}
	return caps
}

// trimSnapshotDate removes a trailing "-YYYY-MM-DD" stamp from id and returns
// the remainder. IDs without such a suffix, and IDs that consist of nothing
// but the stamp, are returned unchanged.
func trimSnapshotDate(id string) string {
	const stampLen = len("-2006-01-02")
	if len(id) <= stampLen {
		return id
	}
	tail := id[len(id)-stampLen:]
	for i := 0; i < len(tail); i++ {
		c := tail[i]
		switch i {
		case 0, 5, 8: // separator positions
			if c != '-' {
				return id
			}
		default: // digit positions
			if c < '0' || c > '9' {
				return id
			}
		}
	}
	return id[:len(id)-stampLen]
}