2f8d4c412f
Closes R-4 and R-5 of the routing-defaults plan.
R-4: Strengths + CostWeight defaults for closed frontier models.
Cloud entries land in the same knownFamilyDefaults table as local
ones, with MaxComplexity intentionally left zero (cloud arms get
no complexity ceiling). CostWeight tuned per the plan's rationale:
claude-opus-4-7 → Planning/SecurityReview/Debug/Refactor, 0.3
claude-sonnet-4-6 → Generation/Refactor/Review, 0.7
gpt-5.5 → Planning/SecurityReview/Generation, 0.3
gpt-5.3-codex → Generation/Refactor/Debug/UnitTest, 0.6
gpt-5.2 → Orchestration/Review, 0.8
gemini-3.1-pro → Planning/Review/Orchestration, 0.5
gemini-3.5-flash → Boilerplate/Explain/Orchestration, 1.2
The 0.3 weight on frontier arms keeps them competitive on
SecurityReview / Planning despite $4+/Mtok; 1.2 on Gemini Flash
penalizes cost more so it only wins when cost is genuinely
decisive (boilerplate, explain).
Mechanism: extracted applyFamilyDefaults into defaults.go and call
it from Router.RegisterArm. Single source of truth — both local
discovery and the primary-provider path in cmd/gnoma/main.go now
flow through the same defaults application. Removed the duplicate
apply block from RegisterDiscoveredModels.
Legacy model IDs (claude-opus-4-20250514, gpt-4o, o3, gemini-2.5-pro,
etc.) intentionally do not match any table entry — keeps users on
pinned older models safe from imposed 2026 Strengths.
R-5: gpt-5.3-codex registration.
- internal/provider/openai/provider.go: added to fallbackModels
and inferOpenAIModelCapabilities (400K context, 32K output).
- internal/provider/ratelimits.go: gpt-5.3-codex and its dated
alias gpt-5.3-codex-2026-02-15 added with the same Tier 1
quotas as gpt-5.2.
Gemini 3.x (3.1-pro-preview, 3.5-flash, 3.1-flash-lite) was already
registered in both google/provider.go and ratelimits.go — no change
needed for that part of R-5.
Test coverage:
- ResolveFamilyDefaults table-driven across all 7 cloud entries
including prefix-sharing (gpt-5.5-pro → gpt-5.5 defaults,
gemini-3.1-pro-preview → gemini-3.1-pro defaults).
- Legacy IDs return !ok.
- RegisterArm applies cloud defaults end-to-end.
- User-supplied Strengths and CostWeight are not overridden.
- ID.Model() fallback works when ModelName is empty (test code
often constructs arms this way).
Refs: docs/superpowers/plans/2026-05-23-routing-defaults-refresh.md
247 lines
6.9 KiB
Go
247 lines
6.9 KiB
Go
package openai
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"somegit.dev/Owlibou/gnoma/internal/provider"
|
|
"somegit.dev/Owlibou/gnoma/internal/stream"
|
|
|
|
oai "github.com/openai/openai-go"
|
|
"github.com/openai/openai-go/option"
|
|
)
|
|
|
|
// defaultModel is the model ID a Provider falls back to when the
// ProviderConfig passed to the constructor leaves Model empty.
const defaultModel = "gpt-5.5"
|
|
|
|
// Provider implements provider.Provider for the OpenAI API.
|
|
type Provider struct {
|
|
client *oai.Client
|
|
name string
|
|
model string
|
|
streamOpts []option.RequestOption // injected per-request (e.g. think:false for Ollama)
|
|
}
|
|
|
|
// New creates an OpenAI provider from config.
|
|
func New(cfg provider.ProviderConfig) (provider.Provider, error) {
|
|
return NewWithStreamOptions(cfg, nil)
|
|
}
|
|
|
|
// NewWithStreamOptions creates an OpenAI provider with extra per-request stream options.
|
|
// Use this for Ollama/llama.cpp adapters that need non-standard body fields.
|
|
func NewWithStreamOptions(cfg provider.ProviderConfig, streamOpts []option.RequestOption) (provider.Provider, error) {
|
|
if cfg.APIKey == "" {
|
|
return nil, fmt.Errorf("openai: api key required")
|
|
}
|
|
|
|
opts := []option.RequestOption{
|
|
option.WithAPIKey(cfg.APIKey),
|
|
}
|
|
if cfg.BaseURL != "" {
|
|
opts = append(opts, option.WithBaseURL(cfg.BaseURL))
|
|
}
|
|
if cfg.MaxRetries != nil {
|
|
opts = append(opts, option.WithMaxRetries(*cfg.MaxRetries))
|
|
}
|
|
|
|
client := oai.NewClient(opts...)
|
|
|
|
model := cfg.Model
|
|
if model == "" {
|
|
model = defaultModel
|
|
}
|
|
|
|
return &Provider{
|
|
client: &client,
|
|
name: "openai",
|
|
model: model,
|
|
streamOpts: streamOpts,
|
|
}, nil
|
|
}
|
|
|
|
// Stream initiates a streaming chat completion request.
|
|
func (p *Provider) Stream(ctx context.Context, req provider.Request) (stream.Stream, error) {
|
|
model := req.Model
|
|
if model == "" {
|
|
model = p.model
|
|
}
|
|
|
|
params := translateRequest(req)
|
|
params.Model = model
|
|
|
|
raw := p.client.Chat.Completions.NewStreaming(ctx, params, p.streamOpts...)
|
|
|
|
return newOpenAIStream(raw), nil
|
|
}
|
|
|
|
// Name returns "openai".
|
|
func (p *Provider) Name() string { return p.name }
|
|
|
|
// DefaultModel returns the configured default model.
|
|
func (p *Provider) DefaultModel() string { return p.model }
|
|
|
|
// Models returns available OpenAI models with capabilities by querying the API.
|
|
func (p *Provider) Models(ctx context.Context) ([]provider.ModelInfo, error) {
|
|
pager := p.client.Models.ListAutoPaging(ctx)
|
|
|
|
var models []provider.ModelInfo
|
|
for pager.Next() {
|
|
m := pager.Current()
|
|
caps := inferOpenAIModelCapabilities(m.ID)
|
|
models = append(models, provider.ModelInfo{
|
|
ID: m.ID,
|
|
Name: m.ID,
|
|
Provider: p.name,
|
|
Capabilities: caps,
|
|
})
|
|
}
|
|
if err := pager.Err(); err != nil {
|
|
// Fallback to hardcoded list if API call fails
|
|
return p.fallbackModels(), nil
|
|
}
|
|
|
|
if len(models) == 0 {
|
|
// API returned no models, use fallback
|
|
return p.fallbackModels(), nil
|
|
}
|
|
|
|
return models, nil
|
|
}
|
|
|
|
// fallbackModels returns a hardcoded list of known OpenAI models.
|
|
func (p *Provider) fallbackModels() []provider.ModelInfo {
|
|
return []provider.ModelInfo{
|
|
{
|
|
ID: "gpt-5.5", Name: "GPT-5.5", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
Vision: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 1_000_000,
|
|
MaxOutput: 32000,
|
|
},
|
|
},
|
|
{
|
|
ID: "gpt-5.5-pro", Name: "GPT-5.5 Pro", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
Vision: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 1_000_000,
|
|
MaxOutput: 32000,
|
|
},
|
|
},
|
|
{
|
|
ID: "gpt-5.3-codex", Name: "GPT-5.3 Codex", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
Vision: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 400000,
|
|
MaxOutput: 32000,
|
|
},
|
|
},
|
|
{
|
|
ID: "gpt-5.2", Name: "GPT-5.2 Thinking", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
Vision: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 400000,
|
|
MaxOutput: 32000,
|
|
},
|
|
},
|
|
{
|
|
ID: "gpt-5.2-chat-latest", Name: "GPT-5.2 Instant", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true, JSONOutput: true, Vision: true,
|
|
ContextWindow: 400000, MaxOutput: 32000,
|
|
},
|
|
},
|
|
// Legacy IDs retained for users pinned to older models.
|
|
{
|
|
ID: "gpt-4o", Name: "GPT-4o (legacy)", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true, JSONOutput: true, Vision: true,
|
|
ContextWindow: 128000, MaxOutput: 16384,
|
|
},
|
|
},
|
|
{
|
|
ID: "gpt-4o-mini", Name: "GPT-4o Mini (legacy)", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true, JSONOutput: true, Vision: true,
|
|
ContextWindow: 128000, MaxOutput: 16384,
|
|
},
|
|
},
|
|
{
|
|
ID: "o3", Name: "o3", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 200000,
|
|
MaxOutput: 100000,
|
|
},
|
|
},
|
|
{
|
|
ID: "o3-mini", Name: "o3 Mini", Provider: p.name,
|
|
Capabilities: provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 200000,
|
|
MaxOutput: 100000,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// inferOpenAIModelCapabilities infers capabilities from model ID.
|
|
func inferOpenAIModelCapabilities(modelID string) provider.Capabilities {
|
|
// Default capabilities for most modern OpenAI models (GPT-5.x baseline).
|
|
caps := provider.Capabilities{
|
|
ToolUse: true,
|
|
JSONOutput: true,
|
|
Vision: true,
|
|
ThinkingModes: []provider.EffortLevel{provider.EffortLow, provider.EffortMedium, provider.EffortHigh},
|
|
ContextWindow: 400000,
|
|
MaxOutput: 32000,
|
|
}
|
|
|
|
// Model-specific overrides
|
|
switch modelID {
|
|
case "gpt-5.5", "gpt-5.5-pro":
|
|
caps.ContextWindow = 1_000_000
|
|
caps.MaxOutput = 32000
|
|
case "gpt-5.3-codex":
|
|
caps.ContextWindow = 400000
|
|
caps.MaxOutput = 32000
|
|
case "gpt-5.2", "gpt-5.2-chat-latest":
|
|
caps.ContextWindow = 400000
|
|
caps.MaxOutput = 32000
|
|
case "gpt-4o", "gpt-4o-mini":
|
|
caps.ThinkingModes = nil
|
|
caps.ContextWindow = 128000
|
|
caps.MaxOutput = 16384
|
|
case "o3", "o3-mini":
|
|
caps.ContextWindow = 200000
|
|
caps.MaxOutput = 100000
|
|
case "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613":
|
|
caps.ThinkingModes = nil
|
|
caps.Vision = false
|
|
caps.ContextWindow = 8192
|
|
caps.MaxOutput = 8192
|
|
case "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613":
|
|
caps.ThinkingModes = nil
|
|
caps.Vision = false
|
|
caps.ToolUse = false
|
|
caps.ContextWindow = 16384
|
|
caps.MaxOutput = 4096
|
|
}
|
|
|
|
return caps
|
|
}
|