obm/internal/inference/inference.go
MermaidMan 33d9a2cb2e deploy walkthrough, API validation, inference client, Hetzner provider
- Interactive deploy command with 8-step walkthrough:
  framework → provider → token → SSH → server → inference → tailscale → discord
- .env file generation from walkthrough config
- DeploymentConfig struct with framework-aware defaults
- Inference API client with validation for Venice, OpenRouter, OpenAI, Anthropic
- Hetzner Cloud provider: token validation, SSH key listing
- DotEnv parser/writer with schema validation
- Destroy command with confirmation prompt
- Validation subcommand for checking existing .env files
- All tests passing, go vet clean
2026-05-22 15:29:27 +00:00

249 lines
7.1 KiB
Go

// Package inference defines inference provider types and selection logic.
package inference
import (
"fmt"
"sort"
"strings"
)
// Provider identifies an LLM inference provider by its short string key.
type Provider string

// Supported inference providers.
const (
	// ProviderZAI selects Z.ai's coding API (highest priority for GLM models).
	ProviderZAI = Provider("zai")
	// ProviderVenice selects Venice.ai's API.
	ProviderVenice = Provider("venice")
	// ProviderOpenRouter selects OpenRouter's model routing API.
	ProviderOpenRouter = Provider("openrouter")
)
// ProviderConfig carries the settings used for a single inference provider.
type ProviderConfig struct {
	// Provider is the provider this configuration belongs to.
	Provider Provider `json:"provider"`
	// Model is the model identifier.
	Model string `json:"model"`
	// MaxTokens is the max_tokens value; it is omitted from JSON when zero.
	MaxTokens int `json:"max_tokens,omitempty"`
	// BaseURL is the API base URL; it is omitted from JSON when empty.
	BaseURL string `json:"base_url,omitempty"`
	// APIKeyEnv names the environment variable for the API key; it is
	// omitted from JSON when empty.
	APIKeyEnv string `json:"api_key_env,omitempty"`
	// Description is free-form text that is never serialized to JSON.
	Description string `json:"-"`
}
// Info reports the provider's display name, the environment variable that
// holds its API key, and its API base URL. A provider outside the supported
// set yields the name "Unknown" with an empty variable name and base URL.
func (p Provider) Info() (name, apiKeyEnv, baseURL string) {
	// Start from the "unknown" answer and overwrite it for known providers.
	name = "Unknown"
	switch p {
	case ProviderZAI:
		name, apiKeyEnv, baseURL = "Z.ai", "GLM_API_KEY", "https://api.z.ai/api/coding/paas/v4"
	case ProviderVenice:
		name, apiKeyEnv, baseURL = "Venice.ai", "VENICE_API_KEY", "https://api.venice.ai/api/v1"
	case ProviderOpenRouter:
		name, apiKeyEnv, baseURL = "OpenRouter", "OPENROUTER_API_KEY", "https://openrouter.ai/api/v1"
	}
	return name, apiKeyEnv, baseURL
}
// String implements fmt.Stringer by returning the provider's raw identifier.
func (p Provider) String() string {
	id := string(p)
	return id
}
// MarshalText implements encoding.TextMarshaler. It emits the provider's
// identifier bytes unchanged and never returns an error.
func (p Provider) MarshalText() ([]byte, error) {
	encoded := []byte(p)
	return encoded, nil
}
// UnmarshalText implements encoding.TextUnmarshaler.
//
// Matching is case-insensitive and ignores leading and trailing whitespace, so
// values such as "ZAI\n" or " venice " are accepted. The recognized spellings
// are "zai"/"z.ai", "venice"/"venice.ai" and "openrouter"/"open-router".
// Any other input returns an error and leaves *p unchanged.
func (p *Provider) UnmarshalText(text []byte) error {
	// Trim first: surrounding whitespace carries no meaning for a provider
	// key and would otherwise make a valid name look unknown.
	switch strings.ToLower(strings.TrimSpace(string(text))) {
	case "zai", "z.ai":
		*p = ProviderZAI
	case "venice", "venice.ai":
		*p = ProviderVenice
	case "openrouter", "open-router":
		*p = ProviderOpenRouter
	default:
		return fmt.Errorf("unknown inference provider: %s", text)
	}
	return nil
}
// AllProviders lists every supported inference provider. Each call returns a
// freshly built slice in the order Z.ai, Venice, OpenRouter.
func AllProviders() []Provider {
	supported := []Provider{
		ProviderZAI,
		ProviderVenice,
		ProviderOpenRouter,
	}
	return supported
}
// DefaultGLMConfig builds the recommended configuration for GLM models.
// Provider priority is Z.ai (coding) → Venice → OpenRouter, so the default
// targets Z.ai with model "glm-5.1" and the GLM_API_KEY variable.
// max_tokens is pinned to 16384 to prevent the over-compression bug (Venice
// defaults to 131K otherwise). BaseURL and Description stay at their zero
// values.
func DefaultGLMConfig() ProviderConfig {
	var cfg ProviderConfig
	cfg.Provider = ProviderZAI
	cfg.Model = "glm-5.1"
	cfg.MaxTokens = 16384
	cfg.APIKeyEnv = "GLM_API_KEY"
	return cfg
}
// FallbackChain returns the ordered list of providers to try, beginning with
// p itself. For GLM models the order is ZAI → Venice → OpenRouter: Z.ai falls
// back to Venice and then OpenRouter, Venice falls back to OpenRouter, and
// OpenRouter is the final fallback with nothing after it. Any other provider
// value yields a chain containing only that value.
func (p Provider) FallbackChain() []Provider {
	switch p {
	case ProviderZAI:
		return []Provider{p, ProviderVenice, ProviderOpenRouter}
	case ProviderVenice:
		return []Provider{p, ProviderOpenRouter}
	default:
		// OpenRouter (and anything unrecognized) has no further options.
		return []Provider{p}
	}
}
// ProviderSelection captures a user's provider choice together with an
// optional fallback chain.
type ProviderSelection struct {
	// Primary is the selected provider.
	Primary Provider `json:"primary"`
	// FallbackChain lists providers in fallback order; it is omitted from
	// JSON when empty.
	FallbackChain []Provider `json:"fallback_chain,omitempty"`
	// Model is the model identifier.
	Model string `json:"model"`
	// MaxTokens is the max_tokens value; it is always written to JSON.
	MaxTokens int `json:"max_tokens"`
	// Configs holds per-provider configuration keyed by provider; it is
	// omitted from JSON when empty.
	Configs map[Provider]ProviderConfig `json:"configs,omitempty"`
}
// NewProviderSelection builds a selection for primary with sensible defaults:
// the fallback chain comes from primary.FallbackChain(), the model is
// "glm-5.1", max_tokens is 16384 (to prevent the over-compression bug), and
// Configs is an empty, non-nil map ready to be filled in.
func NewProviderSelection(primary Provider) *ProviderSelection {
	sel := new(ProviderSelection)
	sel.Primary = primary
	sel.FallbackChain = primary.FallbackChain()
	sel.Model = "glm-5.1"
	sel.MaxTokens = 16384
	sel.Configs = map[Provider]ProviderConfig{}
	return sel
}
// Validate reports the first problem found in the selection, or nil when it
// is valid. Checks run in this order: max_tokens must be positive, max_tokens
// must not exceed 131072, the model must be non-empty, the primary provider
// must be supported, and every entry of the fallback chain must be supported.
func (s *ProviderSelection) Validate() error {
	const contextLimit = 131072

	switch {
	case s.MaxTokens <= 0:
		return fmt.Errorf("max_tokens must be positive, got %d", s.MaxTokens)
	case s.MaxTokens > contextLimit:
		return fmt.Errorf("max_tokens %d exceeds context limit (131072)", s.MaxTokens)
	case s.Model == "":
		return fmt.Errorf("model cannot be empty")
	case !isValidProvider(s.Primary):
		return fmt.Errorf("unknown primary provider: %s", s.Primary)
	}

	for i := range s.FallbackChain {
		if candidate := s.FallbackChain[i]; !isValidProvider(candidate) {
			return fmt.Errorf("unknown fallback provider: %s", candidate)
		}
	}
	return nil
}
// isValidProvider reports whether p appears in the list returned by
// AllProviders.
func isValidProvider(p Provider) bool {
	known := AllProviders()
	for i := 0; i < len(known); i++ {
		if known[i] == p {
			return true
		}
	}
	return false
}
// ProviderOption is one entry offered in a provider selection prompt.
type ProviderOption struct {
	// Provider is the provider this entry selects.
	Provider Provider
	// Name is the display name and Description the explanatory text shown
	// with it.
	Name, Description string
	// Recommended marks the entry as the recommended choice.
	Recommended bool
}
// GetProviderOptions returns the entries shown during interactive provider
// selection, in the order Z.ai, Venice.ai, OpenRouter. Only the Z.ai entry is
// flagged as recommended.
func GetProviderOptions() []ProviderOption {
	zai := ProviderOption{
		Provider:    ProviderZAI,
		Name:        "Z.ai",
		Description: "Z.ai coding API - best for GLM models, optimized for code tasks",
		Recommended: true,
	}
	// Recommended is left at its zero value (false) for the remaining entries.
	venice := ProviderOption{
		Provider:    ProviderVenice,
		Name:        "Venice.ai",
		Description: "Venice.ai API - uncensored, private inference, custom model support",
	}
	openRouter := ProviderOption{
		Provider:    ProviderOpenRouter,
		Name:        "OpenRouter",
		Description: "OpenRouter - route to 100+ models, good fallback option",
	}
	return []ProviderOption{zai, venice, openRouter}
}
// FormatProviderList renders the options from GetProviderOptions as a
// numbered, human-readable list. Names are left-aligned and padded to the
// length of the longest name, recommended entries get a " (recommended)"
// suffix, each description follows on its own line, and entries are separated
// by a blank line.
func FormatProviderList() string {
	choices := GetProviderOptions()

	// Width of the name column: the longest name, measured with len.
	width := 0
	for i := range choices {
		if n := len(choices[i].Name); n > width {
			width = n
		}
	}

	var out strings.Builder
	out.WriteString("Inference Providers:\n\n")
	for idx := range choices {
		// A blank line goes between entries, never after the last one.
		if idx > 0 {
			out.WriteString("\n")
		}
		choice := choices[idx]
		suffix := ""
		if choice.Recommended {
			suffix = " (recommended)"
		}
		fmt.Fprintf(&out, " [%d] %-*s%s\n", idx+1, width, choice.Name, suffix)
		fmt.Fprintf(&out, " %s\n", choice.Description)
	}
	return out.String()
}
// SortedProviders lists the providers in priority order for GLM models:
// Z.ai first, then Venice, then OpenRouter.
func SortedProviders() []Provider {
	byPriority := []Provider{
		ProviderZAI, // preferred for GLM coding tasks
		ProviderVenice,
		ProviderOpenRouter,
	}
	return byPriority
}
// ProviderDescriptions returns a newly built map from each supported provider
// to a one-line description of it.
func ProviderDescriptions() map[Provider]string {
	descriptions := make(map[Provider]string, 3)
	descriptions[ProviderZAI] = "Z.ai coding API - optimized for GLM code generation"
	descriptions[ProviderVenice] = "Venice.ai - uncensored, private inference"
	descriptions[ProviderOpenRouter] = "OpenRouter - route to multiple model providers"
	return descriptions
}
// APIKeyEnvVars collects the API key environment variable names, as reported
// by Provider.Info, for the given providers. Providers without a variable
// name are skipped, duplicates are dropped, and the result is sorted
// alphabetically. The result is nil when no names are collected.
func APIKeyEnvVars(providers ...Provider) []string {
	var names []string
	added := map[string]struct{}{}
	for i := range providers {
		_, envName, _ := providers[i].Info()
		if envName == "" {
			continue
		}
		if _, dup := added[envName]; dup {
			continue
		}
		added[envName] = struct{}{}
		names = append(names, envName)
	}
	sort.Strings(names)
	return names
}