- Interactive deploy command with 8-step walkthrough: framework → provider → token → SSH → server → inference → tailscale → discord - .env file generation from walkthrough config - DeploymentConfig struct with framework-aware defaults - Inference API client with validation for Venice, OpenRouter, OpenAI, Anthropic - Hetzner Cloud provider: token validation, SSH key listing - DotEnv parser/writer with schema validation - Destroy command with confirmation prompt - Validation subcommand for checking existing .env files - All tests passing, go vet clean
249 lines
7.1 KiB
Go
249 lines
7.1 KiB
Go
// Package inference defines inference provider types and selection logic.
|
|
package inference
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// Provider identifies an LLM inference provider by its string identifier.
type Provider string
|
|
|
|
const (
|
|
// ProviderZAI is Z.ai's coding API (highest priority for GLM models).
|
|
ProviderZAI Provider = "zai"
|
|
// ProviderVenice is Venice.ai's API.
|
|
ProviderVenice Provider = "venice"
|
|
// ProviderOpenRouter is OpenRouter's model routing API.
|
|
ProviderOpenRouter Provider = "openrouter"
|
|
)
|
|
|
|
// ProviderConfig holds provider-specific configuration.
|
|
type ProviderConfig struct {
|
|
Provider Provider `json:"provider"`
|
|
Model string `json:"model"`
|
|
MaxTokens int `json:"max_tokens,omitempty"`
|
|
BaseURL string `json:"base_url,omitempty"`
|
|
APIKeyEnv string `json:"api_key_env,omitempty"` // Environment variable for API key
|
|
Description string `json:"-"`
|
|
}
|
|
|
|
// ProviderInfo returns human-readable information about a provider.
|
|
func (p Provider) Info() (name, apiKeyEnv, baseURL string) {
|
|
switch p {
|
|
case ProviderZAI:
|
|
return "Z.ai", "GLM_API_KEY", "https://api.z.ai/api/coding/paas/v4"
|
|
case ProviderVenice:
|
|
return "Venice.ai", "VENICE_API_KEY", "https://api.venice.ai/api/v1"
|
|
case ProviderOpenRouter:
|
|
return "OpenRouter", "OPENROUTER_API_KEY", "https://openrouter.ai/api/v1"
|
|
default:
|
|
return "Unknown", "", ""
|
|
}
|
|
}
|
|
|
|
// String returns the provider identifier string.
|
|
func (p Provider) String() string {
|
|
return string(p)
|
|
}
|
|
|
|
// MarshalText implements encoding.TextMarshaler.
|
|
func (p Provider) MarshalText() ([]byte, error) {
|
|
return []byte(p), nil
|
|
}
|
|
|
|
// UnmarshalText implements encoding.TextUnmarshaler.
|
|
func (p *Provider) UnmarshalText(text []byte) error {
|
|
s := strings.ToLower(string(text))
|
|
switch s {
|
|
case "zai", "z.ai":
|
|
*p = ProviderZAI
|
|
case "venice", "venice.ai":
|
|
*p = ProviderVenice
|
|
case "openrouter", "open-router":
|
|
*p = ProviderOpenRouter
|
|
default:
|
|
return fmt.Errorf("unknown inference provider: %s", text)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AllProviders returns all supported inference providers.
|
|
func AllProviders() []Provider {
|
|
return []Provider{ProviderZAI, ProviderVenice, ProviderOpenRouter}
|
|
}
|
|
|
|
// DefaultGLMConfig returns the recommended configuration for GLM models.
|
|
// Priority: Z.ai (coding) → Venice → OpenRouter
|
|
// Sets max_tokens=16384 to prevent the over-compression bug (Venice defaults to 131K otherwise).
|
|
func DefaultGLMConfig() ProviderConfig {
|
|
return ProviderConfig{
|
|
Provider: ProviderZAI,
|
|
Model: "glm-5.1",
|
|
MaxTokens: 16384,
|
|
APIKeyEnv: "GLM_API_KEY",
|
|
}
|
|
}
|
|
|
|
// FallbackChain returns the recommended fallback chain for a starting provider.
|
|
// GLM models: ZAI → Venice → OpenRouter
|
|
func (p Provider) FallbackChain() []Provider {
|
|
// All fallback chains end up at OpenRouter as the final fallback
|
|
chain := []Provider{p}
|
|
|
|
switch p {
|
|
case ProviderZAI:
|
|
chain = append(chain, ProviderVenice, ProviderOpenRouter)
|
|
case ProviderVenice:
|
|
chain = append(chain, ProviderOpenRouter)
|
|
case ProviderOpenRouter:
|
|
// OpenRouter is the final fallback, no further options
|
|
}
|
|
|
|
return chain
|
|
}
|
|
|
|
// ProviderSelection represents a user's provider selection with optional fallback chain.
|
|
type ProviderSelection struct {
|
|
Primary Provider `json:"primary"`
|
|
FallbackChain []Provider `json:"fallback_chain,omitempty"`
|
|
Model string `json:"model"`
|
|
MaxTokens int `json:"max_tokens"`
|
|
Configs map[Provider]ProviderConfig `json:"configs,omitempty"`
|
|
}
|
|
|
|
// NewProviderSelection creates a new provider selection with sensible defaults.
|
|
func NewProviderSelection(primary Provider) *ProviderSelection {
|
|
return &ProviderSelection{
|
|
Primary: primary,
|
|
FallbackChain: primary.FallbackChain(),
|
|
Model: "glm-5.1", // Default to GLM-5.1
|
|
MaxTokens: 16384, // Prevent over-compression bug
|
|
Configs: make(map[Provider]ProviderConfig),
|
|
}
|
|
}
|
|
|
|
// Validate checks that the provider selection is valid.
|
|
func (s *ProviderSelection) Validate() error {
|
|
if s.MaxTokens <= 0 {
|
|
return fmt.Errorf("max_tokens must be positive, got %d", s.MaxTokens)
|
|
}
|
|
if s.MaxTokens > 131072 {
|
|
return fmt.Errorf("max_tokens %d exceeds context limit (131072)", s.MaxTokens)
|
|
}
|
|
if s.Model == "" {
|
|
return fmt.Errorf("model cannot be empty")
|
|
}
|
|
if !isValidProvider(s.Primary) {
|
|
return fmt.Errorf("unknown primary provider: %s", s.Primary)
|
|
}
|
|
for _, p := range s.FallbackChain {
|
|
if !isValidProvider(p) {
|
|
return fmt.Errorf("unknown fallback provider: %s", p)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isValidProvider checks if a provider is supported.
|
|
func isValidProvider(p Provider) bool {
|
|
for _, supported := range AllProviders() {
|
|
if p == supported {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ProviderOption represents a choice in a selection prompt.
|
|
type ProviderOption struct {
|
|
Provider Provider
|
|
Name string
|
|
Description string
|
|
Recommended bool
|
|
}
|
|
|
|
// GetProviderOptions returns provider options for interactive selection.
|
|
func GetProviderOptions() []ProviderOption {
|
|
return []ProviderOption{
|
|
{
|
|
Provider: ProviderZAI,
|
|
Name: "Z.ai",
|
|
Description: "Z.ai coding API - best for GLM models, optimized for code tasks",
|
|
Recommended: true,
|
|
},
|
|
{
|
|
Provider: ProviderVenice,
|
|
Name: "Venice.ai",
|
|
Description: "Venice.ai API - uncensored, private inference, custom model support",
|
|
Recommended: false,
|
|
},
|
|
{
|
|
Provider: ProviderOpenRouter,
|
|
Name: "OpenRouter",
|
|
Description: "OpenRouter - route to 100+ models, good fallback option",
|
|
Recommended: false,
|
|
},
|
|
}
|
|
}
|
|
|
|
// FormatProviderList returns a formatted list of providers for display.
|
|
func FormatProviderList() string {
|
|
var sb strings.Builder
|
|
sb.WriteString("Inference Providers:\n\n")
|
|
|
|
options := GetProviderOptions()
|
|
maxNameLen := 0
|
|
for _, opt := range options {
|
|
if len(opt.Name) > maxNameLen {
|
|
maxNameLen = len(opt.Name)
|
|
}
|
|
}
|
|
|
|
for i, opt := range options {
|
|
recMark := ""
|
|
if opt.Recommended {
|
|
recMark = " (recommended)"
|
|
}
|
|
fmt.Fprintf(&sb, " [%d] %-*s%s\n", i+1, maxNameLen, opt.Name, recMark)
|
|
fmt.Fprintf(&sb, " %s\n", opt.Description)
|
|
if i < len(options)-1 {
|
|
sb.WriteString("\n")
|
|
}
|
|
}
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// SortedProviders returns providers sorted by priority for GLM models.
|
|
func SortedProviders() []Provider {
|
|
// Z.ai is preferred for GLM coding tasks
|
|
return []Provider{ProviderZAI, ProviderVenice, ProviderOpenRouter}
|
|
}
|
|
|
|
// ProviderDescriptions returns a map of provider descriptions.
|
|
func ProviderDescriptions() map[Provider]string {
|
|
return map[Provider]string{
|
|
ProviderZAI: "Z.ai coding API - optimized for GLM code generation",
|
|
ProviderVenice: "Venice.ai - uncensored, private inference",
|
|
ProviderOpenRouter: "OpenRouter - route to multiple model providers",
|
|
}
|
|
}
|
|
|
|
// APIKeyEnvVars returns the required environment variables for a provider.
|
|
func APIKeyEnvVars(providers ...Provider) []string {
|
|
var envVars []string
|
|
seen := make(map[string]bool)
|
|
|
|
for _, p := range providers {
|
|
_, apiKeyEnv, _ := p.Info()
|
|
if apiKeyEnv != "" && !seen[apiKeyEnv] {
|
|
envVars = append(envVars, apiKeyEnv)
|
|
seen[apiKeyEnv] = true
|
|
}
|
|
}
|
|
|
|
sort.Strings(envVars)
|
|
return envVars
|
|
}
|