feat: add Deepgram STT provider and cloud-first config
- New DeepgramSTT class using Deepgram nova-3 via REST API
- Factory function create_stt_engine() for provider switching
- faster-whisper import now optional (graceful fallback)
- Config defaults to cloud providers (deepgram STT + venice TTS)
- .env.example updated with DEEPGRAM_API_KEY and VENICE_API_KEY
- requirements.txt adds deepgram-sdk, marks faster-whisper as optional
- Zero GPU required for default configuration
This commit is contained in:
parent
3eea942772
commit
f0458b9b40
4 changed files with 213 additions and 16 deletions
32
config.yaml
32
config.yaml
|
|
@@ -108,20 +108,19 @@ pipeline:
|
|||
# Using v3.2 GPU model for best performance with RTX 5090
|
||||
model_path: "smart-turn-v3.2-gpu.onnx"
|
||||
|
||||
# Speech-to-Text (faster-whisper)
|
||||
# Speech-to-Text
|
||||
stt:
|
||||
# Model size: tiny, base, small, medium, large-v3
|
||||
# Using "small" for faster transcription (was "medium")
|
||||
# Provider: "deepgram" (cloud, no GPU) or "local" (faster-whisper, requires GPU)
|
||||
provider: "deepgram"
|
||||
|
||||
# Deepgram settings (used when provider is "deepgram")
|
||||
model: "nova-3"
|
||||
language: "en"
|
||||
|
||||
# Local faster-whisper settings (used when provider is "local")
|
||||
model_size: "small"
|
||||
|
||||
# Device: cuda or cpu
|
||||
device: "cuda"
|
||||
|
||||
# Compute type: float16, float32, int8
|
||||
compute_type: "float16"
|
||||
|
||||
# Beam size for decoding (higher = more accurate, slower)
|
||||
# Optimized for voice chat: beam_size=1 is 3-5x faster with minimal quality loss
|
||||
beam_size: 1
|
||||
|
||||
# Language hint (null = auto-detect)
|
||||
|
|
@@ -165,10 +164,17 @@ pipeline:
|
|||
|
||||
# Text-to-Speech
|
||||
tts:
|
||||
# TTS engine: chatterbox, coqui, piper
|
||||
engine: "coqui"
|
||||
# Provider: "venice" (cloud, no GPU) or "local" (chatterbox, requires GPU)
|
||||
provider: "venice"
|
||||
|
||||
# Device: cuda or cpu
|
||||
# Venice settings (used when provider is "venice")
|
||||
venice:
|
||||
voice: "am_liam"
|
||||
base_url: "https://api.venice.ai/api/v1"
|
||||
# API key from env: VENICE_API_KEY
|
||||
|
||||
# Local settings (used when provider is "local")
|
||||
engine: "chatterbox"
|
||||
device: "cuda"
|
||||
|
||||
# Streaming: generate and play audio in chunks
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue