Wire cloud STT/TTS providers into pipeline
- Add provider field to STTConfig and TTSConfig (deepgram/venice)
- Add VeniceTTSConfig model for venice voice/base_url settings
- Add CloudTTSSynthesizer adapter wrapping VeniceKokoroTTS
- Loosen STTTranscriber type hint to accept any engine with transcribe_async
- Update run.py to use create_stt_engine/create_tts_engine factories
- Provider-based init: reads config.pipeline.stt.provider and .tts.provider
- Fix duplicate language key in config.yaml
- Remove duplicate language field from STT config

Cloud-only path: VAD (local) -> Deepgram STT -> OpenClaw -> Venice TTS -> Discord
This commit is contained in:
parent
f0458b9b40
commit
7d3e13a3ca
5 changed files with 115 additions and 48 deletions
86
run.py
86
run.py
|
|
@@ -84,10 +84,10 @@ async def main():
|
|||
logger.info(f" Default Agent: {config.agents.default}")
|
||||
logger.info(f" OpenClaw Gateway: {config.openclaw.base_url}")
|
||||
logger.info(f" OpenClaw Agent ID: {config.openclaw.agent_id}")
|
||||
logger.info(f" STT Model: {config.pipeline.stt.model_size}")
|
||||
logger.info(f" STT Device: {config.pipeline.stt.device}")
|
||||
logger.info(f" TTS Engine: {config.pipeline.tts.engine}")
|
||||
logger.info(f" TTS Device: {config.pipeline.tts.device}")
|
||||
logger.info(f" STT Provider: {config.pipeline.stt.provider}")
|
||||
logger.info(f" TTS Provider: {config.pipeline.tts.provider}")
|
||||
if config.pipeline.tts.provider == "venice":
|
||||
logger.info(f" TTS Voice: {config.pipeline.tts.venice.voice}")
|
||||
logger.info(f" Server Port: {config.server.port}")
|
||||
logger.info(f" Latency Tracking: {config.logging.track_latency}")
|
||||
logger.info("")
|
||||
|
|
@@ -95,38 +95,60 @@ async def main():
|
|||
# Initialize shared TTS and STT engines
|
||||
logger.info("Initializing TTS and STT engines...")
|
||||
|
||||
from server.stt import create_transcriber
|
||||
from server.tts import create_tts_synthesizer
|
||||
from server.stt import create_stt_engine, STTTranscriber
|
||||
from server.tts import create_tts_engine, CloudTTSSynthesizer
|
||||
import os
|
||||
|
||||
# Create voice references map
|
||||
voice_refs = {
|
||||
"jarvis": str(jarvis_voice),
|
||||
"sage": str(sage_voice),
|
||||
}
|
||||
# --- TTS ---
|
||||
tts_provider = config.pipeline.tts.provider
|
||||
if tts_provider == "venice":
|
||||
tts_engine = create_tts_engine("venice", {
|
||||
"api_key": os.getenv("VENICE_API_KEY", ""),
|
||||
"voice": config.pipeline.tts.venice.voice,
|
||||
"base_url": config.pipeline.tts.venice.base_url,
|
||||
})
|
||||
tts_synthesizer = CloudTTSSynthesizer(tts_engine)
|
||||
logger.info(f"✓ TTS engine initialized (Venice Kokoro, voice={config.pipeline.tts.venice.voice})")
|
||||
else:
|
||||
# Local Chatterbox (requires GPU + voice files)
|
||||
voice_refs = {
|
||||
"jarvis": str(jarvis_voice),
|
||||
"sage": str(sage_voice),
|
||||
}
|
||||
tts_engine = create_tts_engine("chatterbox", {
|
||||
"device": config.pipeline.tts.device,
|
||||
"sample_rate": 24000,
|
||||
})
|
||||
from server.tts import TTSSynthesizer
|
||||
voice_map = {a: Path(p) for a, p in voice_refs.items()}
|
||||
tts_synthesizer = TTSSynthesizer(engine=tts_engine, voice_map=voice_map)
|
||||
logger.info(f"✓ TTS engine initialized (Chatterbox on {config.pipeline.tts.device})")
|
||||
|
||||
# Initialize TTS synthesizer (shared between Discord and API)
|
||||
tts_synthesizer = await create_tts_synthesizer(
|
||||
voice_refs=voice_refs,
|
||||
device=config.pipeline.tts.device,
|
||||
sample_rate=24000, # Default sample rate for Chatterbox TTS
|
||||
)
|
||||
logger.info(f"✓ TTS engine initialized ({config.pipeline.tts.device})")
|
||||
|
||||
# Warmup TTS and cache common phrases
|
||||
logger.info("Warming up TTS engine and caching common phrases...")
|
||||
# Warmup TTS
|
||||
logger.info("Warming up TTS engine...")
|
||||
await tts_synthesizer.warmup()
|
||||
logger.info(f"✓ TTS warmup complete ({len(tts_synthesizer.phrase_cache)} phrases cached)")
|
||||
logger.info("✓ TTS warmup complete")
|
||||
|
||||
# Initialize STT transcriber (shared between Discord and API)
|
||||
stt_transcriber = await create_transcriber(
|
||||
model_size=config.pipeline.stt.model_size,
|
||||
device=config.pipeline.stt.device,
|
||||
compute_type=config.pipeline.stt.compute_type,
|
||||
)
|
||||
logger.info(
|
||||
f"✓ STT engine initialized "
|
||||
f"({config.pipeline.stt.model_size} on {config.pipeline.stt.device})"
|
||||
)
|
||||
# --- STT ---
|
||||
stt_provider = config.pipeline.stt.provider
|
||||
if stt_provider == "deepgram":
|
||||
stt_engine = create_stt_engine("deepgram",
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY", ""),
|
||||
model=config.pipeline.stt.model,
|
||||
language=config.pipeline.stt.language,
|
||||
)
|
||||
stt_transcriber = STTTranscriber(engine=stt_engine, max_concurrent=3)
|
||||
logger.info(f"✓ STT engine initialized (Deepgram {config.pipeline.stt.model})")
|
||||
else:
|
||||
stt_engine = create_stt_engine("local",
|
||||
model_size=config.pipeline.stt.model_size,
|
||||
device=config.pipeline.stt.device,
|
||||
compute_type=config.pipeline.stt.compute_type,
|
||||
beam_size=config.pipeline.stt.beam_size,
|
||||
language=config.pipeline.stt.language,
|
||||
)
|
||||
stt_transcriber = STTTranscriber(engine=stt_engine, max_concurrent=1)
|
||||
logger.info(f"✓ STT engine initialized (faster-whisper {config.pipeline.stt.model_size} on {config.pipeline.stt.device})")
|
||||
|
||||
# Initialize OpenClaw Gateway client
|
||||
logger.info("Initializing OpenClaw Gateway client...")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue