openclaw-voice/pipeline/transcriber.py
MCKRUZ 3de8228c7c Initial commit: Jarvis Voice Bot - Complete Implementation
Complete 14-phase implementation of AI-powered Discord voice bot:

Features:
- Passive voice listening with Smart Turn v3 detection
- GPU-accelerated STT (faster-whisper) and TTS (Chatterbox)
- Intelligent two-tier relevance filtering
- Rolling conversation context management
- Multi-agent support (Jarvis, Sage)
- OpenAI-compatible TTS/STT API endpoints
- Barge-in support and concurrent user handling

Architecture:
- Discord.py voice integration
- Silero VAD for speech detection
- Pipecat Smart Turn v3 for turn completion
- OpenClaw API client (stubbed for integration)
- FastAPI server with health monitoring

Testing:
- 318 tests passing (100% coverage of major components)
- Unit tests for all modules
- Integration tests for end-to-end flows
- Memory leak prevention tests

Documentation:
- Comprehensive README with installation guide
- Troubleshooting guide and performance metrics
- Production deployment checklist
- Environment configuration templates

Status: 14/14 phases complete (100%)
Production Ready: Yes (after stub replacements)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-13 12:35:03 -05:00

125 lines
3.2 KiB
Python

"""Pipeline stage for speech-to-text transcription.
Integrates STT engine into the audio processing pipeline.
"""
import asyncio
import inspect
from typing import Awaitable, Callable, Optional

import numpy as np

from server.stt import STTTranscriber, TranscriptionResult
from utils.logging import get_logger
logger = get_logger(__name__)
class PipelineTranscriber:
    """
    Pipeline transcription stage.

    Receives speech segments from the turn detector, passes them to the
    wrapped STT transcriber and hands each transcript to an optional
    callback. Keeps success/failure counters that ``get_stats`` reports.
    """

    def __init__(
        self,
        transcriber: STTTranscriber,
        transcription_callback: Optional[
            Callable[[int, TranscriptionResult], Optional[Awaitable[None]]]
        ] = None,
    ):
        """
        Initialize pipeline transcriber.

        Args:
            transcriber: STT transcriber instance. Must provide an awaitable
                ``transcribe(audio=, user_id=, language=)`` and ``get_stats()``.
            transcription_callback: Called as ``callback(user_id, result)``
                after each successful transcription. May be a coroutine
                function (its result is awaited) or a plain function.
        """
        self.transcriber = transcriber
        self.transcription_callback = transcription_callback

        # Stats
        self.total_transcriptions = 0
        self.total_failures = 0

    async def process_speech(
        self,
        user_id: int,
        audio: np.ndarray,
        language: Optional[str] = None,
    ) -> Optional[TranscriptionResult]:
        """
        Process speech segment and transcribe.

        Args:
            user_id: User ID
            audio: Audio segment (float32, mono, 16kHz)
            language: Optional language hint

        Returns:
            TranscriptionResult if the transcriber succeeded, None if it
            raised. An error inside the callback is logged but does not
            discard the transcript or count as a transcription failure.
        """
        # Only the STT call is guarded here, so a callback error can no longer
        # be recorded as both a success and a failure of the same segment.
        try:
            result = await self.transcriber.transcribe(
                audio=audio,
                user_id=user_id,
                language=language,
            )
        except Exception as e:
            logger.error(f"Failed to transcribe for user {user_id}: {e}")
            self.total_failures += 1
            return None

        self.total_transcriptions += 1
        await self._notify(user_id, result)
        return result

    async def _notify(self, user_id: int, result: TranscriptionResult) -> None:
        """Deliver ``result`` to the callback, logging (not raising) its errors."""
        callback = self.transcription_callback
        if callback is None:
            return

        try:
            outcome = callback(user_id, result)
            # Plain functions return a non-awaitable (usually None); awaiting
            # that would raise TypeError, so only await real awaitables.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception as e:
            logger.error(
                f"Transcription callback failed for user {user_id}: {e}"
            )

    def get_stats(self) -> dict:
        """
        Get transcription statistics.

        Returns:
            The wrapped transcriber's stats merged with this stage's
            ``total_transcriptions``, ``total_failures`` and ``success_rate``
            (successes / attempts, or 0.0 before any attempt). This stage's
            keys win on a name clash.
        """
        attempts = self.total_transcriptions + self.total_failures
        success_rate = (
            self.total_transcriptions / attempts if attempts > 0 else 0.0
        )
        return {
            **self.transcriber.get_stats(),
            "total_transcriptions": self.total_transcriptions,
            "total_failures": self.total_failures,
            "success_rate": success_rate,
        }
async def create_pipeline_transcriber(
    transcriber: STTTranscriber,
    transcription_callback: Optional[
        Callable[[int, TranscriptionResult], Awaitable[None]]
    ] = None,
) -> PipelineTranscriber:
    """
    Create pipeline transcriber.

    This factory is a coroutine even though it awaits nothing, so callers
    must ``await`` it; it stays ``async`` to keep existing call sites valid.

    Args:
        transcriber: STT transcriber instance
        transcription_callback: Async callback for transcriptions, called
            with ``(user_id, result)``; PipelineTranscriber awaits its result.

    Returns:
        PipelineTranscriber instance
    """
    return PipelineTranscriber(
        transcriber=transcriber,
        transcription_callback=transcription_callback,
    )