# openclaw-voice/test_stt.py

"""Test STT (Speech-To-Text) to verify microphone input is working.

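This script will:
1. Load the STT model configured in config.yaml
2. Transcribe two synthetic clips (0.5s of silence and a 440 Hz tone)
3. Print exactly what it transcribes for each clip

It does not listen to Discord itself; to check live microphone input,
follow the next steps it prints once the synthetic tests finish.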
"""

import asyncio
import numpy as np
from pathlib import Path

from utils.config import load_config
from server.stt import create_stt_transcriber
from utils.logging import get_logger

# Module-level logger obtained from the project's get_logger helper.
# NOTE(review): nothing in the code visible here references it; the script
# reports its results with print() instead.
logger = get_logger(__name__)


async def test_stt():
    """Smoke-test the configured STT transcriber with synthetic audio.

    Hands ``Path("config.yaml")`` to ``load_config``, awaits
    ``create_stt_transcriber`` on the resulting ``stt`` section, and then
    transcribes two generated float32 buffers: one of all zeros and one
    holding a 440 Hz sine scaled to 0.1. Each outcome is printed beside the
    expected one, followed by manual instructions for checking live Discord
    audio in the bot logs.
    """
    banner = "=" * 70
    sample_rate = 16000  # both clips are generated for 16 kHz audio

    print("\n" + banner)
    print("STT (Speech-To-Text) Test")
    print(banner + "\n")

    # Configuration comes first: the transcriber is built from its `stt` section.
    settings = load_config(Path("config.yaml"))

    print("Loading STT model (this may take a moment)...")
    stt = await create_stt_transcriber(settings.stt)
    print(f"✓ STT model loaded: {settings.stt.model} on {settings.stt.device}\n")

    print("Testing different audio scenarios:\n")

    # Scenario 1: half a second of all-zero samples.
    print("Test 1: Silent audio (0.5s of silence)")
    silence = np.zeros(sample_rate // 2, dtype=np.float32)
    silence_out = await stt.transcribe(silence, user_id=0)
    print(f"  Result: '{silence_out.text}' (confidence: {silence_out.confidence:.2f})")
    print("  Expected: Empty or '[silence]'\n")

    # Scenario 2: one second of a pure 440 Hz sine; it exercises the
    # processing path with a signal that is not speech.
    print("Test 2: Tone audio (should not detect speech)")
    phase = 2 * np.pi * 440 * np.arange(sample_rate) / sample_rate
    tone = np.sin(phase).astype(np.float32) * 0.1
    tone_out = await stt.transcribe(tone, user_id=0)
    print(f"  Result: '{tone_out.text}'")
    print("  Expected: Empty or noise\n")

    # Closing summary plus the manual follow-up for live microphone input.
    closing_lines = (
        "\nSTT Test Complete!",
        "\nNext steps:",
        "1. Join Discord voice channel with the bot",
        "2. Speak clearly: 'Jarvis, can you hear me?'",
        "3. Check the bot logs to see the transcription:",
        "   tail -f /tmp/bot-final.log | grep 'Transcribed'",
        "\nIf you see correct transcriptions in the logs, STT is working!",
    )
    print(banner)
    for line in closing_lines:
        print(line)
    print(banner + "\n")


# Script entry point: when this file is executed directly (not imported),
# run the async test_stt() coroutine to completion via asyncio.run().
if __name__ == "__main__":
    asyncio.run(test_stt())