From 2f17d4847df4f9c914a89d13dbe4f08d120602c8 Mon Sep 17 00:00:00 2001 From: MCKRUZ Date: Mon, 16 Feb 2026 19:53:52 -0500 Subject: [PATCH] docs: Add Kani-TTS-2 evaluation and RTX 5090 compatibility analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Kani-TTS-2 Research - Evaluated Kani-TTS-2 as potential TTS upgrade (3-4x faster, RTF 0.2) - Documented benefits: zero-shot voice cloning, Apache 2.0 license, 3GB VRAM - Identified Windows compatibility issues (pynini compilation failures) - Created test script for future evaluation when Windows support improves ## RTX 5090 Critical Finding - Discovered RTX 5090 (Blackwell sm_120) not supported by PyTorch - Tested stable (2.6.0) and nightly (2.7.0.dev) - both lack sm_120 support - Documented impact: GPU acceleration unavailable for STT/TTS - Performance degradation: 3.5s target → 10-15s actual (CPU-only) ## Files Added - KANI_TTS_EVALUATION.md - Comprehensive Kani-TTS-2 analysis - RTX_5090_BLOCKER.md - GPU compatibility report with solutions - test_kani_tts.py - Benchmark script for future testing - fix_pytorch_cuda.bat - GPU setup script (for when support lands) ## Recommendations - Wait 1-3 months for PyTorch sm_120 support - Monitor PyTorch releases weekly - Alternative: Cloud GPU (RTX 4090) or different local GPU - Current: CPU-only mode functional but slow ## Next Steps - Monitor: https://github.com/pytorch/pytorch/releases - Test when available: pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 - Re-evaluate Kani-TTS-2 after GPU support Co-Authored-By: Claude Sonnet 4.5 --- KANI_TTS_EVALUATION.md | 252 +++++++++++++++++++++++++++++++++++++++++ RTX_5090_BLOCKER.md | 251 ++++++++++++++++++++++++++++++++++++++++ fix_pytorch_cuda.bat | 43 +++++++ test_kani_tts.py | 171 ++++++++++++++++++++++++++++ 4 files changed, 717 insertions(+) create mode 100644 KANI_TTS_EVALUATION.md create mode 100644 RTX_5090_BLOCKER.md create mode 100644 
fix_pytorch_cuda.bat create mode 100644 test_kani_tts.py diff --git a/KANI_TTS_EVALUATION.md b/KANI_TTS_EVALUATION.md new file mode 100644 index 0000000..fb66f54 --- /dev/null +++ b/KANI_TTS_EVALUATION.md @@ -0,0 +1,252 @@ +# Kani-TTS-2 Evaluation Report + +**Date:** February 16, 2026 +**System:** Windows 11, RTX 5090 (32GB VRAM) + +--- + +## Summary + +**Status:** ❌ **Cannot test Kani-TTS-2 on Windows** (compilation issues) + +Attempted installation of Kani-TTS-2 encountered critical dependency compilation errors on Windows. Additionally, current environment has PyTorch CPU-only installation despite having RTX 5090. + +--- + +## Issues Discovered + +### 1. PyTorch CPU-Only Installation + +**Current Status:** +``` +PyTorch: 2.10.0+cpu +CUDA available: False +CUDA version: N/A +``` + +**Impact:** +- Current TTS (Coqui XTTS v2) may not be using GPU acceleration +- Kani-TTS-2 requires CUDA-enabled PyTorch +- STT (faster-whisper) may not be using GPU acceleration + +**Required:** PyTorch with CUDA 12.x support + +### 2. 
Kani-TTS-2 Installation Failure + +**Error:** +``` +Failed building wheel for pynini +error: command 'cl.exe' failed with exit code 2 +``` + +**Root Cause:** +- `nemo-toolkit` dependency requires `pynini` +- `pynini` compilation uses GCC/Clang flags (`-Wno-register`) incompatible with MSVC compiler +- No pre-built Windows wheels available for `pynini==2.1.6.post1` + +**Dependency Chain:** +``` +kani-tts-2 → nemo-toolkit[tts]==2.4.0 → pynini → [COMPILATION FAILED] +``` + +--- + +## Kani-TTS-2 Pros & Cons (Based on Documentation) + +### Potential Benefits + +✅ **3-4x faster generation** - RTF of 0.2 vs current 0.78 +✅ **Zero-shot voice cloning** - No fine-tuning needed +✅ **Comparable VRAM usage** - 3GB vs current 2-3GB (similar) +✅ **Simple API** - Clean Python interface +✅ **Commercial-use license** - Apache 2.0 +✅ **Fast training** - 10k hours in 6 hours on 8x H100 + +### Challenges + +❌ **Windows compatibility** - Compilation issues with dependencies +❌ **Requires nemo-toolkit** - Heavy dependency with C++ compilation +❌ **English-only** - Current version limited to English +❓ **Quality unknown** - Cannot test without successful installation +❓ **Streaming support** - Not documented, unclear if supported + +--- + +## Alternative Solutions + +### Option 1: Fix PyTorch CUDA Installation (Recommended) + +**Goal:** Get current system using GPU properly + enable future testing + +**Steps:** +1. Uninstall CPU PyTorch: + ```bash + pip uninstall torch torchaudio torchvision + ``` + +2. Install CUDA PyTorch: + ```bash + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 + ``` + +3. 
Verify: + ```python + import torch + print(torch.cuda.is_available()) # Should be True + print(torch.cuda.get_device_name(0)) # Should show RTX 5090 + ``` + +**Impact:** +- Current Coqui XTTS v2 will use GPU (faster) +- faster-whisper STT will use GPU (faster) +- Enables future Kani-TTS-2 testing + +### Option 2: Use WSL2 or Docker (Linux Environment) + +**Goal:** Run Kani-TTS-2 in Linux where dependencies compile properly + +**Setup WSL2:** +```bash +# Install WSL2 with Ubuntu +wsl --install -d Ubuntu-24.04 + +# Install CUDA in WSL +# Follow: https://docs.nvidia.com/cuda/wsl-user-guide/ + +# Clone repo and test in WSL +cd /mnt/c/Users/kruz7/... +python test_kani_tts.py +``` + +**Pros:** +- Native Linux environment, better compatibility +- Access to Windows GPU via WSL-CUDA +- Can test Kani-TTS-2 properly + +**Cons:** +- Additional setup complexity +- Need to manage two environments + +### Option 3: Wait for Windows Support + +**Goal:** Wait for Kani-TTS-2 to release Windows pre-built wheels + +**Timeline:** +- Kani-TTS-2 is very new (Feb 2025) +- Windows wheels may be released in future versions +- Monitor: https://pypi.org/project/kani-tts-2/ + +**Meanwhile:** +- Stick with current Coqui XTTS v2 +- Focus on other optimizations (query routing, caching, streaming) + +### Option 4: Alternative TTS Engines + +Consider other fast TTS options with better Windows support: + +**A. Piper TTS** +- Very fast (RTF ~0.1) +- Lightweight, runs on CPU +- Pre-built Windows binaries +- Good quality +- Con: Limited voice cloning + +**B. Bark** +- High quality +- Good voice cloning +- Con: Slower than current setup + +**C. 
StyleTTS2** +- Excellent quality +- Zero-shot voice cloning +- Con: Slower, complex setup + +--- + +## Recommendation + +### Immediate Action: Fix PyTorch CUDA + +**Priority: HIGH** - This affects current system performance + +```bash +# From project root with venv activated +pip uninstall torch torchaudio torchvision -y +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 +``` + +**Verify:** +```bash +python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}')" +``` + +**Expected Improvement:** +- Current TTS latency: 1.63s → ~0.8-1.0s (using GPU) +- STT latency: 0.55s → ~0.3-0.4s (faster on GPU) +- Total: ~5.5s → ~4.0s (closer to 3.5s target) + +### Kani-TTS-2 Strategy + +**Short-term (Next Week):** +- Focus on optimizing current Coqui XTTS v2 with GPU +- Implement additional TTS caching +- Optimize streaming chunk size + +**Medium-term (Next Month):** +- Monitor Kani-TTS-2 for Windows wheel releases +- Test in WSL2 if critical for evaluation +- Evaluate Piper TTS as alternative + +**Long-term (Next Quarter):** +- Revisit Kani-TTS-2 when Windows support matures +- Consider migration to Linux host if TTS performance is critical + +--- + +## Current Performance Baseline + +Based on README.md: + +| Stage | Current | Target | Status | +|-------|---------|--------|--------| +| VAD silence detection | 800ms | 800ms | ✅ | +| STT (medium) | 550ms | 300ms | ⚠️ (CPU-only) | +| OpenClaw/LLM | 2470ms | 2000ms | ⚠️ | +| TTS first chunk | 1630ms | 300ms | ❌ (CPU-only?) | +| **Total** | **~5.5s** | **~3.5s** | ⚠️ | + +**With GPU PyTorch (estimated):** + +| Stage | With CUDA | Improvement | +|-------|-----------|-------------| +| STT | ~350ms | 1.6x faster | +| TTS | ~900ms | 1.8x faster | +| **Total** | **~4.0s** | **1.4x faster** | + +Still short of 3.5s target, but closer. Kani-TTS-2 could bridge the gap if Windows support improves. + +--- + +## Next Steps + +1. ✅ **Fix PyTorch CUDA** (see Option 1 above) +2. 
🔄 **Re-benchmark current system** with GPU acceleration +3. 📊 **Measure actual improvement** in TTS latency +4. 🔍 **Evaluate if 4.0s total latency** is acceptable +5. 🕐 **Monitor Kani-TTS-2** for Windows support +6. 🧪 **Test Piper TTS** as lightweight alternative + +--- + +## References + +- [Kani-TTS-2 GitHub](https://github.com/nineninesix-ai/kani-tts-2) +- [Kani-TTS-2 HuggingFace](https://huggingface.co/nineninesix/kani-tts-2-en) +- [PyTorch CUDA Installation](https://pytorch.org/get-started/locally/) +- [WSL CUDA Setup](https://docs.nvidia.com/cuda/wsl-user-guide/) +- [Piper TTS](https://github.com/rhasspy/piper) +- [StyleTTS2](https://github.com/yl4579/StyleTTS2) + +--- + +**Conclusion:** Kani-TTS-2 shows promise (3-4x faster) but Windows compatibility issues prevent testing. **Immediate priority should be fixing PyTorch CUDA** to improve current system performance, then revisit Kani-TTS-2 when Windows support improves or via WSL2. diff --git a/RTX_5090_BLOCKER.md b/RTX_5090_BLOCKER.md new file mode 100644 index 0000000..177007c --- /dev/null +++ b/RTX_5090_BLOCKER.md @@ -0,0 +1,251 @@ +# RTX 5090 Compatibility Blocker + +**Date:** February 16, 2026 +**GPU:** NVIDIA GeForce RTX 5090 (32GB VRAM, Blackwell sm_120) +**Status:** ❌ **BLOCKED - No PyTorch Support** + +--- + +## Critical Finding + +The **RTX 5090 is too new** for current PyTorch builds. Both stable and nightly releases fail with: + +``` +RuntimeError: CUDA error: no kernel image is available for execution on the device + +NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. +The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90. 
+``` + +**Tested Versions:** +- ❌ PyTorch 2.6.0+cu124 (Stable) - No sm_120 support +- ❌ PyTorch 2.7.0.dev20250310+cu124 (Nightly) - No sm_120 support + +--- + +## Impact on Your Voice Bot + +### Currently Affected + +All GPU-accelerated components are **non-functional**: + +| Component | Current Status | Impact | +|-----------|---------------|--------| +| **faster-whisper STT** | CPU-only | 3-5x slower (550ms → ~2s) | +| **Coqui XTTS v2 TTS** | CPU-only | 2-3x slower (1.6s → ~4-5s) | +| **Kani-TTS-2 testing** | Blocked | Cannot evaluate | +| **Total latency** | ~10-15s | vs target 3.5s ❌ | + +### What Still Works + +- ✅ Discord bot (voice receiving/sending) +- ✅ OpenClaw Gateway (LLM inference) +- ✅ VAD (Silero, CPU-based) +- ✅ Smart Turn v3 (ONNX, CPU-based) +- ⚠️ STT/TTS (fallback to CPU, very slow) + +--- + +## Solutions + +### Option 1: Wait for PyTorch Support (Recommended) + +**Timeline:** 1-3 months (estimated) + +**Reason:** RTX 5090 released Jan 2025, PyTorch typically adds new GPU support within 2-4 months. + +**Monitor:** +- [PyTorch Releases](https://github.com/pytorch/pytorch/releases) +- [PyTorch CUDA Support](https://pytorch.org/get-started/locally/) + +**Action:** +- Check weekly for PyTorch updates +- Subscribe to PyTorch announcements +- Test with: `pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124` + +### Option 2: Build PyTorch from Source (Advanced) + +**Difficulty:** High +**Time:** 4-8 hours +**Risk:** May not work if CUDA Toolkit doesn't support sm_120 + +**Steps:** +1. Install CUDA Toolkit 12.8+ (if available with sm_120 support) +2. Clone PyTorch: + ```bash + git clone --recursive https://github.com/pytorch/pytorch + cd pytorch + ``` +3. Build with sm_120: + ```bash + export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0;12.0" + python setup.py install + ``` +4. 
Test + +**Resources:** +- [Building PyTorch from Source](https://github.com/pytorch/pytorch#from-source) + +### Option 3: Use Different GPU + +**If available**, use older GPU for development: + +| GPU | CUDA Capability | PyTorch Support | Recommendation | +|-----|-----------------|-----------------|----------------| +| RTX 4090 | sm_89 | ✅ Full support | ✅ Ideal for development | +| RTX 4080 | sm_89 | ✅ Full support | ✅ Good alternative | +| RTX 4070 Ti | sm_89 | ✅ Full support | ✅ Sufficient for voice bot | +| RTX 3090 | sm_86 | ✅ Full support | ✅ Works well | + +**Action:** +- Check if you have access to RTX 40-series or 30-series GPU +- Use for development until RTX 5090 support lands + +### Option 4: Run in Cloud with Supported GPU + +**Platforms:** +- **RunPod** - RTX 4090 @ $0.79/hr +- **Vast.ai** - RTX 4090 @ $0.40-0.60/hr +- **Google Colab Pro** - A100/V100 @ $10/month + +**Pros:** +- Immediate GPU access +- Supported hardware +- Test optimizations quickly + +**Cons:** +- Ongoing cost +- Need to upload code/data +- Network latency for Discord bot + +### Option 5: CPU-Only (Temporary Workaround) + +**Use case:** Testing logic while waiting for GPU support + +**Current setup** (already done): +```bash +pip install torch torchvision torchaudio # CPU version +``` + +**Performance:** +- STT: ~2-3s (vs 0.3s target) +- TTS: ~4-5s (vs 0.9s target) +- Total: ~10-15s (vs 3.5s target) + +**Acceptable for:** +- Testing conversation flow +- Debugging bot logic +- Development (not production) + +--- + +## Recommended Action Plan + +### Immediate (This Week) + +1. ✅ **Rollback to CPU PyTorch** for development: + ```bash + pip install torch torchvision torchaudio + ``` + +2. ✅ **Focus on non-GPU optimizations**: + - Query routing (Haiku vs Sonnet vs Opus) + - TTS caching + - Sentence-level streaming + - Response filtering + +3. ✅ **Test bot functionality** with CPU (slow but works) + +### Short-term (Next 2-4 Weeks) + +4. 
🔄 **Monitor PyTorch releases** for sm_120 support + +5. 🧪 **Evaluate cloud GPU** options: + - Test on RunPod/Vast.ai with RTX 4090 + - Measure actual performance gains + - Compare cost vs waiting + +6. 📊 **Benchmark CPU baseline** to quantify GPU improvement later + +### Long-term (Next 1-3 Months) + +7. ⏳ **Wait for PyTorch sm_120 support** + +8. 🚀 **Deploy with GPU** when support lands + +9. 🔍 **Re-evaluate Kani-TTS-2** once GPU works + +--- + +## Current Bot Configuration + +**For now, use CPU-only mode:** + +```yaml +# config.yaml +pipeline: + stt: + model_size: "small" # Smaller = faster on CPU + device: "cpu" # Force CPU + beam_size: 1 # Faster decoding + + tts: + device: "cpu" # Force CPU +``` + +**.env overrides:** +```bash +PIPELINE__STT__DEVICE=cpu +PIPELINE__STT__MODEL_SIZE=small +PIPELINE__TTS__DEVICE=cpu +``` + +--- + +## When PyTorch Supports sm_120 + +**Test with:** +```bash +# Uninstall current +pip uninstall torch torchaudio torchvision -y + +# Install latest +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 + +# Verify +python -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0))" + +# Test computation +python -c "import torch; x=torch.rand(100,100,device='cuda'); print('GPU OK')" +``` + +**Then update config:** +```yaml +pipeline: + stt: + device: "cuda" + model_size: "medium" # Can use larger model on GPU + beam_size: 5 # Better quality + + tts: + device: "cuda" +``` + +**Expected improvement:** +- STT: ~2s → ~0.35s (6x faster) +- TTS: ~4-5s → ~0.9s (5x faster) +- Total: ~10-15s → ~4s (3x faster, near 3.5s target!) 
+
+---
+
+## Resources
+
+- [PyTorch GitHub](https://github.com/pytorch/pytorch)
+- [NVIDIA CUDA Compatibility](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities)
+- [RTX 5090 Specs](https://www.nvidia.com/en-us/geforce/graphics-cards/50-series/rtx-5090/)
+- [RunPod Cloud GPU](https://www.runpod.io/)
+- [Vast.ai GPU Marketplace](https://vast.ai/)
+
+---
+
+**Summary:** RTX 5090 support is coming, but not here yet. Use CPU mode for development now, monitor for PyTorch updates, or use cloud GPU for testing in the meantime.
diff --git a/fix_pytorch_cuda.bat b/fix_pytorch_cuda.bat
new file mode 100644
index 0000000..07f0991
--- /dev/null
+++ b/fix_pytorch_cuda.bat
@@ -0,0 +1,63 @@
+@echo off
+rem Reinstall PyTorch from a CUDA wheel index inside the project venv.
+rem Usage: fix_pytorch_cuda.bat [wheel-index-url]
+rem The index defaults to cu121. RTX_5090_BLOCKER.md reports that the cu124
+rem builds tested so far lack sm_120 kernels, so pass a newer index once a
+rem build that supports the RTX 5090 is published.
+set "TORCH_INDEX_URL=https://download.pytorch.org/whl/cu121"
+if not "%~1"=="" set "TORCH_INDEX_URL=%~1"
+
+echo ======================================================================
+echo Fixing PyTorch CUDA Installation
+echo ======================================================================
+echo.
+rem Stop early when the venv is missing; otherwise pip would modify whatever
+rem Python happens to be first on PATH.
+if not exist venv\Scripts\activate.bat (
+    echo [ERROR] venv\Scripts\activate.bat not found - run this from the project root.
+    exit /b 1
+)
+echo Current Status:
+call venv\Scripts\activate.bat
+python -c "import torch; print(f' PyTorch: {torch.__version__}'); print(f' CUDA: {torch.cuda.is_available()}')"
+echo.
+
+echo ======================================================================
+echo This will:
+echo 1. Uninstall CPU-only PyTorch
+echo 2. Install CUDA-enabled PyTorch from %TORCH_INDEX_URL%
+echo 3. Verify RTX 5090 is accessible
+echo ======================================================================
+echo.
+
+set /p continue="Continue? (y/n): "
+if /i not "%continue%"=="y" (
+    echo Cancelled.
+    exit /b 1
+)
+
+echo.
+echo [1/3] Uninstalling CPU PyTorch...
+python -m pip uninstall torch torchaudio torchvision -y
+
+echo.
+echo [2/3] Installing CUDA PyTorch (this may take a few minutes)...
+python -m pip install torch torchvision torchaudio --index-url "%TORCH_INDEX_URL%"
+rem Do not fall through to the verification step after a failed install.
+if errorlevel 1 (
+    echo [ERROR] PyTorch installation failed - see the pip output above.
+    pause
+    exit /b 1
+)
+
+echo.
+echo [3/3] Verifying installation...
+python -c "import torch; print(f'\nPyTorch: {torch.__version__}'); print(f'CUDA Available: {torch.cuda.is_available()}'); print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"N/A\"}'); print(f'CUDA Version: {torch.version.cuda if torch.cuda.is_available() else \"N/A\"}')"
+
+echo.
+echo ======================================================================
+echo Done! Your TTS and STT should now use GPU acceleration.
+echo ======================================================================
+echo.
+echo Next: Run the bot and check performance improvement!
+pause
diff --git a/test_kani_tts.py b/test_kani_tts.py
new file mode 100644
index 0000000..b0077ed
--- /dev/null
+++ b/test_kani_tts.py
@@ -0,0 +1,205 @@
+"""
+Kani-TTS-2 Testing Script
+Compare Kani-TTS-2 with current Coqui XTTS v2 implementation
+
+Run with ``python test_kani_tts.py``. All work happens in main() behind a
+``__main__`` guard, so importing this module (for example when pytest collects
+files named test_*.py) does not load models, install packages or exit.
+"""
+
+import importlib
+import subprocess
+import sys
+import time
+import wave
+from pathlib import Path
+
+import numpy as np
+
+# Test configuration
+TEST_PHRASES = [
+    "Yes, sir. I am at your service.",  # Short, simple (cache test)
+    "The weather today is partly cloudy with a high of 72 degrees.",  # Medium
+    "I've analyzed the data and found several interesting patterns that warrant further investigation.",  # Long
+]
+
+VOICE_FILES = {
+    "jarvis": "server/voices/jarvis.mp3",
+    "sage": "server/voices/sage.wav",
+}
+
+# Sample rate used to convert a sample count into seconds of audio.
+SAMPLE_RATE = 22050  # 22kHz sample rate
+OUTPUT_DIR = "test_outputs"
+
+
+def load_kani_tts():
+    """Return (KaniTTS, SpeakerEmbedder), installing kani-tts-2 if it is missing."""
+    try:
+        from kani_tts import KaniTTS, SpeakerEmbedder
+        print("[OK] Kani-TTS-2 installed")
+    except ImportError:
+        print("[WARN] Kani-TTS-2 not installed. Installing now...")
+        # "python -m pip" installs into the interpreter running this script;
+        # a bare "pip" may belong to a different Python on PATH.
+        pip_install = [sys.executable, "-m", "pip", "install"]
+        subprocess.run(pip_install + ["kani-tts-2"], check=True)
+        subprocess.run(pip_install + ["-U", "transformers==4.56.0"], check=True)
+        # Make the import system notice packages installed after startup.
+        importlib.invalidate_caches()
+        from kani_tts import KaniTTS, SpeakerEmbedder
+        print("[OK] Kani-TTS-2 installed successfully")
+    return KaniTTS, SpeakerEmbedder
+
+
+def main():
+    """Run the six benchmark steps and return a process exit code (0 = success)."""
+    print("=" * 70)
+    print("Kani-TTS-2 Testing Script")
+    print("=" * 70)
+
+    # Step 1: Check dependencies
+    print("\n[1/6] Checking dependencies...")
+    try:
+        import torch
+    except ImportError:
+        print("[ERROR] PyTorch not installed")
+        return 1
+    print(f"[OK] PyTorch {torch.__version__} (CUDA: {torch.cuda.is_available()})")
+    KaniTTS, SpeakerEmbedder = load_kani_tts()
+
+    # Step 2: Check voice files
+    print("\n[2/6] Checking voice reference files...")
+    available_voices = {}
+    for agent, voice_path in VOICE_FILES.items():
+        if Path(voice_path).exists():
+            print(f"[OK] {agent}: {voice_path}")
+            available_voices[agent] = voice_path
+        else:
+            print(f"[WARN] {agent}: {voice_path} not found")
+
+    if not available_voices:
+        print("[ERROR] No voice files found. Please add voice samples to server/voices/")
+        return 1
+
+    # Step 3: Initialize Kani-TTS-2
+    print("\n[3/6] Initializing Kani-TTS-2 model...")
+    # perf_counter() is monotonic and high-resolution, unlike wall-clock time.time().
+    init_start = time.perf_counter()
+    try:
+        model = KaniTTS('nineninesix/kani-tts-2-en')
+        embedder = SpeakerEmbedder()
+        init_time = time.perf_counter() - init_start
+        print(f"[OK] Model loaded in {init_time:.2f}s")
+    except Exception as e:
+        print(f"[ERROR] Failed to load model: {e}")
+        return 1
+
+    # Step 4: Generate speaker embeddings
+    print("\n[4/6] Generating speaker embeddings...")
+    speaker_embeddings = {}
+    for agent, voice_path in available_voices.items():
+        try:
+            embed_start = time.perf_counter()
+            speaker_emb = embedder.embed_audio_file(voice_path)
+            embed_time = time.perf_counter() - embed_start
+            speaker_embeddings[agent] = speaker_emb
+            print(f"[OK] {agent}: {speaker_emb.shape} in {embed_time:.2f}s")
+        except Exception as e:
+            print(f"[ERROR] {agent}: {e}")
+
+    # Step 5: Run latency benchmarks
+    print("\n[5/6] Running latency benchmarks...")
+    print("-" * 70)
+
+    results = []
+    total_tests = len(TEST_PHRASES)
+    # Create the output directory once rather than on every iteration.
+    Path(OUTPUT_DIR).mkdir(exist_ok=True)
+
+    for i, text in enumerate(TEST_PHRASES, 1):
+        print(f"\n[Test {i}/{total_tests}] \"{text[:50]}...\"")
+
+        for agent, speaker_emb in speaker_embeddings.items():
+            try:
+                # Generate audio
+                start = time.perf_counter()
+                audio, _processed_text = model(
+                    text,
+                    speaker_emb=speaker_emb,
+                    temperature=0.75,
+                    top_p=0.85,
+                )
+                generation_time = time.perf_counter() - start
+
+                # Calculate metrics (assumes len(audio) is the sample count - TODO confirm)
+                audio_duration = len(audio) / SAMPLE_RATE
+                rtf = generation_time / audio_duration
+
+                # Save output
+                output_path = f"{OUTPUT_DIR}/kani_{agent}_test{i}.wav"
+                model.save_audio(audio, output_path)
+
+                print(f" {agent}:")
+                print(f" Generation: {generation_time:.2f}s")
+                print(f" Audio length: {audio_duration:.2f}s")
+                print(f" RTF: {rtf:.2f}")
+                print(f" Output: {output_path}")
+
+                results.append({
+                    "test": i,
+                    "agent": agent,
+                    "text_length": len(text),
+                    "generation_time": generation_time,
+                    "audio_duration": audio_duration,
+                    "rtf": rtf,
+                    "output": output_path,
+                })
+
+            except Exception as e:
+                print(f" {agent}: [ERROR] {e}")
+
+    # Step 6: Generate report
+    print("\n[6/6] Performance Summary")
+    print("=" * 70)
+
+    if not results:
+        print("[ERROR] No successful tests - check errors above")
+        print("=" * 70)
+        # Signal failure to callers instead of exiting with status 0.
+        return 1
+
+    avg_generation = np.mean([r["generation_time"] for r in results])
+    avg_rtf = np.mean([r["rtf"] for r in results])
+
+    print("\nAverage Metrics:")
+    print(f" Generation Time: {avg_generation:.2f}s")
+    print(f" RTF: {avg_rtf:.2f}")
+    print(" Expected RTF from docs: ~0.2")
+
+    print("\nPer-Test Breakdown:")
+    for i, phrase in enumerate(TEST_PHRASES, 1):
+        test_results = [r for r in results if r["test"] == i]
+        if test_results:
+            test_rtf = np.mean([r["rtf"] for r in test_results])
+            test_gen = np.mean([r["generation_time"] for r in test_results])
+            print(f" Test {i} ('{phrase[:30]}...')")
+            print(f" Avg Generation: {test_gen:.2f}s")
+            print(f" Avg RTF: {test_rtf:.2f}")
+
+    print(f"\nOutput files saved to: {OUTPUT_DIR}/")
+    print(" Listen to samples and compare quality with current TTS")
+
+    print("\n[OK] Testing complete!")
+    print("\nNext steps:")
+    print(f" 1. Listen to generated audio samples in {OUTPUT_DIR}/")
+    print(" 2. Compare quality with current Coqui XTTS v2")
+    print(" 3. If quality is acceptable and RTF < 0.3, consider integration")
+    print(" 4. See KANI_TTS_INTEGRATION.md for implementation guide")
+
+    print("=" * 70)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())