Fix voice portal: WebSocket routing, Caddy keepalive, audio pipeline

- Fix app.py: @app.get -> @app.websocket for /ws/voice route (was returning 403)
- Fix app.py: create static_dir before mounting it (AttributeError on startup)
- Fix voice.html: AudioWorkletNode constructor (was AudioWorkletProcessor)
- Fix voice.html: use ScriptProcessor directly (more reliable)
- Fix voice.html: send Float32 directly (server expects float32, was sending Int16)
- Fix voice.html: auto-detect ws/wss protocol from page URL
- Add Caddy reverse proxy keepalive pings every 15s to prevent timeout
- Add detailed message type logging in WebSocket receive loop
- Strip Jarvis/Sage personas, rename bot to MoltMic
- Add /moltmic voice slash command for portal URL
- Update portal URL to https://voice.jezzahehn.com
2026-04-10 04:47:31 +00:00 · 2026-04-10 04:47:31 +00:00 · 3450e57ca6
commit 3450e57ca6
parent bc580861dd
6 changed files with 122 additions and 41 deletions
--- a/discord_bot/init.py
+++ b/discord_bot/init.py
@ -1,18 +1,18 @@
-"""Jarvis Voice Bot - Discord Integration"""
+"""MoltMic - OpenClaw Voice Bot"""
-from .bot import JarvisVoiceBot, create_bot, run_bot
+from .bot import MoltMicBot, create_bot, run_bot
 from .voice_session import VoiceSession, VoiceSessionManager
 from .audio_bridge import AudioBridge, PipelineAudioSource
-from .commands import VoiceBotCommands, setup_commands
+from .commands import MoltMicCommands, setup_commands
 __all__ = [
-    "JarvisVoiceBot",
+    "MoltMicBot",
    "create_bot",
    "run_bot",
    "VoiceSession",
    "VoiceSessionManager",
    "AudioBridge",
    "PipelineAudioSource",
-    "VoiceBotCommands",
+    "MoltMicCommands",
    "setup_commands",
 ]
--- a/discord_bot/bot.py
+++ b/discord_bot/bot.py
@ -20,8 +20,8 @@ from .vad_receiver import VADAudioReceiver
 logger = get_logger(__name__)
-class JarvisVoiceBot(discord.Client):
+class MoltMicBot(discord.Client):
-    """Discord bot for voice interaction with AI agents."""
+    """MoltMic - Discord voice bot for OpenClaw."""
    def __init__(
        self,
@ -479,7 +479,7 @@ async def create_bot(
    stt_transcriber=None,
    orchestrator=None,
    audio_output_callbacks=None,
-) -> JarvisVoiceBot:
+) -> MoltMicBot:
    """
    Create and initialize the Discord bot.
@ -494,7 +494,7 @@ async def create_bot(
    Returns:
        Initialized bot instance
    """
-    bot = JarvisVoiceBot(
+    bot = MoltMicBot(
        config=config,
        openclaw_config=openclaw_config,
        tts_synthesizer=tts_synthesizer,
--- a/discord_bot/commands.py
+++ b/discord_bot/commands.py
@ -94,6 +94,43 @@ class MoltMicCommands(app_commands.Group):
            logger.exception(f"Status error: {e}")
            await interaction.followup.send("❌ Error.", ephemeral=True)
    @app_commands.command(name="voice", description="Open voice portal in browser")
    async def voice(self, interaction: discord.Interaction):
        """Reply with an embed that links to a new browser voice-portal session.

        The interaction is deferred first, then a session id is generated and
        placed in the portal URL's ``session`` query parameter. Any exception
        raised while building or sending the embed is logged and answered with
        a short failure message sent with ``ephemeral=True``.

        Args:
            interaction: The slash-command interaction being answered.
        """
        await interaction.response.defer(thinking=True)

        try:
            # Deferred import: loading this at module import time would create
            # a circular dependency.
            from server.voice_ws import create_session_id

            session_id = create_session_id()
            portal_url = f"https://voice.jezzahehn.com/voice?session={session_id}"

            # (name, value) pairs, added to the embed in this order.
            field_specs = (
                ("Portal URL", f"[Open Voice Portal]({portal_url})"),
                (
                    "Instructions",
                    "1. Click the link above\n2. Allow microphone access\n3. Start talking! The bot will listen and respond.",
                ),
            )

            embed = discord.Embed(
                title="🎙️ Voice Portal",
                description="Click below to open the voice portal in your browser",
                color=discord.Color.blue(),
            )
            for field_name, field_value in field_specs:
                embed.add_field(name=field_name, value=field_value, inline=False)
            embed.set_footer(text="The bot will start listening when you connect")

            await interaction.followup.send(embed=embed)
            logger.info(f"Voice portal created for session {session_id}")
        except Exception as exc:
            logger.exception(f"Voice portal error: {exc}")
            await interaction.followup.send("❌ Failed to create voice portal.", ephemeral=True)
 async def setup_commands(bot):
    """Register slash commands."""
--- a/server/app.py
+++ b/server/app.py
@ -4,10 +4,12 @@ Provides HTTP endpoints for:
 - Text-to-Speech (OpenAI /v1/audio/speech compatible)
 - Speech-to-Text (OpenAI /v1/audio/transcriptions compatible)
 - Health checks and status
 - WebSocket voice endpoint for browser-based speech
 Shares STT and TTS engines with Discord bot for efficiency.
 """
 import asyncio
 import io
 import tempfile
 import time
@ -16,13 +18,15 @@ from typing import Literal, Optional
 import numpy as np
 import soundfile as sf
-from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import Response, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
 from server.stt import FasterWhisperSTT, STTTranscriber
 from server.tts import ChatterboxTTS, TTSSynthesizer
 from server.voice_ws import handle_voice_websocket, create_session_id
 from utils.logging import get_logger
 logger = get_logger(__name__)
@ -111,6 +115,13 @@ class VoiceAPIServer:
            allow_headers=["*"],
        )
        # Create static files directory
        self.static_dir = Path("server/static")
        self.static_dir.mkdir(parents=True, exist_ok=True)
        # Mount static files
        self.app.mount("/static", StaticFiles(directory=str(self.static_dir)), name="static")
        # Register routes
        self._register_routes()
@ -129,6 +140,19 @@ class VoiceAPIServer:
            """Health check endpoint."""
            return await self._health_check()
        @self.app.get("/voice")
        async def get_voice_page():
            """Serve the voice portal HTML page.

            Returns:
                A ``text/html`` response whose body is the content of
                ``voice.html`` in the static directory.

            Raises:
                HTTPException: 404 when ``voice.html`` does not exist.
            """
            static_file = self.static_dir / "voice.html"
            try:
                # Decode explicitly as UTF-8 rather than the platform locale
                # encoding, so non-ASCII characters in the page survive on
                # hosts whose default encoding is not UTF-8.
                # Reading directly (instead of exists() + read) also avoids a
                # race if the file disappears between the check and the read.
                html = static_file.read_text(encoding="utf-8")
            except FileNotFoundError:
                raise HTTPException(status_code=404, detail="Voice page not found") from None
            return Response(content=html, media_type="text/html")
        @self.app.websocket("/ws/voice/{session_id}")
        async def voice_websocket(session_id: str, websocket: WebSocket):
            """Hand an incoming voice WebSocket over to the shared session handler.

            Args:
                session_id: Session identifier taken from the URL path.
                websocket: The WebSocket connection for this request.
            """
            await handle_voice_websocket(websocket=websocket, session_id=session_id)
        @self.app.post("/v1/audio/speech")
        async def create_speech(request: TTSRequest):
            """
--- a/server/static/voice.html
+++ b/server/static/voice.html
@ -205,7 +205,8 @@
    <script>
        const sessionId = new URLSearchParams(window.location.search).get('session');
-        const wsUrl = `wss://${window.location.host}/ws/voice/${sessionId}`;
+        const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
        const wsUrl = `${wsProtocol}//${window.location.host}/ws/voice/${sessionId}`;
        let ws = null;
        let audioContext = null;
@ -339,19 +340,12 @@
                    }
                });
                console.log('Microphone acquired, stream tracks:', stream.getTracks().length);
                microphone = audioContext.createMediaStreamSource(stream);
                console.log('MediaStreamSource created, sample rate:', audioContext.sampleRate);
-                // Use AudioWorklet or ScriptProcessor as fallback
+                // Use ScriptProcessor for reliable audio capture
                if (audioContext.audioWorklet) {
                    try {
                        await initAudioWorklet();
                    } catch (error) {
                        console.warn('AudioWorklet failed, falling back to ScriptProcessor:', error);
                initScriptProcessor();
                    }
                } else {
                    initScriptProcessor();
                }
            } catch (error) {
                console.error('Audio initialization error:', error);
@ -365,11 +359,7 @@
            await audioContext.audioWorklet.addModule(workletUrl);
-            const processor = new AudioWorkletProcessor(audioContext, {
+            const processor = new AudioWorkletNode(audioContext, 'voice-processor');
                numberOfInputs: 1,
                numberOfOutputs: 1,
                outputChannelCount: [1]
            });
            microphone.connect(processor);
@ -411,14 +401,9 @@
        function sendAudio(audioData) {
            if (!ws || ws.readyState !== WebSocket.OPEN) return;
-            // Convert Float32 to Int16 for transmission
+            // Send as Float32Array directly
-            const int16Data = new Int16Array(audioData.length);
+            ws.send(audioData.buffer);
-            for (let i = 0; i < audioData.length; i++) {
+            console.log('Sent audio chunk:', audioData.length, 'samples');
                const sample = Math.max(-1, Math.min(1, audioData[i]));
                int16Data[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
            }
            ws.send(int16Data.buffer);
        }
        // Event listeners
--- a/server/voice_ws.py
+++ b/server/voice_ws.py
@ -208,13 +208,43 @@ async def handle_voice_websocket(websocket: WebSocket, session_id: str):
            "message": "Connected to voice portal",
        })
-        # Receive and process audio
+        # Background task: send periodic pings to keep connection alive through Caddy
        async def keepalive():
            while session.connected:
                try:
-                data = await websocket.receive_bytes()
+                    await asyncio.sleep(15)
                    if session.connected:
                        await websocket.send_json({"type": "ping"})
                except Exception:
                    break
-                # Process audio chunk
+        keepalive_task = asyncio.create_task(keepalive())
-                await session.process_audio_chunk(data)
+
        # Receive and process audio
        chunk_count = 0
        while session.connected:
            try:
                msg = await websocket.receive()
                msg_type = msg.get("type", "unknown")
                if msg_type == "websocket.disconnect":
                    session.connected = False
                    logger.info(f"WebSocket disconnected for session {session_id}")
                    break
                elif msg_type == "websocket.receive":
                    if "bytes" in msg:
                        chunk_count += 1
                        if chunk_count <= 5 or chunk_count % 100 == 0:
                            logger.info(f"Audio chunk #{chunk_count}: {len(msg['bytes'])} bytes")
                        await session.process_audio_chunk(msg["bytes"])
                    elif "text" in msg:
                        pass
                    else:
                        logger.warning(f"Unknown receive msg: {msg}")
                else:
                    logger.warning(f"Unknown WebSocket msg type: {msg_type}: {msg}")
            except WebSocketDisconnect:
                session.connected = False
@ -222,13 +252,18 @@ async def handle_voice_websocket(websocket: WebSocket, session_id: str):
                break
            except Exception as e:
-                logger.error(f"WebSocket error: {e}")
+                logger.error(f"WebSocket error in receive loop: {e}", exc_info=True)
                session.connected = False
                break
        keepalive_task.cancel()
    except Exception as e:
-        logger.error(f"Session initialization error: {e}")
+        logger.error(f"Session error: {e}", exc_info=True)
        try:
            await websocket.close(code=1011, reason=str(e))
        except Exception:
            pass
    finally:
        await session.close()