From 3450e57ca6fa34ffe44db65de9730d7931b4c18f Mon Sep 17 00:00:00 2001 From: Jezza Hehn Date: Fri, 10 Apr 2026 04:47:31 +0000 Subject: [PATCH] Fix voice portal: WebSocket routing, Caddy keepalive, audio pipeline - Fix app.py: @app.get -> @app.websocket for /ws/voice route (was returning 403) - Fix app.py: create static_dir before mounting it (AttributeError on startup) - Fix voice.html: AudioWorkletNode constructor (was AudioWorkletProcessor) - Fix voice.html: use ScriptProcessor directly (more reliable) - Fix voice.html: send Float32 directly (server expects float32, was sending Int16) - Fix voice.html: auto-detect ws/wss protocol from page URL - Add Caddy reverse proxy keepalive pings every 15s to prevent timeout - Add detailed message type logging in WebSocket receive loop - Strip Jarvis/Sage personas, rename bot to MoltMic - Add /moltmic voice slash command for portal URL - Update portal URL to https://voice.jezzahehn.com --- discord_bot/__init__.py | 10 ++++----- discord_bot/bot.py | 8 +++---- discord_bot/commands.py | 37 +++++++++++++++++++++++++++++++ server/app.py | 26 +++++++++++++++++++++- server/static/voice.html | 35 +++++++++--------------------- server/voice_ws.py | 47 +++++++++++++++++++++++++++++++++++----- 6 files changed, 122 insertions(+), 41 deletions(-) diff --git a/discord_bot/__init__.py b/discord_bot/__init__.py index 7662387..4125ef4 100644 --- a/discord_bot/__init__.py +++ b/discord_bot/__init__.py @@ -1,18 +1,18 @@ -"""Jarvis Voice Bot - Discord Integration""" +"""MoltMic - OpenClaw Voice Bot""" -from .bot import JarvisVoiceBot, create_bot, run_bot +from .bot import MoltMicBot, create_bot, run_bot from .voice_session import VoiceSession, VoiceSessionManager from .audio_bridge import AudioBridge, PipelineAudioSource -from .commands import VoiceBotCommands, setup_commands +from .commands import MoltMicCommands, setup_commands __all__ = [ - "JarvisVoiceBot", + "MoltMicBot", "create_bot", "run_bot", "VoiceSession", "VoiceSessionManager", "AudioBridge", "PipelineAudioSource", - "VoiceBotCommands", + "MoltMicCommands", "setup_commands", ] diff --git a/discord_bot/bot.py b/discord_bot/bot.py index 18bdbd7..f430b39 100644 --- a/discord_bot/bot.py +++ b/discord_bot/bot.py @@ -20,8 +20,8 @@ from .vad_receiver import VADAudioReceiver logger = get_logger(__name__) -class JarvisVoiceBot(discord.Client): - """Discord bot for voice interaction with AI agents.""" +class MoltMicBot(discord.Client): + """MoltMic - Discord voice bot for OpenClaw.""" def __init__( self, @@ -479,7 +479,7 @@ async def create_bot( stt_transcriber=None, orchestrator=None, audio_output_callbacks=None, -) -> JarvisVoiceBot: +) -> MoltMicBot: """ Create and initialize the Discord bot. @@ -494,7 +494,7 @@ async def create_bot( Returns: Initialized bot instance """ - bot = JarvisVoiceBot( + bot = MoltMicBot( config=config, openclaw_config=openclaw_config, tts_synthesizer=tts_synthesizer, diff --git a/discord_bot/commands.py b/discord_bot/commands.py index 816ab54..957e969 100644 --- a/discord_bot/commands.py +++ b/discord_bot/commands.py @@ -94,6 +94,43 @@ class MoltMicCommands(app_commands.Group): logger.exception(f"Status error: {e}") await interaction.followup.send("❌ Error.", ephemeral=True) + @app_commands.command(name="voice", description="Open voice portal in browser") + async def voice(self, interaction: discord.Interaction): + """Generate a voice portal URL for browser-based speech.""" + await interaction.response.defer(thinking=True) + + try: + # Import here to avoid circular dependency + from server.voice_ws import create_session_id + + session_id = create_session_id() + portal_url = f"https://voice.jezzahehn.com/voice?session={session_id}" + + embed = discord.Embed( + title="🎙️ Voice Portal", + description="Click below to open the voice portal in your browser", + color=discord.Color.blue() + ) + embed.add_field( + name="Portal URL", + value=f"[Open Voice Portal]({portal_url})", + inline=False + ) + embed.add_field( + name="Instructions", + value="1. Click the link above\n2. Allow microphone access\n3. Start talking! The bot will listen and respond.", + inline=False + ) + embed.set_footer(text="The bot will start listening when you connect") + + await interaction.followup.send(embed=embed) + + logger.info(f"Voice portal created for session {session_id}") + + except Exception as e: + logger.exception(f"Voice portal error: {e}") + await interaction.followup.send("❌ Failed to create voice portal.", ephemeral=True) + async def setup_commands(bot): """Register slash commands.""" diff --git a/server/app.py b/server/app.py index 12aa38c..ccf54ca 100644 --- a/server/app.py +++ b/server/app.py @@ -4,10 +4,12 @@ Provides HTTP endpoints for: - Text-to-Speech (OpenAI /v1/audio/speech compatible) - Speech-to-Text (OpenAI /v1/audio/transcriptions compatible) - Health checks and status +- WebSocket voice endpoint for browser-based speech Shares STT and TTS engines with Discord bot for efficiency. """ +import asyncio import io import tempfile import time @@ -16,13 +18,15 @@ from typing import Literal, Optional import numpy as np import soundfile as sf -from fastapi import FastAPI, File, Form, HTTPException, UploadFile +from fastapi import FastAPI, File, Form, HTTPException, UploadFile, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import Response, StreamingResponse +from fastapi.staticfiles import StaticFiles from pydantic import BaseModel, Field from server.stt import FasterWhisperSTT, STTTranscriber from server.tts import ChatterboxTTS, TTSSynthesizer +from server.voice_ws import handle_voice_websocket, create_session_id from utils.logging import get_logger logger = get_logger(__name__) @@ -111,6 +115,13 @@ class VoiceAPIServer: allow_headers=["*"], ) + # Create static files directory + self.static_dir = Path("server/static") + self.static_dir.mkdir(parents=True, exist_ok=True) + + # Mount static files + self.app.mount("/static", StaticFiles(directory=str(self.static_dir)), name="static") + # Register routes self._register_routes() @@ -129,6 +140,19 @@ class VoiceAPIServer: """Health check endpoint.""" return await self._health_check() + @self.app.get("/voice") + async def get_voice_page(): + """Serve voice portal HTML page.""" + static_file = self.static_dir / "voice.html" + if static_file.exists(): + return Response(content=static_file.read_text(), media_type="text/html") + raise HTTPException(status_code=404, detail="Voice page not found") + + @self.app.websocket("/ws/voice/{session_id}") + async def voice_websocket(session_id: str, websocket: WebSocket): + """WebSocket endpoint for voice session.""" + await handle_voice_websocket(websocket, session_id) + @self.app.post("/v1/audio/speech") async def create_speech(request: TTSRequest): """ diff --git a/server/static/voice.html b/server/static/voice.html index 87e4071..a316727 100644 --- a/server/static/voice.html +++ b/server/static/voice.html @@ -205,7 +205,8 @@