Fix voice portal: WebSocket routing, Caddy keepalive, audio pipeline
- Fix app.py: @app.get -> @app.websocket for /ws/voice route (was returning 403)
- Fix app.py: create static_dir before mounting it (AttributeError on startup)
- Fix voice.html: AudioWorkletNode constructor (was AudioWorkletProcessor)
- Fix voice.html: use ScriptProcessor directly (more reliable)
- Fix voice.html: send Float32 directly (server expects float32, was sending Int16)
- Fix voice.html: auto-detect ws/wss protocol from page URL
- Add Caddy reverse proxy keepalive pings every 15s to prevent timeout
- Add detailed message type logging in WebSocket receive loop
- Strip Jarvis/Sage personas, rename bot to MoltMic
- Add /moltmic voice slash command for portal URL
- Update portal URL to https://voice.jezzahehn.com
This commit is contained in:
parent
bc580861dd
commit
3450e57ca6
6 changed files with 122 additions and 41 deletions
|
|
@ -1,18 +1,18 @@
|
||||||
"""Jarvis Voice Bot - Discord Integration"""
|
"""MoltMic - OpenClaw Voice Bot"""
|
||||||
|
|
||||||
from .bot import JarvisVoiceBot, create_bot, run_bot
|
from .bot import MoltMicBot, create_bot, run_bot
|
||||||
from .voice_session import VoiceSession, VoiceSessionManager
|
from .voice_session import VoiceSession, VoiceSessionManager
|
||||||
from .audio_bridge import AudioBridge, PipelineAudioSource
|
from .audio_bridge import AudioBridge, PipelineAudioSource
|
||||||
from .commands import VoiceBotCommands, setup_commands
|
from .commands import MoltMicCommands, setup_commands
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"JarvisVoiceBot",
|
"MoltMicBot",
|
||||||
"create_bot",
|
"create_bot",
|
||||||
"run_bot",
|
"run_bot",
|
||||||
"VoiceSession",
|
"VoiceSession",
|
||||||
"VoiceSessionManager",
|
"VoiceSessionManager",
|
||||||
"AudioBridge",
|
"AudioBridge",
|
||||||
"PipelineAudioSource",
|
"PipelineAudioSource",
|
||||||
"VoiceBotCommands",
|
"MoltMicCommands",
|
||||||
"setup_commands",
|
"setup_commands",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ from .vad_receiver import VADAudioReceiver
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class JarvisVoiceBot(discord.Client):
|
class MoltMicBot(discord.Client):
|
||||||
"""Discord bot for voice interaction with AI agents."""
|
"""MoltMic - Discord voice bot for OpenClaw."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -479,7 +479,7 @@ async def create_bot(
|
||||||
stt_transcriber=None,
|
stt_transcriber=None,
|
||||||
orchestrator=None,
|
orchestrator=None,
|
||||||
audio_output_callbacks=None,
|
audio_output_callbacks=None,
|
||||||
) -> JarvisVoiceBot:
|
) -> MoltMicBot:
|
||||||
"""
|
"""
|
||||||
Create and initialize the Discord bot.
|
Create and initialize the Discord bot.
|
||||||
|
|
||||||
|
|
@ -494,7 +494,7 @@ async def create_bot(
|
||||||
Returns:
|
Returns:
|
||||||
Initialized bot instance
|
Initialized bot instance
|
||||||
"""
|
"""
|
||||||
bot = JarvisVoiceBot(
|
bot = MoltMicBot(
|
||||||
config=config,
|
config=config,
|
||||||
openclaw_config=openclaw_config,
|
openclaw_config=openclaw_config,
|
||||||
tts_synthesizer=tts_synthesizer,
|
tts_synthesizer=tts_synthesizer,
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,43 @@ class MoltMicCommands(app_commands.Group):
|
||||||
logger.exception(f"Status error: {e}")
|
logger.exception(f"Status error: {e}")
|
||||||
await interaction.followup.send("❌ Error.", ephemeral=True)
|
await interaction.followup.send("❌ Error.", ephemeral=True)
|
||||||
|
|
||||||
|
@app_commands.command(name="voice", description="Open voice portal in browser")
|
||||||
|
async def voice(self, interaction: discord.Interaction):
|
||||||
|
"""Generate a voice portal URL for browser-based speech."""
|
||||||
|
await interaction.response.defer(thinking=True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Import here to avoid circular dependency
|
||||||
|
from server.voice_ws import create_session_id
|
||||||
|
|
||||||
|
session_id = create_session_id()
|
||||||
|
portal_url = f"https://voice.jezzahehn.com/voice?session={session_id}"
|
||||||
|
|
||||||
|
embed = discord.Embed(
|
||||||
|
title="🎙️ Voice Portal",
|
||||||
|
description="Click below to open the voice portal in your browser",
|
||||||
|
color=discord.Color.blue()
|
||||||
|
)
|
||||||
|
embed.add_field(
|
||||||
|
name="Portal URL",
|
||||||
|
value=f"[Open Voice Portal]({portal_url})",
|
||||||
|
inline=False
|
||||||
|
)
|
||||||
|
embed.add_field(
|
||||||
|
name="Instructions",
|
||||||
|
value="1. Click the link above\n2. Allow microphone access\n3. Start talking! The bot will listen and respond.",
|
||||||
|
inline=False
|
||||||
|
)
|
||||||
|
embed.set_footer(text="The bot will start listening when you connect")
|
||||||
|
|
||||||
|
await interaction.followup.send(embed=embed)
|
||||||
|
|
||||||
|
logger.info(f"Voice portal created for session {session_id}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Voice portal error: {e}")
|
||||||
|
await interaction.followup.send("❌ Failed to create voice portal.", ephemeral=True)
|
||||||
|
|
||||||
|
|
||||||
async def setup_commands(bot):
|
async def setup_commands(bot):
|
||||||
"""Register slash commands."""
|
"""Register slash commands."""
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,12 @@ Provides HTTP endpoints for:
|
||||||
- Text-to-Speech (OpenAI /v1/audio/speech compatible)
|
- Text-to-Speech (OpenAI /v1/audio/speech compatible)
|
||||||
- Speech-to-Text (OpenAI /v1/audio/transcriptions compatible)
|
- Speech-to-Text (OpenAI /v1/audio/transcriptions compatible)
|
||||||
- Health checks and status
|
- Health checks and status
|
||||||
|
- WebSocket voice endpoint for browser-based speech
|
||||||
|
|
||||||
Shares STT and TTS engines with Discord bot for efficiency.
|
Shares STT and TTS engines with Discord bot for efficiency.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import io
|
import io
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
|
@ -16,13 +18,15 @@ from typing import Literal, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
from fastapi import FastAPI, File, Form, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import Response, StreamingResponse
|
from fastapi.responses import Response, StreamingResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from server.stt import FasterWhisperSTT, STTTranscriber
|
from server.stt import FasterWhisperSTT, STTTranscriber
|
||||||
from server.tts import ChatterboxTTS, TTSSynthesizer
|
from server.tts import ChatterboxTTS, TTSSynthesizer
|
||||||
|
from server.voice_ws import handle_voice_websocket, create_session_id
|
||||||
from utils.logging import get_logger
|
from utils.logging import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
@ -111,6 +115,13 @@ class VoiceAPIServer:
|
||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Create static files directory
|
||||||
|
self.static_dir = Path("server/static")
|
||||||
|
self.static_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Mount static files
|
||||||
|
self.app.mount("/static", StaticFiles(directory=str(self.static_dir)), name="static")
|
||||||
|
|
||||||
# Register routes
|
# Register routes
|
||||||
self._register_routes()
|
self._register_routes()
|
||||||
|
|
||||||
|
|
@ -129,6 +140,19 @@ class VoiceAPIServer:
|
||||||
"""Health check endpoint."""
|
"""Health check endpoint."""
|
||||||
return await self._health_check()
|
return await self._health_check()
|
||||||
|
|
||||||
|
@self.app.get("/voice")
|
||||||
|
async def get_voice_page():
|
||||||
|
"""Serve voice portal HTML page."""
|
||||||
|
static_file = self.static_dir / "voice.html"
|
||||||
|
if static_file.exists():
|
||||||
|
return Response(content=static_file.read_text(), media_type="text/html")
|
||||||
|
raise HTTPException(status_code=404, detail="Voice page not found")
|
||||||
|
|
||||||
|
@self.app.websocket("/ws/voice/{session_id}")
|
||||||
|
async def voice_websocket(session_id: str, websocket: WebSocket):
|
||||||
|
"""WebSocket endpoint for voice session."""
|
||||||
|
await handle_voice_websocket(websocket, session_id)
|
||||||
|
|
||||||
@self.app.post("/v1/audio/speech")
|
@self.app.post("/v1/audio/speech")
|
||||||
async def create_speech(request: TTSRequest):
|
async def create_speech(request: TTSRequest):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -205,7 +205,8 @@
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
const sessionId = new URLSearchParams(window.location.search).get('session');
|
const sessionId = new URLSearchParams(window.location.search).get('session');
|
||||||
const wsUrl = `wss://${window.location.host}/ws/voice/${sessionId}`;
|
const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||||
|
const wsUrl = `${wsProtocol}//${window.location.host}/ws/voice/${sessionId}`;
|
||||||
|
|
||||||
let ws = null;
|
let ws = null;
|
||||||
let audioContext = null;
|
let audioContext = null;
|
||||||
|
|
@ -339,19 +340,12 @@
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
console.log('Microphone acquired, stream tracks:', stream.getTracks().length);
|
||||||
microphone = audioContext.createMediaStreamSource(stream);
|
microphone = audioContext.createMediaStreamSource(stream);
|
||||||
|
console.log('MediaStreamSource created, sample rate:', audioContext.sampleRate);
|
||||||
|
|
||||||
// Use AudioWorklet or ScriptProcessor as fallback
|
// Use ScriptProcessor for reliable audio capture
|
||||||
if (audioContext.audioWorklet) {
|
|
||||||
try {
|
|
||||||
await initAudioWorklet();
|
|
||||||
} catch (error) {
|
|
||||||
console.warn('AudioWorklet failed, falling back to ScriptProcessor:', error);
|
|
||||||
initScriptProcessor();
|
initScriptProcessor();
|
||||||
}
|
|
||||||
} else {
|
|
||||||
initScriptProcessor();
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Audio initialization error:', error);
|
console.error('Audio initialization error:', error);
|
||||||
|
|
@ -365,11 +359,7 @@
|
||||||
|
|
||||||
await audioContext.audioWorklet.addModule(workletUrl);
|
await audioContext.audioWorklet.addModule(workletUrl);
|
||||||
|
|
||||||
const processor = new AudioWorkletProcessor(audioContext, {
|
const processor = new AudioWorkletNode(audioContext, 'voice-processor');
|
||||||
numberOfInputs: 1,
|
|
||||||
numberOfOutputs: 1,
|
|
||||||
outputChannelCount: [1]
|
|
||||||
});
|
|
||||||
|
|
||||||
microphone.connect(processor);
|
microphone.connect(processor);
|
||||||
|
|
||||||
|
|
@ -411,14 +401,9 @@
|
||||||
function sendAudio(audioData) {
|
function sendAudio(audioData) {
|
||||||
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
||||||
|
|
||||||
// Convert Float32 to Int16 for transmission
|
// Send as Float32Array directly
|
||||||
const int16Data = new Int16Array(audioData.length);
|
ws.send(audioData.buffer);
|
||||||
for (let i = 0; i < audioData.length; i++) {
|
console.log('Sent audio chunk:', audioData.length, 'samples');
|
||||||
const sample = Math.max(-1, Math.min(1, audioData[i]));
|
|
||||||
int16Data[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
ws.send(int16Data.buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Event listeners
|
// Event listeners
|
||||||
|
|
|
||||||
|
|
@ -208,13 +208,43 @@ async def handle_voice_websocket(websocket: WebSocket, session_id: str):
|
||||||
"message": "Connected to voice portal",
|
"message": "Connected to voice portal",
|
||||||
})
|
})
|
||||||
|
|
||||||
# Receive and process audio
|
# Background task: send periodic pings to keep connection alive through Caddy
|
||||||
|
async def keepalive():
|
||||||
while session.connected:
|
while session.connected:
|
||||||
try:
|
try:
|
||||||
data = await websocket.receive_bytes()
|
await asyncio.sleep(15)
|
||||||
|
if session.connected:
|
||||||
|
await websocket.send_json({"type": "ping"})
|
||||||
|
except Exception:
|
||||||
|
break
|
||||||
|
|
||||||
# Process audio chunk
|
keepalive_task = asyncio.create_task(keepalive())
|
||||||
await session.process_audio_chunk(data)
|
|
||||||
|
# Receive and process audio
|
||||||
|
chunk_count = 0
|
||||||
|
while session.connected:
|
||||||
|
try:
|
||||||
|
msg = await websocket.receive()
|
||||||
|
msg_type = msg.get("type", "unknown")
|
||||||
|
|
||||||
|
if msg_type == "websocket.disconnect":
|
||||||
|
session.connected = False
|
||||||
|
logger.info(f"WebSocket disconnected for session {session_id}")
|
||||||
|
break
|
||||||
|
|
||||||
|
elif msg_type == "websocket.receive":
|
||||||
|
if "bytes" in msg:
|
||||||
|
chunk_count += 1
|
||||||
|
if chunk_count <= 5 or chunk_count % 100 == 0:
|
||||||
|
logger.info(f"Audio chunk #{chunk_count}: {len(msg['bytes'])} bytes")
|
||||||
|
await session.process_audio_chunk(msg["bytes"])
|
||||||
|
elif "text" in msg:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
logger.warning(f"Unknown receive msg: {msg}")
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.warning(f"Unknown WebSocket msg type: {msg_type}: {msg}")
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
session.connected = False
|
session.connected = False
|
||||||
|
|
@ -222,13 +252,18 @@ async def handle_voice_websocket(websocket: WebSocket, session_id: str):
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"WebSocket error: {e}")
|
logger.error(f"WebSocket error in receive loop: {e}", exc_info=True)
|
||||||
session.connected = False
|
session.connected = False
|
||||||
break
|
break
|
||||||
|
|
||||||
|
keepalive_task.cancel()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Session initialization error: {e}")
|
logger.error(f"Session error: {e}", exc_info=True)
|
||||||
|
try:
|
||||||
await websocket.close(code=1011, reason=str(e))
|
await websocket.close(code=1011, reason=str(e))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
await session.close()
|
await session.close()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue