openclaw-voice/server/static/voice.html
Jezza Hehn f0072593ae voice: asyncio.Queue rewrite, browser TTS playback, silence detection, pipeline audit
- Rewrote voice_ws.py: receive loop uses queue.put_nowait(), separate consumer
  task handles STT->LLM->TTS pipeline (no more blocking the WebSocket)
- Updated voice.html: TTS audio playback, transcript display, thinking indicator
- Added energy-based silence detection (skip STT on silent buffers)
- Fixed sample rate mismatch (16kHz throughout, not 24kHz)
- Added AUDIT.md: full pipeline audit confirming STT/TTS/OpenClaw client work

Known blocker: OpenClaw gateway chat.send requires operator.write scope,
gateway password token doesn't grant scopes. Needs device pairing fix.
2026-04-10 05:41:00 +00:00

520 lines
15 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MoltMic Voice Portal</title>
<style>
/* Reset: remove default margin/padding on every element and size boxes by their border box. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Page shell: gradient backdrop at least one viewport tall; children are stacked in a
   column and centered horizontally. */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
padding: 20px;
}
/* Content wrapper: full width up to a 600px cap, with centered text. */
.container {
max-width: 600px;
width: 100%;
text-align: center;
}
/* Page title. */
h1 {
color: #fff;
margin-bottom: 20px;
font-size: 2rem;
}
/* Connection status pill: a rounded inline-flex row holding a dot and a text label. */
.status {
display: inline-flex;
align-items: center;
gap: 8px;
padding: 8px 16px;
border-radius: 20px;
font-size: 14px;
font-weight: 500;
margin-bottom: 20px;
}
/* State modifiers: the script sets exactly one of these alongside .status. */
.status.connected {
background: #4ade80;
color: #1a1a2e;
}
.status.disconnected {
background: #ef4444;
color: white;
}
.status.connecting {
background: #f59e0b;
color: white;
}
/* Round dot that inherits the pill's text color (currentColor) and pulses continuously. */
.status-dot {
width: 10px;
height: 10px;
border-radius: 50%;
background: currentColor;
animation: pulse 2s infinite;
}
/* Fade to half opacity and back. */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* "Thinking" pill: same geometry as .status with its own fixed colors. */
.thinking {
display: inline-flex;
align-items: center;
gap: 8px;
padding: 8px 16px;
border-radius: 20px;
font-size: 14px;
font-weight: 500;
margin-bottom: 20px;
background: #8b5cf6;
color: white;
}
/* Inside the thinking pill the dot bounces instead of pulsing. */
.thinking .status-dot {
animation: bounce 1s infinite;
}
/* Nudge 4px upwards and back. */
@keyframes bounce {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-4px); }
}
/* Transcript panel: translucent card between 120px and 300px tall that scrolls
   vertically once its content overflows. */
.transcript {
background: rgba(255, 255, 255, 0.1);
border-radius: 12px;
padding: 20px;
margin: 20px 0;
min-height: 120px;
max-height: 300px;
overflow-y: auto;
text-align: left;
}
/* Small uppercase heading above the entries. */
.transcript-label {
color: #9ca3af;
font-size: 12px;
margin-bottom: 10px;
text-transform: uppercase;
letter-spacing: 1px;
}
/* One row per entry, separated by a faint rule... */
.transcript-item {
padding: 10px 0;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* ...except after the final row. */
.transcript-item:last-child {
border-bottom: none;
}
/* Text of entries added with type 'transcript'. */
.transcript-transcript {
color: #e5e7eb;
font-size: 14px;
margin-bottom: 4px;
}
/* Text of entries added with any other type (the script uses 'response'). */
.transcript-response {
color: #a5b4fc;
font-size: 13px;
}
/* Button row: centered, with a 16px gap between buttons. */
.controls {
display: flex;
gap: 16px;
justify-content: center;
margin-bottom: 30px;
}
/* Shared button geometry; every property change is animated over 0.2s. */
button {
padding: 16px 32px;
font-size: 16px;
font-weight: 600;
border: none;
border-radius: 12px;
cursor: pointer;
transition: all 0.2s;
}
/* Disabled buttons are dimmed and show a not-allowed cursor. */
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
/* Per-button colors. Hover darkens the background and lifts the button 2px,
   but only while the button is enabled. */
.connect-btn {
background: #6366f1;
color: white;
}
.connect-btn:hover:not(:disabled) {
background: #4f46e5;
transform: translateY(-2px);
}
.disconnect-btn {
background: #ef4444;
color: white;
}
.disconnect-btn:hover:not(:disabled) {
background: #dc2626;
transform: translateY(-2px);
}
/* NOTE(review): no element in the visible markup uses .retry-btn - confirm whether it is still needed. */
.retry-btn {
background: #10b981;
color: white;
}
.retry-btn:hover:not(:disabled) {
background: #059669;
transform: translateY(-2px);
}
/* Error banner; the script toggles its visibility through the inline display style. */
.error {
background: rgba(239, 68, 68, 0.2);
color: #fca5a5;
padding: 12px 16px;
border-radius: 8px;
margin: 10px 0;
font-size: 14px;
}
/* Muted helper text below the controls. */
.info {
color: #9ca3af;
font-size: 14px;
margin-top: 20px;
}
</style>
</head>
<body>
<!-- Primary page content. <main> gives assistive technology a landmark; the
     "container" class keeps the existing styling. -->
<main class="container">
  <h1>🎙️ MoltMic Voice</h1>
  <!-- Connection state pill. role="status" makes label changes a polite live announcement. -->
  <div id="status" class="status disconnected" role="status">
    <span class="status-dot" aria-hidden="true"></span>
    <span id="status-text">Disconnected</span>
  </div>
  <!-- Shown by the script while a reply is pending; hidden by default. -->
  <div id="thinking" class="thinking" style="display: none;">
    <span class="status-dot" aria-hidden="true"></span>
    <span>Thinking...</span>
  </div>
  <!-- Conversation log; rows are appended to #transcript-content by the script. -->
  <div id="transcript" class="transcript" style="display: none;">
    <div class="transcript-label">Transcript</div>
    <div id="transcript-content"></div>
  </div>
  <div class="controls">
    <!-- Explicit type="button" so these never act as submit buttons if wrapped in a form. -->
    <button id="connect-btn" class="connect-btn" type="button">Connect</button>
    <button id="disconnect-btn" class="disconnect-btn" type="button" disabled>Disconnect</button>
  </div>
  <!-- Error banner. role="alert" announces the message as soon as it is shown. -->
  <div id="error" class="error" role="alert" style="display: none;"></div>
  <p class="info">Say something and the bot will respond. Auto-reconnects on disconnect.</p>
</main>
<script>
// --- Connection parameters ---------------------------------------------------
// The session id is taken from the page URL (?session=...).
const sessionId = new URLSearchParams(window.location.search).get('session');
if (sessionId === null) {
  // get() returns null when the parameter is absent; the URL below then ends in "null".
  console.warn('No "session" query parameter found; the WebSocket URL will use "null" as the session id');
}
// Mirror the page's transport security: wss: for https: pages, ws: otherwise.
const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
// Percent-encode the id so characters such as "/", "?" or "#" in the query value
// cannot change the path or append a query/fragment to the WebSocket URL.
const wsUrl = `${wsProtocol}//${window.location.host}/ws/voice/${encodeURIComponent(sessionId)}`;

// --- Mutable connection / audio state ----------------------------------------
let ws = null;                  // current WebSocket, if any
let inputAudioContext = null;   // AudioContext used for microphone capture
let outputAudioContext = null;  // AudioContext used for playback
let microphone = null;          // MediaStreamAudioSourceNode wrapping the mic stream
let scriptProcessor = null;     // ScriptProcessorNode that forwards captured samples
let isConnected = false;        // true once the socket is open and audio is initialised
let reconnectAttempts = 0;      // automatic reconnects tried since the last successful connect
const maxReconnectAttempts = 5;

// --- Cached DOM references ---------------------------------------------------
const statusEl = document.getElementById('status');
const statusTextEl = document.getElementById('status-text');
const thinkingEl = document.getElementById('thinking');
const connectBtn = document.getElementById('connect-btn');
const disconnectBtn = document.getElementById('disconnect-btn');
const transcriptEl = document.getElementById('transcript');
const transcriptContentEl = document.getElementById('transcript-content');
const errorEl = document.getElementById('error');
// Update the connection status pill.
//   status - state modifier class: 'connected', 'disconnected' or 'connecting'
//   text   - label shown next to the dot
function updateStatus(status, text) {
  // The class must be set on the pill element. Assigning to `status.className`
  // targets the string argument and never changes the pill's appearance.
  statusEl.className = `status ${status}`;
  statusTextEl.textContent = text;
}
// Show (truthy) or hide (falsy) the "Thinking..." pill.
function showThinking(show) {
  let displayMode = 'none';
  if (show) {
    displayMode = 'inline-flex';
  }
  thinkingEl.style.display = displayMode;
}
// Put `message` into the error banner and make the banner visible.
function showError(message) {
  const banner = errorEl;
  banner.textContent = message;
  banner.style.display = 'block';
}
// Hide the error banner; its text is left untouched.
function hideError() {
  const { style } = errorEl;
  style.display = 'none';
}
// Append one entry to the transcript panel.
//   text - entry text; missing or empty text is ignored so no blank rows appear
//   type - 'transcript' (default) styles the entry with .transcript-transcript,
//          any other value with .transcript-response
function addTranscript(text, type = 'transcript') {
  if (text === undefined || text === null || text === '') return;
  // The panel is rendered with display: none in the markup; reveal it once there
  // is something to show, otherwise entries are appended but never visible.
  transcriptEl.style.display = 'block';
  const item = document.createElement('div');
  item.className = 'transcript-item';
  const content = document.createElement('div');
  content.className = type === 'transcript' ? 'transcript-transcript' : 'transcript-response';
  // textContent (not innerHTML) keeps server-supplied text from being parsed as markup.
  content.textContent = text;
  item.appendChild(content);
  transcriptContentEl.appendChild(item);
  // Auto-scroll to bottom
  transcriptEl.scrollTop = transcriptEl.scrollHeight;
}
// Open the voice WebSocket and, once it is open, start microphone capture.
// Does nothing when already connected or while an earlier attempt is still
// handshaking. Failures are reported through the error banner and leave the
// Connect button usable again.
async function connect() {
  if (isConnected) return;
  // A reconnect timer and a manual click can both land here; never run two
  // handshakes at once.
  if (ws && ws.readyState === WebSocket.CONNECTING) return;
  updateStatus('connecting', 'Connecting...');
  hideError();
  connectBtn.disabled = true;

  let socket;
  try {
    // Open WebSocket (throws synchronously only for an invalid URL)
    socket = new WebSocket(wsUrl);
  } catch (error) {
    console.error('Connection error:', error);
    showError('Failed to connect: ' + error.message);
    updateStatus('disconnected', 'Disconnected');
    connectBtn.disabled = false;
    return;
  }
  ws = socket;
  // True once this particular socket has completed the open + audio setup below.
  let established = false;

  socket.onopen = async () => {
    console.log('WebSocket connected');
    try {
      // Initialize audio
      await initAudio();
    } catch (error) {
      // E.g. microphone permission denied. Without this catch the rejection is
      // unhandled and the page stays on "Connecting..." with the socket open.
      console.error('Audio setup failed:', error);
      showError('Failed to start audio: ' + error.message);
      socket.close(); // onclose below restores the UI
      return;
    }
    if (socket.readyState !== WebSocket.OPEN) {
      // The socket went away while waiting for the microphone; release what
      // initAudio just acquired.
      disconnectAudio();
      return;
    }
    established = true;
    isConnected = true;
    reconnectAttempts = 0;
    updateStatus('connected', 'Connected');
    connectBtn.disabled = true;
    disconnectBtn.disabled = false;
  };

  socket.onmessage = (event) => {
    if (event.data instanceof Blob) {
      // Binary audio data
      handleAudioData(event.data);
      return;
    }
    // JSON text data
    let data;
    try {
      data = JSON.parse(event.data);
    } catch (error) {
      console.error('Ignoring malformed message:', error);
      return;
    }
    handleWebsocketMessage(data);
  };

  socket.onclose = () => {
    console.log('WebSocket disconnected');
    // A newer connect() has replaced this socket; its state must not be touched.
    if (ws !== socket) return;
    if (established) {
      handleDisconnect();
      return;
    }
    // Closed before the connection was ever established (server unreachable,
    // audio setup failed, ...). handleDisconnect() ignores this case, so restore
    // the UI here; otherwise the Connect button stays disabled.
    disconnectAudio();
    updateStatus('disconnected', 'Disconnected');
    connectBtn.disabled = false;
    disconnectBtn.disabled = true;
  };

  socket.onerror = (error) => {
    console.error('WebSocket error:', error);
    showError('Connection error. Please try again.');
  };
}
// Dispatch one decoded JSON message from the server by its `type` field.
//   data - parsed message object; anything that is not an object is ignored
function handleWebsocketMessage(data) {
  if (!data || typeof data !== 'object') {
    // JSON such as `null` or `42` parses fine but has no `type` to dispatch on.
    console.warn('Ignoring non-object message:', data);
    return;
  }
  switch (data.type) {
    case 'welcome':
      console.log('Server greeting:', data.message);
      break;
    case 'transcript':
      addTranscript(data.text, 'transcript');
      // The utterance has been recognised and a reply is pending; this is the only
      // place the indicator is switched on (it is switched off by 'response').
      // NOTE(review): assumes every 'transcript' is followed by a 'response' - confirm with the server.
      showThinking(true);
      break;
    case 'response':
      addTranscript(data.text, 'response');
      showThinking(false);
      break;
    case 'tts_audio':
      console.log('TTS audio header received:', data.samples, 'samples @', data.sample_rate, 'Hz');
      break;
    case 'ping':
      // Keepalive - ignore
      break;
    default:
      console.warn('Unknown message type:', data.type);
  }
}
// Decode one binary WebSocket frame and play it.
//   blob - Blob received from the server
// Errors are logged and swallowed so one bad frame cannot break the message handler.
async function handleAudioData(blob) {
  if (!outputAudioContext || outputAudioContext.state === 'closed') {
    console.warn('Dropping audio frame: no active output AudioContext');
    return;
  }
  try {
    const arrayBuffer = await blob.arrayBuffer();
    // Pass the bytes straight to the decoder. Wrapping them in a Float32Array first
    // adds nothing (decodeAudioData takes the ArrayBuffer) and throws a RangeError
    // whenever the frame length is not a multiple of 4.
    // NOTE(review): decodeAudioData expects a complete encoded audio file. If the
    // server sends headerless PCM (the 'tts_audio' header carrying samples and
    // sample_rate hints at that), decoding will reject - confirm the wire format.
    const audioBuffer = await outputAudioContext.decodeAudioData(arrayBuffer);
    // Play the audio
    playAudioBuffer(audioBuffer);
  } catch (error) {
    console.error('Audio playback error:', error);
  }
}
// Play a decoded AudioBuffer immediately through the output context.
async function playAudioBuffer(audioBuffer) {
  const ctx = outputAudioContext;
  const player = ctx.createBufferSource();
  player.buffer = audioBuffer;
  // Route the one-shot source to the speakers, then start it.
  player.connect(ctx.destination);
  player.start();
}
// User-initiated disconnect: close the socket, release audio and reset the UI.
// Clearing isConnected before close() keeps handleDisconnect() from treating
// the resulting close event as a dropped connection.
async function disconnect() {
  if (ws) {
    isConnected = false;
    ws.close();
    disconnectAudio();
    updateStatus('disconnected', 'Disconnected');
    connectBtn.disabled = false;
    disconnectBtn.disabled = true;
  }
}
// React to the socket closing while a connection was established: release audio,
// reset the UI and schedule a reconnect with exponential backoff
// (1s, 2s, 4s, ... capped at 30s) until maxReconnectAttempts is reached.
// Does nothing if the connection was already marked as closed.
async function handleDisconnect() {
  if (!isConnected) {
    return;
  }
  isConnected = false;
  disconnectAudio();
  updateStatus('disconnected', 'Disconnected');
  connectBtn.disabled = false;
  disconnectBtn.disabled = true;

  // Auto-reconnect
  if (reconnectAttempts >= maxReconnectAttempts) {
    return;
  }
  const backoffMs = Math.min(1000 * 2 ** reconnectAttempts, 30000);
  console.log(`Reconnecting in ${backoffMs}ms...`);
  updateStatus('connecting', `Reconnecting (${reconnectAttempts + 1}/${maxReconnectAttempts})...`);
  setTimeout(() => {
    reconnectAttempts += 1;
    connect();
  }, backoffMs);
}
// Create the capture and playback AudioContexts, acquire the microphone and start
// forwarding captured audio. Rejects with the underlying error when audio cannot
// be set up (no permission, no device, insecure page); anything created before
// the failure is released first.
async function initAudio() {
  try {
    // navigator.mediaDevices is undefined on insecure origins; fail with a
    // readable message instead of a TypeError on `undefined.getUserMedia`.
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      throw new Error('Microphone capture is not available (the page must be served over HTTPS or from localhost)');
    }
    const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    // Create input audio context for microphone (16kHz)
    inputAudioContext = new AudioContextCtor({
      sampleRate: 16000
    });
    // Create output audio context for playback (also fixed at 16kHz)
    outputAudioContext = new AudioContextCtor({
      sampleRate: 16000
    });
    // Get microphone
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        sampleRate: 16000,
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true
      }
    });
    console.log('Microphone acquired, stream tracks:', stream.getTracks().length);
    microphone = inputAudioContext.createMediaStreamSource(stream);
    console.log('MediaStreamSource created, sample rate:', inputAudioContext.sampleRate);
    // Use ScriptProcessor for reliable audio capture
    initScriptProcessor();
  } catch (error) {
    console.error('Audio initialization error:', error);
    // Close the contexts created above so repeated failed attempts do not pile
    // up open AudioContexts.
    disconnectAudio();
    throw error;
  }
}
// Insert a ScriptProcessorNode (4096-frame buffer, 1 input channel, 1 output
// channel) between the microphone source and the context destination, and
// forward channel 0 of every captured buffer to sendAudio().
function initScriptProcessor() {
  const ctx = inputAudioContext;
  scriptProcessor = ctx.createScriptProcessor(4096, 1, 1);
  const processor = scriptProcessor;
  microphone.connect(processor);
  processor.connect(ctx.destination);
  processor.onaudioprocess = ({ inputBuffer }) => {
    const samples = inputBuffer.getChannelData(0);
    sendAudio(samples);
  };
}
// Release every audio resource: stop the microphone, detach the processing
// nodes and close both AudioContexts. Safe to call repeatedly and when audio
// was only partially initialised.
function disconnectAudio() {
  if (microphone) {
    // Disconnecting the node alone leaves the capture device open (the browser
    // keeps showing the recording indicator); the stream's tracks must be stopped.
    const stream = microphone.mediaStream;
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    microphone.disconnect();
    microphone = null;
  }
  if (scriptProcessor) {
    // Drop the callback so no further chunks are forwarded after teardown.
    scriptProcessor.onaudioprocess = null;
    scriptProcessor.disconnect();
    scriptProcessor = null;
  }
  if (inputAudioContext && inputAudioContext.state !== 'closed') {
    inputAudioContext.close();
  }
  inputAudioContext = null;
  if (outputAudioContext && outputAudioContext.state !== 'closed') {
    outputAudioContext.close();
  }
  outputAudioContext = null;
}
// Send one chunk of captured samples to the server as a binary frame.
//   audioData - Float32Array of samples; silently dropped unless the socket is open
function sendAudio(audioData) {
  if (!ws || ws.readyState !== WebSocket.OPEN) return;
  // Send the typed-array view itself rather than audioData.buffer: the view
  // transmits exactly its own bytes, whereas .buffer would send the whole backing
  // store if the view covers only part of it. For a full-buffer view the bytes
  // on the wire are unchanged.
  ws.send(audioData);
  console.log('Sent audio chunk:', audioData.length, 'samples');
}
// Wire the two buttons to their actions.
connectBtn.addEventListener('click', connect);
disconnectBtn.addEventListener('click', disconnect);
// Drop the connection whenever the page becomes hidden while connected.
document.addEventListener('visibilitychange', () => {
  const hiddenWhileLive = document.hidden && isConnected;
  if (!hiddenWhileLive) {
    return;
  }
  disconnect();
});
</script>
</body>
</html>