<!-- nanobot-voice-interface/static/index.html -->
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- NOTE(review): user-scalable=no blocks pinch-zoom (WCAG 1.4.4 concern).
       Kept intentionally: the whole screen is a push-to-talk surface that
       also sets touch-action: none, so zoom gestures would fight PTT. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
  <title>Nanobot</title>
<style>
  /* Disable selection everywhere: the whole screen acts as a PTT button. */
  * {
    box-sizing: border-box;
    user-select: none;
    -webkit-user-select: none;
  }
  html, body {
    margin: 0;
    padding: 0;
    width: 100%;
    height: 100%;
    overflow: hidden;
    background: #1a1510;
    touch-action: none; /* suppress scroll/zoom gestures in favour of PTT */
  }

  /* Transcript log: faded + masked until hovered; newest line at the bottom. */
  #log {
    position: fixed;
    bottom: calc(5vh + 20px);
    left: 50%;
    transform: translateX(-50%);
    width: calc(90vw - 40px);
    max-height: 22vh;
    overflow-y: auto;
    padding: 12px 14px;
    font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
    font-size: 12px;
    line-height: 1.6;
    color: rgba(255, 245, 235, 0.35);
    white-space: pre-wrap;
    word-break: break-word;
    display: flex;
    flex-direction: column-reverse; /* keeps scroll anchored to newest line */
    border-radius: 10px;
    background: transparent;
    transition: color 0.3s, background 0.3s;
    z-index: 10;
    pointer-events: auto;
    -webkit-mask-image: linear-gradient(to top, black 55%, transparent 100%);
    mask-image: linear-gradient(to top, black 55%, transparent 100%);
  }
  #log:hover {
    color: rgba(255, 245, 235, 0.92);
    background: rgba(0, 0, 0, 0.18);
    -webkit-mask-image: none;
    mask-image: none;
  }
  /* Re-enable selection inside the log so transcripts can be copied. */
  #log * {
    user-select: text;
    -webkit-user-select: text;
  }
  #log-inner {
    display: flex;
    flex-direction: column;
  }
  .line {
    margin-bottom: 4px;
  }
  .line.user {
    color: rgba(255, 255, 255, 0.9);
  }
  .line.system {
    color: rgba(255, 220, 180, 0.5);
  }
  .line.wisper {
    color: rgba(255, 200, 160, 0.4);
  }
  #log:hover .line.user { color: rgba(255, 255, 255, 1.0); }
  #log:hover .line.system { color: rgba(255, 220, 180, 0.85); }
  #log:hover .line.wisper { color: rgba(255, 200, 160, 0.75); }

  /* Transient status pill at the bottom of the screen. */
  #voiceStatus {
    position: fixed;
    bottom: 12px;
    left: 50%;
    transform: translateX(-50%);
    background: rgba(0, 0, 0, 0.08);
    color: #111111;
    font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
    font-size: 12px;
    padding: 4px 12px;
    border-radius: 99px;
    pointer-events: none;
    white-space: nowrap;
    opacity: 0;
    transition: opacity 0.2s;
  }
  #voiceStatus.visible {
    opacity: 1;
  }

  /* Agent state indicator */
  #agentIndicator {
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    height: 100vh;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    gap: 18px;
    pointer-events: none;
    opacity: 0;
    transition: opacity 0.4s;
  }
  #agentIndicator.visible {
    opacity: 1;
  }
  #agentViz {
    width: 90vw;
    height: 90vh;
    aspect-ratio: unset;
    border-radius: 24px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
    overflow: hidden;
  }
  #agentViz canvas {
    width: 100% !important;
    height: 100% !important;
    display: block;
  }
  #agentIndicator .label {
    display: none;
  }
  #agentIndicator.idle {
    color: #6b3a28;
  }
  #agentIndicator.listening {
    color: #d4553f;
  }
  #agentIndicator.thinking {
    color: #a0522d;
  }
  #agentIndicator.speaking {
    color: #8b4513;
  }
  /* Deepen the background while PTT is active */
  body.ptt-active {
    background: radial-gradient(ellipse at 50% 44%, #f2caa8 0%, #e8b898 100%);
  }
  #controls {
    position: fixed;
    top: 12px;
    right: 12px;
    z-index: 20;
    pointer-events: auto;
  }
  .control-btn {
    border: none;
    background: #ffffff;
    color: #111111;
    border-radius: 10px;
    padding: 7px 12px;
    font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
    font-size: 12px;
    letter-spacing: 0.04em;
    cursor: pointer;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
  }
  .control-btn:active {
    transform: translateY(1px);
    box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
  }
</style>
</head>
<body>
  <div id="controls" data-no-ptt="1">
    <button id="resetSessionBtn" class="control-btn" type="button" data-no-ptt="1">Reset</button>
  </div>
  <div id="log"><div id="log-inner"></div></div>
  <div id="agentIndicator">
    <div id="agentViz"></div>
    <span class="label"></span>
  </div>
  <div id="voiceStatus"></div>
  <!-- Hidden sink for the remote WebRTC audio track. -->
  <audio id="remoteAudio" autoplay playsinline hidden></audio>

  <script src="/static/three.min.js"></script>
<script>
2026-03-04 08:20:42 -05:00
const logEl = document.getElementById("log-inner");
2026-02-28 22:12:04 -05:00
const voiceStatus = document.getElementById("voiceStatus");
const remoteAudio = document.getElementById("remoteAudio");
2026-03-04 08:20:42 -05:00
const agentIndicator = document.getElementById("agentIndicator");
const agentVizEl = document.getElementById("agentViz");
const agentLabel = agentIndicator.querySelector(".label");
const resetSessionBtn = document.getElementById("resetSessionBtn");
// --- Agent state machine ---
// Valid agent states. Keys double as a membership test: STATES[x] is truthy
// only for known state names.
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
// Per-state ring colours. All identical today (the ring material is a fixed
// cream); kept as a table so states can diverge later.
const STATE_COLORS = {
  [STATES.idle]: 0xfff5eb,
  [STATES.listening]: 0xfff5eb,
  [STATES.thinking]: 0xfff5eb,
  [STATES.speaking]: 0xfff5eb,
};
let agentState = STATES.idle;
let agentVisualizer = null;
let lastRemoteAudioActivityS = 0;
agentIndicator.classList.add("visible", "idle");
// Switch the indicator to `state`: swap the CSS state class, update the
// (currently hidden) label, and forward the state to the visualizer.
const setAgentState = (state) => {
  agentState = state;
  const classes = agentIndicator.classList;
  classes.remove("idle", "listening", "thinking", "speaking");
  classes.add("visible", state);
  agentLabel.textContent = state === STATES.idle ? "" : state;
  if (agentVisualizer) {
    agentVisualizer.setState(state);
  }
};
// Build a closed, gently wavy ring as a tube geometry.
// radius     — ring radius in world units
// segments   — sample count around the ring (also the tube's tubularSegments)
// curvature  — vertical wave amplitude as a fraction of radius
// tubeRadius — thickness of the tube
// waves      — number of cosine lumps around the circumference
const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => {
  const samples = [];
  for (let i = 0; i <= segments; i += 1) {
    const theta = (i / segments) * Math.PI * 2;
    // Smooth round lumps: a plain cosine is inherently smooth with
    // symmetric rounded peaks and valleys — no sharpening needed.
    samples.push(new THREE.Vector3(
      radius * Math.cos(theta),
      curvature * radius * Math.cos(waves * theta),
      radius * Math.sin(theta),
    ));
  }
  const path = new THREE.CatmullRomCurve3(samples, true);
  return new THREE.TubeGeometry(path, segments, tubeRadius, 12, true);
};
// Build the WebGL ring visualizer inside #agentViz. Returns a control
// surface { setAudioLevel, setState, setConnected, setConnecting }, or null
// when THREE or the container element is unavailable.
const createAgentVisualizer = () => {
if (!window.THREE || !agentVizEl) return null;
const renderer = new THREE.WebGLRenderer({
antialias: true,
alpha: false,
powerPreference: "high-performance",
});
// Pixel ratio pinned to 1: the canvas is large (90vw x 90vh) and the scene
// simple, so rendering at device pixel ratio would only cost fill rate.
renderer.setPixelRatio(1);
renderer.setClearColor(0xa09b96, 1);
agentVizEl.innerHTML = "";
agentVizEl.appendChild(renderer.domElement);
const scene = new THREE.Scene();
const orthoSize = 2.0;
const camera = new THREE.OrthographicCamera(-orthoSize, orthoSize, orthoSize, -orthoSize, 0.1, 40);
const lookAt = new THREE.Vector3(0, 0, 0);
// Two camera poses: side-on while speaking, top-down otherwise. The tiny z
// offset on topView avoids a degenerate lookAt straight down the y axis.
const speakingSideView = new THREE.Vector3(3.45, 0, 0);
const topView = new THREE.Vector3(0, 3.25, 0.001);
camera.position.copy(topView);
camera.lookAt(lookAt);
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
scene.add(ambient);
const geometry = createParaboloidRing();
const ringMaterial = new THREE.MeshBasicMaterial({
color: 0xfff5eb,
transparent: false,
side: THREE.DoubleSide,
});
const ring = new THREE.Mesh(geometry, ringMaterial);
const group = new THREE.Group();
group.add(ring);
group.rotation.y = Math.PI * 0.18;
scene.add(group);
const resize = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
renderer.setSize(width, height, false);
const aspect = width / height;
// Keep the ring fully visible in both landscape and portrait.
// Landscape (aspect >= 1): expand horizontally, keep vertical fixed.
// Portrait (aspect < 1): keep horizontal fixed at orthoSize,
// expand vertically so the ring isn't clipped.
if (aspect >= 1) {
camera.left = -orthoSize * aspect;
camera.right = orthoSize * aspect;
camera.top = orthoSize;
camera.bottom = -orthoSize;
} else {
camera.left = -orthoSize;
camera.right = orthoSize;
camera.top = orthoSize / aspect;
camera.bottom = -orthoSize / aspect;
}
camera.updateProjectionMatrix();
};
resize();
window.addEventListener("resize", resize);
// Per-frame animation state; everything is eased with exponential lerps.
let currentState = STATES.idle;
let currentAudioLevel = 0;
let smoothAudioLevel = 0;
let deformScale = 1.0;
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
let spinSpeed = 0.0;
// Card background colour lerp: 0 = idle coral, 1 = dark listening
let cardColorT = 0.0;
let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
const CARD_GRAY_RGB = [160, 155, 150]; // disconnected gray
const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f
const CARD_LISTEN_RGB = [120, 40, 28]; // dark desaturated coral
const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };
let prevCardRGB = "";
let targetConnected = 0.0;
let isConnecting = false;
// Main render loop, self-rescheduling via requestAnimationFrame.
const renderFrame = (now = 0) => {
// dt is clamped to 0.1 s so a long background-tab pause can't cause a jump.
const dt = Math.min((now - (renderFrame._lastNow || now)) / 1000, 0.1);
renderFrame._lastNow = now;
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
const t = dt * 60;
const lerpAudio = 1 - Math.pow(0.85, t);
const lerpDeform = 1 - Math.pow(0.88, t);
const lerpSpin = 1 - Math.pow(0.86, t);
const lerpRing = 1 - Math.pow(0.90, t);
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
const speakingActive = currentState === STATES.speaking;
// Vertical stretch target per state: tall while speaking, squashed while
// thinking, near-rest otherwise; the audio level adds on top.
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
if (speakingActive) {
targetDeformScale = 2.05 + (smoothAudioLevel * 2.9);
} else if (currentState === STATES.thinking) {
targetDeformScale = 0.55 + (smoothAudioLevel * 0.35);
}
deformScale += (targetDeformScale - deformScale) * lerpDeform;
group.scale.y = deformScale;
// Thickness throb when thinking: pulse xz scale at 1 s rate.
const targetRingScale = currentState === STATES.thinking
? 1.0 + 0.18 * (0.5 + 0.5 * Math.sin(now * (Math.PI * 2 / 1000)))
: 1.0;
ringScale += (targetRingScale - ringScale) * lerpRing;
group.scale.x = ringScale;
group.scale.z = ringScale;
// Spin rate: fast and audio-reactive while speaking, a slow churn while
// thinking, barely moving when idle/listening.
const targetSpinSpeed = speakingActive
? (0.012 + smoothAudioLevel * 0.105)
: (currentState === STATES.thinking ? 0.006 : 0.0022);
spinSpeed += (targetSpinSpeed - spinSpeed) * lerpSpin;
group.rotation.y += spinSpeed * t;
// Only move camera (and call lookAt) when in speaking state.
if (speakingActive || camera.position.distanceToSquared(topView) > 0.0001) {
const lerpCamera = 1 - Math.pow(0.96, t);
const targetCameraPosition = speakingActive ? speakingSideView : topView;
camera.position.lerp(targetCameraPosition, lerpCamera);
camera.lookAt(lookAt);
}
// Card background: gray → coral as connection is established, then darken when listening.
// While connecting, throb the gray base with a slow sine pulse.
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
const throb = isConnecting && targetConnected === 0
? 0.22 * (0.5 - 0.5 * Math.sin(now * (Math.PI * 2 / 1000))) // 0–0.22 darkness pulse, 1 s period
: 0.0;
const baseR = Math.round(CARD_GRAY_RGB[0] + (CARD_IDLE_RGB[0] - CARD_GRAY_RGB[0]) * connectedT - throb * CARD_GRAY_RGB[0]);
const baseG = Math.round(CARD_GRAY_RGB[1] + (CARD_IDLE_RGB[1] - CARD_GRAY_RGB[1]) * connectedT - throb * CARD_GRAY_RGB[1]);
const baseB = Math.round(CARD_GRAY_RGB[2] + (CARD_IDLE_RGB[2] - CARD_GRAY_RGB[2]) * connectedT - throb * CARD_GRAY_RGB[2]);
const targetCardT = currentState === STATES.listening ? 1.0 : 0.0;
// Asymmetric easing: darken quickly on PTT press (base 0.05), fade back slowly (0.7).
const cardBase = targetCardT > cardColorT ? 0.05 : 0.7;
cardColorT += (targetCardT - cardColorT) * (1 - Math.pow(cardBase, t));
const r = Math.min(255, Math.round(baseR + (CARD_LISTEN_RGB[0] - baseR) * cardColorT));
const g = Math.min(255, Math.round(baseG + (CARD_LISTEN_RGB[1] - baseG) * cardColorT));
const b = Math.min(255, Math.round(baseB + (CARD_LISTEN_RGB[2] - baseB) * cardColorT));
const cardRGB = `${r},${g},${b}`;
// Only touch the GL clear colour when it actually changed this frame.
if (cardRGB !== prevCardRGB) {
renderer.setClearColor((r << 16) | (g << 8) | b, 1);
prevCardRGB = cardRGB;
}
renderer.render(scene, camera);
requestAnimationFrame(renderFrame);
};
setStateColor(currentState);
requestAnimationFrame(renderFrame);
// Public control surface used by the rest of the app.
return {
// Clamp and store the 0..1 audio level driving deform/spin.
setAudioLevel: (level) => {
currentAudioLevel = Math.max(0, Math.min(1, Number(level) || 0));
},
// Switch animation state; unknown state names are ignored.
setState: (state) => {
if (!STATES[state]) return;
currentState = state;
setStateColor(state);
},
// Connection established/lost: drives the gray→coral card colour and
// clears the connecting throb on success.
setConnected: (connected) => {
targetConnected = connected ? 1.0 : 0.0;
if (connected) isConnecting = false;
},
// Toggle the "connecting" throb on the gray card.
setConnecting: (connecting) => {
isConnecting = !!connecting;
},
};
};
// --- Visualizer bootstrap ---
agentVisualizer = createAgentVisualizer();
if (agentVisualizer) agentVisualizer.setState(agentState);

// Record the wall-clock time (seconds) of the latest remote-audio activity.
const markRemoteAudioActivity = () => {
  lastRemoteAudioActivityS = performance.now() / 1000;
};
remoteAudio.addEventListener("playing", markRemoteAudioActivity);
remoteAudio.addEventListener("timeupdate", markRemoteAudioActivity);
remoteAudio.addEventListener("canplay", markRemoteAudioActivity);
remoteAudio.addEventListener("seeked", markRemoteAudioActivity);

// --- Connection state ---
const wsProto = location.protocol === "https:" ? "wss" : "ws";
const ws = new WebSocket(`${wsProto}://${location.host}/ws/chat`);
let peerConnection = null;
let micStream = null;
let remoteStream = null;
let voiceConnected = false;
let disconnectedTimer = null;   // pending "give up on disconnected" timer
let reconnectTimer = null;      // pending reconnect attempt timer
let reconnectAttempts = 0;
let voiceDesired = false;       // user wants voice (set by first tap)
let connectingVoice = false;    // connectVoiceChannel currently in flight
let pttPressed = false;
let rtcAnswerApplied = false;
let pendingRemoteCandidates = []; // ICE candidates queued until the answer lands
let appStarted = false;

const MAX_RECONNECT_ATTEMPTS = 2;

// --- Remote-audio level meter feeding the visualizer ---
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
let visualizerAudioContext = null;
let visualizerSourceNode = null;
let visualizerSourceStream = null;
let visualizerAnalyser = null;
let visualizerWaveform = null;
let visualizerMeterRunning = false;

// --- Status overlay ---
let statusTimer = null;
// Show `text` in the status pill; auto-hide after persistMs (0 = sticky
// until the next showStatus call).
const showStatus = (text, persistMs = 0) => {
  voiceStatus.textContent = text;
  voiceStatus.classList.add("visible");
  if (statusTimer) { clearTimeout(statusTimer); statusTimer = null; }
  if (persistMs > 0) {
    statusTimer = setTimeout(() => {
      voiceStatus.classList.remove("visible");
      statusTimer = null;
    }, persistMs);
  }
};
// Start a requestAnimationFrame loop that samples the analyser's RMS level
// and feeds it to the visualizer as a 0..1 value. Idempotent: only one loop
// ever runs per page.
const startVisualizerMeter = () => {
  if (visualizerMeterRunning) return;
  visualizerMeterRunning = true;
  const sampleLevel = () => {
    let level = 0;
    if (visualizerAnalyser && visualizerWaveform) {
      visualizerAnalyser.getByteTimeDomainData(visualizerWaveform);
      let energy = 0;
      for (const byte of visualizerWaveform) {
        const centered = (byte - 128) / 128;
        energy += centered * centered;
      }
      // RMS with a fixed 4.8x gain, clamped into [0, 1].
      level = Math.min(1, Math.sqrt(energy / visualizerWaveform.length) * 4.8);
    }
    if (agentVisualizer) agentVisualizer.setAudioLevel(level);
    requestAnimationFrame(sampleLevel);
  };
  requestAnimationFrame(sampleLevel);
};
// Lazily create/resume the AudioContext and analyser, (re)wire the analyser
// to the current remote stream when that stream changes, then start the
// sampling loop. Safe to call repeatedly; also serves as the user-gesture
// audio unlock (resume must happen inside a gesture on most browsers).
const ensureVisualizerAudioMeter = async () => {
if (!agentVisualizer || !AudioContextCtor) return;
if (!visualizerAudioContext) {
visualizerAudioContext = new AudioContextCtor();
}
// Autoplay policy: contexts start "suspended" until resumed in a gesture.
if (visualizerAudioContext.state === "suspended") {
try { await visualizerAudioContext.resume(); } catch (_) {}
}
if (!visualizerAnalyser) {
visualizerAnalyser = visualizerAudioContext.createAnalyser();
visualizerAnalyser.fftSize = 512;
visualizerAnalyser.smoothingTimeConstant = 0.84;
visualizerWaveform = new Uint8Array(visualizerAnalyser.fftSize);
}
// Rewire only when a different remote stream with audio tracks is present.
if (
remoteStream
&& remoteStream.getAudioTracks
&& remoteStream.getAudioTracks().length > 0
&& visualizerSourceStream !== remoteStream
) {
if (visualizerSourceNode) {
try { visualizerSourceNode.disconnect(); } catch (_) {}
visualizerSourceNode = null;
}
try {
visualizerSourceNode = visualizerAudioContext.createMediaStreamSource(remoteStream);
visualizerSourceNode.connect(visualizerAnalyser);
visualizerSourceStream = remoteStream;
} catch (_err) {
// NOTE(review): createMediaStreamSource can throw for remote WebRTC
// streams in some browsers — presumably why this is best-effort; the
// meter then just stays at zero instead of crashing. Confirm on iOS.
visualizerSourceNode = null;
visualizerSourceStream = null;
}
}
startVisualizerMeter();
};
// --- Transcript log (batched DOM writes) ---
const MAX_LOG_LINES = 250;          // cap on rendered lines
const MAX_PENDING_LOG_LINES = 500;  // cap on queued-but-unrendered lines
const pendingLogItems = [];
let logFlushScheduled = false;

// Render every queued log item in a single DOM write, then trim old lines.
const flushPendingLogItems = () => {
  logFlushScheduled = false;
  if (pendingLogItems.length === 0) return;
  const batch = pendingLogItems.splice(0);
  const fragment = document.createDocumentFragment();
  for (const item of batch) {
    const role = item.role || "system";
    const normalizedRole = role.toString().trim().toLowerCase();
    const rawText = (item.text || "").toString();
    const time = item.timestamp ? new Date(item.timestamp).toLocaleTimeString() : "";
    const line = document.createElement("div");
    line.className = `line ${role}`;
    if (normalizedRole === "nanobot") {
      // Strip a redundant self-referential "nanobot:" prefix from bot lines.
      const cleaned = rawText.replace(/^(?:nanobot|napbot)\b\s*[:>\-]?\s*/i, "");
      line.textContent = `[${time}] ${cleaned}`;
    } else {
      line.textContent = `[${time}] ${role}: ${rawText}`;
    }
    fragment.appendChild(line);
  }
  logEl.appendChild(fragment);
  while (logEl.childElementCount > MAX_LOG_LINES && logEl.firstElementChild) {
    logEl.removeChild(logEl.firstElementChild);
  }
};

// Coalesce flushes onto the next animation frame.
const scheduleLogFlush = () => {
  if (logFlushScheduled) return;
  logFlushScheduled = true;
  requestAnimationFrame(flushPendingLogItems);
};
// Queue a log line for rendering, dropping the oldest queued items when the
// pending buffer overflows.
const appendLine = (role, text, timestamp) => {
  pendingLogItems.push({ role, text, timestamp });
  if (pendingLogItems.length > MAX_PENDING_LOG_LINES) {
    pendingLogItems.splice(0, pendingLogItems.length - MAX_PENDING_LOG_LINES);
  }
  scheduleLogFlush();
};

// Send a JSON payload over the control WebSocket; silently drops when closed.
const sendJson = (payload) => {
  if (ws.readyState !== WebSocket.OPEN) return;
  ws.send(JSON.stringify(payload));
};

// Send a trimmed user text message. Returns true only when actually sent.
const sendUserMessage = (text) => {
  const message = (text || "").toString().trim();
  if (!message) return false;
  if (ws.readyState !== WebSocket.OPEN) {
    showStatus("WebSocket disconnected.", 2000);
    return false;
  }
  sendJson({ type: "user-message", text: message });
  return true;
};
// --- Voice state ---
const setVoiceConnected = (connected) => {
  voiceConnected = connected;
  if (agentVisualizer) agentVisualizer.setConnected(connected);
};

// Enable/disable every local microphone track (muted while PTT is released).
const setMicCaptureEnabled = (enabled) => {
  if (!micStream) return;
  micStream.getAudioTracks().forEach((track) => { track.enabled = enabled; });
};

// Core push-to-talk transition: updates body styling, mic capture, the agent
// state indicator, and (optionally) notifies the server of the PTT edge.
const setPushToTalkState = (pressed, notifyServer = true) => {
  pttPressed = pressed;
  document.body.classList.toggle("ptt-active", pressed);
  setMicCaptureEnabled(pressed);
  if (notifyServer && ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
  }
  if (pressed) {
    setAgentState(STATES.listening);
    showStatus("Listening...");
  } else {
    if (agentState === STATES.listening) setAgentState(STATES.idle);
    if (voiceConnected) showStatus("Hold anywhere to talk", 1800);
  }
};

// Begin PTT on pointer-down; no-op unless voice is fully established.
const beginPushToTalk = () => {
  if (!voiceConnected || !peerConnection || !micStream) return;
  if (pttPressed) return;
  setPushToTalkState(true);
};

// End PTT on pointer-up/cancel.
const endPushToTalk = () => {
  if (!pttPressed) return;
  setPushToTalkState(false);
};
// --- Reconnect ---
const clearReconnectTimer = () => {
  if (reconnectTimer) { clearTimeout(reconnectTimer); reconnectTimer = null; }
};

// Schedule a voice reconnect after delayMs, bounded by MAX_RECONNECT_ATTEMPTS.
// No-op when voice isn't desired, is already up/connecting, or one is pending.
const scheduleReconnect = (reason, delayMs = 1200) => {
  if (!voiceDesired) return;
  if (voiceConnected || connectingVoice) return;
  if (reconnectTimer) return;
  if (reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
    showStatus("Voice reconnect failed.");
    return;
  }
  reconnectAttempts += 1;
  showStatus(`${reason} Retrying (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})...`);
  reconnectTimer = setTimeout(async () => {
    reconnectTimer = null;
    await connectVoiceChannel();
  }, delayMs);
};

// Tear down the WebRTC session and all associated audio plumbing.
// clearDesired=true also abandons the user's intent to reconnect.
const stopVoiceChannel = async (statusText = "", clearDesired = false) => {
  if (clearDesired) {
    voiceDesired = false;
    reconnectAttempts = 0;
    clearReconnectTimer();
  }
  if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
  pendingRemoteCandidates = [];
  rtcAnswerApplied = false;
  setPushToTalkState(false, false); // local-only: the channel is going away
  if (peerConnection) {
    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onconnectionstatechange = null;
    peerConnection.close();
    peerConnection = null;
  }
  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }
  if (remoteStream) {
    remoteStream.getTracks().forEach((track) => track.stop());
    remoteStream = null;
  }
  remoteAudio.srcObject = null;
  setVoiceConnected(false);
  lastRemoteAudioActivityS = 0;
  // Detach the visualizer meter from the now-dead stream.
  visualizerSourceStream = null;
  if (visualizerSourceNode) {
    try { visualizerSourceNode.disconnect(); } catch (_) {}
    visualizerSourceNode = null;
  }
  if (agentVisualizer) agentVisualizer.setAudioLevel(0);
  if (agentVisualizer) agentVisualizer.setConnecting(false);
  if (statusText) showStatus(statusText, 3000);
};
// --- WebRTC signalling ---
// Apply the server's SDP answer. Line endings are normalised to CRLF first,
// since SDP requires CRLF and transports sometimes mangle it.
const applyRtcAnswer = async (message) => {
  if (!peerConnection) return;
  const rawSdp = (message.sdp || "").toString();
  if (!rawSdp.trim()) return;
  const sdp = `${rawSdp
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .split("\n")
    .map((line) => line.trimEnd())
    .join("\r\n")
    .trim()}\r\n`;
  try {
    await peerConnection.setRemoteDescription({ type: message.rtcType || "answer", sdp });
    rtcAnswerApplied = true;
    // Drain ICE candidates that arrived before the answer was applied.
    const queued = pendingRemoteCandidates;
    pendingRemoteCandidates = [];
    for (const candidate of queued) {
      try { await peerConnection.addIceCandidate(candidate); } catch (_) {}
    }
    reconnectAttempts = 0;
  } catch (err) {
    await stopVoiceChannel("Voice setup failed.");
    scheduleReconnect("Failed to apply answer.");
    appendLine("system", `RTC answer error: ${err}`, new Date().toISOString());
  }
};

// Apply (or queue) a remote ICE candidate. A null candidate is the
// end-of-candidates sentinel and must also be forwarded/queued.
const applyRtcIceCandidate = async (message) => {
  if (!peerConnection) return;
  if (message.candidate == null) {
    if (!rtcAnswerApplied || !peerConnection.remoteDescription) {
      pendingRemoteCandidates.push(null);
      return;
    }
    try { await peerConnection.addIceCandidate(null); } catch (_) {}
    return;
  }
  try {
    if (!rtcAnswerApplied || !peerConnection.remoteDescription) {
      pendingRemoteCandidates.push(message.candidate);
      return;
    }
    await peerConnection.addIceCandidate(message.candidate);
  } catch (err) {
    appendLine("system", `RTC ICE error: ${err}`, new Date().toISOString());
  }
};
// Establish the WebRTC voice channel: capture the mic (muted until PTT),
// create the peer connection, and send an SDP offer over the WebSocket.
// The connection completes asynchronously via onconnectionstatechange.
const connectVoiceChannel = async () => {
  if (voiceConnected || peerConnection || connectingVoice) return;
  if (!window.RTCPeerConnection || !navigator.mediaDevices?.getUserMedia) {
    showStatus("Voice unavailable in this browser.", 4000);
    return;
  }
  if (ws.readyState !== WebSocket.OPEN) {
    showStatus("Connecting...");
    return;
  }
  connectingVoice = true;
  if (agentVisualizer) agentVisualizer.setConnecting(true);
  showStatus("Connecting voice...");
  try {
    clearReconnectTimer();
    rtcAnswerApplied = false;
    pendingRemoteCandidates = [];
    // Prefer tuned constraints; fall back to plain audio if rejected.
    try {
      micStream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, sampleRate: 48000, sampleSize: 16, latency: 0,
          echoCancellation: true, noiseSuppression: true, autoGainControl: false },
        video: false,
      });
    } catch (_) {
      micStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
    }
    setMicCaptureEnabled(false); // stay muted until PTT is pressed
    peerConnection = new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
    remoteStream = new MediaStream();
    remoteAudio.srcObject = remoteStream;
    peerConnection.ontrack = (event) => {
      if (event.track.kind !== "audio") return;
      remoteStream.addTrack(event.track);
      remoteAudio.play().then(() => {
        markRemoteAudioActivity();
        ensureVisualizerAudioMeter();
      }).catch(() => {});
    };
    peerConnection.onicecandidate = (event) => {
      // A null candidate signals end-of-candidates; forward it too.
      if (!event.candidate) { sendJson({ type: "rtc-ice-candidate", candidate: null }); return; }
      sendJson({ type: "rtc-ice-candidate", candidate: event.candidate.toJSON() });
    };
    peerConnection.onconnectionstatechange = () => {
      const state = peerConnection?.connectionState || "new";
      if (state === "connected") {
        if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
        clearReconnectTimer();
        reconnectAttempts = 0;
        setVoiceConnected(true); // also stops the connecting throb
        showStatus("Hold anywhere to talk", 2500);
        return;
      }
      if (state === "failed" || state === "closed") {
        stopVoiceChannel(`Voice channel ${state}.`);
        scheduleReconnect(`Voice channel ${state}.`);
        return;
      }
      if (state === "disconnected") {
        // Give ICE up to 8 s to recover before tearing down.
        if (disconnectedTimer) clearTimeout(disconnectedTimer);
        showStatus("Voice disconnected. Waiting...");
        disconnectedTimer = setTimeout(() => {
          if (peerConnection?.connectionState === "disconnected") {
            stopVoiceChannel("Voice channel disconnected.");
            scheduleReconnect("Voice channel disconnected.");
          }
        }, 8000);
        return;
      }
    };
    micStream.getAudioTracks().forEach((track) => { peerConnection.addTrack(track, micStream); });
    const offer = await peerConnection.createOffer();
    await peerConnection.setLocalDescription(offer);
    sendJson({ type: "rtc-offer", sdp: offer.sdp, rtcType: offer.type });
  } catch (err) {
    await stopVoiceChannel("Voice setup failed."); // also clears the throb
    scheduleReconnect("Voice setup failed.");
    appendLine("system", `Voice setup error: ${err}`, new Date().toISOString());
  } finally {
    connectingVoice = false;
    // BUGFIX: the connecting throb used to be cleared here unconditionally,
    // which stopped it right after the offer was sent — before the connection
    // was actually established. It is now cleared by setConnected(true) on
    // success or by stopVoiceChannel() on every failure path.
  }
};
// --- First-tap bootstrap ---
// Browsers require a user gesture before audio playback / AudioContext
// resume, so the app only spins up on the user's first tap.
const bootstrap = async () => {
  if (appStarted) return;
  appStarted = true;
  remoteAudio.play().catch(() => {}); // unblock autoplay inside the gesture
  await ensureVisualizerAudioMeter();
  sendJson({ type: "spawn" });
  voiceDesired = true;
  reconnectAttempts = 0;
  await connectVoiceChannel();
};

if (resetSessionBtn) {
  resetSessionBtn.addEventListener("click", async (event) => {
    event.preventDefault();
    event.stopPropagation();
    if (!appStarted) {
      await bootstrap();
    }
    if (sendUserMessage("/reset")) {
      showStatus("Reset command sent.", 1500);
    }
  });
}

// --- Whole-screen PTT pointer handling ---
// We track active pointer IDs so multi-touch doesn't double-fire.
const activePointers = new Set();
document.addEventListener("pointerdown", async (event) => {
  // Elements marked data-no-ptt (e.g. the Reset button) opt out of PTT.
  if (event.target instanceof Element && event.target.closest("[data-no-ptt='1']")) {
    return;
  }
  if (!appStarted) {
    await bootstrap();
    return;
  }
  ensureVisualizerAudioMeter();
  activePointers.add(event.pointerId);
  if (activePointers.size === 1) beginPushToTalk();
}, { passive: false });
document.addEventListener("pointerup", (event) => {
  activePointers.delete(event.pointerId);
  if (activePointers.size === 0) endPushToTalk();
}, { passive: false });
document.addEventListener("pointercancel", (event) => {
  activePointers.delete(event.pointerId);
  if (activePointers.size === 0) endPushToTalk();
}, { passive: false });
// --- WebSocket ---
ws.onopen = () => {
  appendLine("system", "WebSocket connected.", new Date().toISOString());
  showStatus("Tap anywhere to start", 0);
};
ws.onclose = async () => {
  appendLine("system", "WebSocket disconnected.", new Date().toISOString());
  await stopVoiceChannel("Disconnected.", true);
};
ws.onerror = () => appendLine("system", "WebSocket error.", new Date().toISOString());
// Dispatch incoming control messages: RTC signalling first, then agent
// state / transcript lines.
ws.onmessage = async (event) => {
  try {
    const msg = JSON.parse(event.data);
    if (msg.type === "rtc-answer") { await applyRtcAnswer(msg); return; }
    if (msg.type === "rtc-ice-candidate") { await applyRtcIceCandidate(msg); return; }
    if (msg.type === "rtc-state") {
      const state = (msg.state || "").toString();
      if (state === "connected") {
        setVoiceConnected(true);
        showStatus("Hold anywhere to talk", 2500);
      }
      return;
    }
    if (msg.type === "rtc-error") {
      const text = (msg.message || "Unknown WebRTC error.").toString();
      showStatus(`Voice error: ${text}`, 4000);
      appendLine("system", `Voice error: ${text}`, new Date().toISOString());
      await stopVoiceChannel("Voice channel error.");
      scheduleReconnect("Voice channel error.");
      return;
    }
    // Drive agent state indicator from server-sent agent-state events
    if (msg.role === "agent-state") {
      const newState = (msg.text || "").trim();
      // Don't override listening state (user is holding PTT)
      if (agentState !== STATES.listening && STATES[newState]) {
        setAgentState(newState);
      }
    } else if (msg.role === "wisper") {
      // suppress wisper debug output
    } else {
      appendLine(msg.role || "system", msg.text || "", msg.timestamp || "");
    }
  } catch (_err) {
    // Non-JSON payloads are logged verbatim.
    appendLine("system", event.data, new Date().toISOString());
  }
};
</script>
</body>
</html>