nanobot-voice-interface/static/index.html
2026-03-05 15:10:14 -05:00

1394 lines
51 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" />
<title>Nanobot</title>
<style>
* {
box-sizing: border-box;
user-select: none;
-webkit-user-select: none;
}
html, body {
margin: 0;
padding: 0;
width: 100%;
height: 100%;
overflow: hidden;
background: #ffffff;
touch-action: none;
}
#log {
position: fixed;
bottom: calc(5vh + 20px);
left: 50%;
transform: translateX(-50%);
width: calc(90vw - 40px);
max-height: 22vh;
overflow-y: auto;
padding: 12px 14px;
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 12px;
line-height: 1.6;
color: rgba(30, 20, 10, 0.35);
white-space: pre-wrap;
word-break: break-word;
display: flex;
flex-direction: column-reverse;
border-radius: 10px;
background: transparent;
transition: color 0.3s, background 0.3s;
z-index: 10;
pointer-events: auto;
-webkit-mask-image: linear-gradient(to top, black 55%, transparent 100%);
mask-image: linear-gradient(to top, black 55%, transparent 100%);
}
#log:hover {
color: rgba(30, 20, 10, 0.85);
background: rgba(0, 0, 0, 0.06);
-webkit-mask-image: none;
mask-image: none;
}
#log * {
user-select: text;
-webkit-user-select: text;
}
#log-inner {
display: flex;
flex-direction: column;
}
.line {
margin-bottom: 4px;
}
.line.user {
color: rgba(20, 10, 0, 0.85);
}
.line.system {
color: rgba(120, 80, 40, 0.5);
}
.line.wisper {
color: rgba(120, 80, 40, 0.4);
}
#log:hover .line.user { color: rgba(20, 10, 0, 1.0); }
#log:hover .line.system { color: rgba(120, 80, 40, 0.85); }
#log:hover .line.wisper { color: rgba(120, 80, 40, 0.75); }
#voiceStatus {
position: fixed;
bottom: 12px;
left: 50%;
transform: translateX(-50%);
background: rgba(0, 0, 0, 0.08);
color: #111111;
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 12px;
padding: 4px 12px;
border-radius: 99px;
pointer-events: none;
white-space: nowrap;
opacity: 0;
transition: opacity 0.2s;
}
#voiceStatus.visible {
opacity: 1;
}
/* Agent state indicator */
#agentIndicator {
position: fixed;
top: 0;
left: 0;
right: 0;
height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
gap: 18px;
pointer-events: none;
opacity: 0;
transition: opacity 0.4s;
}
#agentIndicator.visible {
opacity: 1;
}
#agentViz {
width: 90vw;
height: 90vh;
aspect-ratio: unset;
border-radius: 24px;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
overflow: hidden;
pointer-events: auto;
cursor: pointer;
}
#agentViz canvas {
width: 100% !important;
height: 100% !important;
display: block;
pointer-events: auto;
}
#agentIndicator .label {
display: none;
}
#agentIndicator.idle {
color: #6b3a28;
}
#agentIndicator.listening {
color: #d4553f;
}
#agentIndicator.thinking {
color: #a0522d;
}
#agentIndicator.speaking {
color: #8b4513;
}
#controls {
position: fixed;
top: 12px;
right: 12px;
z-index: 20;
pointer-events: auto;
}
.control-btn {
border: none;
background: #ffffff;
color: #111111;
border-radius: 10px;
padding: 7px 12px;
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 12px;
letter-spacing: 0.04em;
cursor: pointer;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
}
.control-btn:active {
transform: translateY(1px);
box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
}
/* Toast notifications */
#toast-container {
position: fixed;
top: 16px;
left: 50%;
transform: translateX(-50%);
width: min(92vw, 480px);
max-height: calc(100vh - 32px);
overflow-y: auto;
overflow-x: hidden;
display: flex;
flex-direction: column;
gap: 10px;
z-index: 100;
pointer-events: auto;
/* Hide scrollbar until hovered */
scrollbar-width: thin;
scrollbar-color: rgba(255,200,140,0.25) transparent;
padding-bottom: 4px;
}
#toast-container::-webkit-scrollbar {
width: 4px;
}
#toast-container::-webkit-scrollbar-track {
background: transparent;
}
#toast-container::-webkit-scrollbar-thumb {
background: rgba(255,200,140,0.25);
border-radius: 2px;
}
.toast {
pointer-events: auto;
background: rgba(28, 22, 16, 0.92);
border: 1px solid rgba(255, 200, 140, 0.18);
border-radius: 12px;
padding: 14px 16px 14px 16px;
display: flex;
flex-direction: column;
gap: 8px;
box-shadow: 0 4px 24px rgba(0, 0, 0, 0.45);
animation: toast-in 0.22s cubic-bezier(0.34, 1.4, 0.64, 1) both;
position: relative;
overflow: hidden;
max-width: 100%;
}
.toast.dismissing {
animation: toast-out 0.18s ease-in both;
}
@keyframes toast-in {
from { opacity: 0; transform: translateY(-14px) scale(0.96); }
to { opacity: 1; transform: translateY(0) scale(1); }
}
@keyframes toast-out {
from { opacity: 1; transform: translateY(0) scale(1); }
to { opacity: 0; transform: translateY(-10px) scale(0.96); }
}
.toast-progress {
position: absolute;
bottom: 0;
left: 0;
height: 2px;
background: rgba(255, 190, 120, 0.55);
width: 100%;
transform-origin: left;
animation: toast-progress-shrink linear both;
}
@keyframes toast-progress-shrink {
from { transform: scaleX(1); }
to { transform: scaleX(0); }
}
.toast-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 10px;
}
.toast-title {
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 11px;
font-weight: 600;
letter-spacing: 0.07em;
color: rgba(255, 200, 140, 0.85);
text-transform: uppercase;
flex: 1;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.toast-close {
background: none;
border: none;
color: rgba(255, 245, 235, 0.35);
font-size: 16px;
line-height: 1;
cursor: pointer;
padding: 0 2px;
flex-shrink: 0;
transition: color 0.15s;
}
.toast-close:hover {
color: rgba(255, 245, 235, 0.85);
}
.toast-body {
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 12px;
line-height: 1.65;
color: rgba(255, 245, 235, 0.82);
white-space: normal;
word-break: break-word;
user-select: text;
-webkit-user-select: text;
}
.toast-body p { margin: 0 0 6px; }
.toast-body p:last-child { margin-bottom: 0; }
.toast-body h1, .toast-body h2, .toast-body h3,
.toast-body h4, .toast-body h5, .toast-body h6 {
font-size: 13px;
font-weight: 700;
color: rgba(255, 200, 140, 0.95);
margin: 8px 0 4px;
}
.toast-body ul, .toast-body ol {
margin: 4px 0 6px;
padding-left: 18px;
}
.toast-body li { margin-bottom: 2px; }
.toast-body code {
background: rgba(255,255,255,0.07);
border-radius: 4px;
padding: 1px 5px;
font-size: 11px;
}
.toast-body pre {
background: rgba(0,0,0,0.35);
border-radius: 6px;
padding: 8px 10px;
overflow-x: auto;
margin: 6px 0;
}
.toast-body pre code {
background: none;
padding: 0;
font-size: 11px;
}
.toast-body table {
border-collapse: collapse;
width: 100%;
font-size: 11px;
margin: 6px 0;
}
.toast-body th, .toast-body td {
border: 1px solid rgba(255,200,140,0.2);
padding: 4px 8px;
text-align: left;
}
.toast-body th {
background: rgba(255,200,140,0.08);
color: rgba(255,200,140,0.9);
font-weight: 600;
}
.toast-body a {
color: rgba(255,200,140,0.85);
text-decoration: underline;
}
.toast-body blockquote {
border-left: 3px solid rgba(255,200,140,0.3);
margin: 6px 0;
padding-left: 10px;
color: rgba(255,245,235,0.55);
}
.toast-body hr {
border: none;
border-top: 1px solid rgba(255,200,140,0.15);
margin: 8px 0;
}
.toast-choices {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 4px;
}
.toast-choice-btn {
background: rgba(255, 200, 140, 0.12);
border: 1px solid rgba(255, 200, 140, 0.35);
border-radius: 8px;
color: rgba(255, 245, 235, 0.90);
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
font-size: 12px;
padding: 6px 14px;
cursor: pointer;
transition: background 0.15s, border-color 0.15s;
flex: 1 1 auto;
text-align: center;
}
.toast-choice-btn:hover {
background: rgba(255, 200, 140, 0.25);
border-color: rgba(255, 200, 140, 0.65);
}
.toast-choice-btn:active {
background: rgba(255, 200, 140, 0.38);
}
.toast-choice-btn:disabled {
opacity: 0.4;
cursor: default;
}
.toast-image {
width: 100%;
max-height: 320px;
object-fit: contain;
border-radius: 8px;
display: block;
}
</style>
</head>
<body>
<div id="controls">
<button id="resetSessionBtn" class="control-btn" type="button">Reset</button>
</div>
<div id="log"><div id="log-inner"></div></div>
<div id="agentIndicator" data-ptt="1">
<div id="agentViz" data-ptt="1"></div>
<span class="label"></span>
</div>
<div id="voiceStatus"></div>
<div id="toast-container"></div>
<audio id="remoteAudio" autoplay playsinline hidden></audio>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="/static/three.min.js"></script>
<script>
const logEl = document.getElementById("log-inner");
const voiceStatus = document.getElementById("voiceStatus");
const remoteAudio = document.getElementById("remoteAudio");
const agentIndicator = document.getElementById("agentIndicator");
const agentVizEl = document.getElementById("agentViz");
const agentLabel = agentIndicator.querySelector(".label");
const resetSessionBtn = document.getElementById("resetSessionBtn");
const toastContainer = document.getElementById("toast-container");
// --- Toast notifications ---
const showToast = (kind, content, title, durationMs) => {
const toast = document.createElement("div");
toast.className = "toast";
// Header row (title + close button)
const header = document.createElement("div");
header.className = "toast-header";
if (title) {
const titleEl = document.createElement("span");
titleEl.className = "toast-title";
titleEl.textContent = title;
header.appendChild(titleEl);
}
const closeBtn = document.createElement("button");
closeBtn.className = "toast-close";
closeBtn.setAttribute("type", "button");
closeBtn.setAttribute("aria-label", "Dismiss");
closeBtn.textContent = "×";
header.appendChild(closeBtn);
toast.appendChild(header);
// Body
if (kind === "image") {
const img = document.createElement("img");
img.className = "toast-image";
img.src = content;
img.alt = title || "image";
toast.appendChild(img);
} else {
const body = document.createElement("div");
body.className = "toast-body";
// If content looks like HTML, inject directly; otherwise render as markdown.
const looksLikeHtml = /^\s*<[a-zA-Z]/.test(content);
if (looksLikeHtml) {
body.innerHTML = content;
} else if (typeof marked !== "undefined") {
body.innerHTML = marked.parse(content);
} else {
body.textContent = content;
}
toast.appendChild(body);
}
// dismiss must be declared before close button references it
const dismiss = () => {
toast.classList.add("dismissing");
const fallback = setTimeout(() => toast.remove(), 400);
toast.addEventListener("animationend", () => { clearTimeout(fallback); toast.remove(); }, { once: true });
};
closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
toastContainer.prepend(toast);
toastContainer.scrollTop = 0;
};
// --- Choice toasts (ask_user tool) ---
const showChoice = (requestId, question, choices, title) => {
const toast = document.createElement("div");
toast.className = "toast";
// Header
const header = document.createElement("div");
header.className = "toast-header";
if (title) {
const titleEl = document.createElement("span");
titleEl.className = "toast-title";
titleEl.textContent = title;
header.appendChild(titleEl);
}
const closeBtn = document.createElement("button");
closeBtn.className = "toast-close";
closeBtn.setAttribute("type", "button");
closeBtn.setAttribute("aria-label", "Dismiss");
closeBtn.textContent = "×";
header.appendChild(closeBtn);
toast.appendChild(header);
// Question body
const body = document.createElement("div");
body.className = "toast-body";
body.textContent = question;
toast.appendChild(body);
// Choice buttons
const choicesEl = document.createElement("div");
choicesEl.className = "toast-choices";
const dismiss = () => {
toast.classList.add("dismissing");
const fallback = setTimeout(() => toast.remove(), 400);
toast.addEventListener("animationend", () => { clearTimeout(fallback); toast.remove(); }, { once: true });
};
choices.forEach((label) => {
const btn = document.createElement("button");
btn.className = "toast-choice-btn";
btn.setAttribute("type", "button");
btn.textContent = label;
btn.addEventListener("click", (e) => {
e.stopPropagation();
// Disable all buttons to prevent double-send
choicesEl.querySelectorAll(".toast-choice-btn").forEach((b) => { b.disabled = true; });
sendJson({ type: "ui-response", request_id: requestId, value: label });
dismiss();
});
choicesEl.appendChild(btn);
});
toast.appendChild(choicesEl);
closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
toastContainer.prepend(toast);
toastContainer.scrollTop = 0;
};
// --- Agent state indicator ---
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
const STATE_COLORS = {
[STATES.idle]: 0xfff5eb,
[STATES.listening]: 0xfff5eb,
[STATES.thinking]: 0xfff5eb,
[STATES.speaking]: 0xfff5eb,
};
let agentState = STATES.idle;
let agentVisualizer = null;
let lastRemoteAudioActivityS = 0;
agentIndicator.classList.add("visible", "idle");
const setAgentState = (state) => {
agentState = state;
agentIndicator.classList.remove("listening", "thinking", "speaking", "idle");
agentIndicator.classList.add("visible", state);
agentLabel.textContent = state === STATES.idle ? "" : state;
if (agentVisualizer) agentVisualizer.setState(state);
};
// Creates a flat torus-path ring with a per-vertex theta attribute.
// The Y wave displacement is applied in the vertex shader so phase can be animated.
const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => {
const points = [];
for (let i = 0; i <= segments; i += 1) {
const theta = (i / segments) * Math.PI * 2;
points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
}
const curve = new THREE.CatmullRomCurve3(points, true);
const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
// Store normalised t (0→1 around the ring) for each vertex so the shader
// can reconstruct theta and apply the animated wave.
const posCount = geo.attributes.position.count;
const tAttr = new Float32Array(posCount);
// TubeGeometry lays out vertices as (radialSegments+1) rings of (tubularSegments+1) verts.
const tubularSegments = segments;
const radialSegments = 12;
for (let tube = 0; tube <= tubularSegments; tube++) {
const tVal = tube / tubularSegments;
for (let rad = 0; rad <= radialSegments; rad++) {
tAttr[tube * (radialSegments + 1) + rad] = tVal;
}
}
geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
return geo;
};
const createAgentVisualizer = () => {
if (!window.THREE || !agentVizEl) return null;
const renderer = new THREE.WebGLRenderer({
antialias: true,
alpha: false,
powerPreference: "high-performance",
});
renderer.setPixelRatio(1);
renderer.setClearColor(0xe8e4e0, 1);
renderer.domElement.dataset.ptt = "1";
agentVizEl.innerHTML = "";
agentVizEl.appendChild(renderer.domElement);
const scene = new THREE.Scene();
const orthoSize = 2.0;
const camera = new THREE.OrthographicCamera(-orthoSize, orthoSize, orthoSize, -orthoSize, 0.1, 40);
const lookAt = new THREE.Vector3(0, 0, 0);
const speakingSideView = new THREE.Vector3(3.45, 0, 0);
const topView = new THREE.Vector3(0, 3.25, 0.001);
camera.position.copy(topView);
camera.lookAt(lookAt);
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
scene.add(ambient);
const geometry = createParaboloidRing();
const geometry2 = createParaboloidRing();
// Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
// so phase can be animated each frame without rebuilding geometry.
const ringVertexShader = `
attribute float aRingT;
uniform float uPhase;
uniform float uAmplitude;
varying float vWorldX;
void main() {
float theta = aRingT * 6.28318530718;
vec3 pos = position;
pos.y += uAmplitude * cos(5.0 * theta + uPhase);
vec4 wp = modelMatrix * vec4(pos, 1.0);
vWorldX = wp.x;
gl_Position = projectionMatrix * viewMatrix * wp;
}
`;
const ringFragmentShader = `
uniform vec3 uColor;
uniform float uFade;
uniform float uFadeOffset;
varying float vWorldX;
void main() {
float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
gl_FragColor = vec4(uColor, alpha);
}
`;
const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
uniforms: {
uColor: { value: new THREE.Color(0xfff5eb) },
uFade: { value: 0.0 },
uFadeOffset: { value: 0.0 },
uPhase: { value: phase },
uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius
},
vertexShader: ringVertexShader,
fragmentShader: ringFragmentShader,
transparent: true,
side: THREE.DoubleSide,
depthWrite: false,
});
const ringMaterial = makeRingMaterial(0.0);
const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset
const ring = new THREE.Mesh(geometry, ringMaterial);
const ring2 = new THREE.Mesh(geometry2, ringMaterial2);
const group = new THREE.Group();
group.add(ring);
group.add(ring2);
group.rotation.y = Math.PI * 0.18;
scene.add(group);
let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect
const applyFrustum = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
const aspect = width / height;
const s = orthoSize * orthoScale;
if (aspect >= 1) {
camera.left = -s * aspect;
camera.right = s * aspect;
camera.top = s;
camera.bottom = -s;
} else {
camera.left = -s;
camera.right = s;
camera.top = s / aspect;
camera.bottom = -s / aspect;
}
camera.updateProjectionMatrix();
};
const resize = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
renderer.setSize(width, height, false);
applyFrustum();
};
resize();
window.addEventListener("resize", resize);
let currentState = STATES.idle;
let currentAudioLevel = 0;
let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
let deformScale = 1.0;
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
let spinSpeed = 0.0;
// Card background colour lerp: 0 = idle coral, 1 = dark coral (PTT/listening)
let cardColorT = 0.0;
let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
const CARD_GRAY_RGB = [232, 228, 224]; // #e8e4e0 — disconnected light warm gray
const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f — connected idle coral
const CARD_LISTEN_RGB = [120, 40, 28]; // #782c1c — PTT active dark coral
const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };
let prevCardRGB = "";
let targetConnected = 0.0;
let isConnecting = false;
const renderFrame = (now = 0) => {
const dt = Math.min((now - (renderFrame._lastNow || now)) / 1000, 0.1);
renderFrame._lastNow = now;
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
const t = dt * 60;
const lerpAudio = 1 - Math.pow(0.85, t); // fast
const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
const lerpDeform = 1 - Math.pow(0.88, t);
const lerpSpin = 1 - Math.pow(0.86, t);
const lerpRing = 1 - Math.pow(0.90, t);
const lerpAmp = 1 - Math.pow(0.88, t);
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
const speakingActive = currentState === STATES.speaking;
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
if (speakingActive) {
targetDeformScale = 2.05 + (smoothAudioLevel * 2.9);
} else if (currentState === STATES.thinking) {
targetDeformScale = 0.55 + (smoothAudioLevel * 0.35);
}
deformScale += (targetDeformScale - deformScale) * lerpDeform;
group.scale.y = deformScale;
// Thickness throb when thinking: pulse xz scale at 1 s rate.
const targetRingScale = currentState === STATES.thinking
? 1.0 + 0.18 * (0.5 + 0.5 * Math.sin(now * (Math.PI * 2 / 1000)))
: 1.0;
ringScale += (targetRingScale - ringScale) * lerpRing;
group.scale.x = ringScale;
group.scale.z = ringScale;
const targetSpinSpeed = speakingActive
? (0.012 + smoothAudioLevel * 0.105)
: (currentState === STATES.thinking ? 0.006 : 0.0022);
spinSpeed += (targetSpinSpeed - spinSpeed) * lerpSpin;
group.rotation.y += spinSpeed * t;
// Only move camera (and call lookAt) when in speaking state.
if (speakingActive || camera.position.distanceToSquared(topView) > 0.0001) {
const lerpCamera = 1 - Math.pow(0.96, t);
const targetCameraPosition = speakingActive ? speakingSideView : topView;
camera.position.lerp(targetCameraPosition, lerpCamera);
camera.lookAt(lookAt);
}
// Smoothly fade out the back half of the ring as the camera moves into side view.
// sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
const camLen = camera.position.length();
const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
const lerpSide = 1 - Math.pow(0.88, t);
ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;
// Phase animation + reactive amplitude while speaking.
const baseAmp = 0.06 * 1.1;
if (speakingActive) {
const breathe = Math.sin(now * 0.0018); // ~3.5 s period
const base = 1.8 + smoothAudioLevel * 4.0;
const ring1Speed = (base + breathe * 0.6) * dt;
const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
ringMaterial.uniforms.uPhase.value += ring1Speed;
ringMaterial2.uniforms.uPhase.value += ring2Speed;
// Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
} else {
// Settle phase and amplitude back to rest values.
ringMaterial2.uniforms.uPhase.value +=
(Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
}
// Zoom in when in side view by shrinking the ortho frustum.
const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
applyFrustum();
}
// Card background: gray → coral as connection is established, then darken when listening.
// While connecting, throb the gray base with a slow sine pulse.
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
const throb = isConnecting && targetConnected === 0
? 0.22 * (0.5 - 0.5 * Math.sin(now * (Math.PI * 2 / 1000))) // 00.22 darkness pulse, 1 s period
: 0.0;
const baseR = Math.round(CARD_GRAY_RGB[0] + (CARD_IDLE_RGB[0] - CARD_GRAY_RGB[0]) * connectedT - throb * CARD_GRAY_RGB[0]);
const baseG = Math.round(CARD_GRAY_RGB[1] + (CARD_IDLE_RGB[1] - CARD_GRAY_RGB[1]) * connectedT - throb * CARD_GRAY_RGB[1]);
const baseB = Math.round(CARD_GRAY_RGB[2] + (CARD_IDLE_RGB[2] - CARD_GRAY_RGB[2]) * connectedT - throb * CARD_GRAY_RGB[2]);
const targetCardT = currentState === STATES.listening ? 1.0 : 0.0;
const cardBase = targetCardT > cardColorT ? 0.05 : 0.7;
cardColorT += (targetCardT - cardColorT) * (1 - Math.pow(cardBase, t));
const r = Math.min(255, Math.round(baseR + (CARD_LISTEN_RGB[0] - baseR) * cardColorT));
const g = Math.min(255, Math.round(baseG + (CARD_LISTEN_RGB[1] - baseG) * cardColorT));
const b = Math.min(255, Math.round(baseB + (CARD_LISTEN_RGB[2] - baseB) * cardColorT));
const cardRGB = `${r},${g},${b}`;
if (cardRGB !== prevCardRGB) {
renderer.setClearColor((r << 16) | (g << 8) | b, 1);
prevCardRGB = cardRGB;
}
renderer.render(scene, camera);
requestAnimationFrame(renderFrame);
};
setStateColor(currentState);
requestAnimationFrame(renderFrame);
return {
setAudioLevel: (level) => {
currentAudioLevel = Math.max(0, Math.min(1, Number(level) || 0));
},
setState: (state) => {
if (!STATES[state]) return;
currentState = state;
setStateColor(state);
},
setConnected: (connected) => {
targetConnected = connected ? 1.0 : 0.0;
if (connected) isConnecting = false;
},
setConnecting: (connecting) => {
isConnecting = !!connecting;
},
};
};
agentVisualizer = createAgentVisualizer();
if (agentVisualizer) agentVisualizer.setState(agentState);
const markRemoteAudioActivity = () => {
lastRemoteAudioActivityS = performance.now() / 1000;
};
remoteAudio.addEventListener("playing", markRemoteAudioActivity);
remoteAudio.addEventListener("timeupdate", markRemoteAudioActivity);
remoteAudio.addEventListener("canplay", markRemoteAudioActivity);
remoteAudio.addEventListener("seeked", markRemoteAudioActivity);
const wsProto = location.protocol === "https:" ? "wss" : "ws";
const ws = new WebSocket(`${wsProto}://${location.host}/ws/chat`);
let peerConnection = null;
let micStream = null;
let remoteStream = null;
let voiceConnected = false;
let disconnectedTimer = null;
let reconnectTimer = null;
let reconnectAttempts = 0;
let voiceDesired = false;
let connectingVoice = false;
let pttPressed = false;
let rtcAnswerApplied = false;
let pendingRemoteCandidates = [];
let appStarted = false;
const MAX_RECONNECT_ATTEMPTS = 2;
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
let visualizerAudioContext = null;
let visualizerSourceNode = null;
let visualizerSourceStream = null;
let visualizerAnalyser = null;
let visualizerWaveform = null;
let visualizerMeterRunning = false;
// --- Status overlay ---
let statusTimer = null;
const showStatus = (text, persistMs = 0) => {
voiceStatus.textContent = text;
voiceStatus.classList.add("visible");
if (statusTimer) { clearTimeout(statusTimer); statusTimer = null; }
if (persistMs > 0) {
statusTimer = setTimeout(() => {
voiceStatus.classList.remove("visible");
statusTimer = null;
}, persistMs);
}
};
const startVisualizerMeter = () => {
if (visualizerMeterRunning) return;
visualizerMeterRunning = true;
const sampleLevel = () => {
let level = 0;
if (visualizerAnalyser && visualizerWaveform) {
visualizerAnalyser.getByteTimeDomainData(visualizerWaveform);
let sum = 0;
for (let idx = 0; idx < visualizerWaveform.length; idx += 1) {
const value = (visualizerWaveform[idx] - 128) / 128;
sum += value * value;
}
const rms = Math.sqrt(sum / visualizerWaveform.length);
level = Math.min(1, rms * 4.8);
}
if (agentVisualizer) agentVisualizer.setAudioLevel(level);
requestAnimationFrame(sampleLevel);
};
requestAnimationFrame(sampleLevel);
};
const ensureVisualizerAudioMeter = async () => {
if (!agentVisualizer || !AudioContextCtor) return;
if (!visualizerAudioContext) {
visualizerAudioContext = new AudioContextCtor();
}
if (visualizerAudioContext.state === "suspended") {
try { await visualizerAudioContext.resume(); } catch (_) {}
}
if (!visualizerAnalyser) {
visualizerAnalyser = visualizerAudioContext.createAnalyser();
visualizerAnalyser.fftSize = 512;
visualizerAnalyser.smoothingTimeConstant = 0.84;
visualizerWaveform = new Uint8Array(visualizerAnalyser.fftSize);
}
if (
remoteStream
&& remoteStream.getAudioTracks
&& remoteStream.getAudioTracks().length > 0
&& visualizerSourceStream !== remoteStream
) {
if (visualizerSourceNode) {
try { visualizerSourceNode.disconnect(); } catch (_) {}
visualizerSourceNode = null;
}
try {
visualizerSourceNode = visualizerAudioContext.createMediaStreamSource(remoteStream);
visualizerSourceNode.connect(visualizerAnalyser);
visualizerSourceStream = remoteStream;
} catch (_err) {
visualizerSourceNode = null;
visualizerSourceStream = null;
}
}
startVisualizerMeter();
};
// --- Log ---
const MAX_LOG_LINES = 250;
const MAX_PENDING_LOG_LINES = 500;
const pendingLogItems = [];
let logFlushScheduled = false;
const flushPendingLogItems = () => {
logFlushScheduled = false;
if (pendingLogItems.length === 0) return;
const fragment = document.createDocumentFragment();
for (const item of pendingLogItems.splice(0)) {
const role = item.role || "system";
const line = document.createElement("div");
line.className = `line ${role}`;
const time = item.timestamp ? new Date(item.timestamp).toLocaleTimeString() : "";
const normalizedRole = role.toString().trim().toLowerCase();
const rawText = (item.text || "").toString();
if (normalizedRole === "nanobot") {
const cleaned = rawText.replace(/^(?:nanobot|napbot)\b\s*[:>\-]?\s*/i, "");
line.textContent = `[${time}] ${cleaned}`;
} else {
line.textContent = `[${time}] ${role}: ${rawText}`;
}
fragment.appendChild(line);
}
logEl.appendChild(fragment);
while (logEl.childElementCount > MAX_LOG_LINES) {
if (!logEl.firstElementChild) break;
logEl.removeChild(logEl.firstElementChild);
}
};
const scheduleLogFlush = () => {
if (logFlushScheduled) return;
logFlushScheduled = true;
requestAnimationFrame(flushPendingLogItems);
};
const appendLine = (role, text, timestamp) => {
pendingLogItems.push({ role, text, timestamp });
if (pendingLogItems.length > MAX_PENDING_LOG_LINES) {
pendingLogItems.splice(0, pendingLogItems.length - MAX_PENDING_LOG_LINES);
}
scheduleLogFlush();
};
const sendJson = (payload) => {
if (ws.readyState !== WebSocket.OPEN) return;
ws.send(JSON.stringify(payload));
};
const sendUserMessage = (text) => {
const message = (text || "").toString().trim();
if (!message) return false;
if (ws.readyState !== WebSocket.OPEN) {
showStatus("WebSocket disconnected.", 2000);
return false;
}
sendJson({ type: "user-message", text: message });
return true;
};
// --- Voice state ---
const setVoiceConnected = (connected) => {
voiceConnected = connected;
if (agentVisualizer) agentVisualizer.setConnected(connected);
};
const setMicCaptureEnabled = (enabled) => {
if (!micStream) return;
micStream.getAudioTracks().forEach((track) => { track.enabled = enabled; });
};
const setPushToTalkState = (pressed, notifyServer = true) => {
pttPressed = pressed;
setMicCaptureEnabled(pressed);
if (notifyServer && ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
}
if (pressed) {
setAgentState(STATES.listening);
showStatus("Listening...");
} else {
if (agentState === STATES.listening) setAgentState(STATES.idle);
if (voiceConnected) showStatus("Hold anywhere to talk", 1800);
}
};
const beginPushToTalk = () => {
if (!voiceConnected || !peerConnection || !micStream) return;
if (pttPressed) return;
setPushToTalkState(true);
};
const endPushToTalk = () => {
if (!pttPressed) return;
setPushToTalkState(false);
};
// --- Reconnect ---
const clearReconnectTimer = () => {
if (reconnectTimer) { clearTimeout(reconnectTimer); reconnectTimer = null; }
};
const scheduleReconnect = (reason, delayMs = 1200) => {
if (!voiceDesired) return;
if (voiceConnected || connectingVoice) return;
if (reconnectTimer) return;
if (reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
showStatus("Voice reconnect failed.");
return;
}
reconnectAttempts += 1;
showStatus(`${reason} Retrying (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})...`);
reconnectTimer = setTimeout(async () => {
reconnectTimer = null;
await connectVoiceChannel();
}, delayMs);
};
const stopVoiceChannel = async (statusText = "", clearDesired = false) => {
if (clearDesired) {
voiceDesired = false;
reconnectAttempts = 0;
clearReconnectTimer();
}
if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
pendingRemoteCandidates = [];
rtcAnswerApplied = false;
setPushToTalkState(false, false);
if (peerConnection) {
peerConnection.ontrack = null;
peerConnection.onicecandidate = null;
peerConnection.onconnectionstatechange = null;
peerConnection.close();
peerConnection = null;
}
if (micStream) {
micStream.getTracks().forEach((track) => track.stop());
micStream = null;
}
if (remoteStream) {
remoteStream.getTracks().forEach((track) => track.stop());
remoteStream = null;
}
remoteAudio.srcObject = null;
setVoiceConnected(false);
lastRemoteAudioActivityS = 0;
visualizerSourceStream = null;
if (visualizerSourceNode) {
try { visualizerSourceNode.disconnect(); } catch (_) {}
visualizerSourceNode = null;
}
if (agentVisualizer) agentVisualizer.setAudioLevel(0);
if (agentVisualizer) agentVisualizer.setConnecting(false);
if (statusText) showStatus(statusText, 3000);
};
// --- WebRTC ---
const applyRtcAnswer = async (message) => {
if (!peerConnection) return;
const rawSdp = (message.sdp || "").toString();
if (!rawSdp.trim()) return;
const sdp = `${rawSdp
.replace(/\r\n/g, "\n")
.replace(/\r/g, "\n")
.split("\n")
.map((line) => line.trimEnd())
.join("\r\n")
.trim()}\r\n`;
try {
await peerConnection.setRemoteDescription({ type: message.rtcType || "answer", sdp });
rtcAnswerApplied = true;
const queued = pendingRemoteCandidates;
pendingRemoteCandidates = [];
for (const candidate of queued) {
try { await peerConnection.addIceCandidate(candidate); } catch (_) {}
}
reconnectAttempts = 0;
} catch (err) {
await stopVoiceChannel("Voice setup failed.");
scheduleReconnect("Failed to apply answer.");
appendLine("system", `RTC answer error: ${err}`, new Date().toISOString());
}
};
const applyRtcIceCandidate = async (message) => {
if (!peerConnection) return;
if (message.candidate == null) {
if (!rtcAnswerApplied || !peerConnection.remoteDescription) {
pendingRemoteCandidates.push(null);
return;
}
try { await peerConnection.addIceCandidate(null); } catch (_) {}
return;
}
try {
if (!rtcAnswerApplied || !peerConnection.remoteDescription) {
pendingRemoteCandidates.push(message.candidate);
return;
}
await peerConnection.addIceCandidate(message.candidate);
} catch (err) {
appendLine("system", `RTC ICE error: ${err}`, new Date().toISOString());
}
};
const connectVoiceChannel = async () => {
if (voiceConnected || peerConnection || connectingVoice) return;
if (!window.RTCPeerConnection || !navigator.mediaDevices?.getUserMedia) {
showStatus("Voice unavailable in this browser.", 4000);
return;
}
if (ws.readyState !== WebSocket.OPEN) {
showStatus("Connecting...");
return;
}
connectingVoice = true;
if (agentVisualizer) agentVisualizer.setConnecting(true);
showStatus("Connecting voice...");
try {
clearReconnectTimer();
rtcAnswerApplied = false;
pendingRemoteCandidates = [];
try {
micStream = await navigator.mediaDevices.getUserMedia({
audio: { channelCount: 1, sampleRate: 48000, sampleSize: 16, latency: 0,
echoCancellation: true, noiseSuppression: true, autoGainControl: false },
video: false,
});
} catch (_) {
micStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
}
setMicCaptureEnabled(false);
peerConnection = new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
remoteStream = new MediaStream();
remoteAudio.srcObject = remoteStream;
peerConnection.ontrack = (event) => {
if (event.track.kind !== "audio") return;
remoteStream.addTrack(event.track);
remoteAudio.play().then(() => {
markRemoteAudioActivity();
ensureVisualizerAudioMeter();
}).catch(() => {});
};
peerConnection.onicecandidate = (event) => {
if (!event.candidate) { sendJson({ type: "rtc-ice-candidate", candidate: null }); return; }
sendJson({ type: "rtc-ice-candidate", candidate: event.candidate.toJSON() });
};
peerConnection.onconnectionstatechange = () => {
const state = peerConnection?.connectionState || "new";
if (state === "connected") {
if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
clearReconnectTimer();
reconnectAttempts = 0;
setVoiceConnected(true);
showStatus("Hold anywhere to talk", 2500);
return;
}
if (state === "failed" || state === "closed") {
stopVoiceChannel(`Voice channel ${state}.`);
scheduleReconnect(`Voice channel ${state}.`);
return;
}
if (state === "disconnected") {
if (disconnectedTimer) clearTimeout(disconnectedTimer);
showStatus("Voice disconnected. Waiting...");
disconnectedTimer = setTimeout(() => {
if (peerConnection?.connectionState === "disconnected") {
stopVoiceChannel("Voice channel disconnected.");
scheduleReconnect("Voice channel disconnected.");
}
}, 8000);
return;
}
};
micStream.getAudioTracks().forEach((track) => { peerConnection.addTrack(track, micStream); });
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
sendJson({ type: "rtc-offer", sdp: offer.sdp, rtcType: offer.type });
} catch (err) {
await stopVoiceChannel("Voice setup failed.");
scheduleReconnect("Voice setup failed.");
appendLine("system", `Voice setup error: ${err}`, new Date().toISOString());
} finally {
connectingVoice = false;
// Stop throb if connection failed (success path clears it in setConnected)
if (!voiceConnected && agentVisualizer) agentVisualizer.setConnecting(false);
}
};
// --- First-tap bootstrap ---
const bootstrap = async () => {
if (appStarted) return;
appStarted = true;
// Unblock audio context (required by browsers before user gesture resolves)
remoteAudio.play().catch(() => {});
await ensureVisualizerAudioMeter();
sendJson({ type: "spawn" });
voiceDesired = true;
reconnectAttempts = 0;
await connectVoiceChannel();
};
if (resetSessionBtn) {
resetSessionBtn.addEventListener("click", async (event) => {
event.preventDefault();
event.stopPropagation();
if (!appStarted) {
await bootstrap();
}
if (ws.readyState === WebSocket.OPEN) {
sendJson({ type: "command", command: "reset" });
showStatus("Session reset.", 1500);
}
});
}
// --- Center-card PTT pointer handling ---
// Only touches that land on #agentIndicator / #agentViz (data-ptt="1") trigger PTT.
// We track active pointer IDs so multi-touch doesn't double-fire.
const activePointers = new Set();
document.addEventListener("pointerdown", async (event) => {
if (!(event.target instanceof Element) || !event.target.closest("[data-ptt='1']")) {
return;
}
activePointers.add(event.pointerId);
if (!appStarted) {
await bootstrap();
}
ensureVisualizerAudioMeter();
if (activePointers.size === 1) beginPushToTalk();
}, { passive: false });
document.addEventListener("pointerup", (event) => {
activePointers.delete(event.pointerId);
if (activePointers.size === 0) endPushToTalk();
}, { passive: false });
document.addEventListener("pointercancel", (event) => {
activePointers.delete(event.pointerId);
if (activePointers.size === 0) endPushToTalk();
}, { passive: false });
// --- WebSocket ---
ws.onopen = () => {
appendLine("system", "WebSocket connected.", new Date().toISOString());
showStatus("Tap anywhere to start", 0);
};
ws.onclose = async () => {
appendLine("system", "WebSocket disconnected.", new Date().toISOString());
await stopVoiceChannel("Disconnected.", true);
};
ws.onerror = () => appendLine("system", "WebSocket error.", new Date().toISOString());
ws.onmessage = async (event) => {
try {
const msg = JSON.parse(event.data);
if (msg.type === "rtc-answer") { await applyRtcAnswer(msg); return; }
if (msg.type === "rtc-ice-candidate") { await applyRtcIceCandidate(msg); return; }
if (msg.type === "rtc-state") {
const state = (msg.state || "").toString();
if (state === "connected") {
setVoiceConnected(true);
showStatus("Hold anywhere to talk", 2500);
}
return;
}
if (msg.type === "rtc-error") {
const text = (msg.message || "Unknown WebRTC error.").toString();
showStatus(`Voice error: ${text}`, 4000);
appendLine("system", `Voice error: ${text}`, new Date().toISOString());
await stopVoiceChannel("Voice channel error.");
scheduleReconnect("Voice channel error.");
return;
}
// Drive agent state indicator from server-sent agent-state events
if (msg.role === "agent-state") {
const newState = (msg.text || "").trim();
// Don't override listening state (user is holding PTT)
if (agentState !== STATES.listening && STATES[newState]) {
setAgentState(newState);
}
} else if (msg.role === "toast") {
try {
const t = JSON.parse(msg.text || "{}");
showToast(
t.kind || "text",
t.content || "",
t.title || "",
typeof t.duration_ms === "number" ? t.duration_ms : 6000,
);
} catch (_) {
showToast("text", msg.text || "", "", 6000);
}
} else if (msg.role === "choice") {
try {
const c = JSON.parse(msg.text || "{}");
showChoice(
c.request_id || "",
c.question || "",
Array.isArray(c.choices) ? c.choices : [],
c.title || "",
);
} catch (_) {
// Malformed choice payload — ignore.
}
} else if (msg.role === "wisper") {
// suppress wisper debug output
} else {
appendLine(msg.role || "system", msg.text || "", msg.timestamp || "");
}
} catch (_err) {
appendLine("system", event.data, new Date().toISOString());
}
};
</script>
</body>
</html>