<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Accessibility fix: removed user-scalable=no — blocking pinch-zoom fails
       WCAG 1.4.4, and gesture hijacking is already prevented by the
       touch-action: none rule below. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Nanobot</title>
  <style>
    /* App chrome is non-selectable; the log opts back in further down. */
    * {
      box-sizing: border-box;
      user-select: none;
      -webkit-user-select: none;
    }
    html, body {
      margin: 0;
      padding: 0;
      width: 100%;
      height: 100%;
      overflow: hidden;
      background: #1a1510;
      touch-action: none;
    }
    /* Transcript overlay: bottom-anchored, faded toward the top unless hovered. */
    #log {
      position: fixed;
      bottom: calc(5vh + 20px);
      left: 50%;
      transform: translateX(-50%);
      width: calc(90vw - 40px);
      max-height: 22vh;
      overflow-y: auto;
      padding: 12px 14px;
      font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
      font-size: 12px;
      line-height: 1.6;
      color: rgba(255, 245, 235, 0.35);
      white-space: pre-wrap;
      word-break: break-word;
      display: flex;
      flex-direction: column-reverse;
      border-radius: 10px;
      background: transparent;
      transition: color 0.3s, background 0.3s;
      z-index: 10;
      pointer-events: auto;
      -webkit-mask-image: linear-gradient(to top, black 55%, transparent 100%);
      mask-image: linear-gradient(to top, black 55%, transparent 100%);
    }
    #log:hover {
      color: rgba(255, 245, 235, 0.92);
      background: rgba(0, 0, 0, 0.18);
      -webkit-mask-image: none;
      mask-image: none;
    }
    /* Log text stays selectable for copy/paste. */
    #log * {
      user-select: text;
      -webkit-user-select: text;
    }
    #log-inner {
      display: flex;
      flex-direction: column;
    }
    .line {
      margin-bottom: 4px;
    }
    .line.user {
      color: rgba(255, 255, 255, 0.9);
    }
    .line.system {
      color: rgba(255, 220, 180, 0.5);
    }
    .line.wisper {
      color: rgba(255, 200, 160, 0.4);
    }
    #log:hover .line.user { color: rgba(255, 255, 255, 1.0); }
    #log:hover .line.system { color: rgba(255, 220, 180, 0.85); }
    #log:hover .line.wisper { color: rgba(255, 200, 160, 0.75); }
    /* Transient status pill near the bottom edge. */
    #voiceStatus {
      position: fixed;
      bottom: 12px;
      left: 50%;
      transform: translateX(-50%);
      background: rgba(0, 0, 0, 0.08);
      color: #111111;
      font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
      font-size: 12px;
      padding: 4px 12px;
      border-radius: 99px;
      pointer-events: none;
      white-space: nowrap;
      opacity: 0;
      transition: opacity 0.2s;
    }
    #voiceStatus.visible {
      opacity: 1;
    }

    /* Agent state indicator */
    #agentIndicator {
      position: fixed;
      top: 0;
      left: 0;
      right: 0;
      height: 100vh;
      display: flex;
      flex-direction: column;
      align-items: center;
      justify-content: center;
      gap: 18px;
      pointer-events: none;
      opacity: 0;
      transition: opacity 0.4s;
    }
    #agentIndicator.visible {
      opacity: 1;
    }
    #agentViz {
      width: 90vw;
      height: 90vh;
      aspect-ratio: unset;
      border-radius: 24px;
      box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
      overflow: hidden;
    }
    #agentViz canvas {
      width: 100% !important;
      height: 100% !important;
      display: block;
    }
    #agentIndicator .label {
      display: none;
    }
    #agentIndicator.idle {
      color: #6b3a28;
    }
    #agentIndicator.listening {
      color: #d4553f;
    }
    #agentIndicator.thinking {
      color: #a0522d;
    }
    #agentIndicator.speaking {
      color: #8b4513;
    }
    /* Deepen the background while PTT is active */
    body.ptt-active {
      background: radial-gradient(ellipse at 50% 44%, #f2caa8 0%, #e8b898 100%);
    }
    #controls {
      position: fixed;
      top: 12px;
      right: 12px;
      z-index: 20;
      pointer-events: auto;
    }
    .control-btn {
      border: none;
      background: #ffffff;
      color: #111111;
      border-radius: 10px;
      padding: 7px 12px;
      font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
      font-size: 12px;
      letter-spacing: 0.04em;
      cursor: pointer;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
    }
    .control-btn:active {
      transform: translateY(1px);
      box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
    }
  </style>
</head>
<body>
  <!-- Top-right controls. data-no-ptt marks elements that should not trigger
       the hold-anywhere push-to-talk gesture handled by the script below. -->
  <div id="controls" data-no-ptt="1">
    <button id="resetSessionBtn" class="control-btn" type="button" data-no-ptt="1">Reset</button>
  </div>
  <!-- Transcript log; #log-inner is the flex column the script appends .line divs to. -->
  <div id="log"><div id="log-inner"></div></div>
  <!-- Full-screen agent card; a WebGL canvas is injected into #agentViz by the script. -->
  <div id="agentIndicator">
    <div id="agentViz"></div>
    <span class="label"></span>
  </div>
  <!-- Transient status pill (e.g. "Listening...", reconnect notices). -->
  <div id="voiceStatus"></div>
  <!-- Hidden sink for the remote WebRTC audio track. -->
  <audio id="remoteAudio" autoplay playsinline hidden></audio>

  <!-- Three.js must load before the inline app script that uses window.THREE. -->
  <script src="/static/three.min.js"></script>
  <script>
// Cached element handles used throughout the page script.
const byId = (id) => document.getElementById(id);
const logEl = byId("log-inner");
const voiceStatus = byId("voiceStatus");
const remoteAudio = byId("remoteAudio");
const agentIndicator = byId("agentIndicator");
const agentVizEl = byId("agentViz");
const agentLabel = agentIndicator.querySelector(".label");
const resetSessionBtn = byId("resetSessionBtn");

// --- Agent state indicator ---
// Canonical agent lifecycle states; the values double as CSS class names.
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
// Per-state ring colour (currently the same warm white for every state).
const STATE_COLORS = {
  [STATES.idle]: 0xfff5eb,
  [STATES.listening]: 0xfff5eb,
  [STATES.thinking]: 0xfff5eb,
  [STATES.speaking]: 0xfff5eb,
};
let agentState = STATES.idle;
let agentVisualizer = null;
let lastRemoteAudioActivityS = 0;
agentIndicator.classList.add("visible", "idle");

// Switches the indicator to `state`: swaps the CSS state class, updates the
// (hidden) text label, and forwards the state to the WebGL visualizer.
const setAgentState = (state) => {
  agentState = state;
  agentIndicator.classList.remove("listening", "thinking", "speaking", "idle");
  agentIndicator.classList.add("visible", state);
  if (state === STATES.idle) {
    agentLabel.textContent = "";
  } else {
    agentLabel.textContent = state;
  }
  if (agentVisualizer) agentVisualizer.setState(state);
};
// Builds a flat circular tube ("ring") geometry and tags every vertex with its
// normalised position t (0..1) around the ring via the aRingT attribute.
// The vertical wave is applied later in the vertex shader from this t, so the
// wave phase can be animated per frame without regenerating geometry.
const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => {
  const TWO_PI = Math.PI * 2;
  const pathPoints = Array.from({ length: segments + 1 }, (_, i) => {
    const theta = (i / segments) * TWO_PI;
    return new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta));
  });
  const geometry = new THREE.TubeGeometry(
    new THREE.CatmullRomCurve3(pathPoints, true),
    segments,
    tubeRadius,
    12,
    true,
  );

  // TubeGeometry orders vertices tube-segment-major: (radialSegments + 1)
  // vertices per tubular step, so every vertex in a step shares one ring-t.
  const radialSegments = 12;
  const ringT = new Float32Array(geometry.attributes.position.count);
  for (let tube = 0; tube <= segments; tube += 1) {
    const tVal = tube / segments;
    const base = tube * (radialSegments + 1);
    for (let rad = 0; rad <= radialSegments; rad += 1) {
      ringT[base + rad] = tVal;
    }
  }
  geometry.setAttribute("aRingT", new THREE.BufferAttribute(ringT, 1));
  return geometry;
};
|
||
// Builds the full-screen Three.js "agent card": two phase-offset wavy rings
// under an orthographic camera, animated in a requestAnimationFrame loop.
// Returns a small control API ({ setAudioLevel, setState, setConnected,
// setConnecting }) or null when THREE / the container is unavailable.
const createAgentVisualizer = () => {
  if (!window.THREE || !agentVizEl) return null;

  const renderer = new THREE.WebGLRenderer({
    antialias: true,
    alpha: false,
    powerPreference: "high-performance",
  });
  // Fixed pixel ratio keeps fill rate bounded on high-DPI displays.
  renderer.setPixelRatio(1);
  renderer.setClearColor(0xa09b96, 1);
  agentVizEl.innerHTML = "";
  agentVizEl.appendChild(renderer.domElement);

  const scene = new THREE.Scene();
  const orthoSize = 2.0;
  const camera = new THREE.OrthographicCamera(-orthoSize, orthoSize, orthoSize, -orthoSize, 0.1, 40);
  const lookAt = new THREE.Vector3(0, 0, 0);
  // Two camera anchors: side-on while speaking, top-down otherwise.
  const speakingSideView = new THREE.Vector3(3.45, 0, 0);
  // NOTE(review): the 0.001 z offset presumably avoids a degenerate lookAt
  // straight down the default up vector — confirm before changing.
  const topView = new THREE.Vector3(0, 3.25, 0.001);
  camera.position.copy(topView);
  camera.lookAt(lookAt);

  const ambient = new THREE.AmbientLight(0xffffff, 1.0);
  scene.add(ambient);

  const geometry = createParaboloidRing();
  const geometry2 = createParaboloidRing();

  // Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
  // so phase can be animated each frame without rebuilding geometry.
  const ringVertexShader = `
    attribute float aRingT;
    uniform float uPhase;
    uniform float uAmplitude;
    varying float vWorldX;
    void main() {
      float theta = aRingT * 6.28318530718;
      vec3 pos = position;
      pos.y += uAmplitude * cos(5.0 * theta + uPhase);
      vec4 wp = modelMatrix * vec4(pos, 1.0);
      vWorldX = wp.x;
      gl_Position = projectionMatrix * viewMatrix * wp;
    }
  `;
  // Fragment shader fades the negative-X half of the ring (driven by uFade /
  // uFadeOffset, ramped up as the camera moves into side view).
  const ringFragmentShader = `
    uniform vec3 uColor;
    uniform float uFade;
    uniform float uFadeOffset;
    varying float vWorldX;
    void main() {
      float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
      gl_FragColor = vec4(uColor, alpha);
    }
  `;

  // One material per ring so each can carry its own phase/amplitude uniforms.
  const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
    uniforms: {
      uColor: { value: new THREE.Color(0xfff5eb) },
      uFade: { value: 0.0 },
      uFadeOffset: { value: 0.0 },
      uPhase: { value: phase },
      uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius
    },
    vertexShader: ringVertexShader,
    fragmentShader: ringFragmentShader,
    transparent: true,
    side: THREE.DoubleSide,
    depthWrite: false,
  });

  const ringMaterial = makeRingMaterial(0.0);
  const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset

  const ring = new THREE.Mesh(geometry, ringMaterial);
  const ring2 = new THREE.Mesh(geometry2, ringMaterial2);

  const group = new THREE.Group();
  group.add(ring);
  group.add(ring2);
  group.rotation.y = Math.PI * 0.18;
  scene.add(group);

  let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect

  // Recomputes the ortho frustum from the container aspect and current zoom.
  const applyFrustum = () => {
    const width = Math.max(2, agentVizEl.clientWidth);
    const height = Math.max(2, agentVizEl.clientHeight);
    const aspect = width / height;
    const s = orthoSize * orthoScale;
    if (aspect >= 1) {
      camera.left = -s * aspect;
      camera.right = s * aspect;
      camera.top = s;
      camera.bottom = -s;
    } else {
      camera.left = -s;
      camera.right = s;
      camera.top = s / aspect;
      camera.bottom = -s / aspect;
    }
    camera.updateProjectionMatrix();
  };

  const resize = () => {
    const width = Math.max(2, agentVizEl.clientWidth);
    const height = Math.max(2, agentVizEl.clientHeight);
    renderer.setSize(width, height, false);
    applyFrustum();
  };
  resize();
  window.addEventListener("resize", resize);

  // --- Per-frame animation state (all smoothed via exponential lerps) ---
  let currentState = STATES.idle;
  let currentAudioLevel = 0;
  let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
  let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
  let deformScale = 1.0;
  let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
  let spinSpeed = 0.0;
  // Card background colour lerp: 0 = idle coral, 1 = dark listening
  let cardColorT = 0.0;
  let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
  const CARD_GRAY_RGB = [160, 155, 150]; // disconnected gray
  const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f
  const CARD_LISTEN_RGB = [120, 40, 28]; // dark desaturated coral

  const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };

  let prevCardRGB = "";
  let targetConnected = 0.0;
  let isConnecting = false;

  // Main animation loop: smooths audio levels, drives ring deformation, spin,
  // camera moves, shader uniforms and the card background, then re-renders
  // and schedules itself via requestAnimationFrame.
  const renderFrame = (now = 0) => {
    // Frame delta in seconds, clamped to 100 ms so tab-switch gaps don't jump.
    const dt = Math.min((now - (renderFrame._lastNow || now)) / 1000, 0.1);
    renderFrame._lastNow = now;

    // Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
    const t = dt * 60;
    const lerpAudio = 1 - Math.pow(0.85, t); // fast
    const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
    const lerpDeform = 1 - Math.pow(0.88, t);
    const lerpSpin = 1 - Math.pow(0.86, t);
    const lerpRing = 1 - Math.pow(0.90, t);
    const lerpAmp = 1 - Math.pow(0.88, t);

    smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
    smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
    const speakingActive = currentState === STATES.speaking;

    // Vertical stretch of the whole group: big while speaking, squashed while thinking.
    let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
    if (speakingActive) {
      targetDeformScale = 2.05 + (smoothAudioLevel * 2.9);
    } else if (currentState === STATES.thinking) {
      targetDeformScale = 0.55 + (smoothAudioLevel * 0.35);
    }
    deformScale += (targetDeformScale - deformScale) * lerpDeform;
    group.scale.y = deformScale;

    // Thickness throb when thinking: pulse xz scale at 1 s rate.
    const targetRingScale = currentState === STATES.thinking
      ? 1.0 + 0.18 * (0.5 + 0.5 * Math.sin(now * (Math.PI * 2 / 1000)))
      : 1.0;
    ringScale += (targetRingScale - ringScale) * lerpRing;
    group.scale.x = ringScale;
    group.scale.z = ringScale;

    // Spin rate: audio-reactive while speaking, slow churn while thinking, near-still otherwise.
    const targetSpinSpeed = speakingActive
      ? (0.012 + smoothAudioLevel * 0.105)
      : (currentState === STATES.thinking ? 0.006 : 0.0022);
    spinSpeed += (targetSpinSpeed - spinSpeed) * lerpSpin;
    group.rotation.y += spinSpeed * t;

    // Only move camera (and call lookAt) when in speaking state.
    if (speakingActive || camera.position.distanceToSquared(topView) > 0.0001) {
      const lerpCamera = 1 - Math.pow(0.96, t);
      const targetCameraPosition = speakingActive ? speakingSideView : topView;
      camera.position.lerp(targetCameraPosition, lerpCamera);
      camera.lookAt(lookAt);
    }

    // Smoothly fade out the back half of the ring as the camera moves into side view.
    // sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
    const camLen = camera.position.length();
    const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
    const lerpSide = 1 - Math.pow(0.88, t);
    ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
    ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
    ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
    ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;

    // Phase animation + reactive amplitude while speaking.
    const baseAmp = 0.06 * 1.1;
    if (speakingActive) {
      const breathe = Math.sin(now * 0.0018); // ~3.5 s period
      const base = 1.8 + smoothAudioLevel * 4.0;
      const ring1Speed = (base + breathe * 0.6) * dt;
      const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
      ringMaterial.uniforms.uPhase.value += ring1Speed;
      ringMaterial2.uniforms.uPhase.value += ring2Speed;

      // Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
      const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
      const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
      ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
      ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
    } else {
      // Settle phase and amplitude back to rest values.
      ringMaterial2.uniforms.uPhase.value +=
        (Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
      ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
      ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
    }

    // Zoom in when in side view by shrinking the ortho frustum.
    const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
    if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
      orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
      applyFrustum();
    }

    // Card background: gray → coral as connection is established, then darken when listening.
    // While connecting, throb the gray base with a slow sine pulse.
    connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
    const throb = isConnecting && targetConnected === 0
      ? 0.22 * (0.5 - 0.5 * Math.sin(now * (Math.PI * 2 / 1000))) // 0–0.22 darkness pulse, 1 s period
      : 0.0;
    const baseR = Math.round(CARD_GRAY_RGB[0] + (CARD_IDLE_RGB[0] - CARD_GRAY_RGB[0]) * connectedT - throb * CARD_GRAY_RGB[0]);
    const baseG = Math.round(CARD_GRAY_RGB[1] + (CARD_IDLE_RGB[1] - CARD_GRAY_RGB[1]) * connectedT - throb * CARD_GRAY_RGB[1]);
    const baseB = Math.round(CARD_GRAY_RGB[2] + (CARD_IDLE_RGB[2] - CARD_GRAY_RGB[2]) * connectedT - throb * CARD_GRAY_RGB[2]);
    // Asymmetric rate: darken quickly when listening starts, relax slowly after.
    const targetCardT = currentState === STATES.listening ? 1.0 : 0.0;
    const cardBase = targetCardT > cardColorT ? 0.05 : 0.7;
    cardColorT += (targetCardT - cardColorT) * (1 - Math.pow(cardBase, t));
    const r = Math.min(255, Math.round(baseR + (CARD_LISTEN_RGB[0] - baseR) * cardColorT));
    const g = Math.min(255, Math.round(baseG + (CARD_LISTEN_RGB[1] - baseG) * cardColorT));
    const b = Math.min(255, Math.round(baseB + (CARD_LISTEN_RGB[2] - baseB) * cardColorT));
    // Only touch the GL clear colour when it actually changed this frame.
    const cardRGB = `${r},${g},${b}`;
    if (cardRGB !== prevCardRGB) {
      renderer.setClearColor((r << 16) | (g << 8) | b, 1);
      prevCardRGB = cardRGB;
    }

    renderer.render(scene, camera);
    requestAnimationFrame(renderFrame);
  };

  setStateColor(currentState);
  requestAnimationFrame(renderFrame);

  // Public control surface consumed by the rest of the page script.
  return {
    setAudioLevel: (level) => {
      // Clamp to [0, 1]; NaN coerces to 0.
      currentAudioLevel = Math.max(0, Math.min(1, Number(level) || 0));
    },
    setState: (state) => {
      if (!STATES[state]) return; // ignore unknown states
      currentState = state;
      setStateColor(state);
    },
    setConnected: (connected) => {
      targetConnected = connected ? 1.0 : 0.0;
      if (connected) isConnecting = false;
    },
    setConnecting: (connecting) => {
      isConnecting = !!connecting;
    },
  };
};
agentVisualizer = createAgentVisualizer();
if (agentVisualizer) agentVisualizer.setState(agentState);

// Records (in seconds) the last moment the remote <audio> element showed
// signs of life; consulted elsewhere to infer audible agent activity.
const markRemoteAudioActivity = () => {
  lastRemoteAudioActivityS = performance.now() / 1000;
};
["playing", "timeupdate", "canplay", "seeked"].forEach((eventName) => {
  remoteAudio.addEventListener(eventName, markRemoteAudioActivity);
});
// Single control WebSocket used for chat messages and WebRTC signalling.
const wsProto = location.protocol === "https:" ? "wss" : "ws";
const ws = new WebSocket(`${wsProto}://${location.host}/ws/chat`);

// --- Voice / WebRTC session state ---
let peerConnection = null;
let micStream = null;
let remoteStream = null;
let voiceConnected = false;
let disconnectedTimer = null;
let reconnectTimer = null;
let reconnectAttempts = 0;
let voiceDesired = false;       // user intent: keep voice up (drives reconnects)
let connectingVoice = false;    // a connection attempt is in flight
let pttPressed = false;
let rtcAnswerApplied = false;
let pendingRemoteCandidates = []; // ICE candidates queued until the answer lands
let appStarted = false;
const MAX_RECONNECT_ATTEMPTS = 2;

// --- Visualizer audio metering state ---
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
let visualizerAudioContext = null;
let visualizerSourceNode = null;
let visualizerSourceStream = null;
let visualizerAnalyser = null;
let visualizerWaveform = null;
let visualizerMeterRunning = false;

// --- Status overlay ---
let statusTimer = null;

// Shows `text` in the status pill. With persistMs > 0 the pill auto-hides
// after that delay; otherwise it stays until the next call replaces it.
const showStatus = (text, persistMs = 0) => {
  if (statusTimer) {
    clearTimeout(statusTimer);
    statusTimer = null;
  }
  voiceStatus.textContent = text;
  voiceStatus.classList.add("visible");
  if (persistMs <= 0) return;
  statusTimer = setTimeout(() => {
    voiceStatus.classList.remove("visible");
    statusTimer = null;
  }, persistMs);
};
// Starts a once-only rAF loop that samples the analyser's RMS level each
// frame and feeds it to the visualizer. Runs silently (level 0) until the
// analyser and waveform buffer exist.
const startVisualizerMeter = () => {
  if (visualizerMeterRunning) return;
  visualizerMeterRunning = true;

  const sampleLevel = () => {
    let level = 0;
    if (visualizerAnalyser && visualizerWaveform) {
      visualizerAnalyser.getByteTimeDomainData(visualizerWaveform);
      let energy = 0;
      for (const byte of visualizerWaveform) {
        const centred = (byte - 128) / 128; // bytes are unsigned, centred at 128
        energy += centred * centred;
      }
      const rms = Math.sqrt(energy / visualizerWaveform.length);
      level = Math.min(1, rms * 4.8); // empirical gain, clamped to [0, 1]
    }
    if (agentVisualizer) agentVisualizer.setAudioLevel(level);
    requestAnimationFrame(sampleLevel);
  };

  requestAnimationFrame(sampleLevel);
};
// Lazily builds the WebAudio graph that feeds the visualizer's level meter:
// AudioContext -> MediaStreamSource(remoteStream) -> AnalyserNode.
// Safe to call repeatedly: each piece is created only once, and the source
// node is rewired whenever a new remote stream appears. Ends by (idempotently)
// starting the rAF metering loop.
const ensureVisualizerAudioMeter = async () => {
  if (!agentVisualizer || !AudioContextCtor) return;

  if (!visualizerAudioContext) {
    visualizerAudioContext = new AudioContextCtor();
  }
  // Contexts start suspended until a user gesture; resume is best-effort.
  if (visualizerAudioContext.state === "suspended") {
    try { await visualizerAudioContext.resume(); } catch (_) {}
  }

  if (!visualizerAnalyser) {
    visualizerAnalyser = visualizerAudioContext.createAnalyser();
    visualizerAnalyser.fftSize = 512;
    visualizerAnalyser.smoothingTimeConstant = 0.84;
    // Time-domain buffer sized to fftSize for getByteTimeDomainData.
    visualizerWaveform = new Uint8Array(visualizerAnalyser.fftSize);
  }

  // (Re)wire the analyser input when a usable remote stream exists and
  // differs from the one currently connected.
  if (
    remoteStream
    && remoteStream.getAudioTracks
    && remoteStream.getAudioTracks().length > 0
    && visualizerSourceStream !== remoteStream
  ) {
    if (visualizerSourceNode) {
      try { visualizerSourceNode.disconnect(); } catch (_) {}
      visualizerSourceNode = null;
    }
    try {
      visualizerSourceNode = visualizerAudioContext.createMediaStreamSource(remoteStream);
      visualizerSourceNode.connect(visualizerAnalyser);
      visualizerSourceStream = remoteStream;
    } catch (_err) {
      // createMediaStreamSource can throw; leave the meter silent rather
      // than failing the caller.
      visualizerSourceNode = null;
      visualizerSourceStream = null;
    }
  }

  startVisualizerMeter();
};
// --- Log ---
// Log lines are queued and flushed in one DOM batch per animation frame to
// avoid thrashing layout under bursts of messages.
const MAX_LOG_LINES = 250;
const MAX_PENDING_LOG_LINES = 500;
const pendingLogItems = [];
let logFlushScheduled = false;

// Drains the pending queue into #log-inner via a single fragment append,
// then trims the oldest rendered lines down to MAX_LOG_LINES.
const flushPendingLogItems = () => {
  logFlushScheduled = false;
  if (!pendingLogItems.length) return;

  const batch = pendingLogItems.splice(0);
  const fragment = document.createDocumentFragment();
  for (const item of batch) {
    const role = item.role || "system";
    const time = item.timestamp ? new Date(item.timestamp).toLocaleTimeString() : "";
    const rawText = (item.text || "").toString();
    const line = document.createElement("div");
    line.className = `line ${role}`;
    if (role.toString().trim().toLowerCase() === "nanobot") {
      // Strip a leading self-identification prefix the bot sometimes emits.
      const cleaned = rawText.replace(/^(?:nanobot|napbot)\b\s*[:>\-]?\s*/i, "");
      line.textContent = `[${time}] ${cleaned}`;
    } else {
      line.textContent = `[${time}] ${role}: ${rawText}`;
    }
    fragment.appendChild(line);
  }

  logEl.appendChild(fragment);
  while (logEl.childElementCount > MAX_LOG_LINES && logEl.firstElementChild) {
    logEl.removeChild(logEl.firstElementChild);
  }
};

// Coalesces multiple appendLine calls into one flush per animation frame.
const scheduleLogFlush = () => {
  if (logFlushScheduled) return;
  logFlushScheduled = true;
  requestAnimationFrame(flushPendingLogItems);
};

// Queues a log line for the next flush, dropping the oldest pending entries
// when the queue outgrows MAX_PENDING_LOG_LINES.
const appendLine = (role, text, timestamp) => {
  pendingLogItems.push({ role, text, timestamp });
  const overflow = pendingLogItems.length - MAX_PENDING_LOG_LINES;
  if (overflow > 0) pendingLogItems.splice(0, overflow);
  scheduleLogFlush();
};
// Serialises and sends `payload` over the control socket; silently drops the
// message when the socket is not open.
const sendJson = (payload) => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify(payload));
  }
};

// Sends a trimmed user chat message. Returns true when handed to the socket,
// false for empty input or a closed connection (with a visible notice).
const sendUserMessage = (text) => {
  const message = (text || "").toString().trim();
  if (!message) return false;
  if (ws.readyState !== WebSocket.OPEN) {
    showStatus("WebSocket disconnected.", 2000);
    return false;
  }
  sendJson({ type: "user-message", text: message });
  return true;
};
// --- Voice state ---
// Updates the connected flag and mirrors it into the visualizer
// (drives the gray → coral card transition).
const setVoiceConnected = (connected) => {
  voiceConnected = connected;
  if (agentVisualizer) agentVisualizer.setConnected(connected);
};

// Enables/disables local mic capture without renegotiating the RTC session.
const setMicCaptureEnabled = (enabled) => {
  if (!micStream) return;
  micStream.getAudioTracks().forEach((track) => { track.enabled = enabled; });
};

// Central push-to-talk toggle: mic gating, body styling, server notification,
// agent state and status pill all change together.
// notifyServer=false is used during teardown to skip writing to a dying socket.
const setPushToTalkState = (pressed, notifyServer = true) => {
  pttPressed = pressed;
  document.body.classList.toggle("ptt-active", pressed);
  setMicCaptureEnabled(pressed);
  // Consistency fix: route through the shared sendJson helper (which performs
  // the same ws.readyState === OPEN guard) instead of duplicating ws.send inline.
  if (notifyServer) sendJson({ type: "voice-ptt", pressed });
  if (pressed) {
    setAgentState(STATES.listening);
    showStatus("Listening...");
  } else {
    if (agentState === STATES.listening) setAgentState(STATES.idle);
    if (voiceConnected) showStatus("Hold anywhere to talk", 1800);
  }
};

// Gesture entry point: only begin PTT once the voice path is fully up.
const beginPushToTalk = () => {
  if (!voiceConnected || !peerConnection || !micStream) return;
  if (pttPressed) return;
  setPushToTalkState(true);
};

// Gesture release: no-op unless PTT is currently held.
const endPushToTalk = () => {
  if (!pttPressed) return;
  setPushToTalkState(false);
};
// --- Reconnect ---
// Cancels any scheduled voice reconnect attempt.
const clearReconnectTimer = () => {
  if (!reconnectTimer) return;
  clearTimeout(reconnectTimer);
  reconnectTimer = null;
};

// Schedules a single reconnect attempt after delayMs, bounded by
// MAX_RECONNECT_ATTEMPTS. No-ops while voice is up, connecting, already
// scheduled, or no longer desired by the user.
const scheduleReconnect = (reason, delayMs = 1200) => {
  if (!voiceDesired) return;
  if (voiceConnected || connectingVoice) return;
  if (reconnectTimer) return;
  if (reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
    showStatus("Voice reconnect failed.");
    return;
  }
  reconnectAttempts += 1;
  showStatus(`${reason} Retrying (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})...`);
  reconnectTimer = setTimeout(async () => {
    reconnectTimer = null;
    await connectVoiceChannel();
  }, delayMs);
};
// Tears down the whole voice path: timers, peer connection, mic/remote
// streams, the audio sink and the visualizer meter wiring.
// statusText, if non-empty, is flashed for 3 s afterwards.
// clearDesired=true also cancels the auto-reconnect intent (user-initiated stop).
const stopVoiceChannel = async (statusText = "", clearDesired = false) => {
  if (clearDesired) {
    voiceDesired = false;
    reconnectAttempts = 0;
    clearReconnectTimer();
  }

  if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }

  // Reset signalling state and release push-to-talk without notifying the
  // server — the channel is going away anyway.
  pendingRemoteCandidates = [];
  rtcAnswerApplied = false;
  setPushToTalkState(false, false);

  if (peerConnection) {
    // Detach handlers before close() so stale callbacks can't fire mid-teardown.
    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onconnectionstatechange = null;
    peerConnection.close();
    peerConnection = null;
  }

  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }

  if (remoteStream) {
    remoteStream.getTracks().forEach((track) => track.stop());
    remoteStream = null;
  }

  remoteAudio.srcObject = null;
  setVoiceConnected(false);
  lastRemoteAudioActivityS = 0;
  // Drop the analyser source so a future stream gets rewired cleanly.
  visualizerSourceStream = null;
  if (visualizerSourceNode) {
    try { visualizerSourceNode.disconnect(); } catch (_) {}
    visualizerSourceNode = null;
  }
  if (agentVisualizer) agentVisualizer.setAudioLevel(0);
  if (agentVisualizer) agentVisualizer.setConnecting(false);
  if (statusText) showStatus(statusText, 3000);
};
// --- WebRTC ---
// Applies the server's SDP answer. Line endings are first normalised to the
// CRLF form SDP requires, then any ICE candidates that arrived before the
// answer are drained into the connection. On failure the voice channel is
// torn down and a reconnect is scheduled.
const applyRtcAnswer = async (message) => {
  if (!peerConnection) return;
  const rawSdp = (message.sdp || "").toString();
  if (!rawSdp.trim()) return;

  const lines = rawSdp
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .split("\n")
    .map((line) => line.trimEnd());
  const sdp = lines.join("\r\n").trim() + "\r\n";

  try {
    await peerConnection.setRemoteDescription({ type: message.rtcType || "answer", sdp });
    rtcAnswerApplied = true;
    const queued = pendingRemoteCandidates;
    pendingRemoteCandidates = [];
    for (const candidate of queued) {
      try { await peerConnection.addIceCandidate(candidate); } catch (_) {}
    }
    reconnectAttempts = 0;
  } catch (err) {
    await stopVoiceChannel("Voice setup failed.");
    scheduleReconnect("Failed to apply answer.");
    appendLine("system", `RTC answer error: ${err}`, new Date().toISOString());
  }
};
// Applies (or queues) a remote ICE candidate. Candidates arriving before the
// SDP answer has been applied are buffered in pendingRemoteCandidates; a null
// candidate is the end-of-candidates marker and is forwarded/queued alike.
const applyRtcIceCandidate = async (message) => {
  if (!peerConnection) return;
  const answerReady = () => rtcAnswerApplied && !!peerConnection.remoteDescription;

  if (message.candidate == null) {
    if (!answerReady()) {
      pendingRemoteCandidates.push(null);
    } else {
      try { await peerConnection.addIceCandidate(null); } catch (_) {}
    }
    return;
  }

  try {
    if (!answerReady()) {
      pendingRemoteCandidates.push(message.candidate);
      return;
    }
    await peerConnection.addIceCandidate(message.candidate);
  } catch (err) {
    appendLine("system", `RTC ICE error: ${err}`, new Date().toISOString());
  }
};
// Establishes the WebRTC voice path: grabs the mic (preferring a tuned mono
// profile with a permissive fallback), builds the RTCPeerConnection, wires
// the remote audio track into the <audio> sink and the visualizer meter, and
// sends the SDP offer over the control WebSocket. Signalling replies are
// handled by applyRtcAnswer / applyRtcIceCandidate.
const connectVoiceChannel = async () => {
  if (voiceConnected || peerConnection || connectingVoice) return;
  if (!window.RTCPeerConnection || !navigator.mediaDevices?.getUserMedia) {
    showStatus("Voice unavailable in this browser.", 4000);
    return;
  }
  if (ws.readyState !== WebSocket.OPEN) {
    showStatus("Connecting...");
    return;
  }

  connectingVoice = true;
  if (agentVisualizer) agentVisualizer.setConnecting(true);
  showStatus("Connecting voice...");
  try {
    clearReconnectTimer();
    rtcAnswerApplied = false;
    pendingRemoteCandidates = [];

    // Preferred capture profile; fall back to plain audio when the detailed
    // constraints are rejected (e.g. unsupported sampleRate).
    try {
      micStream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, sampleRate: 48000, sampleSize: 16, latency: 0,
          echoCancellation: true, noiseSuppression: true, autoGainControl: false },
        video: false,
      });
    } catch (_) {
      micStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
    }
    // Mic starts muted; push-to-talk enables the track.
    setMicCaptureEnabled(false);

    peerConnection = new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
    remoteStream = new MediaStream();
    remoteAudio.srcObject = remoteStream;

    // Route incoming agent audio into the sink and (on successful playback)
    // into the visualizer's analyser graph.
    peerConnection.ontrack = (event) => {
      if (event.track.kind !== "audio") return;
      remoteStream.addTrack(event.track);
      remoteAudio.play().then(() => {
        markRemoteAudioActivity();
        ensureVisualizerAudioMeter();
      }).catch(() => {});
    };

    // Trickle local ICE candidates to the server; null = end-of-candidates.
    peerConnection.onicecandidate = (event) => {
      if (!event.candidate) { sendJson({ type: "rtc-ice-candidate", candidate: null }); return; }
      sendJson({ type: "rtc-ice-candidate", candidate: event.candidate.toJSON() });
    };

    peerConnection.onconnectionstatechange = () => {
      const state = peerConnection?.connectionState || "new";
      if (state === "connected") {
        if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
        clearReconnectTimer();
        reconnectAttempts = 0;
        setVoiceConnected(true);
        showStatus("Hold anywhere to talk", 2500);
        return;
      }
      if (state === "failed" || state === "closed") {
        stopVoiceChannel(`Voice channel ${state}.`);
        scheduleReconnect(`Voice channel ${state}.`);
        return;
      }
      if (state === "disconnected") {
        // "disconnected" can be transient: give ICE 8 s to recover before
        // tearing down and scheduling a reconnect.
        if (disconnectedTimer) clearTimeout(disconnectedTimer);
        showStatus("Voice disconnected. Waiting...");
        disconnectedTimer = setTimeout(() => {
          if (peerConnection?.connectionState === "disconnected") {
            stopVoiceChannel("Voice channel disconnected.");
            scheduleReconnect("Voice channel disconnected.");
          }
        }, 8000);
        return;
      }
    };

    micStream.getAudioTracks().forEach((track) => { peerConnection.addTrack(track, micStream); });

    const offer = await peerConnection.createOffer();
    await peerConnection.setLocalDescription(offer);
    sendJson({ type: "rtc-offer", sdp: offer.sdp, rtcType: offer.type });
  } catch (err) {
    await stopVoiceChannel("Voice setup failed.");
    scheduleReconnect("Voice setup failed.");
    appendLine("system", `Voice setup error: ${err}`, new Date().toISOString());
  } finally {
    connectingVoice = false;
    // Stop throb if connection failed (success path clears it in setConnected)
    if (!voiceConnected && agentVisualizer) agentVisualizer.setConnecting(false);
  }
};
// --- First-tap bootstrap ---
// Runs once, on the first user gesture: unlocks audio playback, spawns the
// agent session, and brings up the voice channel.
const bootstrap = async () => {
  if (appStarted) {
    return;
  }
  appStarted = true;

  // Browsers gate audio playback behind a user gesture; this tap is that
  // gesture, so kick the element now and swallow any rejection.
  remoteAudio.play().catch(() => {});
  await ensureVisualizerAudioMeter();

  sendJson({ type: "spawn" });

  reconnectAttempts = 0;
  voiceDesired = true;
  await connectVoiceChannel();
};
|
||
|
||
if (resetSessionBtn) {
  resetSessionBtn.addEventListener("click", async (event) => {
    // Keep the tap from also triggering the whole-screen PTT handling.
    event.preventDefault();
    event.stopPropagation();

    // A reset before the first tap still needs the app bootstrapped.
    if (!appStarted) await bootstrap();

    const sent = sendUserMessage("/reset");
    if (sent) showStatus("Reset command sent.", 1500);
  });
}
|
||
|
||
// --- Whole-screen PTT pointer handling ---
// Every active pointer ID is tracked so a second simultaneous touch cannot
// re-trigger (or prematurely end) push-to-talk.
const activePointers = new Set();

document.addEventListener("pointerdown", async (event) => {
  // Elements opted out via data-no-ptt="1" never start push-to-talk.
  const target = event.target;
  if (target instanceof Element && target.closest("[data-no-ptt='1']")) return;

  // The very first tap only bootstraps the app; it does not begin PTT.
  if (!appStarted) {
    await bootstrap();
    return;
  }

  ensureVisualizerAudioMeter();
  activePointers.add(event.pointerId);
  // Only the first finger down starts talking.
  if (activePointers.size === 1) beginPushToTalk();
}, { passive: false });
|
||
|
||
// Lifting the last finger ends push-to-talk.
document.addEventListener("pointerup", (event) => {
  activePointers.delete(event.pointerId);
  if (!activePointers.size) endPushToTalk();
}, { passive: false });
|
||
|
||
// A cancelled pointer (e.g. OS gesture takeover) is treated like a release.
document.addEventListener("pointercancel", (event) => {
  activePointers.delete(event.pointerId);
  if (!activePointers.size) endPushToTalk();
}, { passive: false });
|
||
|
||
// --- WebSocket ---
// Lifecycle handlers: log open/close/error and tear the voice channel down
// when the signalling socket goes away.
ws.onopen = () => {
  appendLine("system", "WebSocket connected.", new Date().toISOString());
  showStatus("Tap anywhere to start", 0);
};

ws.onclose = async () => {
  appendLine("system", "WebSocket disconnected.", new Date().toISOString());
  await stopVoiceChannel("Disconnected.", true);
};

ws.onerror = () => {
  appendLine("system", "WebSocket error.", new Date().toISOString());
};
|
||
// Dispatch incoming server messages: WebRTC signalling first (keyed on
// msg.type), then chat/agent traffic (keyed on msg.role). Anything that
// fails to parse — or throws while being handled — is logged verbatim.
ws.onmessage = async (event) => {
  try {
    const msg = JSON.parse(event.data);

    switch (msg.type) {
      case "rtc-answer":
        await applyRtcAnswer(msg);
        return;

      case "rtc-ice-candidate":
        await applyRtcIceCandidate(msg);
        return;

      case "rtc-state":
        if ((msg.state || "").toString() === "connected") {
          setVoiceConnected(true);
          showStatus("Hold anywhere to talk", 2500);
        }
        return;

      case "rtc-error": {
        const text = (msg.message || "Unknown WebRTC error.").toString();
        showStatus(`Voice error: ${text}`, 4000);
        appendLine("system", `Voice error: ${text}`, new Date().toISOString());
        await stopVoiceChannel("Voice channel error.");
        scheduleReconnect("Voice channel error.");
        return;
      }
    }

    // Drive agent state indicator from server-sent agent-state events.
    if (msg.role === "agent-state") {
      const newState = (msg.text || "").trim();
      // Don't override listening state (user is holding PTT).
      if (agentState !== STATES.listening && STATES[newState]) {
        setAgentState(newState);
      }
      return;
    }

    // suppress wisper debug output
    if (msg.role === "wisper") return;

    appendLine(msg.role || "system", msg.text || "", msg.timestamp || "");
  } catch (_err) {
    // Non-JSON payloads (or handler failures) surface as raw system lines.
    appendLine("system", event.data, new Date().toISOString());
  }
};
|
||
</script>
|
||
</body>
|
||
</html>
|