1378 lines
50 KiB
HTML
1378 lines
50 KiB
HTML
<!doctype html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" />
|
||
<title>Nanobot</title>
|
||
<style>
|
||
* {
|
||
box-sizing: border-box;
|
||
user-select: none;
|
||
-webkit-user-select: none;
|
||
}
|
||
html, body {
|
||
margin: 0;
|
||
padding: 0;
|
||
width: 100%;
|
||
height: 100%;
|
||
overflow: hidden;
|
||
background: #ffffff;
|
||
touch-action: none;
|
||
}
|
||
#log {
|
||
position: fixed;
|
||
bottom: calc(5vh + 20px);
|
||
left: 50%;
|
||
transform: translateX(-50%);
|
||
width: calc(90vw - 40px);
|
||
max-height: 22vh;
|
||
overflow-y: auto;
|
||
padding: 12px 14px;
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 12px;
|
||
line-height: 1.6;
|
||
color: rgba(30, 20, 10, 0.35);
|
||
white-space: pre-wrap;
|
||
word-break: break-word;
|
||
display: flex;
|
||
flex-direction: column-reverse;
|
||
border-radius: 10px;
|
||
background: transparent;
|
||
transition: color 0.3s, background 0.3s;
|
||
z-index: 10;
|
||
pointer-events: auto;
|
||
-webkit-mask-image: linear-gradient(to top, black 55%, transparent 100%);
|
||
mask-image: linear-gradient(to top, black 55%, transparent 100%);
|
||
}
|
||
#log:hover {
|
||
color: rgba(30, 20, 10, 0.85);
|
||
background: rgba(0, 0, 0, 0.06);
|
||
-webkit-mask-image: none;
|
||
mask-image: none;
|
||
}
|
||
#log * {
|
||
user-select: text;
|
||
-webkit-user-select: text;
|
||
}
|
||
#log-inner {
|
||
display: flex;
|
||
flex-direction: column;
|
||
}
|
||
.line {
|
||
margin-bottom: 4px;
|
||
}
|
||
.line.user {
|
||
color: rgba(20, 10, 0, 0.85);
|
||
}
|
||
.line.system {
|
||
color: rgba(120, 80, 40, 0.5);
|
||
}
|
||
.line.wisper {
|
||
color: rgba(120, 80, 40, 0.4);
|
||
}
|
||
#log:hover .line.user { color: rgba(20, 10, 0, 1.0); }
|
||
#log:hover .line.system { color: rgba(120, 80, 40, 0.85); }
|
||
#log:hover .line.wisper { color: rgba(120, 80, 40, 0.75); }
|
||
#voiceStatus {
|
||
position: fixed;
|
||
bottom: 12px;
|
||
left: 50%;
|
||
transform: translateX(-50%);
|
||
background: rgba(0, 0, 0, 0.08);
|
||
color: #111111;
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 12px;
|
||
padding: 4px 12px;
|
||
border-radius: 99px;
|
||
pointer-events: none;
|
||
white-space: nowrap;
|
||
opacity: 0;
|
||
transition: opacity 0.2s;
|
||
}
|
||
#voiceStatus.visible {
|
||
opacity: 1;
|
||
}
|
||
|
||
/* Agent state indicator */
|
||
#agentIndicator {
|
||
position: fixed;
|
||
top: 0;
|
||
left: 0;
|
||
right: 0;
|
||
height: 100vh;
|
||
display: flex;
|
||
flex-direction: column;
|
||
align-items: center;
|
||
justify-content: center;
|
||
gap: 18px;
|
||
pointer-events: none;
|
||
opacity: 0;
|
||
transition: opacity 0.4s;
|
||
}
|
||
#agentIndicator.visible {
|
||
opacity: 1;
|
||
}
|
||
#agentViz {
|
||
width: 90vw;
|
||
height: 90vh;
|
||
aspect-ratio: unset;
|
||
border-radius: 24px;
|
||
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
|
||
overflow: hidden;
|
||
pointer-events: auto;
|
||
cursor: pointer;
|
||
}
|
||
#agentViz canvas {
|
||
width: 100% !important;
|
||
height: 100% !important;
|
||
display: block;
|
||
pointer-events: auto;
|
||
}
|
||
#agentIndicator.idle {
|
||
color: #6b3a28;
|
||
}
|
||
#agentIndicator.listening {
|
||
color: #d4553f;
|
||
}
|
||
#agentIndicator.thinking {
|
||
color: #a0522d;
|
||
}
|
||
#agentIndicator.speaking {
|
||
color: #8b4513;
|
||
}
|
||
#controls {
|
||
position: fixed;
|
||
top: 12px;
|
||
right: 12px;
|
||
z-index: 20;
|
||
pointer-events: auto;
|
||
}
|
||
.control-btn {
|
||
border: none;
|
||
background: #ffffff;
|
||
color: #111111;
|
||
border-radius: 10px;
|
||
padding: 7px 12px;
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 12px;
|
||
letter-spacing: 0.04em;
|
||
cursor: pointer;
|
||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
|
||
}
|
||
.control-btn:active {
|
||
transform: translateY(1px);
|
||
box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
|
||
}
|
||
|
||
/* Toast notifications */
|
||
#toast-container {
|
||
position: fixed;
|
||
top: 16px;
|
||
left: 50%;
|
||
transform: translateX(-50%);
|
||
width: min(92vw, 480px);
|
||
max-height: calc(100vh - 32px);
|
||
overflow-y: auto;
|
||
overflow-x: hidden;
|
||
display: flex;
|
||
flex-direction: column;
|
||
gap: 10px;
|
||
z-index: 100;
|
||
pointer-events: auto;
|
||
/* Hide scrollbar until hovered */
|
||
scrollbar-width: thin;
|
||
scrollbar-color: rgba(255,200,140,0.25) transparent;
|
||
padding-bottom: 4px;
|
||
}
|
||
#toast-container::-webkit-scrollbar {
|
||
width: 4px;
|
||
}
|
||
#toast-container::-webkit-scrollbar-track {
|
||
background: transparent;
|
||
}
|
||
#toast-container::-webkit-scrollbar-thumb {
|
||
background: rgba(255,200,140,0.25);
|
||
border-radius: 2px;
|
||
}
|
||
.toast {
|
||
pointer-events: auto;
|
||
background: rgba(28, 22, 16, 0.92);
|
||
border: 1px solid rgba(255, 200, 140, 0.18);
|
||
border-radius: 12px;
|
||
padding: 14px 16px 14px 16px;
|
||
display: flex;
|
||
flex-direction: column;
|
||
gap: 8px;
|
||
box-shadow: 0 4px 24px rgba(0, 0, 0, 0.45);
|
||
animation: toast-in 0.22s cubic-bezier(0.34, 1.4, 0.64, 1) both;
|
||
position: relative;
|
||
overflow: hidden;
|
||
max-width: 100%;
|
||
}
|
||
.toast.dismissing {
|
||
animation: toast-out 0.18s ease-in both;
|
||
}
|
||
@keyframes toast-in {
|
||
from { opacity: 0; transform: translateY(-14px) scale(0.96); }
|
||
to { opacity: 1; transform: translateY(0) scale(1); }
|
||
}
|
||
@keyframes toast-out {
|
||
from { opacity: 1; transform: translateY(0) scale(1); }
|
||
to { opacity: 0; transform: translateY(-10px) scale(0.96); }
|
||
}
|
||
.toast-progress {
|
||
position: absolute;
|
||
bottom: 0;
|
||
left: 0;
|
||
height: 2px;
|
||
background: rgba(255, 190, 120, 0.55);
|
||
width: 100%;
|
||
transform-origin: left;
|
||
animation: toast-progress-shrink linear both;
|
||
}
|
||
@keyframes toast-progress-shrink {
|
||
from { transform: scaleX(1); }
|
||
to { transform: scaleX(0); }
|
||
}
|
||
.toast-header {
|
||
display: flex;
|
||
justify-content: space-between;
|
||
align-items: flex-start;
|
||
gap: 10px;
|
||
}
|
||
.toast-title {
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 11px;
|
||
font-weight: 600;
|
||
letter-spacing: 0.07em;
|
||
color: rgba(255, 200, 140, 0.85);
|
||
text-transform: uppercase;
|
||
flex: 1;
|
||
min-width: 0;
|
||
overflow: hidden;
|
||
text-overflow: ellipsis;
|
||
white-space: nowrap;
|
||
}
|
||
.toast-close {
|
||
background: none;
|
||
border: none;
|
||
color: rgba(255, 245, 235, 0.35);
|
||
font-size: 16px;
|
||
line-height: 1;
|
||
cursor: pointer;
|
||
padding: 0 2px;
|
||
flex-shrink: 0;
|
||
transition: color 0.15s;
|
||
}
|
||
.toast-close:hover {
|
||
color: rgba(255, 245, 235, 0.85);
|
||
}
|
||
.toast-body {
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 12px;
|
||
line-height: 1.65;
|
||
color: rgba(255, 245, 235, 0.82);
|
||
white-space: normal;
|
||
word-break: break-word;
|
||
user-select: text;
|
||
-webkit-user-select: text;
|
||
}
|
||
.toast-body p { margin: 0 0 6px; }
|
||
.toast-body p:last-child { margin-bottom: 0; }
|
||
.toast-body h1, .toast-body h2, .toast-body h3,
|
||
.toast-body h4, .toast-body h5, .toast-body h6 {
|
||
font-size: 13px;
|
||
font-weight: 700;
|
||
color: rgba(255, 200, 140, 0.95);
|
||
margin: 8px 0 4px;
|
||
}
|
||
.toast-body ul, .toast-body ol {
|
||
margin: 4px 0 6px;
|
||
padding-left: 18px;
|
||
}
|
||
.toast-body li { margin-bottom: 2px; }
|
||
.toast-body code {
|
||
background: rgba(255,255,255,0.07);
|
||
border-radius: 4px;
|
||
padding: 1px 5px;
|
||
font-size: 11px;
|
||
}
|
||
.toast-body pre {
|
||
background: rgba(0,0,0,0.35);
|
||
border-radius: 6px;
|
||
padding: 8px 10px;
|
||
overflow-x: auto;
|
||
margin: 6px 0;
|
||
}
|
||
.toast-body pre code {
|
||
background: none;
|
||
padding: 0;
|
||
font-size: 11px;
|
||
}
|
||
.toast-body table {
|
||
border-collapse: collapse;
|
||
width: 100%;
|
||
font-size: 11px;
|
||
margin: 6px 0;
|
||
}
|
||
.toast-body th, .toast-body td {
|
||
border: 1px solid rgba(255,200,140,0.2);
|
||
padding: 4px 8px;
|
||
text-align: left;
|
||
}
|
||
.toast-body th {
|
||
background: rgba(255,200,140,0.08);
|
||
color: rgba(255,200,140,0.9);
|
||
font-weight: 600;
|
||
}
|
||
.toast-body a {
|
||
color: rgba(255,200,140,0.85);
|
||
text-decoration: underline;
|
||
}
|
||
.toast-body blockquote {
|
||
border-left: 3px solid rgba(255,200,140,0.3);
|
||
margin: 6px 0;
|
||
padding-left: 10px;
|
||
color: rgba(255,245,235,0.55);
|
||
}
|
||
.toast-body hr {
|
||
border: none;
|
||
border-top: 1px solid rgba(255,200,140,0.15);
|
||
margin: 8px 0;
|
||
}
|
||
.toast-choices {
|
||
display: flex;
|
||
flex-wrap: wrap;
|
||
gap: 8px;
|
||
margin-top: 4px;
|
||
}
|
||
.toast-choice-btn {
|
||
background: rgba(255, 200, 140, 0.12);
|
||
border: 1px solid rgba(255, 200, 140, 0.35);
|
||
border-radius: 8px;
|
||
color: rgba(255, 245, 235, 0.90);
|
||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||
font-size: 12px;
|
||
padding: 6px 14px;
|
||
cursor: pointer;
|
||
transition: background 0.15s, border-color 0.15s;
|
||
flex: 1 1 auto;
|
||
text-align: center;
|
||
}
|
||
.toast-choice-btn:hover {
|
||
background: rgba(255, 200, 140, 0.25);
|
||
border-color: rgba(255, 200, 140, 0.65);
|
||
}
|
||
.toast-choice-btn:active {
|
||
background: rgba(255, 200, 140, 0.38);
|
||
}
|
||
.toast-choice-btn:disabled {
|
||
opacity: 0.4;
|
||
cursor: default;
|
||
}
|
||
.toast-image {
|
||
width: 100%;
|
||
max-height: 320px;
|
||
object-fit: contain;
|
||
border-radius: 8px;
|
||
display: block;
|
||
}
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<div id="controls">
|
||
<button id="resetSessionBtn" class="control-btn" type="button">Reset</button>
|
||
</div>
|
||
<div id="log"><div id="log-inner"></div></div>
|
||
<div id="agentIndicator" data-ptt="1">
|
||
<div id="agentViz" data-ptt="1"></div>
|
||
</div>
|
||
<div id="voiceStatus"></div>
|
||
<div id="toast-container"></div>
|
||
<audio id="remoteAudio" autoplay playsinline hidden></audio>
|
||
|
||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||
<script src="/static/three.min.js"></script>
|
||
<script>
|
||
// Cached references to the page's fixed UI elements (all present in the markup above).
const logEl = document.getElementById("log-inner");                // transcript line container inside #log
const voiceStatus = document.getElementById("voiceStatus");        // bottom status pill
const remoteAudio = document.getElementById("remoteAudio");        // hidden <audio> sink for the agent's voice
const agentIndicator = document.getElementById("agentIndicator");  // full-screen state overlay
const agentVizEl = document.getElementById("agentViz");            // mount point for the WebGL canvas
const resetSessionBtn = document.getElementById("resetSessionBtn");
const toastContainer = document.getElementById("toast-container");
|
||
|
||
// --- Toast notifications ---
// Shows a dismissible toast at the top of the screen.
//   kind      — "image" renders `content` as an <img> src; anything else is
//               rendered as raw HTML (if it looks like markup), markdown via
//               marked, or plain text as a fallback.
//   content   — image URL or message text/markup.
//   title     — optional uppercase header label.
//   durationMs — when > 0, the toast auto-dismisses after this many ms and
//               shows the shrinking `.toast-progress` bar (fix: this parameter
//               was previously accepted but ignored, so the progress-bar CSS
//               was dead and toasts never auto-closed).
const showToast = (kind, content, title, durationMs) => {
  const toast = document.createElement("div");
  toast.className = "toast";

  // Header row (title + close button)
  const header = document.createElement("div");
  header.className = "toast-header";

  if (title) {
    const titleEl = document.createElement("span");
    titleEl.className = "toast-title";
    titleEl.textContent = title;
    header.appendChild(titleEl);
  }

  const closeBtn = document.createElement("button");
  closeBtn.className = "toast-close";
  closeBtn.setAttribute("type", "button");
  closeBtn.setAttribute("aria-label", "Dismiss");
  closeBtn.textContent = "×";
  header.appendChild(closeBtn);

  toast.appendChild(header);

  // Body
  if (kind === "image") {
    const img = document.createElement("img");
    img.className = "toast-image";
    img.src = content;
    img.alt = title || "image";
    toast.appendChild(img);
  } else {
    const body = document.createElement("div");
    body.className = "toast-body";
    // If content looks like HTML, inject directly; otherwise render as markdown.
    // NOTE(review): both innerHTML branches trust `content` — safe only while
    // the backend is the sole source of toasts; sanitize if that ever changes.
    const looksLikeHtml = /^\s*<[a-zA-Z]/.test(content);
    if (looksLikeHtml) {
      body.innerHTML = content;
    } else if (typeof marked !== "undefined") {
      body.innerHTML = marked.parse(content);
    } else {
      body.textContent = content;
    }
    toast.appendChild(body);
  }

  let autoTimer = null;

  // dismiss must be declared before close button references it
  const dismiss = () => {
    if (autoTimer) { clearTimeout(autoTimer); autoTimer = null; }
    toast.classList.add("dismissing");
    // Fallback removal in case animationend never fires (e.g. animations disabled).
    const fallback = setTimeout(() => toast.remove(), 400);
    toast.addEventListener("animationend", () => { clearTimeout(fallback); toast.remove(); }, { once: true });
  };

  // Auto-dismiss: the CSS `toast-progress-shrink` animation declares no
  // duration of its own, so it must be supplied inline per toast.
  if (Number(durationMs) > 0) {
    const progress = document.createElement("div");
    progress.className = "toast-progress";
    progress.style.animationDuration = `${durationMs}ms`;
    toast.appendChild(progress);
    autoTimer = setTimeout(dismiss, durationMs);
  }

  closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
  toastContainer.prepend(toast);
  toastContainer.scrollTop = 0;
};
|
||
|
||
// --- Choice toasts (ask_user tool) ---
// Presents `question` with one button per entry of `choices`; the clicked
// label is sent back over the websocket as a "ui-response" tagged with
// `requestId`, after which the toast animates away. The close button
// dismisses without answering.
const showChoice = (requestId, question, choices, title) => {
  const toast = document.createElement("div");
  toast.className = "toast";

  // Shared removal path: play the dismiss animation, with a timeout fallback
  // in case animationend never fires.
  const removeWithAnimation = () => {
    toast.classList.add("dismissing");
    const fallback = setTimeout(() => toast.remove(), 400);
    toast.addEventListener(
      "animationend",
      () => { clearTimeout(fallback); toast.remove(); },
      { once: true },
    );
  };

  // Header: optional title plus the dismiss button.
  const header = document.createElement("div");
  header.className = "toast-header";
  if (title) {
    const titleEl = document.createElement("span");
    titleEl.className = "toast-title";
    titleEl.textContent = title;
    header.appendChild(titleEl);
  }
  const closeBtn = document.createElement("button");
  closeBtn.className = "toast-close";
  closeBtn.setAttribute("type", "button");
  closeBtn.setAttribute("aria-label", "Dismiss");
  closeBtn.textContent = "×";
  closeBtn.addEventListener("click", (e) => { e.stopPropagation(); removeWithAnimation(); });
  header.appendChild(closeBtn);
  toast.appendChild(header);

  // Question body — plain text, never interpreted as HTML.
  const body = document.createElement("div");
  body.className = "toast-body";
  body.textContent = question;
  toast.appendChild(body);

  // One button per choice; the first click answers and locks out the rest.
  const choicesEl = document.createElement("div");
  choicesEl.className = "toast-choices";
  for (const label of choices) {
    const btn = document.createElement("button");
    btn.className = "toast-choice-btn";
    btn.setAttribute("type", "button");
    btn.textContent = label;
    btn.addEventListener("click", (e) => {
      e.stopPropagation();
      // Disable all buttons to prevent double-send
      for (const b of choicesEl.querySelectorAll(".toast-choice-btn")) {
        b.disabled = true;
      }
      sendJson({ type: "ui-response", request_id: requestId, value: label });
      removeWithAnimation();
    });
    choicesEl.appendChild(btn);
  }
  toast.appendChild(choicesEl);

  toastContainer.prepend(toast);
  toastContainer.scrollTop = 0;
};
|
||
|
||
// --- Agent state indicator ---
// Canonical state names shared by the DOM overlay and the 3D visualizer.
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
let agentState = STATES.idle;
let agentVisualizer = null;
let lastRemoteAudioActivityS = 0;
agentIndicator.classList.add("visible", "idle");

// Swap the indicator's state class and forward the new state to the
// visualizer (if it has been created).
const setAgentState = (state) => {
  agentState = state;
  const { classList } = agentIndicator;
  for (const name of ["listening", "thinking", "speaking", "idle"]) {
    classList.remove(name);
  }
  classList.add("visible", state);
  if (agentVisualizer) agentVisualizer.setState(state);
};
|
||
|
||
// Creates a flat torus-path ring with a per-vertex theta attribute.
// The Y wave displacement is applied in the vertex shader so phase can be animated.
//
// Fix/generalization: the radial (cross-section) resolution used to be the
// literal 12 in two places that silently had to agree — the TubeGeometry call
// and the aRingT layout loop. It is now the `radialSegments` parameter
// (default 12, identical to the previous behavior), so the two uses cannot
// drift apart. The original layout comment was also inverted; corrected below.
const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007, radialSegments = 12) => {
  const points = [];
  for (let i = 0; i <= segments; i += 1) {
    const theta = (i / segments) * Math.PI * 2;
    points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
  }
  const curve = new THREE.CatmullRomCurve3(points, true);
  const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, radialSegments, true);

  // Store normalised t (0→1 around the ring) for each vertex so the shader
  // can reconstruct theta and apply the animated wave.
  // TubeGeometry lays out vertices as (tubularSegments+1) rings of
  // (radialSegments+1) verts each — hence the index below.
  const posCount = geo.attributes.position.count;
  const tAttr = new Float32Array(posCount);
  const tubularSegments = segments;
  for (let tube = 0; tube <= tubularSegments; tube++) {
    const tVal = tube / tubularSegments;
    for (let rad = 0; rad <= radialSegments; rad++) {
      tAttr[tube * (radialSegments + 1) + rad] = tVal;
    }
  }
  geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
  return geo;
};
|
||
|
||
// Builds the full-screen THREE.js visualizer: two wavy rings viewed from a
// top-down orthographic camera that swings to a side view while speaking.
// Returns a small control API ({setAudioLevel, setState, setConnected,
// setConnecting}) or null when THREE / the mount element is unavailable.
const createAgentVisualizer = () => {
  if (!window.THREE || !agentVizEl) return null;

  const renderer = new THREE.WebGLRenderer({
    antialias: true,
    alpha: false,
    powerPreference: "high-performance",
  });
  // Fixed pixel ratio 1: renders at CSS resolution regardless of device DPR.
  renderer.setPixelRatio(1);
  renderer.setClearColor(0xe8e4e0, 1);
  // NOTE(review): data-ptt presumably marks push-to-talk hit surfaces for
  // pointer handlers defined later in the file — confirm against that code.
  renderer.domElement.dataset.ptt = "1";
  agentVizEl.innerHTML = "";
  agentVizEl.appendChild(renderer.domElement);

  const scene = new THREE.Scene();
  const orthoSize = 2.0;
  const camera = new THREE.OrthographicCamera(-orthoSize, orthoSize, orthoSize, -orthoSize, 0.1, 40);
  const lookAt = new THREE.Vector3(0, 0, 0);
  const speakingSideView = new THREE.Vector3(3.45, 0, 0);
  // Tiny z offset keeps lookAt well-defined when directly above the origin.
  const topView = new THREE.Vector3(0, 3.25, 0.001);
  camera.position.copy(topView);
  camera.lookAt(lookAt);

  const ambient = new THREE.AmbientLight(0xffffff, 1.0);
  scene.add(ambient);

  // Two identical ring geometries; their materials get opposite wave phases.
  const geometry = createParaboloidRing();
  const geometry2 = createParaboloidRing();

  // Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
  // so phase can be animated each frame without rebuilding geometry.
  const ringVertexShader = `
    attribute float aRingT;
    uniform float uPhase;
    uniform float uAmplitude;
    varying float vWorldX;
    void main() {
      float theta = aRingT * 6.28318530718;
      vec3 pos = position;
      pos.y += uAmplitude * cos(5.0 * theta + uPhase);
      vec4 wp = modelMatrix * vec4(pos, 1.0);
      vWorldX = wp.x;
      gl_Position = projectionMatrix * viewMatrix * wp;
    }
  `;
  // Fragment shader fades the ring by world X so the back half can vanish
  // in side view (uFade/uFadeOffset are animated in renderFrame below).
  const ringFragmentShader = `
    uniform vec3 uColor;
    uniform float uFade;
    uniform float uFadeOffset;
    varying float vWorldX;
    void main() {
      float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
      gl_FragColor = vec4(uColor, alpha);
    }
  `;

  const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
    uniforms: {
      uColor: { value: new THREE.Color(0xfff5eb) },
      uFade: { value: 0.0 },
      uFadeOffset: { value: 0.0 },
      uPhase: { value: phase },
      uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius
    },
    vertexShader: ringVertexShader,
    fragmentShader: ringFragmentShader,
    transparent: true,
    side: THREE.DoubleSide,
    depthWrite: false,
  });

  const ringMaterial = makeRingMaterial(0.0);
  const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset

  const ring = new THREE.Mesh(geometry, ringMaterial);
  const ring2 = new THREE.Mesh(geometry2, ringMaterial2);

  const group = new THREE.Group();
  group.add(ring);
  group.add(ring2);
  group.rotation.y = Math.PI * 0.18;
  scene.add(group);

  let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect

  // Recompute the orthographic frustum to fit the mount element's aspect
  // ratio at the current zoom (orthoScale).
  const applyFrustum = () => {
    const width = Math.max(2, agentVizEl.clientWidth);
    const height = Math.max(2, agentVizEl.clientHeight);
    const aspect = width / height;
    const s = orthoSize * orthoScale;
    if (aspect >= 1) {
      camera.left = -s * aspect;
      camera.right = s * aspect;
      camera.top = s;
      camera.bottom = -s;
    } else {
      camera.left = -s;
      camera.right = s;
      camera.top = s / aspect;
      camera.bottom = -s / aspect;
    }
    camera.updateProjectionMatrix();
  };

  const resize = () => {
    const width = Math.max(2, agentVizEl.clientWidth);
    const height = Math.max(2, agentVizEl.clientHeight);
    renderer.setSize(width, height, false);
    applyFrustum();
  };
  resize();
  window.addEventListener("resize", resize);

  // --- Per-frame animation state (smoothed followers of the targets) ---
  let currentState = STATES.idle;
  let currentAudioLevel = 0;
  let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
  let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
  let deformScale = 1.0;
  let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
  let spinSpeed = 0.0;
  // Card background colour lerp: 0 = idle coral, 1 = dark coral (PTT/listening)
  let cardColorT = 0.0;
  let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
  const CARD_GRAY_RGB = [232, 228, 224]; // #e8e4e0 — disconnected light warm gray
  const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f — connected idle coral
  const CARD_LISTEN_RGB = [120, 40, 28]; // #782c1c — PTT active dark coral

  let prevCardRGB = "";
  let targetConnected = 0.0;
  let isConnecting = false;

  // Self-scheduling render loop. All motion is frame-rate independent: each
  // smoothed value moves toward its target with an exponential lerp whose
  // alpha is derived from dt (capped at 100 ms to survive tab switches).
  const renderFrame = (now = 0) => {
    const dt = Math.min((now - (renderFrame._lastNow || now)) / 1000, 0.1);
    renderFrame._lastNow = now;

    // Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
    const t = dt * 60;
    const lerpAudio = 1 - Math.pow(0.85, t); // fast
    const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
    const lerpDeform = 1 - Math.pow(0.88, t);
    const lerpSpin = 1 - Math.pow(0.86, t);
    const lerpRing = 1 - Math.pow(0.90, t);
    const lerpAmp = 1 - Math.pow(0.88, t);

    smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
    smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
    const speakingActive = currentState === STATES.speaking;

    // Vertical squash/stretch of the ring group, stronger while speaking,
    // flattened while thinking.
    let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
    if (speakingActive) {
      targetDeformScale = 2.05 + (smoothAudioLevel * 2.9);
    } else if (currentState === STATES.thinking) {
      targetDeformScale = 0.55 + (smoothAudioLevel * 0.35);
    }
    deformScale += (targetDeformScale - deformScale) * lerpDeform;
    group.scale.y = deformScale;

    // Thickness throb when thinking: pulse xz scale at 1 s rate.
    const targetRingScale = currentState === STATES.thinking
      ? 1.0 + 0.18 * (0.5 + 0.5 * Math.sin(now * (Math.PI * 2 / 1000)))
      : 1.0;
    ringScale += (targetRingScale - ringScale) * lerpRing;
    group.scale.x = ringScale;
    group.scale.z = ringScale;

    // Spin: fastest while speaking (audio-reactive), slow idle drift otherwise.
    const targetSpinSpeed = speakingActive
      ? (0.012 + smoothAudioLevel * 0.105)
      : (currentState === STATES.thinking ? 0.006 : 0.0022);
    spinSpeed += (targetSpinSpeed - spinSpeed) * lerpSpin;
    group.rotation.y += spinSpeed * t;

    // Only move camera (and call lookAt) when in speaking state.
    if (speakingActive || camera.position.distanceToSquared(topView) > 0.0001) {
      const lerpCamera = 1 - Math.pow(0.96, t);
      const targetCameraPosition = speakingActive ? speakingSideView : topView;
      camera.position.lerp(targetCameraPosition, lerpCamera);
      camera.lookAt(lookAt);
    }

    // Smoothly fade out the back half of the ring as the camera moves into side view.
    // sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
    const camLen = camera.position.length();
    const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
    const lerpSide = 1 - Math.pow(0.88, t);
    ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
    ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
    ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
    ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;

    // Phase animation + reactive amplitude while speaking.
    const baseAmp = 0.06 * 1.1;
    if (speakingActive) {
      const breathe = Math.sin(now * 0.0018); // ~3.5 s period
      const base = 1.8 + smoothAudioLevel * 4.0;
      const ring1Speed = (base + breathe * 0.6) * dt;
      const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
      ringMaterial.uniforms.uPhase.value += ring1Speed;
      ringMaterial2.uniforms.uPhase.value += ring2Speed;

      // Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
      const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
      const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
      ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
      ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
    } else {
      // Settle phase and amplitude back to rest values.
      ringMaterial2.uniforms.uPhase.value +=
        (Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
      ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
      ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
    }

    // Zoom in when in side view by shrinking the ortho frustum.
    const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
    if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
      orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
      applyFrustum();
    }

    // Card background: gray → coral as connection is established, then darken when listening.
    // While connecting, throb the gray base with a slow sine pulse.
    connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
    const throb = isConnecting && targetConnected === 0
      ? 0.22 * (0.5 - 0.5 * Math.sin(now * (Math.PI * 2 / 1000))) // 0–0.22 darkness pulse, 1 s period
      : 0.0;
    const baseR = Math.round(CARD_GRAY_RGB[0] + (CARD_IDLE_RGB[0] - CARD_GRAY_RGB[0]) * connectedT - throb * CARD_GRAY_RGB[0]);
    const baseG = Math.round(CARD_GRAY_RGB[1] + (CARD_IDLE_RGB[1] - CARD_GRAY_RGB[1]) * connectedT - throb * CARD_GRAY_RGB[1]);
    const baseB = Math.round(CARD_GRAY_RGB[2] + (CARD_IDLE_RGB[2] - CARD_GRAY_RGB[2]) * connectedT - throb * CARD_GRAY_RGB[2]);
    // Darkening toward listening is fast (base 0.05); recovery is slow (0.7).
    const targetCardT = currentState === STATES.listening ? 1.0 : 0.0;
    const cardBase = targetCardT > cardColorT ? 0.05 : 0.7;
    cardColorT += (targetCardT - cardColorT) * (1 - Math.pow(cardBase, t));
    const r = Math.min(255, Math.round(baseR + (CARD_LISTEN_RGB[0] - baseR) * cardColorT));
    const g = Math.min(255, Math.round(baseG + (CARD_LISTEN_RGB[1] - baseG) * cardColorT));
    const b = Math.min(255, Math.round(baseB + (CARD_LISTEN_RGB[2] - baseB) * cardColorT));
    // Only touch the GL clear color when the rounded RGB actually changed.
    const cardRGB = `${r},${g},${b}`;
    if (cardRGB !== prevCardRGB) {
      renderer.setClearColor((r << 16) | (g << 8) | b, 1);
      prevCardRGB = cardRGB;
    }

    renderer.render(scene, camera);
    requestAnimationFrame(renderFrame);
  };

  requestAnimationFrame(renderFrame);

  // Control API consumed by the rest of the page.
  return {
    // Clamp the reported audio level into [0, 1]; NaN becomes 0.
    setAudioLevel: (level) => {
      currentAudioLevel = Math.max(0, Math.min(1, Number(level) || 0));
    },
    // Ignores unknown state names (must be a key of STATES).
    setState: (state) => {
      if (!STATES[state]) return;
      currentState = state;
    },
    setConnected: (connected) => {
      targetConnected = connected ? 1.0 : 0.0;
      if (connected) isConnecting = false;
    },
    setConnecting: (connecting) => {
      isConnecting = !!connecting;
    },
  };
};
|
||
|
||
agentVisualizer = createAgentVisualizer();
if (agentVisualizer) agentVisualizer.setState(agentState);

// Record (in seconds) the last time the remote <audio> element showed any
// sign of playback activity.
const markRemoteAudioActivity = () => {
  lastRemoteAudioActivityS = performance.now() / 1000;
};
for (const eventName of ["playing", "timeupdate", "canplay", "seeked"]) {
  remoteAudio.addEventListener(eventName, markRemoteAudioActivity);
}
|
||
|
||
const wsProto = location.protocol === "https:" ? "wss" : "ws";
// Chat/control channel — the connection opens immediately at page load.
const ws = new WebSocket(`${wsProto}://${location.host}/ws/chat`);

// --- WebRTC voice-call state ---
// NOTE(review): most of these are consumed by code later in the file (not in
// this chunk); descriptions are inferred from names — confirm against usage.
let peerConnection = null;        // active RTCPeerConnection, if any
let micStream = null;             // local microphone MediaStream
let remoteStream = null;          // remote (agent) MediaStream — metered below
let voiceConnected = false;
let disconnectedTimer = null;
let reconnectTimer = null;
let reconnectAttempts = 0;
let voiceDesired = false;
let connectingVoice = false;
let pttPressed = false;           // push-to-talk currently held (presumably)
let rtcAnswerApplied = false;
let pendingRemoteCandidates = [];
let appStarted = false;
const MAX_RECONNECT_ATTEMPTS = 2;

// --- Visualizer audio metering (WebAudio) ---
// webkit fallback covers older Safari.
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
let visualizerAudioContext = null;
let visualizerSourceNode = null;   // MediaStreamSource feeding the analyser
let visualizerSourceStream = null; // the stream the current source was built from
let visualizerAnalyser = null;
let visualizerWaveform = null;     // reusable byte buffer for analyser reads
let visualizerMeterRunning = false; // guards against starting two rAF meter loops
|
||
|
||
// --- Status overlay ---
let statusTimer = null;

// Show `text` in the bottom status pill. With persistMs > 0 the pill hides
// itself after that many milliseconds; with 0 it stays until replaced.
const showStatus = (text, persistMs = 0) => {
  voiceStatus.textContent = text;
  voiceStatus.classList.add("visible");
  if (statusTimer !== null) {
    clearTimeout(statusTimer);
    statusTimer = null;
  }
  if (!(persistMs > 0)) return;
  statusTimer = setTimeout(() => {
    statusTimer = null;
    voiceStatus.classList.remove("visible");
  }, persistMs);
};
|
||
|
||
// Start a requestAnimationFrame loop that measures the analyser's RMS level
// and feeds it to the visualizer every frame. Idempotent: the
// visualizerMeterRunning flag ensures only one loop ever runs.
const startVisualizerMeter = () => {
  if (visualizerMeterRunning) return;
  visualizerMeterRunning = true;

  const sampleLevel = () => {
    let level = 0;
    if (visualizerAnalyser && visualizerWaveform) {
      visualizerAnalyser.getByteTimeDomainData(visualizerWaveform);
      // RMS of the waveform; byte samples are centered on 128 (= silence).
      let sumSquares = 0;
      for (const sample of visualizerWaveform) {
        const centered = (sample - 128) / 128;
        sumSquares += centered * centered;
      }
      const rms = Math.sqrt(sumSquares / visualizerWaveform.length);
      level = Math.min(1, rms * 4.8);
    }
    if (agentVisualizer) agentVisualizer.setAudioLevel(level);
    requestAnimationFrame(sampleLevel);
  };

  requestAnimationFrame(sampleLevel);
};
|
||
|
||
// Lazily build the WebAudio chain (context → MediaStreamSource → analyser)
// that meters the remote stream, then start the sampling loop. Safe to call
// repeatedly: the context/analyser are created once, and the source node is
// rewired only when remoteStream changes.
const ensureVisualizerAudioMeter = async () => {
  if (!agentVisualizer || !AudioContextCtor) return;

  visualizerAudioContext = visualizerAudioContext || new AudioContextCtor();
  if (visualizerAudioContext.state === "suspended") {
    // resume() can be rejected until a user gesture occurs; best-effort only.
    try { await visualizerAudioContext.resume(); } catch (_) {}
  }

  if (!visualizerAnalyser) {
    visualizerAnalyser = visualizerAudioContext.createAnalyser();
    visualizerAnalyser.fftSize = 512;
    visualizerAnalyser.smoothingTimeConstant = 0.84;
    visualizerWaveform = new Uint8Array(visualizerAnalyser.fftSize);
  }

  const hasRemoteAudio = !!(
    remoteStream
    && remoteStream.getAudioTracks
    && remoteStream.getAudioTracks().length > 0
  );
  if (hasRemoteAudio && visualizerSourceStream !== remoteStream) {
    // Tear down the previous source before wiring up the new stream.
    if (visualizerSourceNode) {
      try { visualizerSourceNode.disconnect(); } catch (_) {}
      visualizerSourceNode = null;
    }
    try {
      visualizerSourceNode = visualizerAudioContext.createMediaStreamSource(remoteStream);
      visualizerSourceNode.connect(visualizerAnalyser);
      visualizerSourceStream = remoteStream;
    } catch (_err) {
      // On failure leave the meter running with no source (level reads as 0).
      visualizerSourceNode = null;
      visualizerSourceStream = null;
    }
  }

  startVisualizerMeter();
};
|
||
|
||
// --- Log ---
// Incoming log items are buffered in pendingLogItems and flushed to the
// DOM at most once per animation frame. The rendered log keeps at most
// MAX_LOG_LINES elements; the pending buffer itself is capped at
// MAX_PENDING_LOG_LINES so a message burst cannot grow it unboundedly.
const MAX_LOG_LINES = 250;
const MAX_PENDING_LOG_LINES = 500;
const pendingLogItems = [];
// True while a flush is already scheduled for the next animation frame.
let logFlushScheduled = false;
|
||
|
||
// Drains the pending log buffer into the log element with a single DOM
// append, then evicts the oldest rendered lines beyond MAX_LOG_LINES.
const flushPendingLogItems = () => {
  logFlushScheduled = false;
  if (pendingLogItems.length === 0) return;

  const batch = pendingLogItems.splice(0);
  const fragment = document.createDocumentFragment();
  for (const item of batch) {
    const role = item.role || "system";
    const lineEl = document.createElement("div");
    lineEl.className = `line ${role}`;
    const time = item.timestamp ? new Date(item.timestamp).toLocaleTimeString() : "";
    const rawText = (item.text || "").toString();
    if (role.toString().trim().toLowerCase() === "nanobot") {
      // Drop a redundant leading speaker prefix the agent sometimes emits.
      const cleaned = rawText.replace(/^(?:nanobot|napbot)\b\s*[:>\-]?\s*/i, "");
      lineEl.textContent = `[${time}] ${cleaned}`;
    } else {
      lineEl.textContent = `[${time}] ${role}: ${rawText}`;
    }
    fragment.appendChild(lineEl);
  }

  logEl.appendChild(fragment);
  while (logEl.childElementCount > MAX_LOG_LINES && logEl.firstElementChild) {
    logEl.removeChild(logEl.firstElementChild);
  }
};
|
||
|
||
// Coalesces flush requests so at most one flush runs per animation frame.
const scheduleLogFlush = () => {
  if (!logFlushScheduled) {
    logFlushScheduled = true;
    requestAnimationFrame(flushPendingLogItems);
  }
};
|
||
|
||
// Queues one log line for rendering. When the backlog exceeds
// MAX_PENDING_LOG_LINES the oldest pending items are dropped.
const appendLine = (role, text, timestamp) => {
  pendingLogItems.push({ role, text, timestamp });
  const overflow = pendingLogItems.length - MAX_PENDING_LOG_LINES;
  if (overflow > 0) pendingLogItems.splice(0, overflow);
  scheduleLogFlush();
};
|
||
|
||
// Serializes a payload and sends it over the control WebSocket;
// silently drops the payload when the socket is not open.
const sendJson = (payload) => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify(payload));
  }
};
|
||
|
||
// Sends a user-typed chat message (trimmed) to the server.
// Returns true only when the message was actually dispatched.
const sendUserMessage = (text) => {
  const message = (text || "").toString().trim();
  if (message.length === 0) return false;
  if (ws.readyState === WebSocket.OPEN) {
    sendJson({ type: "user-message", text: message });
    return true;
  }
  showStatus("WebSocket disconnected.", 2000);
  return false;
};
|
||
|
||
// --- Voice state ---
// Records voice-channel connectivity and mirrors it on the visualizer.
const setVoiceConnected = (connected) => {
  voiceConnected = connected;
  agentVisualizer?.setConnected(connected);
};
|
||
|
||
// Enables/disables every local microphone track without stopping capture
// (keeping tracks alive avoids renegotiation when PTT toggles).
const setMicCaptureEnabled = (enabled) => {
  if (!micStream) return;
  for (const track of micStream.getAudioTracks()) {
    track.enabled = enabled;
  }
};
|
||
|
||
// Applies push-to-talk state locally (mic gating, agent state, status
// text) and, unless notifyServer is false, mirrors it to the server.
const setPushToTalkState = (pressed, notifyServer = true) => {
  pttPressed = pressed;
  setMicCaptureEnabled(pressed);
  if (notifyServer && ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
  }
  if (pressed) {
    setAgentState(STATES.listening);
    showStatus("Listening...");
    return;
  }
  // Released: leave listening only if we were the ones who entered it.
  if (agentState === STATES.listening) setAgentState(STATES.idle);
  if (voiceConnected) showStatus("Hold anywhere to talk", 1800);
};
|
||
|
||
// Engages push-to-talk once voice is fully set up; no-op while held.
const beginPushToTalk = () => {
  const voiceReady = voiceConnected && peerConnection && micStream;
  if (!voiceReady || pttPressed) return;
  setPushToTalkState(true);
};
|
||
|
||
// Releases push-to-talk if it is currently held.
const endPushToTalk = () => {
  if (pttPressed) setPushToTalkState(false);
};
|
||
|
||
// --- Reconnect ---
// Cancels a pending voice reconnect attempt, if one is scheduled.
const clearReconnectTimer = () => {
  if (!reconnectTimer) return;
  clearTimeout(reconnectTimer);
  reconnectTimer = null;
};
|
||
|
||
// Schedules a single voice reconnect attempt after delayMs, bounded by
// MAX_RECONNECT_ATTEMPTS. No-ops when voice is up, a connect is in
// flight, a retry is already scheduled, or the user stopped voice.
const scheduleReconnect = (reason, delayMs = 1200) => {
  const blocked = !voiceDesired || voiceConnected || connectingVoice || reconnectTimer;
  if (blocked) return;
  if (reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
    showStatus("Voice reconnect failed.");
    return;
  }
  reconnectAttempts += 1;
  showStatus(`${reason} Retrying (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})...`);
  reconnectTimer = setTimeout(async () => {
    reconnectTimer = null;
    // connectVoiceChannel handles its own errors; no rejection can escape.
    await connectVoiceChannel();
  }, delayMs);
};
|
||
|
||
// Tears down the whole voice path: timers, PTT state, the
// RTCPeerConnection, local mic and remote audio streams, and the
// visualizer's tap on the remote stream. statusText, when non-empty, is
// shown to the user afterwards. clearDesired=true also clears the user
// intent flag and any scheduled retry so no automatic reconnect follows.
const stopVoiceChannel = async (statusText = "", clearDesired = false) => {
  if (clearDesired) {
    voiceDesired = false;
    reconnectAttempts = 0;
    clearReconnectTimer();
  }

  if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }

  // Reset signaling bookkeeping used by applyRtcAnswer/applyRtcIceCandidate.
  pendingRemoteCandidates = [];
  rtcAnswerApplied = false;
  // Release PTT locally only (notifyServer=false): the channel may be gone.
  setPushToTalkState(false, false);

  if (peerConnection) {
    // Detach handlers before close() so no stale callbacks fire mid-teardown.
    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onconnectionstatechange = null;
    peerConnection.close();
    peerConnection = null;
  }

  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }

  if (remoteStream) {
    remoteStream.getTracks().forEach((track) => track.stop());
    remoteStream = null;
  }

  remoteAudio.srcObject = null;
  setVoiceConnected(false);
  lastRemoteAudioActivityS = 0;
  // Drop the visualizer's source binding; the analyser itself is reused.
  visualizerSourceStream = null;
  if (visualizerSourceNode) {
    try { visualizerSourceNode.disconnect(); } catch (_) {}
    visualizerSourceNode = null;
  }
  if (agentVisualizer) agentVisualizer.setAudioLevel(0);
  if (agentVisualizer) agentVisualizer.setConnecting(false);
  if (statusText) showStatus(statusText, 3000);
};
|
||
|
||
// --- WebRTC ---
// Applies the server's SDP answer, then replays any ICE candidates that
// arrived (and were queued) before the answer landed.
const applyRtcAnswer = async (message) => {
  if (!peerConnection) return;
  const rawSdp = (message.sdp || "").toString();
  if (!rawSdp.trim()) return;
  // SDP requires CRLF line endings and a trailing newline; normalize
  // whatever line-ending mix the transport delivered.
  const normalized = rawSdp
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .split("\n")
    .map((line) => line.trimEnd())
    .join("\r\n")
    .trim();
  const sdp = `${normalized}\r\n`;
  try {
    await peerConnection.setRemoteDescription({ type: message.rtcType || "answer", sdp });
    rtcAnswerApplied = true;
    const queued = pendingRemoteCandidates;
    pendingRemoteCandidates = [];
    for (const candidate of queued) {
      // Individual candidate failures are non-fatal.
      try { await peerConnection.addIceCandidate(candidate); } catch (_) {}
    }
    reconnectAttempts = 0;
  } catch (err) {
    await stopVoiceChannel("Voice setup failed.");
    scheduleReconnect("Failed to apply answer.");
    appendLine("system", `RTC answer error: ${err}`, new Date().toISOString());
  }
};
|
||
|
||
// Feeds a remote ICE candidate to the peer connection. A null candidate
// is the end-of-candidates marker. Candidates arriving before the SDP
// answer is applied are queued and replayed by applyRtcAnswer.
const applyRtcIceCandidate = async (message) => {
  if (!peerConnection) return;
  const answerReady = () => rtcAnswerApplied && peerConnection.remoteDescription;

  if (message.candidate == null) {
    if (!answerReady()) {
      // Preserve the end-of-candidates marker for later replay.
      pendingRemoteCandidates.push(null);
      return;
    }
    try { await peerConnection.addIceCandidate(null); } catch (_) {}
    return;
  }

  if (!answerReady()) {
    pendingRemoteCandidates.push(message.candidate);
    return;
  }
  try {
    await peerConnection.addIceCandidate(message.candidate);
  } catch (err) {
    appendLine("system", `RTC ICE error: ${err}`, new Date().toISOString());
  }
};
|
||
|
||
// Establishes the voice path: captures the microphone (muted until PTT),
// builds an RTCPeerConnection wired to the remote <audio> element, and
// sends an SDP offer over the control WebSocket. The answer and ICE
// candidates come back via ws.onmessage (applyRtcAnswer /
// applyRtcIceCandidate). On any failure the channel is torn down and a
// bounded reconnect is scheduled.
const connectVoiceChannel = async () => {
  // Re-entrancy / capability / transport guards.
  if (voiceConnected || peerConnection || connectingVoice) return;
  if (!window.RTCPeerConnection || !navigator.mediaDevices?.getUserMedia) {
    showStatus("Voice unavailable in this browser.", 4000);
    return;
  }
  if (ws.readyState !== WebSocket.OPEN) {
    showStatus("Connecting...");
    return;
  }

  connectingVoice = true;
  if (agentVisualizer) agentVisualizer.setConnecting(true);
  showStatus("Connecting voice...");
  try {
    clearReconnectTimer();
    rtcAnswerApplied = false;
    pendingRemoteCandidates = [];

    // Prefer tuned mono capture; fall back to browser defaults if the
    // constraints are rejected.
    try {
      micStream = await navigator.mediaDevices.getUserMedia({
        audio: { channelCount: 1, sampleRate: 48000, sampleSize: 16, latency: 0,
          echoCancellation: true, noiseSuppression: true, autoGainControl: false },
        video: false,
      });
    } catch (_) {
      micStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
    }
    // Start muted; push-to-talk enables the tracks.
    setMicCaptureEnabled(false);

    peerConnection = new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
    remoteStream = new MediaStream();
    remoteAudio.srcObject = remoteStream;

    // Route incoming audio tracks to the <audio> element and hook up the
    // visualizer meter once playback actually starts.
    peerConnection.ontrack = (event) => {
      if (event.track.kind !== "audio") return;
      remoteStream.addTrack(event.track);
      remoteAudio.play().then(() => {
        markRemoteAudioActivity();
        ensureVisualizerAudioMeter();
      }).catch(() => {});
    };

    // Trickle local ICE candidates to the server; null marks the end.
    peerConnection.onicecandidate = (event) => {
      if (!event.candidate) { sendJson({ type: "rtc-ice-candidate", candidate: null }); return; }
      sendJson({ type: "rtc-ice-candidate", candidate: event.candidate.toJSON() });
    };

    peerConnection.onconnectionstatechange = () => {
      const state = peerConnection?.connectionState || "new";
      if (state === "connected") {
        if (disconnectedTimer) { clearTimeout(disconnectedTimer); disconnectedTimer = null; }
        clearReconnectTimer();
        reconnectAttempts = 0;
        setVoiceConnected(true);
        showStatus("Hold anywhere to talk", 2500);
        return;
      }
      if (state === "failed" || state === "closed") {
        // Terminal states: tear down and retry.
        stopVoiceChannel(`Voice channel ${state}.`);
        scheduleReconnect(`Voice channel ${state}.`);
        return;
      }
      if (state === "disconnected") {
        // "disconnected" can be transient; give ICE 8s to recover before
        // tearing the channel down.
        if (disconnectedTimer) clearTimeout(disconnectedTimer);
        showStatus("Voice disconnected. Waiting...");
        disconnectedTimer = setTimeout(() => {
          if (peerConnection?.connectionState === "disconnected") {
            stopVoiceChannel("Voice channel disconnected.");
            scheduleReconnect("Voice channel disconnected.");
          }
        }, 8000);
        return;
      }
    };

    micStream.getAudioTracks().forEach((track) => { peerConnection.addTrack(track, micStream); });

    const offer = await peerConnection.createOffer();
    await peerConnection.setLocalDescription(offer);
    sendJson({ type: "rtc-offer", sdp: offer.sdp, rtcType: offer.type });
  } catch (err) {
    await stopVoiceChannel("Voice setup failed.");
    scheduleReconnect("Voice setup failed.");
    appendLine("system", `Voice setup error: ${err}`, new Date().toISOString());
  } finally {
    connectingVoice = false;
    // Stop throb if connection failed (success path clears it in setConnected)
    if (!voiceConnected && agentVisualizer) agentVisualizer.setConnecting(false);
  }
};
|
||
|
||
// --- First-tap bootstrap ---
// One-shot initialization driven by the first user gesture: unlocks
// audio playback, arms the visualizer meter, asks the server to spawn
// the agent, and opens the voice channel.
const bootstrap = async () => {
  if (appStarted) return;
  appStarted = true;
  // Unblock audio context (required by browsers before user gesture resolves)
  remoteAudio.play().catch(() => {});
  await ensureVisualizerAudioMeter();
  sendJson({ type: "spawn" });
  voiceDesired = true;
  reconnectAttempts = 0;
  await connectVoiceChannel();
};
|
||
|
||
// Reset button: bootstraps the app on first use (so a reset can be the
// very first interaction), then asks the server to reset the session.
if (resetSessionBtn) {
  resetSessionBtn.addEventListener("click", async (event) => {
    event.preventDefault();
    // Keep the click from reaching the document-level PTT handlers.
    event.stopPropagation();
    if (!appStarted) await bootstrap();
    if (ws.readyState !== WebSocket.OPEN) return;
    sendJson({ type: "command", command: "reset" });
    showStatus("Session reset.", 1500);
  });
}
|
||
|
||
// --- Center-card PTT pointer handling ---
// Only touches that land on #agentIndicator / #agentViz (data-ptt="1") trigger PTT.
// Active pointer IDs are tracked so multi-touch doesn't double-fire:
// the first tracked pointer begins PTT, and PTT ends only once every
// tracked pointer has been released or cancelled.
const activePointers = new Set();

document.addEventListener("pointerdown", async (event) => {
  const target = event.target;
  if (!(target instanceof Element)) return;
  if (!target.closest("[data-ptt='1']")) return;
  activePointers.add(event.pointerId);
  if (!appStarted) await bootstrap();
  // Fire-and-forget: the meter hookup doesn't gate PTT.
  ensureVisualizerAudioMeter();
  if (activePointers.size === 1) beginPushToTalk();
}, { passive: false });

// Shared release path for pointerup and pointercancel.
const releasePttPointer = (event) => {
  activePointers.delete(event.pointerId);
  if (activePointers.size === 0) endPushToTalk();
};

document.addEventListener("pointerup", releasePttPointer, { passive: false });

document.addEventListener("pointercancel", releasePttPointer, { passive: false });
|
||
|
||
// --- WebSocket ---
// Control-channel lifecycle: log transitions and tear down voice when
// the signaling transport drops (no reconnect without signaling).
ws.onopen = () => {
  appendLine("system", "WebSocket connected.", new Date().toISOString());
  showStatus("Tap anywhere to start", 0);
};

ws.onclose = async () => {
  appendLine("system", "WebSocket disconnected.", new Date().toISOString());
  // clearDesired=true: voice cannot be renegotiated without the socket.
  await stopVoiceChannel("Disconnected.", true);
};

ws.onerror = () => {
  appendLine("system", "WebSocket error.", new Date().toISOString());
};
|
||
// Control-channel dispatcher. WebRTC signaling messages are keyed by
// msg.type and handled first; chat/UI messages are keyed by msg.role.
// Frames that fail to parse as JSON are logged verbatim as system lines.
ws.onmessage = async (event) => {
  try {
    const msg = JSON.parse(event.data);

    // WebRTC signaling.
    if (msg.type === "rtc-answer") { await applyRtcAnswer(msg); return; }
    if (msg.type === "rtc-ice-candidate") { await applyRtcIceCandidate(msg); return; }
    if (msg.type === "rtc-state") {
      const state = (msg.state || "").toString();
      if (state === "connected") {
        setVoiceConnected(true);
        showStatus("Hold anywhere to talk", 2500);
      }
      return;
    }
    if (msg.type === "rtc-error") {
      const text = (msg.message || "Unknown WebRTC error.").toString();
      showStatus(`Voice error: ${text}`, 4000);
      appendLine("system", `Voice error: ${text}`, new Date().toISOString());
      await stopVoiceChannel("Voice channel error.");
      scheduleReconnect("Voice channel error.");
      return;
    }

    // Drive agent state indicator from server-sent agent-state events
    if (msg.role === "agent-state") {
      const newState = (msg.text || "").trim();
      // Don't override listening state (user is holding PTT)
      if (agentState !== STATES.listening && STATES[newState]) {
        setAgentState(newState);
      }
    } else if (msg.role === "toast") {
      // Toast payload is JSON embedded in msg.text; fall back to showing
      // the raw text when it doesn't parse.
      try {
        const t = JSON.parse(msg.text || "{}");
        showToast(
          t.kind || "text",
          t.content || "",
          t.title || "",
          typeof t.duration_ms === "number" ? t.duration_ms : 6000,
        );
      } catch (_) {
        showToast("text", msg.text || "", "", 6000);
      }
    } else if (msg.role === "choice") {
      // Choice prompt payload is JSON embedded in msg.text.
      try {
        const c = JSON.parse(msg.text || "{}");
        showChoice(
          c.request_id || "",
          c.question || "",
          Array.isArray(c.choices) ? c.choices : [],
          c.title || "",
        );
      } catch (_) {
        // Malformed choice payload — ignore.
      }
    } else if (msg.role === "wisper") {
      // suppress wisper debug output
    } else {
      // Anything else renders as a plain chat/system log line.
      appendLine(msg.role || "system", msg.text || "", msg.timestamp || "");
    }

  } catch (_err) {
    // Non-JSON frame: surface it raw so nothing is silently lost.
    appendLine("system", event.data, new Date().toISOString());
  }
};
|
||
</script>
|
||
</body>
|
||
</html>
|