566 lines
18 KiB
HTML
566 lines
18 KiB
HTML
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Nanobot Chat (SuperTonic + Wisper)</title>
  <style>
    /* Design tokens shared by the whole page. */
    :root {
      --bg: #f6f8fa;
      --panel: #ffffff;
      --text: #1f2937;
      --muted: #6b7280;
      --accent: #0d9488;
      --border: #d1d5db;
    }

    * {
      box-sizing: border-box;
    }

    body {
      margin: 0;
      font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
      background: linear-gradient(180deg, #eef6ff 0%, var(--bg) 100%);
      color: var(--text);
    }

    .wrap {
      max-width: 980px;
      margin: 24px auto;
      padding: 0 16px;
    }

    h1 {
      margin: 0 0 12px;
      font-size: 1.2rem;
    }

    .panel {
      background: var(--panel);
      border: 1px solid var(--border);
      border-radius: 10px;
      padding: 12px;
    }

    .controls {
      display: flex;
      gap: 8px;
      margin-bottom: 12px;
    }

    button {
      border: 1px solid var(--border);
      background: white;
      border-radius: 8px;
      padding: 8px 12px;
      cursor: pointer;
    }

    button:disabled {
      opacity: 0.6;
      cursor: not-allowed;
    }

    button.primary {
      background: var(--accent);
      color: white;
      border-color: var(--accent);
    }

    /* Push-to-talk button while it is held down. */
    button.ptt-active {
      background: #be123c;
      color: white;
      border-color: #be123c;
    }

    /* Scrollable chat transcript, styled like a terminal. */
    .log {
      border: 1px solid var(--border);
      border-radius: 8px;
      min-height: 420px;
      max-height: 420px;
      overflow: auto;
      padding: 10px;
      background: #0b1020;
      color: #d6e2ff;
      white-space: pre-wrap;
    }

    .line {
      margin-bottom: 8px;
    }

    .line.user {
      color: #9be5ff;
    }

    .line.system {
      color: #ffd28f;
    }

    .line.wisper {
      color: #c4f0be;
    }

    .voice {
      display: flex;
      gap: 8px;
      align-items: center;
      margin-top: 8px;
    }

    .voice-status {
      color: var(--muted);
      font-size: 12px;
    }

    .hint {
      margin-top: 10px;
      color: var(--muted);
      font-size: 12px;
    }

    /* Stack the control rows on narrow screens. */
    @media (max-width: 700px) {
      .controls,
      .voice {
        flex-direction: column;
        align-items: stretch;
      }
    }
  </style>
</head>
<body>
  <div class="wrap">
    <h1>Nanobot Web Chat (SuperTonic + Wisper)</h1>
    <div class="panel">
      <div class="controls">
        <!-- type="button": a bare <button> defaults to type="submit". -->
        <button id="spawnBtn" class="primary" type="button">Spawn Nanobot TUI</button>
        <button id="stopBtn" type="button">Stop TUI</button>
      </div>
      <div id="log" class="log"></div>
      <div class="voice">
        <button id="recordBtn" type="button">Connect Voice Channel</button>
        <button id="pttBtn" type="button" disabled>Hold to Talk</button>
        <!-- role="status" makes async voice-state text audible to screen readers. -->
        <span id="voiceStatus" class="voice-status" role="status"></span>
      </div>
      <!-- Hidden sink for the remote WebRTC audio track; playback is driven by script. -->
      <audio id="remoteAudio" autoplay playsinline hidden></audio>
      <div class="hint">
        Voice input and output run over a host WebRTC audio channel. Hold Push-to-Talk to send microphone audio for host STT.
      </div>
    </div>
  </div>
<script>
|
|
const logEl = document.getElementById("log");
|
|
const spawnBtn = document.getElementById("spawnBtn");
|
|
const stopBtn = document.getElementById("stopBtn");
|
|
const recordBtn = document.getElementById("recordBtn");
|
|
const pttBtn = document.getElementById("pttBtn");
|
|
const voiceStatus = document.getElementById("voiceStatus");
|
|
const remoteAudio = document.getElementById("remoteAudio");
|
|
|
|
const wsProto = location.protocol === "https:" ? "wss" : "ws";
|
|
const ws = new WebSocket(`${wsProto}://${location.host}/ws/chat`);
|
|
|
|
let peerConnection = null;
|
|
let micStream = null;
|
|
let remoteStream = null;
|
|
let voiceConnected = false;
|
|
let disconnectedTimer = null;
|
|
let reconnectTimer = null;
|
|
let reconnectAttempts = 0;
|
|
let voiceDesired = false;
|
|
let connectingVoice = false;
|
|
let pttPressed = false;
|
|
let rtcAnswerApplied = false;
|
|
let pendingRemoteCandidates = [];
|
|
const MAX_RECONNECT_ATTEMPTS = 2;
|
|
|
|
// Append one timestamped line to the on-page log and keep it scrolled to the bottom.
const appendLine = (role, text, timestamp) => {
  const entry = document.createElement("div");
  entry.className = `line ${role || "system"}`;
  let time = "";
  if (timestamp) {
    time = new Date(timestamp).toLocaleTimeString();
  }
  entry.textContent = `[${time}] ${role}: ${text}`;
  logEl.appendChild(entry);
  logEl.scrollTop = logEl.scrollHeight;
};
// Serialize and send a payload over the chat socket; log locally if it is not open.
const sendJson = (payload) => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify(payload));
    return;
  }
  appendLine("system", "Socket not ready.", new Date().toISOString());
};
// Sync the voice-related controls with the connected/disconnected state.
const setVoiceState = (connected) => {
  voiceConnected = connected;
  pttBtn.disabled = !connected;
  if (connected) {
    recordBtn.textContent = "Disconnect Voice Channel";
  } else {
    recordBtn.textContent = "Connect Voice Channel";
    // Reset the PTT button to its idle look when voice goes down.
    pttBtn.textContent = "Hold to Talk";
    pttBtn.classList.remove("ptt-active");
  }
};
// Enable/disable every local microphone track (mutes without renegotiating).
const setMicCaptureEnabled = (enabled) => {
  if (!micStream) return;
  for (const track of micStream.getAudioTracks()) {
    track.enabled = enabled;
  }
};
// Update push-to-talk UI and mic gating, optionally informing the server.
const setPushToTalkState = (pressed, notifyServer = true) => {
  pttPressed = pressed;
  if (pressed) {
    pttBtn.textContent = "Release to Send";
  } else {
    pttBtn.textContent = "Hold to Talk";
  }
  pttBtn.classList.toggle("ptt-active", pressed);
  setMicCaptureEnabled(pressed);
  // Deliberately bypasses sendJson: a closed socket should not spam the log here.
  if (notifyServer && ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
  }
};
// Pointer/keyboard "press" handler for push-to-talk.
const beginPushToTalk = (event) => {
  event?.preventDefault();
  const voiceReady = voiceConnected && peerConnection && micStream;
  if (!voiceReady) {
    voiceStatus.textContent = "Connect voice channel first.";
    return;
  }
  if (pttPressed) return; // ignore repeats while already held
  setPushToTalkState(true);
  voiceStatus.textContent = "Listening while button is held...";
};
// Pointer/keyboard "release" handler for push-to-talk.
const endPushToTalk = (event) => {
  event?.preventDefault();
  if (!pttPressed) return;
  setPushToTalkState(false);
  if (voiceConnected) {
    voiceStatus.textContent = "Voice channel connected. Hold Push-to-Talk to speak.";
  }
};
// Cancel any scheduled voice reconnect attempt.
const clearReconnectTimer = () => {
  if (!reconnectTimer) return;
  clearTimeout(reconnectTimer);
  reconnectTimer = null;
};
// Schedule one voice reconnect attempt (bounded by MAX_RECONNECT_ATTEMPTS),
// unless the user no longer wants voice or a connection is already up/in flight.
const scheduleReconnect = (reason, delayMs = 1200) => {
  if (!voiceDesired) return;
  if (voiceConnected || connectingVoice || reconnectTimer) return;
  if (reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
    voiceStatus.textContent = "Voice reconnect attempts exhausted.";
    return;
  }
  reconnectAttempts += 1;
  voiceStatus.textContent = `${reason} Retrying (${reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS})...`;
  reconnectTimer = setTimeout(async () => {
    reconnectTimer = null;
    await connectVoiceChannel();
  }, delayMs);
};
// Tear down the WebRTC session and release the microphone.
// clearDesired additionally cancels auto-reconnect (user-initiated stop).
const stopVoiceChannel = async (statusText = "", clearDesired = false) => {
  if (clearDesired) {
    voiceDesired = false;
    reconnectAttempts = 0;
    clearReconnectTimer();
  }

  if (disconnectedTimer) {
    clearTimeout(disconnectedTimer);
    disconnectedTimer = null;
  }

  pendingRemoteCandidates = [];
  rtcAnswerApplied = false;
  setPushToTalkState(false);

  if (peerConnection) {
    // Detach handlers first so close() cannot re-enter reconnect logic.
    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onconnectionstatechange = null;
    peerConnection.close();
    peerConnection = null;
  }

  if (micStream) {
    for (const track of micStream.getTracks()) track.stop();
    micStream = null;
  }

  if (remoteStream) {
    for (const track of remoteStream.getTracks()) track.stop();
    remoteStream = null;
  }

  remoteAudio.srcObject = null;
  setVoiceState(false);
  if (statusText) {
    voiceStatus.textContent = statusText;
  }
};
// Apply the host's SDP answer, then flush any ICE candidates that arrived early.
// Line endings are normalized to CRLF before applying (presumably for strict
// host-side SDP parsing — original behavior preserved).
const applyRtcAnswer = async (message) => {
  if (!peerConnection) return;
  const rawSdp = (message.sdp || "").toString();
  if (!rawSdp.trim()) return;
  const normalizedLines = rawSdp
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .split("\n")
    .map((line) => line.trimEnd());
  const sdp = `${normalizedLines.join("\r\n").trim()}\r\n`;
  try {
    await peerConnection.setRemoteDescription({
      type: message.rtcType || "answer",
      sdp,
    });
    rtcAnswerApplied = true;
    // Drain the queue built up while the answer was pending.
    const queued = pendingRemoteCandidates;
    pendingRemoteCandidates = [];
    for (const candidate of queued) {
      try {
        await peerConnection.addIceCandidate(candidate);
      } catch (candidateErr) {
        appendLine("system", `Queued ICE apply error: ${candidateErr}`, new Date().toISOString());
      }
    }
    reconnectAttempts = 0;
    voiceStatus.textContent = "Voice channel negotiated.";
  } catch (err) {
    await stopVoiceChannel("Failed to apply WebRTC answer.");
    scheduleReconnect("Failed to apply answer.");
    // Log the first few SDP lines to aid debugging the rejected answer.
    const preview = sdp.split(/\r\n/).slice(0, 6).join(" | ");
    appendLine(
      "system",
      `RTC answer error: ${err}. SDP preview: ${preview}`,
      new Date().toISOString()
    );
  }
};
// Apply (or queue) a remote ICE candidate. A null candidate is the
// end-of-candidates marker and is queued/forwarded the same way.
const applyRtcIceCandidate = async (message) => {
  if (!peerConnection) return;
  const answerReady = rtcAnswerApplied && peerConnection.remoteDescription;
  if (message.candidate == null) {
    if (!answerReady) {
      pendingRemoteCandidates.push(null);
      return;
    }
    try {
      await peerConnection.addIceCandidate(null);
    } catch (err) {
      appendLine("system", `RTC ICE end error: ${err}`, new Date().toISOString());
    }
    return;
  }
  if (!answerReady) {
    pendingRemoteCandidates.push(message.candidate);
    return;
  }
  try {
    await peerConnection.addIceCandidate(message.candidate);
  } catch (err) {
    appendLine("system", `RTC ICE error: ${err}`, new Date().toISOString());
  }
};
// Acquire the microphone, preferring low-latency mono constraints but falling
// back to browser defaults if the constraints are rejected.
const acquireMicStream = async () => {
  try {
    return await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        sampleRate: 48000,
        sampleSize: 16,
        latency: 0,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: false,
      },
      video: false,
    });
  } catch (_constraintErr) {
    const fallback = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false,
    });
    voiceStatus.textContent = "Using browser default microphone settings.";
    return fallback;
  }
};

// React to RTCPeerConnection connection-state transitions: clear timers on
// "connected", tear down + retry on "failed"/"closed", and give
// "disconnected" an 8 s grace period before tearing down.
const handleConnectionStateChange = () => {
  const state = peerConnection?.connectionState || "new";
  if (state === "connected") {
    if (disconnectedTimer) {
      clearTimeout(disconnectedTimer);
      disconnectedTimer = null;
    }
    clearReconnectTimer();
    reconnectAttempts = 0;
    voiceStatus.textContent = "Voice channel connected. Hold Push-to-Talk to speak.";
    return;
  }
  if (state === "failed" || state === "closed") {
    stopVoiceChannel(`Voice channel ${state}.`);
    scheduleReconnect(`Voice channel ${state}.`);
    return;
  }
  if (state === "disconnected") {
    if (disconnectedTimer) clearTimeout(disconnectedTimer);
    voiceStatus.textContent = "Voice channel disconnected. Waiting to recover...";
    disconnectedTimer = setTimeout(() => {
      // Only tear down if we are still disconnected after the grace period.
      if (peerConnection?.connectionState === "disconnected") {
        stopVoiceChannel("Voice channel disconnected.");
        scheduleReconnect("Voice channel disconnected.");
      }
    }, 8000);
    return;
  }
  voiceStatus.textContent = `Voice channel ${state}...`;
};

// Establish the WebRTC voice channel: capture the mic (muted until PTT),
// create the peer connection, wire remote audio + ICE signalling over the
// chat websocket, and send an SDP offer to the host.
const connectVoiceChannel = async () => {
  if (voiceConnected || peerConnection || connectingVoice) return;
  if (!window.RTCPeerConnection) {
    voiceStatus.textContent = "WebRTC unavailable in this browser.";
    return;
  }
  if (!navigator.mediaDevices?.getUserMedia) {
    voiceStatus.textContent = "Microphone capture is unavailable.";
    return;
  }
  if (ws.readyState !== WebSocket.OPEN) {
    voiceStatus.textContent = "Socket not ready yet.";
    return;
  }

  connectingVoice = true;
  try {
    clearReconnectTimer();
    rtcAnswerApplied = false;
    pendingRemoteCandidates = [];

    micStream = await acquireMicStream();
    setMicCaptureEnabled(false); // stay muted until push-to-talk

    peerConnection = new RTCPeerConnection({
      iceServers: [{ urls: "stun:stun.l.google.com:19302" }],
    });
    remoteStream = new MediaStream();
    remoteAudio.srcObject = remoteStream;

    peerConnection.ontrack = (event) => {
      if (event.track.kind !== "audio") return;
      remoteStream.addTrack(event.track);
      remoteAudio.play().catch(() => {
        // Autoplay blocked until a user gesture; see the body click handler.
        voiceStatus.textContent = "Tap the page once to allow voice playback.";
      });
    };

    peerConnection.onicecandidate = (event) => {
      if (!event.candidate) {
        // null = end-of-candidates marker for the host.
        sendJson({ type: "rtc-ice-candidate", candidate: null });
        return;
      }
      sendJson({
        type: "rtc-ice-candidate",
        candidate: event.candidate.toJSON(),
      });
    };

    peerConnection.onconnectionstatechange = handleConnectionStateChange;

    micStream.getAudioTracks().forEach((track) => {
      peerConnection.addTrack(track, micStream);
    });

    setVoiceState(true);
    voiceStatus.textContent = "Connecting voice channel...";
    setPushToTalkState(false);

    const offer = await peerConnection.createOffer();
    await peerConnection.setLocalDescription(offer);
    sendJson({
      type: "rtc-offer",
      sdp: offer.sdp,
      rtcType: offer.type,
    });
  } catch (err) {
    await stopVoiceChannel("Voice channel setup failed.");
    scheduleReconnect("Voice setup failed.");
    appendLine("system", `Voice setup error: ${err}`, new Date().toISOString());
  } finally {
    connectingVoice = false;
  }
};
// --- Chat websocket lifecycle ---
ws.onopen = () => {
  appendLine("system", "WebSocket connected.", new Date().toISOString());
};
ws.onclose = async () => {
  appendLine("system", "WebSocket disconnected.", new Date().toISOString());
  // Voice signalling rides on this socket, so voice cannot survive its loss.
  await stopVoiceChannel("Voice channel disconnected.", true);
};
ws.onerror = () => appendLine("system", "WebSocket error.", new Date().toISOString());

// Dispatch incoming messages: WebRTC signalling first, chat lines otherwise.
ws.onmessage = async (event) => {
  try {
    const msg = JSON.parse(event.data);
    switch (msg.type) {
      case "rtc-answer":
        await applyRtcAnswer(msg);
        return;
      case "rtc-ice-candidate":
        await applyRtcIceCandidate(msg);
        return;
      case "rtc-state": {
        const state = (msg.state || "").toString();
        if (state === "connected") {
          voiceStatus.textContent = "Voice channel connected. Hold Push-to-Talk to speak.";
        } else if (state) {
          voiceStatus.textContent = `Voice channel ${state}.`;
        }
        return;
      }
      case "rtc-error": {
        const text = (msg.message || "Unknown WebRTC error.").toString();
        voiceStatus.textContent = `Voice error: ${text}`;
        appendLine("system", `Voice error: ${text}`, new Date().toISOString());
        await stopVoiceChannel("Voice channel error.");
        scheduleReconnect("Voice channel error.");
        return;
      }
      default:
        appendLine(msg.role || "system", msg.text || "", msg.timestamp || "");
    }
  } catch (_err) {
    // Non-JSON payloads are shown verbatim.
    appendLine("system", event.data, new Date().toISOString());
  }
};
spawnBtn.onclick = () => sendJson({ type: "spawn" });
|
|
stopBtn.onclick = () => sendJson({ type: "stop" });
|
|
pttBtn.onpointerdown = (event) => {
|
|
if (event.button !== 0) return;
|
|
if (pttBtn.setPointerCapture) {
|
|
pttBtn.setPointerCapture(event.pointerId);
|
|
}
|
|
beginPushToTalk(event);
|
|
};
|
|
pttBtn.onpointerup = (event) => endPushToTalk(event);
|
|
pttBtn.onpointercancel = (event) => endPushToTalk(event);
|
|
pttBtn.onlostpointercapture = (event) => endPushToTalk(event);
|
|
pttBtn.addEventListener("keydown", (event) => {
|
|
const isSpace = event.code === "Space" || event.key === " ";
|
|
if (!isSpace || event.repeat) return;
|
|
beginPushToTalk(event);
|
|
});
|
|
pttBtn.addEventListener("keyup", (event) => {
|
|
const isSpace = event.code === "Space" || event.key === " ";
|
|
if (!isSpace) return;
|
|
endPushToTalk(event);
|
|
});
|
|
recordBtn.onclick = async () => {
|
|
if (voiceConnected || peerConnection || connectingVoice) {
|
|
await stopVoiceChannel("Voice channel disconnected.", true);
|
|
return;
|
|
}
|
|
voiceDesired = true;
|
|
reconnectAttempts = 0;
|
|
await connectVoiceChannel();
|
|
};
|
|
document.body.addEventListener("click", () => {
|
|
if (remoteAudio.srcObject && remoteAudio.paused) {
|
|
remoteAudio.play().catch(() => {});
|
|
}
|
|
});
|
|
setVoiceState(false);
|
|
</script>
|
|
</body>
|
|
</html>
|