api channel and tools

2026-03-05 15:10:14 -05:00 · 2026-03-05 15:10:14 -05:00 · 3816a9627e
commit 3816a9627e
parent 9222c59f03
4 changed files with 684 additions and 582 deletions
--- a/app.py
+++ b/app.py
@ -5,7 +5,7 @@ from pathlib import Path
 from typing import Any, Awaitable, Callable

 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.responses import FileResponse, JSONResponse
+from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles

 from supertonic_gateway import SuperTonicGateway
@ -28,8 +28,9 @@ async def health() -> JSONResponse:


@app.get("/")
-async def index() -> FileResponse:
-    return FileResponse(INDEX_PATH)
+async def index() -> HTMLResponse:
+    html = INDEX_PATH.read_text(encoding="utf-8")
+    return HTMLResponse(content=html)


@app.websocket("/ws/chat")
@ -65,18 +66,24 @@ async def websocket_chat(websocket: WebSocket) -> None:
            elif msg_type == "rtc-ice-candidate":
                await voice_session.handle_ice_candidate(message)
            elif msg_type == "voice-ptt":
-                voice_session.set_push_to_talk_pressed(
-                    bool(message.get("pressed", False))
-                )
+                voice_session.set_push_to_talk_pressed(bool(message.get("pressed", False)))
            elif msg_type == "user-message":
                await gateway.send_user_message(str(message.get("text", "")))
+            elif msg_type == "ui-response":
+                await gateway.send_ui_response(
+                    str(message.get("request_id", "")),
+                    str(message.get("value", "")),
+                )
+            elif msg_type == "command":
+                await gateway.send_command(str(message.get("command", "")))
            else:
                await safe_send_json(
                    {
                        "role": "system",
                        "text": (
                            "Unknown message type. Use spawn, stop, rtc-offer, "
-                            "rtc-ice-candidate, voice-ptt, or user-message."
+                            "rtc-ice-candidate, voice-ptt, user-message, "
+                            "ui-response, or command."
                        ),
                        "timestamp": "",
                    }
--- a/static/index.html
+++ b/static/index.html
@ -16,7 +16,7 @@
        width: 100%;
        height: 100%;
        overflow: hidden;
-        background: #1a1510;
+        background: #ffffff;
        touch-action: none;
      }
      #log {
@ -31,7 +31,7 @@
        font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
        font-size: 12px;
        line-height: 1.6;
-        color: rgba(255, 245, 235, 0.35);
+        color: rgba(30, 20, 10, 0.35);
        white-space: pre-wrap;
        word-break: break-word;
        display: flex;
@ -45,8 +45,8 @@
        mask-image: linear-gradient(to top, black 55%, transparent 100%);
      }
      #log:hover {
-        color: rgba(255, 245, 235, 0.92);
-        background: rgba(0, 0, 0, 0.18);
+        color: rgba(30, 20, 10, 0.85);
+        background: rgba(0, 0, 0, 0.06);
        -webkit-mask-image: none;
        mask-image: none;
      }
@ -62,17 +62,17 @@
        margin-bottom: 4px;
      }
      .line.user {
-        color: rgba(255, 255, 255, 0.9);
+        color: rgba(20, 10, 0, 0.85);
      }
      .line.system {
-        color: rgba(255, 220, 180, 0.5);
+        color: rgba(120, 80, 40, 0.5);
      }
      .line.wisper {
-        color: rgba(255, 200, 160, 0.4);
+        color: rgba(120, 80, 40, 0.4);
      }
-      #log:hover .line.user   { color: rgba(255, 255, 255, 1.0); }
-      #log:hover .line.system { color: rgba(255, 220, 180, 0.85); }
-      #log:hover .line.wisper { color: rgba(255, 200, 160, 0.75); }
+      #log:hover .line.user   { color: rgba(20, 10, 0, 1.0); }
+      #log:hover .line.system { color: rgba(120, 80, 40, 0.85); }
+      #log:hover .line.wisper { color: rgba(120, 80, 40, 0.75); }
      #voiceStatus {
        position: fixed;
        bottom: 12px;
@ -119,11 +119,14 @@
        border-radius: 24px;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
        overflow: hidden;
+        pointer-events: auto;
+        cursor: pointer;
      }
      #agentViz canvas {
        width: 100% !important;
        height: 100% !important;
        display: block;
+        pointer-events: auto;
      }
      #agentIndicator .label {
        display: none;
@ -140,10 +143,6 @@
      #agentIndicator.speaking {
        color: #8b4513;
      }
-      /* Deepen the background while PTT is active */
-      body.ptt-active {
-        background: radial-gradient(ellipse at 50% 44%, #f2caa8 0%, #e8b898 100%);
-      }
      #controls {
        position: fixed;
        top: 12px;
@ -167,20 +166,236 @@
        transform: translateY(1px);
        box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
      }
+
+      /* Toast notifications */
+      #toast-container {
+        position: fixed;
+        top: 16px;
+        left: 50%;
+        transform: translateX(-50%);
+        width: min(92vw, 480px);
+        max-height: calc(100vh - 32px);
+        overflow-y: auto;
+        overflow-x: hidden;
+        display: flex;
+        flex-direction: column;
+        gap: 10px;
+        z-index: 100;
+        pointer-events: auto;
+        /* Keep the scrollbar thin and low-contrast */
+        scrollbar-width: thin;
+        scrollbar-color: rgba(255,200,140,0.25) transparent;
+        padding-bottom: 4px;
+      }
+      #toast-container::-webkit-scrollbar {
+        width: 4px;
+      }
+      #toast-container::-webkit-scrollbar-track {
+        background: transparent;
+      }
+      #toast-container::-webkit-scrollbar-thumb {
+        background: rgba(255,200,140,0.25);
+        border-radius: 2px;
+      }
+      .toast {
+        pointer-events: auto;
+        background: rgba(28, 22, 16, 0.92);
+        border: 1px solid rgba(255, 200, 140, 0.18);
+        border-radius: 12px;
+        padding: 14px 16px 14px 16px;
+        display: flex;
+        flex-direction: column;
+        gap: 8px;
+        box-shadow: 0 4px 24px rgba(0, 0, 0, 0.45);
+        animation: toast-in 0.22s cubic-bezier(0.34, 1.4, 0.64, 1) both;
+        position: relative;
+        overflow: hidden;
+        max-width: 100%;
+      }
+      .toast.dismissing {
+        animation: toast-out 0.18s ease-in both;
+      }
+      @keyframes toast-in {
+        from { opacity: 0; transform: translateY(-14px) scale(0.96); }
+        to   { opacity: 1; transform: translateY(0)     scale(1); }
+      }
+      @keyframes toast-out {
+        from { opacity: 1; transform: translateY(0)     scale(1); }
+        to   { opacity: 0; transform: translateY(-10px) scale(0.96); }
+      }
+      .toast-progress {
+        position: absolute;
+        bottom: 0;
+        left: 0;
+        height: 2px;
+        background: rgba(255, 190, 120, 0.55);
+        width: 100%;
+        transform-origin: left;
+        animation: toast-progress-shrink linear both;
+      }
+      @keyframes toast-progress-shrink {
+        from { transform: scaleX(1); }
+        to   { transform: scaleX(0); }
+      }
+      .toast-header {
+        display: flex;
+        justify-content: space-between;
+        align-items: flex-start;
+        gap: 10px;
+      }
+      .toast-title {
+        font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+        font-size: 11px;
+        font-weight: 600;
+        letter-spacing: 0.07em;
+        color: rgba(255, 200, 140, 0.85);
+        text-transform: uppercase;
+        flex: 1;
+        min-width: 0;
+        overflow: hidden;
+        text-overflow: ellipsis;
+        white-space: nowrap;
+      }
+      .toast-close {
+        background: none;
+        border: none;
+        color: rgba(255, 245, 235, 0.35);
+        font-size: 16px;
+        line-height: 1;
+        cursor: pointer;
+        padding: 0 2px;
+        flex-shrink: 0;
+        transition: color 0.15s;
+      }
+      .toast-close:hover {
+        color: rgba(255, 245, 235, 0.85);
+      }
+      .toast-body {
+        font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+        font-size: 12px;
+        line-height: 1.65;
+        color: rgba(255, 245, 235, 0.82);
+        white-space: normal;
+        word-break: break-word;
+        user-select: text;
+        -webkit-user-select: text;
+      }
+      .toast-body p { margin: 0 0 6px; }
+      .toast-body p:last-child { margin-bottom: 0; }
+      .toast-body h1, .toast-body h2, .toast-body h3,
+      .toast-body h4, .toast-body h5, .toast-body h6 {
+        font-size: 13px;
+        font-weight: 700;
+        color: rgba(255, 200, 140, 0.95);
+        margin: 8px 0 4px;
+      }
+      .toast-body ul, .toast-body ol {
+        margin: 4px 0 6px;
+        padding-left: 18px;
+      }
+      .toast-body li { margin-bottom: 2px; }
+      .toast-body code {
+        background: rgba(255,255,255,0.07);
+        border-radius: 4px;
+        padding: 1px 5px;
+        font-size: 11px;
+      }
+      .toast-body pre {
+        background: rgba(0,0,0,0.35);
+        border-radius: 6px;
+        padding: 8px 10px;
+        overflow-x: auto;
+        margin: 6px 0;
+      }
+      .toast-body pre code {
+        background: none;
+        padding: 0;
+        font-size: 11px;
+      }
+      .toast-body table {
+        border-collapse: collapse;
+        width: 100%;
+        font-size: 11px;
+        margin: 6px 0;
+      }
+      .toast-body th, .toast-body td {
+        border: 1px solid rgba(255,200,140,0.2);
+        padding: 4px 8px;
+        text-align: left;
+      }
+      .toast-body th {
+        background: rgba(255,200,140,0.08);
+        color: rgba(255,200,140,0.9);
+        font-weight: 600;
+      }
+      .toast-body a {
+        color: rgba(255,200,140,0.85);
+        text-decoration: underline;
+      }
+      .toast-body blockquote {
+        border-left: 3px solid rgba(255,200,140,0.3);
+        margin: 6px 0;
+        padding-left: 10px;
+        color: rgba(255,245,235,0.55);
+      }
+      .toast-body hr {
+        border: none;
+        border-top: 1px solid rgba(255,200,140,0.15);
+        margin: 8px 0;
+      }
+      .toast-choices {
+        display: flex;
+        flex-wrap: wrap;
+        gap: 8px;
+        margin-top: 4px;
+      }
+      .toast-choice-btn {
+        background: rgba(255, 200, 140, 0.12);
+        border: 1px solid rgba(255, 200, 140, 0.35);
+        border-radius: 8px;
+        color: rgba(255, 245, 235, 0.90);
+        font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+        font-size: 12px;
+        padding: 6px 14px;
+        cursor: pointer;
+        transition: background 0.15s, border-color 0.15s;
+        flex: 1 1 auto;
+        text-align: center;
+      }
+      .toast-choice-btn:hover {
+        background: rgba(255, 200, 140, 0.25);
+        border-color: rgba(255, 200, 140, 0.65);
+      }
+      .toast-choice-btn:active {
+        background: rgba(255, 200, 140, 0.38);
+      }
+      .toast-choice-btn:disabled {
+        opacity: 0.4;
+        cursor: default;
+      }
+      .toast-image {
+        width: 100%;
+        max-height: 320px;
+        object-fit: contain;
+        border-radius: 8px;
+        display: block;
+      }
    </style>
  </head>
  <body>
-    <div id="controls" data-no-ptt="1">
-      <button id="resetSessionBtn" class="control-btn" type="button" data-no-ptt="1">Reset</button>
+    <div id="controls">
+      <button id="resetSessionBtn" class="control-btn" type="button">Reset</button>
    </div>
    <div id="log"><div id="log-inner"></div></div>
-    <div id="agentIndicator">
-      <div id="agentViz"></div>
+      <div id="agentIndicator" data-ptt="1">
+        <div id="agentViz" data-ptt="1"></div>
      <span class="label"></span>
    </div>
    <div id="voiceStatus"></div>
+    <div id="toast-container"></div>
    <audio id="remoteAudio" autoplay playsinline hidden></audio>

+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="/static/three.min.js"></script>
    <script>
      const logEl = document.getElementById("log-inner");
@ -190,6 +405,125 @@
      const agentVizEl = document.getElementById("agentViz");
      const agentLabel = agentIndicator.querySelector(".label");
      const resetSessionBtn = document.getElementById("resetSessionBtn");
+      const toastContainer = document.getElementById("toast-container");
+
+      // --- Toast notifications ---
+      // Show a dismissible toast at the top of #toast-container.
+      //   kind       - "image" uses `content` as the <img> src; any other value
+      //                renders `content` as HTML or markdown in the toast body.
+      //   content    - image source, HTML fragment, or markdown/plain text.
+      //   title      - optional header text; the title element is skipped when empty.
+      //   durationMs - auto-dismiss delay in milliseconds. A positive number starts
+      //                a countdown (shown by a shrinking .toast-progress bar); any
+      //                other value keeps the toast open until it is closed manually.
+      const showToast = (kind, content, title, durationMs) => {
+        const toast = document.createElement("div");
+        toast.className = "toast";
+
+        // Header row (title + close button)
+        const header = document.createElement("div");
+        header.className = "toast-header";
+
+        if (title) {
+          const titleEl = document.createElement("span");
+          titleEl.className = "toast-title";
+          titleEl.textContent = title;
+          header.appendChild(titleEl);
+        }
+
+        const closeBtn = document.createElement("button");
+        closeBtn.className = "toast-close";
+        closeBtn.setAttribute("type", "button");
+        closeBtn.setAttribute("aria-label", "Dismiss");
+        closeBtn.textContent = "×";
+        header.appendChild(closeBtn);
+
+        toast.appendChild(header);
+
+        // Body
+        if (kind === "image") {
+          const img = document.createElement("img");
+          img.className = "toast-image";
+          img.src = content;
+          img.alt = title || "image";
+          toast.appendChild(img);
+        } else {
+          const body = document.createElement("div");
+          body.className = "toast-body";
+          // If content looks like HTML, inject directly; otherwise render as markdown.
+          // NOTE(review): both innerHTML paths insert server-supplied markup without
+          // sanitising it. If toast content can ever carry untrusted text, pass it
+          // through an HTML sanitiser before assigning innerHTML.
+          const looksLikeHtml = /^\s*<[a-zA-Z]/.test(content);
+          if (looksLikeHtml) {
+            body.innerHTML = content;
+          } else if (typeof marked !== "undefined") {
+            body.innerHTML = marked.parse(content);
+          } else {
+            body.textContent = content;
+          }
+          toast.appendChild(body);
+        }
+
+        let autoTimer = null;
+        let dismissed = false;
+
+        // dismiss must be declared before the close button and timer reference it
+        const dismiss = () => {
+          // Idempotent: the close button and the auto-dismiss timer may both fire.
+          if (dismissed) return;
+          dismissed = true;
+          if (autoTimer !== null) clearTimeout(autoTimer);
+          toast.classList.add("dismissing");
+          // Fallback removal in case animationend never fires for the toast.
+          const fallback = setTimeout(() => toast.remove(), 400);
+          toast.addEventListener("animationend", (e) => {
+            // animationend bubbles, so ignore the progress bar's own animation.
+            if (e.target !== toast) return;
+            clearTimeout(fallback);
+            toast.remove();
+          });
+        };
+
+        // Honour durationMs: previously the argument was accepted but ignored, so
+        // toasts never expired and the .toast-progress styles were never used.
+        if (typeof durationMs === "number" && durationMs > 0) {
+          const progress = document.createElement("div");
+          progress.className = "toast-progress";
+          // The stylesheet leaves the animation duration unset; supply it here.
+          progress.style.animationDuration = durationMs + "ms";
+          toast.appendChild(progress);
+          autoTimer = setTimeout(dismiss, durationMs);
+        }
+
+        closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
+        toastContainer.prepend(toast);
+        toastContainer.scrollTop = 0;
+      };
+
+      // --- Choice toasts (ask_user tool) ---
+      // Render an ask_user prompt as a toast with one button per choice.
+      //   requestId - echoed back as `request_id` in the ui-response message.
+      //   question  - shown as plain text (assigned via textContent).
+      //   choices   - array of labels; the clicked label is sent back as `value`.
+      //   title     - optional header text; the title element is skipped when empty.
+      // NOTE(review): closing the toast with the × button sends no ui-response —
+      // confirm the server side tolerates an unanswered request.
+      const showChoice = (requestId, question, choices, title) => {
+        const card = document.createElement("div");
+        card.className = "toast";
+
+        // Play the exit animation, then detach the card; the timer is a fallback
+        // for the case where animationend never arrives.
+        const dismiss = () => {
+          card.classList.add("dismissing");
+          const removalTimer = setTimeout(() => card.remove(), 400);
+          card.addEventListener(
+            "animationend",
+            () => {
+              clearTimeout(removalTimer);
+              card.remove();
+            },
+            { once: true },
+          );
+        };
+
+        // Header: optional title followed by the close button.
+        const headerRow = document.createElement("div");
+        headerRow.className = "toast-header";
+        if (title) {
+          const heading = document.createElement("span");
+          heading.className = "toast-title";
+          heading.textContent = title;
+          headerRow.appendChild(heading);
+        }
+        const dismissBtn = document.createElement("button");
+        dismissBtn.className = "toast-close";
+        dismissBtn.setAttribute("type", "button");
+        dismissBtn.setAttribute("aria-label", "Dismiss");
+        dismissBtn.textContent = "×";
+        dismissBtn.addEventListener("click", (event) => {
+          event.stopPropagation();
+          dismiss();
+        });
+        headerRow.appendChild(dismissBtn);
+        card.appendChild(headerRow);
+
+        // Question text
+        const prompt = document.createElement("div");
+        prompt.className = "toast-body";
+        prompt.textContent = question;
+        card.appendChild(prompt);
+
+        // One button per choice; the first click answers and locks every button.
+        const choiceRow = document.createElement("div");
+        choiceRow.className = "toast-choices";
+        choices.forEach((label) => {
+          const option = document.createElement("button");
+          option.className = "toast-choice-btn";
+          option.setAttribute("type", "button");
+          option.textContent = label;
+          option.addEventListener("click", (event) => {
+            event.stopPropagation();
+            // Disable all buttons to prevent double-send
+            for (const other of choiceRow.querySelectorAll(".toast-choice-btn")) {
+              other.disabled = true;
+            }
+            sendJson({ type: "ui-response", request_id: requestId, value: label });
+            dismiss();
+          });
+          choiceRow.appendChild(option);
+        });
+        card.appendChild(choiceRow);
+
+        toastContainer.prepend(card);
+        toastContainer.scrollTop = 0;
+      };

      // --- Agent state indicator ---
      const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
@ -248,7 +582,8 @@
          powerPreference: "high-performance",
        });
        renderer.setPixelRatio(1);
-        renderer.setClearColor(0xa09b96, 1);
+        renderer.setClearColor(0xe8e4e0, 1);
+        renderer.domElement.dataset.ptt = "1";
        agentVizEl.innerHTML = "";
        agentVizEl.appendChild(renderer.domElement);

@ -358,12 +693,12 @@
        let deformScale = 1.0;
        let ringScale = 1.0;   // uniform xz scale — used for thickness throb when thinking
        let spinSpeed = 0.0;
-        // Card background colour lerp: 0 = idle coral, 1 = dark listening
+        // Card background colour lerp: 0 = idle coral, 1 = dark coral (PTT/listening)
        let cardColorT = 0.0;
        let connectedT = 0.0;   // 0 = gray (disconnected), 1 = coral (connected)
-        const CARD_GRAY_RGB   = [160, 155, 150]; // disconnected gray
-        const CARD_IDLE_RGB   = [212, 85, 63];   // #d4553f
-        const CARD_LISTEN_RGB = [120, 40, 28];   // dark desaturated coral
+        const CARD_GRAY_RGB   = [232, 228, 224]; // #e8e4e0 — disconnected light warm gray
+        const CARD_IDLE_RGB   = [212, 85,  63];  // #d4553f — connected idle coral
+        const CARD_LISTEN_RGB = [120, 40,  28];  // #78281c — PTT active dark coral

        const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };

@ -696,7 +1031,6 @@

      const setPushToTalkState = (pressed, notifyServer = true) => {
        pttPressed = pressed;
-        document.body.classList.toggle("ptt-active", pressed);
        setMicCaptureEnabled(pressed);
        if (notifyServer && ws.readyState === WebSocket.OPEN) {
          ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
@ -947,26 +1281,27 @@
          if (!appStarted) {
            await bootstrap();
          }
-          if (sendUserMessage("/reset")) {
-            showStatus("Reset command sent.", 1500);
+          if (ws.readyState === WebSocket.OPEN) {
+            sendJson({ type: "command", command: "reset" });
+            showStatus("Session reset.", 1500);
          }
        });
      }

-      // --- Whole-screen PTT pointer handling ---
+      // --- Center-card PTT pointer handling ---
+      // Only touches that land on #agentIndicator / #agentViz (data-ptt="1") trigger PTT.
      // We track active pointer IDs so multi-touch doesn't double-fire.
      const activePointers = new Set();

      document.addEventListener("pointerdown", async (event) => {
-        if (event.target instanceof Element && event.target.closest("[data-no-ptt='1']")) {
+        if (!(event.target instanceof Element) || !event.target.closest("[data-ptt='1']")) {
          return;
        }
+        activePointers.add(event.pointerId);
        if (!appStarted) {
          await bootstrap();
-          return;
        }
        ensureVisualizerAudioMeter();
-        activePointers.add(event.pointerId);
        if (activePointers.size === 1) beginPushToTalk();
      }, { passive: false });

@ -1020,6 +1355,30 @@
            if (agentState !== STATES.listening && STATES[newState]) {
              setAgentState(newState);
            }
+          } else if (msg.role === "toast") {
+            try {
+              const t = JSON.parse(msg.text || "{}");
+              showToast(
+                t.kind || "text",
+                t.content || "",
+                t.title || "",
+                typeof t.duration_ms === "number" ? t.duration_ms : 6000,
+              );
+            } catch (_) {
+              showToast("text", msg.text || "", "", 6000);
+            }
+          } else if (msg.role === "choice") {
+            try {
+              const c = JSON.parse(msg.text || "{}");
+              showChoice(
+                c.request_id || "",
+                c.question || "",
+                Array.isArray(c.choices) ? c.choices : [],
+                c.title || "",
+              );
+            } catch (_) {
+              // Malformed choice payload — ignore.
+            }
          } else if (msg.role === "wisper") {
            // suppress wisper debug output
          } else {
--- a/supertonic_gateway.py
+++ b/supertonic_gateway.py
@ -1,442 +1,270 @@
+"""SuperTonic Gateway — nanobot integration for the web UI.
+
+Connects to the already-running nanobot process via a Unix domain socket.
+nanobot must be started separately (e.g. ``nanobot gateway``) with the API
+channel enabled in its config.
+
+Wire protocol (newline-delimited JSON)
+---------------------------------------
+Client → nanobot::
+
+    {"type": "message",     "content": "hello", "chat_id": "web"}
+    {"type": "ping"}
+    {"type": "ui-response", "request_id": "<uuid>", "value": "Option A", "chat_id": "web"}
+    {"type": "command",     "command": "reset", "chat_id": "web"}
+
+nanobot → client::
+
+    {"type": "message",     "content": "Hi!", "chat_id": "web", "is_progress": false}
+    {"type": "agent_state", "state": "thinking", "chat_id": "web"}
+    {"type": "toast",       "kind": "text"|"image", "content": "...", "title": "...", "duration_ms": 5000}
+    {"type": "choice",      "request_id": "<uuid>", "question": "...", "choices": ["A", "B"],
+                            "title": "...", "chat_id": "web"}
+    {"type": "pong"}
+    {"type": "error",       "error": "..."}
+
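+The existing public ``SuperTonicGateway`` methods (``spawn_tui``,
+``send_user_message``, ``stop_tui``, ``shutdown``) are unchanged, so existing
+callers in ``app.py`` and ``voice_rtc.py`` keep working. ``app.py`` additionally
+calls ``send_ui_response`` and ``send_command`` to forward ``ui-response`` and
+``command`` messages.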
+"""
+
+from __future__ import annotations
+
 import asyncio
-import contextlib
+import json
 import os
-import pty
-import re
-import shlex
-import signal
-import subprocess
-import time
-from collections import deque
 from pathlib import Path

 from wisper import WisperBus, WisperEvent

-
-ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
-CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b-\x1f\x7f]")
-BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
-SPINNER_ONLY_RE = re.compile(r"^[\s|/\\\-]+$")
-BOX_DRAWING_ONLY_RE = re.compile(r"^[\s\u2500-\u257f]+$")
-THINKING_LINE_RE = re.compile(
-    r"\b(?:agent|nanobot|napbot)\b(?:\s+is)?\s+thinking\b",
-    re.IGNORECASE,
-)
-USER_ECHO_LINE_RE = re.compile(r"^(?:you|user)\s*:", re.IGNORECASE)
-TOOL_STREAM_LINE_RE = re.compile(
-    r"^(?:tool(?:\s+call|\s+output)?|calling\s+tool|running\s+tool|executing\s+tool)\b",
-    re.IGNORECASE,
-)
-LEADING_NON_WORD_RE = re.compile(r"^[^\w]+")
-WHITESPACE_RE = re.compile(r"\s+")
-AGENT_OUTPUT_PREFIX_RE = re.compile(
-    r"^(?:nanobot|napbot)\b\s*[:>\-]?\s*", re.IGNORECASE
-)
-EMOJI_RE = re.compile(
-    "["  # Common emoji and pictograph blocks.
-    "\U0001f1e6-\U0001f1ff"
-    "\U0001f300-\U0001f5ff"
-    "\U0001f600-\U0001f64f"
-    "\U0001f680-\U0001f6ff"
-    "\U0001f700-\U0001f77f"
-    "\U0001f780-\U0001f7ff"
-    "\U0001f800-\U0001f8ff"
-    "\U0001f900-\U0001f9ff"
-    "\U0001fa00-\U0001faff"
-    "\u2600-\u26ff"
-    "\u2700-\u27bf"
-    "\ufe0f"
-    "\u200d"
-    "]"
-)
+# Default path — must match nanobot's channels.api.socket_path config value.
+DEFAULT_SOCKET_PATH = Path.home() / ".nanobot" / "api.sock"


-def _clean_output(text: str) -> str:
-    cleaned = ANSI_ESCAPE_RE.sub("", text)
-    cleaned = BRAILLE_SPINNER_RE.sub(" ", cleaned)
-    cleaned = CONTROL_CHAR_RE.sub("", cleaned)
-    return cleaned.replace("\r", "\n")
+# ---------------------------------------------------------------------------
+# NanobotApiProcess — connects to the running nanobot via its Unix socket
+# ---------------------------------------------------------------------------


-def _resolve_nanobot_command_and_workdir() -> tuple[str, Path]:
-    command_override = os.getenv("NANOBOT_COMMAND")
-    workdir_override = os.getenv("NANOBOT_WORKDIR")
+class NanobotApiProcess:
+    """Connects to the running nanobot process via its Unix domain socket.

-    if workdir_override:
-        default_workdir = Path(workdir_override).expanduser()
-    else:
-        default_workdir = Path.home()
+    Lifecycle
+    ---------
+    ``start()``  — opens a connection to nanobot's API socket.
+    ``send()``   — writes a user message over the socket.
+    ``stop()``   — closes the connection.
+    """

-    if command_override:
-        return command_override, default_workdir
-
-    nanobot_dir = Path.home() / "nanobot"
-    nanobot_python_candidates = [
-        nanobot_dir / ".venv" / "bin" / "python",
-        nanobot_dir / "venv" / "bin" / "python",
-    ]
-    for nanobot_venv_python in nanobot_python_candidates:
-        if nanobot_venv_python.exists():
-            if not workdir_override:
-                default_workdir = nanobot_dir
-            return (
-                f"{nanobot_venv_python} -m nanobot agent --no-markdown",
-                default_workdir,
-            )
-
-    return "nanobot agent --no-markdown", default_workdir
-
-
-def _infer_venv_root(command_parts: list[str], workdir: Path) -> Path | None:
-    if not command_parts:
-        return None
-
-    binary = Path(command_parts[0]).expanduser()
-    if (
-        binary.is_absolute()
-        and binary.name.startswith("python")
-        and binary.parent.name == "bin"
-    ):
-        return binary.parent.parent
-
-    for candidate in (workdir / ".venv", workdir / "venv"):
-        if (candidate / "bin" / "python").exists():
-            return candidate
-    return None
-
-
-def _build_process_env(
-    command_parts: list[str], workdir: Path
-) -> tuple[dict[str, str], Path | None]:
-    env = os.environ.copy()
-    env.pop("PYTHONHOME", None)
-
-    venv_root = _infer_venv_root(command_parts, workdir)
-    if not venv_root:
-        return env, None
-
-    venv_bin = str(venv_root / "bin")
-    path_entries = [entry for entry in env.get("PATH", "").split(os.pathsep) if entry]
-    path_entries = [entry for entry in path_entries if entry != venv_bin]
-    path_entries.insert(0, venv_bin)
-    env["PATH"] = os.pathsep.join(path_entries)
-    env["VIRTUAL_ENV"] = str(venv_root)
-    return env, venv_root
-
-
-class NanobotTUIProcess:
-    def __init__(self, bus: WisperBus, command: str, workdir: Path) -> None:
+    def __init__(self, bus: WisperBus, socket_path: Path) -> None:
        self._bus = bus
-        self._command = command
-        self._workdir = workdir
-        self._process: subprocess.Popen[bytes] | None = None
-        self._master_fd: int | None = None
-        self._read_task: asyncio.Task[None] | None = None
-        self._pending_output = ""
-        self._suppress_noisy_ui = os.getenv(
-            "NANOBOT_SUPPRESS_NOISY_UI", "1"
-        ).strip() not in {
-            "0",
-            "false",
-            "False",
-            "no",
-            "off",
-        }
-        self._dedup_window_s = max(
-            0.2, float(os.getenv("NANOBOT_OUTPUT_DEDUP_WINDOW_S", "1.5"))
-        )
-        self._recent_lines: deque[tuple[str, float]] = deque()
-        self._last_tts_line = ""
+        self._socket_path = socket_path
+        self._reader: asyncio.StreamReader | None = None
+        self._writer: asyncio.StreamWriter | None = None
+        self._read_task: asyncio.Task | None = None

    @property
    def running(self) -> bool:
-        return self._process is not None and self._process.poll() is None
+        return (
+            self._writer is not None
+            and not self._writer.is_closing()
+            and self._read_task is not None
+            and not self._read_task.done()
+        )

    async def start(self) -> None:
        if self.running:
            await self._bus.publish(
-                WisperEvent(role="system", text="Nanobot TUI is already running.")
+                WisperEvent(role="system", text="Already connected to nanobot.")
            )
            return

-        command_parts = [
-            os.path.expandvars(os.path.expanduser(part))
-            for part in shlex.split(self._command)
-        ]
-        if not command_parts:
-            await self._bus.publish(
-                WisperEvent(role="system", text="NANOBOT_COMMAND is empty.")
-            )
-            return
-
-        if not self._workdir.exists():
-            await self._bus.publish(
-                WisperEvent(
-                    role="system",
-                    text=f"NANOBOT_WORKDIR does not exist: {self._workdir}",
-                )
-            )
-            return
-
-        master_fd, slave_fd = pty.openpty()
-        child_env, child_venv_root = _build_process_env(
-            command_parts=command_parts, workdir=self._workdir
-        )
-        try:
-            self._process = subprocess.Popen(
-                command_parts,
-                stdin=slave_fd,
-                stdout=slave_fd,
-                stderr=slave_fd,
-                cwd=str(self._workdir),
-                start_new_session=True,
-                env=child_env,
-            )
-        except FileNotFoundError as exc:
-            os.close(master_fd)
-            os.close(slave_fd)
+        if not self._socket_path.exists():
            await self._bus.publish(
                WisperEvent(
                    role="system",
                    text=(
-                        "Could not start Nanobot process "
-                        f"(command='{command_parts[0]}', workdir='{self._workdir}'): {exc}. "
-                        "Check NANOBOT_COMMAND and NANOBOT_WORKDIR."
+                        f"Nanobot API socket not found at {self._socket_path}. "
+                        "Make sure nanobot is running with the API channel enabled "
+                        "(set channels.api.enabled = true in ~/.nanobot/config.json, "
+                        "then run: nanobot gateway)."
                    ),
                )
            )
            return
-        except Exception as exc:
-            os.close(master_fd)
-            os.close(slave_fd)
-            await self._bus.publish(
-                WisperEvent(role="system", text=f"Failed to spawn TUI process: {exc}")
-            )
-            return

-        os.close(slave_fd)
-        os.set_blocking(master_fd, False)
-        self._master_fd = master_fd
-        self._read_task = asyncio.create_task(
-            self._read_output(), name="nanobot-tui-reader"
-        )
-        await self._bus.publish(
-            WisperEvent(
-                role="system",
-                text=f"Spawned Nanobot TUI with command: {' '.join(command_parts)}",
-            )
-        )
-        if child_venv_root:
-            await self._bus.publish(
-                WisperEvent(
-                    role="system",
-                    text=f"Nanobot runtime venv: {child_venv_root}",
-                )
-            )
-
-    async def send(self, text: str) -> None:
-        if not self.running or self._master_fd is None:
-            await self._bus.publish(
-                WisperEvent(
-                    role="system", text="Nanobot TUI is not running. Click spawn first."
-                )
-            )
-            return
-        message = text.rstrip("\n") + "\n"
        try:
-            os.write(self._master_fd, message.encode())
+            self._reader, self._writer = await asyncio.open_unix_connection(
+                path=str(self._socket_path)
+            )
        except OSError as exc:
            await self._bus.publish(
-                WisperEvent(role="system", text=f"Failed to write to TUI: {exc}")
+                WisperEvent(
+                    role="system",
+                    text=f"Could not connect to nanobot API socket: {exc}",
+                )
            )
-
-    async def stop(self) -> None:
-        if self._read_task:
-            self._read_task.cancel()
-            with contextlib.suppress(asyncio.CancelledError):
-                await self._read_task
-            self._read_task = None
-
-        if self.running and self._process:
-            try:
-                os.killpg(self._process.pid, signal.SIGTERM)
-            except ProcessLookupError:
-                pass
-            except Exception:
-                self._process.terminate()
-            try:
-                self._process.wait(timeout=3)
-            except Exception:
-                self._process.kill()
-                self._process.wait(timeout=1)
-
-        if self._master_fd is not None:
-            try:
-                os.close(self._master_fd)
-            except OSError:
-                pass
-            self._master_fd = None
-        self._process = None
-        self._pending_output = ""
-        self._recent_lines.clear()
-        self._last_tts_line = ""
-        await self._bus.publish(WisperEvent(role="system", text="Stopped Nanobot TUI."))
-
-    async def _read_output(self) -> None:
-        if self._master_fd is None:
            return
-        while self.running:
-            if not await self._wait_for_fd_readable():
-                break
-            try:
-                chunk = os.read(self._master_fd, 4096)
-            except BlockingIOError:
-                continue
-            except OSError:
-                break

-            if not chunk:
-                if not self.running:
-                    break
-                await asyncio.sleep(0.01)
-                continue
+        self._read_task = asyncio.create_task(self._read_loop(), name="nanobot-api-reader")
+        await self._bus.publish(WisperEvent(role="system", text="Connected to nanobot."))

-            text = _clean_output(chunk.decode(errors="ignore"))
-            if not text.strip():
-                continue
-
-            displayable, tts_publishable, saw_thinking = self._consume_output_chunk(
-                text
-            )
-            if saw_thinking:
-                await self._bus.publish(
-                    WisperEvent(role="agent-state", text="thinking")
-                )
-            if displayable:
-                await self._bus.publish(WisperEvent(role="nanobot", text=displayable))
-            if tts_publishable:
-                await self._bus.publish(
-                    WisperEvent(role="nanobot-tts", text=tts_publishable)
-                )
-
-        trailing_display, trailing_tts, _ = self._consume_output_chunk("\n")
-        if trailing_display:
-            await self._bus.publish(WisperEvent(role="nanobot", text=trailing_display))
-        if trailing_tts:
-            await self._bus.publish(WisperEvent(role="nanobot-tts", text=trailing_tts))
-
-        if self._process is not None:
-            exit_code = self._process.poll()
+    async def send(self, text: str) -> None:
+        if not self.running or self._writer is None:
            await self._bus.publish(
                WisperEvent(
-                    role="system", text=f"Nanobot TUI exited (code={exit_code})."
+                    role="system",
+                    text="Not connected to nanobot. Click spawn first.",
                )
            )
-
-    def _consume_output_chunk(self, text: str) -> tuple[str, str, bool]:
-        """Return (displayable, tts_publishable, saw_thinking)."""
-        self._pending_output += text
-
-        lines = self._pending_output.split("\n")
-        self._pending_output = lines.pop()
-
-        if len(self._pending_output) > 1024:
-            lines.append(self._pending_output)
-            self._pending_output = ""
-
-        kept_lines: list[str] = []
-        tts_lines: list[str] = []
-        saw_thinking = False
-        for line in lines:
-            normalized = self._normalize_line(line)
-            if not normalized:
-                continue
-            if self._suppress_noisy_ui and self._is_noisy_ui_line(normalized):
-                # Detect thinking lines even though they are filtered from display.
-                candidate = LEADING_NON_WORD_RE.sub("", normalized)
-                if THINKING_LINE_RE.search(candidate):
-                    saw_thinking = True
-                continue
-            if normalized != self._last_tts_line:
-                tts_lines.append(normalized)
-                self._last_tts_line = normalized
-            if self._is_recent_duplicate(normalized):
-                continue
-            kept_lines.append(normalized)
-
-        return "\n".join(kept_lines).strip(), "\n".join(tts_lines).strip(), saw_thinking
-
-    def _normalize_line(self, line: str) -> str:
-        without_emoji = EMOJI_RE.sub(" ", line)
-        normalized = WHITESPACE_RE.sub(" ", without_emoji).strip()
-        # Strip leading "nanobot:" prefix that the TUI echoes in its own output,
-        # since the frontend already labels lines with the role name and TTS
-        # should not read the agent's own name aloud.
-        normalized = AGENT_OUTPUT_PREFIX_RE.sub("", normalized)
-        return normalized
-
-    def _is_noisy_ui_line(self, line: str) -> bool:
-        if SPINNER_ONLY_RE.fullmatch(line):
-            return True
-        if BOX_DRAWING_ONLY_RE.fullmatch(line):
-            return True
-
-        candidate = LEADING_NON_WORD_RE.sub("", line)
-        if THINKING_LINE_RE.search(candidate):
-            return True
-        if TOOL_STREAM_LINE_RE.match(candidate):
-            return True
-        if USER_ECHO_LINE_RE.match(candidate):
-            return True
-        return False
-
-    async def _wait_for_fd_readable(self) -> bool:
-        if self._master_fd is None:
-            return False
-
-        loop = asyncio.get_running_loop()
-        ready: asyncio.Future[None] = loop.create_future()
-
-        def _mark_ready() -> None:
-            if not ready.done():
-                ready.set_result(None)
-
+            return
+        payload = json.dumps({"type": "message", "content": text, "chat_id": "web"}) + "\n"
        try:
-            loop.add_reader(self._master_fd, _mark_ready)
-        except (AttributeError, NotImplementedError, OSError, ValueError):
-            await asyncio.sleep(0.01)
-            return True
+            self._writer.write(payload.encode())
+            await self._writer.drain()
+        except OSError as exc:
+            await self._bus.publish(WisperEvent(role="system", text=f"Send failed: {exc}"))
+            await self._cleanup()

+    async def send_ui_response(self, request_id: str, value: str) -> None:
+        """Relay a ui-response (the user's choice selection) to nanobot.
+
+        Returns silently when there is no live connection. A failed write is
+        reported on the bus as a system event and the connection is cleaned up.
+        """
+        if not (self.running and self._writer is not None):
+            return
+        envelope = {
+            "type": "ui-response",
+            "request_id": request_id,
+            "value": value,
+            "chat_id": "web",
+        }
+        # Newline-delimited JSON: one object per line.
+        wire_line = f"{json.dumps(envelope)}\n"
        try:
-            await ready
-            return True
+            self._writer.write(wire_line.encode())
+            await self._writer.drain()
+        except OSError as exc:
+            failure = WisperEvent(role="system", text=f"Send failed: {exc}")
+            await self._bus.publish(failure)
+            await self._cleanup()
+
+    async def send_command(self, command: str) -> None:
+        """Deliver a command (for example ``reset``) to nanobot.
+
+        Publishes a system notice and returns when there is no live connection.
+        A failed write is reported on the bus and the connection is cleaned up.
+        """
+        if not (self.running and self._writer is not None):
+            not_connected = WisperEvent(
+                role="system",
+                text="Not connected to nanobot. Click spawn first.",
+            )
+            await self._bus.publish(not_connected)
+            return
+        envelope = {"type": "command", "command": command, "chat_id": "web"}
+        # Newline-delimited JSON: one object per line.
+        wire_line = f"{json.dumps(envelope)}\n"
+        try:
+            self._writer.write(wire_line.encode())
+            await self._writer.drain()
+        except OSError as exc:
+            failure = WisperEvent(role="system", text=f"Send failed: {exc}")
+            await self._bus.publish(failure)
+            await self._cleanup()
+
+    async def stop(self) -> None:
+        """Tear down the connection, then announce the disconnect on the bus."""
+        await self._cleanup()
+        farewell = WisperEvent(role="system", text="Disconnected from nanobot.")
+        await self._bus.publish(farewell)
+
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+
+    async def _cleanup(self) -> None:
+        """Cancel the reader task and close the socket connection.
+
+        Safe to call when already disconnected: a finished or missing reader
+        task and a missing writer are both skipped.
+        """
+        # Capture the writer before cancelling the reader: _read_loop's
+        # ``finally`` clears self._writer, so looking it up afterwards would
+        # find None and the socket would never be closed.
+        writer = self._writer
+
+        task = self._read_task
+        if task and not task.done():
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+        self._read_task = None
+        self._writer = None
+        self._reader = None
+
+        if writer is not None:
+            try:
+                writer.close()
+                await writer.wait_closed()
+            except OSError:
+                # Best-effort close: the peer may already have gone away.
+                pass
+
+    async def _read_loop(self) -> None:
+        """Read newline-delimited JSON from nanobot and publish WisperEvents.
+
+        Runs until EOF, a socket error, or cancellation. On exit the stream
+        writer is closed and both stream references are cleared. The
+        "closed the connection" notice is published only when the loop was not
+        cancelled locally.
+        """
+        assert self._reader is not None
+        cancelled = False
+        try:
+            while True:
+                try:
+                    line = await self._reader.readline()
+                except ValueError:
+                    # readline() raises ValueError when a line exceeds the
+                    # stream buffer limit; drop that message and keep reading.
+                    await self._bus.publish(
+                        WisperEvent(
+                            role="system",
+                            text="Dropped an oversized message from nanobot.",
+                        )
+                    )
+                    continue
+                except OSError:
+                    break
+                if not line:
+                    break  # EOF — nanobot closed the connection
+                await self._handle_line(line)
+        except asyncio.CancelledError:
+            # Local shutdown; remember it so the notice below is skipped.
+            cancelled = True
+            raise
        finally:
-            with contextlib.suppress(Exception):
-                loop.remove_reader(self._master_fd)
+            writer = self._writer
+            # Clear writer so running → False
+            self._writer = None
+            self._reader = None
+            # Close the transport here: once the reference is cleared, no
+            # other code path can reach it to close the socket.
+            if writer is not None:
+                writer.close()
+            if not cancelled:
+                await self._bus.publish(
+                    WisperEvent(role="system", text="Nanobot closed the connection.")
+                )

-    def _is_recent_duplicate(self, line: str) -> bool:
-        now = time.monotonic()
-        normalized = line.lower()
+    async def _handle_line(self, line: bytes) -> None:
+        """Decode one line from nanobot and publish the matching WisperEvent.
+
+        Blank lines are ignored. Anything that is not a JSON object (invalid
+        JSON, or valid JSON such as a list or number) is reported as a
+        malformed response. Recognised ``type`` values are ``message``,
+        ``agent_state``, ``toast``, ``choice``, ``pong`` and ``error``; any
+        other type is ignored.
+        """
+        raw = line.decode(errors="replace").strip()
+        if not raw:
+            return
+        try:
+            obj = json.loads(raw)
+        except json.JSONDecodeError:
+            obj = None
+        if not isinstance(obj, dict):
+            # Only JSON objects have .get(); other values would raise
+            # AttributeError below and kill the read loop.
+            await self._bus.publish(
+                WisperEvent(role="system", text=f"Malformed response from nanobot: {raw[:200]}")
+            )
+            return

-        while (
-            self._recent_lines
-            and (now - self._recent_lines[0][1]) > self._dedup_window_s
-        ):
-            self._recent_lines.popleft()
+        msg_type = str(obj.get("type", ""))

-        for previous, _timestamp in self._recent_lines:
-            if previous == normalized:
-                return True
+        if msg_type == "message":
+            content = str(obj.get("content", ""))
+            is_progress = bool(obj.get("is_progress", False))
+            if is_progress:
+                # Intermediate tool-call hint — show in UI, skip TTS
+                await self._bus.publish(WisperEvent(role="nanobot-progress", text=content))
+            else:
+                # Final answer — display + TTS
+                await self._bus.publish(WisperEvent(role="nanobot", text=content))
+                await self._bus.publish(WisperEvent(role="nanobot-tts", text=content))

-        self._recent_lines.append((normalized, now))
-        return False
+        elif msg_type == "agent_state":
+            state = str(obj.get("state", ""))
+            await self._bus.publish(WisperEvent(role="agent-state", text=state))
+
+        elif msg_type == "toast":
+            # Forward the full toast payload as JSON so the frontend can render it.
+            await self._bus.publish(WisperEvent(role="toast", text=json.dumps(obj)))
+
+        elif msg_type == "choice":
+            # Forward the full choice payload as JSON so the frontend can render it.
+            await self._bus.publish(WisperEvent(role="choice", text=json.dumps(obj)))
+
+        elif msg_type == "pong":
+            pass  # keepalive, ignore
+
+        elif msg_type == "error":
+            await self._bus.publish(
+                WisperEvent(role="system", text=f"Nanobot error: {obj.get('error', '')}")
+            )
+
+
+# ---------------------------------------------------------------------------
+# SuperTonicGateway — public interface (existing methods unchanged; adds send_ui_response, send_command)
+# ---------------------------------------------------------------------------


 class SuperTonicGateway:
    def __init__(self) -> None:
        self.bus = WisperBus()
        self._lock = asyncio.Lock()
-        self._tui: NanobotTUIProcess | None = None
+        self._process: NanobotApiProcess | None = None
+        socket_path = Path(os.getenv("NANOBOT_API_SOCKET", str(DEFAULT_SOCKET_PATH))).expanduser()
+        self._socket_path = socket_path

    async def subscribe(self) -> asyncio.Queue[WisperEvent]:
        return await self.bus.subscribe()
@ -445,18 +273,15 @@ class SuperTonicGateway:
        await self.bus.unsubscribe(queue)

    async def spawn_tui(self) -> None:
+        """Connect to nanobot (name kept for API compatibility with app.py)."""
        async with self._lock:
-            if self._tui and self._tui.running:
+            if self._process and self._process.running:
                await self.bus.publish(
-                    WisperEvent(role="system", text="Nanobot TUI is already running.")
+                    WisperEvent(role="system", text="Already connected to nanobot.")
                )
                return
-
-            command, workdir = _resolve_nanobot_command_and_workdir()
-            self._tui = NanobotTUIProcess(
-                bus=self.bus, command=command, workdir=workdir
-            )
-            await self._tui.start()
+            self._process = NanobotApiProcess(bus=self.bus, socket_path=self._socket_path)
+            await self._process.start()

    async def send_user_message(self, text: str) -> None:
        message = text.strip()
@ -464,20 +289,34 @@ class SuperTonicGateway:
            return
        await self.bus.publish(WisperEvent(role="user", text=message))
        async with self._lock:
-            if not self._tui:
+            if not self._process:
                await self.bus.publish(
                    WisperEvent(
                        role="system",
-                        text="Nanobot TUI is not running. Click spawn first.",
+                        text="Not connected to nanobot. Click spawn first.",
                    )
                )
                return
-            await self._tui.send(message)
+            await self._process.send(message)
+
+    async def send_ui_response(self, request_id: str, value: str) -> None:
+        """Relay a choice selection to nanobot; does nothing when no process exists."""
+        async with self._lock:
+            process = self._process
+            if not process:
+                return
+            await process.send_ui_response(request_id, value)
+
+    async def send_command(self, command: str) -> None:
+        """Send a command (e.g. 'reset') to nanobot.
+
+        When no connection has been spawned, publishes the same "not connected"
+        system notice that ``send_user_message`` uses instead of silently
+        dropping the command.
+        """
+        async with self._lock:
+            if not self._process:
+                await self.bus.publish(
+                    WisperEvent(
+                        role="system",
+                        text="Not connected to nanobot. Click spawn first.",
+                    )
+                )
+                return
+            await self._process.send_command(command)

    async def stop_tui(self) -> None:
+        """Disconnect from nanobot (name kept for API compatibility with app.py)."""
        async with self._lock:
-            if self._tui:
-                await self._tui.stop()
+            if self._process:
+                await self._process.stop()
+                self._process = None

    async def shutdown(self) -> None:
        await self.stop_tui()
--- a/voice_rtc.py
+++ b/voice_rtc.py
@ -41,9 +41,7 @@ try:
    from faster_whisper import WhisperModel

    FASTER_WHISPER_AVAILABLE = True
-except (
-    Exception
-):  # pragma: no cover - runtime fallback when faster-whisper is unavailable
+except Exception:  # pragma: no cover - runtime fallback when faster-whisper is unavailable
    WhisperModel = None  # type: ignore[assignment]
    FASTER_WHISPER_AVAILABLE = False

@ -82,10 +80,7 @@ ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
 CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]")
 BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
 TTS_ALLOWED_ASCII = set(
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    "abcdefghijklmnopqrstuvwxyz"
-    "0123456789"
-    " .,!?;:'\"()[]{}@#%&*+-_/<>|"
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?;:'\"()[]{}@#%&*+-_/<>|"
 )


@ -95,9 +90,7 @@ def _sanitize_tts_text(text: str) -> str:
    cleaned = cleaned.replace("\u00a0", " ")
    cleaned = cleaned.replace("•", " ")
    cleaned = CONTROL_CHAR_RE.sub(" ", cleaned)
-    cleaned = "".join(
-        ch if (ch in TTS_ALLOWED_ASCII or ch.isspace()) else " " for ch in cleaned
-    )
+    cleaned = "".join(ch if (ch in TTS_ALLOWED_ASCII or ch.isspace()) else " " for ch in cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned

@ -131,15 +124,9 @@ if AIORTC_AVAILABLE:
            self._timestamp = 0
            self._resample_state = None
            self._resample_source_rate: int | None = None
-            self._lead_in_ms = max(
-                0, int(os.getenv("HOST_RTC_OUTBOUND_LEAD_IN_MS", "120"))
-            )
-            self._lead_in_frames = (
-                self._lead_in_ms + self._frame_ms - 1
-            ) // self._frame_ms
-            self._lead_in_idle_s = max(
-                0.1, float(os.getenv("HOST_RTC_OUTBOUND_IDLE_S", "0.6"))
-            )
+            self._lead_in_ms = max(0, int(os.getenv("HOST_RTC_OUTBOUND_LEAD_IN_MS", "120")))
+            self._lead_in_frames = (self._lead_in_ms + self._frame_ms - 1) // self._frame_ms
+            self._lead_in_idle_s = max(0.1, float(os.getenv("HOST_RTC_OUTBOUND_IDLE_S", "0.6")))
            self._last_enqueue_at = 0.0
            self._closed = False
            self._frame_duration_s = frame_ms / 1000.0
@ -154,9 +141,7 @@ if AIORTC_AVAILABLE:
            )
            self._on_playing_changed: Callable[[bool], None] | None = None

-        async def enqueue_pcm(
-            self, pcm: bytes, sample_rate: int, channels: int = 1
-        ) -> None:
+        async def enqueue_pcm(self, pcm: bytes, sample_rate: int, channels: int = 1) -> None:
            if self._closed or not pcm:
                return

@ -244,9 +229,7 @@ if AIORTC_AVAILABLE:

            self._last_recv_at = loop.time()

-            frame = AudioFrame(
-                format="s16", layout="mono", samples=self._samples_per_frame
-            )
+            frame = AudioFrame(format="s16", layout="mono", samples=self._samples_per_frame)
            frame.planes[0].update(payload)
            frame.sample_rate = self._sample_rate
            frame.time_base = Fraction(1, self._sample_rate)
@ -263,9 +246,7 @@ else:
    class QueueAudioTrack:  # pragma: no cover - used only when aiortc is unavailable
        _on_playing_changed: Callable[[bool], None] | None = None

-        async def enqueue_pcm(
-            self, pcm: bytes, sample_rate: int, channels: int = 1
-        ) -> None:
+        async def enqueue_pcm(self, pcm: bytes, sample_rate: int, channels: int = 1) -> None:
            return

        def stop(self) -> None:
@ -296,23 +277,17 @@ class CommandSpeechToText:
    ) -> str | None:
        if not self.enabled or not pcm:
            return None
-        return await asyncio.to_thread(
-            self._transcribe_blocking, pcm, sample_rate, channels
-        )
+        return await asyncio.to_thread(self._transcribe_blocking, pcm, sample_rate, channels)

    def unavailable_reason(self) -> str:
        if not self._command_template:
            return "HOST_STT_COMMAND is not configured."
        return "HOST_STT_COMMAND failed to produce transcript."

-    def _transcribe_blocking(
-        self, pcm: bytes, sample_rate: int, channels: int
-    ) -> str | None:
+    def _transcribe_blocking(self, pcm: bytes, sample_rate: int, channels: int) -> str | None:
        tmp_path: str | None = None
        try:
-            tmp_path = _write_temp_wav(
-                pcm=pcm, sample_rate=sample_rate, channels=channels
-            )
+            tmp_path = _write_temp_wav(pcm=pcm, sample_rate=sample_rate, channels=channels)

            command = self._command_template
            if "{input_wav}" in command:
@ -343,9 +318,7 @@ class FasterWhisperSpeechToText:
    def __init__(self) -> None:
        self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
        self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
-        self._compute_type = (
-            os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
-        )
+        self._compute_type = os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
        self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
        self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
        self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
@ -357,12 +330,8 @@ class FasterWhisperSpeechToText:
            "off",
        }
        self._temperature = float(os.getenv("HOST_STT_TEMPERATURE", "0.0"))
-        self._log_prob_threshold = float(
-            os.getenv("HOST_STT_LOG_PROB_THRESHOLD", "-1.0")
-        )
-        self._no_speech_threshold = float(
-            os.getenv("HOST_STT_NO_SPEECH_THRESHOLD", "0.6")
-        )
+        self._log_prob_threshold = float(os.getenv("HOST_STT_LOG_PROB_THRESHOLD", "-1.0"))
+        self._no_speech_threshold = float(os.getenv("HOST_STT_NO_SPEECH_THRESHOLD", "0.6"))
        self._compression_ratio_threshold = float(
            os.getenv("HOST_STT_COMPRESSION_RATIO_THRESHOLD", "2.4")
        )
@ -373,9 +342,7 @@ class FasterWhisperSpeechToText:
            ).strip()
            or None
        )
-        self._repetition_penalty = float(
-            os.getenv("HOST_STT_REPETITION_PENALTY", "1.0")
-        )
+        self._repetition_penalty = float(os.getenv("HOST_STT_REPETITION_PENALTY", "1.0"))
        raw_hallucination_threshold = os.getenv(
            "HOST_STT_HALLUCINATION_SILENCE_THRESHOLD", ""
        ).strip()
@ -401,9 +368,7 @@ class FasterWhisperSpeechToText:
        if not self.enabled or not pcm:
            return None
        async with self._lock:
-            return await asyncio.to_thread(
-                self._transcribe_blocking, pcm, sample_rate, channels
-            )
+            return await asyncio.to_thread(self._transcribe_blocking, pcm, sample_rate, channels)

    async def warmup(self) -> None:
        if not self.enabled:
@ -428,15 +393,11 @@ class FasterWhisperSpeechToText:
            self._init_error = str(exc)
            self._model = None

-    def _transcribe_blocking(
-        self, pcm: bytes, sample_rate: int, channels: int
-    ) -> str | None:
+    def _transcribe_blocking(self, pcm: bytes, sample_rate: int, channels: int) -> str | None:
        self._initialize_blocking()
        if self._model is None:
            if self._init_error:
-                raise RuntimeError(
-                    f"faster-whisper initialization failed: {self._init_error}"
-                )
+                raise RuntimeError(f"faster-whisper initialization failed: {self._init_error}")
            return None

        if NUMPY_AVAILABLE and np is not None:
@ -481,9 +442,7 @@ class FasterWhisperSpeechToText:

        tmp_path: str | None = None
        try:
-            tmp_path = _write_temp_wav(
-                pcm=pcm, sample_rate=sample_rate, channels=channels
-            )
+            tmp_path = _write_temp_wav(pcm=pcm, sample_rate=sample_rate, channels=channels)
            segments, _info = self._model.transcribe(
                tmp_path,
                language=self._language or None,
@ -580,20 +539,14 @@ class HostSpeechToText:

 class SupertonicTextToSpeech:
    def __init__(self) -> None:
-        self._model = (
-            os.getenv("SUPERTONIC_MODEL", "supertonic-2").strip() or "supertonic-2"
-        )
-        self._voice_style_name = (
-            os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
-        )
+        self._model = os.getenv("SUPERTONIC_MODEL", "supertonic-2").strip() or "supertonic-2"
+        self._voice_style_name = os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
        self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
        self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
        self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
        self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
        self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
-        self._auto_download = os.getenv(
-            "SUPERTONIC_AUTO_DOWNLOAD", "1"
-        ).strip() not in {
+        self._auto_download = os.getenv("SUPERTONIC_AUTO_DOWNLOAD", "1").strip() not in {
            "0",
            "false",
            "False",
@ -608,9 +561,7 @@ class SupertonicTextToSpeech:

    @property
    def enabled(self) -> bool:
-        return (
-            SUPERTONIC_TTS_AVAILABLE and SupertonicTTS is not None and NUMPY_AVAILABLE
-        )
+        return SUPERTONIC_TTS_AVAILABLE and SupertonicTTS is not None and NUMPY_AVAILABLE

    @property
    def init_error(self) -> str | None:
@ -723,9 +674,7 @@ class SupertonicTextToSpeech:

 class HostTextToSpeech:
    def __init__(self) -> None:
-        provider = (
-            os.getenv("HOST_TTS_PROVIDER", "supertonic").strip() or "supertonic"
-        ).lower()
+        provider = (os.getenv("HOST_TTS_PROVIDER", "supertonic").strip() or "supertonic").lower()
        if provider not in {"supertonic", "command", "espeak", "auto"}:
            provider = "auto"
        self._provider = provider
@ -770,9 +719,7 @@ class HostTextToSpeech:
            if not self._supertonic.enabled:
                return "supertonic package is not available."
            if self._supertonic.init_error:
-                return (
-                    f"supertonic initialization failed: {self._supertonic.init_error}"
-                )
+                return f"supertonic initialization failed: {self._supertonic.init_error}"
            return "supertonic did not return audio."
        if self._provider == "command":
            return "HOST_TTS_COMMAND is not configured."
@ -797,13 +744,9 @@ class HostTextToSpeech:
        if "{output_wav}" in command:
            tmp_path: str | None = None
            try:
-                with tempfile.NamedTemporaryFile(
-                    suffix=".wav", delete=False
-                ) as tmp_file:
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                    tmp_path = tmp_file.name
-                command_with_output = command.replace(
-                    "{output_wav}", shlex.quote(tmp_path)
-                )
+                command_with_output = command.replace("{output_wav}", shlex.quote(tmp_path))
                result = subprocess.run(
                    command_with_output,
                    shell=True,
@ -872,9 +815,7 @@ SendJsonCallable = Callable[[dict[str, Any]], Awaitable[None]]


 class WebRTCVoiceSession:
-    def __init__(
-        self, gateway: "SuperTonicGateway", send_json: SendJsonCallable
-    ) -> None:
+    def __init__(self, gateway: "SuperTonicGateway", send_json: SendJsonCallable) -> None:
        self._gateway = gateway
        self._send_json = send_json

@ -886,9 +827,7 @@ class WebRTCVoiceSession:

        self._stt = HostSpeechToText()
        self._tts = HostTextToSpeech()
-        self._stt_segment_queue_size = max(
-            1, int(os.getenv("HOST_STT_SEGMENT_QUEUE_SIZE", "2"))
-        )
+        self._stt_segment_queue_size = max(1, int(os.getenv("HOST_STT_SEGMENT_QUEUE_SIZE", "2")))
        self._stt_segments: asyncio.Queue[bytes] = asyncio.Queue(
            maxsize=self._stt_segment_queue_size
        )
@ -913,11 +852,7 @@ class WebRTCVoiceSession:

        self._stt_min_ptt_ms = max(
            120,
-            int(
-                os.getenv(
-                    "HOST_STT_MIN_PTT_MS", os.getenv("HOST_STT_MIN_SEGMENT_MS", "220")
-                )
-            ),
+            int(os.getenv("HOST_STT_MIN_PTT_MS", os.getenv("HOST_STT_MIN_SEGMENT_MS", "220"))),
        )

        self._stt_suppress_during_tts = os.getenv(
@ -973,9 +908,7 @@ class WebRTCVoiceSession:
        sdp = str(payload.get("sdp", "")).strip()
        rtc_type = str(payload.get("rtcType", "offer")).strip() or "offer"
        if not sdp:
-            await self._send_json(
-                {"type": "rtc-error", "message": "Missing SDP offer payload."}
-            )
+            await self._send_json({"type": "rtc-error", "message": "Missing SDP offer payload."})
            return

        await self._close_peer_connection()
@ -1009,9 +942,7 @@ class WebRTCVoiceSession:
                name="voice-inbound-track",
            )

-        await peer_connection.setRemoteDescription(
-            RTCSessionDescription(sdp=sdp, type=rtc_type)
-        )
+        await peer_connection.setRemoteDescription(RTCSessionDescription(sdp=sdp, type=rtc_type))
        await self._drain_pending_ice_candidates(peer_connection)
        answer = await peer_connection.createAnswer()
        await peer_connection.setLocalDescription(answer)
@ -1021,10 +952,7 @@ class WebRTCVoiceSession:
        sdp_answer = str(local_description.sdp or "")
        if sdp_answer:
            sdp_answer = (
-                sdp_answer.replace("\r\n", "\n")
-                .replace("\r", "\n")
-                .strip()
-                .replace("\n", "\r\n")
+                sdp_answer.replace("\r\n", "\n").replace("\r", "\n").strip().replace("\n", "\r\n")
                + "\r\n"
            )
        await self._send_json(
@ -1036,15 +964,9 @@ class WebRTCVoiceSession:
        )

        if self._stt.enabled and not self._stt_worker_task:
-            self._stt_worker_task = asyncio.create_task(
-                self._stt_worker(), name="voice-stt-worker"
-            )
-        if self._stt.enabled and (
-            self._stt_warmup_task is None or self._stt_warmup_task.done()
-        ):
-            self._stt_warmup_task = asyncio.create_task(
-                self._warmup_stt(), name="voice-stt-warmup"
-            )
+            self._stt_worker_task = asyncio.create_task(self._stt_worker(), name="voice-stt-worker")
+        if self._stt.enabled and (self._stt_warmup_task is None or self._stt_warmup_task.done()):
+            self._stt_warmup_task = asyncio.create_task(self._warmup_stt(), name="voice-stt-warmup")
        elif not self._stt.enabled and not self._stt_unavailable_notice_sent:
            self._stt_unavailable_notice_sent = True
            await self._publish_system(
@ -1103,9 +1025,7 @@ class WebRTCVoiceSession:
            candidate = candidate_from_sdp(candidate_sdp)
            candidate.sdpMid = raw_candidate.get("sdpMid")
            line_index = raw_candidate.get("sdpMLineIndex")
-            candidate.sdpMLineIndex = (
-                int(line_index) if line_index is not None else None
-            )
+            candidate.sdpMLineIndex = int(line_index) if line_index is not None else None
            await peer_connection.addIceCandidate(candidate)
        except Exception as exc:
            await self._publish_system(f"Failed to add ICE candidate: {exc}")
@ -1147,9 +1067,7 @@ class WebRTCVoiceSession:
        if self._tts_flush_handle:
            self._tts_flush_handle.cancel()
        loop = asyncio.get_running_loop()
-        self._tts_flush_handle = loop.call_later(
-            max(0.05, delay_s), self._schedule_tts_flush
-        )
+        self._tts_flush_handle = loop.call_later(max(0.05, delay_s), self._schedule_tts_flush)

    async def _flush_tts(self) -> None:
        async with self._tts_flush_lock:
@ -1230,9 +1148,7 @@ class WebRTCVoiceSession:
        try:
            while True:
                frame = await track.recv()
-                pcm16, frame_ms, resample_state = self._frame_to_pcm16k_mono(
-                    frame, resample_state
-                )
+                pcm16, frame_ms, resample_state = self._frame_to_pcm16k_mono(frame, resample_state)
                if not pcm16:
                    continue

@ -1249,10 +1165,9 @@ class WebRTCVoiceSession:
                        f"time_base={getattr(frame, 'time_base', None)}."
                    )

-                if (
-                    self._stt_suppress_during_tts
-                    and asyncio.get_running_loop().time() < self._stt_suppress_until
-                ):
+                loop = asyncio.get_running_loop()
+
+                if self._stt_suppress_during_tts and loop.time() < self._stt_suppress_until:
                    recording = False
                    recording_started_at = 0.0
                    segment_ms = 0.0
@ -1262,7 +1177,7 @@ class WebRTCVoiceSession:
                if self._ptt_pressed:
                    if not recording:
                        recording = True
-                        recording_started_at = asyncio.get_running_loop().time()
+                        recording_started_at = loop.time()
                        segment_ms = 0.0
                        segment_buffer = bytearray()

@ -1273,8 +1188,7 @@ class WebRTCVoiceSession:
                if recording:
                    observed_duration_ms = max(
                        1.0,
-                        (asyncio.get_running_loop().time() - recording_started_at)
-                        * 1000.0,
+                        (loop.time() - recording_started_at) * 1000.0,
                    )
                    await self._finalize_ptt_segment(
                        bytes(segment_buffer),
@ -1285,6 +1199,7 @@ class WebRTCVoiceSession:
                    recording_started_at = 0.0
                    segment_ms = 0.0
                    segment_buffer = bytearray()
+
        except asyncio.CancelledError:
            raise
        except Exception as exc:
@ -1294,9 +1209,7 @@ class WebRTCVoiceSession:
                    f"Voice input stream ended ({exc.__class__.__name__}): {details}"
                )
            else:
-                await self._publish_system(
-                    f"Voice input stream ended ({exc.__class__.__name__})."
-                )
+                await self._publish_system(f"Voice input stream ended ({exc.__class__.__name__}).")
        finally:
            if recording and segment_ms >= self._stt_min_ptt_ms:
                observed_duration_ms = max(
@ -1355,9 +1268,7 @@ class WebRTCVoiceSession:
                            f"(estimated source={nearest_source_rate}Hz)."
                        )

-        await self._enqueue_stt_segment(
-            pcm16=normalized_pcm, duration_ms=normalized_duration_ms
-        )
+        await self._enqueue_stt_segment(pcm16=normalized_pcm, duration_ms=normalized_duration_ms)

    async def _enqueue_stt_segment(self, pcm16: bytes, duration_ms: float) -> None:
        if duration_ms < self._stt_min_ptt_ms:
@ -1368,13 +1279,9 @@ class WebRTCVoiceSession:
                self._stt_segments.get_nowait()

            now = asyncio.get_running_loop().time()
-            if (
-                now - self._last_stt_backlog_notice_at
-            ) >= self._stt_backlog_notice_interval_s:
+            if (now - self._last_stt_backlog_notice_at) >= self._stt_backlog_notice_interval_s:
                self._last_stt_backlog_notice_at = now
-                await self._publish_system(
-                    "Voice input backlog detected; dropping stale segment."
-                )
+                await self._publish_system("Voice input backlog detected; dropping stale segment.")

        with contextlib.suppress(asyncio.QueueFull):
            self._stt_segments.put_nowait(pcm16)
@ -1384,9 +1291,7 @@ class WebRTCVoiceSession:
            pcm16 = await self._stt_segments.get()
            if not self._stt_first_segment_notice_sent:
                self._stt_first_segment_notice_sent = True
-                await self._publish_system(
-                    "Push-to-talk audio captured. Running host STT..."
-                )
+                await self._publish_system("Push-to-talk audio captured. Running host STT...")
            try:
                transcript = await self._stt.transcribe_pcm(
                    pcm=pcm16,
@ -1478,11 +1383,7 @@ class WebRTCVoiceSession:
        except TypeError:
            pcm = frame.to_ndarray()

-        if (
-            NUMPY_AVAILABLE
-            and np is not None
-            and getattr(pcm, "dtype", None) is not None
-        ):
+        if NUMPY_AVAILABLE and np is not None and getattr(pcm, "dtype", None) is not None:
            if pcm.dtype != np.int16:
                if np.issubdtype(pcm.dtype, np.floating):
                    pcm = np.clip(pcm, -1.0, 1.0)
@ -1521,9 +1422,7 @@ class WebRTCVoiceSession:
                else:
                    frames_channels = pcm.reshape(-1, 1)

-            channel_count = (
-                int(frames_channels.shape[1]) if frames_channels.ndim == 2 else 1
-            )
+            channel_count = int(frames_channels.shape[1]) if frames_channels.ndim == 2 else 1
            if channel_count <= 1:
                mono = frames_channels.reshape(-1).tobytes()
            elif NUMPY_AVAILABLE and np is not None:
@ -1537,9 +1436,7 @@ class WebRTCVoiceSession:
        else:
            return b"", 0.0, resample_state

-        source_rate = int(
-            getattr(frame, "sample_rate", 0) or getattr(frame, "rate", 0) or 0
-        )
+        source_rate = int(getattr(frame, "sample_rate", 0) or getattr(frame, "rate", 0) or 0)

        time_base = getattr(frame, "time_base", None)
        tb_rate = 0