api channel and tools
This commit is contained in:
parent
9222c59f03
commit
3816a9627e
4 changed files with 684 additions and 582 deletions
21
app.py
21
app.py
|
|
@ -5,7 +5,7 @@ from pathlib import Path
|
||||||
from typing import Any, Awaitable, Callable
|
from typing import Any, Awaitable, Callable
|
||||||
|
|
||||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
||||||
from fastapi.responses import FileResponse, JSONResponse
|
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
from supertonic_gateway import SuperTonicGateway
|
from supertonic_gateway import SuperTonicGateway
|
||||||
|
|
@ -28,8 +28,9 @@ async def health() -> JSONResponse:
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def index() -> FileResponse:
|
async def index() -> HTMLResponse:
|
||||||
return FileResponse(INDEX_PATH)
|
html = INDEX_PATH.read_text(encoding="utf-8")
|
||||||
|
return HTMLResponse(content=html)
|
||||||
|
|
||||||
|
|
||||||
@app.websocket("/ws/chat")
|
@app.websocket("/ws/chat")
|
||||||
|
|
@ -65,18 +66,24 @@ async def websocket_chat(websocket: WebSocket) -> None:
|
||||||
elif msg_type == "rtc-ice-candidate":
|
elif msg_type == "rtc-ice-candidate":
|
||||||
await voice_session.handle_ice_candidate(message)
|
await voice_session.handle_ice_candidate(message)
|
||||||
elif msg_type == "voice-ptt":
|
elif msg_type == "voice-ptt":
|
||||||
voice_session.set_push_to_talk_pressed(
|
voice_session.set_push_to_talk_pressed(bool(message.get("pressed", False)))
|
||||||
bool(message.get("pressed", False))
|
|
||||||
)
|
|
||||||
elif msg_type == "user-message":
|
elif msg_type == "user-message":
|
||||||
await gateway.send_user_message(str(message.get("text", "")))
|
await gateway.send_user_message(str(message.get("text", "")))
|
||||||
|
elif msg_type == "ui-response":
|
||||||
|
await gateway.send_ui_response(
|
||||||
|
str(message.get("request_id", "")),
|
||||||
|
str(message.get("value", "")),
|
||||||
|
)
|
||||||
|
elif msg_type == "command":
|
||||||
|
await gateway.send_command(str(message.get("command", "")))
|
||||||
else:
|
else:
|
||||||
await safe_send_json(
|
await safe_send_json(
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"text": (
|
"text": (
|
||||||
"Unknown message type. Use spawn, stop, rtc-offer, "
|
"Unknown message type. Use spawn, stop, rtc-offer, "
|
||||||
"rtc-ice-candidate, voice-ptt, or user-message."
|
"rtc-ice-candidate, voice-ptt, user-message, "
|
||||||
|
"ui-response, or command."
|
||||||
),
|
),
|
||||||
"timestamp": "",
|
"timestamp": "",
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@
|
||||||
width: 100%;
|
width: 100%;
|
||||||
height: 100%;
|
height: 100%;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
background: #1a1510;
|
background: #ffffff;
|
||||||
touch-action: none;
|
touch-action: none;
|
||||||
}
|
}
|
||||||
#log {
|
#log {
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
line-height: 1.6;
|
line-height: 1.6;
|
||||||
color: rgba(255, 245, 235, 0.35);
|
color: rgba(30, 20, 10, 0.35);
|
||||||
white-space: pre-wrap;
|
white-space: pre-wrap;
|
||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
display: flex;
|
display: flex;
|
||||||
|
|
@ -45,8 +45,8 @@
|
||||||
mask-image: linear-gradient(to top, black 55%, transparent 100%);
|
mask-image: linear-gradient(to top, black 55%, transparent 100%);
|
||||||
}
|
}
|
||||||
#log:hover {
|
#log:hover {
|
||||||
color: rgba(255, 245, 235, 0.92);
|
color: rgba(30, 20, 10, 0.85);
|
||||||
background: rgba(0, 0, 0, 0.18);
|
background: rgba(0, 0, 0, 0.06);
|
||||||
-webkit-mask-image: none;
|
-webkit-mask-image: none;
|
||||||
mask-image: none;
|
mask-image: none;
|
||||||
}
|
}
|
||||||
|
|
@ -62,17 +62,17 @@
|
||||||
margin-bottom: 4px;
|
margin-bottom: 4px;
|
||||||
}
|
}
|
||||||
.line.user {
|
.line.user {
|
||||||
color: rgba(255, 255, 255, 0.9);
|
color: rgba(20, 10, 0, 0.85);
|
||||||
}
|
}
|
||||||
.line.system {
|
.line.system {
|
||||||
color: rgba(255, 220, 180, 0.5);
|
color: rgba(120, 80, 40, 0.5);
|
||||||
}
|
}
|
||||||
.line.wisper {
|
.line.wisper {
|
||||||
color: rgba(255, 200, 160, 0.4);
|
color: rgba(120, 80, 40, 0.4);
|
||||||
}
|
}
|
||||||
#log:hover .line.user { color: rgba(255, 255, 255, 1.0); }
|
#log:hover .line.user { color: rgba(20, 10, 0, 1.0); }
|
||||||
#log:hover .line.system { color: rgba(255, 220, 180, 0.85); }
|
#log:hover .line.system { color: rgba(120, 80, 40, 0.85); }
|
||||||
#log:hover .line.wisper { color: rgba(255, 200, 160, 0.75); }
|
#log:hover .line.wisper { color: rgba(120, 80, 40, 0.75); }
|
||||||
#voiceStatus {
|
#voiceStatus {
|
||||||
position: fixed;
|
position: fixed;
|
||||||
bottom: 12px;
|
bottom: 12px;
|
||||||
|
|
@ -119,11 +119,14 @@
|
||||||
border-radius: 24px;
|
border-radius: 24px;
|
||||||
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25), 4px 4px 0px rgba(0,0,0,0.15);
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
|
pointer-events: auto;
|
||||||
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
#agentViz canvas {
|
#agentViz canvas {
|
||||||
width: 100% !important;
|
width: 100% !important;
|
||||||
height: 100% !important;
|
height: 100% !important;
|
||||||
display: block;
|
display: block;
|
||||||
|
pointer-events: auto;
|
||||||
}
|
}
|
||||||
#agentIndicator .label {
|
#agentIndicator .label {
|
||||||
display: none;
|
display: none;
|
||||||
|
|
@ -140,10 +143,6 @@
|
||||||
#agentIndicator.speaking {
|
#agentIndicator.speaking {
|
||||||
color: #8b4513;
|
color: #8b4513;
|
||||||
}
|
}
|
||||||
/* Deepen the background while PTT is active */
|
|
||||||
body.ptt-active {
|
|
||||||
background: radial-gradient(ellipse at 50% 44%, #f2caa8 0%, #e8b898 100%);
|
|
||||||
}
|
|
||||||
#controls {
|
#controls {
|
||||||
position: fixed;
|
position: fixed;
|
||||||
top: 12px;
|
top: 12px;
|
||||||
|
|
@ -167,20 +166,236 @@
|
||||||
transform: translateY(1px);
|
transform: translateY(1px);
|
||||||
box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
|
box-shadow: 0 1px 4px rgba(0, 0, 0, 0.15);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Toast notifications */
|
||||||
|
#toast-container {
|
||||||
|
position: fixed;
|
||||||
|
top: 16px;
|
||||||
|
left: 50%;
|
||||||
|
transform: translateX(-50%);
|
||||||
|
width: min(92vw, 480px);
|
||||||
|
max-height: calc(100vh - 32px);
|
||||||
|
overflow-y: auto;
|
||||||
|
overflow-x: hidden;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 10px;
|
||||||
|
z-index: 100;
|
||||||
|
pointer-events: auto;
|
||||||
|
/* Keep the scrollbar thin and low-contrast */
|
||||||
|
scrollbar-width: thin;
|
||||||
|
scrollbar-color: rgba(255,200,140,0.25) transparent;
|
||||||
|
padding-bottom: 4px;
|
||||||
|
}
|
||||||
|
#toast-container::-webkit-scrollbar {
|
||||||
|
width: 4px;
|
||||||
|
}
|
||||||
|
#toast-container::-webkit-scrollbar-track {
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
#toast-container::-webkit-scrollbar-thumb {
|
||||||
|
background: rgba(255,200,140,0.25);
|
||||||
|
border-radius: 2px;
|
||||||
|
}
|
||||||
|
.toast {
|
||||||
|
pointer-events: auto;
|
||||||
|
background: rgba(28, 22, 16, 0.92);
|
||||||
|
border: 1px solid rgba(255, 200, 140, 0.18);
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 14px 16px 14px 16px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
box-shadow: 0 4px 24px rgba(0, 0, 0, 0.45);
|
||||||
|
animation: toast-in 0.22s cubic-bezier(0.34, 1.4, 0.64, 1) both;
|
||||||
|
position: relative;
|
||||||
|
overflow: hidden;
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
.toast.dismissing {
|
||||||
|
animation: toast-out 0.18s ease-in both;
|
||||||
|
}
|
||||||
|
@keyframes toast-in {
|
||||||
|
from { opacity: 0; transform: translateY(-14px) scale(0.96); }
|
||||||
|
to { opacity: 1; transform: translateY(0) scale(1); }
|
||||||
|
}
|
||||||
|
@keyframes toast-out {
|
||||||
|
from { opacity: 1; transform: translateY(0) scale(1); }
|
||||||
|
to { opacity: 0; transform: translateY(-10px) scale(0.96); }
|
||||||
|
}
|
||||||
|
.toast-progress {
|
||||||
|
position: absolute;
|
||||||
|
bottom: 0;
|
||||||
|
left: 0;
|
||||||
|
height: 2px;
|
||||||
|
background: rgba(255, 190, 120, 0.55);
|
||||||
|
width: 100%;
|
||||||
|
transform-origin: left;
|
||||||
|
animation: toast-progress-shrink linear both;
|
||||||
|
}
|
||||||
|
@keyframes toast-progress-shrink {
|
||||||
|
from { transform: scaleX(1); }
|
||||||
|
to { transform: scaleX(0); }
|
||||||
|
}
|
||||||
|
.toast-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: flex-start;
|
||||||
|
gap: 10px;
|
||||||
|
}
|
||||||
|
.toast-title {
|
||||||
|
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||||||
|
font-size: 11px;
|
||||||
|
font-weight: 600;
|
||||||
|
letter-spacing: 0.07em;
|
||||||
|
color: rgba(255, 200, 140, 0.85);
|
||||||
|
text-transform: uppercase;
|
||||||
|
flex: 1;
|
||||||
|
min-width: 0;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
.toast-close {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
color: rgba(255, 245, 235, 0.35);
|
||||||
|
font-size: 16px;
|
||||||
|
line-height: 1;
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0 2px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
transition: color 0.15s;
|
||||||
|
}
|
||||||
|
.toast-close:hover {
|
||||||
|
color: rgba(255, 245, 235, 0.85);
|
||||||
|
}
|
||||||
|
.toast-body {
|
||||||
|
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||||||
|
font-size: 12px;
|
||||||
|
line-height: 1.65;
|
||||||
|
color: rgba(255, 245, 235, 0.82);
|
||||||
|
white-space: normal;
|
||||||
|
word-break: break-word;
|
||||||
|
user-select: text;
|
||||||
|
-webkit-user-select: text;
|
||||||
|
}
|
||||||
|
.toast-body p { margin: 0 0 6px; }
|
||||||
|
.toast-body p:last-child { margin-bottom: 0; }
|
||||||
|
.toast-body h1, .toast-body h2, .toast-body h3,
|
||||||
|
.toast-body h4, .toast-body h5, .toast-body h6 {
|
||||||
|
font-size: 13px;
|
||||||
|
font-weight: 700;
|
||||||
|
color: rgba(255, 200, 140, 0.95);
|
||||||
|
margin: 8px 0 4px;
|
||||||
|
}
|
||||||
|
.toast-body ul, .toast-body ol {
|
||||||
|
margin: 4px 0 6px;
|
||||||
|
padding-left: 18px;
|
||||||
|
}
|
||||||
|
.toast-body li { margin-bottom: 2px; }
|
||||||
|
.toast-body code {
|
||||||
|
background: rgba(255,255,255,0.07);
|
||||||
|
border-radius: 4px;
|
||||||
|
padding: 1px 5px;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
.toast-body pre {
|
||||||
|
background: rgba(0,0,0,0.35);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 8px 10px;
|
||||||
|
overflow-x: auto;
|
||||||
|
margin: 6px 0;
|
||||||
|
}
|
||||||
|
.toast-body pre code {
|
||||||
|
background: none;
|
||||||
|
padding: 0;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
.toast-body table {
|
||||||
|
border-collapse: collapse;
|
||||||
|
width: 100%;
|
||||||
|
font-size: 11px;
|
||||||
|
margin: 6px 0;
|
||||||
|
}
|
||||||
|
.toast-body th, .toast-body td {
|
||||||
|
border: 1px solid rgba(255,200,140,0.2);
|
||||||
|
padding: 4px 8px;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
.toast-body th {
|
||||||
|
background: rgba(255,200,140,0.08);
|
||||||
|
color: rgba(255,200,140,0.9);
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
.toast-body a {
|
||||||
|
color: rgba(255,200,140,0.85);
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
.toast-body blockquote {
|
||||||
|
border-left: 3px solid rgba(255,200,140,0.3);
|
||||||
|
margin: 6px 0;
|
||||||
|
padding-left: 10px;
|
||||||
|
color: rgba(255,245,235,0.55);
|
||||||
|
}
|
||||||
|
.toast-body hr {
|
||||||
|
border: none;
|
||||||
|
border-top: 1px solid rgba(255,200,140,0.15);
|
||||||
|
margin: 8px 0;
|
||||||
|
}
|
||||||
|
.toast-choices {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 8px;
|
||||||
|
margin-top: 4px;
|
||||||
|
}
|
||||||
|
.toast-choice-btn {
|
||||||
|
background: rgba(255, 200, 140, 0.12);
|
||||||
|
border: 1px solid rgba(255, 200, 140, 0.35);
|
||||||
|
border-radius: 8px;
|
||||||
|
color: rgba(255, 245, 235, 0.90);
|
||||||
|
font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
|
||||||
|
font-size: 12px;
|
||||||
|
padding: 6px 14px;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background 0.15s, border-color 0.15s;
|
||||||
|
flex: 1 1 auto;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
.toast-choice-btn:hover {
|
||||||
|
background: rgba(255, 200, 140, 0.25);
|
||||||
|
border-color: rgba(255, 200, 140, 0.65);
|
||||||
|
}
|
||||||
|
.toast-choice-btn:active {
|
||||||
|
background: rgba(255, 200, 140, 0.38);
|
||||||
|
}
|
||||||
|
.toast-choice-btn:disabled {
|
||||||
|
opacity: 0.4;
|
||||||
|
cursor: default;
|
||||||
|
}
|
||||||
|
.toast-image {
|
||||||
|
width: 100%;
|
||||||
|
max-height: 320px;
|
||||||
|
object-fit: contain;
|
||||||
|
border-radius: 8px;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="controls" data-no-ptt="1">
|
<div id="controls">
|
||||||
<button id="resetSessionBtn" class="control-btn" type="button" data-no-ptt="1">Reset</button>
|
<button id="resetSessionBtn" class="control-btn" type="button">Reset</button>
|
||||||
</div>
|
</div>
|
||||||
<div id="log"><div id="log-inner"></div></div>
|
<div id="log"><div id="log-inner"></div></div>
|
||||||
<div id="agentIndicator">
|
<div id="agentIndicator" data-ptt="1">
|
||||||
<div id="agentViz"></div>
|
<div id="agentViz" data-ptt="1"></div>
|
||||||
<span class="label"></span>
|
<span class="label"></span>
|
||||||
</div>
|
</div>
|
||||||
<div id="voiceStatus"></div>
|
<div id="voiceStatus"></div>
|
||||||
|
<div id="toast-container"></div>
|
||||||
<audio id="remoteAudio" autoplay playsinline hidden></audio>
|
<audio id="remoteAudio" autoplay playsinline hidden></audio>
|
||||||
|
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||||
<script src="/static/three.min.js"></script>
|
<script src="/static/three.min.js"></script>
|
||||||
<script>
|
<script>
|
||||||
const logEl = document.getElementById("log-inner");
|
const logEl = document.getElementById("log-inner");
|
||||||
|
|
@ -190,6 +405,125 @@
|
||||||
const agentVizEl = document.getElementById("agentViz");
|
const agentVizEl = document.getElementById("agentViz");
|
||||||
const agentLabel = agentIndicator.querySelector(".label");
|
const agentLabel = agentIndicator.querySelector(".label");
|
||||||
const resetSessionBtn = document.getElementById("resetSessionBtn");
|
const resetSessionBtn = document.getElementById("resetSessionBtn");
|
||||||
|
const toastContainer = document.getElementById("toast-container");
|
||||||
|
|
||||||
|
// --- Toast notifications ---
|
||||||
|
const showToast = (kind, content, title, durationMs) => {
|
||||||
|
const toast = document.createElement("div");
|
||||||
|
toast.className = "toast";
|
||||||
|
|
||||||
|
// Header row (title + close button)
|
||||||
|
const header = document.createElement("div");
|
||||||
|
header.className = "toast-header";
|
||||||
|
|
||||||
|
if (title) {
|
||||||
|
const titleEl = document.createElement("span");
|
||||||
|
titleEl.className = "toast-title";
|
||||||
|
titleEl.textContent = title;
|
||||||
|
header.appendChild(titleEl);
|
||||||
|
}
|
||||||
|
|
||||||
|
const closeBtn = document.createElement("button");
|
||||||
|
closeBtn.className = "toast-close";
|
||||||
|
closeBtn.setAttribute("type", "button");
|
||||||
|
closeBtn.setAttribute("aria-label", "Dismiss");
|
||||||
|
closeBtn.textContent = "×";
|
||||||
|
header.appendChild(closeBtn);
|
||||||
|
|
||||||
|
toast.appendChild(header);
|
||||||
|
|
||||||
|
// Body
|
||||||
|
if (kind === "image") {
|
||||||
|
const img = document.createElement("img");
|
||||||
|
img.className = "toast-image";
|
||||||
|
img.src = content;
|
||||||
|
img.alt = title || "image";
|
||||||
|
toast.appendChild(img);
|
||||||
|
} else {
|
||||||
|
const body = document.createElement("div");
|
||||||
|
body.className = "toast-body";
|
||||||
|
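// If content looks like HTML, inject it as-is; otherwise render it as markdown. NOTE(review): neither path sanitizes `content` before assigning innerHTML — confirm the toast source is trusted.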
|
||||||
|
const looksLikeHtml = /^\s*<[a-zA-Z]/.test(content);
|
||||||
|
if (looksLikeHtml) {
|
||||||
|
body.innerHTML = content;
|
||||||
|
} else if (typeof marked !== "undefined") {
|
||||||
|
body.innerHTML = marked.parse(content);
|
||||||
|
} else {
|
||||||
|
body.textContent = content;
|
||||||
|
}
|
||||||
|
toast.appendChild(body);
|
||||||
|
}
|
||||||
|
|
||||||
|
// dismiss must be declared before close button references it
|
||||||
|
const dismiss = () => {
|
||||||
|
toast.classList.add("dismissing");
|
||||||
|
const fallback = setTimeout(() => toast.remove(), 400);
|
||||||
|
toast.addEventListener("animationend", () => { clearTimeout(fallback); toast.remove(); }, { once: true });
|
||||||
|
};
|
||||||
|
|
||||||
|
closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
|
||||||
|
toastContainer.prepend(toast);
|
||||||
|
toastContainer.scrollTop = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --- Choice toasts (ask_user tool) ---
|
||||||
|
const showChoice = (requestId, question, choices, title) => {
|
||||||
|
const toast = document.createElement("div");
|
||||||
|
toast.className = "toast";
|
||||||
|
|
||||||
|
// Header
|
||||||
|
const header = document.createElement("div");
|
||||||
|
header.className = "toast-header";
|
||||||
|
if (title) {
|
||||||
|
const titleEl = document.createElement("span");
|
||||||
|
titleEl.className = "toast-title";
|
||||||
|
titleEl.textContent = title;
|
||||||
|
header.appendChild(titleEl);
|
||||||
|
}
|
||||||
|
const closeBtn = document.createElement("button");
|
||||||
|
closeBtn.className = "toast-close";
|
||||||
|
closeBtn.setAttribute("type", "button");
|
||||||
|
closeBtn.setAttribute("aria-label", "Dismiss");
|
||||||
|
closeBtn.textContent = "×";
|
||||||
|
header.appendChild(closeBtn);
|
||||||
|
toast.appendChild(header);
|
||||||
|
|
||||||
|
// Question body
|
||||||
|
const body = document.createElement("div");
|
||||||
|
body.className = "toast-body";
|
||||||
|
body.textContent = question;
|
||||||
|
toast.appendChild(body);
|
||||||
|
|
||||||
|
// Choice buttons
|
||||||
|
const choicesEl = document.createElement("div");
|
||||||
|
choicesEl.className = "toast-choices";
|
||||||
|
|
||||||
|
const dismiss = () => {
|
||||||
|
toast.classList.add("dismissing");
|
||||||
|
const fallback = setTimeout(() => toast.remove(), 400);
|
||||||
|
toast.addEventListener("animationend", () => { clearTimeout(fallback); toast.remove(); }, { once: true });
|
||||||
|
};
|
||||||
|
|
||||||
|
choices.forEach((label) => {
|
||||||
|
const btn = document.createElement("button");
|
||||||
|
btn.className = "toast-choice-btn";
|
||||||
|
btn.setAttribute("type", "button");
|
||||||
|
btn.textContent = label;
|
||||||
|
btn.addEventListener("click", (e) => {
|
||||||
|
e.stopPropagation();
|
||||||
|
// Disable all buttons to prevent double-send
|
||||||
|
choicesEl.querySelectorAll(".toast-choice-btn").forEach((b) => { b.disabled = true; });
|
||||||
|
sendJson({ type: "ui-response", request_id: requestId, value: label });
|
||||||
|
dismiss();
|
||||||
|
});
|
||||||
|
choicesEl.appendChild(btn);
|
||||||
|
});
|
||||||
|
toast.appendChild(choicesEl);
|
||||||
|
|
||||||
|
closeBtn.addEventListener("click", (e) => { e.stopPropagation(); dismiss(); });
|
||||||
|
toastContainer.prepend(toast);
|
||||||
|
toastContainer.scrollTop = 0;
|
||||||
|
};
|
||||||
|
|
||||||
// --- Agent state indicator ---
|
// --- Agent state indicator ---
|
||||||
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
|
const STATES = { idle: "idle", listening: "listening", thinking: "thinking", speaking: "speaking" };
|
||||||
|
|
@ -248,7 +582,8 @@
|
||||||
powerPreference: "high-performance",
|
powerPreference: "high-performance",
|
||||||
});
|
});
|
||||||
renderer.setPixelRatio(1);
|
renderer.setPixelRatio(1);
|
||||||
renderer.setClearColor(0xa09b96, 1);
|
renderer.setClearColor(0xe8e4e0, 1);
|
||||||
|
renderer.domElement.dataset.ptt = "1";
|
||||||
agentVizEl.innerHTML = "";
|
agentVizEl.innerHTML = "";
|
||||||
agentVizEl.appendChild(renderer.domElement);
|
agentVizEl.appendChild(renderer.domElement);
|
||||||
|
|
||||||
|
|
@ -358,12 +693,12 @@
|
||||||
let deformScale = 1.0;
|
let deformScale = 1.0;
|
||||||
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
|
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
|
||||||
let spinSpeed = 0.0;
|
let spinSpeed = 0.0;
|
||||||
// Card background colour lerp: 0 = idle coral, 1 = dark listening
|
// Card background colour lerp: 0 = idle coral, 1 = dark coral (PTT/listening)
|
||||||
let cardColorT = 0.0;
|
let cardColorT = 0.0;
|
||||||
let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
|
let connectedT = 0.0; // 0 = gray (disconnected), 1 = coral (connected)
|
||||||
const CARD_GRAY_RGB = [160, 155, 150]; // disconnected gray
|
const CARD_GRAY_RGB = [232, 228, 224]; // #e8e4e0 — disconnected light warm gray
|
||||||
const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f
|
const CARD_IDLE_RGB = [212, 85, 63]; // #d4553f — connected idle coral
|
||||||
const CARD_LISTEN_RGB = [120, 40, 28]; // dark desaturated coral
|
const CARD_LISTEN_RGB = [120, 40, 28]; // #78281c — PTT active dark coral
|
||||||
|
|
||||||
const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };
|
const setStateColor = (_state) => { /* no-op: MeshBasicMaterial, colour is fixed */ };
|
||||||
|
|
||||||
|
|
@ -696,7 +1031,6 @@
|
||||||
|
|
||||||
const setPushToTalkState = (pressed, notifyServer = true) => {
|
const setPushToTalkState = (pressed, notifyServer = true) => {
|
||||||
pttPressed = pressed;
|
pttPressed = pressed;
|
||||||
document.body.classList.toggle("ptt-active", pressed);
|
|
||||||
setMicCaptureEnabled(pressed);
|
setMicCaptureEnabled(pressed);
|
||||||
if (notifyServer && ws.readyState === WebSocket.OPEN) {
|
if (notifyServer && ws.readyState === WebSocket.OPEN) {
|
||||||
ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
|
ws.send(JSON.stringify({ type: "voice-ptt", pressed }));
|
||||||
|
|
@ -947,26 +1281,27 @@
|
||||||
if (!appStarted) {
|
if (!appStarted) {
|
||||||
await bootstrap();
|
await bootstrap();
|
||||||
}
|
}
|
||||||
if (sendUserMessage("/reset")) {
|
if (ws.readyState === WebSocket.OPEN) {
|
||||||
showStatus("Reset command sent.", 1500);
|
sendJson({ type: "command", command: "reset" });
|
||||||
|
showStatus("Session reset.", 1500);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Whole-screen PTT pointer handling ---
|
// --- Center-card PTT pointer handling ---
|
||||||
|
// Only touches that land on #agentIndicator / #agentViz (data-ptt="1") trigger PTT.
|
||||||
// We track active pointer IDs so multi-touch doesn't double-fire.
|
// We track active pointer IDs so multi-touch doesn't double-fire.
|
||||||
const activePointers = new Set();
|
const activePointers = new Set();
|
||||||
|
|
||||||
document.addEventListener("pointerdown", async (event) => {
|
document.addEventListener("pointerdown", async (event) => {
|
||||||
if (event.target instanceof Element && event.target.closest("[data-no-ptt='1']")) {
|
if (!(event.target instanceof Element) || !event.target.closest("[data-ptt='1']")) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
activePointers.add(event.pointerId);
|
||||||
if (!appStarted) {
|
if (!appStarted) {
|
||||||
await bootstrap();
|
await bootstrap();
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
ensureVisualizerAudioMeter();
|
ensureVisualizerAudioMeter();
|
||||||
activePointers.add(event.pointerId);
|
|
||||||
if (activePointers.size === 1) beginPushToTalk();
|
if (activePointers.size === 1) beginPushToTalk();
|
||||||
}, { passive: false });
|
}, { passive: false });
|
||||||
|
|
||||||
|
|
@ -1020,6 +1355,30 @@
|
||||||
if (agentState !== STATES.listening && STATES[newState]) {
|
if (agentState !== STATES.listening && STATES[newState]) {
|
||||||
setAgentState(newState);
|
setAgentState(newState);
|
||||||
}
|
}
|
||||||
|
} else if (msg.role === "toast") {
|
||||||
|
try {
|
||||||
|
const t = JSON.parse(msg.text || "{}");
|
||||||
|
showToast(
|
||||||
|
t.kind || "text",
|
||||||
|
t.content || "",
|
||||||
|
t.title || "",
|
||||||
|
typeof t.duration_ms === "number" ? t.duration_ms : 6000,
|
||||||
|
);
|
||||||
|
} catch (_) {
|
||||||
|
showToast("text", msg.text || "", "", 6000);
|
||||||
|
}
|
||||||
|
} else if (msg.role === "choice") {
|
||||||
|
try {
|
||||||
|
const c = JSON.parse(msg.text || "{}");
|
||||||
|
showChoice(
|
||||||
|
c.request_id || "",
|
||||||
|
c.question || "",
|
||||||
|
Array.isArray(c.choices) ? c.choices : [],
|
||||||
|
c.title || "",
|
||||||
|
);
|
||||||
|
} catch (_) {
|
||||||
|
// Malformed choice payload — ignore.
|
||||||
|
}
|
||||||
} else if (msg.role === "wisper") {
|
} else if (msg.role === "wisper") {
|
||||||
// suppress wisper debug output
|
// suppress wisper debug output
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -1,442 +1,270 @@
|
||||||
|
"""SuperTonic Gateway — nanobot integration for the web UI.
|
||||||
|
|
||||||
|
Connects to the already-running nanobot process via a Unix domain socket.
|
||||||
|
nanobot must be started separately (e.g. ``nanobot gateway``) with the API
|
||||||
|
channel enabled in its config.
|
||||||
|
|
||||||
|
Wire protocol (newline-delimited JSON)
|
||||||
|
---------------------------------------
|
||||||
|
Client → nanobot::
|
||||||
|
|
||||||
|
{"type": "message", "content": "hello", "chat_id": "web"}
|
||||||
|
{"type": "ping"}
|
||||||
|
{"type": "ui-response", "request_id": "<uuid>", "value": "Option A", "chat_id": "web"}
|
||||||
|
{"type": "command", "command": "reset", "chat_id": "web"}
|
||||||
|
|
||||||
|
nanobot → client::
|
||||||
|
|
||||||
|
{"type": "message", "content": "Hi!", "chat_id": "web", "is_progress": false}
|
||||||
|
{"type": "agent_state", "state": "thinking", "chat_id": "web"}
|
||||||
|
{"type": "toast", "kind": "text"|"image", "content": "...", "title": "...", "duration_ms": 5000}
|
||||||
|
{"type": "choice", "request_id": "<uuid>", "question": "...", "choices": ["A", "B"],
|
||||||
|
"title": "...", "chat_id": "web"}
|
||||||
|
{"type": "pong"}
|
||||||
|
{"type": "error", "error": "..."}
|
||||||
|
|
||||||
|
The existing ``SuperTonicGateway`` methods (``spawn_tui``, ``send_user_message``,
|
||||||
|
``stop_tui``, ``shutdown``) are unchanged, so ``voice_rtc.py`` requires no
|
||||||
|
modification; ``app.py`` additionally calls ``send_ui_response`` and ``send_command``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import contextlib
|
import json
|
||||||
import os
|
import os
|
||||||
import pty
|
|
||||||
import re
|
|
||||||
import shlex
|
|
||||||
import signal
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
from collections import deque
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from wisper import WisperBus, WisperEvent
|
from wisper import WisperBus, WisperEvent
|
||||||
|
|
||||||
|
# Default path — must match nanobot's channels.api.socket_path config value.
|
||||||
ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
|
DEFAULT_SOCKET_PATH = Path.home() / ".nanobot" / "api.sock"
|
||||||
CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b-\x1f\x7f]")
|
|
||||||
BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
|
|
||||||
SPINNER_ONLY_RE = re.compile(r"^[\s|/\\\-]+$")
|
|
||||||
BOX_DRAWING_ONLY_RE = re.compile(r"^[\s\u2500-\u257f]+$")
|
|
||||||
THINKING_LINE_RE = re.compile(
|
|
||||||
r"\b(?:agent|nanobot|napbot)\b(?:\s+is)?\s+thinking\b",
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
USER_ECHO_LINE_RE = re.compile(r"^(?:you|user)\s*:", re.IGNORECASE)
|
|
||||||
TOOL_STREAM_LINE_RE = re.compile(
|
|
||||||
r"^(?:tool(?:\s+call|\s+output)?|calling\s+tool|running\s+tool|executing\s+tool)\b",
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
LEADING_NON_WORD_RE = re.compile(r"^[^\w]+")
|
|
||||||
WHITESPACE_RE = re.compile(r"\s+")
|
|
||||||
AGENT_OUTPUT_PREFIX_RE = re.compile(
|
|
||||||
r"^(?:nanobot|napbot)\b\s*[:>\-]?\s*", re.IGNORECASE
|
|
||||||
)
|
|
||||||
EMOJI_RE = re.compile(
|
|
||||||
"[" # Common emoji and pictograph blocks.
|
|
||||||
"\U0001f1e6-\U0001f1ff"
|
|
||||||
"\U0001f300-\U0001f5ff"
|
|
||||||
"\U0001f600-\U0001f64f"
|
|
||||||
"\U0001f680-\U0001f6ff"
|
|
||||||
"\U0001f700-\U0001f77f"
|
|
||||||
"\U0001f780-\U0001f7ff"
|
|
||||||
"\U0001f800-\U0001f8ff"
|
|
||||||
"\U0001f900-\U0001f9ff"
|
|
||||||
"\U0001fa00-\U0001faff"
|
|
||||||
"\u2600-\u26ff"
|
|
||||||
"\u2700-\u27bf"
|
|
||||||
"\ufe0f"
|
|
||||||
"\u200d"
|
|
||||||
"]"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_output(text: str) -> str:
|
# ---------------------------------------------------------------------------
|
||||||
cleaned = ANSI_ESCAPE_RE.sub("", text)
|
# NanobotApiProcess — connects to the running nanobot via its Unix socket
|
||||||
cleaned = BRAILLE_SPINNER_RE.sub(" ", cleaned)
|
# ---------------------------------------------------------------------------
|
||||||
cleaned = CONTROL_CHAR_RE.sub("", cleaned)
|
|
||||||
return cleaned.replace("\r", "\n")
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_nanobot_command_and_workdir() -> tuple[str, Path]:
|
class NanobotApiProcess:
|
||||||
command_override = os.getenv("NANOBOT_COMMAND")
|
"""Connects to the running nanobot process via its Unix domain socket.
|
||||||
workdir_override = os.getenv("NANOBOT_WORKDIR")
|
|
||||||
|
|
||||||
if workdir_override:
|
Lifecycle
|
||||||
default_workdir = Path(workdir_override).expanduser()
|
---------
|
||||||
else:
|
``start()`` — opens a connection to nanobot's API socket.
|
||||||
default_workdir = Path.home()
|
``send()`` — writes a user message over the socket.
|
||||||
|
``stop()`` — closes the connection.
|
||||||
|
"""
|
||||||
|
|
||||||
if command_override:
|
def __init__(self, bus: WisperBus, socket_path: Path) -> None:
|
||||||
return command_override, default_workdir
|
|
||||||
|
|
||||||
nanobot_dir = Path.home() / "nanobot"
|
|
||||||
nanobot_python_candidates = [
|
|
||||||
nanobot_dir / ".venv" / "bin" / "python",
|
|
||||||
nanobot_dir / "venv" / "bin" / "python",
|
|
||||||
]
|
|
||||||
for nanobot_venv_python in nanobot_python_candidates:
|
|
||||||
if nanobot_venv_python.exists():
|
|
||||||
if not workdir_override:
|
|
||||||
default_workdir = nanobot_dir
|
|
||||||
return (
|
|
||||||
f"{nanobot_venv_python} -m nanobot agent --no-markdown",
|
|
||||||
default_workdir,
|
|
||||||
)
|
|
||||||
|
|
||||||
return "nanobot agent --no-markdown", default_workdir
|
|
||||||
|
|
||||||
|
|
||||||
def _infer_venv_root(command_parts: list[str], workdir: Path) -> Path | None:
|
|
||||||
if not command_parts:
|
|
||||||
return None
|
|
||||||
|
|
||||||
binary = Path(command_parts[0]).expanduser()
|
|
||||||
if (
|
|
||||||
binary.is_absolute()
|
|
||||||
and binary.name.startswith("python")
|
|
||||||
and binary.parent.name == "bin"
|
|
||||||
):
|
|
||||||
return binary.parent.parent
|
|
||||||
|
|
||||||
for candidate in (workdir / ".venv", workdir / "venv"):
|
|
||||||
if (candidate / "bin" / "python").exists():
|
|
||||||
return candidate
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _build_process_env(
|
|
||||||
command_parts: list[str], workdir: Path
|
|
||||||
) -> tuple[dict[str, str], Path | None]:
|
|
||||||
env = os.environ.copy()
|
|
||||||
env.pop("PYTHONHOME", None)
|
|
||||||
|
|
||||||
venv_root = _infer_venv_root(command_parts, workdir)
|
|
||||||
if not venv_root:
|
|
||||||
return env, None
|
|
||||||
|
|
||||||
venv_bin = str(venv_root / "bin")
|
|
||||||
path_entries = [entry for entry in env.get("PATH", "").split(os.pathsep) if entry]
|
|
||||||
path_entries = [entry for entry in path_entries if entry != venv_bin]
|
|
||||||
path_entries.insert(0, venv_bin)
|
|
||||||
env["PATH"] = os.pathsep.join(path_entries)
|
|
||||||
env["VIRTUAL_ENV"] = str(venv_root)
|
|
||||||
return env, venv_root
|
|
||||||
|
|
||||||
|
|
||||||
class NanobotTUIProcess:
|
|
||||||
def __init__(self, bus: WisperBus, command: str, workdir: Path) -> None:
|
|
||||||
self._bus = bus
|
self._bus = bus
|
||||||
self._command = command
|
self._socket_path = socket_path
|
||||||
self._workdir = workdir
|
self._reader: asyncio.StreamReader | None = None
|
||||||
self._process: subprocess.Popen[bytes] | None = None
|
self._writer: asyncio.StreamWriter | None = None
|
||||||
self._master_fd: int | None = None
|
self._read_task: asyncio.Task | None = None
|
||||||
self._read_task: asyncio.Task[None] | None = None
|
|
||||||
self._pending_output = ""
|
|
||||||
self._suppress_noisy_ui = os.getenv(
|
|
||||||
"NANOBOT_SUPPRESS_NOISY_UI", "1"
|
|
||||||
).strip() not in {
|
|
||||||
"0",
|
|
||||||
"false",
|
|
||||||
"False",
|
|
||||||
"no",
|
|
||||||
"off",
|
|
||||||
}
|
|
||||||
self._dedup_window_s = max(
|
|
||||||
0.2, float(os.getenv("NANOBOT_OUTPUT_DEDUP_WINDOW_S", "1.5"))
|
|
||||||
)
|
|
||||||
self._recent_lines: deque[tuple[str, float]] = deque()
|
|
||||||
self._last_tts_line = ""
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def running(self) -> bool:
|
def running(self) -> bool:
|
||||||
return self._process is not None and self._process.poll() is None
|
return (
|
||||||
|
self._writer is not None
|
||||||
|
and not self._writer.is_closing()
|
||||||
|
and self._read_task is not None
|
||||||
|
and not self._read_task.done()
|
||||||
|
)
|
||||||
|
|
||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
if self.running:
|
if self.running:
|
||||||
await self._bus.publish(
|
await self._bus.publish(
|
||||||
WisperEvent(role="system", text="Nanobot TUI is already running.")
|
WisperEvent(role="system", text="Already connected to nanobot.")
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
command_parts = [
|
if not self._socket_path.exists():
|
||||||
os.path.expandvars(os.path.expanduser(part))
|
|
||||||
for part in shlex.split(self._command)
|
|
||||||
]
|
|
||||||
if not command_parts:
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(role="system", text="NANOBOT_COMMAND is empty.")
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
if not self._workdir.exists():
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(
|
|
||||||
role="system",
|
|
||||||
text=f"NANOBOT_WORKDIR does not exist: {self._workdir}",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
master_fd, slave_fd = pty.openpty()
|
|
||||||
child_env, child_venv_root = _build_process_env(
|
|
||||||
command_parts=command_parts, workdir=self._workdir
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
self._process = subprocess.Popen(
|
|
||||||
command_parts,
|
|
||||||
stdin=slave_fd,
|
|
||||||
stdout=slave_fd,
|
|
||||||
stderr=slave_fd,
|
|
||||||
cwd=str(self._workdir),
|
|
||||||
start_new_session=True,
|
|
||||||
env=child_env,
|
|
||||||
)
|
|
||||||
except FileNotFoundError as exc:
|
|
||||||
os.close(master_fd)
|
|
||||||
os.close(slave_fd)
|
|
||||||
await self._bus.publish(
|
await self._bus.publish(
|
||||||
WisperEvent(
|
WisperEvent(
|
||||||
role="system",
|
role="system",
|
||||||
text=(
|
text=(
|
||||||
"Could not start Nanobot process "
|
f"Nanobot API socket not found at {self._socket_path}. "
|
||||||
f"(command='{command_parts[0]}', workdir='{self._workdir}'): {exc}. "
|
"Make sure nanobot is running with the API channel enabled "
|
||||||
"Check NANOBOT_COMMAND and NANOBOT_WORKDIR."
|
"(set channels.api.enabled = true in ~/.nanobot/config.json, "
|
||||||
|
"then run: nanobot gateway)."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
except Exception as exc:
|
|
||||||
os.close(master_fd)
|
|
||||||
os.close(slave_fd)
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(role="system", text=f"Failed to spawn TUI process: {exc}")
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
os.close(slave_fd)
|
|
||||||
os.set_blocking(master_fd, False)
|
|
||||||
self._master_fd = master_fd
|
|
||||||
self._read_task = asyncio.create_task(
|
|
||||||
self._read_output(), name="nanobot-tui-reader"
|
|
||||||
)
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(
|
|
||||||
role="system",
|
|
||||||
text=f"Spawned Nanobot TUI with command: {' '.join(command_parts)}",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if child_venv_root:
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(
|
|
||||||
role="system",
|
|
||||||
text=f"Nanobot runtime venv: {child_venv_root}",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
async def send(self, text: str) -> None:
|
|
||||||
if not self.running or self._master_fd is None:
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(
|
|
||||||
role="system", text="Nanobot TUI is not running. Click spawn first."
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return
|
|
||||||
message = text.rstrip("\n") + "\n"
|
|
||||||
try:
|
try:
|
||||||
os.write(self._master_fd, message.encode())
|
self._reader, self._writer = await asyncio.open_unix_connection(
|
||||||
|
path=str(self._socket_path)
|
||||||
|
)
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
await self._bus.publish(
|
await self._bus.publish(
|
||||||
WisperEvent(role="system", text=f"Failed to write to TUI: {exc}")
|
WisperEvent(
|
||||||
|
role="system",
|
||||||
|
text=f"Could not connect to nanobot API socket: {exc}",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
async def stop(self) -> None:
|
|
||||||
if self._read_task:
|
|
||||||
self._read_task.cancel()
|
|
||||||
with contextlib.suppress(asyncio.CancelledError):
|
|
||||||
await self._read_task
|
|
||||||
self._read_task = None
|
|
||||||
|
|
||||||
if self.running and self._process:
|
|
||||||
try:
|
|
||||||
os.killpg(self._process.pid, signal.SIGTERM)
|
|
||||||
except ProcessLookupError:
|
|
||||||
pass
|
|
||||||
except Exception:
|
|
||||||
self._process.terminate()
|
|
||||||
try:
|
|
||||||
self._process.wait(timeout=3)
|
|
||||||
except Exception:
|
|
||||||
self._process.kill()
|
|
||||||
self._process.wait(timeout=1)
|
|
||||||
|
|
||||||
if self._master_fd is not None:
|
|
||||||
try:
|
|
||||||
os.close(self._master_fd)
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
self._master_fd = None
|
|
||||||
self._process = None
|
|
||||||
self._pending_output = ""
|
|
||||||
self._recent_lines.clear()
|
|
||||||
self._last_tts_line = ""
|
|
||||||
await self._bus.publish(WisperEvent(role="system", text="Stopped Nanobot TUI."))
|
|
||||||
|
|
||||||
async def _read_output(self) -> None:
|
|
||||||
if self._master_fd is None:
|
|
||||||
return
|
return
|
||||||
while self.running:
|
|
||||||
if not await self._wait_for_fd_readable():
|
|
||||||
break
|
|
||||||
try:
|
|
||||||
chunk = os.read(self._master_fd, 4096)
|
|
||||||
except BlockingIOError:
|
|
||||||
continue
|
|
||||||
except OSError:
|
|
||||||
break
|
|
||||||
|
|
||||||
if not chunk:
|
self._read_task = asyncio.create_task(self._read_loop(), name="nanobot-api-reader")
|
||||||
if not self.running:
|
await self._bus.publish(WisperEvent(role="system", text="Connected to nanobot."))
|
||||||
break
|
|
||||||
await asyncio.sleep(0.01)
|
|
||||||
continue
|
|
||||||
|
|
||||||
text = _clean_output(chunk.decode(errors="ignore"))
|
async def send(self, text: str) -> None:
|
||||||
if not text.strip():
|
if not self.running or self._writer is None:
|
||||||
continue
|
|
||||||
|
|
||||||
displayable, tts_publishable, saw_thinking = self._consume_output_chunk(
|
|
||||||
text
|
|
||||||
)
|
|
||||||
if saw_thinking:
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(role="agent-state", text="thinking")
|
|
||||||
)
|
|
||||||
if displayable:
|
|
||||||
await self._bus.publish(WisperEvent(role="nanobot", text=displayable))
|
|
||||||
if tts_publishable:
|
|
||||||
await self._bus.publish(
|
|
||||||
WisperEvent(role="nanobot-tts", text=tts_publishable)
|
|
||||||
)
|
|
||||||
|
|
||||||
trailing_display, trailing_tts, _ = self._consume_output_chunk("\n")
|
|
||||||
if trailing_display:
|
|
||||||
await self._bus.publish(WisperEvent(role="nanobot", text=trailing_display))
|
|
||||||
if trailing_tts:
|
|
||||||
await self._bus.publish(WisperEvent(role="nanobot-tts", text=trailing_tts))
|
|
||||||
|
|
||||||
if self._process is not None:
|
|
||||||
exit_code = self._process.poll()
|
|
||||||
await self._bus.publish(
|
await self._bus.publish(
|
||||||
WisperEvent(
|
WisperEvent(
|
||||||
role="system", text=f"Nanobot TUI exited (code={exit_code})."
|
role="system",
|
||||||
|
text="Not connected to nanobot. Click spawn first.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
return
|
||||||
def _consume_output_chunk(self, text: str) -> tuple[str, str, bool]:
|
payload = json.dumps({"type": "message", "content": text, "chat_id": "web"}) + "\n"
|
||||||
"""Return (displayable, tts_publishable, saw_thinking)."""
|
|
||||||
self._pending_output += text
|
|
||||||
|
|
||||||
lines = self._pending_output.split("\n")
|
|
||||||
self._pending_output = lines.pop()
|
|
||||||
|
|
||||||
if len(self._pending_output) > 1024:
|
|
||||||
lines.append(self._pending_output)
|
|
||||||
self._pending_output = ""
|
|
||||||
|
|
||||||
kept_lines: list[str] = []
|
|
||||||
tts_lines: list[str] = []
|
|
||||||
saw_thinking = False
|
|
||||||
for line in lines:
|
|
||||||
normalized = self._normalize_line(line)
|
|
||||||
if not normalized:
|
|
||||||
continue
|
|
||||||
if self._suppress_noisy_ui and self._is_noisy_ui_line(normalized):
|
|
||||||
# Detect thinking lines even though they are filtered from display.
|
|
||||||
candidate = LEADING_NON_WORD_RE.sub("", normalized)
|
|
||||||
if THINKING_LINE_RE.search(candidate):
|
|
||||||
saw_thinking = True
|
|
||||||
continue
|
|
||||||
if normalized != self._last_tts_line:
|
|
||||||
tts_lines.append(normalized)
|
|
||||||
self._last_tts_line = normalized
|
|
||||||
if self._is_recent_duplicate(normalized):
|
|
||||||
continue
|
|
||||||
kept_lines.append(normalized)
|
|
||||||
|
|
||||||
return "\n".join(kept_lines).strip(), "\n".join(tts_lines).strip(), saw_thinking
|
|
||||||
|
|
||||||
def _normalize_line(self, line: str) -> str:
|
|
||||||
without_emoji = EMOJI_RE.sub(" ", line)
|
|
||||||
normalized = WHITESPACE_RE.sub(" ", without_emoji).strip()
|
|
||||||
# Strip leading "nanobot:" prefix that the TUI echoes in its own output,
|
|
||||||
# since the frontend already labels lines with the role name and TTS
|
|
||||||
# should not read the agent's own name aloud.
|
|
||||||
normalized = AGENT_OUTPUT_PREFIX_RE.sub("", normalized)
|
|
||||||
return normalized
|
|
||||||
|
|
||||||
def _is_noisy_ui_line(self, line: str) -> bool:
|
|
||||||
if SPINNER_ONLY_RE.fullmatch(line):
|
|
||||||
return True
|
|
||||||
if BOX_DRAWING_ONLY_RE.fullmatch(line):
|
|
||||||
return True
|
|
||||||
|
|
||||||
candidate = LEADING_NON_WORD_RE.sub("", line)
|
|
||||||
if THINKING_LINE_RE.search(candidate):
|
|
||||||
return True
|
|
||||||
if TOOL_STREAM_LINE_RE.match(candidate):
|
|
||||||
return True
|
|
||||||
if USER_ECHO_LINE_RE.match(candidate):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _wait_for_fd_readable(self) -> bool:
|
|
||||||
if self._master_fd is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
loop = asyncio.get_running_loop()
|
|
||||||
ready: asyncio.Future[None] = loop.create_future()
|
|
||||||
|
|
||||||
def _mark_ready() -> None:
|
|
||||||
if not ready.done():
|
|
||||||
ready.set_result(None)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.add_reader(self._master_fd, _mark_ready)
|
self._writer.write(payload.encode())
|
||||||
except (AttributeError, NotImplementedError, OSError, ValueError):
|
await self._writer.drain()
|
||||||
await asyncio.sleep(0.01)
|
except OSError as exc:
|
||||||
return True
|
await self._bus.publish(WisperEvent(role="system", text=f"Send failed: {exc}"))
|
||||||
|
await self._cleanup()
|
||||||
|
|
||||||
|
async def send_ui_response(self, request_id: str, value: str) -> None:
|
||||||
|
"""Forward a ui-response (choice selection) back to nanobot."""
|
||||||
|
if not self.running or self._writer is None:
|
||||||
|
return
|
||||||
|
payload = (
|
||||||
|
json.dumps(
|
||||||
|
{"type": "ui-response", "request_id": request_id, "value": value, "chat_id": "web"}
|
||||||
|
)
|
||||||
|
+ "\n"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
await ready
|
self._writer.write(payload.encode())
|
||||||
return True
|
await self._writer.drain()
|
||||||
|
except OSError as exc:
|
||||||
|
await self._bus.publish(WisperEvent(role="system", text=f"Send failed: {exc}"))
|
||||||
|
await self._cleanup()
|
||||||
|
|
||||||
|
async def send_command(self, command: str) -> None:
|
||||||
|
"""Send a command (e.g. 'reset') to nanobot."""
|
||||||
|
if not self.running or self._writer is None:
|
||||||
|
await self._bus.publish(
|
||||||
|
WisperEvent(
|
||||||
|
role="system",
|
||||||
|
text="Not connected to nanobot. Click spawn first.",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return
|
||||||
|
payload = json.dumps({"type": "command", "command": command, "chat_id": "web"}) + "\n"
|
||||||
|
try:
|
||||||
|
self._writer.write(payload.encode())
|
||||||
|
await self._writer.drain()
|
||||||
|
except OSError as exc:
|
||||||
|
await self._bus.publish(WisperEvent(role="system", text=f"Send failed: {exc}"))
|
||||||
|
await self._cleanup()
|
||||||
|
|
||||||
|
async def stop(self) -> None:
|
||||||
|
await self._cleanup()
|
||||||
|
await self._bus.publish(WisperEvent(role="system", text="Disconnected from nanobot."))
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Internal
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _cleanup(self) -> None:
|
||||||
|
if self._read_task and not self._read_task.done():
|
||||||
|
self._read_task.cancel()
|
||||||
|
try:
|
||||||
|
await self._read_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
self._read_task = None
|
||||||
|
|
||||||
|
if self._writer:
|
||||||
|
try:
|
||||||
|
self._writer.close()
|
||||||
|
await self._writer.wait_closed()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
self._writer = None
|
||||||
|
self._reader = None
|
||||||
|
|
||||||
|
async def _read_loop(self) -> None:
|
||||||
|
"""Read newline-delimited JSON from nanobot and publish WisperEvents."""
|
||||||
|
assert self._reader is not None
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
line = await self._reader.readline()
|
||||||
|
except OSError:
|
||||||
|
break
|
||||||
|
if not line:
|
||||||
|
break # EOF — nanobot closed the connection
|
||||||
|
await self._handle_line(line)
|
||||||
finally:
|
finally:
|
||||||
with contextlib.suppress(Exception):
|
await self._bus.publish(
|
||||||
loop.remove_reader(self._master_fd)
|
WisperEvent(role="system", text="Nanobot closed the connection.")
|
||||||
|
)
|
||||||
|
# Clear writer so running → False
|
||||||
|
self._writer = None
|
||||||
|
self._reader = None
|
||||||
|
|
||||||
def _is_recent_duplicate(self, line: str) -> bool:
|
async def _handle_line(self, line: bytes) -> None:
|
||||||
now = time.monotonic()
|
raw = line.decode(errors="replace").strip()
|
||||||
normalized = line.lower()
|
if not raw:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
obj = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
await self._bus.publish(
|
||||||
|
WisperEvent(role="system", text=f"Malformed response from nanobot: {raw[:200]}")
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
while (
|
msg_type = str(obj.get("type", ""))
|
||||||
self._recent_lines
|
|
||||||
and (now - self._recent_lines[0][1]) > self._dedup_window_s
|
|
||||||
):
|
|
||||||
self._recent_lines.popleft()
|
|
||||||
|
|
||||||
for previous, _timestamp in self._recent_lines:
|
if msg_type == "message":
|
||||||
if previous == normalized:
|
content = str(obj.get("content", ""))
|
||||||
return True
|
is_progress = bool(obj.get("is_progress", False))
|
||||||
|
if is_progress:
|
||||||
|
# Intermediate tool-call hint — show in UI, skip TTS
|
||||||
|
await self._bus.publish(WisperEvent(role="nanobot-progress", text=content))
|
||||||
|
else:
|
||||||
|
# Final answer — display + TTS
|
||||||
|
await self._bus.publish(WisperEvent(role="nanobot", text=content))
|
||||||
|
await self._bus.publish(WisperEvent(role="nanobot-tts", text=content))
|
||||||
|
|
||||||
self._recent_lines.append((normalized, now))
|
elif msg_type == "agent_state":
|
||||||
return False
|
state = str(obj.get("state", ""))
|
||||||
|
await self._bus.publish(WisperEvent(role="agent-state", text=state))
|
||||||
|
|
||||||
|
elif msg_type == "toast":
|
||||||
|
# Forward the full toast payload as JSON so the frontend can render it.
|
||||||
|
await self._bus.publish(WisperEvent(role="toast", text=json.dumps(obj)))
|
||||||
|
|
||||||
|
elif msg_type == "choice":
|
||||||
|
# Forward the full choice payload as JSON so the frontend can render it.
|
||||||
|
await self._bus.publish(WisperEvent(role="choice", text=json.dumps(obj)))
|
||||||
|
|
||||||
|
elif msg_type == "pong":
|
||||||
|
pass # keepalive, ignore
|
||||||
|
|
||||||
|
elif msg_type == "error":
|
||||||
|
await self._bus.publish(
|
||||||
|
WisperEvent(role="system", text=f"Nanobot error: {obj.get('error', '')}")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SuperTonicGateway — public interface (unchanged from original)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class SuperTonicGateway:
|
class SuperTonicGateway:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.bus = WisperBus()
|
self.bus = WisperBus()
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
self._tui: NanobotTUIProcess | None = None
|
self._process: NanobotApiProcess | None = None
|
||||||
|
socket_path = Path(os.getenv("NANOBOT_API_SOCKET", str(DEFAULT_SOCKET_PATH))).expanduser()
|
||||||
|
self._socket_path = socket_path
|
||||||
|
|
||||||
async def subscribe(self) -> asyncio.Queue[WisperEvent]:
|
async def subscribe(self) -> asyncio.Queue[WisperEvent]:
|
||||||
return await self.bus.subscribe()
|
return await self.bus.subscribe()
|
||||||
|
|
@ -445,18 +273,15 @@ class SuperTonicGateway:
|
||||||
await self.bus.unsubscribe(queue)
|
await self.bus.unsubscribe(queue)
|
||||||
|
|
||||||
async def spawn_tui(self) -> None:
|
async def spawn_tui(self) -> None:
|
||||||
|
"""Connect to nanobot (name kept for API compatibility with app.py)."""
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
if self._tui and self._tui.running:
|
if self._process and self._process.running:
|
||||||
await self.bus.publish(
|
await self.bus.publish(
|
||||||
WisperEvent(role="system", text="Nanobot TUI is already running.")
|
WisperEvent(role="system", text="Already connected to nanobot.")
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
self._process = NanobotApiProcess(bus=self.bus, socket_path=self._socket_path)
|
||||||
command, workdir = _resolve_nanobot_command_and_workdir()
|
await self._process.start()
|
||||||
self._tui = NanobotTUIProcess(
|
|
||||||
bus=self.bus, command=command, workdir=workdir
|
|
||||||
)
|
|
||||||
await self._tui.start()
|
|
||||||
|
|
||||||
async def send_user_message(self, text: str) -> None:
|
async def send_user_message(self, text: str) -> None:
|
||||||
message = text.strip()
|
message = text.strip()
|
||||||
|
|
@ -464,20 +289,34 @@ class SuperTonicGateway:
|
||||||
return
|
return
|
||||||
await self.bus.publish(WisperEvent(role="user", text=message))
|
await self.bus.publish(WisperEvent(role="user", text=message))
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
if not self._tui:
|
if not self._process:
|
||||||
await self.bus.publish(
|
await self.bus.publish(
|
||||||
WisperEvent(
|
WisperEvent(
|
||||||
role="system",
|
role="system",
|
||||||
text="Nanobot TUI is not running. Click spawn first.",
|
text="Not connected to nanobot. Click spawn first.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
await self._tui.send(message)
|
await self._process.send(message)
|
||||||
|
|
||||||
|
async def send_ui_response(self, request_id: str, value: str) -> None:
|
||||||
|
"""Forward a choice selection back to nanobot."""
|
||||||
|
async with self._lock:
|
||||||
|
if self._process:
|
||||||
|
await self._process.send_ui_response(request_id, value)
|
||||||
|
|
||||||
|
async def send_command(self, command: str) -> None:
|
||||||
|
"""Send a command (e.g. 'reset') to nanobot."""
|
||||||
|
async with self._lock:
|
||||||
|
if self._process:
|
||||||
|
await self._process.send_command(command)
|
||||||
|
|
||||||
async def stop_tui(self) -> None:
|
async def stop_tui(self) -> None:
|
||||||
|
"""Disconnect from nanobot (name kept for API compatibility with app.py)."""
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
if self._tui:
|
if self._process:
|
||||||
await self._tui.stop()
|
await self._process.stop()
|
||||||
|
self._process = None
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
async def shutdown(self) -> None:
|
||||||
await self.stop_tui()
|
await self.stop_tui()
|
||||||
|
|
|
||||||
211
voice_rtc.py
211
voice_rtc.py
|
|
@ -41,9 +41,7 @@ try:
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
FASTER_WHISPER_AVAILABLE = True
|
FASTER_WHISPER_AVAILABLE = True
|
||||||
except (
|
except Exception: # pragma: no cover - runtime fallback when faster-whisper is unavailable
|
||||||
Exception
|
|
||||||
): # pragma: no cover - runtime fallback when faster-whisper is unavailable
|
|
||||||
WhisperModel = None # type: ignore[assignment]
|
WhisperModel = None # type: ignore[assignment]
|
||||||
FASTER_WHISPER_AVAILABLE = False
|
FASTER_WHISPER_AVAILABLE = False
|
||||||
|
|
||||||
|
|
@ -82,10 +80,7 @@ ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
|
||||||
CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]")
|
CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]")
|
||||||
BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
|
BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
|
||||||
TTS_ALLOWED_ASCII = set(
|
TTS_ALLOWED_ASCII = set(
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?;:'\"()[]{}@#%&*+-_/<>|"
|
||||||
"abcdefghijklmnopqrstuvwxyz"
|
|
||||||
"0123456789"
|
|
||||||
" .,!?;:'\"()[]{}@#%&*+-_/<>|"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -95,9 +90,7 @@ def _sanitize_tts_text(text: str) -> str:
|
||||||
cleaned = cleaned.replace("\u00a0", " ")
|
cleaned = cleaned.replace("\u00a0", " ")
|
||||||
cleaned = cleaned.replace("•", " ")
|
cleaned = cleaned.replace("•", " ")
|
||||||
cleaned = CONTROL_CHAR_RE.sub(" ", cleaned)
|
cleaned = CONTROL_CHAR_RE.sub(" ", cleaned)
|
||||||
cleaned = "".join(
|
cleaned = "".join(ch if (ch in TTS_ALLOWED_ASCII or ch.isspace()) else " " for ch in cleaned)
|
||||||
ch if (ch in TTS_ALLOWED_ASCII or ch.isspace()) else " " for ch in cleaned
|
|
||||||
)
|
|
||||||
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|
||||||
|
|
@ -131,15 +124,9 @@ if AIORTC_AVAILABLE:
|
||||||
self._timestamp = 0
|
self._timestamp = 0
|
||||||
self._resample_state = None
|
self._resample_state = None
|
||||||
self._resample_source_rate: int | None = None
|
self._resample_source_rate: int | None = None
|
||||||
self._lead_in_ms = max(
|
self._lead_in_ms = max(0, int(os.getenv("HOST_RTC_OUTBOUND_LEAD_IN_MS", "120")))
|
||||||
0, int(os.getenv("HOST_RTC_OUTBOUND_LEAD_IN_MS", "120"))
|
self._lead_in_frames = (self._lead_in_ms + self._frame_ms - 1) // self._frame_ms
|
||||||
)
|
self._lead_in_idle_s = max(0.1, float(os.getenv("HOST_RTC_OUTBOUND_IDLE_S", "0.6")))
|
||||||
self._lead_in_frames = (
|
|
||||||
self._lead_in_ms + self._frame_ms - 1
|
|
||||||
) // self._frame_ms
|
|
||||||
self._lead_in_idle_s = max(
|
|
||||||
0.1, float(os.getenv("HOST_RTC_OUTBOUND_IDLE_S", "0.6"))
|
|
||||||
)
|
|
||||||
self._last_enqueue_at = 0.0
|
self._last_enqueue_at = 0.0
|
||||||
self._closed = False
|
self._closed = False
|
||||||
self._frame_duration_s = frame_ms / 1000.0
|
self._frame_duration_s = frame_ms / 1000.0
|
||||||
|
|
@ -154,9 +141,7 @@ if AIORTC_AVAILABLE:
|
||||||
)
|
)
|
||||||
self._on_playing_changed: Callable[[bool], None] | None = None
|
self._on_playing_changed: Callable[[bool], None] | None = None
|
||||||
|
|
||||||
async def enqueue_pcm(
|
async def enqueue_pcm(self, pcm: bytes, sample_rate: int, channels: int = 1) -> None:
|
||||||
self, pcm: bytes, sample_rate: int, channels: int = 1
|
|
||||||
) -> None:
|
|
||||||
if self._closed or not pcm:
|
if self._closed or not pcm:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -244,9 +229,7 @@ if AIORTC_AVAILABLE:
|
||||||
|
|
||||||
self._last_recv_at = loop.time()
|
self._last_recv_at = loop.time()
|
||||||
|
|
||||||
frame = AudioFrame(
|
frame = AudioFrame(format="s16", layout="mono", samples=self._samples_per_frame)
|
||||||
format="s16", layout="mono", samples=self._samples_per_frame
|
|
||||||
)
|
|
||||||
frame.planes[0].update(payload)
|
frame.planes[0].update(payload)
|
||||||
frame.sample_rate = self._sample_rate
|
frame.sample_rate = self._sample_rate
|
||||||
frame.time_base = Fraction(1, self._sample_rate)
|
frame.time_base = Fraction(1, self._sample_rate)
|
||||||
|
|
@ -263,9 +246,7 @@ else:
|
||||||
class QueueAudioTrack: # pragma: no cover - used only when aiortc is unavailable
|
class QueueAudioTrack: # pragma: no cover - used only when aiortc is unavailable
|
||||||
_on_playing_changed: Callable[[bool], None] | None = None
|
_on_playing_changed: Callable[[bool], None] | None = None
|
||||||
|
|
||||||
async def enqueue_pcm(
|
async def enqueue_pcm(self, pcm: bytes, sample_rate: int, channels: int = 1) -> None:
|
||||||
self, pcm: bytes, sample_rate: int, channels: int = 1
|
|
||||||
) -> None:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
|
|
@ -296,23 +277,17 @@ class CommandSpeechToText:
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
if not self.enabled or not pcm:
|
if not self.enabled or not pcm:
|
||||||
return None
|
return None
|
||||||
return await asyncio.to_thread(
|
return await asyncio.to_thread(self._transcribe_blocking, pcm, sample_rate, channels)
|
||||||
self._transcribe_blocking, pcm, sample_rate, channels
|
|
||||||
)
|
|
||||||
|
|
||||||
def unavailable_reason(self) -> str:
|
def unavailable_reason(self) -> str:
|
||||||
if not self._command_template:
|
if not self._command_template:
|
||||||
return "HOST_STT_COMMAND is not configured."
|
return "HOST_STT_COMMAND is not configured."
|
||||||
return "HOST_STT_COMMAND failed to produce transcript."
|
return "HOST_STT_COMMAND failed to produce transcript."
|
||||||
|
|
||||||
def _transcribe_blocking(
|
def _transcribe_blocking(self, pcm: bytes, sample_rate: int, channels: int) -> str | None:
|
||||||
self, pcm: bytes, sample_rate: int, channels: int
|
|
||||||
) -> str | None:
|
|
||||||
tmp_path: str | None = None
|
tmp_path: str | None = None
|
||||||
try:
|
try:
|
||||||
tmp_path = _write_temp_wav(
|
tmp_path = _write_temp_wav(pcm=pcm, sample_rate=sample_rate, channels=channels)
|
||||||
pcm=pcm, sample_rate=sample_rate, channels=channels
|
|
||||||
)
|
|
||||||
|
|
||||||
command = self._command_template
|
command = self._command_template
|
||||||
if "{input_wav}" in command:
|
if "{input_wav}" in command:
|
||||||
|
|
@ -343,9 +318,7 @@ class FasterWhisperSpeechToText:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
|
self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
|
||||||
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
|
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
|
||||||
self._compute_type = (
|
self._compute_type = os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
|
||||||
os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
|
|
||||||
)
|
|
||||||
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
|
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
|
||||||
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
|
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
|
||||||
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
|
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
|
||||||
|
|
@ -357,12 +330,8 @@ class FasterWhisperSpeechToText:
|
||||||
"off",
|
"off",
|
||||||
}
|
}
|
||||||
self._temperature = float(os.getenv("HOST_STT_TEMPERATURE", "0.0"))
|
self._temperature = float(os.getenv("HOST_STT_TEMPERATURE", "0.0"))
|
||||||
self._log_prob_threshold = float(
|
self._log_prob_threshold = float(os.getenv("HOST_STT_LOG_PROB_THRESHOLD", "-1.0"))
|
||||||
os.getenv("HOST_STT_LOG_PROB_THRESHOLD", "-1.0")
|
self._no_speech_threshold = float(os.getenv("HOST_STT_NO_SPEECH_THRESHOLD", "0.6"))
|
||||||
)
|
|
||||||
self._no_speech_threshold = float(
|
|
||||||
os.getenv("HOST_STT_NO_SPEECH_THRESHOLD", "0.6")
|
|
||||||
)
|
|
||||||
self._compression_ratio_threshold = float(
|
self._compression_ratio_threshold = float(
|
||||||
os.getenv("HOST_STT_COMPRESSION_RATIO_THRESHOLD", "2.4")
|
os.getenv("HOST_STT_COMPRESSION_RATIO_THRESHOLD", "2.4")
|
||||||
)
|
)
|
||||||
|
|
@ -373,9 +342,7 @@ class FasterWhisperSpeechToText:
|
||||||
).strip()
|
).strip()
|
||||||
or None
|
or None
|
||||||
)
|
)
|
||||||
self._repetition_penalty = float(
|
self._repetition_penalty = float(os.getenv("HOST_STT_REPETITION_PENALTY", "1.0"))
|
||||||
os.getenv("HOST_STT_REPETITION_PENALTY", "1.0")
|
|
||||||
)
|
|
||||||
raw_hallucination_threshold = os.getenv(
|
raw_hallucination_threshold = os.getenv(
|
||||||
"HOST_STT_HALLUCINATION_SILENCE_THRESHOLD", ""
|
"HOST_STT_HALLUCINATION_SILENCE_THRESHOLD", ""
|
||||||
).strip()
|
).strip()
|
||||||
|
|
@ -401,9 +368,7 @@ class FasterWhisperSpeechToText:
|
||||||
if not self.enabled or not pcm:
|
if not self.enabled or not pcm:
|
||||||
return None
|
return None
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
return await asyncio.to_thread(
|
return await asyncio.to_thread(self._transcribe_blocking, pcm, sample_rate, channels)
|
||||||
self._transcribe_blocking, pcm, sample_rate, channels
|
|
||||||
)
|
|
||||||
|
|
||||||
async def warmup(self) -> None:
|
async def warmup(self) -> None:
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
|
|
@ -428,15 +393,11 @@ class FasterWhisperSpeechToText:
|
||||||
self._init_error = str(exc)
|
self._init_error = str(exc)
|
||||||
self._model = None
|
self._model = None
|
||||||
|
|
||||||
def _transcribe_blocking(
|
def _transcribe_blocking(self, pcm: bytes, sample_rate: int, channels: int) -> str | None:
|
||||||
self, pcm: bytes, sample_rate: int, channels: int
|
|
||||||
) -> str | None:
|
|
||||||
self._initialize_blocking()
|
self._initialize_blocking()
|
||||||
if self._model is None:
|
if self._model is None:
|
||||||
if self._init_error:
|
if self._init_error:
|
||||||
raise RuntimeError(
|
raise RuntimeError(f"faster-whisper initialization failed: {self._init_error}")
|
||||||
f"faster-whisper initialization failed: {self._init_error}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if NUMPY_AVAILABLE and np is not None:
|
if NUMPY_AVAILABLE and np is not None:
|
||||||
|
|
@ -481,9 +442,7 @@ class FasterWhisperSpeechToText:
|
||||||
|
|
||||||
tmp_path: str | None = None
|
tmp_path: str | None = None
|
||||||
try:
|
try:
|
||||||
tmp_path = _write_temp_wav(
|
tmp_path = _write_temp_wav(pcm=pcm, sample_rate=sample_rate, channels=channels)
|
||||||
pcm=pcm, sample_rate=sample_rate, channels=channels
|
|
||||||
)
|
|
||||||
segments, _info = self._model.transcribe(
|
segments, _info = self._model.transcribe(
|
||||||
tmp_path,
|
tmp_path,
|
||||||
language=self._language or None,
|
language=self._language or None,
|
||||||
|
|
@ -580,20 +539,14 @@ class HostSpeechToText:
|
||||||
|
|
||||||
class SupertonicTextToSpeech:
|
class SupertonicTextToSpeech:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._model = (
|
self._model = os.getenv("SUPERTONIC_MODEL", "supertonic-2").strip() or "supertonic-2"
|
||||||
os.getenv("SUPERTONIC_MODEL", "supertonic-2").strip() or "supertonic-2"
|
self._voice_style_name = os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
|
||||||
)
|
|
||||||
self._voice_style_name = (
|
|
||||||
os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
|
|
||||||
)
|
|
||||||
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
|
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
|
||||||
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
|
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
|
||||||
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
|
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
|
||||||
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
|
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
|
||||||
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
|
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
|
||||||
self._auto_download = os.getenv(
|
self._auto_download = os.getenv("SUPERTONIC_AUTO_DOWNLOAD", "1").strip() not in {
|
||||||
"SUPERTONIC_AUTO_DOWNLOAD", "1"
|
|
||||||
).strip() not in {
|
|
||||||
"0",
|
"0",
|
||||||
"false",
|
"false",
|
||||||
"False",
|
"False",
|
||||||
|
|
@ -608,9 +561,7 @@ class SupertonicTextToSpeech:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def enabled(self) -> bool:
|
def enabled(self) -> bool:
|
||||||
return (
|
return SUPERTONIC_TTS_AVAILABLE and SupertonicTTS is not None and NUMPY_AVAILABLE
|
||||||
SUPERTONIC_TTS_AVAILABLE and SupertonicTTS is not None and NUMPY_AVAILABLE
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def init_error(self) -> str | None:
|
def init_error(self) -> str | None:
|
||||||
|
|
@ -723,9 +674,7 @@ class SupertonicTextToSpeech:
|
||||||
|
|
||||||
class HostTextToSpeech:
|
class HostTextToSpeech:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
provider = (
|
provider = (os.getenv("HOST_TTS_PROVIDER", "supertonic").strip() or "supertonic").lower()
|
||||||
os.getenv("HOST_TTS_PROVIDER", "supertonic").strip() or "supertonic"
|
|
||||||
).lower()
|
|
||||||
if provider not in {"supertonic", "command", "espeak", "auto"}:
|
if provider not in {"supertonic", "command", "espeak", "auto"}:
|
||||||
provider = "auto"
|
provider = "auto"
|
||||||
self._provider = provider
|
self._provider = provider
|
||||||
|
|
@ -770,9 +719,7 @@ class HostTextToSpeech:
|
||||||
if not self._supertonic.enabled:
|
if not self._supertonic.enabled:
|
||||||
return "supertonic package is not available."
|
return "supertonic package is not available."
|
||||||
if self._supertonic.init_error:
|
if self._supertonic.init_error:
|
||||||
return (
|
return f"supertonic initialization failed: {self._supertonic.init_error}"
|
||||||
f"supertonic initialization failed: {self._supertonic.init_error}"
|
|
||||||
)
|
|
||||||
return "supertonic did not return audio."
|
return "supertonic did not return audio."
|
||||||
if self._provider == "command":
|
if self._provider == "command":
|
||||||
return "HOST_TTS_COMMAND is not configured."
|
return "HOST_TTS_COMMAND is not configured."
|
||||||
|
|
@ -797,13 +744,9 @@ class HostTextToSpeech:
|
||||||
if "{output_wav}" in command:
|
if "{output_wav}" in command:
|
||||||
tmp_path: str | None = None
|
tmp_path: str | None = None
|
||||||
try:
|
try:
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
|
||||||
suffix=".wav", delete=False
|
|
||||||
) as tmp_file:
|
|
||||||
tmp_path = tmp_file.name
|
tmp_path = tmp_file.name
|
||||||
command_with_output = command.replace(
|
command_with_output = command.replace("{output_wav}", shlex.quote(tmp_path))
|
||||||
"{output_wav}", shlex.quote(tmp_path)
|
|
||||||
)
|
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
command_with_output,
|
command_with_output,
|
||||||
shell=True,
|
shell=True,
|
||||||
|
|
@ -872,9 +815,7 @@ SendJsonCallable = Callable[[dict[str, Any]], Awaitable[None]]
|
||||||
|
|
||||||
|
|
||||||
class WebRTCVoiceSession:
|
class WebRTCVoiceSession:
|
||||||
def __init__(
|
def __init__(self, gateway: "SuperTonicGateway", send_json: SendJsonCallable) -> None:
|
||||||
self, gateway: "SuperTonicGateway", send_json: SendJsonCallable
|
|
||||||
) -> None:
|
|
||||||
self._gateway = gateway
|
self._gateway = gateway
|
||||||
self._send_json = send_json
|
self._send_json = send_json
|
||||||
|
|
||||||
|
|
@ -886,9 +827,7 @@ class WebRTCVoiceSession:
|
||||||
|
|
||||||
self._stt = HostSpeechToText()
|
self._stt = HostSpeechToText()
|
||||||
self._tts = HostTextToSpeech()
|
self._tts = HostTextToSpeech()
|
||||||
self._stt_segment_queue_size = max(
|
self._stt_segment_queue_size = max(1, int(os.getenv("HOST_STT_SEGMENT_QUEUE_SIZE", "2")))
|
||||||
1, int(os.getenv("HOST_STT_SEGMENT_QUEUE_SIZE", "2"))
|
|
||||||
)
|
|
||||||
self._stt_segments: asyncio.Queue[bytes] = asyncio.Queue(
|
self._stt_segments: asyncio.Queue[bytes] = asyncio.Queue(
|
||||||
maxsize=self._stt_segment_queue_size
|
maxsize=self._stt_segment_queue_size
|
||||||
)
|
)
|
||||||
|
|
@ -913,11 +852,7 @@ class WebRTCVoiceSession:
|
||||||
|
|
||||||
self._stt_min_ptt_ms = max(
|
self._stt_min_ptt_ms = max(
|
||||||
120,
|
120,
|
||||||
int(
|
int(os.getenv("HOST_STT_MIN_PTT_MS", os.getenv("HOST_STT_MIN_SEGMENT_MS", "220"))),
|
||||||
os.getenv(
|
|
||||||
"HOST_STT_MIN_PTT_MS", os.getenv("HOST_STT_MIN_SEGMENT_MS", "220")
|
|
||||||
)
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self._stt_suppress_during_tts = os.getenv(
|
self._stt_suppress_during_tts = os.getenv(
|
||||||
|
|
@ -973,9 +908,7 @@ class WebRTCVoiceSession:
|
||||||
sdp = str(payload.get("sdp", "")).strip()
|
sdp = str(payload.get("sdp", "")).strip()
|
||||||
rtc_type = str(payload.get("rtcType", "offer")).strip() or "offer"
|
rtc_type = str(payload.get("rtcType", "offer")).strip() or "offer"
|
||||||
if not sdp:
|
if not sdp:
|
||||||
await self._send_json(
|
await self._send_json({"type": "rtc-error", "message": "Missing SDP offer payload."})
|
||||||
{"type": "rtc-error", "message": "Missing SDP offer payload."}
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
await self._close_peer_connection()
|
await self._close_peer_connection()
|
||||||
|
|
@ -1009,9 +942,7 @@ class WebRTCVoiceSession:
|
||||||
name="voice-inbound-track",
|
name="voice-inbound-track",
|
||||||
)
|
)
|
||||||
|
|
||||||
await peer_connection.setRemoteDescription(
|
await peer_connection.setRemoteDescription(RTCSessionDescription(sdp=sdp, type=rtc_type))
|
||||||
RTCSessionDescription(sdp=sdp, type=rtc_type)
|
|
||||||
)
|
|
||||||
await self._drain_pending_ice_candidates(peer_connection)
|
await self._drain_pending_ice_candidates(peer_connection)
|
||||||
answer = await peer_connection.createAnswer()
|
answer = await peer_connection.createAnswer()
|
||||||
await peer_connection.setLocalDescription(answer)
|
await peer_connection.setLocalDescription(answer)
|
||||||
|
|
@ -1021,10 +952,7 @@ class WebRTCVoiceSession:
|
||||||
sdp_answer = str(local_description.sdp or "")
|
sdp_answer = str(local_description.sdp or "")
|
||||||
if sdp_answer:
|
if sdp_answer:
|
||||||
sdp_answer = (
|
sdp_answer = (
|
||||||
sdp_answer.replace("\r\n", "\n")
|
sdp_answer.replace("\r\n", "\n").replace("\r", "\n").strip().replace("\n", "\r\n")
|
||||||
.replace("\r", "\n")
|
|
||||||
.strip()
|
|
||||||
.replace("\n", "\r\n")
|
|
||||||
+ "\r\n"
|
+ "\r\n"
|
||||||
)
|
)
|
||||||
await self._send_json(
|
await self._send_json(
|
||||||
|
|
@ -1036,15 +964,9 @@ class WebRTCVoiceSession:
|
||||||
)
|
)
|
||||||
|
|
||||||
if self._stt.enabled and not self._stt_worker_task:
|
if self._stt.enabled and not self._stt_worker_task:
|
||||||
self._stt_worker_task = asyncio.create_task(
|
self._stt_worker_task = asyncio.create_task(self._stt_worker(), name="voice-stt-worker")
|
||||||
self._stt_worker(), name="voice-stt-worker"
|
if self._stt.enabled and (self._stt_warmup_task is None or self._stt_warmup_task.done()):
|
||||||
)
|
self._stt_warmup_task = asyncio.create_task(self._warmup_stt(), name="voice-stt-warmup")
|
||||||
if self._stt.enabled and (
|
|
||||||
self._stt_warmup_task is None or self._stt_warmup_task.done()
|
|
||||||
):
|
|
||||||
self._stt_warmup_task = asyncio.create_task(
|
|
||||||
self._warmup_stt(), name="voice-stt-warmup"
|
|
||||||
)
|
|
||||||
elif not self._stt.enabled and not self._stt_unavailable_notice_sent:
|
elif not self._stt.enabled and not self._stt_unavailable_notice_sent:
|
||||||
self._stt_unavailable_notice_sent = True
|
self._stt_unavailable_notice_sent = True
|
||||||
await self._publish_system(
|
await self._publish_system(
|
||||||
|
|
@ -1103,9 +1025,7 @@ class WebRTCVoiceSession:
|
||||||
candidate = candidate_from_sdp(candidate_sdp)
|
candidate = candidate_from_sdp(candidate_sdp)
|
||||||
candidate.sdpMid = raw_candidate.get("sdpMid")
|
candidate.sdpMid = raw_candidate.get("sdpMid")
|
||||||
line_index = raw_candidate.get("sdpMLineIndex")
|
line_index = raw_candidate.get("sdpMLineIndex")
|
||||||
candidate.sdpMLineIndex = (
|
candidate.sdpMLineIndex = int(line_index) if line_index is not None else None
|
||||||
int(line_index) if line_index is not None else None
|
|
||||||
)
|
|
||||||
await peer_connection.addIceCandidate(candidate)
|
await peer_connection.addIceCandidate(candidate)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
await self._publish_system(f"Failed to add ICE candidate: {exc}")
|
await self._publish_system(f"Failed to add ICE candidate: {exc}")
|
||||||
|
|
@ -1147,9 +1067,7 @@ class WebRTCVoiceSession:
|
||||||
if self._tts_flush_handle:
|
if self._tts_flush_handle:
|
||||||
self._tts_flush_handle.cancel()
|
self._tts_flush_handle.cancel()
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
self._tts_flush_handle = loop.call_later(
|
self._tts_flush_handle = loop.call_later(max(0.05, delay_s), self._schedule_tts_flush)
|
||||||
max(0.05, delay_s), self._schedule_tts_flush
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _flush_tts(self) -> None:
|
async def _flush_tts(self) -> None:
|
||||||
async with self._tts_flush_lock:
|
async with self._tts_flush_lock:
|
||||||
|
|
@ -1230,9 +1148,7 @@ class WebRTCVoiceSession:
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
frame = await track.recv()
|
frame = await track.recv()
|
||||||
pcm16, frame_ms, resample_state = self._frame_to_pcm16k_mono(
|
pcm16, frame_ms, resample_state = self._frame_to_pcm16k_mono(frame, resample_state)
|
||||||
frame, resample_state
|
|
||||||
)
|
|
||||||
if not pcm16:
|
if not pcm16:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -1249,10 +1165,9 @@ class WebRTCVoiceSession:
|
||||||
f"time_base={getattr(frame, 'time_base', None)}."
|
f"time_base={getattr(frame, 'time_base', None)}."
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
loop = asyncio.get_running_loop()
|
||||||
self._stt_suppress_during_tts
|
|
||||||
and asyncio.get_running_loop().time() < self._stt_suppress_until
|
if self._stt_suppress_during_tts and loop.time() < self._stt_suppress_until:
|
||||||
):
|
|
||||||
recording = False
|
recording = False
|
||||||
recording_started_at = 0.0
|
recording_started_at = 0.0
|
||||||
segment_ms = 0.0
|
segment_ms = 0.0
|
||||||
|
|
@ -1262,7 +1177,7 @@ class WebRTCVoiceSession:
|
||||||
if self._ptt_pressed:
|
if self._ptt_pressed:
|
||||||
if not recording:
|
if not recording:
|
||||||
recording = True
|
recording = True
|
||||||
recording_started_at = asyncio.get_running_loop().time()
|
recording_started_at = loop.time()
|
||||||
segment_ms = 0.0
|
segment_ms = 0.0
|
||||||
segment_buffer = bytearray()
|
segment_buffer = bytearray()
|
||||||
|
|
||||||
|
|
@ -1273,8 +1188,7 @@ class WebRTCVoiceSession:
|
||||||
if recording:
|
if recording:
|
||||||
observed_duration_ms = max(
|
observed_duration_ms = max(
|
||||||
1.0,
|
1.0,
|
||||||
(asyncio.get_running_loop().time() - recording_started_at)
|
(loop.time() - recording_started_at) * 1000.0,
|
||||||
* 1000.0,
|
|
||||||
)
|
)
|
||||||
await self._finalize_ptt_segment(
|
await self._finalize_ptt_segment(
|
||||||
bytes(segment_buffer),
|
bytes(segment_buffer),
|
||||||
|
|
@ -1285,6 +1199,7 @@ class WebRTCVoiceSession:
|
||||||
recording_started_at = 0.0
|
recording_started_at = 0.0
|
||||||
segment_ms = 0.0
|
segment_ms = 0.0
|
||||||
segment_buffer = bytearray()
|
segment_buffer = bytearray()
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
@ -1294,9 +1209,7 @@ class WebRTCVoiceSession:
|
||||||
f"Voice input stream ended ({exc.__class__.__name__}): {details}"
|
f"Voice input stream ended ({exc.__class__.__name__}): {details}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
await self._publish_system(
|
await self._publish_system(f"Voice input stream ended ({exc.__class__.__name__}).")
|
||||||
f"Voice input stream ended ({exc.__class__.__name__})."
|
|
||||||
)
|
|
||||||
finally:
|
finally:
|
||||||
if recording and segment_ms >= self._stt_min_ptt_ms:
|
if recording and segment_ms >= self._stt_min_ptt_ms:
|
||||||
observed_duration_ms = max(
|
observed_duration_ms = max(
|
||||||
|
|
@ -1355,9 +1268,7 @@ class WebRTCVoiceSession:
|
||||||
f"(estimated source={nearest_source_rate}Hz)."
|
f"(estimated source={nearest_source_rate}Hz)."
|
||||||
)
|
)
|
||||||
|
|
||||||
await self._enqueue_stt_segment(
|
await self._enqueue_stt_segment(pcm16=normalized_pcm, duration_ms=normalized_duration_ms)
|
||||||
pcm16=normalized_pcm, duration_ms=normalized_duration_ms
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _enqueue_stt_segment(self, pcm16: bytes, duration_ms: float) -> None:
|
async def _enqueue_stt_segment(self, pcm16: bytes, duration_ms: float) -> None:
|
||||||
if duration_ms < self._stt_min_ptt_ms:
|
if duration_ms < self._stt_min_ptt_ms:
|
||||||
|
|
@ -1368,13 +1279,9 @@ class WebRTCVoiceSession:
|
||||||
self._stt_segments.get_nowait()
|
self._stt_segments.get_nowait()
|
||||||
|
|
||||||
now = asyncio.get_running_loop().time()
|
now = asyncio.get_running_loop().time()
|
||||||
if (
|
if (now - self._last_stt_backlog_notice_at) >= self._stt_backlog_notice_interval_s:
|
||||||
now - self._last_stt_backlog_notice_at
|
|
||||||
) >= self._stt_backlog_notice_interval_s:
|
|
||||||
self._last_stt_backlog_notice_at = now
|
self._last_stt_backlog_notice_at = now
|
||||||
await self._publish_system(
|
await self._publish_system("Voice input backlog detected; dropping stale segment.")
|
||||||
"Voice input backlog detected; dropping stale segment."
|
|
||||||
)
|
|
||||||
|
|
||||||
with contextlib.suppress(asyncio.QueueFull):
|
with contextlib.suppress(asyncio.QueueFull):
|
||||||
self._stt_segments.put_nowait(pcm16)
|
self._stt_segments.put_nowait(pcm16)
|
||||||
|
|
@ -1384,9 +1291,7 @@ class WebRTCVoiceSession:
|
||||||
pcm16 = await self._stt_segments.get()
|
pcm16 = await self._stt_segments.get()
|
||||||
if not self._stt_first_segment_notice_sent:
|
if not self._stt_first_segment_notice_sent:
|
||||||
self._stt_first_segment_notice_sent = True
|
self._stt_first_segment_notice_sent = True
|
||||||
await self._publish_system(
|
await self._publish_system("Push-to-talk audio captured. Running host STT...")
|
||||||
"Push-to-talk audio captured. Running host STT..."
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
transcript = await self._stt.transcribe_pcm(
|
transcript = await self._stt.transcribe_pcm(
|
||||||
pcm=pcm16,
|
pcm=pcm16,
|
||||||
|
|
@ -1478,11 +1383,7 @@ class WebRTCVoiceSession:
|
||||||
except TypeError:
|
except TypeError:
|
||||||
pcm = frame.to_ndarray()
|
pcm = frame.to_ndarray()
|
||||||
|
|
||||||
if (
|
if NUMPY_AVAILABLE and np is not None and getattr(pcm, "dtype", None) is not None:
|
||||||
NUMPY_AVAILABLE
|
|
||||||
and np is not None
|
|
||||||
and getattr(pcm, "dtype", None) is not None
|
|
||||||
):
|
|
||||||
if pcm.dtype != np.int16:
|
if pcm.dtype != np.int16:
|
||||||
if np.issubdtype(pcm.dtype, np.floating):
|
if np.issubdtype(pcm.dtype, np.floating):
|
||||||
pcm = np.clip(pcm, -1.0, 1.0)
|
pcm = np.clip(pcm, -1.0, 1.0)
|
||||||
|
|
@ -1521,9 +1422,7 @@ class WebRTCVoiceSession:
|
||||||
else:
|
else:
|
||||||
frames_channels = pcm.reshape(-1, 1)
|
frames_channels = pcm.reshape(-1, 1)
|
||||||
|
|
||||||
channel_count = (
|
channel_count = int(frames_channels.shape[1]) if frames_channels.ndim == 2 else 1
|
||||||
int(frames_channels.shape[1]) if frames_channels.ndim == 2 else 1
|
|
||||||
)
|
|
||||||
if channel_count <= 1:
|
if channel_count <= 1:
|
||||||
mono = frames_channels.reshape(-1).tobytes()
|
mono = frames_channels.reshape(-1).tobytes()
|
||||||
elif NUMPY_AVAILABLE and np is not None:
|
elif NUMPY_AVAILABLE and np is not None:
|
||||||
|
|
@ -1537,9 +1436,7 @@ class WebRTCVoiceSession:
|
||||||
else:
|
else:
|
||||||
return b"", 0.0, resample_state
|
return b"", 0.0, resample_state
|
||||||
|
|
||||||
source_rate = int(
|
source_rate = int(getattr(frame, "sample_rate", 0) or getattr(frame, "rate", 0) or 0)
|
||||||
getattr(frame, "sample_rate", 0) or getattr(frame, "rate", 0) or 0
|
|
||||||
)
|
|
||||||
|
|
||||||
time_base = getattr(frame, "time_base", None)
|
time_base = getattr(frame, "time_base", None)
|
||||||
tb_rate = 0
|
tb_rate = 0
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue