// nanobot-voice-interface/frontend/src/hooks/useWebRTC.ts
import { useCallback, useEffect, useRef, useState } from "preact/hooks";
import type { AgentState, ClientMessage, LogLine, ServerMessage, ToastItem } from "../types";
// Base URL of the backend API; empty string means same-origin relative requests.
const BACKEND_URL = import.meta.env.VITE_BACKEND_URL ?? "";

// Module-level monotonic counters used to mint unique ids for toasts and log
// lines; shared across hook instances and only ever incremented.
let toastIdCounter = 0;
let logIdCounter = 0;
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * Public surface returned by {@link useWebRTC}: connection flags,
 * message-derived state, transient status text, remote-audio handles, and
 * the imperative actions the UI calls.
 */
export interface WebRTCState {
  /** True once the data channel has opened. */
  connected: boolean;
  /** True while a connection attempt is in flight. */
  connecting: boolean;
  /** Latest agent activity state reported over the data channel. */
  agentState: AgentState;
  /** Transcript/log entries (capped at 250 in useMessageState). */
  logLines: LogLine[];
  /** Currently visible toasts, newest first. */
  toasts: ToastItem[];
  /** Short status message shown near the voice control. */
  voiceStatus: string;
  /** Whether the status message is currently visible. */
  statusVisible: boolean;
  /** Hidden audio element that plays the remote (agent) audio. */
  remoteAudioEl: HTMLAudioElement | null;
  /** Remote media stream, once audio tracks have arrived. */
  remoteStream: MediaStream | null;
  /** Send a JSON message over the data channel (no-op unless open). */
  sendJson(msg: ClientMessage): void;
  /** Remove a toast by id. */
  dismissToast(id: number): void;
  /** Establish the WebRTC session (no-op while a connection exists). */
  connect(): Promise<void>;
}
// Appends one transcript line; an empty timestamp means "use now".
type AppendLine = (role: string, text: string, timestamp: string) => void;
// Enqueues a toast (id assigned internally) and returns the new id.
type AddToast = (item: Omit<ToastItem, "id">) => number;
// Functional state setter for the agent activity state.
type SetAgentState = (updater: (prev: AgentState) => AgentState) => void;
// ---------------------------------------------------------------------------
// Message handlers (pure functions, outside hook to reduce complexity)
// ---------------------------------------------------------------------------
/**
 * Dispatch a typed (schema) server message to the appropriate state update.
 * `pong` and `rtc-*` messages are intentionally ignored.
 */
function handleTypedMessage(
  msg: Extract<ServerMessage, { type: string }>,
  setAgentState: SetAgentState,
  appendLine: AppendLine,
  addToast: AddToast,
): void {
  switch (msg.type) {
    case "agent_state": {
      const next = (msg as { type: "agent_state"; state: AgentState }).state;
      // Never let a server push override a locally-held "listening" state.
      setAgentState((prev) => (prev === "listening" ? prev : next));
      break;
    }
    case "message": {
      const body = msg as { type: "message"; content: string; is_progress: boolean };
      // Progress chatter is kept out of the transcript.
      if (!body.is_progress) appendLine("nanobot", body.content, "");
      break;
    }
    case "toast": {
      const body = msg as {
        type: "toast";
        kind: "text" | "image";
        content: string;
        title: string;
        duration_ms: number;
      };
      addToast({
        kind: body.kind,
        content: body.content,
        title: body.title,
        durationMs: body.duration_ms ?? 6000,
      });
      break;
    }
    case "choice": {
      const body = msg as {
        type: "choice";
        request_id: string;
        question: string;
        choices: string[];
        title: string;
      };
      // Choice toasts never auto-expire (durationMs 0).
      addToast({
        kind: "choice",
        content: "",
        title: body.title || "",
        durationMs: 0,
        requestId: body.request_id,
        question: body.question,
        choices: body.choices,
      });
      break;
    }
    case "error":
      appendLine("system", (msg as { type: "error"; error: string }).error, "");
      break;
    default:
      // pong and rtc-* are no-ops
      break;
  }
}
/**
 * Parse a legacy "toast" payload. The text is expected to be JSON; any
 * failure while reading it falls back to showing the raw text as a plain
 * text toast.
 */
function parseLegacyToast(text: string, addToast: AddToast): void {
  console.log("[toast] parseLegacyToast raw text:", text);
  try {
    const parsed = JSON.parse(text);
    console.log("[toast] parsed toast object:", parsed);
    // Property reads stay inside the try: a null/primitive parse result
    // throws here and takes the raw-text fallback below.
    addToast({
      kind: parsed.kind || "text",
      content: parsed.content || "",
      title: parsed.title || "",
      durationMs: typeof parsed.duration_ms === "number" ? parsed.duration_ms : 6000,
    });
  } catch {
    console.log("[toast] JSON parse failed, using raw text as content");
    addToast({ kind: "text", content: text, title: "", durationMs: 6000 });
  }
}
/**
 * Parse a legacy "choice" payload (a JSON question with options) into a
 * non-expiring choice toast. Malformed payloads are silently dropped.
 */
function parseLegacyChoice(text: string, addToast: AddToast): void {
  try {
    const prompt = JSON.parse(text);
    addToast({
      kind: "choice",
      content: "",
      title: prompt.title || "",
      durationMs: 0, // choice toasts stay until answered/dismissed
      requestId: prompt.request_id || "",
      question: prompt.question || "",
      choices: Array.isArray(prompt.choices) ? prompt.choices : [],
    });
  } catch {
    // Malformed JSON (or a non-object result) is intentionally ignored.
  }
}
/**
 * Dispatch a legacy `{role, text, timestamp}` message. Control roles
 * ("agent-state", "toast", "choice") mutate UI state, "wisper" debug output
 * is suppressed, and everything else lands in the transcript.
 */
function handleLegacyMessage(
  rm: { role: string; text: string; timestamp?: string },
  setAgentState: SetAgentState,
  appendLine: AppendLine,
  addToast: AddToast,
): void {
  const role = (rm.role || "system").toString();
  const text = (rm.text || "").toString();
  switch (role) {
    case "agent-state": {
      const next = text.trim() as AgentState;
      // A locally-held "listening" state wins over server pushes.
      setAgentState((prev) => (prev === "listening" ? prev : next));
      return;
    }
    case "toast":
      parseLegacyToast(text, addToast);
      return;
    case "choice":
      parseLegacyChoice(text, addToast);
      return;
    case "wisper":
      // Suppress debug output.
      return;
    default:
      appendLine(role, text, rm.timestamp || "");
  }
}
// ---------------------------------------------------------------------------
// WebRTC helpers
// ---------------------------------------------------------------------------
/**
 * Request a mono 48 kHz microphone stream with echo cancellation and noise
 * suppression; if the browser rejects those constraints, fall back to the
 * most permissive audio-only capture request.
 */
async function acquireMicStream(): Promise<MediaStream> {
  const preferred: MediaStreamConstraints = {
    audio: {
      channelCount: 1,
      sampleRate: 48000,
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: false,
    },
    video: false,
  };
  try {
    return await navigator.mediaDevices.getUserMedia(preferred);
  } catch {
    // Constraint set rejected — retry with defaults.
    return navigator.mediaDevices.getUserMedia({ audio: true, video: false });
  }
}
/**
 * Resolve once ICE candidate gathering for `pc` reaches "complete", or
 * after a 5-second safety timeout (some networks never reach "complete").
 *
 * Fixes the original's cleanup gap: the safety timer is cancelled when
 * gathering completes, and the state-change listener is removed when the
 * timeout fires, so neither resolution path leaks a live timer/listener.
 */
function waitForIceComplete(pc: RTCPeerConnection): Promise<void> {
  return new Promise<void>((resolve) => {
    if (pc.iceGatheringState === "complete") {
      resolve();
      return;
    }
    let timer: ReturnType<typeof setTimeout> | null = null;
    // Single resolution path: detach everything, then resolve.
    const finish = () => {
      pc.removeEventListener("icegatheringstatechange", onStateChange);
      if (timer !== null) clearTimeout(timer);
      resolve();
    };
    const onStateChange = () => {
      if (pc.iceGatheringState === "complete") finish();
    };
    pc.addEventListener("icegatheringstatechange", onStateChange);
    timer = setTimeout(finish, 5000); // safety timeout
  });
}
/**
 * POST the local SDP offer to the backend and return its SDP answer.
 * Throws when the backend responds with a non-2xx status.
 */
async function exchangeSdp(
  localDesc: RTCSessionDescription,
): Promise<{ sdp: string; rtcType: string }> {
  // An empty BACKEND_URL degenerates to the relative path "/rtc/offer".
  const endpoint = `${BACKEND_URL}/rtc/offer`;
  const payload = JSON.stringify({ sdp: localDesc.sdp, rtcType: localDesc.type });
  const resp = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
  if (!resp.ok) throw new Error(`/rtc/offer returned ${resp.status}`);
  return resp.json() as Promise<{ sdp: string; rtcType: string }>;
}
// ---------------------------------------------------------------------------
// Hook internals
// ---------------------------------------------------------------------------
/** Mutable refs shared between the hook and the connection routine. */
interface RTCRefs {
  // Active peer connection, or null when disconnected.
  pcRef: { current: RTCPeerConnection | null };
  // Application data channel ("app"), or null when disconnected.
  dcRef: { current: RTCDataChannel | null };
  // Hidden element that plays remote audio.
  remoteAudioRef: { current: HTMLAudioElement | null };
  // RTP senders carrying the local microphone tracks.
  micSendersRef: { current: RTCRtpSender[] };
}

/** State setters and callbacks the connection routine reports through. */
interface RTCCallbacks {
  setConnected: (v: boolean) => void;
  setConnecting: (v: boolean) => void;
  setRemoteStream: (s: MediaStream | null) => void;
  // Show a transient status message; persistMs > 0 auto-hides it later.
  showStatus: (text: string, persistMs?: number) => void;
  appendLine: AppendLine;
  // Handler for raw data-channel payloads.
  onDcMessage: (raw: string) => void;
  // Tear down the peer connection and reset connection state.
  closePC: () => void;
}
/**
 * Establish the WebRTC session end-to-end: capture the microphone (tracks
 * start disabled, i.e. muted), build the RTCPeerConnection and "app" data
 * channel, wait for ICE gathering, exchange SDP with the backend, and apply
 * the answer. No-op if a peer connection already exists.
 *
 * On any failure: logs the error, shows a status message, tears down via
 * cbs.closePC(), and stops the acquired microphone tracks.
 */
async function runConnect(refs: RTCRefs, cbs: RTCCallbacks): Promise<void> {
  if (refs.pcRef.current) return;
  if (!window.RTCPeerConnection) {
    cbs.showStatus("WebRTC unavailable in this browser.", 4000);
    return;
  }
  cbs.setConnecting(true);
  cbs.showStatus("Connecting...");
  let micStream: MediaStream | null = null;
  try {
    // Capture first so the permission prompt happens before any signaling.
    micStream = await acquireMicStream();
    // Start muted; the "nanobot-mic-enable" event unmutes for push-to-talk.
    micStream.getAudioTracks().forEach((t) => {
      t.enabled = false;
    });
    const pc = new RTCPeerConnection({ iceServers: [{ urls: "stun:stun.l.google.com:19302" }] });
    refs.pcRef.current = pc;
    // All remote audio tracks are funneled into one MediaStream bound to
    // the hidden audio element.
    const newRemoteStream = new MediaStream();
    cbs.setRemoteStream(newRemoteStream);
    if (refs.remoteAudioRef.current) {
      refs.remoteAudioRef.current.srcObject = newRemoteStream;
      refs.remoteAudioRef.current.play().catch(() => {});
    }
    pc.ontrack = (event) => {
      if (event.track.kind !== "audio") return;
      newRemoteStream.addTrack(event.track);
      // Autoplay may have been blocked earlier; retry on each new track.
      refs.remoteAudioRef.current?.play().catch(() => {});
    };
    // Data channel is created before the offer so it is part of the SDP.
    const dc = pc.createDataChannel("app", { ordered: true });
    refs.dcRef.current = dc;
    dc.onopen = () => {
      cbs.setConnected(true);
      cbs.setConnecting(false);
      cbs.showStatus("Hold anywhere to talk", 2500);
      cbs.appendLine("system", "Connected.", new Date().toISOString());
    };
    dc.onclose = () => {
      cbs.appendLine("system", "Disconnected.", new Date().toISOString());
      // NOTE(review): confirm the hook's closePC also releases the mic
      // tracks on a normal disconnect.
      cbs.closePC();
    };
    dc.onmessage = (e) => cbs.onDcMessage(e.data as string);
    // Attach mic tracks before creating the offer so they are negotiated.
    const stream = micStream;
    stream.getAudioTracks().forEach((track) => {
      pc.addTrack(track, stream);
    });
    refs.micSendersRef.current = pc.getSenders().filter((s) => s.track?.kind === "audio");
    // Non-trickle ICE: wait for gathering (or its safety timeout) so the
    // offer carries all candidates in a single POST.
    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);
    await waitForIceComplete(pc);
    const localDesc = pc.localDescription;
    if (!localDesc) throw new Error("No local description after ICE gathering");
    const answer = await exchangeSdp(localDesc);
    await pc.setRemoteDescription({ type: answer.rtcType as RTCSdpType, sdp: answer.sdp });
  } catch (err) {
    cbs.appendLine("system", `Connection failed: ${err}`, new Date().toISOString());
    cbs.showStatus("Connection failed.", 3000);
    cbs.closePC();
    // Release the mic if it was acquired before the failure.
    if (micStream)
      micStream.getTracks().forEach((t) => {
        t.stop();
      });
  }
}
// ---------------------------------------------------------------------------
// Message state sub-hook
// ---------------------------------------------------------------------------
/** Message-derived state and actions returned by useMessageState. */
interface MessageState {
  // Latest agent activity state.
  agentState: AgentState;
  // Transcript entries, oldest first, capped at 250.
  logLines: LogLine[];
  // Active toasts, newest first.
  toasts: ToastItem[];
  appendLine: AppendLine;
  addToast: AddToast;
  dismissToast: (id: number) => void;
  // Raw data-channel message handler (JSON payloads).
  onDcMessage: (raw: string) => void;
}
/**
 * Owns all message-derived UI state: agent activity, the transcript log
 * (capped at 250 lines), and the toast queue — plus the raw data-channel
 * message handler that feeds that state.
 */
function useMessageState(): MessageState {
  const [agentState, setAgentState] = useState<AgentState>("idle");
  const [logLines, setLogLines] = useState<LogLine[]>([]);
  const [toasts, setToasts] = useState<ToastItem[]>([]);

  // Append a transcript line, defaulting the timestamp to "now" and keeping
  // only the most recent 250 entries.
  const appendLine = useCallback((role: string, text: string, timestamp: string) => {
    setLogLines((prev) =>
      [
        ...prev,
        { id: logIdCounter++, role, text, timestamp: timestamp || new Date().toISOString() },
      ].slice(-250),
    );
  }, []);

  // Prepend a toast with a freshly minted id and return that id.
  const addToast = useCallback((item: Omit<ToastItem, "id">) => {
    const id = toastIdCounter++;
    setToasts((prev) => [{ ...item, id }, ...prev]);
    return id;
  }, []);

  const dismissToast = useCallback((id: number) => {
    setToasts((prev) => prev.filter((t) => t.id !== id));
  }, []);

  // Parse a raw data-channel payload and route it to the typed or legacy
  // handler; unparseable payloads are dropped.
  const onDcMessage = useCallback(
    (raw: string) => {
      console.log("[dc] onDcMessage raw:", raw);
      let msg: ServerMessage;
      try {
        msg = JSON.parse(raw);
      } catch {
        console.log("[dc] JSON parse failed for raw message");
        return;
      }
      if ("type" in msg) {
        console.log("[dc] typed message, type:", (msg as { type: string }).type);
        handleTypedMessage(
          msg as Extract<ServerMessage, { type: string }>,
          setAgentState,
          appendLine,
          addToast,
        );
        return;
      }
      console.log("[dc] legacy message, role:", (msg as { role: string }).role);
      handleLegacyMessage(
        msg as { role: string; text: string; timestamp?: string },
        setAgentState,
        appendLine,
        addToast,
      );
    },
    [appendLine, addToast],
  );

  return { agentState, logLines, toasts, appendLine, addToast, dismissToast, onDcMessage };
}
// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------
/**
 * Top-level hook wiring WebRTC audio and data-channel state for the UI.
 *
 * Exposes the connection lifecycle (`connect`, connected/connecting flags),
 * message-derived state (agent state, transcript, toasts), transient status
 * text, and the remote audio stream/element.
 *
 * Fixes over the previous revision:
 *  - `closePC` now stops the microphone sender tracks, so audio capture
 *    (and the browser's mic indicator) actually ends on disconnect; before,
 *    only the pc/dc were closed and the mic stayed live.
 *  - The status auto-hide timer is cleared on unmount, preventing a
 *    setState after unmount.
 */
export function useWebRTC(): WebRTCState {
  const [connected, setConnected] = useState(false);
  const [connecting, setConnecting] = useState(false);
  const [voiceStatus, setVoiceStatus] = useState("");
  const [statusVisible, setStatusVisible] = useState(false);
  const [remoteStream, setRemoteStream] = useState<MediaStream | null>(null);

  const pcRef = useRef<RTCPeerConnection | null>(null);
  const dcRef = useRef<RTCDataChannel | null>(null);
  const remoteAudioRef = useRef<HTMLAudioElement | null>(null);
  const statusTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const micSendersRef = useRef<RTCRtpSender[]>([]);

  const { agentState, logLines, toasts, appendLine, dismissToast, onDcMessage } = useMessageState();

  // Create the (detached) audio element once; release it and cancel any
  // pending status timer on unmount.
  useEffect(() => {
    const audio = new Audio();
    audio.autoplay = true;
    // playsInline is needed on iOS Safari to avoid fullscreen playback.
    (audio as HTMLAudioElement & { playsInline: boolean }).playsInline = true;
    remoteAudioRef.current = audio;
    return () => {
      audio.pause();
      audio.srcObject = null;
      // Fix: a pending auto-hide must not fire after unmount.
      if (statusTimerRef.current) clearTimeout(statusTimerRef.current);
    };
  }, []);

  // Global push-to-talk toggle: other components dispatch the
  // "nanobot-mic-enable" CustomEvent with { enabled } to (un)mute the
  // outgoing microphone tracks.
  useEffect(() => {
    const handler = (e: Event) => {
      const enabled = (e as CustomEvent<{ enabled: boolean }>).detail?.enabled ?? false;
      micSendersRef.current.forEach((sender) => {
        if (sender.track) sender.track.enabled = enabled;
      });
    };
    window.addEventListener("nanobot-mic-enable", handler);
    return () => window.removeEventListener("nanobot-mic-enable", handler);
  }, []);

  // Show a status message; persistMs > 0 auto-hides after that delay,
  // persistMs === 0 keeps it visible until the next call.
  const showStatus = useCallback((text: string, persistMs = 0) => {
    setVoiceStatus(text);
    setStatusVisible(true);
    if (statusTimerRef.current) clearTimeout(statusTimerRef.current);
    if (persistMs > 0) {
      statusTimerRef.current = setTimeout(() => setStatusVisible(false), persistMs);
    }
  }, []);

  // Send a message over the data channel; silently dropped when not open.
  const sendJson = useCallback((msg: ClientMessage) => {
    const dc = dcRef.current;
    if (!dc || dc.readyState !== "open") return;
    dc.send(JSON.stringify(msg));
  }, []);

  // Tear down the session and reset all connection state.
  const closePC = useCallback(() => {
    // Fix: stop the mic tracks so capture actually ends on disconnect.
    micSendersRef.current.forEach((sender) => sender.track?.stop());
    micSendersRef.current = [];
    dcRef.current?.close();
    dcRef.current = null;
    pcRef.current?.close();
    pcRef.current = null;
    setConnected(false);
    setConnecting(false);
    if (remoteAudioRef.current) remoteAudioRef.current.srcObject = null;
    setRemoteStream(null);
  }, []);

  // Bundle refs and callbacks for the (stateless) connection routine.
  const connect = useCallback(async () => {
    const refs: RTCRefs = { pcRef, dcRef, remoteAudioRef, micSendersRef };
    const cbs: RTCCallbacks = {
      setConnected,
      setConnecting,
      setRemoteStream,
      showStatus,
      appendLine,
      onDcMessage,
      closePC,
    };
    await runConnect(refs, cbs);
  }, [setConnected, setConnecting, setRemoteStream, showStatus, appendLine, onDcMessage, closePC]);

  return {
    connected,
    connecting,
    agentState,
    logLines,
    toasts,
    voiceStatus,
    statusVisible,
    remoteAudioEl: remoteAudioRef.current,
    remoteStream,
    sendJson,
    dismissToast,
    connect,
  };
}