feat: polish life os cards and voice stack

This commit is contained in:
kacper 2026-03-24 08:54:47 -04:00
parent 66362c7176
commit 0edf8c3fef
21 changed files with 3681 additions and 502 deletions

157
app.py
View file

@ -38,6 +38,10 @@ _JSONRPC_VERSION = "2.0"
_TOOL_JOB_TIMEOUT_SECONDS = 300.0
_TOOL_JOB_RETENTION_SECONDS = 15 * 60
_NANOBOT_API_STREAM_LIMIT = 2 * 1024 * 1024
# Split point between sentences: whitespace that directly follows '.', '!' or '?'.
# The lookbehind keeps the punctuation attached to the preceding sentence.
_TTS_SENTENCE_BREAK_RE = re.compile(r"(?<=[.!?])\s+")
# Split point between clauses: whitespace that directly follows ',', ';' or ':'.
# Only applied to sentences longer than _TTS_SEGMENT_MAX_CHARS.
_TTS_CLAUSE_BREAK_RE = re.compile(r"(?<=[,;:])\s+")
# Soft limit: adjacent pieces are merged into one segment while the merged
# text stays at or below this many characters.
_TTS_SEGMENT_TARGET_CHARS = 180
# Threshold above which a sentence is split into clauses and a clause is
# word-wrapped via _wrap_tts_words.
_TTS_SEGMENT_MAX_CHARS = 260
CARD_INSTANCES_DIR.mkdir(parents=True, exist_ok=True)
CARD_TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
@ -104,6 +108,19 @@ def _decode_object(raw: str) -> dict[str, Any] | None:
return payload if isinstance(payload, dict) else None
def _parse_iso_datetime(raw: str) -> datetime | None:
value = raw.strip()
if not value:
return None
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.astimezone(timezone.utc)
async def _read_json_request(request: Request) -> dict[str, Any]:
try:
payload = await request.json()
@ -114,6 +131,76 @@ async def _read_json_request(request: Request) -> dict[str, Any]:
return payload
def _wrap_tts_words(text: str, max_chars: int) -> list[str]:
words = text.split()
if not words:
return []
chunks: list[str] = []
current = words[0]
for word in words[1:]:
candidate = f"{current} {word}"
if len(candidate) <= max_chars:
current = candidate
continue
chunks.append(current)
current = word
chunks.append(current)
return chunks
def _chunk_tts_text(text: str) -> list[str]:
    """Break ``text`` into speech-sized segments for the TTS queue.

    Paragraphs (separated by two or more newlines) are handled one at a time
    and never share a segment. Inside a paragraph, whitespace is collapsed,
    the text is cut into sentences, and any sentence longer than
    ``_TTS_SEGMENT_MAX_CHARS`` is further cut into clauses. Pieces are then
    packed greedily up to ``_TTS_SEGMENT_TARGET_CHARS``; a piece that is still
    longer than ``_TTS_SEGMENT_MAX_CHARS`` is word-wrapped on its own.

    Returns an empty list for blank input.
    """
    cleaned = text.replace("\r\n", "\n").strip()
    if not cleaned:
        return []

    segments: list[str] = []
    for raw_paragraph in re.split(r"\n{2,}", cleaned):
        flattened = re.sub(r"\s+", " ", raw_paragraph.strip()).strip()
        if not flattened:
            continue

        sentences = [
            candidate
            for candidate in map(str.strip, _TTS_SENTENCE_BREAK_RE.split(flattened))
            if candidate
        ] or [flattened]

        # Expand overlong sentences into clauses up front so the packing
        # loop below only has to deal with a flat list of pieces.
        pieces: list[str] = []
        for sentence in sentences:
            if len(sentence) <= _TTS_SEGMENT_MAX_CHARS:
                pieces.append(sentence)
                continue
            clauses = [
                candidate
                for candidate in map(str.strip, _TTS_CLAUSE_BREAK_RE.split(sentence))
                if candidate
            ]
            pieces.extend(clauses or [sentence])

        pending = ""
        for piece in pieces:
            if len(piece) > _TTS_SEGMENT_MAX_CHARS:
                # Too long even as a single clause: emit what is buffered,
                # then let the word wrapper slice this piece by itself.
                if pending:
                    segments.append(pending)
                    pending = ""
                segments.extend(_wrap_tts_words(piece, _TTS_SEGMENT_MAX_CHARS))
            elif not pending:
                pending = piece
            elif len(pending) + 1 + len(piece) <= _TTS_SEGMENT_TARGET_CHARS:
                pending = f"{pending} {piece}"
            else:
                segments.append(pending)
                pending = piece
        if pending:
            segments.append(pending)

    return segments or [re.sub(r"\s+", " ", cleaned).strip()]
def _coerce_card_record(raw: dict[str, Any]) -> dict[str, Any] | None:
card_id = _normalize_card_id(str(raw.get("id", "")))
if not card_id:
@ -158,6 +245,7 @@ def _coerce_card_record(raw: dict[str, Any]) -> dict[str, Any] | None:
"template_state": template_state,
"context_summary": str(raw.get("context_summary", "")),
"chat_id": str(raw.get("chat_id", "web") or "web"),
"snooze_until": str(raw.get("snooze_until", "") or ""),
"created_at": str(raw.get("created_at", "")),
"updated_at": str(raw.get("updated_at", "")),
}
@ -287,6 +375,7 @@ def _sort_cards(cards: list[dict[str, Any]]) -> list[dict[str, Any]]:
def _load_cards() -> list[dict[str, Any]]:
CARD_INSTANCES_DIR.mkdir(parents=True, exist_ok=True)
cards: list[dict[str, Any]] = []
now = datetime.now(timezone.utc)
for instance_dir in CARD_INSTANCES_DIR.iterdir():
if not instance_dir.is_dir():
continue
@ -295,6 +384,9 @@ def _load_cards() -> list[dict[str, Any]]:
continue
if card.get("state") == "archived":
continue
snooze_until = _parse_iso_datetime(str(card.get("snooze_until", "") or ""))
if snooze_until is not None and snooze_until > now:
continue
cards.append(card)
return _sort_cards(cards)
@ -817,6 +909,62 @@ async def delete_card(card_id: str) -> JSONResponse:
return JSONResponse({"status": "ok"})
@app.post("/cards/{card_id}/snooze")
async def snooze_card(card_id: str, request: Request) -> JSONResponse:
    """Record a ``snooze_until`` timestamp on a card.

    Expects a JSON body with an ``until`` field holding an ISO datetime.
    Responds with 400 for an invalid id, unreadable body, or unparseable
    ``until``; 404 when the card cannot be loaded; 500 when the write fails;
    otherwise ``{"status": "ok", "card": ...}`` with the persisted card.
    """

    def _reject(reason: str, status: int = 400) -> JSONResponse:
        return JSONResponse({"error": reason}, status_code=status)

    if not _normalize_card_id(card_id):
        return _reject("invalid card id")

    try:
        body = await _read_json_request(request)
    except ValueError as exc:
        return _reject(str(exc))

    wake_at = _parse_iso_datetime(str(body.get("until", "")).strip())
    if wake_at is None:
        return _reject("until must be a valid ISO datetime")

    record = _load_card(card_id)
    if record is None:
        return _reject("card not found", 404)

    # Store the normalized timestamp rather than the caller's raw string.
    record["snooze_until"] = wake_at.isoformat()
    record["updated_at"] = datetime.now(timezone.utc).isoformat()

    saved = _write_card(record)
    if saved is None:
        return _reject("failed to snooze card", 500)
    return JSONResponse({"status": "ok", "card": saved})
@app.post("/cards/{card_id}/state")
async def update_card_state(card_id: str, request: Request) -> JSONResponse:
    """Replace the ``template_state`` of a text card.

    Expects a JSON body whose ``template_state`` is an object. Responds with
    400 for an invalid id, unreadable body, non-object ``template_state``, or
    a card whose ``kind`` is not ``"text"``; 404 when the card cannot be
    loaded; 500 when the write fails; otherwise
    ``{"status": "ok", "card": ...}`` with the persisted card.
    """

    def _reject(reason: str, status: int = 400) -> JSONResponse:
        return JSONResponse({"error": reason}, status_code=status)

    if not _normalize_card_id(card_id):
        return _reject("invalid card id")

    try:
        body = await _read_json_request(request)
    except ValueError as exc:
        return _reject(str(exc))

    new_state = body.get("template_state")
    if not isinstance(new_state, dict):
        return _reject("template_state must be an object")

    record = _load_card(card_id)
    if record is None:
        return _reject("card not found", 404)
    if str(record.get("kind", "")) != "text":
        return _reject("only text cards support template_state")

    # The incoming object replaces the stored state wholesale (no merge).
    record["template_state"] = new_state
    record["updated_at"] = datetime.now(timezone.utc).isoformat()

    saved = _write_card(record)
    if saved is None:
        return _reject("failed to update card state", 500)
    return JSONResponse({"status": "ok", "card": saved})
@app.get("/templates")
async def get_templates() -> JSONResponse:
    """Return the result of ``_list_templates()`` as a JSON response."""
    templates = _list_templates()
    return JSONResponse(templates)
@ -975,8 +1123,15 @@ async def _sender_loop(
) -> None:
while True:
event = await queue.get()
if event.role == "nanobot-tts-partial":
await voice_session.queue_output_text(event.text, partial=True)
continue
if event.role == "nanobot-tts-flush":
await voice_session.flush_partial_output_text()
continue
if event.role == "nanobot-tts":
await voice_session.queue_output_text(event.text)
for segment in _chunk_tts_text(event.text):
await voice_session.queue_output_text(segment)
continue
typed_event = _to_typed_message(event.to_dict())
if typed_event is None:

View file

@ -0,0 +1,17 @@
{
"id": "live-calorie-tracker",
"kind": "text",
"title": "Calories",
"question": "",
"choices": [],
"response_value": "",
"slot": "live-calorie-tracker",
"lane": "context",
"priority": 76,
"state": "active",
"template_key": "list-total-live",
"context_summary": "",
"chat_id": "web",
"created_at": "2026-03-21T00:00:00+00:00",
"updated_at": "2026-03-21T00:00:00+00:00"
}

View file

@ -0,0 +1,9 @@
{
"left_label": "Cal",
"right_label": "Food",
"total_label": "Total",
"total_suffix": "cal",
"max_digits": 4,
"score": 76,
"rows": []
}

View file

@ -1,10 +1,12 @@
{
"title": "Weather 01545",
"subtitle": "OpenWeatherMap live context",
"subtitle": "Weather",
"tool_name": "mcp_home_assistant_GetLiveContext",
"forecast_tool_name": "exec",
"forecast_command": "python3 /home/kacper/nanobot/scripts/card_upcoming_conditions.py --nws-entity weather.korh --uv-entity weather.openweathermap_2 --forecast-type hourly --limit 4",
"provider_prefix": "OpenWeatherMap",
"temperature_name": "OpenWeatherMap Temperature",
"humidity_name": "OpenWeatherMap Humidity",
"condition_label": "OpenWeatherMap live context",
"condition_label": "Weather",
"refresh_ms": 86400000
}

View file

@ -0,0 +1,24 @@
{
"key": "list-total-live",
"title": "List Total",
"notes": "Generic editable two-column list card with a numeric left column, freeform right column, and a running total persisted in template_state. Configure left_label, right_label, total_label, total_suffix, max_digits, and rows.",
"example_state": {
"left_label": "Cal",
"right_label": "Food",
"total_label": "Total",
"total_suffix": "cal",
"max_digits": 4,
"rows": [
{
"value": "420",
"name": "Lunch"
},
{
"value": "180",
"name": "Snack"
}
]
},
"created_at": "2026-03-21T00:00:00+00:00",
"updated_at": "2026-03-21T00:00:00+00:00"
}

View file

@ -0,0 +1,336 @@
<style>
.list-total-card {
display: grid;
gap: 10px;
color: #4d392d;
}
.list-total-card__labels,
.list-total-card__row,
.list-total-card__total {
display: grid;
grid-template-columns: 68px minmax(0, 1fr);
gap: 8px;
align-items: center;
}
.list-total-card__labels {
color: rgba(77, 57, 45, 0.72);
font: 700 0.62rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
letter-spacing: 0.06em;
text-transform: uppercase;
}
.list-total-card__rows {
display: grid;
gap: 6px;
}
.list-total-card__input {
width: 100%;
min-width: 0;
box-sizing: border-box;
border: 0;
border-bottom: 1px solid rgba(92, 70, 55, 0.14);
border-radius: 0;
background: transparent;
color: #473429;
padding: 4px 0;
outline: none;
box-shadow: none;
}
.list-total-card__input::placeholder {
color: rgba(77, 57, 45, 0.42);
}
.list-total-card__value {
font: 700 0.84rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
text-align: right;
}
.list-total-card__name {
font-family: 'IBM Plex Sans Condensed', 'Arial Narrow', sans-serif;
font-size: 0.92rem;
line-height: 1.08;
font-weight: 600;
letter-spacing: -0.008em;
}
.list-total-card__status {
min-height: 0.9rem;
color: #8e3023;
font: 700 0.62rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
letter-spacing: 0.04em;
text-transform: uppercase;
}
.list-total-card__status[data-kind='ok'] {
color: rgba(77, 57, 45, 0.5);
}
.list-total-card__total {
padding-top: 8px;
border-top: 1px solid rgba(92, 70, 55, 0.18);
color: #35271f;
}
.list-total-card__total-label {
font: 700 0.66rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
letter-spacing: 0.06em;
text-transform: uppercase;
}
.list-total-card__total-value {
font: 700 0.98rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
text-align: right;
}
</style>
<div class="list-total-card" data-list-total-card>
<div class="list-total-card__labels">
<div data-list-total-left-label>Value</div>
<div data-list-total-right-label>Item</div>
</div>
<div class="list-total-card__rows" data-list-total-rows></div>
<div class="list-total-card__status" data-list-total-status></div>
<div class="list-total-card__total">
<div class="list-total-card__total-label" data-list-total-total-label>Total</div>
<div class="list-total-card__total-value" data-list-total-total>0</div>
</div>
</div>
<script>
(() => {
  // Editable two-column list card: a numeric value column, a free-text name
  // column, and a running total. Rows are kept in memory, rendered as inputs,
  // and saved (debounced) to POST /cards/<id>/state as template_state.
  const script = document.currentScript;
  const root = script?.closest('[data-nanobot-card-root]');
  // Host-provided hook; presumably returns this card's template_state.
  const state = window.__nanobotGetCardState?.(script) || {};
  if (!(root instanceof HTMLElement)) return;

  const cardId = String(root.dataset.cardId || '').trim();
  const rowsEl = root.querySelector('[data-list-total-rows]');
  const statusEl = root.querySelector('[data-list-total-status]');
  const totalEl = root.querySelector('[data-list-total-total]');
  const totalLabelEl = root.querySelector('[data-list-total-total-label]');
  const leftLabelEl = root.querySelector('[data-list-total-left-label]');
  const rightLabelEl = root.querySelector('[data-list-total-right-label]');
  if (
    !(rowsEl instanceof HTMLElement) ||
    !(statusEl instanceof HTMLElement) ||
    !(totalEl instanceof HTMLElement) ||
    !(totalLabelEl instanceof HTMLElement) ||
    !(leftLabelEl instanceof HTMLElement) ||
    !(rightLabelEl instanceof HTMLElement)
  ) {
    return;
  }

  // Digits allowed in the value column, clamped to the range 1..4.
  const maxDigits = Math.max(
    1,
    Math.min(4, Number.isFinite(Number(state.max_digits)) ? Number(state.max_digits) : 4),
  );
  const totalSuffix = String(state.total_suffix || '').trim();
  const leftLabel = String(state.left_label || 'Value').trim() || 'Value';
  const rightLabel = String(state.right_label || 'Item').trim() || 'Item';
  const totalLabel = String(state.total_label || 'Total').trim() || 'Total';

  leftLabelEl.textContent = leftLabel;
  rightLabelEl.textContent = rightLabel;
  totalLabelEl.textContent = totalLabel;

  // Keep digits only and cap the length at maxDigits.
  function sanitizeValue(raw) {
    return String(raw || '').replace(/\D+/g, '').slice(0, maxDigits);
  }

  // Collapse whitespace runs and drop leading whitespace; trailing
  // whitespace is kept so the user can keep typing after a space.
  function sanitizeName(raw) {
    return String(raw || '').replace(/\s+/g, ' ').trimStart();
  }

  // Convert arbitrary stored data into a clean array of {value, name} rows.
  function normalizeRows(raw) {
    if (!Array.isArray(raw)) return [];
    return raw
      .filter((row) => row && typeof row === 'object' && !Array.isArray(row))
      .map((row) => ({
        value: sanitizeValue(row.value),
        name: sanitizeName(row.name),
      }));
  }

  function isBlankRow(row) {
    return !row || (!String(row.value || '').trim() && !String(row.name || '').trim());
  }

  // Return a sanitized copy of items that always ends with one empty row,
  // which acts as the "add new entry" line.
  function ensureTrailingBlankRow(items) {
    const next = items.map((row) => ({
      value: sanitizeValue(row.value),
      name: sanitizeName(row.name),
    }));
    if (!next.length || !isBlankRow(next[next.length - 1])) {
      next.push({ value: '', name: '' });
    }
    return next;
  }

  // Rows worth saving: everything except blank rows.
  function persistedRows() {
    return rows
      .filter((row) => !isBlankRow(row))
      .map((row) => ({
        value: sanitizeValue(row.value),
        name: sanitizeName(row.name),
      }));
  }

  function computeTotal() {
    return persistedRows().reduce((sum, row) => sum + (Number.parseInt(row.value, 10) || 0), 0);
  }

  // Refresh the total display and publish a summary through the host hook.
  function updateTotal() {
    const total = computeTotal();
    totalEl.textContent = `${total.toLocaleString()}${totalSuffix ? totalSuffix : ''}`;
    window.__nanobotSetCardLiveContent?.(script, {
      kind: 'list_total',
      item_count: persistedRows().length,
      total,
      total_suffix: totalSuffix || null,
      score: persistedRows().length ? 24 : 16,
    });
  }

  function setStatus(text, kind) {
    statusEl.textContent = text || '';
    statusEl.dataset.kind = kind || '';
  }

  let rows = ensureTrailingBlankRow(normalizeRows(state.rows));
  let saveTimer = null;
  let inFlightSave = null;
  // True while renderRows() is tearing down and rebuilding the inputs.
  let rendering = false;

  // POST the current rows (plus the resolved labels/settings) as the card's
  // template_state and reflect progress or failure in the status line.
  async function persistState() {
    if (!cardId) return;
    const nextState = {
      ...state,
      left_label: leftLabel,
      right_label: rightLabel,
      total_label: totalLabel,
      total_suffix: totalSuffix,
      max_digits: maxDigits,
      rows: persistedRows(),
    };
    try {
      setStatus('Saving', 'ok');
      inFlightSave = fetch(`/cards/${encodeURIComponent(cardId)}/state`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ template_state: nextState }),
      });
      const response = await inFlightSave;
      if (!response.ok) {
        let message = `save failed (${response.status})`;
        try {
          const payload = await response.json();
          if (payload && typeof payload.error === 'string' && payload.error) {
            message = payload.error;
          }
        } catch (_) {}
        throw new Error(message);
      }
      setStatus('', '');
    } catch (error) {
      setStatus(error instanceof Error ? error.message : 'save failed', 'error');
    } finally {
      inFlightSave = null;
    }
  }

  // Debounce saves so a burst of keystrokes results in a single request.
  function schedulePersist() {
    if (saveTimer) clearTimeout(saveTimer);
    saveTimer = window.setTimeout(() => {
      void persistState();
    }, 280);
  }

  // Drop blank rows except the trailing one.
  function pruneRows() {
    rows = ensureTrailingBlankRow(
      rows.filter((row, index) => !isBlankRow(row) || index === rows.length - 1),
    );
  }

  // Rebuild every row's inputs from `rows`.
  // `focus` (optional): { index, field: 'value' | 'name', caret } describing
  // the input that should regain keyboard focus after the rebuild. Without
  // it, rebuilding while the user is typing would destroy the focused input
  // and interrupt them.
  function renderRows(focus = null) {
    let focusTarget = null;
    rendering = true;
    try {
      rowsEl.innerHTML = '';
      rows.forEach((row, index) => {
        const rowEl = document.createElement('div');
        rowEl.className = 'list-total-card__row';

        const valueInput = document.createElement('input');
        valueInput.className = 'list-total-card__input list-total-card__value';
        valueInput.type = 'text';
        valueInput.inputMode = 'numeric';
        valueInput.maxLength = maxDigits;
        valueInput.placeholder = '0';
        valueInput.value = row.value;

        const nameInput = document.createElement('input');
        nameInput.className = 'list-total-card__input list-total-card__name';
        nameInput.type = 'text';
        nameInput.placeholder = 'Item';
        nameInput.value = row.name;

        valueInput.addEventListener('input', () => {
          rows[index].value = sanitizeValue(valueInput.value);
          valueInput.value = rows[index].value;
          if (index === rows.length - 1 && !isBlankRow(rows[index])) {
            // The trailing blank row just received content: add a new blank
            // row below it and keep the caret where the user is typing.
            rows = ensureTrailingBlankRow(rows);
            renderRows({ index, field: 'value', caret: valueInput.selectionStart });
            schedulePersist();
            return;
          }
          updateTotal();
          schedulePersist();
        });

        nameInput.addEventListener('input', () => {
          rows[index].name = sanitizeName(nameInput.value);
          if (index === rows.length - 1 && !isBlankRow(rows[index])) {
            rows = ensureTrailingBlankRow(rows);
            renderRows({ index, field: 'name', caret: nameInput.selectionStart });
            schedulePersist();
            return;
          }
          updateTotal();
          schedulePersist();
        });

        const handleBlur = () => {
          // Some browsers dispatch blur on an input that is removed while
          // focused; ignore blurs caused by our own rebuild so they cannot
          // re-enter renderRows().
          if (rendering) return;
          rows[index].value = sanitizeValue(valueInput.value);
          rows[index].name = sanitizeName(nameInput.value);
          const nextRows = ensureTrailingBlankRow(
            rows.filter((candidate, candidateIndex) => !isBlankRow(candidate) || candidateIndex === rows.length - 1),
          );
          const changed = JSON.stringify(nextRows) !== JSON.stringify(rows);
          rows = nextRows;
          if (changed) {
            renderRows();
          } else {
            updateTotal();
          }
          schedulePersist();
        };

        valueInput.addEventListener('blur', handleBlur);
        nameInput.addEventListener('blur', handleBlur);

        rowEl.append(valueInput, nameInput);
        rowsEl.appendChild(rowEl);

        if (focus && focus.index === index) {
          focusTarget = focus.field === 'name' ? nameInput : valueInput;
        }
      });
    } finally {
      rendering = false;
    }

    if (focusTarget) {
      focusTarget.focus();
      const end = focusTarget.value.length;
      const caret = Math.min(focus.caret ?? end, end);
      try {
        focusTarget.setSelectionRange(caret, caret);
      } catch (_) {}
    }

    updateTotal();
  }

  // Host-provided hook; presumably invoked when the card should refresh.
  window.__nanobotSetCardRefresh?.(script, () => {
    pruneRows();
    renderRows();
  });

  renderRows();
})();
</script>

View file

@ -235,6 +235,8 @@
letter-spacing: 0.005em;
color: #624d40;
opacity: 0.95;
white-space: pre-wrap;
overflow-wrap: anywhere;
cursor: pointer;
touch-action: manipulation;
}
@ -324,175 +326,43 @@
text-align: left;
}
.task-card__editor-overlay {
display: none;
position: fixed;
inset: 0;
z-index: 10001;
padding:
max(18px, env(safe-area-inset-top))
max(16px, env(safe-area-inset-right))
max(18px, env(safe-area-inset-bottom))
max(16px, env(safe-area-inset-left));
background: rgba(38, 27, 21, 0.42);
backdrop-filter: blur(8px);
-webkit-backdrop-filter: blur(8px);
box-sizing: border-box;
.task-card__title--editing,
.task-card__body--editing {
cursor: text;
}
.task-card__editor-sheet {
.task-card__inline-editor {
display: block;
width: 100%;
max-width: 100%;
min-width: 0;
height: 100%;
display: grid;
grid-template-rows: auto 1fr auto;
gap: 14px;
border-radius: 22px;
border: 1px solid rgba(87, 65, 50, 0.16);
background:
radial-gradient(circle at top right, rgba(255, 255, 255, 0.74), transparent 28%),
linear-gradient(160deg, rgba(254, 246, 237, 0.985), rgba(240, 226, 210, 0.985));
color: var(--task-ink);
box-shadow:
0 22px 48px rgba(48, 32, 24, 0.24),
inset 0 1px 0 rgba(255, 255, 255, 0.7);
padding: 18px 16px 16px;
box-sizing: border-box;
overflow: hidden;
}
.task-card__editor-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
min-width: 0;
}
.task-card__editor-kicker {
font-size: 0.66rem;
line-height: 1.1;
letter-spacing: 0.12em;
text-transform: uppercase;
font-weight: 700;
color: var(--task-muted);
}
.task-card__editor-title {
margin-top: 3px;
font-family: 'IBM Plex Sans Condensed', 'Arial Narrow', sans-serif;
font-size: 1.08rem;
line-height: 1.04;
font-weight: 700;
letter-spacing: -0.012em;
color: var(--task-ink);
min-width: 0;
overflow-wrap: anywhere;
}
.task-card__editor-close {
appearance: none;
margin: 0;
padding: 0;
border: 0;
background: transparent;
color: var(--task-muted);
font: 700 0.86rem/1 'M-1m Code', var(--card-font, 'SF Mono', ui-monospace, Menlo, Consolas, monospace);
letter-spacing: 0.02em;
padding: 6px 4px;
cursor: pointer;
flex: 0 0 auto;
}
.task-card__editor-fields {
min-height: 0;
min-width: 0;
display: grid;
grid-template-rows: auto auto 1fr;
gap: 12px;
}
.task-card__editor-group {
display: grid;
gap: 5px;
min-height: 0;
min-width: 0;
}
.task-card__editor-label {
font-size: 0.66rem;
line-height: 1.1;
letter-spacing: 0.12em;
text-transform: uppercase;
font-weight: 700;
color: var(--task-muted);
}
.task-card__editor-input,
.task-card__editor-textarea {
width: 100%;
max-width: 100%;
min-width: 0;
box-sizing: border-box;
border-radius: 16px;
border: 1px solid rgba(87, 65, 50, 0.14);
background: rgba(255, 251, 246, 0.92);
color: var(--task-ink);
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.78);
outline: none;
}
.task-card__editor-input {
min-height: 52px;
padding: 12px 13px;
font-family: 'IBM Plex Sans Condensed', 'Arial Narrow', sans-serif;
font-size: 1.02rem;
line-height: 1.1;
font-weight: 700;
letter-spacing: -0.012em;
}
.task-card__editor-textarea {
min-height: 0;
height: 100%;
max-height: 100%;
padding: 12px 13px;
resize: none;
font-family: 'IBM Plex Sans Condensed', 'Arial Narrow', sans-serif;
font-size: 0.94rem;
line-height: 1.36;
font-weight: 400;
letter-spacing: 0.004em;
overflow: hidden;
background: transparent;
color: inherit;
font: inherit;
line-height: inherit;
letter-spacing: inherit;
border-radius: 0;
box-shadow: none;
}
.task-card__editor-input:focus,
.task-card__editor-textarea:focus {
border-color: rgba(88, 112, 111, 0.48);
box-shadow:
inset 0 1px 0 rgba(255, 255, 255, 0.82),
0 0 0 3px rgba(88, 112, 111, 0.12);
.task-card__inline-editor::placeholder {
color: rgba(98, 77, 64, 0.6);
opacity: 1;
font-style: italic;
}
.task-card__editor-actions {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 10px;
min-width: 0;
flex-wrap: wrap;
.task-card__inline-editor--title {
min-height: 1.2em;
}
.task-card__editor-action-row {
display: flex;
align-items: center;
gap: 8px;
min-width: 0;
flex-wrap: wrap;
justify-content: flex-end;
margin-left: auto;
}
.task-card__editor-action-row > .task-card__button {
flex: 0 0 auto;
.task-card__inline-editor--body {
min-height: 1.34em;
}
</style>
@ -759,49 +629,7 @@
}
};
const editorOverlayEl = doc.createElement('div');
editorOverlayEl.className = 'task-card__editor-overlay';
editorOverlayEl.innerHTML = `
<div class="task-card__editor-sheet" role="dialog" aria-modal="true" aria-labelledby="task-editor-title">
<div class="task-card__editor-head">
<div>
<div class="task-card__editor-kicker">${laneLabels[lane] || 'Task'}</div>
<div id="task-editor-title" class="task-card__editor-title">Edit task</div>
</div>
<button type="button" class="task-card__editor-close" data-task-editor-close>Close</button>
</div>
<form class="task-card__editor-fields" data-task-editor-form>
<div class="task-card__editor-group">
<label class="task-card__editor-label" for="task-editor-title-input">Title</label>
<input id="task-editor-title-input" data-task-editor-title-input class="task-card__editor-input" type="text" maxlength="240" />
</div>
<div class="task-card__editor-group" style="min-height:0;">
<label class="task-card__editor-label" for="task-editor-body-input">Description</label>
<textarea id="task-editor-body-input" data-task-editor-body-input class="task-card__editor-textarea" placeholder="Add notes, context, or next steps"></textarea>
</div>
<div class="task-card__editor-actions">
<div class="task-card__editor-action-row">
<button type="button" class="task-card__button task-card__button--secondary" data-task-editor-cancel>Cancel</button>
<button type="submit" class="task-card__button" data-task-editor-save>Save</button>
</div>
</div>
</form>
</div>
`;
const editorFormEl = editorOverlayEl.querySelector('[data-task-editor-form]');
const editorTitleInputEl = editorOverlayEl.querySelector('[data-task-editor-title-input]');
const editorBodyInputEl = editorOverlayEl.querySelector('[data-task-editor-body-input]');
const editorCloseEl = editorOverlayEl.querySelector('[data-task-editor-close]');
const editorCancelEl = editorOverlayEl.querySelector('[data-task-editor-cancel]');
const editorSaveEl = editorOverlayEl.querySelector('[data-task-editor-save]');
if (
!(editorFormEl instanceof HTMLFormElement) ||
!(editorTitleInputEl instanceof HTMLInputElement) ||
!(editorBodyInputEl instanceof HTMLTextAreaElement) ||
!(editorCloseEl instanceof HTMLButtonElement) ||
!(editorCancelEl instanceof HTMLButtonElement) ||
!(editorSaveEl instanceof HTMLButtonElement)
) return;
let activeInlineEdit = null;
const setBusy = (busy) => {
laneToggleEl.disabled = busy || !taskPath;
@ -811,11 +639,11 @@
for (const button of tagsEl.querySelectorAll('button')) {
if (button instanceof HTMLButtonElement) button.disabled = busy;
}
editorTitleInputEl.disabled = busy;
editorBodyInputEl.disabled = busy;
editorCloseEl.disabled = busy;
editorCancelEl.disabled = busy;
editorSaveEl.disabled = busy;
summaryEl.style.pointerEvents = busy ? 'none' : '';
descriptionEl.style.pointerEvents = busy ? 'none' : '';
if (activeInlineEdit?.input instanceof HTMLTextAreaElement) {
activeInlineEdit.input.disabled = busy;
}
};
const closeMoveMenu = () => {
@ -858,26 +686,97 @@
window.dispatchEvent(new Event('nanobot:cards-refresh'));
};
const closeEditor = () => {
editorOverlayEl.style.display = 'none';
const autosizeInlineEditor = (editor, minHeight = 0) => {
editor.style.height = '0px';
const nextHeight = Math.max(Math.ceil(minHeight), editor.scrollHeight);
editor.style.height = `${Math.max(nextHeight, 20)}px`;
};
const openEditor = (focusField = 'title') => {
if (!taskPath) return;
const beginInlineEdit = (field) => {
if (!taskPath || activeInlineEdit) return;
closeMoveMenu();
editorTitleInputEl.value = title;
editorBodyInputEl.value = body;
if (editorOverlayEl.parentElement !== doc.body) {
doc.body.appendChild(editorOverlayEl);
}
editorOverlayEl.style.display = 'block';
view.requestAnimationFrame(() => {
const target = focusField === 'description' ? editorBodyInputEl : editorTitleInputEl;
target.focus();
if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
const end = target.value.length;
target.setSelectionRange(end, end);
const host = field === 'title' ? summaryEl : descriptionEl;
const currentValue = field === 'title' ? title : body;
const editor = document.createElement('textarea');
const minHeight = host.getBoundingClientRect().height;
editor.className = `task-card__inline-editor ${
field === 'title' ? 'task-card__inline-editor--title' : 'task-card__inline-editor--body'
}`;
editor.rows = 1;
editor.value = currentValue;
editor.placeholder = field === 'description' ? 'Add description' : '';
editor.setAttribute('aria-label', field === 'title' ? 'Edit task title' : 'Edit task description');
host.textContent = '';
host.classList.remove('task-card__body--placeholder');
host.classList.add(field === 'title' ? 'task-card__title--editing' : 'task-card__body--editing');
host.appendChild(editor);
autosizeInlineEditor(editor, minHeight);
const cancel = () => {
if (activeInlineEdit?.input !== editor) return;
activeInlineEdit = null;
render();
};
const commit = async () => {
if (activeInlineEdit?.input !== editor) return;
const nextValue = editor.value.trim();
if (field === 'title' && !nextValue) {
editor.focus();
return;
}
activeInlineEdit = null;
if (nextValue === currentValue) {
render();
return;
}
const ok = await runTaskEdit(field === 'title' ? { title: nextValue } : { description: nextValue });
if (!ok) render();
};
activeInlineEdit = {
field,
input: editor,
cancel,
commit,
};
editor.addEventListener('input', () => {
autosizeInlineEditor(editor, minHeight);
});
editor.addEventListener('click', (event) => {
event.stopPropagation();
});
editor.addEventListener('keydown', (event) => {
if (event.key === 'Escape') {
event.preventDefault();
cancel();
return;
}
if (field === 'title' && event.key === 'Enter' && !event.shiftKey) {
event.preventDefault();
editor.blur();
return;
}
if ((event.metaKey || event.ctrlKey) && event.key === 'Enter') {
event.preventDefault();
editor.blur();
}
});
editor.addEventListener('blur', () => {
if (activeInlineEdit?.input !== editor) return;
void commit();
});
view.requestAnimationFrame(() => {
editor.focus();
const end = editor.value.length;
editor.setSelectionRange(end, end);
});
};
@ -958,13 +857,14 @@
if (payload && typeof payload === 'object' && payload.error) {
throw new Error(String(payload.error));
}
closeEditor();
refreshCards();
return true;
} catch (error) {
console.error('Task edit failed', error);
setBusy(false);
setStatus('Unavailable', '#8e3023', '#f3d3cc');
publishLiveContent(lane, true, String(error));
return false;
}
};
@ -1094,48 +994,16 @@
doc.body.appendChild(moveMenuEl);
}
editorCloseEl.addEventListener('click', () => {
closeEditor();
});
editorCancelEl.addEventListener('click', () => {
closeEditor();
});
editorOverlayEl.addEventListener('pointerdown', (event) => {
if (event.target === editorOverlayEl) {
closeEditor();
}
});
editorFormEl.addEventListener('submit', (event) => {
event.preventDefault();
const nextTitle = editorTitleInputEl.value.trim();
const nextDescription = editorBodyInputEl.value.trim();
if (!nextTitle) {
editorTitleInputEl.focus();
return;
}
if (nextTitle === title && nextDescription === body) {
closeEditor();
return;
}
void runTaskEdit({
title: nextTitle,
description: nextDescription,
});
});
summaryEl.addEventListener('click', (event) => {
event.preventDefault();
event.stopPropagation();
openEditor('title');
beginInlineEdit('title');
});
descriptionEl.addEventListener('click', (event) => {
event.preventDefault();
event.stopPropagation();
openEditor('description');
beginInlineEdit('description');
});
const render = () => {

View file

@ -3,12 +3,14 @@
"title": "Live Weather",
"notes": "Live weather summary card. Fill template_state with subtitle, tool_name (defaults to Home Assistant GetLiveContext), provider_prefix or exact sensor names, optional condition_label, and refresh_ms. Wind and pressure render when matching sensors exist in the live context payload.",
"example_state": {
"subtitle": "OpenWeatherMap live context",
"subtitle": "Weather",
"tool_name": "mcp_home_assistant_GetLiveContext",
"forecast_tool_name": "exec",
"forecast_command": "python3 /home/kacper/nanobot/scripts/card_upcoming_conditions.py --nws-entity weather.korh --uv-entity weather.openweathermap_2 --forecast-type hourly --limit 4",
"provider_prefix": "OpenWeatherMap",
"temperature_name": "OpenWeatherMap Temperature",
"humidity_name": "OpenWeatherMap Humidity",
"condition_label": "OpenWeatherMap live context",
"condition_label": "Weather",
"refresh_ms": 86400000
},
"created_at": "2026-03-11T04:12:48.601255+00:00",

View file

@ -1,4 +1,13 @@
<div data-weather-card style="font-family: var(--card-font, 'Iosevka', 'SF Mono', ui-monospace, Menlo, Consolas, monospace); background:#ffffff; color:#111827; padding:14px 16px;">
<style>
@font-face {
font-family: 'BlexMono Nerd Font Mono';
src: url('/card-templates/upcoming-conditions-live/assets/BlexMonoNerdFontMono-Regular.ttf') format('truetype');
font-weight: 400;
font-style: normal;
font-display: swap;
}
</style>
<div style="display:flex; align-items:flex-start; justify-content:space-between; gap:12px; margin-bottom:8px;">
<div data-weather-subtitle style="font-size:0.86rem; line-height:1.35; color:#4b5563; font-weight:600;">Loading…</div>
<span data-weather-status style="font-size:0.8rem; line-height:1.2; font-weight:700; color:#6b7280; white-space:nowrap;">Loading…</span>
@ -9,24 +18,22 @@
<span data-weather-unit style="font-size:1.05rem; font-weight:700; color:#4b5563; padding-bottom:0.28rem;">°F</span>
</div>
<div data-weather-condition style="font-size:1rem; line-height:1.3; font-weight:700; color:#1f2937; margin-bottom:10px; text-transform:capitalize;">--</div>
<div style="display:grid; grid-template-columns:repeat(2, minmax(0, 1fr)); gap:10px 12px;">
<div>
<div style="font-size:0.74rem; line-height:1.2; text-transform:uppercase; letter-spacing:0.04em; color:#6b7280;">Humidity</div>
<div title="Humidity" style="font-family:'BlexMono Nerd Font Mono', monospace; font-size:0.86rem; line-height:1.2; color:#6b7280;">󰖎</div>
<div data-weather-humidity style="margin-top:2px; font-size:1rem; line-height:1.25; font-weight:700; color:#111827;">--</div>
</div>
<div>
<div style="font-size:0.74rem; line-height:1.2; text-transform:uppercase; letter-spacing:0.04em; color:#6b7280;">Wind</div>
<div title="Wind" style="font-family:'BlexMono Nerd Font Mono', monospace; font-size:0.86rem; line-height:1.2; color:#6b7280;"></div>
<div data-weather-wind style="margin-top:2px; font-size:1rem; line-height:1.25; font-weight:700; color:#111827;">--</div>
</div>
<div>
<div style="font-size:0.74rem; line-height:1.2; text-transform:uppercase; letter-spacing:0.04em; color:#6b7280;">Pressure</div>
<div data-weather-pressure style="margin-top:2px; font-size:1rem; line-height:1.25; font-weight:700; color:#111827;">--</div>
<div title="Rain" style="font-family:'BlexMono Nerd Font Mono', monospace; font-size:0.86rem; line-height:1.2; color:#6b7280;"></div>
<div data-weather-rain style="margin-top:2px; font-size:1rem; line-height:1.25; font-weight:700; color:#111827;">--</div>
</div>
<div>
<div style="font-size:0.74rem; line-height:1.2; text-transform:uppercase; letter-spacing:0.04em; color:#6b7280;">Updated</div>
<div data-weather-updated style="margin-top:2px; font-size:0.94rem; line-height:1.25; font-weight:700; color:#374151;">--</div>
<div title="UV" style="font-family:'BlexMono Nerd Font Mono', monospace; font-size:0.86rem; line-height:1.2; color:#6b7280;"></div>
<div data-weather-uv style="margin-top:2px; font-size:1rem; line-height:1.25; font-weight:700; color:#111827;">--</div>
</div>
</div>
</div>
@ -40,22 +47,20 @@
const subtitleEl = root.querySelector('[data-weather-subtitle]');
const tempEl = root.querySelector('[data-weather-temp]');
const unitEl = root.querySelector('[data-weather-unit]');
const condEl = root.querySelector('[data-weather-condition]');
const humidityEl = root.querySelector('[data-weather-humidity]');
const windEl = root.querySelector('[data-weather-wind]');
const pressureEl = root.querySelector('[data-weather-pressure]');
const updatedEl = root.querySelector('[data-weather-updated]');
const rainEl = root.querySelector('[data-weather-rain]');
const uvEl = root.querySelector('[data-weather-uv]');
const statusEl = root.querySelector('[data-weather-status]');
if (!(subtitleEl instanceof HTMLElement) || !(tempEl instanceof HTMLElement) || !(unitEl instanceof HTMLElement) || !(condEl instanceof HTMLElement) || !(humidityEl instanceof HTMLElement) || !(windEl instanceof HTMLElement) || !(pressureEl instanceof HTMLElement) || !(updatedEl instanceof HTMLElement) || !(statusEl instanceof HTMLElement)) return;
if (!(subtitleEl instanceof HTMLElement) || !(tempEl instanceof HTMLElement) || !(unitEl instanceof HTMLElement) || !(humidityEl instanceof HTMLElement) || !(windEl instanceof HTMLElement) || !(rainEl instanceof HTMLElement) || !(uvEl instanceof HTMLElement) || !(statusEl instanceof HTMLElement)) return;
const subtitle = typeof state.subtitle === 'string' ? state.subtitle : '';
const configuredToolName = typeof state.tool_name === 'string' ? state.tool_name.trim() : '';
const configuredForecastToolName = typeof state.forecast_tool_name === 'string' ? state.forecast_tool_name.trim() : 'exec';
const forecastCommand = typeof state.forecast_command === 'string' ? state.forecast_command.trim() : '';
const providerPrefix = typeof state.provider_prefix === 'string' ? state.provider_prefix.trim() : '';
const temperatureName = typeof state.temperature_name === 'string' ? state.temperature_name.trim() : '';
const humidityName = typeof state.humidity_name === 'string' ? state.humidity_name.trim() : '';
const pressureName = typeof state.pressure_name === 'string' ? state.pressure_name.trim() : '';
const windName = typeof state.wind_name === 'string' ? state.wind_name.trim() : '';
const conditionLabel = typeof state.condition_label === 'string' ? state.condition_label.trim() : '';
const refreshMsRaw = Number(state.refresh_ms);
const refreshMs = Number.isFinite(refreshMsRaw) && refreshMsRaw >= 60000 ? refreshMsRaw : 24 * 60 * 60 * 1000;
@ -162,6 +167,18 @@
return '';
};
// Drop a trailing "Exit code: N" footer (plus the newlines before it) from tool
// output and trim the remainder. Nullish or otherwise falsy input yields ''.
const stripExecFooter = (value) => {
  const text = String(value || '');
  const withoutFooter = text.replace(/\n+\s*Exit code:\s*\d+\s*$/i, '');
  return withoutFooter.trim();
};
// Parse the JSON document carried in `toolResult.content`, after removing the
// exec footer. Returns null when there is no content or it is not valid JSON.
const extractExecJson = (toolResult) => {
  const body = stripExecFooter(toolResult?.content);
  if (!body) return null;
  let decoded = null;
  try {
    decoded = JSON.parse(body);
  } catch {
    // Malformed output is treated the same as missing output.
    decoded = null;
  }
  return decoded;
};
const resolveToolName = async () => {
if (configuredToolName) return configuredToolName;
if (!window.__nanobotListTools) return 'mcp_home_assistant_GetLiveContext';
@ -187,24 +204,41 @@
}) || null;
};
// Run the configured forecast command through the forecast tool and return its
// JSON payload. Resolves to null when no command is configured or the output
// does not parse to an object.
const resolveForecastBundle = async () => {
  if (!forecastCommand) return null;
  const toolName = configuredForecastToolName || 'exec';
  const toolArgs = {
    command: forecastCommand,
    max_output_chars: 200000,
  };
  const toolResult = await window.__nanobotCallTool?.(toolName, toolArgs);
  const payload = extractExecJson(toolResult);
  if (payload && typeof payload === 'object') return payload;
  return null;
};
// Pick an entry from `bundle[key].forecast`.
// Without `metricKey`: the first entry, if it is an object.
// With `metricKey`: the first object entry whose `metricKey` value is neither
// null nor undefined.
// Returns null when nothing qualifies or the bundle has an unexpected shape.
const firstForecastEntry = (bundle, key, metricKey = '') => {
  const isObject = (candidate) => Boolean(candidate) && typeof candidate === 'object';
  const source = isObject(bundle) ? bundle[key] : null;
  const forecast = isObject(source) && Array.isArray(source.forecast) ? source.forecast : [];
  if (metricKey) {
    const match = forecast.find((entry) => isObject(entry) && entry[metricKey] != null);
    return match || null;
  }
  const head = forecast[0];
  return isObject(head) ? head : null;
};
const refresh = async () => {
const resolvedToolName = await resolveToolName();
if (!resolvedToolName) {
const errorText = 'Missing tool_name';
setStatus('No tool', '#b91c1c');
updatedEl.textContent = errorText;
updateLiveContent({
kind: 'weather',
subtitle: subtitleEl.textContent || null,
tool_name: null,
temperature: null,
temperature_unit: String(state.unit || '°F'),
condition: null,
humidity: null,
wind: null,
pressure: null,
rain: null,
uv: null,
status: 'No tool',
updated_at: errorText,
error: errorText,
});
return;
@ -212,7 +246,10 @@
setStatus('Refreshing', '#6b7280');
try {
const toolResult = await window.__nanobotCallTool?.(resolvedToolName, {});
const [toolResult, forecastBundle] = await Promise.all([
window.__nanobotCallTool?.(resolvedToolName, {}),
resolveForecastBundle(),
]);
const entries = parseLiveContextEntries(extractLiveContextText(toolResult)).filter((entry) => normalizeText(entry.domain) === 'sensor');
const prefix = providerPrefix || 'OpenWeatherMap';
const temperatureEntry = findEntry(entries, [
@ -223,32 +260,29 @@
humidityName,
`${prefix} Humidity`,
]);
const pressureEntry = findEntry(entries, [
pressureName,
`${prefix} Pressure`,
]);
const windEntry = findEntry(entries, [
windName,
`${prefix} Wind speed`,
`${prefix} Wind`,
]);
const temperature = Number(temperatureEntry?.state);
tempEl.textContent = Number.isFinite(temperature) ? String(Math.round(temperature)) : '--';
unitEl.textContent = String(temperatureEntry?.attributes?.unit_of_measurement || state.unit || '°F');
condEl.textContent = conditionLabel || `${prefix || 'Weather'} live context`;
const humidity = Number(humidityEntry?.state);
humidityEl.textContent = Number.isFinite(humidity) ? `${Math.round(humidity)}%` : '--';
const windSpeed = Number(windEntry?.state);
const windUnit = String(windEntry?.attributes?.unit_of_measurement || 'mph');
windEl.textContent = Number.isFinite(windSpeed) ? `${windSpeed} ${windUnit}` : '--';
const pressure = Number(pressureEntry?.state);
pressureEl.textContent = Number.isFinite(pressure)
? `${pressure} ${String(pressureEntry?.attributes?.unit_of_measurement || '').trim()}`.trim()
: '--';
const updatedText = new Date().toLocaleString([], { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' });
updatedEl.textContent = updatedText;
subtitleEl.textContent = subtitle || prefix || 'Home Assistant live context';
const nwsEntry = firstForecastEntry(forecastBundle, 'nws');
const uvEntry = firstForecastEntry(forecastBundle, 'uv', 'uv_index');
const nwsSource = forecastBundle && typeof forecastBundle === 'object' && forecastBundle.nws && typeof forecastBundle.nws === 'object' ? forecastBundle.nws : null;
const windSpeed = Number(nwsEntry?.wind_speed);
const windUnit = String(nwsSource?.wind_speed_unit || 'mph');
windEl.textContent = Number.isFinite(windSpeed) ? `${Math.round(windSpeed)} ${windUnit}` : '--';
const rainChance = Number(nwsEntry?.precipitation_probability);
rainEl.textContent = Number.isFinite(rainChance) ? `${Math.round(rainChance)}%` : '--';
const uvValue = Number(uvEntry?.uv_index);
uvEl.textContent = Number.isFinite(uvValue) ? String(Math.round(uvValue)) : '--';
subtitleEl.textContent = subtitle || prefix || 'Weather';
setStatus('Live', '#047857');
updateLiveContent({
kind: 'weather',
@ -256,29 +290,32 @@
tool_name: resolvedToolName,
temperature: Number.isFinite(temperature) ? Math.round(temperature) : null,
temperature_unit: unitEl.textContent || null,
condition: condEl.textContent || null,
humidity: Number.isFinite(humidity) ? Math.round(humidity) : null,
wind: windEl.textContent || null,
pressure: pressureEl.textContent || null,
rain: rainEl.textContent || null,
uv: Number.isFinite(uvValue) ? Math.round(uvValue) : null,
status: 'Live',
updated_at: updatedText,
});
} catch (error) {
const errorText = String(error);
setStatus('Unavailable', '#b91c1c');
updatedEl.textContent = errorText;
tempEl.textContent = '--';
unitEl.textContent = String(state.unit || '°F');
humidityEl.textContent = '--';
windEl.textContent = '--';
rainEl.textContent = '--';
uvEl.textContent = '--';
updateLiveContent({
kind: 'weather',
subtitle: subtitleEl.textContent || null,
tool_name: resolvedToolName,
temperature: null,
temperature_unit: unitEl.textContent || null,
condition: null,
humidity: null,
wind: null,
pressure: null,
rain: null,
uv: null,
status: 'Unavailable',
updated_at: errorText,
error: errorText,
});
}

View file

@ -438,9 +438,11 @@ export function App() {
const { agentStateOverride, handlePointerDown, handlePointerMove, handlePointerUp } = usePTT({
connected: rtc.connected && !rtc.textOnly,
currentAgentState: rtc.agentState,
onSendPtt: (pressed) =>
rtc.sendJson({ type: "voice-ptt", pressed, metadata: selectedCardMetadata() }),
onBootstrap: rtc.connect,
onInterrupt: () => rtc.sendJson({ type: "command", command: "reset" }),
});
const effectiveAgentState = agentStateOverride ?? rtc.agentState;

File diff suppressed because it is too large Load diff

View file

@ -6,8 +6,10 @@ const MOVE_CANCEL_PX = 16;
interface UsePTTOptions {
connected: boolean;
currentAgentState: AgentState;
onSendPtt(pressed: boolean): void;
onBootstrap(): Promise<void>;
onInterrupt?(): void;
onTap?(): void; // called on a short press (< HOLD_MS) that didn't activate PTT
}
@ -23,23 +25,31 @@ function dispatchMicEnable(enabled: boolean): void {
}
/** Manages push-to-talk pointer events and mic enable/disable. */
export function usePTT({ connected, onSendPtt, onBootstrap, onTap }: UsePTTOptions): PTTState {
export function usePTT({
connected,
currentAgentState,
onSendPtt,
onBootstrap,
onInterrupt,
onTap,
}: UsePTTOptions): PTTState {
const [agentStateOverride, setAgentStateOverride] = useState<AgentState | null>(null);
const activePointers = useRef(new Set<number>());
const appStartedRef = useRef(false);
const holdTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const pttFiredRef = useRef(false);
const pointerStartRef = useRef<{ x: number; y: number } | null>(null);
const beginPTT = useCallback(() => {
if (!connected) return;
if (agentStateOverride === "listening") return;
if (currentAgentState === "thinking" || currentAgentState === "speaking") {
onInterrupt?.();
}
pttFiredRef.current = true;
setAgentStateOverride("listening");
dispatchMicEnable(true);
onSendPtt(true);
}, [connected, agentStateOverride, onSendPtt]);
}, [connected, agentStateOverride, currentAgentState, onInterrupt, onSendPtt]);
const endPTT = useCallback(() => {
if (agentStateOverride !== "listening") return;
setAgentStateOverride(null);
@ -62,11 +72,8 @@ export function usePTT({ connected, onSendPtt, onBootstrap, onTap }: UsePTTOptio
await onBootstrap();
}
if (activePointers.current.size !== 1) return;
pttFiredRef.current = false;
pointerStartRef.current = { x: pe.clientX, y: pe.clientY };
// Delay activation slightly so horizontal swipe gestures can cancel.
holdTimerRef.current = setTimeout(beginPTT, HOLD_MS);
},
[onBootstrap, beginPTT],
@ -95,18 +102,14 @@ export function usePTT({ connected, onSendPtt, onBootstrap, onTap }: UsePTTOptio
if (!activePointers.current.has(pe.pointerId)) return;
activePointers.current.delete(pe.pointerId);
if (activePointers.current.size !== 0) return;
// Cancel hold timer if it hasn't fired yet
if (holdTimerRef.current !== null) {
clearTimeout(holdTimerRef.current);
holdTimerRef.current = null;
}
pointerStartRef.current = null;
if (pttFiredRef.current) {
endPTT();
} else {
// PTT never fired → short tap.
onTap?.();
}
},

View file

@ -2,7 +2,6 @@ import { useCallback, useEffect, useRef, useState } from "preact/hooks";
import type {
AgentState,
CardItem,
CardLane,
CardMessageMetadata,
CardState,
ClientMessage,
@ -18,13 +17,6 @@ const CARD_LIVE_CONTENT_EVENT = "nanobot:card-live-content-change";
let cardIdCounter = 0;
let logIdCounter = 0;
const LANE_RANK: Record<CardLane, number> = {
attention: 0,
work: 1,
context: 2,
history: 3,
};
const STATE_RANK: Record<CardState, number> = {
active: 0,
stale: 1,
@ -96,8 +88,6 @@ function compareCards(a: CardItem, b: CardItem): number {
if (stateDiff !== 0) return stateDiff;
const scoreDiff = readCardScore(b) - readCardScore(a);
if (scoreDiff !== 0) return scoreDiff;
const laneDiff = LANE_RANK[a.lane] - LANE_RANK[b.lane];
if (laneDiff !== 0) return laneDiff;
if (a.priority !== b.priority) return b.priority - a.priority;
const updatedDiff = b.updatedAt.localeCompare(a.updatedAt);
if (updatedDiff !== 0) return updatedDiff;
@ -131,6 +121,8 @@ function toCardItem(msg: Extract<ServerMessage, { type: "card" }>): Omit<CardIte
priority: msg.priority,
state: msg.state,
templateKey: msg.template_key || undefined,
templateState:
msg.template_state && typeof msg.template_state === "object" ? msg.template_state : undefined,
contextSummary: msg.context_summary || undefined,
createdAt: msg.created_at || new Date().toISOString(),
updatedAt: msg.updated_at || new Date().toISOString(),

View file

@ -4,8 +4,57 @@
-webkit-user-select: none;
}
@font-face {
font-family: "M-1m Code";
src: url("/card-templates/todo-item-live/assets/mplus-1m-regular-sub.ttf") format("truetype");
font-weight: 400;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: "M-1m Code";
src: url("/card-templates/todo-item-live/assets/mplus-1m-bold-sub.ttf") format("truetype");
font-weight: 700;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: "IBM Plex Sans Condensed";
src: url("/card-templates/todo-item-live/assets/ibm-plex-sans-condensed-400.ttf")
format("truetype");
font-weight: 400;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: "IBM Plex Sans Condensed";
src: url("/card-templates/todo-item-live/assets/ibm-plex-sans-condensed-600.ttf")
format("truetype");
font-weight: 600;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: "IBM Plex Sans Condensed";
src: url("/card-templates/todo-item-live/assets/ibm-plex-sans-condensed-700.ttf")
format("truetype");
font-weight: 700;
font-style: normal;
font-display: swap;
}
:root {
--card-font: "Iosevka", "SF Mono", ui-monospace, Menlo, Consolas, monospace;
--feed-surface: #e7ddd0;
--card-surface: linear-gradient(180deg, #b56c3d 0%, #8f4f27 100%);
--card-border: rgba(255, 220, 188, 0.24);
--card-shadow: 0 10px 28px rgba(68, 34, 15, 0.22);
--card-text: rgba(255, 245, 235, 0.9);
--card-muted: rgba(255, 233, 214, 0.72);
}
html {
@ -638,8 +687,8 @@ body {
-webkit-overflow-scrolling: touch;
scrollbar-width: thin;
scrollbar-color: rgba(255, 200, 140, 0.25) transparent;
background: #ece8e1;
box-shadow: inset 0 1px 0 rgba(52, 40, 31, 0.24);
background: var(--feed-surface);
box-shadow: inset 0 1px 0 rgba(86, 53, 31, 0.16);
}
#card-feed::-webkit-scrollbar {
width: 4px;
@ -663,7 +712,7 @@ body {
font-weight: 700;
letter-spacing: 0.08em;
text-transform: uppercase;
color: rgba(150, 110, 68, 0.82);
color: rgba(128, 78, 44, 0.82);
}
.card-group-list {
display: flex;
@ -672,25 +721,25 @@ body {
}
.card {
pointer-events: auto;
background: rgba(28, 22, 16, 0.92);
border: 1px solid rgba(255, 200, 140, 0.18);
background: var(--card-surface);
border: 1px solid var(--card-border);
border-radius: 12px;
padding: 0;
display: flex;
flex-direction: column;
gap: 0;
box-shadow: 0 4px 24px rgba(0, 0, 0, 0.45);
box-shadow: var(--card-shadow);
animation: card-in 0.22s cubic-bezier(0.34, 1.4, 0.64, 1) both;
position: relative;
max-width: 100%;
flex-shrink: 0;
}
.card.kind-text {
background: transparent;
border: none;
background: var(--card-surface);
border: 1px solid var(--card-border);
border-radius: 0;
padding: 0;
box-shadow: none;
box-shadow: var(--card-shadow);
}
.card.dismissing {
animation: card-out 0.18s ease-in both;
@ -757,7 +806,7 @@ body {
font-size: 0.6875rem;
font-weight: 700;
letter-spacing: 0.07em;
color: rgba(255, 200, 140, 0.92);
color: rgba(255, 230, 208, 0.94);
text-transform: uppercase;
min-width: 0;
overflow: hidden;
@ -778,8 +827,8 @@ body {
text-transform: uppercase;
}
.card-state {
background: rgba(255, 255, 255, 0.08);
color: rgba(255, 245, 235, 0.66);
background: rgba(255, 241, 229, 0.14);
color: var(--card-muted);
}
.card-menu-wrap {
position: relative;
@ -788,10 +837,10 @@ body {
.card-menu-trigger {
width: 28px;
height: 28px;
border: 1px solid rgba(255, 200, 140, 0.18);
border: 1px solid rgba(255, 223, 198, 0.2);
border-radius: 999px;
background: rgba(255, 255, 255, 0.03);
color: rgba(255, 245, 235, 0.58);
background: rgba(255, 241, 229, 0.08);
color: rgba(255, 241, 229, 0.7);
display: inline-flex;
align-items: center;
justify-content: center;
@ -802,10 +851,10 @@ body {
border-color 0.15s ease;
}
.card.kind-text .card-menu-trigger {
border-color: rgba(40, 26, 16, 0.1);
background: rgba(255, 255, 255, 0.82);
color: rgba(40, 26, 16, 0.72);
box-shadow: 0 4px 12px rgba(24, 16, 10, 0.12);
border-color: rgba(255, 223, 198, 0.2);
background: rgba(255, 241, 229, 0.12);
color: rgba(255, 245, 235, 0.82);
box-shadow: 0 4px 12px rgba(59, 31, 15, 0.18);
}
.card-menu-trigger svg {
width: 14px;
@ -814,15 +863,15 @@ body {
}
.card-menu-trigger:hover,
.card-menu-trigger.open {
color: rgba(255, 245, 235, 0.9);
background: rgba(255, 200, 140, 0.12);
border-color: rgba(255, 200, 140, 0.38);
color: rgba(255, 247, 239, 0.96);
background: rgba(255, 223, 198, 0.16);
border-color: rgba(255, 223, 198, 0.36);
}
.card.kind-text .card-menu-trigger:hover,
.card.kind-text .card-menu-trigger.open {
color: rgba(20, 10, 0, 0.92);
background: rgba(255, 255, 255, 0.96);
border-color: rgba(40, 26, 16, 0.18);
color: rgba(255, 247, 239, 0.96);
background: rgba(255, 223, 198, 0.18);
border-color: rgba(255, 223, 198, 0.36);
}
.card-menu {
position: absolute;
@ -834,8 +883,8 @@ body {
flex-direction: column;
gap: 4px;
border-radius: 12px;
background: rgba(20, 14, 10, 0.98);
border: 1px solid rgba(255, 200, 140, 0.16);
background: rgba(86, 47, 23, 0.98);
border: 1px solid rgba(255, 223, 198, 0.18);
box-shadow:
0 18px 36px rgba(0, 0, 0, 0.42),
0 4px 10px rgba(0, 0, 0, 0.24);
@ -844,7 +893,7 @@ body {
.card-menu-item {
border: none;
background: transparent;
color: rgba(255, 245, 235, 0.86);
color: var(--card-text);
border-radius: 8px;
padding: 8px 10px;
text-align: left;
@ -854,7 +903,7 @@ body {
cursor: pointer;
}
.card-menu-item:hover {
background: rgba(255, 200, 140, 0.12);
background: rgba(255, 223, 198, 0.12);
}
.card-menu-item.danger {
color: rgba(255, 177, 161, 0.92);
@ -869,7 +918,7 @@ body {
font-family: var(--card-font);
font-size: 0.75rem;
line-height: 1.65;
color: rgba(255, 245, 235, 0.82);
color: var(--card-text);
white-space: normal;
word-break: break-word;
user-select: text;
@ -878,17 +927,416 @@ body {
.card.kind-text .card-body {
color: inherit;
}
.card.kind-text .card-body > [data-nanobot-card-root] {
display: block;
background: transparent;
color: inherit;
}
.card.kind-text .card-body > [data-nanobot-card-root] > :not(script) {
border-radius: 0;
overflow: hidden;
box-shadow: none;
}
.task-card-ui {
--task-accent: #58706f;
--task-accent-soft: rgba(88, 112, 111, 0.12);
--task-ink: #2f241e;
--task-muted: #7e6659;
--task-surface: rgba(255, 248, 239, 0.92);
--task-border: rgba(87, 65, 50, 0.14);
--task-button-ink: #214240;
position: relative;
overflow: visible;
border-radius: 0;
border: 1px solid var(--task-border);
background:
radial-gradient(circle at top right, rgba(255, 255, 255, 0.72), transparent 32%),
linear-gradient(145deg, rgba(253, 245, 235, 0.98), rgba(242, 227, 211, 0.97));
color: var(--task-ink);
font-family: "M-1m Code", var(--card-font);
box-shadow:
inset 0 1px 0 rgba(255, 255, 255, 0.68),
0 18px 36px rgba(79, 56, 43, 0.12);
}
.task-card-ui::before {
content: "";
position: absolute;
inset: 0;
background: repeating-linear-gradient(
135deg,
rgba(122, 97, 78, 0.035) 0,
rgba(122, 97, 78, 0.035) 2px,
transparent 2px,
transparent 10px
);
pointer-events: none;
opacity: 0.55;
}
.task-card-ui__inner {
position: relative;
display: grid;
gap: 10px;
padding: 15px 44px 13px 14px;
}
.task-card-ui__topline {
position: relative;
z-index: 6;
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
}
.task-card-ui__lane-wrap {
position: relative;
min-width: 0;
}
.task-card-ui__lane-button {
appearance: none;
display: inline-flex;
align-items: center;
gap: 6px;
min-width: 0;
padding: 0;
border: 0;
background: transparent;
color: inherit;
font: inherit;
cursor: pointer;
}
.task-card-ui__lane-button:disabled {
cursor: default;
opacity: 0.6;
}
.task-card-ui__lane {
min-width: 0;
font-size: 0.64rem;
line-height: 1.1;
letter-spacing: 0.11em;
text-transform: uppercase;
color: var(--task-muted);
font-weight: 700;
white-space: nowrap;
}
.task-card-ui__lane-caret {
flex: 0 0 auto;
font-size: 0.66rem;
line-height: 1;
color: var(--task-muted);
transform: translateY(-1px);
transition: transform 0.18s ease;
}
.task-card-ui__lane-caret.open {
transform: translateY(-1px) rotate(180deg);
}
.task-card-ui__lane-menu {
position: absolute;
top: calc(100% + 6px);
left: 0;
z-index: 12;
display: flex;
flex-direction: column;
gap: 6px;
min-width: 150px;
padding: 6px;
border-radius: 14px;
background: rgba(255, 248, 239, 0.96);
box-shadow:
0 10px 24px rgba(79, 56, 43, 0.12),
inset 0 1px 0 rgba(255, 255, 255, 0.72);
}
.task-card-ui__lane-menu-item {
appearance: none;
border: 1px solid rgba(0, 0, 0, 0.05);
border-radius: 10px;
padding: 8px 10px;
background: rgba(255, 248, 239, 0.78);
color: var(--task-button-ink);
font:
700 0.7rem / 1 "M-1m Code",
var(--card-font);
cursor: pointer;
text-align: left;
}
.task-card-ui__lane-menu-item:disabled {
cursor: default;
opacity: 0.6;
}
.task-card-ui__status {
display: inline-flex;
align-items: center;
justify-content: flex-end;
white-space: nowrap;
padding: 0;
font-size: 0.72rem;
line-height: 1;
font-weight: 700;
letter-spacing: 0.08em;
color: var(--task-muted);
}
.task-card-ui__status.is-error {
color: #8e3023;
}
.task-card-ui__text-button {
appearance: none;
width: 100%;
margin: 0;
padding: 0;
border: 0;
background: transparent;
font: inherit;
text-align: left;
color: inherit;
cursor: pointer;
}
.task-card-ui__text-button:disabled {
cursor: default;
}
.task-card-ui__title {
font-family: "IBM Plex Sans Condensed", "Arial Narrow", sans-serif;
font-size: 0.96rem;
line-height: 1.06;
font-weight: 700;
letter-spacing: -0.008em;
color: var(--task-ink);
text-wrap: balance;
word-break: break-word;
}
.task-card-ui__tags {
display: flex;
flex-wrap: nowrap;
gap: 6px;
position: relative;
z-index: 1;
width: 100%;
overflow-x: auto;
overflow-y: hidden;
scrollbar-width: none;
-ms-overflow-style: none;
-webkit-overflow-scrolling: touch;
overscroll-behavior-x: contain;
}
.task-card-ui__tags::-webkit-scrollbar {
display: none;
}
.task-card-ui__tag {
appearance: none;
display: inline-flex;
flex: 0 0 auto;
align-items: center;
min-height: 24px;
border-radius: 999px;
padding: 4px 9px;
background: var(--task-accent-soft);
color: var(--task-button-ink);
font-family: "M-1m Code", var(--card-font);
font-size: 0.71rem;
line-height: 1;
font-weight: 700;
border: 1px solid rgba(0, 0, 0, 0.035);
white-space: nowrap;
cursor: default;
}
.task-card-ui__tag:disabled {
cursor: default;
opacity: 0.6;
}
.task-card-ui__tag.is-holding {
background: rgba(165, 95, 75, 0.18);
color: #7b2f20;
}
.task-card-ui__tag--action {
border-style: dashed;
background: rgba(255, 248, 239, 0.74);
cursor: pointer;
}
.task-card-ui__body {
font-family: "IBM Plex Sans Condensed", "Arial Narrow", sans-serif;
font-size: 0.86rem;
line-height: 1.34;
font-weight: 400;
letter-spacing: 0.005em;
color: #624d40;
opacity: 0.95;
white-space: pre-wrap;
overflow-wrap: anywhere;
}
.task-card-ui__body.is-placeholder {
opacity: 0.62;
font-style: italic;
}
.task-card-ui__body-markdown {
display: block;
}
.task-card-ui__body-markdown-inner {
display: block;
}
.task-card-ui__md-line {
display: block;
}
.task-card-ui__md-line + .task-card-ui__md-line {
margin-top: 0.06rem;
}
.task-card-ui__md-line--heading {
font-weight: 700;
color: #503d31;
}
.task-card-ui__md-line--quote {
padding-left: 0.55rem;
border-left: 2px solid rgba(95, 120, 132, 0.3);
}
.task-card-ui__md-prefix {
color: var(--task-muted);
}
.task-card-ui__md-break {
display: block;
height: 0.18rem;
}
.task-card-ui__body-markdown code {
font-family: "M-1m Code", var(--card-font);
font-size: 0.78em;
}
.task-card-ui__body-markdown a {
color: inherit;
}
.task-card-ui__editor {
display: block;
width: 100%;
min-width: 0;
box-sizing: border-box;
margin: 0;
padding: 0;
border: 0;
outline: none;
resize: none;
overflow: hidden;
background: transparent;
color: inherit;
border-radius: 0;
box-shadow: none;
}
.task-card-ui__editor::placeholder {
color: rgba(98, 77, 64, 0.6);
opacity: 1;
font-style: italic;
}
.task-card-ui__meta {
display: flex;
flex-wrap: wrap;
gap: 0;
width: 100%;
}
.task-card-ui__chip {
display: flex;
align-items: center;
width: 100%;
min-height: 0;
border-radius: 0;
padding: 0;
background: transparent;
color: #4c3b30;
font-family: "M-1m Code", var(--card-font);
font-size: 0.63rem;
line-height: 1;
font-weight: 700;
}
.list-total-card-ui {
display: grid;
gap: 10px;
padding: 15px 44px 13px 14px;
background:
radial-gradient(circle at top right, rgba(255, 252, 233, 0.68), transparent 34%),
linear-gradient(145deg, rgba(244, 226, 187, 0.98), rgba(226, 198, 145, 0.97));
box-shadow: inset 0 1px 0 rgba(255, 250, 224, 0.62);
color: #4d392d;
}
.list-total-card-ui__labels,
.list-total-card-ui__row,
.list-total-card-ui__total {
display: grid;
grid-template-columns: 68px minmax(0, 1fr);
gap: 8px;
align-items: center;
}
.list-total-card-ui__labels {
color: rgba(77, 57, 45, 0.72);
font:
700 0.62rem / 1 "M-1m Code",
var(--card-font);
letter-spacing: 0.06em;
text-transform: uppercase;
}
.list-total-card-ui__rows {
display: grid;
gap: 6px;
}
.list-total-card-ui__input {
width: 100%;
min-width: 0;
box-sizing: border-box;
border: 0;
border-bottom: 1px solid rgba(92, 70, 55, 0.14);
border-radius: 0;
background: transparent;
color: #473429;
padding: 5px 0 4px;
outline: none;
box-shadow: none;
}
.list-total-card-ui__input:focus {
border-bottom-color: rgba(92, 70, 55, 0.34);
}
.list-total-card-ui__input::placeholder {
color: rgba(77, 57, 45, 0.42);
}
.list-total-card-ui__value {
font:
700 0.84rem / 1 "M-1m Code",
var(--card-font);
text-align: right;
}
.list-total-card-ui__name {
font-family: "IBM Plex Sans Condensed", "Arial Narrow", sans-serif;
font-size: 0.92rem;
line-height: 1.08;
font-weight: 600;
letter-spacing: -0.008em;
}
.list-total-card-ui__status {
min-height: 0.9rem;
color: rgba(77, 57, 45, 0.5);
font:
700 0.62rem / 1 "M-1m Code",
var(--card-font);
letter-spacing: 0.04em;
text-transform: uppercase;
}
.list-total-card-ui__status.is-error {
color: #8e3023;
}
.list-total-card-ui__total {
padding-top: 10px;
border-top: 1px solid rgba(92, 70, 55, 0.18);
color: #35271f;
}
.list-total-card-ui__total-label {
font:
700 0.66rem / 1 "M-1m Code",
var(--card-font);
letter-spacing: 0.06em;
text-transform: uppercase;
}
.list-total-card-ui__total-value {
font:
700 0.98rem / 1 "M-1m Code",
var(--card-font);
text-align: right;
}
.card-question {
color: rgba(255, 245, 235, 0.95);
}
.card-response,
.card-footer {
color: rgba(255, 245, 235, 0.62);
color: var(--card-muted);
}
.card-body p {
margin: 0 0 6px;
@ -904,7 +1352,7 @@ body {
.card-body h6 {
font-size: 0.8125rem;
font-weight: 700;
color: rgba(255, 200, 140, 0.95);
color: rgba(255, 233, 214, 0.96);
margin: 8px 0 4px;
}
.card-body ul,
@ -916,13 +1364,13 @@ body {
margin-bottom: 2px;
}
.card-body code {
background: rgba(255, 255, 255, 0.07);
background: rgba(255, 241, 229, 0.1);
border-radius: 4px;
padding: 1px 5px;
font-size: 0.6875rem;
}
.card-body pre {
background: rgba(0, 0, 0, 0.35);
background: rgba(74, 39, 18, 0.42);
border-radius: 6px;
padding: 8px 10px;
overflow-x: auto;
@ -946,23 +1394,23 @@ body {
text-align: left;
}
.card-body th {
background: rgba(255, 200, 140, 0.08);
color: rgba(255, 200, 140, 0.9);
background: rgba(255, 223, 198, 0.1);
color: rgba(255, 233, 214, 0.94);
font-weight: 600;
}
.card-body a {
color: rgba(255, 200, 140, 0.85);
color: rgba(255, 233, 214, 0.9);
text-decoration: underline;
}
.card-body blockquote {
border-left: 3px solid rgba(255, 200, 140, 0.3);
border-left: 3px solid rgba(255, 223, 198, 0.28);
margin: 6px 0;
padding-left: 10px;
color: rgba(255, 245, 235, 0.55);
}
.card-body hr {
border: none;
border-top: 1px solid rgba(255, 200, 140, 0.15);
border-top: 1px solid rgba(255, 223, 198, 0.16);
margin: 8px 0;
}
.card-body img {
@ -979,10 +1427,10 @@ body {
margin-top: 4px;
}
.card-choice-btn {
background: rgba(255, 200, 140, 0.12);
border: 1px solid rgba(255, 200, 140, 0.35);
background: rgba(255, 223, 198, 0.12);
border: 1px solid rgba(255, 223, 198, 0.34);
border-radius: 8px;
color: rgba(255, 245, 235, 0.9);
color: var(--card-text);
font-family: var(--card-font);
font-size: 0.75rem;
padding: 6px 14px;
@ -994,11 +1442,11 @@ body {
text-align: center;
}
.card-choice-btn:hover {
background: rgba(255, 200, 140, 0.25);
border-color: rgba(255, 200, 140, 0.65);
background: rgba(255, 223, 198, 0.22);
border-color: rgba(255, 223, 198, 0.56);
}
.card-choice-btn:active {
background: rgba(255, 200, 140, 0.38);
background: rgba(255, 223, 198, 0.32);
}
.card-choice-btn:disabled {
opacity: 0.4;

View file

@ -60,6 +60,7 @@ export type ServerMessage =
priority: number;
state: CardState;
template_key: string;
template_state: Record<string, JsonValue>;
context_summary: string;
created_at: string;
updated_at: string;
@ -94,6 +95,7 @@ export interface CardItem {
priority: number;
state: CardState;
templateKey?: string;
templateState?: Record<string, JsonValue>;
contextSummary?: string;
createdAt: string;
updatedAt: string;

View file

@ -5,3 +5,4 @@ av>=14.0.0,<17.0.0
numpy>=1.21.0,<2.0.0
supertonic>=1.1.2,<2.0.0
faster-whisper>=1.1.0,<2.0.0
MeloTTS @ git+https://github.com/myshell-ai/MeloTTS.git

View file

@ -0,0 +1,52 @@
#!/usr/bin/env bash
# Install MeloTTS and its pinned runtime dependencies into the project's
# .venv, then download the NLTK data packages listed at the bottom.
# Any failing step aborts the script with a non-zero exit status.
set -euo pipefail

# Project root is the parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VENV_PYTHON="${ROOT_DIR}/.venv/bin/python"

if [[ ! -x "${VENV_PYTHON}" ]]; then
  echo "error: ${VENV_PYTHON} does not exist. Create the web UI virtualenv first." >&2
  exit 1
fi

# CPU-only PyTorch builds are served from the dedicated PyTorch CPU wheel index.
"${VENV_PYTHON}" -m pip install \
  --index-url https://download.pytorch.org/whl/cpu \
  "torch==2.7.1+cpu" \
  "torchaudio==2.7.1+cpu"

# NOTE(review): presumably capped because a dependency still relies on
# pkg_resources from older setuptools; confirm before lifting the cap.
"${VENV_PYTHON}" -m pip install "setuptools<81"

# MeloTTS runtime dependencies, installed explicitly because the package itself
# is installed with --no-deps below.
"${VENV_PYTHON}" -m pip install \
  txtsplit \
  cached_path \
  "transformers==4.46.3" \
  "num2words==0.5.12" \
  "unidic_lite==1.0.8" \
  "mecab-python3==1.0.9" \
  fugashi \
  "pykakasi==2.2.1" \
  "g2p_en==2.1.0" \
  "anyascii==0.3.2" \
  "jamo==0.4.1" \
  "gruut[de,es,fr]==2.2.3" \
  "librosa==0.9.1" \
  "pydub==0.25.1" \
  "eng_to_ipa==0.0.2" \
  "inflect==7.0.0" \
  "unidecode==1.3.7" \
  "pypinyin==0.50.0" \
  "cn2an==0.5.22" \
  "jieba==0.42.1" \
  soundfile \
  tqdm

"${VENV_PYTHON}" -m pip install --no-deps "git+https://github.com/myshell-ai/MeloTTS.git"

"${VENV_PYTHON}" - <<'PY'
import os
import sys

import nltk

download_dir = os.path.expanduser("~/nltk_data")
packages = ("averaged_perceptron_tagger", "averaged_perceptron_tagger_eng", "cmudict")

# nltk.download() reports failure through its return value rather than raising,
# so check it explicitly; otherwise a failed download would leave the installer
# exiting 0 with required data missing.
failed = [
    package
    for package in packages
    if not nltk.download(package, download_dir=download_dir)
]
if failed:
    sys.exit(f"error: failed to download NLTK data: {', '.join(failed)}")
PY

252
scripts/melotts_server.py Normal file
View file

@ -0,0 +1,252 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import base64
import contextlib
import json
import os
import signal
import socket
import sys
from pathlib import Path
from typing import Any
try:
import numpy as np
except Exception as exc: # pragma: no cover - runtime fallback when dependency is missing
np = None # type: ignore[assignment]
NUMPY_IMPORT_ERROR = exc
else:
NUMPY_IMPORT_ERROR = None
# Directory two levels up from this file (the parent of the script's directory).
ROOT_DIR = Path(__file__).resolve().parents[1]
# Workspace directory; NANOBOT_WORKSPACE overrides the default of ~/.nanobot.
WORKSPACE_DIR = Path(os.getenv("NANOBOT_WORKSPACE", str(Path.home() / ".nanobot"))).expanduser()
# Socket file path; MELO_TTS_SOCKET overrides the default of <workspace>/melotts.sock.
# NOTE(review): presumably the Unix socket this server listens on — confirm below.
SOCKET_PATH = Path(os.getenv("MELO_TTS_SOCKET", str(WORKSPACE_DIR / "melotts.sock"))).expanduser()
try:
from melo.api import TTS
MELO_TTS_AVAILABLE = True
except Exception as exc: # pragma: no cover - runtime fallback when dependency is missing
TTS = None # type: ignore[assignment]
MELO_TTS_AVAILABLE = False
IMPORT_ERROR = exc
else:
IMPORT_ERROR = None
class MeloTTSServer:
    """Wrap a loaded MeloTTS model and expose the sidecar's request handlers.

    Settings are read from the environment when the object is constructed:
    ``MELO_TTS_LANGUAGE`` (default ``EN``), ``MELO_TTS_DEVICE`` (default
    ``cpu``), ``MELO_TTS_SPEED`` (default ``1.0``), ``MELO_TTS_SPEAKER``
    (default ``EN-US``) and ``MELO_TTS_WARMUP_TEXT`` (default
    ``Nanobot is ready.``; a blank value skips the warm-up synthesis).
    """

    def __init__(self) -> None:
        """Load the model, resolve the configured speaker and warm up.

        Raises:
            RuntimeError: If MeloTTS or numpy could not be imported, or the
                configured speaker is not offered by the loaded model.
            ValueError: If ``MELO_TTS_SPEED`` is not a valid float.
        """
        if not MELO_TTS_AVAILABLE or TTS is None:
            raise RuntimeError(f"MeloTTS import failed: {IMPORT_ERROR}")
        if np is None:
            raise RuntimeError(f"numpy import failed: {NUMPY_IMPORT_ERROR}")
        self._language = os.getenv("MELO_TTS_LANGUAGE", "EN").strip() or "EN"
        self._device = os.getenv("MELO_TTS_DEVICE", "cpu").strip() or "cpu"
        # Treat a blank value like the other settings do: fall back to the
        # default instead of failing on float("").
        self._speed = float(os.getenv("MELO_TTS_SPEED", "1.0").strip() or "1.0")
        self._speaker_name = os.getenv("MELO_TTS_SPEAKER", "EN-US").strip() or "EN-US"
        self._warmup_text = os.getenv("MELO_TTS_WARMUP_TEXT", "Nanobot is ready.").strip()
        self._model = TTS(language=self._language, device=self._device)
        self._speaker_ids = dict(getattr(self._model.hps.data, "spk2id", {}))
        if self._speaker_name not in self._speaker_ids:
            available = ", ".join(sorted(self._speaker_ids))
            raise RuntimeError(
                f"speaker '{self._speaker_name}' is not available for language {self._language}. "
                f"Available speakers: {available}"
            )
        self._speaker_id = self._speaker_ids[self._speaker_name]
        if self._warmup_text:
            self._warmup()

    def ping(self) -> dict[str, Any]:
        """Return a health payload describing the loaded configuration."""
        return {
            "ok": True,
            "language": self._language,
            "device": self._device,
            "speaker": self._speaker_name,
            "speakers": sorted(self._speaker_ids),
        }

    def synthesize_pcm(self, text: str) -> dict[str, Any]:
        """Synthesize *text* and return base64-encoded 16-bit PCM plus format info.

        Raises:
            RuntimeError: If *text* is blank or the model produced no audio.
        """
        clean_text = self._clean_text(text)
        pcm, sample_rate, channels = self._synthesize_pcm(clean_text)
        return {
            "ok": True,
            "encoding": "pcm_s16le_base64",
            "pcm": base64.b64encode(pcm).decode("ascii"),
            "sample_rate": sample_rate,
            "channels": channels,
            "language": self._language,
            "speaker": self._speaker_name,
        }

    def synthesize_to_file(self, text: str, output_wav: str) -> dict[str, Any]:
        """Synthesize *text* into the WAV file at *output_wav*.

        Missing parent directories of the output path are created.

        Raises:
            RuntimeError: If *text* is blank.
        """
        clean_text = self._clean_text(text)
        output_path = Path(output_wav)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        self._model.tts_to_file(
            clean_text,
            self._speaker_id,
            str(output_path),
            speed=self._speed,
            quiet=True,
        )
        return {
            "ok": True,
            "output_wav": str(output_path),
            "language": self._language,
            "speaker": self._speaker_name,
        }

    @staticmethod
    def _clean_text(text: str) -> str:
        """Collapse whitespace runs in *text*; reject text that ends up empty."""
        clean_text = " ".join(text.split())
        if not clean_text:
            raise RuntimeError("text is empty")
        return clean_text

    def _warmup(self) -> None:
        """Run one throwaway synthesis of the configured warm-up text."""
        self._synthesize_pcm(self._warmup_text)

    @staticmethod
    def _to_int16(samples: Any) -> Any:
        """Convert an audio array to int16, saturating instead of wrapping."""
        if np.issubdtype(samples.dtype, np.floating):
            samples = np.clip(samples, -1.0, 1.0)
            return (samples * 32767.0).astype(np.int16)
        if samples.dtype == np.int16:
            return samples
        if np.issubdtype(samples.dtype, np.integer):
            # Widen first so the clip bounds are representable for every
            # integer dtype, then saturate to the int16 range. A bare astype
            # would silently wrap out-of-range samples.
            widened = samples.astype(np.int64)
            return np.clip(widened, -32768, 32767).astype(np.int16)
        return samples.astype(np.int16)

    def _synthesize_pcm(self, text: str) -> tuple[bytes, int, int]:
        """Run the model and return ``(pcm_bytes, sample_rate, channels)``.

        The returned bytes are int16 samples. Two-dimensional output is
        interpreted as channels-by-frames or frames-by-channels when one axis
        has at most two entries and is the shorter one; any other layout is
        flattened to a single channel.

        Raises:
            RuntimeError: If numpy is unavailable or the audio is empty.
        """
        if np is None:
            raise RuntimeError("numpy is unavailable")
        # Passing None as the output path makes the call return the audio
        # instead of writing a file; the result is consumed as an array below.
        wav = self._model.tts_to_file(
            text,
            self._speaker_id,
            None,
            speed=self._speed,
            quiet=True,
        )
        samples = np.asarray(wav)
        if samples.size == 0:
            raise RuntimeError("MeloTTS produced empty audio")
        channels = 1
        if samples.ndim == 0:
            samples = samples.reshape(1)
        elif samples.ndim == 2:
            dim0, dim1 = int(samples.shape[0]), int(samples.shape[1])
            if dim0 <= 2 and dim1 > dim0:
                # (channels, frames): transpose so tobytes() interleaves frames.
                channels = dim0
                samples = samples.T
            elif dim1 <= 2 and dim0 > dim1:
                # Already (frames, channels).
                channels = dim1
            else:
                samples = samples.reshape(-1)
        elif samples.ndim > 2:
            samples = samples.reshape(-1)
        samples = self._to_int16(samples)
        sample_rate = int(getattr(self._model.hps.data, "sampling_rate", 44100))
        return samples.tobytes(), sample_rate, max(1, channels)
def _build_parser() -> argparse.ArgumentParser:
    """Create the sidecar's command-line parser (only ``--socket-path``)."""
    cli = argparse.ArgumentParser(
        description="Persistent MeloTTS sidecar for Nanobot voice.",
    )
    # Default to the module-level socket path so client and server agree.
    default_socket = str(SOCKET_PATH)
    cli.add_argument("--socket-path", default=default_socket)
    return cli
def _receive_json(conn: socket.socket) -> dict[str, Any]:
chunks: list[bytes] = []
while True:
data = conn.recv(8192)
if not data:
break
chunks.append(data)
if b"\n" in data:
break
payload = b"".join(chunks).decode("utf-8", errors="replace").strip()
if not payload:
return {}
return json.loads(payload)
def _send_json(conn: socket.socket, payload: dict[str, Any]) -> None:
conn.sendall((json.dumps(payload) + "\n").encode("utf-8"))
def _handle_request(server: MeloTTSServer, request: dict[str, Any]) -> dict[str, Any]:
    """Dispatch one decoded request to the matching ``MeloTTSServer`` method."""
    action = str(request.get("action", "")).strip().lower()
    if action == "ping":
        return server.ping()
    if action == "synthesize_pcm":
        return server.synthesize_pcm(str(request.get("text", "")))
    if action == "synthesize":
        text = str(request.get("text", ""))
        output_wav = str(request.get("output_wav", ""))
        if not output_wav:
            raise RuntimeError("output_wav is required")
        return server.synthesize_to_file(text, output_wav)
    raise RuntimeError(f"unsupported action: {action or 'missing'}")


def main() -> int:
    """Run the MeloTTS sidecar: serve one JSON request per connection.

    Removes any stale socket file, loads the model, then accepts connections
    on the Unix socket until SIGTERM/SIGINT is received.

    Returns:
        ``0`` after a clean shutdown, ``1`` if the model failed to initialize.
    """
    args = _build_parser().parse_args()
    socket_path = Path(args.socket_path).expanduser()
    socket_path.parent.mkdir(parents=True, exist_ok=True)
    with contextlib.suppress(FileNotFoundError):
        socket_path.unlink()
    stop_requested = False

    def request_stop(_signum: int, _frame: object) -> None:
        nonlocal stop_requested
        stop_requested = True

    signal.signal(signal.SIGTERM, request_stop)
    signal.signal(signal.SIGINT, request_stop)
    try:
        server = MeloTTSServer()
    except Exception as exc:
        print(f"melotts server initialization failed: {exc}", file=sys.stderr, flush=True)
        return 1

    # Accepted sockets are blocking even though the listener has a timeout;
    # bound client I/O so one silent peer cannot stall the single-threaded
    # loop (and with it the stop flag check).
    client_timeout_s = 30.0
    listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        # bind/listen live inside the try so the listener is closed on failure.
        listener.bind(str(socket_path))
        listener.listen(8)
        # Wake up once a second so a stop request is noticed promptly.
        listener.settimeout(1.0)
        while not stop_requested:
            try:
                conn, _addr = listener.accept()
            except socket.timeout:
                # Alias of TimeoutError on Python 3.10+, distinct before that.
                continue
            except OSError:
                if stop_requested:
                    break
                raise
            with conn:
                try:
                    conn.settimeout(client_timeout_s)
                    response = _handle_request(server, _receive_json(conn))
                except Exception as exc:
                    response = {"ok": False, "error": str(exc)}
                try:
                    _send_json(conn, response)
                except OSError as exc:
                    # The client went away (e.g. its own timeout fired). Log
                    # it and keep serving instead of crashing the sidecar.
                    print(f"melotts server failed to send reply: {exc}", file=sys.stderr, flush=True)
    finally:
        listener.close()
        with contextlib.suppress(FileNotFoundError):
            socket_path.unlink()
    return 0
if __name__ == "__main__":
raise SystemExit(main())

106
scripts/melotts_tts.py Normal file
View file

@ -0,0 +1,106 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import contextlib
import json
import os
import socket
import subprocess
import sys
import time
from pathlib import Path
from typing import Any
ROOT_DIR = Path(__file__).resolve().parents[1]
WORKSPACE_DIR = Path(os.getenv("NANOBOT_WORKSPACE", str(Path.home() / ".nanobot"))).expanduser()
LOG_DIR = WORKSPACE_DIR / "logs"
SOCKET_PATH = Path(os.getenv("MELO_TTS_SOCKET", str(WORKSPACE_DIR / "melotts.sock"))).expanduser()
SERVER_SCRIPT = ROOT_DIR / "scripts" / "melotts_server.py"
SERVER_LOG_PATH = LOG_DIR / "melotts-server.log"
DEFAULT_STARTUP_TIMEOUT_S = float(os.getenv("MELO_TTS_SERVER_STARTUP_TIMEOUT_S", "120"))
def _build_parser() -> argparse.ArgumentParser:
    """Create the adapter's CLI parser.

    ``--text`` and ``--output-wav`` are required; ``--socket-path`` defaults
    to the module-level socket path.
    """
    cli = argparse.ArgumentParser(description="Nanobot MeloTTS command adapter.")
    for required_flag in ("--text", "--output-wav"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--socket-path", default=str(SOCKET_PATH))
    return cli
def _rpc(socket_path: Path, payload: dict[str, Any], timeout_s: float = 10.0) -> dict[str, Any]:
sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
sock.settimeout(timeout_s)
try:
sock.connect(str(socket_path))
sock.sendall((json.dumps(payload) + "\n").encode("utf-8"))
response = sock.recv(8192).decode("utf-8", errors="replace").strip()
finally:
sock.close()
if not response:
raise RuntimeError("empty response from MeloTTS server")
return json.loads(response)
def _ping(socket_path: Path) -> bool:
    """Return True when the server at *socket_path* answers a ping with ``ok``.

    Any exception raised by the RPC itself counts as "not reachable".
    """
    probe = {"action": "ping"}
    try:
        reply = _rpc(socket_path, probe, timeout_s=2.0)
    except Exception:
        return False
    else:
        return bool(reply.get("ok"))
def _ensure_server(socket_path: Path) -> None:
    """Make sure a MeloTTS server is answering on *socket_path*.

    If a ping fails, any stale socket file is removed and the server script
    is launched as a detached process with its output appended to the server
    log. The function then polls until the server answers.

    Raises:
        RuntimeError: If the server process exits during startup or does not
            answer within ``DEFAULT_STARTUP_TIMEOUT_S`` seconds.
    """
    if _ping(socket_path):
        return
    with contextlib.suppress(FileNotFoundError):
        socket_path.unlink()
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    with SERVER_LOG_PATH.open("a", encoding="utf-8") as log_handle:
        # start_new_session detaches the server so it outlives this adapter.
        proc = subprocess.Popen(
            [sys.executable, str(SERVER_SCRIPT), "--socket-path", str(socket_path)],
            cwd=str(ROOT_DIR),
            stdin=subprocess.DEVNULL,
            stdout=log_handle,
            stderr=subprocess.STDOUT,
            start_new_session=True,
        )
    # Use the monotonic clock for the deadline: wall-clock adjustments (NTP,
    # manual changes) must not shorten or extend the startup window.
    deadline = time.monotonic() + DEFAULT_STARTUP_TIMEOUT_S
    while time.monotonic() < deadline:
        if _ping(socket_path):
            return
        exit_code = proc.poll()
        if exit_code is not None:
            raise RuntimeError(
                f"MeloTTS server exited during startup with code {exit_code}. "
                f"See {SERVER_LOG_PATH}"
            )
        time.sleep(0.5)
    raise RuntimeError(f"MeloTTS server did not become ready within {DEFAULT_STARTUP_TIMEOUT_S:.0f}s")
def main() -> int:
    """CLI entry point: synthesize ``--text`` into ``--output-wav``.

    Starts the MeloTTS server if it is not already answering, then sends a
    ``synthesize`` request.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: If the server reports a failure.
    """
    cli_args = _build_parser().parse_args()
    sock_path = Path(cli_args.socket_path).expanduser()
    _ensure_server(sock_path)

    request = {
        "action": "synthesize",
        "text": cli_args.text,
        "output_wav": cli_args.output_wav,
    }
    # Never wait less than 30 seconds for the synthesis reply.
    rpc_timeout_s = max(30.0, DEFAULT_STARTUP_TIMEOUT_S)
    reply = _rpc(sock_path, request, timeout_s=rpc_timeout_s)
    if reply.get("ok"):
        return 0
    raise RuntimeError(str(reply.get("error", "MeloTTS synthesis failed")))
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -28,10 +28,11 @@ nanobot -> client notifications::
from __future__ import annotations
import asyncio
import contextlib
import json
import os
from pathlib import Path
from typing import Any
from typing import Any, Awaitable, Callable
from wisper import WisperBus, WisperEvent
@ -56,13 +57,21 @@ def _jsonrpc_notification(method: str, params: dict[str, Any] | None = None) ->
class NanobotApiProcess:
"""Connects to the running nanobot process via its Unix domain socket."""
def __init__(self, bus: WisperBus, socket_path: Path) -> None:
def __init__(
self,
bus: WisperBus,
socket_path: Path,
on_disconnect: Callable[[], Awaitable[None]] | None = None,
) -> None:
self._bus = bus
self._socket_path = socket_path
self._on_disconnect = on_disconnect
self._reader: asyncio.StreamReader | None = None
self._writer: asyncio.StreamWriter | None = None
self._read_task: asyncio.Task | None = None
self._socket_inode: int | None = None
self._streaming_partial_response = False
self._closing = False
@property
def running(self) -> bool:
@ -88,6 +97,8 @@ class NanobotApiProcess:
await self._bus.publish(WisperEvent(role="system", text="Already connected to nanobot."))
return
self._closing = False
self._streaming_partial_response = False
if not self._socket_path.exists():
await self._bus.publish(
WisperEvent(
@ -172,6 +183,7 @@ class NanobotApiProcess:
await self._bus.publish(WisperEvent(role="system", text="Disconnected from nanobot."))
async def _cleanup(self) -> None:
self._closing = True
if self._read_task and not self._read_task.done():
self._read_task.cancel()
try:
@ -189,6 +201,7 @@ class NanobotApiProcess:
self._writer = None
self._reader = None
self._socket_inode = None
self._streaming_partial_response = False
async def _send_notification(self, method: str, params: dict[str, Any]) -> None:
assert self._writer is not None
@ -207,9 +220,19 @@ class NanobotApiProcess:
break
await self._handle_line(line)
finally:
await self._bus.publish(WisperEvent(role="system", text="Nanobot closed the connection."))
should_notify_disconnect = not self._closing
self._streaming_partial_response = False
self._writer = None
self._reader = None
if should_notify_disconnect:
await self._bus.publish(
WisperEvent(role="system", text="Nanobot closed the connection.")
)
if self._on_disconnect is not None:
asyncio.create_task(
self._on_disconnect(),
name="nanobot-api-reconnect-trigger",
)
async def _handle_line(self, line: bytes) -> None:
raw = line.decode(errors="replace").strip()
@ -245,12 +268,21 @@ class NanobotApiProcess:
content = str(params.get("content", ""))
is_progress = bool(params.get("is_progress", False))
is_tool_hint = bool(params.get("is_tool_hint", False))
is_partial = bool(params.get("is_partial", False))
if is_progress:
if is_partial:
self._streaming_partial_response = True
await self._bus.publish(WisperEvent(role="nanobot-tts-partial", text=content))
return
role = "nanobot-tool" if is_tool_hint else "nanobot-progress"
await self._bus.publish(WisperEvent(role=role, text=content))
else:
await self._bus.publish(WisperEvent(role="nanobot", text=content))
await self._bus.publish(WisperEvent(role="nanobot-tts", text=content))
if self._streaming_partial_response:
self._streaming_partial_response = False
await self._bus.publish(WisperEvent(role="nanobot-tts-flush", text=""))
else:
await self._bus.publish(WisperEvent(role="nanobot-tts", text=content))
elif method == "agent_state":
state = str(params.get("state", ""))
await self._bus.publish(WisperEvent(role="agent-state", text=state))
@ -263,9 +295,52 @@ class SuperTonicGateway:
self.bus = WisperBus()
self._lock = asyncio.Lock()
self._process: NanobotApiProcess | None = None
self._reconnect_task: asyncio.Task[None] | None = None
self._shutdown = False
socket_path = Path(os.getenv("NANOBOT_API_SOCKET", str(DEFAULT_SOCKET_PATH))).expanduser()
self._socket_path = socket_path
def _new_process(self) -> NanobotApiProcess:
    """Build a fresh API connection wired to this gateway.

    The connection publishes on the gateway's bus, uses the gateway's socket
    path and calls ``_schedule_reconnect`` when it disconnects.
    """
    process = NanobotApiProcess(
        on_disconnect=self._schedule_reconnect,
        socket_path=self._socket_path,
        bus=self.bus,
    )
    return process
async def _schedule_reconnect(self) -> None:
async with self._lock:
if self._shutdown:
return
if self._process and self._process.running:
return
if self._reconnect_task and not self._reconnect_task.done():
return
self._reconnect_task = asyncio.create_task(
self._reconnect_loop(),
name="nanobot-api-reconnect",
)
async def _reconnect_loop(self) -> None:
delay_s = 0.5
try:
while not self._shutdown:
async with self._lock:
if self._process and self._process.running:
return
self._process = self._new_process()
await self._process.start()
if self._process.running:
return
await asyncio.sleep(delay_s)
delay_s = min(delay_s * 2.0, 5.0)
except asyncio.CancelledError:
raise
finally:
async with self._lock:
current_task = asyncio.current_task()
if self._reconnect_task is current_task:
self._reconnect_task = None
async def subscribe(self) -> asyncio.Queue[WisperEvent]:
return await self.bus.subscribe()
@ -274,10 +349,16 @@ class SuperTonicGateway:
async def connect_nanobot(self) -> None:
async with self._lock:
self._shutdown = False
if self._process and self._process.running:
await self.bus.publish(WisperEvent(role="system", text="Already connected to nanobot."))
return
self._process = NanobotApiProcess(bus=self.bus, socket_path=self._socket_path)
if self._reconnect_task and not self._reconnect_task.done():
self._reconnect_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await self._reconnect_task
self._reconnect_task = None
self._process = self._new_process()
await self._process.start()
async def _ensure_connected_process(self) -> NanobotApiProcess:
@ -285,7 +366,12 @@ class SuperTonicGateway:
return self._process
if self._process:
await self._process.stop()
self._process = NanobotApiProcess(bus=self.bus, socket_path=self._socket_path)
if self._reconnect_task and not self._reconnect_task.done():
self._reconnect_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await self._reconnect_task
self._reconnect_task = None
self._process = self._new_process()
await self._process.start()
if not self._process.running or not self._process.matches_current_socket():
raise RuntimeError("Not connected to nanobot.")
@ -312,6 +398,12 @@ class SuperTonicGateway:
async def disconnect_nanobot(self) -> None:
async with self._lock:
self._shutdown = True
if self._reconnect_task and not self._reconnect_task.done():
self._reconnect_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await self._reconnect_task
self._reconnect_task = None
if self._process:
await self._process.stop()
self._process = None

View file

@ -1,5 +1,6 @@
import asyncio
import audioop
import base64
import contextlib
import io
import json
@ -7,11 +8,15 @@ import os
import re
import shlex
import shutil
import socket
import subprocess
import sys
import tempfile
import time
import wave
from dataclasses import dataclass
from fractions import Fraction
from pathlib import Path
from typing import TYPE_CHECKING, Any, Awaitable, Callable
from wisper import WisperEvent
@ -83,6 +88,9 @@ BRAILLE_SPINNER_RE = re.compile(r"[\u2800-\u28ff]")
TTS_ALLOWED_ASCII = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?;:'\"()[]{}@#%&*+-_/<>|"
)
TTS_WORD_RE = re.compile(r"[A-Za-z0-9]")
TTS_RETRY_BREAK_RE = re.compile(r"(?<=[.!?,;:])\s+")
TTS_PARTIAL_COMMIT_RE = re.compile(r"[.!?]\s*$|[,;:]\s+$")
LOCAL_ICE_GATHER_TIMEOUT_S = 0.35
@ -94,9 +102,39 @@ def _sanitize_tts_text(text: str) -> str:
cleaned = CONTROL_CHAR_RE.sub(" ", cleaned)
cleaned = "".join(ch if (ch in TTS_ALLOWED_ASCII or ch.isspace()) else " " for ch in cleaned)
cleaned = re.sub(r"\s+", " ", cleaned).strip()
if not TTS_WORD_RE.search(cleaned):
return ""
return cleaned
def _split_tts_retry_segments(text: str, max_chars: int = 120) -> list[str]:
    """Break text into smaller sanitized pieces for a synthesis retry.

    The sanitized text is first split at whitespace that follows sentence or
    clause punctuation. If that gives at most one piece, the words are
    greedily packed into lines of at most ``max_chars`` characters instead.

    Returns:
        At least two non-empty sanitized segments, or ``[]`` when the text
        cannot be divided that way.
    """
    sanitized = _sanitize_tts_text(text)
    if not sanitized:
        return []

    pieces: list[str] = []
    for raw_piece in TTS_RETRY_BREAK_RE.split(sanitized):
        trimmed = raw_piece.strip()
        if trimmed:
            pieces.append(trimmed)

    if len(pieces) < 2:
        tokens = sanitized.split()
        if len(tokens) < 2:
            return []
        # Greedy word wrap: extend the current line while it still fits.
        pieces = []
        line = tokens[0]
        for token in tokens[1:]:
            extended = f"{line} {token}"
            if len(extended) > max_chars:
                pieces.append(line)
                line = token
            else:
                line = extended
        pieces.append(line)

    segments = [segment for segment in map(_sanitize_tts_text, pieces) if segment]
    return segments if len(segments) > 1 else []
def _coerce_message_metadata(raw: Any) -> dict[str, Any]:
def _coerce_jsonish(value: Any, depth: int = 0) -> Any:
if depth > 6:
@ -143,6 +181,12 @@ class PCMChunk:
channels: int = 1
@dataclass(slots=True)
class STTSegment:
    """One captured voice segment waiting in the speech-to-text queue."""

    # Audio bytes handed to the STT backend; the worker in this file
    # transcribes them with sample_rate=16_000 and channels=1.
    pcm: bytes
    # Message metadata snapshotted when the segment was enqueued; it is sent
    # along with the resulting transcript.
    metadata: dict[str, Any]
if AIORTC_AVAILABLE:
class QueueAudioTrack(MediaStreamTrack):
@ -275,6 +319,19 @@ if AIORTC_AVAILABLE:
self._closed = True
super().stop()
def clear(self) -> None:
    """Discard all queued audio and reset the track's playback state.

    Notifies ``_on_playing_changed(False)`` if the track was marked playing.
    """
    # Drain without blocking; QueueEmpty signals that nothing is left.
    with contextlib.suppress(asyncio.QueueEmpty):
        while True:
            self._queue.get_nowait()
    self._last_enqueue_at = 0.0
    self._idle_frames = 0
    if not self._playing:
        return
    self._playing = False
    notify = self._on_playing_changed
    if notify:
        notify(False)
else:
class QueueAudioTrack: # pragma: no cover - used only when aiortc is unavailable
@ -286,6 +343,9 @@ else:
def stop(self) -> None:
    """Do nothing; this fallback track is used only when aiortc is unavailable."""
    return
def clear(self) -> None:
    """Do nothing; the fallback track has no queued audio to discard."""
    return
def _write_temp_wav(pcm: bytes, sample_rate: int, channels: int) -> str:
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
@ -706,13 +766,138 @@ class SupertonicTextToSpeech:
self._init_error = None
class MeloTTSTextToSpeech:
    """Client for the MeloTTS sidecar server reached over a Unix socket.

    The sidecar script is launched on demand when a ping to the socket fails.
    Synthesis requests are serialized with an asyncio lock and executed in a
    worker thread so the event loop is not blocked.
    """

    def __init__(self) -> None:
        """Resolve script, socket and log paths plus the startup timeout.

        Reads ``NANOBOT_WORKSPACE``, ``MELO_TTS_SOCKET`` and
        ``MELO_TTS_SERVER_STARTUP_TIMEOUT_S`` (clamped to at least 5 seconds).
        """
        self._root_dir = Path(__file__).resolve().parent
        self._workspace_dir = Path(
            os.getenv("NANOBOT_WORKSPACE", str(Path.home() / ".nanobot"))
        ).expanduser()
        self._socket_path = Path(
            os.getenv("MELO_TTS_SOCKET", str(self._workspace_dir / "melotts.sock"))
        ).expanduser()
        self._server_script = self._root_dir / "scripts" / "melotts_server.py"
        self._server_log_path = self._workspace_dir / "logs" / "melotts-server.log"
        self._startup_timeout_s = max(
            5.0, float(os.getenv("MELO_TTS_SERVER_STARTUP_TIMEOUT_S", "120"))
        )
        self._init_error: str | None = None
        self._lock = asyncio.Lock()

    @property
    def enabled(self) -> bool:
        """Whether the sidecar server script exists on disk."""
        return self._server_script.exists()

    @property
    def init_error(self) -> str | None:
        """Message from the last failed server startup, or ``None``."""
        return self._init_error

    async def synthesize(self, text: str) -> PCMChunk | None:
        """Synthesize *text* via the sidecar.

        Returns:
            The audio chunk, or ``None`` when the backend is disabled, the
            text is blank, or the server returned no PCM data.

        Raises:
            RuntimeError: If the server cannot be started or reports an error.
        """
        if not self.enabled:
            return None
        clean_text = " ".join(text.split())
        if not clean_text:
            return None
        async with self._lock:
            return await asyncio.to_thread(self._synthesize_blocking, clean_text)

    def _synthesize_blocking(self, text: str) -> PCMChunk | None:
        """Blocking part of ``synthesize``: ensure the server, then request PCM."""
        self._ensure_server_blocking()
        response = self._rpc(
            {
                "action": "synthesize_pcm",
                "text": text,
            },
            timeout_s=max(30.0, self._startup_timeout_s),
        )
        if not response.get("ok"):
            raise RuntimeError(str(response.get("error", "MeloTTS synthesis failed")))
        encoded_pcm = str(response.get("pcm", "")).strip()
        if not encoded_pcm:
            return None
        pcm = base64.b64decode(encoded_pcm)
        sample_rate = max(1, int(response.get("sample_rate", 44100)))
        channels = max(1, int(response.get("channels", 1)))
        return PCMChunk(pcm=pcm, sample_rate=sample_rate, channels=channels)

    def _ensure_server_blocking(self) -> None:
        """Start the sidecar if it does not answer a ping and wait for it.

        ``init_error`` is cleared on success and set to the failure message
        before raising.

        Raises:
            RuntimeError: If the server exits during startup or does not
                answer within the startup timeout.
        """
        if self._ping():
            self._init_error = None
            return
        with contextlib.suppress(FileNotFoundError):
            self._socket_path.unlink()
        self._server_log_path.parent.mkdir(parents=True, exist_ok=True)
        with self._server_log_path.open("a", encoding="utf-8") as log_handle:
            # start_new_session detaches the sidecar from this process group.
            proc = subprocess.Popen(
                [sys.executable, str(self._server_script), "--socket-path", str(self._socket_path)],
                cwd=str(self._root_dir),
                stdin=subprocess.DEVNULL,
                stdout=log_handle,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
        # Monotonic clock: wall-clock adjustments must not shorten or extend
        # the startup window.
        deadline = time.monotonic() + self._startup_timeout_s
        while time.monotonic() < deadline:
            if self._ping():
                self._init_error = None
                return
            exit_code = proc.poll()
            if exit_code is not None:
                self._init_error = (
                    f"MeloTTS server exited during startup with code {exit_code}. "
                    f"See {self._server_log_path}"
                )
                raise RuntimeError(self._init_error)
            time.sleep(0.25)
        self._init_error = (
            f"MeloTTS server did not become ready within {self._startup_timeout_s:.0f}s."
        )
        raise RuntimeError(self._init_error)

    def _ping(self) -> bool:
        """Return True when the sidecar answers a ping with ``ok``."""
        try:
            response = self._rpc({"action": "ping"}, timeout_s=2.0)
        except Exception:
            return False
        return bool(response.get("ok"))

    def _rpc(self, payload: dict[str, Any], timeout_s: float) -> dict[str, Any]:
        """Send one newline-terminated JSON request and return the JSON reply.

        Raises:
            RuntimeError: If the server closed the connection without replying.
            OSError: On connection failure or timeout.
        """
        chunks: list[bytes] = []
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
            sock.settimeout(timeout_s)
            sock.connect(str(self._socket_path))
            sock.sendall((json.dumps(payload) + "\n").encode("utf-8"))
            # Replies can span several recv() calls (base64 PCM is large);
            # read until the terminating newline or EOF.
            while True:
                data = sock.recv(8192)
                if not data:
                    break
                chunks.append(data)
                if b"\n" in data:
                    break
        response = b"".join(chunks).decode("utf-8", errors="replace").strip()
        if not response:
            raise RuntimeError("empty response from MeloTTS server")
        return json.loads(response)
class HostTextToSpeech:
def __init__(self) -> None:
provider = (os.getenv("HOST_TTS_PROVIDER", "supertonic").strip() or "supertonic").lower()
if provider not in {"supertonic", "command", "espeak", "auto"}:
if provider not in {"supertonic", "melotts", "command", "espeak", "auto"}:
provider = "auto"
self._provider = provider
self._supertonic = SupertonicTextToSpeech()
self._melotts = MeloTTSTextToSpeech()
self._command_template = os.getenv("HOST_TTS_COMMAND", "").strip()
self._espeak = shutil.which("espeak")
@ -720,11 +905,17 @@ class HostTextToSpeech:
def enabled(self) -> bool:
if self._provider == "supertonic":
return self._supertonic.enabled
if self._provider == "melotts":
return self._melotts.enabled
if self._provider == "command":
return bool(self._command_template)
if self._provider == "espeak":
return bool(self._espeak)
return self._supertonic.enabled or bool(self._command_template or self._espeak)
return (
self._supertonic.enabled
or self._melotts.enabled
or bool(self._command_template or self._espeak)
)
async def synthesize(self, text: str) -> PCMChunk | None:
clean_text = " ".join(text.split())
@ -738,6 +929,13 @@ class HostTextToSpeech:
if self._provider == "supertonic":
return None
if self._provider in {"melotts", "auto"}:
audio = await self._melotts.synthesize(clean_text)
if audio:
return audio
if self._provider == "melotts":
return None
if self._provider in {"command", "auto"} and self._command_template:
return await asyncio.to_thread(self._synthesize_with_command, clean_text)
if self._provider == "command":
@ -755,6 +953,12 @@ class HostTextToSpeech:
if self._supertonic.init_error:
return f"supertonic initialization failed: {self._supertonic.init_error}"
return "supertonic did not return audio."
if self._provider == "melotts":
if not self._melotts.enabled:
return "MeloTTS server script is not available."
if self._melotts.init_error:
return f"MeloTTS initialization failed: {self._melotts.init_error}"
return "MeloTTS did not return audio."
if self._provider == "command":
return "HOST_TTS_COMMAND is not configured."
if self._provider == "espeak":
@ -762,6 +966,8 @@ class HostTextToSpeech:
if self._supertonic.init_error:
return f"supertonic initialization failed: {self._supertonic.init_error}"
if self._melotts.init_error:
return f"MeloTTS initialization failed: {self._melotts.init_error}"
if self._command_template:
return "HOST_TTS_COMMAND failed to produce audio."
if self._espeak:
@ -862,11 +1068,12 @@ class WebRTCVoiceSession:
self._stt = HostSpeechToText()
self._tts = HostTextToSpeech()
self._stt_segment_queue_size = max(1, int(os.getenv("HOST_STT_SEGMENT_QUEUE_SIZE", "2")))
self._stt_segments: asyncio.Queue[bytes] = asyncio.Queue(
self._stt_segments: asyncio.Queue[STTSegment] = asyncio.Queue(
maxsize=self._stt_segment_queue_size
)
self._tts_chunks: list[str] = []
self._tts_partial_buffer = ""
self._tts_flush_handle: asyncio.TimerHandle | None = None
self._tts_flush_lock = asyncio.Lock()
self._tts_buffer_lock = asyncio.Lock()
@ -875,8 +1082,18 @@ class WebRTCVoiceSession:
self._tts_response_end_delay_s = max(
0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5"))
)
self._tts_partial_commit_chars = max(
24, int(os.getenv("HOST_TTS_PARTIAL_COMMIT_CHARS", "72"))
)
self._closed = False
self._audio_debug = os.getenv("HOST_AUDIO_DEBUG", "0").strip() not in {
"0",
"false",
"False",
"no",
"off",
}
self._stt_unavailable_notice_sent = False
self._tts_unavailable_notice_sent = False
self._audio_seen_notice_sent = False
@ -925,20 +1142,65 @@ class WebRTCVoiceSession:
except Exception:
pass
async def queue_output_text(self, chunk: str) -> None:
normalized_chunk = chunk.strip()
if not normalized_chunk:
def _should_commit_partial_buffer(self) -> bool:
stripped = self._tts_partial_buffer.strip()
if not stripped:
return False
if len(stripped) >= self._tts_partial_commit_chars:
return True
return bool(TTS_PARTIAL_COMMIT_RE.search(self._tts_partial_buffer))
def _commit_partial_buffer_locked(self) -> None:
partial = self._tts_partial_buffer.strip()
self._tts_partial_buffer = ""
if partial:
self._tts_chunks.append(partial)
async def queue_output_text(self, chunk: str, *, partial: bool = False) -> None:
if not chunk:
return
async with self._tts_buffer_lock:
if not self._pc or not self._outbound_track:
return
if partial:
self._tts_partial_buffer += chunk
if self._should_commit_partial_buffer():
self._commit_partial_buffer_locked()
self._schedule_tts_flush_after(0.05, reset=True)
else:
self._schedule_tts_flush_after(self._tts_response_end_delay_s, reset=True)
return
normalized_chunk = chunk.strip()
if not normalized_chunk:
return
if self._tts_partial_buffer.strip():
self._commit_partial_buffer_locked()
# Keep line boundaries between streamed chunks so line-based filters
# stay accurate while avoiding repeated full-string copies.
self._tts_chunks.append(normalized_chunk)
# Reset the flush timer on every incoming chunk so the entire
# response is accumulated before synthesis begins. The timer
# fires once no new chunks arrive for the configured delay.
self._schedule_tts_flush_after(self._tts_response_end_delay_s)
# Flush in short rolling windows instead of waiting for the whole
# response so streamed Nanobot output can start speaking sooner.
self._schedule_tts_flush_after(self._tts_response_end_delay_s, reset=False)
async def flush_partial_output_text(self) -> None:
    """Commit any buffered partial text and schedule a near-immediate flush.

    Does nothing without a peer connection and outbound track, or when the
    partial buffer is blank.
    """
    async with self._tts_buffer_lock:
        connected = self._pc and self._outbound_track
        if connected and self._tts_partial_buffer.strip():
            self._commit_partial_buffer_locked()
            self._schedule_tts_flush_after(0.05, reset=True)
def interrupt_output(self) -> None:
    """Drop all pending speech: timer, queued text, and queued audio.

    Also resets the STT suppression deadline to zero.
    """
    pending_flush = self._tts_flush_handle
    if pending_flush:
        pending_flush.cancel()
        self._tts_flush_handle = None
    self._tts_chunks.clear()
    self._tts_partial_buffer = ""
    self._stt_suppress_until = 0.0
    track = self._outbound_track
    if track:
        track.clear()
async def handle_offer(self, payload: dict[str, Any]) -> dict[str, Any] | None:
if not AIORTC_AVAILABLE or not RTCPeerConnection or not RTCSessionDescription:
@ -980,7 +1242,10 @@ class WebRTCVoiceSession:
self._active_message_metadata = _coerce_message_metadata(msg.get("metadata", {}))
self.set_push_to_talk_pressed(bool(msg.get("pressed", False)))
elif msg_type == "command":
asyncio.create_task(self._gateway.send_command(str(msg.get("command", ""))))
command = str(msg.get("command", "")).strip()
if command == "reset":
self.interrupt_output()
asyncio.create_task(self._gateway.send_command(command))
elif msg_type == "card-response":
asyncio.create_task(
self._gateway.send_card_response(
@ -1037,6 +1302,7 @@ class WebRTCVoiceSession:
self._tts_flush_handle.cancel()
self._tts_flush_handle = None
self._tts_chunks.clear()
self._tts_partial_buffer = ""
if self._incoming_audio_task:
self._incoming_audio_task.cancel()
@ -1063,55 +1329,64 @@ class WebRTCVoiceSession:
return
asyncio.create_task(self._flush_tts(), name="voice-tts-flush")
def _schedule_tts_flush_after(self, delay_s: float) -> None:
def _schedule_tts_flush_after(self, delay_s: float, *, reset: bool = True) -> None:
if self._tts_flush_handle:
if not reset:
return
self._tts_flush_handle.cancel()
loop = asyncio.get_running_loop()
self._tts_flush_handle = loop.call_later(max(0.05, delay_s), self._schedule_tts_flush)
async def _flush_tts(self) -> None:
async with self._tts_flush_lock:
async with self._tts_buffer_lock:
self._tts_flush_handle = None
raw_text = "\n".join(self._tts_chunks)
self._tts_chunks.clear()
clean_text = self._clean_tts_text(raw_text)
if not clean_text:
return
if not self._outbound_track:
return
try:
audio = await self._tts.synthesize(clean_text)
except asyncio.CancelledError:
raise
except Exception as exc:
import traceback # noqa: local import in exception handler
traceback.print_exc()
# Restore the lost text so a future flush can retry it.
while True:
async with self._tts_buffer_lock:
self._tts_chunks.insert(0, clean_text)
await self._publish_system(f"TTS synthesis error: {exc}")
return
self._tts_flush_handle = None
if not self._tts_chunks and self._tts_partial_buffer.strip():
self._commit_partial_buffer_locked()
if not self._tts_chunks:
return
raw_text = self._tts_chunks.pop(0)
if not audio:
if not self._tts_unavailable_notice_sent:
self._tts_unavailable_notice_sent = True
await self._publish_system(
f"Host TTS backend is unavailable. {self._tts.unavailable_reason()}"
)
return
clean_text = self._clean_tts_text(raw_text)
if not clean_text:
continue
if not self._outbound_track:
return
self._extend_stt_suppression(audio)
await self._outbound_track.enqueue_pcm(
pcm=audio.pcm,
sample_rate=audio.sample_rate,
channels=audio.channels,
)
if not self._outbound_track:
return
try:
audio = await self._tts.synthesize(clean_text)
except asyncio.CancelledError:
raise
except Exception as exc:
import traceback # noqa: local import in exception handler
traceback.print_exc()
retry_segments = _split_tts_retry_segments(clean_text)
if retry_segments:
async with self._tts_buffer_lock:
self._tts_chunks[0:0] = retry_segments
continue
await self._publish_system(f"TTS synthesis error: {exc}")
return
if not audio:
if not self._tts_unavailable_notice_sent:
self._tts_unavailable_notice_sent = True
await self._publish_system(
f"Host TTS backend is unavailable. {self._tts.unavailable_reason()}"
)
return
if not self._outbound_track:
return
self._extend_stt_suppression(audio)
await self._outbound_track.enqueue_pcm(
pcm=audio.pcm,
sample_rate=audio.sample_rate,
channels=audio.channels,
)
def _extend_stt_suppression(self, audio: PCMChunk) -> None:
if not self._stt_suppress_during_tts:
@ -1152,13 +1427,13 @@ class WebRTCVoiceSession:
if not pcm16:
continue
if not self._audio_seen_notice_sent:
if self._audio_debug and not self._audio_seen_notice_sent:
self._audio_seen_notice_sent = True
await self._publish_system("Receiving microphone audio on host.")
await self._publish_debug("Receiving microphone audio on host.")
if not self._audio_format_notice_sent:
if self._audio_debug and not self._audio_format_notice_sent:
self._audio_format_notice_sent = True
await self._publish_system(
await self._publish_debug(
"Inbound audio frame stats: "
f"sample_rate={int(getattr(frame, 'sample_rate', 0) or 0)}, "
f"samples={int(getattr(frame, 'samples', 0) or 0)}, "
@ -1261,16 +1536,25 @@ class WebRTCVoiceSession:
None,
)
normalized_duration_ms = (len(normalized_pcm) / 2 / 16_000) * 1000.0
if not self._ptt_timing_correction_notice_sent:
if self._audio_debug and not self._ptt_timing_correction_notice_sent:
self._ptt_timing_correction_notice_sent = True
await self._publish_system(
await self._publish_debug(
"Corrected PTT timing mismatch "
f"(estimated source={nearest_source_rate}Hz)."
)
await self._enqueue_stt_segment(pcm16=normalized_pcm, duration_ms=normalized_duration_ms)
await self._enqueue_stt_segment(
pcm16=normalized_pcm,
duration_ms=normalized_duration_ms,
metadata=dict(self._active_message_metadata),
)
async def _enqueue_stt_segment(self, pcm16: bytes, duration_ms: float) -> None:
async def _enqueue_stt_segment(
self,
pcm16: bytes,
duration_ms: float,
metadata: dict[str, Any],
) -> None:
if duration_ms < self._stt_min_ptt_ms:
return
@ -1284,17 +1568,17 @@ class WebRTCVoiceSession:
await self._publish_system("Voice input backlog detected; dropping stale segment.")
with contextlib.suppress(asyncio.QueueFull):
self._stt_segments.put_nowait(pcm16)
self._stt_segments.put_nowait(STTSegment(pcm=pcm16, metadata=dict(metadata)))
async def _stt_worker(self) -> None:
while True:
pcm16 = await self._stt_segments.get()
if not self._stt_first_segment_notice_sent:
segment = await self._stt_segments.get()
if self._audio_debug and not self._stt_first_segment_notice_sent:
self._stt_first_segment_notice_sent = True
await self._publish_system("Push-to-talk audio captured. Running host STT...")
await self._publish_debug("Push-to-talk audio captured. Running host STT...")
try:
transcript = await self._stt.transcribe_pcm(
pcm=pcm16,
pcm=segment.pcm,
sample_rate=16_000,
channels=1,
)
@ -1317,7 +1601,7 @@ class WebRTCVoiceSession:
try:
await self._gateway.send_user_message(
transcript,
metadata=dict(self._active_message_metadata),
metadata=dict(segment.metadata),
)
except RuntimeError as exc:
if self._closed:
@ -1360,6 +1644,11 @@ class WebRTCVoiceSession:
async def _publish_system(self, text: str) -> None:
    """Publish *text* on the gateway bus as a ``system`` event."""
    publish = self._gateway.bus.publish
    system_event = WisperEvent(role="system", text=text)
    await publish(system_event)
async def _publish_debug(self, text: str) -> None:
if not self._audio_debug:
return
await self._publish_system(text)
async def _publish_agent_state(self, state: str) -> None:
await self._gateway.bus.publish(WisperEvent(role="agent-state", text=state))