diff --git a/static/index.html b/static/index.html
index 60f132e..4139412 100644
--- a/static/index.html
+++ b/static/index.html
@@ -211,19 +211,32 @@
if (agentVisualizer) agentVisualizer.setState(state);
};
- const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => {
+ // Builds a flat (y = 0) circular tube of the given radius and attaches a per-vertex "aRingT" attribute.
+ // The Y wave displacement is applied in the vertex shader (from aRingT) so its phase can be animated.
+ const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => { // NOTE(review): curvature/waves were dropped and tubeRadius moved to the 3rd position — check any caller passing positional args.
const points = [];
for (let i = 0; i <= segments; i += 1) {
const theta = (i / segments) * Math.PI * 2;
- const x = radius * Math.cos(theta);
- const z = radius * Math.sin(theta);
- // Smooth round lumps: plain cosine is inherently smooth with
- // symmetric rounded peaks and valleys — no sharpening needed.
- const y = curvature * radius * Math.cos(waves * theta);
- points.push(new THREE.Vector3(x, y, z));
+ points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
}
const curve = new THREE.CatmullRomCurve3(points, true);
- return new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
+ const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true); // 12 here must match radialSegments below
+
+ // Store normalised t (0→1 around the ring) for each vertex so the shader
+ // can reconstruct theta and apply the animated wave.
+ const posCount = geo.attributes.position.count;
+ const tAttr = new Float32Array(posCount);
+ // TubeGeometry lays out vertices as (tubularSegments+1) cross-section rings of (radialSegments+1) verts each, ring after ring.
+ const tubularSegments = segments;
+ const radialSegments = 12; // keep in sync with the radialSegments argument passed to TubeGeometry above
+ for (let tube = 0; tube <= tubularSegments; tube++) {
+ const tVal = tube / tubularSegments;
+ for (let rad = 0; rad <= radialSegments; rad++) {
+ tAttr[tube * (radialSegments + 1) + rad] = tVal; // every vertex of one cross-section shares the same t
+ }
+ }
+ geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
+ return geo;
};
const createAgentVisualizer = () => {
@@ -251,46 +264,97 @@
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
scene.add(ambient);
- const geometry = createParaboloidRing();
- const ringMaterial = new THREE.MeshBasicMaterial({
- color: 0xfff5eb,
- transparent: false,
+ const geometry = createParaboloidRing();
+ const geometry2 = createParaboloidRing();
+
+ // Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
+ // so phase can be animated each frame without rebuilding geometry.
+ const ringVertexShader = `
+ attribute float aRingT;
+ uniform float uPhase;
+ uniform float uAmplitude;
+ varying float vWorldX;
+ void main() {
+ float theta = aRingT * 6.28318530718;
+ vec3 pos = position;
+ pos.y += uAmplitude * cos(5.0 * theta + uPhase);
+ vec4 wp = modelMatrix * vec4(pos, 1.0);
+ vWorldX = wp.x;
+ gl_Position = projectionMatrix * viewMatrix * wp;
+ }
+ `;
+ const ringFragmentShader = `
+ uniform vec3 uColor;
+ uniform float uFade;
+ uniform float uFadeOffset;
+ varying float vWorldX;
+ void main() {
+ float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
+ gl_FragColor = vec4(uColor, alpha);
+ }
+ `;
+
+ const makeRingMaterial = (phase) => new THREE.ShaderMaterial({ // Builds one ring's material; `phase` is the initial uPhase (wave offset in radians).
+ uniforms: {
+ uColor: { value: new THREE.Color(0xfff5eb) },
+ uFade: { value: 0.0 }, // 0 = fully opaque everywhere; scales the X-dependent fade in the fragment shader
+ uFadeOffset: { value: 0.0 }, // shifts where along world X the fade ramp sits
+ uPhase: { value: phase },
+ uAmplitude: { value: 0.06 * 1.1 }, // rest amplitude: former curvature default (0.06) * ring radius default (1.1)
+ },
+ vertexShader: ringVertexShader,
+ fragmentShader: ringFragmentShader,
+ transparent: true, // the fragment shader writes a varying alpha
side: THREE.DoubleSide,
+ depthWrite: false, // presumably so the two translucent rings do not occlude each other — confirm
});
- const ring = new THREE.Mesh(geometry, ringMaterial);
+
+ const ringMaterial = makeRingMaterial(0.0);
+ const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset
+
+ const ring = new THREE.Mesh(geometry, ringMaterial);
+ const ring2 = new THREE.Mesh(geometry2, ringMaterial2);
+
const group = new THREE.Group();
group.add(ring);
+ group.add(ring2);
group.rotation.y = Math.PI * 0.18;
scene.add(group);
+ let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect
+
+ const applyFrustum = () => { // Recomputes the camera bounds from the container aspect and orthoSize * orthoScale.
+ const width = Math.max(2, agentVizEl.clientWidth); // clamp to >= 2 so aspect is never computed from a zero size
+ const height = Math.max(2, agentVizEl.clientHeight);
+ const aspect = width / height;
+ const s = orthoSize * orthoScale; // half-extent of the shorter screen axis; shrinks as orthoScale drops
+ if (aspect >= 1) { // Landscape: vertical half-extent stays at s, horizontal expands by aspect.
+ camera.left = -s * aspect;
+ camera.right = s * aspect;
+ camera.top = s;
+ camera.bottom = -s;
+ } else { // Portrait: horizontal half-extent stays at s, vertical expands so the ring isn't clipped.
+ camera.left = -s;
+ camera.right = s;
+ camera.top = s / aspect;
+ camera.bottom = -s / aspect;
+ }
+ camera.updateProjectionMatrix(); // required after changing left/right/top/bottom
+ };
+
const resize = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
renderer.setSize(width, height, false);
- const aspect = width / height;
- // Keep the ring fully visible in both landscape and portrait.
- // Landscape (aspect >= 1): expand horizontally, keep vertical fixed.
- // Portrait (aspect < 1): keep horizontal fixed at orthoSize,
- // expand vertically so the ring isn't clipped.
- if (aspect >= 1) {
- camera.left = -orthoSize * aspect;
- camera.right = orthoSize * aspect;
- camera.top = orthoSize;
- camera.bottom = -orthoSize;
- } else {
- camera.left = -orthoSize;
- camera.right = orthoSize;
- camera.top = orthoSize / aspect;
- camera.bottom = -orthoSize / aspect;
- }
- camera.updateProjectionMatrix();
+ applyFrustum(); // camera-bounds math lives in applyFrustum, which re-reads the container size itself
};
resize();
window.addEventListener("resize", resize);
let currentState = STATES.idle;
let currentAudioLevel = 0;
- let smoothAudioLevel = 0;
+ let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
+ let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
let deformScale = 1.0;
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
let spinSpeed = 0.0;
@@ -313,12 +377,15 @@
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
const t = dt * 60;
- const lerpAudio = 1 - Math.pow(0.85, t);
+ const lerpAudio = 1 - Math.pow(0.85, t); // fast
+ const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
const lerpDeform = 1 - Math.pow(0.88, t);
const lerpSpin = 1 - Math.pow(0.86, t);
const lerpRing = 1 - Math.pow(0.90, t);
+ const lerpAmp = 1 - Math.pow(0.88, t);
- smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
+ smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
+ smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
const speakingActive = currentState === STATES.speaking;
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
@@ -352,6 +419,46 @@
camera.lookAt(lookAt);
}
+ // Smoothly fade out part of the ring as the camera moves into side view.
+ // sideT = |camera.x| / |camera.position|: 0 when the camera has no X offset, 1 when it lies on the X axis.
+ const camLen = camera.position.length();
+ const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0; // NOTE(review): abs() is symmetric in X, but the fragment shader always fades the -X half; assumes the side-view camera sits on +X — confirm.
+ const lerpSide = 1 - Math.pow(0.88, t);
+ ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
+ ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
+ ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
+ ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;
+
+ // Phase animation + reactive amplitude while speaking.
+ const baseAmp = 0.06 * 1.1; // same rest amplitude the ring materials are created with
+ if (speakingActive) {
+ const breathe = Math.sin(now * 0.0018); // ~3.5 s period, assuming `now` is in milliseconds
+ const base = 1.8 + smoothAudioLevel * 4.0;
+ const ring1Speed = (base + breathe * 0.6) * dt;
+ const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
+ ringMaterial.uniforms.uPhase.value += ring1Speed;
+ ringMaterial2.uniforms.uPhase.value += ring2Speed;
+
+ // Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
+ const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
+ const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
+ ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
+ ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
+ } else {
+ // Settle ring2's phase toward PI and both amplitudes toward baseAmp. NOTE(review): ring1's uPhase is never reset or wrapped, so after speaking the rings are no longer a half-wave apart and ring2 may unwind many radians — confirm intended.
+ ringMaterial2.uniforms.uPhase.value +=
+ (Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
+ ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
+ ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
+ }
+
+ // Zoom in when in side view by shrinking the ortho frustum.
+ const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
+ if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) { // skip the projection update once the zoom has settled
+ orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
+ applyFrustum();
+ }
+
// Card background: gray → coral as connection is established, then darken when listening.
// While connecting, throb the gray base with a slow sine pulse.
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
diff --git a/voice_rtc.py b/voice_rtc.py
index c753c2b..dc2e47f 100644
--- a/voice_rtc.py
+++ b/voice_rtc.py
@@ -341,14 +341,14 @@ class CommandSpeechToText:
class FasterWhisperSpeechToText:
def __init__(self) -> None:
- self._model_name = os.getenv("HOST_STT_MODEL", "base.en").strip() or "base.en"
+ self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
self._compute_type = (
os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
)
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
- self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "2")))
- self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "2")))
+ self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
+ self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in {
"0",
"false",
@@ -587,7 +587,7 @@ class SupertonicTextToSpeech:
os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
)
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
- self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "8"))
+ self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
@@ -900,7 +900,7 @@ class WebRTCVoiceSession:
# How long to wait after the last incoming chunk before flushing the
# entire accumulated response to TTS in one go.
self._tts_response_end_delay_s = max(
- 0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "1.5"))
+ 0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5"))
)
self._closed = False