diff --git a/static/index.html b/static/index.html index 60f132e..4139412 100644 --- a/static/index.html +++ b/static/index.html @@ -211,19 +211,32 @@ if (agentVisualizer) agentVisualizer.setState(state); }; - const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => { + // Creates a flat torus-path ring with a per-vertex normalised-t attribute (aRingT). + // The Y wave displacement is applied in the vertex shader so phase can be animated. + const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => { const points = []; for (let i = 0; i <= segments; i += 1) { const theta = (i / segments) * Math.PI * 2; - const x = radius * Math.cos(theta); - const z = radius * Math.sin(theta); - // Smooth round lumps: plain cosine is inherently smooth with - // symmetric rounded peaks and valleys — no sharpening needed. - const y = curvature * radius * Math.cos(waves * theta); - points.push(new THREE.Vector3(x, y, z)); + points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta))); } const curve = new THREE.CatmullRomCurve3(points, true); - return new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true); + const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true); + + // Store normalised t (0→1 around the ring) for each vertex so the shader + // can reconstruct theta and apply the animated wave. + const posCount = geo.attributes.position.count; + const tAttr = new Float32Array(posCount); + // TubeGeometry lays out vertices as (tubularSegments+1) rings of (radialSegments+1) verts. 
+ const tubularSegments = segments; + const radialSegments = 12; + for (let tube = 0; tube <= tubularSegments; tube++) { + const tVal = tube / tubularSegments; + for (let rad = 0; rad <= radialSegments; rad++) { + tAttr[tube * (radialSegments + 1) + rad] = tVal; + } + } + geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1)); + return geo; }; const createAgentVisualizer = () => { @@ -251,46 +264,97 @@ const ambient = new THREE.AmbientLight(0xffffff, 1.0); scene.add(ambient); - const geometry = createParaboloidRing(); - const ringMaterial = new THREE.MeshBasicMaterial({ - color: 0xfff5eb, - transparent: false, + const geometry = createParaboloidRing(); + const geometry2 = createParaboloidRing(); + + // Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform, + // so phase can be animated each frame without rebuilding geometry. + const ringVertexShader = ` + attribute float aRingT; + uniform float uPhase; + uniform float uAmplitude; + varying float vWorldX; + void main() { + float theta = aRingT * 6.28318530718; + vec3 pos = position; + pos.y += uAmplitude * cos(5.0 * theta + uPhase); + vec4 wp = modelMatrix * vec4(pos, 1.0); + vWorldX = wp.x; + gl_Position = projectionMatrix * viewMatrix * wp; + } + `; + const ringFragmentShader = ` + uniform vec3 uColor; + uniform float uFade; + uniform float uFadeOffset; + varying float vWorldX; + void main() { + float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5); + gl_FragColor = vec4(uColor, alpha); + } + `; + + const makeRingMaterial = (phase) => new THREE.ShaderMaterial({ + uniforms: { + uColor: { value: new THREE.Color(0xfff5eb) }, + uFade: { value: 0.0 }, + uFadeOffset: { value: 0.0 }, + uPhase: { value: phase }, + uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius + }, + vertexShader: ringVertexShader, + fragmentShader: ringFragmentShader, + transparent: true, side: THREE.DoubleSide, + depthWrite: false, }); - const ring = new 
THREE.Mesh(geometry, ringMaterial); + + const ringMaterial = makeRingMaterial(0.0); + const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset + + const ring = new THREE.Mesh(geometry, ringMaterial); + const ring2 = new THREE.Mesh(geometry2, ringMaterial2); + const group = new THREE.Group(); group.add(ring); + group.add(ring2); group.rotation.y = Math.PI * 0.18; scene.add(group); + let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect + + const applyFrustum = () => { + const width = Math.max(2, agentVizEl.clientWidth); + const height = Math.max(2, agentVizEl.clientHeight); + const aspect = width / height; + const s = orthoSize * orthoScale; + if (aspect >= 1) { + camera.left = -s * aspect; + camera.right = s * aspect; + camera.top = s; + camera.bottom = -s; + } else { + camera.left = -s; + camera.right = s; + camera.top = s / aspect; + camera.bottom = -s / aspect; + } + camera.updateProjectionMatrix(); + }; + const resize = () => { const width = Math.max(2, agentVizEl.clientWidth); const height = Math.max(2, agentVizEl.clientHeight); renderer.setSize(width, height, false); - const aspect = width / height; - // Keep the ring fully visible in both landscape and portrait. - // Landscape (aspect >= 1): expand horizontally, keep vertical fixed. - // Portrait (aspect < 1): keep horizontal fixed at orthoSize, - // expand vertically so the ring isn't clipped. 
- if (aspect >= 1) { - camera.left = -orthoSize * aspect; - camera.right = orthoSize * aspect; - camera.top = orthoSize; - camera.bottom = -orthoSize; - } else { - camera.left = -orthoSize; - camera.right = orthoSize; - camera.top = orthoSize / aspect; - camera.bottom = -orthoSize / aspect; - } - camera.updateProjectionMatrix(); + applyFrustum(); }; resize(); window.addEventListener("resize", resize); let currentState = STATES.idle; let currentAudioLevel = 0; - let smoothAudioLevel = 0; + let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed + let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings let deformScale = 1.0; let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking let spinSpeed = 0.0; @@ -313,12 +377,15 @@ // Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline). const t = dt * 60; - const lerpAudio = 1 - Math.pow(0.85, t); + const lerpAudio = 1 - Math.pow(0.85, t); // fast + const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1 const lerpDeform = 1 - Math.pow(0.88, t); const lerpSpin = 1 - Math.pow(0.86, t); const lerpRing = 1 - Math.pow(0.90, t); + const lerpAmp = 1 - Math.pow(0.88, t); - smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio; + smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio; + smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2; const speakingActive = currentState === STATES.speaking; let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1); @@ -352,6 +419,46 @@ camera.lookAt(lookAt); } + // Smoothly fade out the back half of the ring as the camera moves into side view. + // sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is. + const camLen = camera.position.length(); + const sideT = camLen > 0.001 ? 
Math.abs(camera.position.x) / camLen : 0; + const lerpSide = 1 - Math.pow(0.88, t); + ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide; + ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide; + ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide; + ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide; + + // Phase animation + reactive amplitude while speaking. + const baseAmp = 0.06 * 1.1; + if (speakingActive) { + const breathe = Math.sin(now * 0.0018); // ~3.5 s period + const base = 1.8 + smoothAudioLevel * 4.0; + const ring1Speed = (base + breathe * 0.6) * dt; + const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt; + ringMaterial.uniforms.uPhase.value += ring1Speed; + ringMaterial2.uniforms.uPhase.value += ring2Speed; + + // Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes. + const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5); + const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5); + ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp; + ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp; + } else { + // Ease ring2's phase toward PI and both amplitudes back to baseAmp; ring1's phase is left where it stopped. + ringMaterial2.uniforms.uPhase.value += + (Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t)); + ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp; + ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp; + } + + // Zoom in when in side view by shrinking the ortho frustum. 
+ const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side + if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) { + orthoScale += (targetOrthoScale - orthoScale) * lerpSide; + applyFrustum(); + } + // Card background: gray → coral as connection is established, then darken when listening. // While connecting, throb the gray base with a slow sine pulse. connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t)); diff --git a/voice_rtc.py b/voice_rtc.py index c753c2b..dc2e47f 100644 --- a/voice_rtc.py +++ b/voice_rtc.py @@ -341,14 +341,14 @@ class CommandSpeechToText: class FasterWhisperSpeechToText: def __init__(self) -> None: - self._model_name = os.getenv("HOST_STT_MODEL", "base.en").strip() or "base.en" + self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en" self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto" self._compute_type = ( os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8" ) self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip() - self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "2"))) - self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "2"))) + self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1"))) + self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1"))) self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in { "0", "false", @@ -587,7 +587,7 @@ class SupertonicTextToSpeech: os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1" ) self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en" - self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "8")) + self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4")) self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5")) self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS") self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS") @@ -900,7 +900,7 @@ class WebRTCVoiceSession: # How long to wait after the 
last incoming chunk before flushing the # entire accumulated response to TTS in one go. self._tts_response_end_delay_s = max( - 0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "1.5")) + 0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5")) ) self._closed = False