viz update

2026-03-04 11:39:03 -05:00 · 2026-03-04 11:39:03 -05:00 · 9222c59f03
commit 9222c59f03
parent ed629ff60e
2 changed files with 145 additions and 38 deletions
--- a/static/index.html
+++ b/static/index.html
@@ -211,19 +211,32 @@
        if (agentVisualizer) agentVisualizer.setState(state);
      };

-      const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => {
+      // Creates a flat torus-path ring with a per-vertex normalised ring-t attribute (aRingT).
+      // The Y wave displacement is applied in the vertex shader so phase can be animated.
+      const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => {
        const points = [];
        for (let i = 0; i <= segments; i += 1) {
          const theta = (i / segments) * Math.PI * 2;
-          const x = radius * Math.cos(theta);
-          const z = radius * Math.sin(theta);
-          // Smooth round lumps: plain cosine is inherently smooth with
-          // symmetric rounded peaks and valleys — no sharpening needed.
-          const y = curvature * radius * Math.cos(waves * theta);
-          points.push(new THREE.Vector3(x, y, z));
+          points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
        }
        const curve = new THREE.CatmullRomCurve3(points, true);
-        return new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
+        const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
+
+        // Store normalised t (0→1 around the ring) for each vertex so the shader
+        // can reconstruct theta and apply the animated wave.
+        const posCount = geo.attributes.position.count;
+        const tAttr = new Float32Array(posCount);
+        // TubeGeometry lays out vertices as (tubularSegments+1) rings of (radialSegments+1) verts.
+        const tubularSegments = segments;
+        const radialSegments  = 12;
+        for (let tube = 0; tube <= tubularSegments; tube++) {
+          const tVal = tube / tubularSegments;
+          for (let rad = 0; rad <= radialSegments; rad++) {
+            tAttr[tube * (radialSegments + 1) + rad] = tVal;
+          }
+        }
+        geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
+        return geo;
      };

      const createAgentVisualizer = () => {
@@ -251,46 +264,97 @@
        const ambient = new THREE.AmbientLight(0xffffff, 1.0);
        scene.add(ambient);

-        const geometry = createParaboloidRing();
-        const ringMaterial = new THREE.MeshBasicMaterial({
-          color: 0xfff5eb,
-          transparent: false,
+        const geometry  = createParaboloidRing();
+        const geometry2 = createParaboloidRing();
+
+        // Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
+        // so phase can be animated each frame without rebuilding geometry.
+        const ringVertexShader = `
+          attribute float aRingT;
+          uniform float uPhase;
+          uniform float uAmplitude;
+          varying float vWorldX;
+          void main() {
+            float theta = aRingT * 6.28318530718;
+            vec3 pos = position;
+            pos.y += uAmplitude * cos(5.0 * theta + uPhase);
+            vec4 wp = modelMatrix * vec4(pos, 1.0);
+            vWorldX = wp.x;
+            gl_Position = projectionMatrix * viewMatrix * wp;
+          }
+        `;
+        const ringFragmentShader = `
+          uniform vec3 uColor;
+          uniform float uFade;
+          uniform float uFadeOffset;
+          varying float vWorldX;
+          void main() {
+            float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
+            gl_FragColor = vec4(uColor, alpha);
+          }
+        `;
+
+        const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
+          uniforms: {
+            uColor:      { value: new THREE.Color(0xfff5eb) },
+            uFade:       { value: 0.0 },
+            uFadeOffset: { value: 0.0 },
+            uPhase:      { value: phase },
+            uAmplitude:  { value: 0.06 * 1.1 },  // base: curvature * radius
+          },
+          vertexShader:   ringVertexShader,
+          fragmentShader: ringFragmentShader,
+          transparent: true,
          side: THREE.DoubleSide,
+          depthWrite: false,
        });
-        const ring = new THREE.Mesh(geometry, ringMaterial);
+
+        const ringMaterial  = makeRingMaterial(0.0);
+        const ringMaterial2 = makeRingMaterial(Math.PI);  // half-wave offset
+
+        const ring  = new THREE.Mesh(geometry,  ringMaterial);
+        const ring2 = new THREE.Mesh(geometry2, ringMaterial2);
+
        const group = new THREE.Group();
        group.add(ring);
+        group.add(ring2);
        group.rotation.y = Math.PI * 0.18;
        scene.add(group);

+        let orthoScale = 1.0;  // lerps to 0.7 in side view for zoom effect
+
+        const applyFrustum = () => {
+          const width = Math.max(2, agentVizEl.clientWidth);
+          const height = Math.max(2, agentVizEl.clientHeight);
+          const aspect = width / height;
+          const s = orthoSize * orthoScale;
+          if (aspect >= 1) {
+            camera.left   = -s * aspect;
+            camera.right  =  s * aspect;
+            camera.top    =  s;
+            camera.bottom = -s;
+          } else {
+            camera.left   = -s;
+            camera.right  =  s;
+            camera.top    =  s / aspect;
+            camera.bottom = -s / aspect;
+          }
+          camera.updateProjectionMatrix();
+        };
+
        const resize = () => {
          const width = Math.max(2, agentVizEl.clientWidth);
          const height = Math.max(2, agentVizEl.clientHeight);
          renderer.setSize(width, height, false);
-          const aspect = width / height;
-          // Keep the ring fully visible in both landscape and portrait.
-          // Landscape (aspect >= 1): expand horizontally, keep vertical fixed.
-          // Portrait  (aspect <  1): keep horizontal fixed at orthoSize,
-          //                          expand vertically so the ring isn't clipped.
-          if (aspect >= 1) {
-            camera.left   = -orthoSize * aspect;
-            camera.right  =  orthoSize * aspect;
-            camera.top    =  orthoSize;
-            camera.bottom = -orthoSize;
-          } else {
-            camera.left   = -orthoSize;
-            camera.right  =  orthoSize;
-            camera.top    =  orthoSize / aspect;
-            camera.bottom = -orthoSize / aspect;
-          }
-          camera.updateProjectionMatrix();
+          applyFrustum();
        };
        resize();
        window.addEventListener("resize", resize);

        let currentState = STATES.idle;
        let currentAudioLevel = 0;
-        let smoothAudioLevel = 0;
+        let smoothAudioLevel  = 0;   // fast follower — ring1 amplitude + phase speed
+        let smoothAudioLevel2 = 0;   // slow follower — ring2 amplitude, creates lag between rings
        let deformScale = 1.0;
        let ringScale = 1.0;   // uniform xz scale — used for thickness throb when thinking
        let spinSpeed = 0.0;
@@ -313,12 +377,15 @@

          // Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
          const t = dt * 60;
-          const lerpAudio  = 1 - Math.pow(0.85, t);
+          const lerpAudio  = 1 - Math.pow(0.85, t);   // fast
+          const lerpAudio2 = 1 - Math.pow(0.94, t);   // slow — ring2 lags behind ring1
          const lerpDeform = 1 - Math.pow(0.88, t);
          const lerpSpin   = 1 - Math.pow(0.86, t);
          const lerpRing   = 1 - Math.pow(0.90, t);
+          const lerpAmp    = 1 - Math.pow(0.88, t);

-          smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
+          smoothAudioLevel  += (currentAudioLevel - smoothAudioLevel)  * lerpAudio;
+          smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
          const speakingActive = currentState === STATES.speaking;

          let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
@@ -352,6 +419,46 @@
            camera.lookAt(lookAt);
          }

+          // Smoothly fade out the back half of the ring as the camera moves into side view.
+          // sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
+          const camLen = camera.position.length();
+          const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
+          const lerpSide = 1 - Math.pow(0.88, t);
+          ringMaterial.uniforms.uFade.value       += (sideT - ringMaterial.uniforms.uFade.value)       * lerpSide;
+          ringMaterial.uniforms.uFadeOffset.value  += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value)  * lerpSide;
+          ringMaterial2.uniforms.uFade.value       += (sideT - ringMaterial2.uniforms.uFade.value)      * lerpSide;
+          ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;
+
+          // Phase animation + reactive amplitude while speaking.
+          const baseAmp = 0.06 * 1.1;
+          if (speakingActive) {
+            const breathe    = Math.sin(now * 0.0018);   // ~3.5 s period
+            const base       = 1.8 + smoothAudioLevel * 4.0;
+            const ring1Speed = (base + breathe * 0.6) * dt;
+            const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
+            ringMaterial.uniforms.uPhase.value  += ring1Speed;
+            ringMaterial2.uniforms.uPhase.value += ring2Speed;
+
+            // Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
+            const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel  * 3.5);
+            const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
+            ringMaterial.uniforms.uAmplitude.value  += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value)  * lerpAmp;
+            ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
+          } else {
+            // Settle ring2's phase toward its rest offset (Math.PI) and both amplitudes toward baseAmp; ring1's phase is left unchanged.
+            ringMaterial2.uniforms.uPhase.value +=
+              (Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
+            ringMaterial.uniforms.uAmplitude.value  += (baseAmp - ringMaterial.uniforms.uAmplitude.value)  * lerpAmp;
+            ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
+          }
+
+          // Zoom in when in side view by shrinking the ortho frustum.
+          const targetOrthoScale = 1.0 - sideT * 0.3;  // 1.0 top → 0.7 side
+          if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
+            orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
+            applyFrustum();
+          }
+
          // Card background: gray → coral as connection is established, then darken when listening.
          // While connecting, throb the gray base with a slow sine pulse.
          connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
--- a/voice_rtc.py
+++ b/voice_rtc.py
@@ -341,14 +341,14 @@ class CommandSpeechToText:

 class FasterWhisperSpeechToText:
    def __init__(self) -> None:
-        self._model_name = os.getenv("HOST_STT_MODEL", "base.en").strip() or "base.en"
+        self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
        self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
        self._compute_type = (
            os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
        )
        self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
-        self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "2")))
-        self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "2")))
+        self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
+        self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
        self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in {
            "0",
            "false",
@@ -587,7 +587,7 @@ class SupertonicTextToSpeech:
            os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
        )
        self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
-        self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "8"))
+        self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
        self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
        self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
        self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
@@ -900,7 +900,7 @@ class WebRTCVoiceSession:
        # How long to wait after the last incoming chunk before flushing the
        # entire accumulated response to TTS in one go.
        self._tts_response_end_delay_s = max(
-            0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "1.5"))
+            0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5"))
        )

        self._closed = False