viz update

This commit is contained in:
kacper 2026-03-04 11:39:03 -05:00
parent ed629ff60e
commit 9222c59f03
2 changed files with 145 additions and 38 deletions

View file

@ -211,19 +211,32 @@
if (agentVisualizer) agentVisualizer.setState(state);
};
const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => {
// Creates a flat (y = 0) circular tube ring with a per-vertex "aRingT" attribute
// holding the normalised position t (0→1) around the ring.
// The Y wave displacement is applied in the vertex shader so phase can be animated.
const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => {
const points = [];
for (let i = 0; i <= segments; i += 1) {
const theta = (i / segments) * Math.PI * 2;
const x = radius * Math.cos(theta);
const z = radius * Math.sin(theta);
// Smooth round lumps: plain cosine is inherently smooth with
// symmetric rounded peaks and valleys — no sharpening needed.
const y = curvature * radius * Math.cos(waves * theta);
points.push(new THREE.Vector3(x, y, z));
points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
}
const curve = new THREE.CatmullRomCurve3(points, true);
return new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
// Store normalised t (0→1 around the ring) for each vertex so the shader
// can reconstruct theta and apply the animated wave.
const posCount = geo.attributes.position.count;
const tAttr = new Float32Array(posCount);
// The indexing below assumes TubeGeometry emits (tubularSegments+1) cross-section
// rings of (radialSegments+1) verts each, one ring after another along the path.
const tubularSegments = segments;
// Must match the radial segment count (12) passed to TubeGeometry above.
const radialSegments = 12;
for (let tube = 0; tube <= tubularSegments; tube++) {
// Every vertex of one cross-section ring shares the same t.
const tVal = tube / tubularSegments;
for (let rad = 0; rad <= radialSegments; rad++) {
tAttr[tube * (radialSegments + 1) + rad] = tVal;
}
}
geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
return geo;
};
const createAgentVisualizer = () => {
@ -251,46 +264,97 @@
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
scene.add(ambient);
const geometry = createParaboloidRing();
const ringMaterial = new THREE.MeshBasicMaterial({
color: 0xfff5eb,
transparent: false,
const geometry = createParaboloidRing();
const geometry2 = createParaboloidRing();
// Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
// so phase can be animated each frame without rebuilding geometry.
const ringVertexShader = `
attribute float aRingT;
uniform float uPhase;
uniform float uAmplitude;
varying float vWorldX;
void main() {
float theta = aRingT * 6.28318530718;
vec3 pos = position;
pos.y += uAmplitude * cos(5.0 * theta + uPhase);
vec4 wp = modelMatrix * vec4(pos, 1.0);
vWorldX = wp.x;
gl_Position = projectionMatrix * viewMatrix * wp;
}
`;
const ringFragmentShader = `
uniform vec3 uColor;
uniform float uFade;
uniform float uFadeOffset;
varying float vWorldX;
void main() {
float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
gl_FragColor = vec4(uColor, alpha);
}
`;
const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
uniforms: {
uColor: { value: new THREE.Color(0xfff5eb) },
uFade: { value: 0.0 },
uFadeOffset: { value: 0.0 },
uPhase: { value: phase },
uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius
},
vertexShader: ringVertexShader,
fragmentShader: ringFragmentShader,
transparent: true,
side: THREE.DoubleSide,
depthWrite: false,
});
const ring = new THREE.Mesh(geometry, ringMaterial);
const ringMaterial = makeRingMaterial(0.0);
const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset
const ring = new THREE.Mesh(geometry, ringMaterial);
const ring2 = new THREE.Mesh(geometry2, ringMaterial2);
const group = new THREE.Group();
group.add(ring);
group.add(ring2);
group.rotation.y = Math.PI * 0.18;
scene.add(group);
let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect
const applyFrustum = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
const aspect = width / height;
const s = orthoSize * orthoScale;
if (aspect >= 1) {
camera.left = -s * aspect;
camera.right = s * aspect;
camera.top = s;
camera.bottom = -s;
} else {
camera.left = -s;
camera.right = s;
camera.top = s / aspect;
camera.bottom = -s / aspect;
}
camera.updateProjectionMatrix();
};
const resize = () => {
const width = Math.max(2, agentVizEl.clientWidth);
const height = Math.max(2, agentVizEl.clientHeight);
renderer.setSize(width, height, false);
const aspect = width / height;
// Keep the ring fully visible in both landscape and portrait.
// Landscape (aspect >= 1): expand horizontally, keep vertical fixed.
// Portrait (aspect < 1): keep horizontal fixed at orthoSize,
// expand vertically so the ring isn't clipped.
if (aspect >= 1) {
camera.left = -orthoSize * aspect;
camera.right = orthoSize * aspect;
camera.top = orthoSize;
camera.bottom = -orthoSize;
} else {
camera.left = -orthoSize;
camera.right = orthoSize;
camera.top = orthoSize / aspect;
camera.bottom = -orthoSize / aspect;
}
camera.updateProjectionMatrix();
applyFrustum();
};
resize();
window.addEventListener("resize", resize);
let currentState = STATES.idle;
let currentAudioLevel = 0;
let smoothAudioLevel = 0;
let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
let deformScale = 1.0;
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
let spinSpeed = 0.0;
@ -313,12 +377,15 @@
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
const t = dt * 60;
const lerpAudio = 1 - Math.pow(0.85, t);
const lerpAudio = 1 - Math.pow(0.85, t); // fast
const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
const lerpDeform = 1 - Math.pow(0.88, t);
const lerpSpin = 1 - Math.pow(0.86, t);
const lerpRing = 1 - Math.pow(0.90, t);
const lerpAmp = 1 - Math.pow(0.88, t);
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
const speakingActive = currentState === STATES.speaking;
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
@ -352,6 +419,46 @@
camera.lookAt(lookAt);
}
// Smoothly fade out the back half of the ring as the camera moves into side view.
// sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
const camLen = camera.position.length();
const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
const lerpSide = 1 - Math.pow(0.88, t);
ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;
// Phase animation + reactive amplitude while speaking.
const baseAmp = 0.06 * 1.1;
if (speakingActive) {
const breathe = Math.sin(now * 0.0018); // ~3.5 s period
const base = 1.8 + smoothAudioLevel * 4.0;
const ring1Speed = (base + breathe * 0.6) * dt;
const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
ringMaterial.uniforms.uPhase.value += ring1Speed;
ringMaterial2.uniforms.uPhase.value += ring2Speed;
// Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
} else {
// Settle ring2's phase toward its PI rest offset and both amplitudes toward baseAmp (ring1's phase is left where it stopped).
ringMaterial2.uniforms.uPhase.value +=
(Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
}
// Zoom in when in side view by shrinking the ortho frustum.
const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
applyFrustum();
}
// Card background: gray → coral as connection is established, then darken when listening.
// While connecting, throb the gray base with a slow sine pulse.
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));

View file

@ -341,14 +341,14 @@ class CommandSpeechToText:
class FasterWhisperSpeechToText:
def __init__(self) -> None:
self._model_name = os.getenv("HOST_STT_MODEL", "base.en").strip() or "base.en"
self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
self._compute_type = (
os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
)
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "2")))
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "2")))
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in {
"0",
"false",
@ -587,7 +587,7 @@ class SupertonicTextToSpeech:
os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
)
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "8"))
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
@ -900,7 +900,7 @@ class WebRTCVoiceSession:
# How long to wait after the last incoming chunk before flushing the
# entire accumulated response to TTS in one go.
self._tts_response_end_delay_s = max(
0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "1.5"))
0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5"))
)
self._closed = False