viz update
This commit is contained in:
parent
ed629ff60e
commit
9222c59f03
2 changed files with 145 additions and 38 deletions
|
|
@ -211,19 +211,32 @@
|
||||||
if (agentVisualizer) agentVisualizer.setState(state);
|
if (agentVisualizer) agentVisualizer.setState(state);
|
||||||
};
|
};
|
||||||
|
|
||||||
const createParaboloidRing = (radius = 1.1, segments = 320, curvature = 0.06, tubeRadius = 0.022, waves = 5) => {
|
// Creates a flat torus-path ring with a per-vertex normalised ring-position attribute (aRingT, 0→1).
|
||||||
|
// The Y wave displacement is applied in the vertex shader so phase can be animated.
|
||||||
|
const createParaboloidRing = (radius = 1.1, segments = 320, tubeRadius = 0.007) => {
|
||||||
const points = [];
|
const points = [];
|
||||||
for (let i = 0; i <= segments; i += 1) {
|
for (let i = 0; i <= segments; i += 1) {
|
||||||
const theta = (i / segments) * Math.PI * 2;
|
const theta = (i / segments) * Math.PI * 2;
|
||||||
const x = radius * Math.cos(theta);
|
points.push(new THREE.Vector3(radius * Math.cos(theta), 0, radius * Math.sin(theta)));
|
||||||
const z = radius * Math.sin(theta);
|
|
||||||
// Smooth round lumps: plain cosine is inherently smooth with
|
|
||||||
// symmetric rounded peaks and valleys — no sharpening needed.
|
|
||||||
const y = curvature * radius * Math.cos(waves * theta);
|
|
||||||
points.push(new THREE.Vector3(x, y, z));
|
|
||||||
}
|
}
|
||||||
const curve = new THREE.CatmullRomCurve3(points, true);
|
const curve = new THREE.CatmullRomCurve3(points, true);
|
||||||
return new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
|
const geo = new THREE.TubeGeometry(curve, segments, tubeRadius, 12, true);
|
||||||
|
|
||||||
|
// Store normalised t (0→1 around the ring) for each vertex so the shader
|
||||||
|
// can reconstruct theta and apply the animated wave.
|
||||||
|
const posCount = geo.attributes.position.count;
|
||||||
|
const tAttr = new Float32Array(posCount);
|
||||||
|
// TubeGeometry lays out vertices as (tubularSegments+1) rings of (radialSegments+1) verts.
|
||||||
|
const tubularSegments = segments;
|
||||||
|
const radialSegments = 12;
|
||||||
|
for (let tube = 0; tube <= tubularSegments; tube++) {
|
||||||
|
const tVal = tube / tubularSegments;
|
||||||
|
for (let rad = 0; rad <= radialSegments; rad++) {
|
||||||
|
tAttr[tube * (radialSegments + 1) + rad] = tVal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
geo.setAttribute("aRingT", new THREE.BufferAttribute(tAttr, 1));
|
||||||
|
return geo;
|
||||||
};
|
};
|
||||||
|
|
||||||
const createAgentVisualizer = () => {
|
const createAgentVisualizer = () => {
|
||||||
|
|
@ -251,46 +264,97 @@
|
||||||
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
|
const ambient = new THREE.AmbientLight(0xffffff, 1.0);
|
||||||
scene.add(ambient);
|
scene.add(ambient);
|
||||||
|
|
||||||
const geometry = createParaboloidRing();
|
const geometry = createParaboloidRing();
|
||||||
const ringMaterial = new THREE.MeshBasicMaterial({
|
const geometry2 = createParaboloidRing();
|
||||||
color: 0xfff5eb,
|
|
||||||
transparent: false,
|
// Vertex shader applies the Y wave using per-vertex ring-t and uPhase uniform,
|
||||||
|
// so phase can be animated each frame without rebuilding geometry.
|
||||||
|
const ringVertexShader = `
|
||||||
|
attribute float aRingT;
|
||||||
|
uniform float uPhase;
|
||||||
|
uniform float uAmplitude;
|
||||||
|
varying float vWorldX;
|
||||||
|
void main() {
|
||||||
|
float theta = aRingT * 6.28318530718;
|
||||||
|
vec3 pos = position;
|
||||||
|
pos.y += uAmplitude * cos(5.0 * theta + uPhase);
|
||||||
|
vec4 wp = modelMatrix * vec4(pos, 1.0);
|
||||||
|
vWorldX = wp.x;
|
||||||
|
gl_Position = projectionMatrix * viewMatrix * wp;
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
const ringFragmentShader = `
|
||||||
|
uniform vec3 uColor;
|
||||||
|
uniform float uFade;
|
||||||
|
uniform float uFadeOffset;
|
||||||
|
varying float vWorldX;
|
||||||
|
void main() {
|
||||||
|
float alpha = 1.0 - uFade * smoothstep(0.0, 1.0, (-vWorldX + uFadeOffset * 1.1) / 1.1 * 0.5 + 0.5);
|
||||||
|
gl_FragColor = vec4(uColor, alpha);
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
const makeRingMaterial = (phase) => new THREE.ShaderMaterial({
|
||||||
|
uniforms: {
|
||||||
|
uColor: { value: new THREE.Color(0xfff5eb) },
|
||||||
|
uFade: { value: 0.0 },
|
||||||
|
uFadeOffset: { value: 0.0 },
|
||||||
|
uPhase: { value: phase },
|
||||||
|
uAmplitude: { value: 0.06 * 1.1 }, // base: curvature * radius
|
||||||
|
},
|
||||||
|
vertexShader: ringVertexShader,
|
||||||
|
fragmentShader: ringFragmentShader,
|
||||||
|
transparent: true,
|
||||||
side: THREE.DoubleSide,
|
side: THREE.DoubleSide,
|
||||||
|
depthWrite: false,
|
||||||
});
|
});
|
||||||
const ring = new THREE.Mesh(geometry, ringMaterial);
|
|
||||||
|
const ringMaterial = makeRingMaterial(0.0);
|
||||||
|
const ringMaterial2 = makeRingMaterial(Math.PI); // half-wave offset
|
||||||
|
|
||||||
|
const ring = new THREE.Mesh(geometry, ringMaterial);
|
||||||
|
const ring2 = new THREE.Mesh(geometry2, ringMaterial2);
|
||||||
|
|
||||||
const group = new THREE.Group();
|
const group = new THREE.Group();
|
||||||
group.add(ring);
|
group.add(ring);
|
||||||
|
group.add(ring2);
|
||||||
group.rotation.y = Math.PI * 0.18;
|
group.rotation.y = Math.PI * 0.18;
|
||||||
scene.add(group);
|
scene.add(group);
|
||||||
|
|
||||||
|
let orthoScale = 1.0; // lerps to 0.7 in side view for zoom effect
|
||||||
|
|
||||||
|
const applyFrustum = () => {
|
||||||
|
const width = Math.max(2, agentVizEl.clientWidth);
|
||||||
|
const height = Math.max(2, agentVizEl.clientHeight);
|
||||||
|
const aspect = width / height;
|
||||||
|
const s = orthoSize * orthoScale;
|
||||||
|
if (aspect >= 1) {
|
||||||
|
camera.left = -s * aspect;
|
||||||
|
camera.right = s * aspect;
|
||||||
|
camera.top = s;
|
||||||
|
camera.bottom = -s;
|
||||||
|
} else {
|
||||||
|
camera.left = -s;
|
||||||
|
camera.right = s;
|
||||||
|
camera.top = s / aspect;
|
||||||
|
camera.bottom = -s / aspect;
|
||||||
|
}
|
||||||
|
camera.updateProjectionMatrix();
|
||||||
|
};
|
||||||
|
|
||||||
const resize = () => {
|
const resize = () => {
|
||||||
const width = Math.max(2, agentVizEl.clientWidth);
|
const width = Math.max(2, agentVizEl.clientWidth);
|
||||||
const height = Math.max(2, agentVizEl.clientHeight);
|
const height = Math.max(2, agentVizEl.clientHeight);
|
||||||
renderer.setSize(width, height, false);
|
renderer.setSize(width, height, false);
|
||||||
const aspect = width / height;
|
applyFrustum();
|
||||||
// Keep the ring fully visible in both landscape and portrait.
|
|
||||||
// Landscape (aspect >= 1): expand horizontally, keep vertical fixed.
|
|
||||||
// Portrait (aspect < 1): keep horizontal fixed at orthoSize,
|
|
||||||
// expand vertically so the ring isn't clipped.
|
|
||||||
if (aspect >= 1) {
|
|
||||||
camera.left = -orthoSize * aspect;
|
|
||||||
camera.right = orthoSize * aspect;
|
|
||||||
camera.top = orthoSize;
|
|
||||||
camera.bottom = -orthoSize;
|
|
||||||
} else {
|
|
||||||
camera.left = -orthoSize;
|
|
||||||
camera.right = orthoSize;
|
|
||||||
camera.top = orthoSize / aspect;
|
|
||||||
camera.bottom = -orthoSize / aspect;
|
|
||||||
}
|
|
||||||
camera.updateProjectionMatrix();
|
|
||||||
};
|
};
|
||||||
resize();
|
resize();
|
||||||
window.addEventListener("resize", resize);
|
window.addEventListener("resize", resize);
|
||||||
|
|
||||||
let currentState = STATES.idle;
|
let currentState = STATES.idle;
|
||||||
let currentAudioLevel = 0;
|
let currentAudioLevel = 0;
|
||||||
let smoothAudioLevel = 0;
|
let smoothAudioLevel = 0; // fast follower — ring1 amplitude + phase speed
|
||||||
|
let smoothAudioLevel2 = 0; // slow follower — ring2 amplitude, creates lag between rings
|
||||||
let deformScale = 1.0;
|
let deformScale = 1.0;
|
||||||
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
|
let ringScale = 1.0; // uniform xz scale — used for thickness throb when thinking
|
||||||
let spinSpeed = 0.0;
|
let spinSpeed = 0.0;
|
||||||
|
|
@ -313,12 +377,15 @@
|
||||||
|
|
||||||
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
|
// Precompute lerp alphas once per frame (dt * 60 normalises to 60Hz baseline).
|
||||||
const t = dt * 60;
|
const t = dt * 60;
|
||||||
const lerpAudio = 1 - Math.pow(0.85, t);
|
const lerpAudio = 1 - Math.pow(0.85, t); // fast
|
||||||
|
const lerpAudio2 = 1 - Math.pow(0.94, t); // slow — ring2 lags behind ring1
|
||||||
const lerpDeform = 1 - Math.pow(0.88, t);
|
const lerpDeform = 1 - Math.pow(0.88, t);
|
||||||
const lerpSpin = 1 - Math.pow(0.86, t);
|
const lerpSpin = 1 - Math.pow(0.86, t);
|
||||||
const lerpRing = 1 - Math.pow(0.90, t);
|
const lerpRing = 1 - Math.pow(0.90, t);
|
||||||
|
const lerpAmp = 1 - Math.pow(0.88, t);
|
||||||
|
|
||||||
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
|
smoothAudioLevel += (currentAudioLevel - smoothAudioLevel) * lerpAudio;
|
||||||
|
smoothAudioLevel2 += (currentAudioLevel - smoothAudioLevel2) * lerpAudio2;
|
||||||
const speakingActive = currentState === STATES.speaking;
|
const speakingActive = currentState === STATES.speaking;
|
||||||
|
|
||||||
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
|
let targetDeformScale = 1.0 + (smoothAudioLevel * 1.1);
|
||||||
|
|
@ -352,6 +419,46 @@
|
||||||
camera.lookAt(lookAt);
|
camera.lookAt(lookAt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Smoothly fade out the back half of the ring as the camera moves into side view.
|
||||||
|
// sideT: 0 = top view, 1 = fully side view. Derived from how horizontal the camera is.
|
||||||
|
const camLen = camera.position.length();
|
||||||
|
const sideT = camLen > 0.001 ? Math.abs(camera.position.x) / camLen : 0;
|
||||||
|
const lerpSide = 1 - Math.pow(0.88, t);
|
||||||
|
ringMaterial.uniforms.uFade.value += (sideT - ringMaterial.uniforms.uFade.value) * lerpSide;
|
||||||
|
ringMaterial.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial.uniforms.uFadeOffset.value) * lerpSide;
|
||||||
|
ringMaterial2.uniforms.uFade.value += (sideT - ringMaterial2.uniforms.uFade.value) * lerpSide;
|
||||||
|
ringMaterial2.uniforms.uFadeOffset.value += (sideT * 0.8 - ringMaterial2.uniforms.uFadeOffset.value) * lerpSide;
|
||||||
|
|
||||||
|
// Phase animation + reactive amplitude while speaking.
|
||||||
|
const baseAmp = 0.06 * 1.1;
|
||||||
|
if (speakingActive) {
|
||||||
|
const breathe = Math.sin(now * 0.0018); // ~3.5 s period
|
||||||
|
const base = 1.8 + smoothAudioLevel * 4.0;
|
||||||
|
const ring1Speed = (base + breathe * 0.6) * dt;
|
||||||
|
const ring2Speed = (base - breathe * 1.4 + smoothAudioLevel * 2.0) * dt;
|
||||||
|
ringMaterial.uniforms.uPhase.value += ring1Speed;
|
||||||
|
ringMaterial2.uniforms.uPhase.value += ring2Speed;
|
||||||
|
|
||||||
|
// Amplitude: ring1 reacts fast, ring2 lags — they pulse at different sizes.
|
||||||
|
const targetAmp1 = baseAmp * (1.0 + smoothAudioLevel * 3.5);
|
||||||
|
const targetAmp2 = baseAmp * (1.0 + smoothAudioLevel2 * 3.5);
|
||||||
|
ringMaterial.uniforms.uAmplitude.value += (targetAmp1 - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
|
||||||
|
ringMaterial2.uniforms.uAmplitude.value += (targetAmp2 - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
|
||||||
|
} else {
|
||||||
|
// Settle ring2's phase toward π and both amplitudes back to baseAmp (ring1's phase is left where it stopped).
|
||||||
|
ringMaterial2.uniforms.uPhase.value +=
|
||||||
|
(Math.PI - ringMaterial2.uniforms.uPhase.value) * (1 - Math.pow(0.92, t));
|
||||||
|
ringMaterial.uniforms.uAmplitude.value += (baseAmp - ringMaterial.uniforms.uAmplitude.value) * lerpAmp;
|
||||||
|
ringMaterial2.uniforms.uAmplitude.value += (baseAmp - ringMaterial2.uniforms.uAmplitude.value) * lerpAmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zoom in when in side view by shrinking the ortho frustum.
|
||||||
|
const targetOrthoScale = 1.0 - sideT * 0.3; // 1.0 top → 0.7 side
|
||||||
|
if (Math.abs(targetOrthoScale - orthoScale) > 0.0001) {
|
||||||
|
orthoScale += (targetOrthoScale - orthoScale) * lerpSide;
|
||||||
|
applyFrustum();
|
||||||
|
}
|
||||||
|
|
||||||
// Card background: gray → coral as connection is established, then darken when listening.
|
// Card background: gray → coral as connection is established, then darken when listening.
|
||||||
// While connecting, throb the gray base with a slow sine pulse.
|
// While connecting, throb the gray base with a slow sine pulse.
|
||||||
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
|
connectedT += (targetConnected - connectedT) * (1 - Math.pow(0.88, t));
|
||||||
|
|
|
||||||
10
voice_rtc.py
10
voice_rtc.py
|
|
@ -341,14 +341,14 @@ class CommandSpeechToText:
|
||||||
|
|
||||||
class FasterWhisperSpeechToText:
|
class FasterWhisperSpeechToText:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._model_name = os.getenv("HOST_STT_MODEL", "base.en").strip() or "base.en"
|
self._model_name = os.getenv("HOST_STT_MODEL", "tiny.en").strip() or "tiny.en"
|
||||||
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
|
self._device = os.getenv("HOST_STT_DEVICE", "auto").strip() or "auto"
|
||||||
self._compute_type = (
|
self._compute_type = (
|
||||||
os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
|
os.getenv("HOST_STT_COMPUTE_TYPE", "int8").strip() or "int8"
|
||||||
)
|
)
|
||||||
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
|
self._language = os.getenv("HOST_STT_LANGUAGE", "en").strip()
|
||||||
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "2")))
|
self._beam_size = max(1, int(os.getenv("HOST_STT_BEAM_SIZE", "1")))
|
||||||
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "2")))
|
self._best_of = max(1, int(os.getenv("HOST_STT_BEST_OF", "1")))
|
||||||
self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in {
|
self._vad_filter = os.getenv("HOST_STT_VAD_FILTER", "0").strip() not in {
|
||||||
"0",
|
"0",
|
||||||
"false",
|
"false",
|
||||||
|
|
@ -587,7 +587,7 @@ class SupertonicTextToSpeech:
|
||||||
os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
|
os.getenv("SUPERTONIC_VOICE_STYLE", "F1").strip() or "F1"
|
||||||
)
|
)
|
||||||
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
|
self._lang = os.getenv("SUPERTONIC_LANG", "en").strip() or "en"
|
||||||
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "8"))
|
self._total_steps = int(os.getenv("SUPERTONIC_TOTAL_STEPS", "4"))
|
||||||
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
|
self._speed = float(os.getenv("SUPERTONIC_SPEED", "1.5"))
|
||||||
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
|
self._intra_op_num_threads = _optional_int_env("SUPERTONIC_INTRA_OP_THREADS")
|
||||||
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
|
self._inter_op_num_threads = _optional_int_env("SUPERTONIC_INTER_OP_THREADS")
|
||||||
|
|
@ -900,7 +900,7 @@ class WebRTCVoiceSession:
|
||||||
# How long to wait after the last incoming chunk before flushing the
|
# How long to wait after the last incoming chunk before flushing the
|
||||||
# entire accumulated response to TTS in one go.
|
# entire accumulated response to TTS in one go.
|
||||||
self._tts_response_end_delay_s = max(
|
self._tts_response_end_delay_s = max(
|
||||||
0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "1.5"))
|
0.1, float(os.getenv("HOST_TTS_RESPONSE_END_DELAY_S", "0.5"))
|
||||||
)
|
)
|
||||||
|
|
||||||
self._closed = False
|
self._closed = False
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue