Building Real-Time Face Motion Capture with React and three.js

Project Setup

First, create a new React application and install the required dependencies:

npx create-react-app facial-sync-demo
cd facial-sync-demo
npm install three @mediapipe/tasks-vision

Copy the model and runtime assets from the sample project's public folder into your new project's public directory. The component below loads the 3D head model, the Basis/KTX2 transcoder files, the MediaPipe vision WASM runtime, and the face landmarker model from fixed paths, so the folder layout has to match.
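
Based on the asset paths referenced in the component code below, the public directory should end up looking roughly like this (the exact file names inside basis/ and wasm/ come from the sample project):

public/
  models/
    facecap.glb              (rigged head model with ARKit-style morph targets)
  basis/                     (KTX2/Basis transcoder files used by KTX2Loader)
  fileset_resolver/
    wasm/                    (MediaPipe tasks-vision WASM runtime)
  ai_models/
    face_landmarker.task     (MediaPipe Face Landmarker model)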

Creating the Three.js Container Component

Create a new file src/components/FaceCaptureCanvas.js with the following implementation:

import * as THREE from "three";
import { OrbitControls } from "three/addons/controls/OrbitControls.js";
import { RoomEnvironment } from "three/addons/environments/RoomEnvironment.js";
import { GLTFLoader } from "three/addons/loaders/GLTFLoader.js";
import { KTX2Loader } from "three/addons/loaders/KTX2Loader.js";
import { MeshoptDecoder } from "three/addons/libs/meshopt_decoder.module.js";
import { GUI } from "three/addons/libs/lil-gui.module.min.js";
import { useRef, useEffect } from "react";
import { FaceLandmarker, FilesetResolver } from "@mediapipe/tasks-vision";

// Mapping between MediaPipe blend shapes and model morph targets
const morphMapping = {
  browDownLeft: "browDown_L",
  browDownRight: "browDown_R",
  browInnerUp: "browInnerUp",
  browOuterUpLeft: "browOuterUp_L",
  browOuterUpRight: "browOuterUp_R",
  cheekPuff: "cheekPuff",
  cheekSquintLeft: "cheekSquint_L",
  cheekSquintRight: "cheekSquint_R",
  eyeBlinkLeft: "eyeBlink_L",
  eyeBlinkRight: "eyeBlink_R",
  eyeLookDownLeft: "eyeLookDown_L",
  eyeLookDownRight: "eyeLookDown_R",
  eyeLookInLeft: "eyeLookIn_L",
  eyeLookInRight: "eyeLookIn_R",
  eyeLookOutLeft: "eyeLookOut_L",
  eyeLookOutRight: "eyeLookOut_R",
  eyeLookUpLeft: "eyeLookUp_L",
  eyeLookUpRight: "eyeLookUp_R",
  eyeSquintLeft: "eyeSquint_L",
  eyeSquintRight: "eyeSquint_R",
  eyeWideLeft: "eyeWide_L",
  eyeWideRight: "eyeWide_R",
  jawForward: "jawForward",
  jawLeft: "jawLeft",
  jawOpen: "jawOpen",
  jawRight: "jawRight",
  mouthClose: "mouthClose",
  mouthDimpleLeft: "mouthDimple_L",
  mouthDimpleRight: "mouthDimple_R",
  mouthFrownLeft: "mouthFrown_L",
  mouthFrownRight: "mouthFrown_R",
  mouthFunnel: "mouthFunnel",
  mouthLeft: "mouthLeft",
  mouthLowerDownLeft: "mouthLowerDown_L",
  mouthLowerDownRight: "mouthLowerDown_R",
  mouthPressLeft: "mouthPress_L",
  mouthPressRight: "mouthPress_R",
  mouthPucker: "mouthPucker",
  mouthRight: "mouthRight",
  mouthRollLower: "mouthRollLower",
  mouthRollUpper: "mouthRollUpper",
  mouthShrugLower: "mouthShrugLower",
  mouthShrugUpper: "mouthShrugUpper",
  mouthSmileLeft: "mouthSmile_L",
  mouthSmileRight: "mouthSmile_R",
  mouthStretchLeft: "mouthStretch_L",
  mouthStretchRight: "mouthStretch_R",
  mouthUpperUpLeft: "mouthUpperUp_L",
  mouthUpperUpRight: "mouthUpperUp_R",
  noseSneerLeft: "noseSneer_L",
  noseSneerRight: "noseSneer_R"
};

function FaceCaptureCanvas() {
  const canvasRef = useRef(null);
  const isRunning = useRef(false);

  useEffect(() => {
    if (isRunning.current || !canvasRef.current) return;
    isRunning.current = true;
    initializeScene();

    async function initializeScene() {
      // Initialize WebGL renderer
      const renderer = new THREE.WebGLRenderer({ antialias: true });
      renderer.setPixelRatio(window.devicePixelRatio);
      renderer.setSize(window.innerWidth, window.innerHeight);
      renderer.toneMapping = THREE.ACESFilmicToneMapping;
      canvasRef.current.appendChild(renderer.domElement);

      // Setup camera
      const camera = new THREE.PerspectiveCamera(
        60,
        window.innerWidth / window.innerHeight,
        1,
        100
      );
      camera.position.z = 5;

      // Setup scene
      const scene = new THREE.Scene();
      scene.scale.x = -1;

      // Setup environment lighting
      const environment = new RoomEnvironment(renderer);
      const generator = new THREE.PMREMGenerator(renderer);
      scene.background = new THREE.Color(0x666666);
      scene.environment = generator.fromScene(environment).texture;

      // Setup orbit controls
      const controls = new OrbitControls(camera, renderer.domElement);

      // Load 3D model
      let faceMesh, leftEye, rightEye;
      const maxEyeRotation = THREE.MathUtils.degToRad(30);

      const ktx2Loader = new KTX2Loader()
        .setTranscoderPath("/basis/")
        .detectSupport(renderer);

      new GLTFLoader()
        .setKTX2Loader(ktx2Loader)
        .setMeshoptDecoder(MeshoptDecoder)
        .load("models/facecap.glb", (gltf) => {
          const model = gltf.scene.children[0];
          scene.add(model);

          faceMesh = model.getObjectByName("mesh_2");
          leftEye = model.getObjectByName("eyeLeft");
          rightEye = model.getObjectByName("eyeRight");

          // Apply normal material for visualization
          faceMesh.material = new THREE.MeshNormalMaterial();

          // Create debug GUI for morph targets
          const gui = new GUI();
          gui.close();
          const influences = faceMesh.morphTargetInfluences;

          for (const [key, value] of Object.entries(
            faceMesh.morphTargetDictionary
          )) {
            gui
              .add(influences, value, 0, 1, 0.01)
              .name(key.replace("blendShape1.", ""))
              .listen(influences);
          }

          renderer.setAnimationLoop(renderLoop);
        });

      // Create video element for webcam feed
      const webcamVideo = document.createElement("video");
      const videoTexture = new THREE.VideoTexture(webcamVideo);
      videoTexture.colorSpace = THREE.SRGBColorSpace;

      const videoGeometry = new THREE.PlaneGeometry(1, 1);
      const videoMaterial = new THREE.MeshBasicMaterial({
        map: videoTexture,
        depthWrite: false
      });
      const videoPlane = new THREE.Mesh(videoGeometry, videoMaterial);
      scene.add(videoPlane);

      // Initialize MediaPipe Face Landmarker
      const resolver = await FilesetResolver.forVisionTasks(
        "fileset_resolver/wasm"
      );

      const landmarker = await FaceLandmarker.createFromOptions(resolver, {
        baseOptions: {
          modelAssetPath: "ai_models/face_landmarker.task",
          delegate: "GPU"
        },
        outputFaceBlendshapes: true,
        outputFacialTransformationMatrixes: true,
        runningMode: "VIDEO",
        numFaces: 1
      });

      // Request camera access
      if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices
          .getUserMedia({ video: { facingMode: "user" } })
          .then((stream) => {
            webcamVideo.srcObject = stream;
            webcamVideo.play();
          })
          .catch((err) => {
            console.error("Camera access denied:", err);
          });
      }

      // Transform helper object
      const transformHelper = new THREE.Object3D();

      function renderLoop() {
        if (webcamVideo.readyState >= HTMLMediaElement.HAVE_METADATA) {
          const detectionResults = landmarker.detectForVideo(
            webcamVideo,
            Date.now()
          );

          // Process facial transformation matrix
          if (detectionResults.facialTransformationMatrixes.length > 0) {
            const matrixData =
              detectionResults.facialTransformationMatrixes[0].data;

            transformHelper.matrix.fromArray(matrixData);
            transformHelper.matrix.decompose(
              transformHelper.position,
              transformHelper.quaternion,
              transformHelper.scale
            );

            const transformGroup = scene.getObjectByName("grp_transform");

            transformGroup.position.x = transformHelper.position.x;
            transformGroup.position.y = transformHelper.position.z + 40;
            transformGroup.position.z = -transformHelper.position.y;

            transformGroup.rotation.x = transformHelper.rotation.x;
            transformGroup.rotation.y = transformHelper.rotation.z;
            transformGroup.rotation.z = -transformHelper.rotation.y;
          }

          // Process blend shapes for facial expressions
          if (detectionResults.faceBlendshapes.length > 0) {
            const blendshapes = detectionResults.faceBlendshapes[0].categories;

            const eyeMovement = {
              horizontalLeft: 0,
              horizontalRight: 0,
              verticalLeft: 0,
              verticalRight: 0
            };

            for (const shape of blendshapes) {
              const name = shape.categoryName;
              const confidence = shape.score;

              const morphIndex =
                faceMesh.morphTargetDictionary[morphMapping[name]];

              if (morphIndex !== undefined) {
                faceMesh.morphTargetInfluences[morphIndex] = confidence;
              }

              // Calculate eye rotation from directional gaze blendshapes
              switch (name) {
                case "eyeLookInLeft":
                  eyeMovement.horizontalLeft += confidence;
                  break;
                case "eyeLookOutLeft":
                  eyeMovement.horizontalLeft -= confidence;
                  break;
                case "eyeLookInRight":
                  eyeMovement.horizontalRight -= confidence;
                  break;
                case "eyeLookOutRight":
                  eyeMovement.horizontalRight += confidence;
                  break;
                case "eyeLookUpLeft":
                  eyeMovement.verticalLeft -= confidence;
                  break;
                case "eyeLookDownLeft":
                  eyeMovement.verticalLeft += confidence;
                  break;
                case "eyeLookUpRight":
                  eyeMovement.verticalRight -= confidence;
                  break;
                case "eyeLookDownRight":
                  eyeMovement.verticalRight += confidence;
                  break;
              }
            }

            leftEye.rotation.z =
              eyeMovement.horizontalLeft * maxEyeRotation;
            rightEye.rotation.z =
              eyeMovement.horizontalRight * maxEyeRotation;
            leftEye.rotation.x = eyeMovement.verticalLeft * maxEyeRotation;
            rightEye.rotation.x =
              eyeMovement.verticalRight * maxEyeRotation;
          }
        }

        // Scale video plane to match video dimensions
        videoPlane.scale.x = webcamVideo.videoWidth / 100;
        videoPlane.scale.y = webcamVideo.videoHeight / 100;

        renderer.render(scene, camera);
        controls.update();
      }

      // Handle window resize
      window.addEventListener("resize", () => {
        camera.aspect = window.innerWidth / window.innerHeight;
        camera.updateProjectionMatrix();
        renderer.setSize(window.innerWidth, window.innerHeight);
      });
    }
  }, []);

  return <div ref={canvasRef} />;
}

export default FaceCaptureCanvas;

Integrating the Component

Update your App.js to use the new component:

import "./App.css";
import FaceCaptureCanvas from "./components/FaceCaptureCanvas";

function App() {
  return (
    <div>
      <FaceCaptureCanvas />
    </div>
  );
}

export default App;
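
One detail worth checking: the renderer is sized to window.innerWidth and window.innerHeight, so the default body margin in a fresh create-react-app project can produce scrollbars around the canvas. If that happens, a small CSS tweak in index.css or App.css fixes it:

body {
  margin: 0;
  overflow: hidden;
}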

How It Works

The implementation follows these core steps:

  1. Camera Access: The application requests access to the user's webcam using the MediaDevices API.
  2. Face Landmark Detection: MediaPipe's FaceLandmarker processes each video frame to identify 478 facial landmarks and calculate blend shape scores.
  3. Morph Target Mapping: Detected blend shapes are mapped to the corresponding morph targets on the 3D model, updating their influence values in real time (see the sketch after this list).
  4. Eye Rotation: Gaze-direction blend shapes are converted to rotation values for the eye meshes, providing natural eye movement.
  5. Head Pose Tracking: The facial transformation matrix is extracted and applied to the grp_transform group in the scene to synchronize head movement.
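
The mapping step (3) is the heart of the pipeline, and it reduces to a dictionary lookup per blend shape. Here is a minimal sketch of that step in isolation, reusing the morphMapping table from above and assuming a mesh that exposes morphTargetDictionary and morphTargetInfluences, as the facecap model does:

function applyBlendshapes(blendshapeCategories, mesh) {
  for (const shape of blendshapeCategories) {
    // Translate the MediaPipe name (e.g. "mouthSmileLeft") into the
    // model's morph target name (e.g. "mouthSmile_L")...
    const targetName = morphMapping[shape.categoryName];

    // ...then into the index of that morph target on the mesh.
    const index = mesh.morphTargetDictionary[targetName];

    if (index !== undefined) {
      // Scores are already normalized to the 0..1 range, so they can
      // be written directly as morph target influences.
      mesh.morphTargetInfluences[index] = shape.score;
    }
  }
}

In the component this logic runs once per frame inside renderLoop, with detectionResults.faceBlendshapes[0].categories as the input.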

Running the Application

Start the development server with:

npm start

The 3D character model will respond to your facial movements captured through the webcam. The face mesh displays with a normal material for debugging purposes, and the GUI panel allows manual adjustment of individual morph target values.

Tags: React Three.js mediapipe face-tracking 3d-graphics

Posted on Sun, 10 May 2026 05:21:40 +0000 by tofi84