A face mesh is a 3D model representation of a person's face, built from a collection of interconnected vertices and edges that define the structure of the face in three-dimensional space. How amazing is that?
The vertices represent specific points on the face, such as the corners of the eyes, nose, mouth, and other facial landmarks. Edges connect these vertices, and polygons form the surface of the 3D model.
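Before reaching for any library, it helps to see how small this idea is at its core. Here is a made-up miniature sketch of vertices, edges, and one polygon in plain Python (the coordinates and landmark names are purely illustrative):

```python
# A minimal sketch of a mesh: 3D vertices plus edges between them.
# Coordinates and labels below are made up for illustration.
vertices = [
    (0.0, 0.0, 0.0),  # e.g. tip of the nose
    (1.0, 0.0, 0.5),  # e.g. corner of an eye
    (0.0, 1.0, 0.5),  # e.g. corner of the mouth
]
edges = [(0, 1), (1, 2), (2, 0)]  # pairs of vertex indices

# These three edges close one triangular polygon of the surface.
for start, end in edges:
    print(vertices[start], "->", vertices[end])
```

A real face mesh works the same way, just with hundreds of vertices (MediaPipe's model uses 468 landmarks) instead of three.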
For media processing and face mesh creation, we will be incorporating the two libraries below.
OpenCV is a highly efficient open-source computer vision and machine learning library. We can process images and videos, handle real-time computer vision tasks, perform feature detection, object recognition, and many other computer vision tasks using this library.
Mediapipe is a library developed by Google that focuses on building pipelines for various media processing tasks including computer vision. It simplifies our work by providing pre-built solutions for tasks like face detection, hand tracking, pose estimation, face mesh generation, and even more.
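Both libraries are typically installed from PyPI. The package names below are the standard ones (note that the PyPI name `opencv-python` differs from the import name `cv2`); exact version support varies by platform and Python version:

```shell
# Install OpenCV and MediaPipe from PyPI (package names, not import names).
pip install opencv-python mediapipe
```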
Let's start with images initially. The following section gives an easy-to-grasp walkthrough on how to set up a mesh for images.
import cv2
import mediapipe as mp
First and foremost, we import the required libraries in order to execute the code.
cv2 for image processing
mediapipe for the face mesh model
def detectFaceLandmarks(imagePath):
    mpFaceMesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7,
    )
    frame = cv2.imread(imagePath)
    frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = mpFaceMesh.process(frameRGB)
    return frame, results
We define detectFaceLandmarks, which takes an imagePath as input and returns the original frame along with the results obtained from the FaceMesh model.
It creates an instance of the mp.solutions.face_mesh.FaceMesh class with customized parameters to perform face landmark detection. We pass static_image_mode as True since we're dealing with a single static image rather than a video stream.
The image is read from the provided imagePath through cv2.imread and converted from BGR to RGB format using cv2.cvtColor, since MediaPipe expects RGB input.
Finally, we apply the face mesh model to our image using mpFaceMesh.process, and the face landmarks are stored in the results variable.
def displayFaceLandmarks(frame, results):
    frameHeight, frameWidth, _ = frame.shape
    if results.multi_face_landmarks:
        for faceLandmarks in results.multi_face_landmarks:
            for landmark in faceLandmarks.landmark:
                x, y = int(landmark.x * frameWidth), int(landmark.y * frameHeight)
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
    cv2.imshow('3D face mesh', frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Next up, our displayFaceLandmarks method takes the frame and results as input and displays the image along with the face landmarks.
It extracts the height and width of the frame using frame.shape so that we can map the normalized landmark coordinates (in the range 0 to 1) to actual pixel positions.
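This mapping is just a scaling step. Here is a small sketch of the arithmetic with a made-up frame size and landmark position:

```python
# Each landmark's x and y are normalized to the range [0, 1].
# Mapping them to pixels just scales by the frame dimensions.
frameWidth, frameHeight = 640, 480   # hypothetical frame size
landmarkX, landmarkY = 0.5, 0.25     # hypothetical normalized landmark

x = int(landmarkX * frameWidth)
y = int(landmarkY * frameHeight)
print(x, y)  # → 320 120 (centered horizontally, a quarter of the way down)
```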
The function checks if any face landmarks were detected using results.multi_face_landmarks. If landmarks are detected, we draw a green circle at each landmark point on our initial image using cv2.circle.
The processed image is displayed in a window titled '3D face mesh' using cv2.imshow.
Note: The window remains open until any key is pressed (cv2.waitKey(0)); all OpenCV windows are then closed by cv2.destroyAllWindows().
if __name__ == "__main__":
    imagePath = 'sample.png'
    frame, results = detectFaceLandmarks(imagePath)
    displayFaceLandmarks(frame, results)
We define the main block, which runs when the script is executed. Here, we can specify the path to our image and see our face mesh in action!
Congratulations, we just made our first 3D face mesh! Experiment with the complete code below to see it in action.
import cv2
import mediapipe as mp
def detectFaceLandmarks(imagePath):
    mpFaceMesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7,
    )
    frame = cv2.imread(imagePath)
    frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = mpFaceMesh.process(frameRGB)
    return frame, results
def displayFaceLandmarks(frame, results):
    frameHeight, frameWidth, _ = frame.shape
    if results.multi_face_landmarks:
        for faceLandmarks in results.multi_face_landmarks:
            for landmark in faceLandmarks.landmark:
                x, y = int(landmark.x * frameWidth), int(landmark.y * frameHeight)
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
    cv2.imshow('3D face mesh', frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    imagePath = 'sample.png'
    frame, results = detectFaceLandmarks(imagePath)
    displayFaceLandmarks(frame, results)
We're now ready to process videos as well, so let's dive straight into it!
import cv2
import mediapipe as mp
Similar to the last example, we make our imports first.
def drawFaceMesh(frame, faceLandmarks):
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_IRISES,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
    )
In this block, we define a function called drawFaceMesh that takes two parameters: frame and faceLandmarks, the face landmarks detected by MediaPipe.
We use mp_drawing.draw_landmarks from the drawing_utils module to draw the face mesh annotations on our input frame.
It draws the three types of annotations given below.
Tessellation
Contours
Irises
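Each connection set (FACEMESH_TESSELATION, FACEMESH_CONTOURS, FACEMESH_IRISES) is, conceptually, a collection of (start, end) landmark index pairs, and draw_landmarks draws a line segment for each pair. A toy sketch with made-up indices, not the real MediaPipe data:

```python
# Hypothetical miniature connection set over four landmark indices.
toyConnections = {(0, 1), (1, 2), (2, 0), (1, 3)}

# Drawing the mesh amounts to drawing one line per index pair.
for start, end in sorted(toyConnections):
    print(f"draw line from landmark {start} to landmark {end}")
```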
def main():
    videoCapture = cv2.VideoCapture('https://player.vimeo.com/external/373966277.sd.mp4?s=bc69e79a8007eb5682e9e72a415a2142173228f6&profile_id=164&oauth2_token_id=57447761')
    videoCapture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    videoCapture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    frameRate = 30
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7,
    ) as faceMesh:
        while videoCapture.isOpened():
            isSuccess, frame = videoCapture.read()
            if not isSuccess:
                break
            frame.flags.writeable = False
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = faceMesh.process(frame)
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if results.multi_face_landmarks:
                for faceLandmarks in results.multi_face_landmarks:
                    drawFaceMesh(frame, faceLandmarks)
            cv2.imshow('3D video face mesh', cv2.flip(frame, 1))
            if cv2.waitKey(int(1000 / frameRate)) & 0xFF == 27:
                break
    videoCapture.release()
    cv2.destroyAllWindows()
Our main function handles the core workflow, so let's walk through it step by step!
Create videoCapture to read frames from the given URL (changeable).
Set the desired width/height of the video frames.
Initialize the FaceMesh model using mp_face_mesh.FaceMesh by customizing max_num_faces, min_detection_confidence, and min_tracking_confidence.
Start a loop. This processes each frame of the video. The iterations include the following flow.
Read a frame from the video capture.
Convert the frame from BGR to RGB format, since MediaPipe expects RGB input.
Process the frame using the FaceMesh model to get the results i.e. landmark detections.
Convert the frame back to BGR format.
Check for face landmarks in the current frame using results.multi_face_landmarks.
If landmarks were detected, call the drawFaceMesh function to draw them on the said frame.
Show the processed frame with face landmarks in our window '3D video face mesh'.
Release the video capture object and close all windows after the loop.
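The BGR-to-RGB step in the loop above is just a per-pixel channel reversal. A minimal sketch with a single hand-made pixel, no OpenCV required:

```python
# OpenCV stores pixels in BGR order, while MediaPipe expects RGB.
# cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) reverses the channel order
# for every pixel; for one pixel that looks like this:
bgrPixel = [255, 0, 0]      # pure blue in BGR order
rgbPixel = bgrPixel[::-1]   # reverse channel order

print(rgbPixel)  # → [0, 0, 255], i.e. blue in RGB order
```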
Note: The loop exits when the Escape key (key code 27) is pressed; & 0xFF masks the value returned by cv2.waitKey down to its lowest byte before the comparison.
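The per-frame delay and the key check come down to two small calculations. A sketch of the arithmetic (the raw key value below is made up for illustration):

```python
ESCAPE_KEY = 27

# cv2.waitKey(int(1000 / frameRate)) waits this many milliseconds per frame.
frameRate = 30
delayMs = int(1000 / frameRate)
print(delayMs)  # → 33

# On some platforms cv2.waitKey returns extra high bits, so the code
# masks with 0xFF to keep only the lowest byte before comparing to 27.
rawKey = 0x10001B   # hypothetical raw return value; low byte is 0x1B == 27
print(rawKey & 0xFF == ESCAPE_KEY)  # → True
```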
if __name__ == "__main__":
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_face_mesh = mp.solutions.face_mesh
    main()
Lastly, in the main block we initialize the MediaPipe drawing helpers and call main(), which renders the video in the window along with the corresponding face mesh for each frame!
The following code is completely executable and renders a face mesh for the frames of the video.
You can replace the video link with any compatible video link as well.
import cv2
import mediapipe as mp
def drawFaceMesh(frame, faceLandmarks):
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_IRISES,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
    )
def main():
    videoCapture = cv2.VideoCapture('https://player.vimeo.com/external/373966277.sd.mp4?s=bc69e79a8007eb5682e9e72a415a2142173228f6&profile_id=164&oauth2_token_id=57447761')
    videoCapture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    videoCapture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    frameRate = 30
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7,
    ) as faceMesh:
        while videoCapture.isOpened():
            isSuccess, frame = videoCapture.read()
            if not isSuccess:
                break
            frame.flags.writeable = False
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = faceMesh.process(frame)
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if results.multi_face_landmarks:
                for faceLandmarks in results.multi_face_landmarks:
                    drawFaceMesh(frame, faceLandmarks)
            cv2.imshow('3D video face mesh', cv2.flip(frame, 1))
            if cv2.waitKey(int(1000 / frameRate)) & 0xFF == 27:
                break
    videoCapture.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_face_mesh = mp.solutions.face_mesh
    main()

The most exciting part is here! We can also project the 3D face mesh onto ourselves by using our own webcam or an external camera.
We pass 0 to the VideoCapture function so that the video source becomes our webcam directly.
import cv2
import mediapipe as mp

def drawFaceMesh(frame, faceLandmarks):
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
    )
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=faceLandmarks,
        connections=mp_face_mesh.FACEMESH_IRISES,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
    )

def main():
    videoCapture = cv2.VideoCapture(0)
    videoCapture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    videoCapture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    frameRate = 30
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7,
    ) as faceMesh:
        while videoCapture.isOpened():
            isSuccess, frame = videoCapture.read()
            if not isSuccess:
                break
            frame.flags.writeable = False
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = faceMesh.process(frame)
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if results.multi_face_landmarks:
                for faceLandmarks in results.multi_face_landmarks:
                    drawFaceMesh(frame, faceLandmarks)
            cv2.imshow('Web cam face mesh', cv2.flip(frame, 1))
            if cv2.waitKey(int(1000 / frameRate)) & 0xFF == 27:
                break
    videoCapture.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_face_mesh = mp.solutions.face_mesh
    main()
Note: Run this code on your local machine so that your code can connect to the webcam.
The field of computer vision is advancing at great speed and offers a lot of potential for new discoveries. Currently, 3D face mesh technology is highly useful in the use cases depicted in the diagram.