import os

import cv2
import numpy as np
import torch
import torch.nn.functional as F

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# borrowed from https://github.com/daniilidis-group/neural_renderer/blob/master/neural_renderer/vertices_to_faces.py
def face_vertices(vertices, faces):
    """
    :param vertices: [batch size, number of vertices, 3]
    :param faces: [batch size, number of faces, 3]
    :return: [batch size, number of faces, 3, 3]
    """
    assert (vertices.ndimension() == 3)
    assert (faces.ndimension() == 3)
    assert (vertices.shape[0] == faces.shape[0])
    assert (vertices.shape[2] == 3)
    assert (faces.shape[2] == 3)

    bs, nv = vertices.shape[:2]
    bs, nf = faces.shape[:2]
    device = vertices.device
    faces = faces + (torch.arange(bs, dtype=torch.int32).to(device) * nv)[:, None, None]
    vertices = vertices.reshape((bs * nv, 3))
    # pytorch only supports long and byte tensors for indexing
    return vertices[faces.long()]
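

# Illustrative example (not from the original repo): face_vertices gathers the three
# vertex positions of every triangle. For a toy batch with one triangle:
#   verts = torch.zeros(1, 3, 3)                            # [bs=1, nv=3, xyz]
#   tris = torch.tensor([[[0, 1, 2]]], dtype=torch.int32)   # [bs=1, nf=1, 3 vertex indices]
#   face_vertices(verts, tris).shape                         # torch.Size([1, 1, 3, 3])
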
def vertex_normals(vertices, faces):
    """
    :param vertices: [batch size, number of vertices, 3]
    :param faces: [batch size, number of faces, 3]
    :return: [batch size, number of vertices, 3]
    """
    assert (vertices.ndimension() == 3)
    assert (faces.ndimension() == 3)
    assert (vertices.shape[0] == faces.shape[0])
    assert (vertices.shape[2] == 3)
    assert (faces.shape[2] == 3)

    bs, nv = vertices.shape[:2]
    bs, nf = faces.shape[:2]
    device = vertices.device
    normals = torch.zeros(bs * nv, 3).to(device)

    faces = faces + (torch.arange(bs, dtype=torch.int32).to(device) * nv)[:, None, None]  # expanded faces
    # pytorch only supports long and byte tensors for indexing
    vertices_faces = vertices.reshape((bs * nv, 3))[faces.long()]

    faces = faces.reshape(-1, 3)
    vertices_faces = vertices_faces.reshape(-1, 3, 3)

    # accumulate each face's cross-product (area-weighted) normal onto its three vertices
    normals.index_add_(0, faces[:, 1].long(),
                       torch.cross(vertices_faces[:, 2] - vertices_faces[:, 1], vertices_faces[:, 0] - vertices_faces[:, 1], dim=-1))
    normals.index_add_(0, faces[:, 2].long(),
                       torch.cross(vertices_faces[:, 0] - vertices_faces[:, 2], vertices_faces[:, 1] - vertices_faces[:, 2], dim=-1))
    normals.index_add_(0, faces[:, 0].long(),
                       torch.cross(vertices_faces[:, 1] - vertices_faces[:, 0], vertices_faces[:, 2] - vertices_faces[:, 0], dim=-1))

    normals = F.normalize(normals, eps=1e-6, dim=1)
    normals = normals.reshape((bs, nv, 3))
    return normals
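

# Illustrative example (not from the original repo): area-weighted vertex normals for a
# single triangle lying in the xy-plane; every vertex should end up with a normal of
# roughly (0, 0, 1).
#   verts = torch.tensor([[[0., 0., 0.], [1., 0., 0.], [0., 1., 0.]]])  # [1, 3, 3]
#   tris = torch.tensor([[[0, 1, 2]]], dtype=torch.int64)               # [1, 1, 3]
#   vertex_normals(verts, tris)                                          # ~[[[0, 0, 1], ...]]
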
def batch_orth_proj(X, camera):
    ''' orthographic projection
        X: 3d vertices, [bz, n_point, 3]
        camera: scale and translation, [bz, 3], [scale, tx, ty]
    '''
    camera = camera.clone().view(-1, 1, 3)
    # translate x/y by (tx, ty), keep z unchanged, then scale all axes by the camera scale
    X_trans = X[:, :, :2] + camera[:, :, 1:]
    X_trans = torch.cat([X_trans, X[:, :, 2:]], 2)
    Xn = (camera[:, :, 0:1] * X_trans)
    return Xn
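

# Illustrative example (not from the original repo): the [scale, tx, ty] camera first
# shifts x/y and then scales all three axes, so z is scaled but never translated.
#   X = torch.tensor([[[1., 2., 3.]]])       # [bz=1, n_point=1, 3]
#   cam = torch.tensor([[2., 0.5, -0.5]])    # scale=2, tx=0.5, ty=-0.5
#   batch_orth_proj(X, cam)                  # tensor([[[3., 3., 6.]]])
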
class MP_2_FLAME:
    """
    Convert MediaPipe's 52 blendshape scores to FLAME's coefficients
    """
    def __init__(self, mappings_path):
        self.bs2exp = np.load(os.path.join(mappings_path, 'bs2exp.npy'))
        self.bs2pose = np.load(os.path.join(mappings_path, 'bs2pose.npy'))
        self.bs2eye = np.load(os.path.join(mappings_path, 'bs2eye.npy'))

    def convert(self, blendshape_scores: np.ndarray):
        # blendshape_scores: [N, 52]
        # Calculate expression, pose, and eye_pose using the linear mappings
        exp = blendshape_scores @ self.bs2exp
        pose = blendshape_scores @ self.bs2pose
        pose[:, :3] = 0  # we do not support head rotation yet
        eye_pose = blendshape_scores @ self.bs2eye
        return exp, pose, eye_pose
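

# Illustrative example (assumes the bs2exp/bs2pose/bs2eye .npy mapping matrices exist
# under the given directory):
#   mp2flame = MP_2_FLAME(mappings_path='pretrained_models/mediapipe/')
#   scores = np.zeros((1, 52), dtype=np.float32)    # e.g. an all-zero (neutral) face
#   exp, pose, eye_pose = mp2flame.convert(blendshape_scores=scores)
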
class MediaPipeUtils:
    def __init__(self, model_asset_path='pretrained_models/mediapipe/face_landmarker.task', mappings_path='pretrained_models/mediapipe/'):
        base_options = python.BaseOptions(model_asset_path=model_asset_path)
        options = vision.FaceLandmarkerOptions(base_options=base_options,
                                               output_face_blendshapes=True,
                                               output_facial_transformation_matrixes=True,
                                               num_faces=1,
                                               min_face_detection_confidence=0.1,
                                               min_face_presence_confidence=0.1)
        self.detector = vision.FaceLandmarker.create_from_options(options)
        self.mp2flame = MP_2_FLAME(mappings_path=mappings_path)

    def run_mediapipe(self, image):
        # MediaPipe expects RGB input; OpenCV images are BGR
        image_numpy = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_numpy)
        detection_result = self.detector.detect(image)

        if len(detection_result.face_landmarks) == 0:
            print('No face detected')
            return None

        # 52 blendshape scores -> FLAME expression, pose, and eye pose coefficients
        blend_scores = detection_result.face_blendshapes[0]
        blend_scores = np.array(list(map(lambda l: l.score, blend_scores)), dtype=np.float32).reshape(1, 52)
        exp, pose, eye_pose = self.mp2flame.convert(blendshape_scores=blend_scores)

        # 478 dense landmarks, converted from normalized to pixel coordinates (z stays normalized)
        face_landmarks = detection_result.face_landmarks[0]
        face_landmarks_numpy = np.zeros((478, 3))
        for i, landmark in enumerate(face_landmarks):
            face_landmarks_numpy[i] = [landmark.x * image.width, landmark.y * image.height, landmark.z]

        return face_landmarks_numpy, exp, pose, eye_pose
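

# Illustrative example (assumes face_landmarker.task and the mapping .npy files have been
# downloaded to pretrained_models/mediapipe/, and that 'face.jpg' is a hypothetical image
# containing a face):
#   utils = MediaPipeUtils()
#   bgr = cv2.imread('face.jpg')
#   result = utils.run_mediapipe(bgr)
#   if result is not None:
#       landmarks, exp, pose, eye_pose = result   # (478, 3) landmarks plus FLAME coefficients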