# step1_recon_3d_face.py

import argparse
import tensorflow as tf 
import numpy as np
import cv2
from PIL import Image
import os
from tqdm import tqdm
from scipy.io import loadmat,savemat
from utils.preprocess import POS, headrecon_preprocess_withmask, facerecon_preprocess_yu_5p, facerecon_preprocess
from utils.loader import load_data, load_lm3d, load_center3d, read_facemodel
from utils.recon_face import  compute_center2d, compute_faceshape
from utils.create_renderer import create_renderer_graph
from PIL import Image


def load_facerecon_graph(graph_filename):
    """Load a frozen face-reconstruction graph from a serialized GraphDef file.

    The GraphDef is imported under the ``resnet`` name scope into a fresh
    ``tf.Graph``; its ``input_imgs:0`` tensor is remapped to a newly created
    float32 placeholder of shape ``[None, 224, 224, 3]``.

    Args:
        graph_filename: path of the serialized GraphDef (``.pb``) file.

    Returns:
        A ``(graph, input_placeholder, output_tensor)`` tuple, where
        ``output_tensor`` is the tensor named ``resnet/coeff:0``.
    """
    # Deserialize the GraphDef from disk.
    serialized_def = tf.GraphDef()
    with tf.gfile.GFile(graph_filename, 'rb') as pb_file:
        serialized_def.ParseFromString(pb_file.read())

    graph = tf.Graph()
    with graph.as_default():
        # The placeholder replaces the graph's own input tensor.
        image_batch = tf.placeholder(
            dtype=tf.float32, shape=[None, 224, 224, 3], name='input_imgs')
        tf.import_graph_def(
            serialized_def,
            name='resnet',
            input_map={'input_imgs:0': image_batch})
        coeff_tensor = graph.get_tensor_by_name('resnet/coeff:0')

    return graph, image_batch, coeff_tensor

def face_recon(input_path, output_path, vis_path=None, s_factor=1.5, focal=1015, center=112, align_nums=10):
    """Reconstruct a 3D face for every image in a folder and save the results.

    For each ``png``/``jpg``/``jpeg`` file in ``input_path`` that has a
    matching ``<stem>.mat`` mask file (and a matching landmark file), the face
    reconstruction network is run, the image is re-cropped around the head
    center, a face depth map is rendered and everything is written to
    ``<output_path>/<stem>.mat``. Images whose side files are missing are
    skipped silently.

    Args:
        input_path: folder holding the images, ``<stem>.mat`` masks and the
            landmark text files (``<stem>_detection.txt`` when
            ``align_nums == 5``, ``<stem>_landmark.txt`` when
            ``align_nums == 10``).
        output_path: existing folder that receives one ``.mat`` file per image.
        vis_path: optional existing folder; when given, the cropped image and
            the rendered depth map are additionally written there as PNG.
        s_factor: multiplier applied to the estimated face scale before the
            head crop.
        focal: focal length used for the network input image; it is rescaled
            with the inverse preprocessing parameters.
        center: image center used for the network input image; it is rescaled
            and shifted with the inverse preprocessing parameters.
        align_nums: alignment mode, either ``5`` or ``10``; it selects the
            landmark file and preprocessing function and is passed to
            ``load_lm3d``.

    Raises:
        ValueError: if ``align_nums`` is neither 5 nor 10.
    """
    from contextlib import closing

    # Fail early: any other value would leave the network input undefined.
    if align_nums not in (5, 10):
        raise ValueError(f'align_nums must be 5 or 10, got {align_nums!r}')

    print(f'[INFO] [Step1] Read images from {input_path}')
    # load BFM
    facemodel = read_facemodel()
    # read standard landmarks for face recon preprocessing
    lm3D = load_lm3d(align_nums)
    # read head center for depth recon preprocessing
    head_center3d = load_center3d()

    # create face recon graph and renderer graph
    face_recon_graph, images, coef = load_facerecon_graph('model/model_mask3_white_light.pb')
    depth_render_graph, input_focal, input_center, input_depth, \
        input_vertex, input_tri, output_depthmap = create_renderer_graph()

    filenames = [fname for fname in os.listdir(input_path)
                 if fname.endswith(('png', 'jpg', 'jpeg'))]

    # closing() guarantees both sessions are released even when an image
    # fails, without installing either session as the default one.
    with closing(tf.Session(graph=face_recon_graph)) as face_recon_sess, \
            closing(tf.Session(graph=depth_render_graph)) as render_sess:
        for filename in tqdm(filenames):
            name = os.path.join(input_path, filename)
            # Side files are keyed by the text before the first dot.
            stem = filename.split('.')[0]

            mask_mat_path = os.path.join(input_path, stem + '.mat')
            if not os.path.exists(mask_mat_path):
                continue
            mask = loadmat(mask_mat_path)['mask']

            # load image and the corresponding facial landmarks
            if align_nums == 5:
                detection_path = os.path.join(input_path, stem + '_detection.txt')
                # Skip images without landmarks, as the 68-point branch does.
                if not os.path.exists(detection_path):
                    continue
                img, lm = load_data(name, detection_path)
                # The last 10 values are used as 5 (x, y) points.
                lm = lm[-10:].reshape([5, 2])
                input_img, inv_params = facerecon_preprocess_yu_5p(img, lm, lm3D)
            else:
                landmark_path = os.path.join(input_path, stem + '_landmark.txt')
                if not os.path.exists(landmark_path):
                    continue
                img, lm = load_data(name, landmark_path)
                lm = lm.reshape([68, 2])
                input_img, inv_params = facerecon_preprocess(img, lm, lm3D)

            # recon face; the last value of the network output is dropped
            coeff = face_recon_sess.run(
                coef, feed_dict={images: np.expand_dims(input_img, 0)})[..., :-1]

            # reproject the reconstructed face to the raw image with adjusted
            # focal and center
            f = focal * inv_params[0]
            p_center = inv_params[0] * center + inv_params[1]
            face_shape, face_projection, landmarks_2d = compute_faceshape(coeff, facemodel, inv_params)

            # crop the raw image with head center as the image center
            center2d, displacement = compute_center2d(head_center3d, coeff, facemodel, f, p_center)
            _, s = POS(face_projection.transpose(), facemodel.meanshape.reshape([-1, 3]).transpose())
            crop_img, crop_mask, inv_params_, crop_lm, crop_param = headrecon_preprocess_withmask(
                img, mask, landmarks_2d, center2d.reshape([2]), s * s_factor / 100)

            # pack processed data:
            # 0: focal; [1, 3): center; [3, 260): face coeff; [260, 396): landmark
            data = np.zeros([3 + 257 + 136])
            data[0] = f / inv_params_[0]
            data[1: 3] = (p_center - inv_params_[1].reshape([2])) / inv_params_[0]
            data[3: 260] = coeff.reshape([257])
            # The last three coefficients are shifted by the head-center
            # displacement (presumably the translation part - confirm).
            data[257: 260] = data[257: 260] - displacement.reshape([3])
            data[260:] = crop_lm.reshape([136])
            face_projection_cropped, _ = compute_center2d(
                np.expand_dims(face_shape, 0), np.expand_dims(data[3:260], 0), facemodel,
                data[0], data[1:3], displace_flag=False, apply_pose=False)

            # render face depth: per-vertex value 10 - z, repeated on 3 channels
            d = 10 - face_shape[:, 2:]
            d = np.tile(np.expand_dims(d, 0), [1, 1, 3])
            d_map = render_sess.run(output_depthmap, feed_dict={
                input_focal: data[0].reshape([1]),
                input_center: data[1: 3].reshape([1, 1, 2]),
                input_depth: d,
                input_vertex: np.expand_dims(face_shape, 0),
                input_tri: np.expand_dims(facemodel.tri, 0) - 1  # start from 0
            })

            if vis_path:
                cv2.imwrite(os.path.join(vis_path, stem + '.png'), crop_img.astype(np.uint8))
                cv2.imwrite(os.path.join(vis_path, stem + '_dmap.png'), d_map[0] * 255)

            savemat(os.path.join(output_path, stem + '.mat'),
                    {'img': crop_img.astype(np.uint8),
                     'mask': crop_mask.astype(np.uint8),
                     'crop_param': crop_param.astype(np.float32),
                     'face3d': data.astype(np.float32),
                     'face_shape': face_shape.astype(np.float32),
                     'face_projection': face_projection_cropped.squeeze(0).astype(np.float32),
                     # first rendered channel is stored as depth, last one as mask
                     'face_depthmap': d_map[..., 0].squeeze(0),
                     'face_mask': d_map[..., -1].squeeze(0),
                     'face_tri': facemodel.tri}, do_compression=True)


if __name__ == '__main__':
    # Command-line entry point: resolve the folders and run step 1.
    parser = argparse.ArgumentParser()
    parser.add_argument('--root_dir', default='.')
    parser.add_argument('--input_path', default='examples')
    parser.add_argument('--save_path', default='output/step1')
    parser.add_argument('--vis_path', default=None)  # e.g. 'output/step1/vis'
    args = parser.parse_args()

    # prepare directories; every path is resolved against --root_dir
    input_path = os.path.join(args.root_dir, args.input_path)
    save_path = os.path.join(args.root_dir, args.save_path)
    # Visualisation is optional: without --vis_path no PNGs are written.
    vis_path = os.path.join(args.root_dir, args.vis_path) if args.vis_path else None
    os.makedirs(save_path, exist_ok=True)
    if vis_path:
        os.makedirs(vis_path, exist_ok=True)

    # recon 3d face and prepare the input to depth recon
    face_recon(input_path, save_path, vis_path)