app.py (forked from Zz-ww/SadTalker-Video-Lip-Sync)
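"""Gradio web UI for SadTalker-Video-Lip-Sync.

Lip-syncs a source video to a driving audio track, with optional GFPGAN/GPEN
face enhancement and optional DAIN frame interpolation (via PaddlePaddle).
"""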
import gradio as gr
import torch
from time import strftime
import os, sys
from argparse import ArgumentParser, Namespace
from src.utils.preprocess import CropAndExtract
from src.test_audio2coeff import Audio2Coeff
from src.facerender.animate import AnimateFromCoeff
from src.generate_batch import get_data
from src.generate_facerender_batch import get_facerender_data
from third_part.GFPGAN.gfpgan import GFPGANer
from third_part.GPEN.gpen_face_enhancer import FaceEnhancement
import warnings
warnings.filterwarnings("ignore")


def process_video(source_video, driven_audio, checkpoint_dir, result_dir, batch_size,
                  enhancer, cpu, use_DAIN, DAIN_weight, dain_output, time_step,
                  remove_duplicates):
    """Gradio callback: bundle the UI inputs into a Namespace and run the pipeline."""
    args = Namespace(
        driven_audio=driven_audio,
        source_video=source_video,
        checkpoint_dir=checkpoint_dir,
        result_dir=result_dir,
        batch_size=int(batch_size),  # gr.Number yields a float; batching expects an int
        enhancer=enhancer,
        cpu=cpu,
        use_DAIN=use_DAIN,
        DAIN_weight=DAIN_weight,
        dain_output=dain_output,
        time_step=time_step,
        remove_duplicates=remove_duplicates
    )
    args.device = "cuda" if torch.cuda.is_available() and not args.cpu else "cpu"
    # main() creates the timestamped output directory itself and returns its
    # path; recomputing strftime() here could yield a different timestamp.
    return main(args)


def main(args):
    pic_path = args.source_video
    audio_path = args.driven_audio
    enhancer_region = args.enhancer
    save_dir = os.path.join(args.result_dir, strftime("%Y_%m_%d_%H.%M.%S"))
    os.makedirs(save_dir, exist_ok=True)
    device = args.device
    batch_size = args.batch_size
    current_code_path = sys.argv[0]
    current_root_path = os.path.split(current_code_path)[0]
    os.environ['TORCH_HOME'] = os.path.join(current_root_path, args.checkpoint_dir)
    # Checkpoint and config paths. The 'auido' spelling below is kept as-is:
    # it matches the filenames of the released SadTalker checkpoints.
    path_of_lm_croper = os.path.join(current_root_path, args.checkpoint_dir, 'shape_predictor_68_face_landmarks.dat')
    path_of_net_recon_model = os.path.join(current_root_path, args.checkpoint_dir, 'epoch_20.pth')
    dir_of_BFM_fitting = os.path.join(current_root_path, args.checkpoint_dir, 'BFM_Fitting')
    wav2lip_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'wav2lip_gan.pth')
    audio2pose_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2pose_00140-model.pth')
    audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')
    audio2exp_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2exp_00300-model.pth')
    audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')
    free_view_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'facevid2vid_00189-model.pth.tar')
    mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00109-model.pth.tar')
    facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender_still.yaml')
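    # Build the pipeline stages: face crop / 3DMM extraction, audio-to-coefficient
    # prediction, and coefficient-driven rendering, plus the GFPGAN and GPEN
    # models used for optional face enhancement.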
    print(path_of_net_recon_model)
    preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)
    print(audio2pose_checkpoint)
    print(audio2exp_checkpoint)
    audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path, audio2exp_checkpoint,
                                 audio2exp_yaml_path, wav2lip_checkpoint, device)
    print(free_view_checkpoint)
    print(mapping_checkpoint)
    animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint, facerender_yaml_path, device)
    restorer_model = GFPGANer(model_path='checkpoints/GFPGANv1.4.pth', upscale=1, arch='clean',
                              channel_multiplier=2, bg_upsampler=None)
    enhancer_model = FaceEnhancement(base_dir='checkpoints', size=512, model='GPEN-BFR-512', use_sr=False,
                                     sr_model='rrdb_realesrnet_psnr', channel_multiplier=2, narrow=1, device=device)
    first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
    os.makedirs(first_frame_dir, exist_ok=True)
    print('3DMM Extraction for source image')
    first_coeff_path, crop_pic_path, crop_info = preprocess_model.generate(pic_path, first_frame_dir)
    if first_coeff_path is None:
        print("Can't get the coeffs of the input")
        return None
    # Audio -> motion coefficients -> rendered, enhanced frames.
    batch = get_data(first_coeff_path, audio_path, device)
    coeff_path = audio_to_coeff.generate(batch, save_dir)
    data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path, batch_size, device)
    tmp_path, new_audio_path, return_path = animate_from_coeff.generate(data, save_dir, pic_path, crop_info,
                                                                        restorer_model, enhancer_model,
                                                                        enhancer_region)
    torch.cuda.empty_cache()
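    # Optional DAIN frame interpolation (runs under PaddlePaddle's static graph
    # mode); time_step=0.5 roughly doubles the frame rate.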
    if args.use_DAIN:
        import paddle
        from src.dain_model import dain_predictor
        paddle.enable_static()
        predictor_dain = dain_predictor.DAINPredictor(args.dain_output, weight_path=args.DAIN_weight,
                                                      time_step=args.time_step,
                                                      remove_duplicates=args.remove_duplicates)
        frames_path, temp_video_path = predictor_dain.run(tmp_path)
        paddle.disable_static()
        save_path = return_path[:-4] + '_dain.mp4'
        # Mux the interpolated video with the original audio without re-encoding.
        command = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (temp_video_path, new_audio_path, save_path)
        os.system(command)
    os.remove(tmp_path)
    return save_dir
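

# Gradio UI. The order of the inputs below must match the parameter order of
# process_video().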
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Source video", format="mp4"),  # Gradio 3.x uses `format`, not `type`
        gr.Audio(label="Driving audio", type="filepath"),
        gr.Textbox(value='./checkpoints', label="Checkpoint directory"),
        gr.Textbox(value='./results', label="Results directory"),
        gr.Number(value=1, label="Batch size"),
        gr.Radio(['none', 'lip', 'face'], label="Enhancer region", value='lip'),
        gr.Checkbox(label="CPU only"),
        gr.Checkbox(label="Use DAIN"),
        gr.Textbox(value='./checkpoints/DAIN_weight', label="DAIN weight path"),
        gr.Textbox(value='dain_output', label="DAIN output directory"),
        gr.Number(value=0.5, label="DAIN time step (0.5 roughly doubles the frame rate)"),
        gr.Checkbox(label="Remove duplicate frames")
    ],
    outputs=[
        gr.Textbox(label="Results directory")
    ],
    title="Video and audio processing",
    description="Process a video with driving audio using the given checkpoints and model settings."
)


def share():
    """Parse --share and launch the interface, optionally with a public link."""
    parser = ArgumentParser()
    parser.add_argument("--share", type=str, default='yes',
                        help="create a public link ('yes' or 'no')")
    args = parser.parse_args()
    if args.share.lower() == 'yes':
        print("A public link will be created")
        iface.launch(share=True)
    else:
        print("No public link will be created")
        iface.launch()


if __name__ == "__main__":
    share()
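
# Usage (assuming the required checkpoints are downloaded into ./checkpoints):
#   python app.py --share yes   # launch with a public Gradio link
#   python app.py --share no    # local-only launch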