-
Notifications
You must be signed in to change notification settings - Fork 2
/
translate_imgs_folder.py
90 lines (79 loc) · 2.93 KB
/
translate_imgs_folder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
调用方法
python translate_imgs_folder.py "pdfs\Bubeck 等 - 2023 - Sparks of Artificial General Intelligence Early e\imgs" "pdfs\Bubeck 等 - 2023 - Sparks of Artificial General Intelligence Early e\imgs_zh"
"""
import re
import os
import sys
import time
import subprocess
# 定义输入和输出文件夹
imgs_folder = "imgs/"
imgs_zh_folder = "imgs_zh/"
# sys.argv[1]和sys.argv[2]分别是输入和输出文件夹,必须存在这两个参数,否则报错
if len(sys.argv) == 3:
imgs_folder = sys.argv[1]
imgs_zh_folder = sys.argv[2]
# 转换为绝对路径
imgs_folder = os.path.abspath(imgs_folder)
imgs_zh_folder = os.path.abspath(imgs_zh_folder)
else:
sys.exit("请指定输入和输出文件夹")
# 确保输出文件夹存在
if not os.path.exists(imgs_zh_folder):
os.makedirs(imgs_zh_folder)
# 获取输入文件夹中的所有文件
input_files = sorted(os.listdir(imgs_folder))
# 提取页码的正则表达式
page_number_regex = re.compile(r"_page_(\d+)\.png")
# 按照页码排序
def extract_page_number(file_name):
match = page_number_regex.search(file_name)
return int(match.group(1)) if match else 0
input_files.sort(key=extract_page_number)
loop_count = 1
i = 0
while i < len(input_files):
input_file = input_files[i]
i += 1
print(f"正在处理 {input_file} ...")
input_file_path = os.path.join(imgs_folder, input_file)
# output_file_path = os.path.join(imgs_zh_folder, input_file.split("_page_")[1].replace(".png", ".jpg"))
output_file_path = os.path.join(imgs_zh_folder, input_file.replace(".png", ".jpg"))
print("\ninput_file_path: ", input_file_path)
print("\noutput_file_path: ", output_file_path)
# 跳过非图片文件
if not input_file.lower().endswith(('.png', '.jpg', '.jpeg')):
continue
# 如果 output_file_path 存在,跳过
if os.path.exists(output_file_path):
continue
# 调用 wechat_translate_a_img.py 脚本
command = f'python wechat_translate_a_img.py \"{input_file_path}\" \"{output_file_path}\"'
# process = subprocess.Popen(command, shell=True)
os.system(command)
# 等待输出文件存在
elapsed_time = 0
while not os.path.exists(output_file_path):
print("等待输出文件存在...")
wait_time = 3
time.sleep(wait_time) # 每隔0.1秒检查一次
elapsed_time += 1
if elapsed_time > 20 / wait_time:
print("等待时间超过10秒,重新处理该文件")
# process.terminate()
break
if elapsed_time > 3:
i -= 1
continue
# if i == len(input_files):
# i = 0
# loop_count += 1
# if loop_count > 2:
# break
# # 检查输出文件是否存在
# if os.path.exists(output_file_path):
# print(f"{input_file} 转换成功")
# process.terminate() # 确保子进程已经结束
# else:
# print(f"{input_file} 转换失败")