forked from CKRainbow/liliths-throne-localization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
update.py
189 lines (160 loc) · 7.06 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
from pathlib import Path
from typing import List, Dict
import json
import shutil
import asyncio
import re
from const import OUTDATE_DIR_NAME
from logger import logger
def _get_usable_event_loop():
    """Return an open event loop for the current thread, creating one if needed."""
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Recent Python versions raise instead of implicitly creating a loop
        # when none has been set for this thread.
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop


def update_dict(
    old_dict_path: Path, new_dict_path: Path, ignore_untranslated: bool = False
):
    """Update every JSON dictionary file found under ``old_dict_path``.

    For each ``*.json`` file below ``old_dict_path`` (searched recursively),
    ``update_dict_file`` is called with three paths: the old file, the file at
    the same relative location under ``new_dict_path``, and the file at the
    same relative location under the outdated directory of ``new_dict_path``.

    Args:
        old_dict_path: Root directory of the previous dictionary files.
        new_dict_path: Root directory of the freshly extracted dictionary files.
        ignore_untranslated: Forwarded unchanged to ``update_dict_file``.
    """
    # Normalise once so that plain strings work for every later path operation
    # (the original wrapped only some of the uses in ``Path``).
    old_dict_path = Path(old_dict_path)
    new_dict_path = Path(new_dict_path)

    new_outdated_dir = new_dict_path / OUTDATE_DIR_NAME
    old_outdated_dir = old_dict_path / OUTDATE_DIR_NAME

    # Migrate the outdated entries of the previous version. This runs before
    # the glob below, so files that were moved away are not listed by it.
    # NOTE(review): if ``new_outdated_dir`` already exists, ``shutil.move``
    # places the old directory *inside* it instead of merging - confirm that
    # this situation cannot occur here.
    if old_outdated_dir.exists():
        shutil.move(old_outdated_dir, new_outdated_dir)

    # Collect all JSON files of the old dictionary.
    old_dict_files: List[Path] = list(old_dict_path.glob("**/*.json"))

    async def _update_all() -> None:
        # ``gather`` is awaited inside a coroutine so that it always runs with
        # a running event loop.
        await asyncio.gather(
            *(
                update_dict_file(
                    old_dict_file,
                    new_dict_path / old_dict_file.relative_to(old_dict_path),
                    new_outdated_dir / old_dict_file.relative_to(old_dict_path),
                    ignore_untranslated,
                )
                for old_dict_file in old_dict_files
            )
        )

    _get_usable_event_loop().run_until_complete(_update_all())
async def update_dict_file(
    old_dict_file: Path,
    new_dict_file: Path,
    outdated_file: Path,
    ignore_untranslated: bool = False,
    outdated_version: str = "0.4.8.9",
):
    """Migrate one dictionary file and merge its lost entries into ``outdated_file``.

    Steps:
      1. Load ``old_dict_file``.
      2. If ``new_dict_file`` exists, copy translations into it via
         ``update_data`` and write it back; the old entries that were not
         consumed are the lost ("outdated") entries. If it does not exist,
         the whole old file is treated as outdated.
      3. Merge the outdated entries into the data loaded from
         ``outdated_file`` (or an empty list) via ``update_data`` with a
         version tag, and write the result unless it is empty.

    Args:
        old_dict_file: Dictionary file of the previous version.
        new_dict_file: Matching file of the new extraction; rewritten in place
            when it exists.
        outdated_file: File collecting outdated entries; its parent directory
            is created when missing.
        ignore_untranslated: Currently has no effect. The original code had a
            commented-out filter on ``stage != 0`` here and wrote the full new
            data in both branches; that behaviour is kept.
        outdated_version: Version tag passed to ``update_data`` for the
            outdated merge. Defaults to the previously hard-coded value.
    """
    with open(old_dict_file, "r", encoding="utf-8") as old_dict:
        old_dict_data: List[Dict] = json.load(old_dict)

    no_file = not new_dict_file.exists()
    if no_file:
        # The file no longer exists in the new extraction.
        logger.info("在新提取中该文件已不存在:%s", old_dict_file)
        outdated_data = old_dict_data
    else:
        with open(new_dict_file, "r", encoding="utf-8") as new_dict:
            new_dict_data: List[Dict] = json.load(new_dict)

        # update_data returns the old list with migrated/untranslated entries
        # replaced by None.
        old_dict_data = await update_data(old_dict_data, new_dict_data)

        with open(new_dict_file, "w", encoding="utf-8") as new_dict:
            json.dump(new_dict_data, new_dict, indent=4, ensure_ascii=False)

        outdated_data = [entry for entry in old_dict_data if entry is not None]

        if outdated_data:
            logger.info("在新提取中该文件存在遗失条目:%s", old_dict_file)
            # Logged (instead of printed) so it follows the line above.
            logger.info("%s", [entry["key"] for entry in outdated_data])

    # Merge with the outdated entries recorded earlier.
    if outdated_file.exists():
        with open(outdated_file, "r", encoding="utf-8") as f:
            prev_outdated_data = json.load(f)
    else:
        prev_outdated_data = []

    await update_data(outdated_data, prev_outdated_data, version=outdated_version)

    if not prev_outdated_data:
        if no_file:
            logger.warning(" - 文件不再包含任何条目:%s", outdated_file)
        # NOTE(review): the source lost its indentation; returning for every
        # empty result (not only when no_file) is assumed so that no empty
        # outdated file is written - confirm against the original layout.
        return

    outdated_file.parent.mkdir(parents=True, exist_ok=True)

    with open(outdated_file, "w", encoding="utf-8") as f:
        json.dump(prev_outdated_data, f, ensure_ascii=False, indent=4)
# Character class used to detect Chinese text (range kept as in the original).
_ZH_CHAR = r"[一-龟]"
_QUOTED_ZH_RE = re.compile(rf"'({_ZH_CHAR}+?)'")
_BRACKET_THEN_ZH_RE = re.compile(rf"\] ({_ZH_CHAR})")
_ZH_THEN_BRACKET_RE = re.compile(rf"({_ZH_CHAR}) \[")
_BRACKET_GAP_RE = re.compile(r"\] \[")
_SPACE_BEFORE_TAG_RE = re.compile(r" <(i|b)")
_SPACE_AFTER_TAG_RE = re.compile(r"(i|b)> ")


def _normalize_translation(translation: str, key: str) -> str:
    """Apply the punctuation/spacing conventions to a migrated translation.

    Entries whose key contains ``effects`` or ``preParsingEffects`` are
    returned unchanged.
    """
    # NOTE(review): the source lost its indentation. Only the quote rule is
    # certain to be guarded by this key check; guarding all rules is assumed
    # here - confirm against the original layout.
    if "effects" in key or "preParsingEffects" in key:
        return translation

    # Single-quoted Chinese text gets Chinese double quotes.
    translation = _QUOTED_ZH_RE.sub(r"“\1”", translation)
    # Full-width parentheses become half-width ones. Literal replacement is
    # used because a bare "(" or ")" is not a valid regular expression.
    translation = translation.replace("\uff08", "(").replace("\uff09", ")")
    translation = translation.replace("\t ", "\t")
    # No space between Chinese text and markup brackets.
    translation = _BRACKET_THEN_ZH_RE.sub(r"]\1", translation)
    translation = _ZH_THEN_BRACKET_RE.sub(r"\1[", translation)
    translation = _BRACKET_GAP_RE.sub(r"][", translation)
    # No space around <i>/<b> tags.
    translation = _SPACE_BEFORE_TAG_RE.sub(r"<\1", translation)
    translation = _SPACE_AFTER_TAG_RE.sub(r"\1>", translation)
    return translation


async def update_data(
    old_dict_data: List[Dict[str, str]],
    new_dict_data: List[Dict[str, str]],
    version: str = "",
) -> List[Dict[str, str]]:
    """Carry translations from ``old_dict_data`` over to ``new_dict_data``.

    Entries are matched by their ``original`` text. When the same original
    occurs several times, the n-th old occurrence is paired with the n-th new
    occurrence. Both lists are modified in place.

    With an empty ``version`` (migration mode) each paired new entry receives
    the normalised old ``translation`` and the old ``stage``, and the consumed
    old entry is replaced by ``None``. Old entries without a partner are left
    in place.

    With a non-empty ``version`` (outdated-merge mode) old entries whose
    translation equals the original are skipped. A paired new entry receives
    the old ``translation`` and ``stage`` and its key is tagged with
    ``_<version>`` (replacing a trailing ``_<suffix>`` when that suffix
    contains a dot). Old entries without a partner are appended to
    ``new_dict_data`` as copies with ``_<version>`` added to their key.

    Args:
        old_dict_data: Entries of the previous dictionary. Entries with
            ``stage == 0`` are replaced by ``None``; ``None`` items are ignored.
        new_dict_data: Entries to receive the translations.
        version: Version tag; the empty string selects migration mode.

    Returns:
        ``old_dict_data`` itself (the same list object), possibly containing
        ``None`` items.
    """
    # original text -> indices of the matching entries in new_dict_data
    new_dict_map: Dict[str, List[int]] = {}
    for idx, data in enumerate(new_dict_data):
        new_dict_map.setdefault(data["original"], []).append(idx)

    # original text -> indices of the translated entries in old_dict_data
    old_dict_map: Dict[str, List[int]] = {}
    for idx, data in enumerate(old_dict_data):
        if data is None:
            continue
        if data["stage"] == 0:  # the old dictionary has no translation
            old_dict_data[idx] = None
            continue
        original = data["original"]
        # Keys that do not start with a digit are treated as XML entries,
        # whose newlines are stored escaped.
        if not data["key"][:1].isdigit():
            original = original.replace("\\n", "\n")
        old_dict_map.setdefault(original, []).append(idx)

    for original, old_indices in old_dict_map.items():
        new_indices = new_dict_map.get(original, [])

        if version != "":
            for slot, old_idx in enumerate(old_indices):
                old_entry = old_dict_data[old_idx]
                # Ignore entries whose translation equals the original
                # (nothing needed translating).
                if old_entry["original"] == old_entry["translation"]:
                    continue
                if slot >= len(new_indices):
                    # No partner left: archive a copy tagged with the version.
                    archived = dict(old_entry)
                    archived["key"] = f"{old_entry['key']}_{version}"
                    new_dict_data.append(archived)
                    continue
                target = new_dict_data[new_indices[slot]]
                target["translation"] = old_entry["translation"]
                target["stage"] = old_entry["stage"]
                base, sep, suffix = target["key"].rpartition("_")
                if sep and "." in suffix:
                    # Replace the previous version tag instead of stacking.
                    target["key"] = f"{base}_{version}"
                else:
                    target["key"] += f"_{version}"
        else:
            # zip stops at the shorter list, so surplus old entries stay in
            # old_dict_data and surplus new entries stay untouched.
            for new_idx, old_idx in zip(new_indices, old_indices):
                old_entry = old_dict_data[old_idx]
                target = new_dict_data[new_idx]
                # Keep the translation and its current stage.
                target["translation"] = _normalize_translation(
                    old_entry["translation"], old_entry["key"]
                )
                target["stage"] = old_entry["stage"]
                # Remove the old entry that has been migrated.
                old_dict_data[old_idx] = None

    return old_dict_data