-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
345 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import os | ||
import zipfile | ||
import xml.etree.ElementTree as ET | ||
import tempfile | ||
|
||
def read_strings_from_file(file_path):
    """Return every line of a UTF-8 text file with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one stripped string per line, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return list(map(str.strip, handle))
|
||
def update_shared_strings_in_xlsx(file_path, strings):
    """Write replacement strings into a copy of an .xlsx workbook.

    The workbook is unpacked into a temporary directory, the text of every
    ``<t>`` element in ``xl/sharedStrings.xml`` is replaced, in document
    order, by the matching entry of ``strings``, and the result is packed
    next to the input as ``<name>_translated<ext>``.

    Args:
        file_path: Path of the source workbook. It is only read.
        strings: Replacement texts, one per ``<t>`` element. If the counts
            differ a warning is printed and only the common prefix is
            replaced.

    Returns:
        None. When the workbook has no ``xl/sharedStrings.xml`` a message is
        printed and no output file is written.
    """
    namespace = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    # Serialise the SpreadsheetML namespace as the default namespace;
    # without this ElementTree rewrites every tag with a generated "ns0:"
    # prefix.
    ET.register_namespace('', namespace)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unpack the workbook.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        shared_strings_path = os.path.join(tmpdirname, 'xl', 'sharedStrings.xml')
        if not os.path.exists(shared_strings_path):
            print("sharedStrings.xml 文件不存在。")
            return

        tree = ET.parse(shared_strings_path)
        t_elements = list(tree.getroot().iter(f'{{{namespace}}}t'))

        # Warn when the number of replacements does not match the number
        # of <t> elements; zip() below stops at the shorter sequence.
        if len(strings) != len(t_elements):
            print(f"警告: 提供的字符串数量 ({len(strings)}) 与现有 <t> 标签数量 ({len(t_elements)}) 不匹配。")

        for t_element, new_string in zip(t_elements, strings):
            t_element.text = new_string

        tree.write(shared_strings_path, xml_declaration=True, encoding='UTF-8')

        # Build the output name from the real extension only, so that
        # ".xlsx" appearing in a directory name is left alone and an
        # upper-case ".XLSX" input is not overwritten.
        base, extension = os.path.splitext(file_path)
        new_xlsx_path = f"{base}_translated{extension}"

        if os.path.exists(new_xlsx_path):
            os.remove(new_xlsx_path)

        # Re-pack with deflate compression instead of storing members
        # uncompressed.
        with zipfile.ZipFile(new_xlsx_path, 'w', zipfile.ZIP_DEFLATED) as zip_ref:
            for foldername, _subfolders, filenames in os.walk(tmpdirname):
                for filename in filenames:
                    member_path = os.path.join(foldername, filename)
                    arcname = os.path.relpath(member_path, tmpdirname)
                    zip_ref.write(member_path, arcname)
|
||
def compose_file(file_type, input_path):
    """Build the translated document from ./out/translated_result.txt.

    The result file is always read first; only when ``file_type`` is
    "Excel" are its lines written back into the workbook at ``input_path``.

    Args:
        file_type: Kind of the source document; only "Excel" is handled.
        input_path: Path of the original document.
    """
    translated_lines = read_strings_from_file('./out/translated_result.txt')

    if file_type != "Excel":
        return

    # Replace the shared strings of the workbook with the translated lines.
    update_shared_strings_in_xlsx(input_path, translated_lines)
    print("生成翻译后的电子表格...")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
""" | ||
打开目录下text_extracted.txt | ||
该txt格式如下: | ||
hello word | ||
word 0 | ||
openai | ||
gpt | ||
(每条string一行) | ||
for 第i行,总共m行 | ||
统计词数 = 0 | ||
统计每行的词数(可以以空格区分,n个空格就是n+1个词) | ||
统计词数 += i行词数 | ||
如果第i行词数>500, | ||
截断,从1-i行生成第一个string(array) | ||
打印 | ||
继续上面操作,直至生成完所有string(array) | ||
""" | ||
import httpx | ||
import json | ||
import os | ||
# URL that translation requests are POSTed to.
deeplx_api = "http://127.0.0.1:1188/translate"

# Directory for result files; created at import time if it is missing.
output_dir = './out'
os.makedirs(output_dir, exist_ok=True)
def count_words(line):
    """Return the number of whitespace-separated words in ``line``."""
    return sum(1 for _ in line.split())
|
||
def _split_into_chunks(lines, max_words):
    """Join lines into newline-separated chunks, closing a chunk once its word total exceeds ``max_words``."""
    chunks = []
    current = []
    word_total = 0
    for line in lines:
        current.append(line)
        word_total += count_words(line)
        if word_total > max_words:
            chunks.append('\n'.join(current))
            current = []
            word_total = 0
    # Keep whatever is left after the last full chunk.
    if current:
        chunks.append('\n'.join(current))
    return chunks


def process_file(file_path, source_lang, target_lang, max_words=500):
    """Translate a text file chunk by chunk through the DeepLX HTTP endpoint.

    Each line of the input is stripped, the lines are grouped into chunks,
    and every chunk is POSTed to ``deeplx_api``. The ``data`` field of each
    response is appended to ./out/translated_result.txt, and every entry of
    a non-null ``alternatives`` field is written to its own
    ./out/alternatives(N).txt file.

    Args:
        file_path: UTF-8 text file with one string per line.
        source_lang: Source language code sent as ``source_lang``.
        target_lang: Target language code sent as ``target_lang``.
        max_words: A chunk is closed as soon as its running word count
            exceeds this value (default 500, the previous fixed limit).

    Returns:
        None.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = [line.strip() for line in file]

    strings_array = _split_into_chunks(lines, max_words)

    alternative_index = 0  # numbers the alternatives files
    with open('./out/translated_result.txt', 'w', encoding='utf-8') as result_file:
        for s in strings_array:
            print(f"正在处理...\n{s}")
            data = {
                "text": s,
                "source_lang": source_lang,
                "target_lang": target_lang
            }
            response_data = None
            try:
                # json= serialises the payload and sets the JSON content
                # type; passing a pre-serialised string through data= is
                # deprecated in httpx.
                r = httpx.post(url=deeplx_api, json=data)
                response_data = r.json()
            except httpx.RequestError as exc:
                print(f"An error occurred while requesting {exc.request.url!r}.")
            except ValueError:
                # Response.json() could not decode the body.
                print("The translation service returned a response that is not valid JSON.")

            translated = response_data.get('data') if isinstance(response_data, dict) else None
            if not isinstance(translated, str):
                # Write the untranslated chunk so the result file keeps one
                # line per source line; skipping it would shift every later
                # line when the text is written back.
                print(f"No translation received, keeping the original text. Response: {response_data}")
                result_file.write(s + '\n')
                continue

            result_file.write(translated + '\n')
            print(f"收到数据{response_data}")

            # Save each alternative translation, when present, to its own file.
            alternatives = response_data.get("alternatives")
            if alternatives is not None:
                print(alternatives)
                for alternative in alternatives:
                    with open(f'./out/alternatives({alternative_index}).txt', 'w', encoding='utf-8') as alt_file:
                        alt_file.write(alternative + '\n')
                    alternative_index += 1
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
""" | ||
Replacing Newline Characters: Within the extract_strings_from_xlsx function, | ||
any newline characters within the text of each <t> tag are replaced with a space. | ||
This ensures that each <t> tag's content is treated as a single line when writing to the output file. | ||
Handling Empty Tags: The code includes a check to ensure that if a <t> tag is empty (None), | ||
it does not cause an error. | ||
""" | ||
|
||
import os | ||
import zipfile | ||
import tempfile | ||
import xml.etree.ElementTree as ET | ||
import tkinter as tk | ||
from tkinter import filedialog | ||
|
||
# Working directory for the extracted text; created at import time.
tmp_dir = './tmp'
os.makedirs(tmp_dir, exist_ok=True)

# Module-level state filled in by extract_file() and read by callers.
# Both names are defined up front so that reading them before a successful
# extraction yields an empty string instead of raising AttributeError.
input_path = ""
file_type = ""
|
||
def extract_strings_from_xlsx(file_path):
    """Return the text of every ``<t>`` element in a workbook's shared strings.

    ``xl/sharedStrings.xml`` is read straight from the archive instead of
    unpacking the whole workbook to disk. The member name is matched
    case-insensitively so the lookup does not depend on the file system.

    Args:
        file_path: Path of the .xlsx file.

    Returns:
        A list with one string per ``<t>`` element, in document order. Line
        breaks inside a string are replaced by spaces so that each entry
        fits on one line; an empty element yields ``''``. An empty list is
        returned (and a message printed) when the workbook has no shared
        strings part.
    """
    t_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}t'

    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        member = next(
            (name for name in zip_ref.namelist()
             if name.lower() == 'xl/sharedstrings.xml'),
            None,
        )
        if member is None:
            print("sharedStrings.xml 文件不存在。")
            return []
        with zip_ref.open(member) as stream:
            root = ET.parse(stream).getroot()

    strings = []
    for t in root.iter(t_tag):
        text_content = t.text or ''
        # Treat "\r\n" as one break, then catch lone "\r" and "\n". A stray
        # carriage return would otherwise be read back as a line break
        # when the extracted text file is reopened.
        text_content = (
            text_content.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')
        )
        strings.append(text_content)

    return strings
|
||
def write_strings_to_file(strings, output_file):
    """Write each entry of ``strings`` on its own line to a UTF-8 text file.

    Args:
        strings: Iterable of values to write; each is formatted as text.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.writelines(f"{entry}\n" for entry in strings)
|
||
|
||
def extract_file():
    """Ask the user for an .xlsx file and dump its shared strings to disk.

    A file dialog is shown; the shared strings of the chosen workbook are
    written, one per line, to ``text_extracted.txt`` inside ``tmp_dir``.

    Module globals updated:
        input_path: Path chosen in the dialog, or "" when it was cancelled.
        file_type: "Excel" after a successful extraction, otherwise "".

    Returns:
        None.
    """
    global input_path, file_type

    # Clear the state left by a previous call so callers can tell whether
    # this call produced anything.
    input_path = ""
    file_type = ""

    # Create a hidden Tk root just to host the dialog, and always destroy
    # it so repeated calls do not accumulate root windows.
    root = tk.Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(
            title='选择一个 .xlsx 文件,目前只支持了Excel',
            filetypes=[('Excel files', '*.xlsx')],
        )
    finally:
        root.destroy()

    if not file_path:
        print("没有选择文件。")
        return
    input_path = file_path

    # Only .xlsx has an extractor. Anything else is rejected here rather
    # than being parsed as a workbook.
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension != ".xlsx":
        print("目前只支持.xlsx文件")
        return
    file_type = "Excel"

    strings = extract_strings_from_xlsx(file_path)

    print(f"已处理{file_type}文件: {input_path}")
    output_file = os.path.join(tmp_dir, 'text_extracted.txt')
    write_strings_to_file(strings, output_file)
    print(f"提取的字符串已写入 {output_file}")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,12 @@ | ||
# DeeplxFile | ||
基于Deeplx提供的免费,不限制文件大小的文件翻译工具 | ||
|
||
------------------ | ||
点击[这里下载](https://github.com/infrost/DeeplxFile/releases) | ||
|
||
## 使用说明 | ||
提供了编译好的exe版本,你也可以下载源代码, | ||
然后运行 | ||
```bash | ||
python deeplxfile.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import threading | ||
import subprocess | ||
import sys | ||
import os | ||
import time | ||
from Lib import compose, data_process, extract | ||
|
||
# 定义线程任务,执行 deeplx_windows_amd64.exe | ||
def run_deeplx():
    """Run the bundled deeplx executable and wait for it to exit.

    A missing executable or a non-zero exit status is reported on stdout
    instead of being raised.
    """
    binary = os.path.join("Lib", "deeplx_windows_amd64.exe")
    command = [binary]

    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print(f"The file {binary} was not found.")
    except subprocess.CalledProcessError as err:
        print(f"Error running deeplx_windows_amd64.exe: {err}")
|
||
def _choose_language(prompt, confirmation, languages):
    """Show the numbered language menu until a valid index is entered; return the chosen code."""
    while True:
        print(prompt)
        for i, (lang_name, _) in enumerate(languages, start=1):
            print(f" {i}. {lang_name}")

        try:
            choice = int(input())
        except ValueError:
            choice = 0  # not a number: handled as an out-of-range choice

        if 1 <= choice <= len(languages):
            code = languages[choice - 1][1]
            print(f"{confirmation}{code}")
            return code
        print("输入无效,请输入有效的数字序号。")


def loop():
    """Run the interactive translate-a-file cycle until the process is ended.

    Each round asks for the source and target language, lets the user pick
    a file, translates the extracted text and writes the translated copy.
    Invalid menu input is asked again, and a round in which no supported
    file was selected skips translation. The function does not return.
    """
    # Display name and language code for each menu entry.
    languages = [
        ("中文", "ZH"),
        ("英文", "EN"),
        ("日文", "JA"),
        ("韩文", "KO"),
        ("法文", "FR"),
        ("德文", "DE"),
        ("俄文", "RU"),
        ("西班牙文", "ES"),
        ("意大利文", "IT"),
        ("葡萄牙文", "PT"),
    ]

    # Iterate instead of recursing so a long session cannot exhaust the
    # call stack.
    while True:
        source_lang = _choose_language(
            "请输入源文件语言(数字序号,按回车结束):",
            "你选择的语言代码是: ",
            languages,
        )
        target_lang = _choose_language(
            "\n请输入目标文件语言(数字序号):",
            "你选择的目标语言代码是: ",
            languages,
        )

        print("等待选择文件...")
        # Clear the previous round's result so a cancelled or unsupported
        # selection is detectable below.
        extract.input_path = ""
        extract.file_type = ""
        # Called directly: the earlier Thread(target=extract.extract_file())
        # already ran the function on this thread and started a thread with
        # no target.
        extract.extract_file()

        if extract.input_path and extract.file_type:
            print("完成解压")
            data_process.process_file('./tmp/text_extracted.txt', source_lang, target_lang)
            print(f"完成翻译,正在回写{extract.input_path}")

            # Build the translated file.
            compose.compose_file(extract.file_type, extract.input_path)

            print("更新完成!已在输入的相同目录下生成文件")

        print("DeeplxFile by Kevin, 项目地址https://github.com/infrost/deeplxfile, Version V0.1.0")
        input("Enter键继续翻译...")
|
||
|
||
def main():
    """Start the deeplx engine on a background thread, then run the interactive loop."""
    threading.Thread(target=run_deeplx).start()
    print("正在启动deeplx引擎")
    # Give deeplx a moment to start before the first prompt.
    time.sleep(1)
    loop()


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()