Skip to content

Commit

Permalink
first releases
Browse files Browse the repository at this point in the history
  • Loading branch information
infrost committed Aug 15, 2024
1 parent 957a86f commit 92cd3b1
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 0 deletions.
Empty file added Lib/__init__.py
Empty file.
Binary file added Lib/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file added Lib/__pycache__/compose.cpython-312.pyc
Binary file not shown.
Binary file added Lib/__pycache__/data_process.cpython-312.pyc
Binary file not shown.
Binary file added Lib/__pycache__/extract.cpython-312.pyc
Binary file not shown.
66 changes: 66 additions & 0 deletions Lib/compose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import zipfile
import xml.etree.ElementTree as ET
import tempfile

def read_strings_from_file(file_path):
    """Return every line of a UTF-8 text file with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one stripped string per line, in file order.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        # Iterating the handle yields the same lines readlines() would.
        return list(map(str.strip, handle))

def update_shared_strings_in_xlsx(file_path, strings):
    """Write a copy of an .xlsx workbook whose shared strings are replaced.

    The workbook is unpacked into a temporary directory, the text of every
    ``<t>`` element in ``xl/sharedStrings.xml`` is replaced (in document
    order) by the corresponding entry of ``strings``, and the result is
    re-zipped next to the original as ``<name>_translated<ext>``.

    Args:
        file_path: Path of the source workbook. The source is never modified.
        strings: Replacement texts, one per ``<t>`` element. If the counts
            differ a warning is printed and only the common prefix is
            replaced.

    Returns:
        None. Nothing is written when the workbook has no
        ``xl/sharedStrings.xml``.
    """
    main_ns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    # Keep the spreadsheet namespace as the default namespace on output;
    # otherwise ElementTree rewrites every tag with a generated "ns0:" prefix.
    ET.register_namespace('', main_ns)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unpack the .xlsx archive.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        shared_strings_path = os.path.join(tmpdirname, 'xl', 'sharedStrings.xml')
        if not os.path.exists(shared_strings_path):
            print("sharedStrings.xml 文件不存在。")
            return

        tree = ET.parse(shared_strings_path)
        t_elements = list(tree.getroot().iter(f'{{{main_ns}}}t'))

        # A mismatch is reported but not fatal: zip() below stops at the
        # shorter of the two sequences.
        if len(strings) != len(t_elements):
            print(f"警告: 提供的字符串数量 ({len(strings)}) 与现有 <t> 标签数量 ({len(t_elements)}) 不匹配。")

        for t_element, new_string in zip(t_elements, strings):
            t_element.text = new_string

        tree.write(shared_strings_path, xml_declaration=True, encoding='UTF-8')

        # Derive the output name from the extension only. A plain
        # str.replace('.xlsx', ...) would also rewrite directory names that
        # contain ".xlsx", and would leave the path unchanged (so the input
        # itself would be deleted and overwritten) for e.g. ".XLSX".
        base, ext = os.path.splitext(file_path)
        new_xlsx_path = f'{base}_translated{ext}'

        # Remove a stale output from a previous run.
        if os.path.exists(new_xlsx_path):
            os.remove(new_xlsx_path)

        # Re-pack every extracted member, compressed, with archive-relative names.
        with zipfile.ZipFile(new_xlsx_path, 'w', zipfile.ZIP_DEFLATED) as zip_ref:
            for foldername, _subfolders, filenames in os.walk(tmpdirname):
                for filename in filenames:
                    member_path = os.path.join(foldername, filename)
                    arcname = os.path.relpath(member_path, tmpdirname)
                    zip_ref.write(member_path, arcname)

def compose_file(file_type, input_path):
    """Merge the translated text back into the original document.

    The translated lines are always read from ./out/translated_result.txt;
    they are only applied when ``file_type`` is "Excel", in which case a
    translated copy of the workbook at ``input_path`` is produced.

    Args:
        file_type: Document kind; only "Excel" triggers any output.
        input_path: Path of the document the text was extracted from.
    """
    translated_lines = read_strings_from_file('./out/translated_result.txt')

    # Nothing to compose for any other document kind.
    if file_type != "Excel":
        return

    # Replace the workbook's shared strings with the translated lines.
    update_shared_strings_in_xlsx(input_path, translated_lines)
    print(f"生成翻译后的电子表格...")

87 changes: 87 additions & 0 deletions Lib/data_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
打开目录下text_extracted.txt
该txt格式如下:
hello word
word 0
openai
gpt
(每条string一行)
for 第i行,总共m行
统计词数 = 0
统计每行的词数(可以以空格区分,n个空格就是n+1个词)
统计词数 += i行词数
如果第i行词数>500,
截断,从1-i行生成第一个string(array)
打印
继续上面操作,直至生成完所有string(array)
"""
import httpx
import json
import os
# Make sure the ./out directory exists as soon as this module is imported;
# process_file writes its result files under that path.
output_dir = './out'
os.makedirs(output_dir, exist_ok=True)

# URL that process_file sends its translation requests to.
deeplx_api = "http://127.0.0.1:1188/translate"
def count_words(line):
    """Return how many whitespace-separated words ``line`` contains."""
    # split() with no argument collapses runs of whitespace, so empty or
    # blank input yields zero words.
    words = line.split()
    return len(words)

def _chunk_lines(lines, max_words):
    """Group stripped lines into newline-joined chunks of about ``max_words`` words.

    A chunk is closed as soon as its running word count exceeds
    ``max_words``, so it may overshoot the limit by its last line.
    """
    chunks = []
    current = []
    running_words = 0
    for raw_line in lines:
        line = raw_line.strip()
        running_words += count_words(line)
        current.append(line)
        if running_words > max_words:
            chunks.append('\n'.join(current))
            running_words = 0
            current = []
    # Whatever is left after the last cut forms the final chunk.
    if current:
        chunks.append('\n'.join(current))
    return chunks


def process_file(file_path, source_lang, target_lang, max_words=500):
    """Translate a one-string-per-line text file through the translation API.

    The file is split into chunks of roughly ``max_words`` words, each chunk
    is posted to ``deeplx_api``, and the translated text is appended to
    ./out/translated_result.txt. Every alternative translation returned by
    the service is written to its own ./out/alternatives(<n>).txt file.

    If a request fails or the response carries no usable ``data`` text, the
    untranslated chunk is written instead, so the result file keeps one
    output line per input line and later steps stay aligned.

    Args:
        file_path: Path of the UTF-8 text file to translate.
        source_lang: Source language code sent to the service.
        target_lang: Target language code sent to the service.
        max_words: Running word count after which a chunk is cut.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    strings_array = _chunk_lines(lines, max_words)

    alternative_index = 0  # Numbers the alternatives(<n>).txt files.
    with open('./out/translated_result.txt', 'w', encoding='utf-8') as result_file:
        for s in strings_array:
            print(f"正在处理...\n{s}")
            post_data = json.dumps({
                "text": s,
                "source_lang": source_lang,
                "target_lang": target_lang,
            })
            try:
                # The body is already serialized, so it goes through content=;
                # httpx deprecates passing raw text via data=.
                r = httpx.post(url=deeplx_api, content=post_data)
                response_data = r.json()
                translated = response_data['data'] + '\n'
            except httpx.RequestError as exc:
                print(f"An error occurred while requesting {exc.request.url!r}.")
                result_file.write(s + '\n')
                continue
            except (ValueError, KeyError, TypeError) as exc:
                # Non-JSON body, or a payload without a text "data" field.
                print(f"Unexpected response from {deeplx_api!r}: {exc!r}")
                result_file.write(s + '\n')
                continue

            result_file.write(translated)
            print(f"收到数据{response_data}")

            # Save each alternative translation, when provided, to its own file.
            alternatives = response_data.get("alternatives")
            if alternatives is not None:
                print(alternatives)
                for alternative in alternatives:
                    with open(f'./out/alternatives({alternative_index}).txt', 'w', encoding='utf-8') as alt_file:
                        alt_file.write(alternative + '\n')
                    alternative_index += 1
Binary file added Lib/deeplx_windows_amd64.exe
Binary file not shown.
88 changes: 88 additions & 0 deletions Lib/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Replacing Newline Characters: Within the extract_strings_from_xlsx function,
any newline characters within the text of each <t> tag are replaced with a space.
This ensures that each <t> tag's content is treated as a single line when writing to the output file.
Handling Empty Tags: The code includes a check to ensure that if a <t> tag is empty (None),
it does not cause an error.
"""

import os
import zipfile
import tempfile
import xml.etree.ElementTree as ET
import tkinter as tk
from tkinter import filedialog

# Working directory for the extracted text; created at import time.
# exist_ok avoids the check-then-create race of a separate exists() test.
tmp_dir = './tmp'
os.makedirs(tmp_dir, exist_ok=True)

# Path of the file most recently chosen in extract_file ("" until then).
input_path = ""
# Kind of the document most recently extracted by extract_file ("" until
# then). Defined here so that reading it before any extraction cannot fail.
file_type = ""

def extract_strings_from_xlsx(file_path):
    """Return the text of every ``<t>`` element in a workbook's shared strings.

    The workbook is unpacked into a temporary directory and
    ``xl/sharedStrings.xml`` is parsed. Line breaks inside a string
    (``\\r\\n``, ``\\r`` or ``\\n``) are each replaced by a single space so that
    every string occupies exactly one line when written to a text file.

    Args:
        file_path: Path of the .xlsx workbook.

    Returns:
        A list of strings in document order; an empty ``<t>`` yields ''.
        An empty list is returned when the workbook has no
        ``xl/sharedStrings.xml``.
    """
    t_tag = '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}t'

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unpack the .xlsx archive.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        shared_strings_path = os.path.join(tmpdirname, 'xl', 'sharedStrings.xml')
        if not os.path.exists(shared_strings_path):
            print("sharedStrings.xml 文件不存在。")
            return []

        root = ET.parse(shared_strings_path).getroot()

        strings = []
        for t in root.iter(t_tag):
            text_content = t.text or ''
            # "\r\n" goes first so a Windows line break becomes one space.
            # A bare "\r" must go too: it would be read back as a line break
            # and shift every following string by one line.
            for line_break in ('\r\n', '\r', '\n'):
                text_content = text_content.replace(line_break, ' ')
            strings.append(text_content)

        return strings

def write_strings_to_file(strings, output_file):
    """Write each entry of ``strings`` to ``output_file`` on its own line.

    Args:
        strings: Iterable of values; each is formatted and followed by "\\n".
        output_file: Destination path, overwritten and encoded as UTF-8.
    """
    with open(output_file, mode='w', encoding='utf-8') as sink:
        sink.writelines(f"{entry}\n" for entry in strings)


def extract_file():
    """Ask the user for an .xlsx file and dump its strings to a text file.

    A file-selection dialog is shown. When a supported file is chosen, the
    module globals ``input_path`` and ``file_type`` are updated and the
    workbook's shared strings are written, one per line, to
    ``text_extracted.txt`` inside ``tmp_dir``. When no file is chosen or the
    extension is unsupported, a message is printed and the globals are left
    untouched.
    """
    global input_path, file_type

    # A Tk root is required for the dialog; keep it hidden, and destroy it
    # afterwards so repeated calls do not pile up interpreter instances.
    root = tk.Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(title='选择一个 .xlsx 文件,目前只支持了Excel', filetypes=[('Excel files', '*.xlsx')])
    finally:
        root.destroy()

    if not file_path:
        print("没有选择文件。")
        return

    # Only .xlsx has an extractor; any other extension is rejected here
    # rather than failing later on undefined results.
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension != ".xlsx":
        print("目前只支持.xlsx文件")
        return

    # Publish the selection only once it is known to be usable.
    input_path = file_path
    file_type = "Excel"

    strings = extract_strings_from_xlsx(file_path)

    print(f"已处理{file_type}文件: {input_path}")
    output_file = os.path.join(tmp_dir, 'text_extracted.txt')
    write_strings_to_file(strings, output_file)
    print(f"提取的字符串已写入 {output_file}")

10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,12 @@
# DeeplxFile
基于Deeplx提供的免费,不限制文件大小的文件翻译工具

------------------
点击[这里下载](https://github.com/infrost/DeeplxFile/releases)

## 使用说明
提供了编译好的exe版本,你也可以下载源代码,
然后运行
```bash
python deeplxfile.py
```
94 changes: 94 additions & 0 deletions deeplxfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import threading
import subprocess
import sys
import os
import time
from Lib import compose, data_process, extract

# 定义线程任务,执行 deeplx_windows_amd64.exe
def run_deeplx():
    """Run the bundled DeepLX executable and wait for it to exit.

    The executable is looked up as Lib/deeplx_windows_amd64.exe relative to
    the current working directory. A missing executable or a non-zero exit
    status is reported on stdout instead of being raised.
    """
    exe_path = os.path.join("Lib", "deeplx_windows_amd64.exe")
    command = [exe_path]

    try:
        # check_call blocks until the process ends and raises
        # CalledProcessError on a non-zero exit status.
        subprocess.check_call(command)
    except FileNotFoundError:
        print(f"The file {exe_path} was not found.")
    except subprocess.CalledProcessError as e:
        print(f"Error running deeplx_windows_amd64.exe: {e}")

def _ask_language(prompt, languages, confirmation):
    """Show the language menu until a valid 1-based number is entered; return its code."""
    while True:
        print(prompt)
        for i, (lang_name, _) in enumerate(languages, start=1):
            print(f" {i}. {lang_name}")
        try:
            choice = int(input())
        except ValueError:
            # Non-numeric input is treated like an out-of-range number.
            choice = 0
        if 1 <= choice <= len(languages):
            code = languages[choice - 1][1]
            print(f"{confirmation}{code}")
            return code
        print("输入无效,请输入有效的数字序号。")


def loop():
    """Run the interactive translate-a-file cycle until the process is stopped.

    Each round asks for the source and target language, lets the user pick
    a file, extracts its text, translates it and writes the translated
    document. Invalid menu input is asked for again, and a round in which no
    usable file was selected skips translation.
    """
    # Menu entries as (display name, language code sent to the translator).
    languages = [
        ("中文", "ZH"),
        ("英文", "EN"),
        ("日文", "JA"),
        ("韩文", "KO"),
        ("法文", "FR"),
        ("德文", "DE"),
        ("俄文", "RU"),
        ("西班牙文", "ES"),
        ("意大利文", "IT"),
        ("葡萄牙文", "PT"),
    ]

    # Iterate instead of calling loop() recursively, so a long session
    # cannot exhaust the recursion limit.
    while True:
        source_lang = _ask_language(
            "请输入源文件语言(数字序号,按回车结束):", languages, "你选择的语言代码是: ")
        target_lang = _ask_language(
            "\n请输入目标文件语言(数字序号):", languages, "你选择的目标语言代码是: ")

        print("等待选择文件...")
        # extract_file sets extract.file_type only when it actually extracted
        # a document; clearing it first lets a cancelled or rejected
        # selection be told apart from a previous round's result.
        extract.file_type = None
        # Called directly on this thread. The earlier
        # Thread(target=extract.extract_file()) ran it here as well, because
        # the call happened while building the Thread's arguments.
        extract.extract_file()

        if extract.file_type:
            print("完成解压")
            data_process.process_file('./tmp/text_extracted.txt', source_lang, target_lang)
            print(f"完成翻译,正在回写{extract.input_path}")

            # Build the translated document.
            compose.compose_file(extract.file_type, extract.input_path)

            print("更新完成!已在输入的相同目录下生成文件")

        print("DeeplxFile by Kevin, 项目地址https://github.com/infrost/deeplxfile, Version V0.1.0")
        input("Enter键继续翻译...")


def main():
    """Start the DeepLX engine in the background, then enter the interactive loop."""
    # run_deeplx blocks until the engine process exits, so it gets its own thread.
    threading.Thread(target=run_deeplx).start()
    print("正在启动deeplx引擎")
    # Give the engine a moment to start before the first prompt.
    time.sleep(1)
    loop()


if __name__ == "__main__":
    main()

0 comments on commit 92cd3b1

Please sign in to comment.