first releases

infrost · Aug 15, 2024 · 92cd3b1 · 92cd3b1
1 parent 957a86f
commit 92cd3b1
Show file tree

Hide file tree

Showing 11 changed files with 345 additions and 0 deletions.
diff --git a/Lib/__init__.py b/Lib/__init__.py
diff --git a/Lib/__pycache__/__init__.cpython-312.pyc b/Lib/__pycache__/__init__.cpython-312.pyc
diff --git a/Lib/__pycache__/compose.cpython-312.pyc b/Lib/__pycache__/compose.cpython-312.pyc
diff --git a/Lib/__pycache__/data_process.cpython-312.pyc b/Lib/__pycache__/data_process.cpython-312.pyc
diff --git a/Lib/__pycache__/extract.cpython-312.pyc b/Lib/__pycache__/extract.cpython-312.pyc
diff --git a/Lib/compose.py b/Lib/compose.py
@@ -0,0 +1,66 @@
+import os
+import zipfile
+import xml.etree.ElementTree as ET
+import tempfile
+
+def read_strings_from_file(file_path):
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return [line.strip() for line in f.readlines()]
+
+def update_shared_strings_in_xlsx(file_path, strings):
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # 解压缩 .xlsx 文件
+        with zipfile.ZipFile(file_path, 'r') as zip_ref:
+            zip_ref.extractall(tmpdirname)
+
+        # 读取 sharedStrings.xml 文件
+        shared_strings_path = os.path.join(tmpdirname, 'xl', 'sharedStrings.xml')
+
+        if not os.path.exists(shared_strings_path):
+            print("sharedStrings.xml 文件不存在。")
+            return
+
+        # 解析 XML 文件
+        tree = ET.parse(shared_strings_path)
+        root = tree.getroot()
+
+        # 查找所有 <t> 标签
+        t_elements = list(root.iter('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}t'))
+
+        # 确保提供的字符串数量与 <t> 标签数量匹配
+        if len(strings) != len(t_elements):
+            print(f"警告: 提供的字符串数量 ({len(strings)}) 与现有 <t> 标签数量 ({len(t_elements)}) 不匹配。")
+
+        # 按顺序替换 <t> 标签中的文本
+        for t_element, new_string in zip(t_elements, strings):
+            t_element.text = new_string
+
+        # 将修改后的 XML 写回文件
+        tree.write(shared_strings_path, xml_declaration=True, encoding='UTF-8')
+
+        # 将修改后的文件压缩回 .xlsx
+        new_xlsx_path = file_path.replace('.xlsx', '_translated.xlsx')
+
+        # 如果文件已存在，则删除
+        if os.path.exists(new_xlsx_path):
+            os.remove(new_xlsx_path)
+
+        with zipfile.ZipFile(new_xlsx_path, 'w') as zip_ref:
+            for foldername, subfolders, filenames in os.walk(tmpdirname):
+                for filename in filenames:
+                    file_path = os.path.join(foldername, filename)
+                    arcname = os.path.relpath(file_path, tmpdirname)
+                    zip_ref.write(file_path, arcname)
+
+def compose_file(file_type, input_path):
+
+    result_file_path = './out/translated_result.txt'
+
+    # 读取文本文件内容
+    result_strings = read_strings_from_file(result_file_path)
+
+    if file_type == "Excel":
+        # 更新 Excel 文件中的 sharedStrings.xml
+        update_shared_strings_in_xlsx(input_path, result_strings)
+        print(f"生成翻译后的电子表格...")
+
diff --git a/Lib/data_process.py b/Lib/data_process.py
@@ -0,0 +1,87 @@
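+"""
+Open text_extracted.txt and translate it chunk by chunk.
+
+The text file holds one string per line, for example:
+hello word
+word 0
+openai
+gpt
+
+For a file of m lines:
+- start with a running word count of 0
+- count the words on each line (split on spaces: n spaces means n+1 words)
+- add the count of line i to the running total
+- once the running total exceeds 500, cut there: the lines gathered so far
+  (up to and including line i) form one chunk, joined with newlines
+- print the chunk
+- reset the count and repeat until every line belongs to a chunk
+
+
+"""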
+import httpx
+import json
+import os
+# Directory that receives the translation output; created at import time.
+output_dir = './out'
+os.makedirs(output_dir, exist_ok=True)
+
+# Translate endpoint that process_file posts every chunk to (a local address).
+deeplx_api = "http://127.0.0.1:1188/translate"
+def count_words(line):
+    # 统计一行中的词数，以空格分隔
+    return len(line.split())
+
+def process_file(file_path, source_lang, target_lang):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        lines = file.readlines()
+
+    total_word_count = 0
+    strings_array = []
+    current_string = []
+
+    for i, line in enumerate(lines):
+        line = line.strip()  # 去掉行首尾的空白字符
+        line_word_count = count_words(line)
+        total_word_count += line_word_count
+
+        current_string.append(line)  # 添加当前行到当前字符串中
+
+        if total_word_count > 500:
+            # 当前字符串超过500词，截断并保存
+            strings_array.append('\n'.join(current_string))
+            # 重置统计
+            total_word_count = 0
+            current_string = []
+
+    # 添加最后一部分（如果有剩余）
+    if current_string:
+        strings_array.append('\n'.join(current_string))
+    alternative_index = 0  # 用于命名 alternatives 文件
+    with open('./out/translated_result.txt', 'w', encoding='utf-8') as result_file:
+        for s in strings_array:
+            print(f"正在处理...\n{s}")
+            json_array = str(s)
+            data = {
+                "text": s,
+                "source_lang": source_lang,
+                "target_lang": target_lang
+            }
+            post_data = json.dumps(data)
+            try:
+                # 发送POST请求并打印结果
+                r = httpx.post(url=deeplx_api, data=post_data)
+                response_data = r.json()
+
+                # 保存 data 内容到 translated_result.txt
+                result_file.write(response_data['data'] + '\n')
+                print(f"收到数据{response_data}")
+
+                # 如果存在 alternatives，保存每个替代到不同的文件
+                if "alternatives" in response_data and response_data["alternatives"] is not None :
+                    alternatives = response_data["alternatives"]
+                    print(alternatives)
+                    for alternative in alternatives:
+                        with open(f'./out/alternatives({alternative_index}).txt', 'w', encoding='utf-8') as alt_file:
+                            alt_file.write(alternative + '\n')
+                        alternative_index += 1
+
+            except httpx.RequestError as exc:
+                print(f"An error occurred while requesting {exc.request.url!r}.")
diff --git a/Lib/deeplx_windows_amd64.exe b/Lib/deeplx_windows_amd64.exe
diff --git a/Lib/extract.py b/Lib/extract.py
@@ -0,0 +1,88 @@
+"""
+Replacing Newline Characters: Within the extract_strings_from_xlsx function, 
+any newline characters within the text of each <t> tag are replaced with a space. 
+This ensures that each <t> tag's content is treated as a single line when writing to the output file.
+
+Handling Empty Tags: The code includes a check to ensure that if a <t> tag is empty (None), 
+it does not cause an error.
+
+"""
+
+import os
+import zipfile
+import tempfile
+import xml.etree.ElementTree as ET
+import tkinter as tk
+from tkinter import filedialog
+
+tmp_dir = './tmp'
+if not os.path.exists(tmp_dir):
+    os.makedirs(tmp_dir)
+
+input_path = ""
+
+def extract_strings_from_xlsx(file_path):
+    # 创建临时目录
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # 解压缩 .xlsx 文件
+        with zipfile.ZipFile(file_path, 'r') as zip_ref:
+            zip_ref.extractall(tmpdirname)
+
+        # 读取 sharedStrings.xml 文件
+        shared_strings_path = os.path.join(tmpdirname, 'xl', 'sharedStrings.xml')
+
+        if not os.path.exists(shared_strings_path):
+            print("sharedStrings.xml 文件不存在。")
+            return []
+
+        # 解析 XML 文件
+        tree = ET.parse(shared_strings_path)
+        root = tree.getroot()
+
+        # 存储字符串
+        strings = []
+
+        # 查找所有 <t> 标签
+        for t in root.iter('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}t'):
+            # 将所有换行符替换为单个空格，确保每个 <t> 标签的内容是一行
+            text_content = t.text.replace('\n', ' ') if t.text else ''
+            strings.append(text_content)
+
+        return strings
+
+def write_strings_to_file(strings, output_file):
+    with open(output_file, 'w', encoding='utf-8') as f:
+        for string in strings:
+            f.write(f"{string}\n")
+
+
+def extract_file():
+    # 创建Tkinter窗口但不显示
+    root = tk.Tk()
+    root.withdraw()  # 隐藏主窗口
+
+    # 打开文件选择对话框
+    file_path = filedialog.askopenfilename(title='选择一个 .xlsx 文件，目前只支持了Excel', filetypes=[('Excel files', '*.xlsx')])
+
+    if not file_path:
+        print("没有选择文件。")
+        return
+    global input_path
+    input_path = file_path
+
+    supported_files = [".xlsx",".docx"]
+    file_extension = os.path.splitext(file_path)[1].lower()
+    if file_extension not in supported_files:
+        print("目前只支持.xlsx文件")
+        return
+    if file_extension == ".xlsx":
+        global file_type
+        file_type = "Excel"
+
+        strings = extract_strings_from_xlsx(file_path)
+
+    print(f"已处理{file_type}文件: {input_path}")
+    output_file = os.path.join(tmp_dir, 'text_extracted.txt')
+    write_strings_to_file(strings, output_file)
+    print(f"提取的字符串已写入 {output_file}")
+
diff --git a/README.md b/README.md
@@ -1,2 +1,12 @@
 # DeeplxFile
 基于Deeplx提供的免费，不限制文件大小的文件翻译工具
+
+------------------
+点击[这里下载](https://github.com/infrost/DeeplxFile/releases)
+
+## 使用说明
+提供了编译好的exe版本，你也可以下载源代码，
+然后运行
+```bash
+python deeplxfile.py
+```
diff --git a/deeplxfile.py b/deeplxfile.py
@@ -0,0 +1,94 @@
+import threading
+import subprocess
+import sys
+import os
+import time
+from Lib import compose, data_process, extract
+
+# 定义线程任务，执行 deeplx_windows_amd64.exe
+def run_deeplx():
+    exe_path = os.path.join("Lib", "deeplx_windows_amd64.exe")
+
+    try:
+        # 使用 subprocess 调用可执行文件
+        subprocess.run([exe_path], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error running deeplx_windows_amd64.exe: {e}")
+    except FileNotFoundError:
+        print(f"The file {exe_path} was not found.")
+
+def loop():
+    # 定义语言列表
+    languages = [
+        ("中文", "ZH"),
+        ("英文", "EN"),
+        ("日文", "JA"),
+        ("韩文", "KO"),
+        ("法文", "FR"),
+        ("德文", "DE"),
+        ("俄文", "RU"),
+        ("西班牙文", "ES"),
+        ("意大利文", "IT"),
+        ("葡萄牙文", "PT"),
+    ]
+
+    # 显示语言选项
+    print("请输入源文件语言(数字序号,按回车结束）:")
+    for i, (lang_name, _) in enumerate(languages, start=1):
+        print(f" {i}. {lang_name}")
+
+    # 接收用户输入
+    source_lang_num = int(input())
+
+    # 检查输入是否有效
+    if 1 <= source_lang_num <= len(languages):
+        source_lang = languages[source_lang_num - 1][1]
+        print(f"你选择的语言代码是: {source_lang}")
+    else:
+        print("输入无效，请输入有效的数字序号。")
+
+    # 选择目标语言
+    print("\n请输入目标文件语言(数字序号):")
+    for i, (lang_name, _) in enumerate(languages, start=1):
+        print(f" {i}. {lang_name}")
+
+    target_lang_num = int(input())
+
+    if 1 <= target_lang_num <= len(languages):
+        target_lang = languages[target_lang_num - 1][1]
+        print(f"你选择的目标语言代码是: {target_lang}")
+    else:
+        print("输入无效，请输入有效的数字序号。")
+
+    print("等待选择文件...")
+    # 创建解压线程
+    extract_thread = threading.Thread(target=extract.extract_file())
+    extract_thread.start()
+    extract_thread.join()
+
+    print("完成解压")
+    data_process.process_file('./tmp/text_extracted.txt', source_lang, target_lang)
+    print(f"完成翻译,正在回写{extract.input_path}")
+
+    #生成翻译文件
+    compose.compose_file(extract.file_type, extract.input_path)
+
+    print("更新完成！已在输入的相同目录下生成文件")
+    print("DeeplxFile by Kevin, 项目地址https://github.com/infrost/deeplxfile, Version V0.1.0")
+    input("Enter键继续翻译...")
+    loop()
+
+
+def main():
+    # 创建并启动线程
+    deeplx_thread = threading.Thread(target=run_deeplx)
+    deeplx_thread.start()
+    print("正在启动deeplx引擎")
+    # 给 deeplx 一些时间来启动
+    time.sleep(1)
+    loop()
+
+
+
+if __name__ == "__main__":
+    main()