You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# NOTE: the import order below is kept exactly as in the original file; the
# star imports may shadow names, and the trailing stdlib imports re-bind
# ``glob``/``os``/``requests``/``time`` after them.
import os
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
from functools import partial
from pylatexenc.latexnodes import *
from pylatexenc.latexwalker import *
from .latex_fns.latex_parser_ver import *
import glob
import os
import requests
import time

pj = os.path.join


def import_requirements(chatbot=None, history=None, txt=""):
    """Check that the optional ``pylatexenc`` dependency can be imported.

    This is a generator: it has to be driven with ``yield from`` (merely
    calling it creates a generator object and runs nothing).

    Args:
        chatbot: Chat log handed to ``report_execption`` / ``update_ui`` when
            the import fails. When ``None`` the ``ImportError`` is re-raised
            instead of being reported to the UI.
        history: Conversation history handed over together with ``chatbot``;
            an empty list is used when ``None``.
        txt: User input, only used to label the error message.

    Returns:
        As the generator's return value: ``True`` when the import succeeded,
        ``False`` when the failure was reported to the UI.

    Raises:
        ImportError: pylatexenc is missing and no ``chatbot`` was supplied.
    """
    try:
        # Try to import the dependency.
        from pylatexenc.latexwalker import LatexWalker, LatexCharsNode  # noqa: F401
    except ImportError:
        if chatbot is None:
            raise
        if history is None:
            history = []
        # The dependency is missing: tell the user how to install it.
        report_execption(
            chatbot, history,
            a=f"解析项目: {txt}",
            b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pylatexenc```。")
        yield from update_ui(chatbot=chatbot, history=history)
        return False
    return True


# Root folder under which downloaded arXiv sources and results are cached.
ARXIV_CACHE_DIR = os.path.expanduser("~/arxiv_cache/")
# =================================== Utility functions ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '


def switch_prompt(translate_list, mode, more_requirement):
    """Build the user prompts and system prompts for proofreading or translating.

    Args:
        translate_list: The LaTeX fragments to process. Either a sized
            iterable of strings, or an object exposing them as a
            ``sp_file_contents`` attribute (the body originally read that
            attribute from an undefined name ``pfg``; presumably an older
            parameter name -- verify against callers).
        mode: ``'proofread_en'`` or ``'translate_zh'``.
        more_requirement: Extra instruction text spliced into every prompt.

    Returns:
        ``(inputs_array, sys_prompt_array)``: two lists of equal length, one
        user prompt and one system prompt per fragment.

    Raises:
        AssertionError: ``mode`` is not one of the supported values.
    """
    fragments = getattr(translate_list, "sp_file_contents", translate_list)
    n_split = len(fragments)

    if mode == 'proofread_en':
        inputs_array = [
            r"Below is a section from an academic paper, proofread this section."
            + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. "
            + more_requirement
            + r"Answer me only with the revised text:"
            + f"\n\n{frag}"
            for frag in fragments
        ]
        sys_prompt_array = ["You are a professional academic paper writer."] * n_split
    elif mode == 'translate_zh':
        inputs_array = [
            r"Below is a section from an English academic paper, translate it into Chinese. "
            + more_requirement
            + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. "
            + r"Answer me only with the translated text:"
            + f"\n\n{frag}"
            for frag in fragments
        ]
        sys_prompt_array = ["You are a professional translator."] * n_split
    else:
        # Raised explicitly (instead of ``assert False``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("未知指令")
    return inputs_array, sys_prompt_array


def desend_to_extracted_folder_if_exist(project_folder):
    """Descend into the extracted sub-folder if there is one.

    Args:
        project_folder: Path of the project folder.

    Returns:
        The first sub-directory listed by ``glob`` if its name ends with
        ``.extract``; otherwise ``project_folder`` unchanged.
    """
    sub_dirs = [p for p in glob.glob(f'{project_folder}/*') if os.path.isdir(p)]
    if sub_dirs and sub_dirs[0].endswith('.extract'):
        return sub_dirs[0]
    return project_folder


def move_project(project_folder, arxiv_id=None):
    """Copy the project into a fresh work folder and return that folder.

    Args:
        project_folder: Path of the project to copy.
        arxiv_id: When given, the work folder is placed inside the arXiv
            cache (``ARXIV_CACHE_DIR/<arxiv_id>/workfolder``); otherwise a
            time-stamped folder under ``gpt_log/`` is used.

    Returns:
        Path of the new work folder.
    """
    import shutil
    import time

    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'gpt_log/{gen_time_str()}'

    # Best-effort removal of a stale work folder; it usually does not exist.
    shutil.rmtree(new_workfolder, ignore_errors=True)

    # Align sub-folder if there is a folder wrapper: no .tex file at the top
    # level and exactly one entry that is a directory.
    items = glob.glob(pj(project_folder, '*'))
    has_top_level_tex = bool(glob.glob(pj(project_folder, '*.tex')))
    if not has_top_level_tex and len(items) == 1 and os.path.isdir(items[0]):
        project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder


def arxiv_download(chatbot, history, txt):
    """Resolve ``txt`` as an arXiv paper and fetch its source archive.

    Generator; drive it with ``yield from``. It yields UI refreshes and
    returns a ``(path_or_text, arxiv_id)`` pair:

    * ``(txt, None)`` when ``txt`` is not an arXiv ID / arxiv.org URL,
    * ``(error message, None)`` when the URL is not an ``/abs/`` URL,
    * ``(cached translate_zh.pdf path, arxiv_id)`` when a translation is cached,
    * ``(extracted source folder, arxiv_id)`` otherwise.

    Args:
        chatbot: Chat log that progress messages are appended to.
        history: Conversation history passed through to the UI helpers.
        txt: User input: an arXiv ID, an arxiv.org URL, or anything else.

    Raises:
        requests.HTTPError: the source download returned an error status.
    """
    def check_cached_translation_pdf(arxiv_id):
        # Returns the cached translated PDF path, or False when there is none.
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        os.makedirs(translation_dir, exist_ok=True)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            return target_file
        return False

    def is_float(s):
        try:
            float(s)
        except ValueError:
            return False
        return True

    # A bare arXiv ID looks like a float, e.g. "1707.06690".
    if ('.' in txt) and ('/' not in txt) and is_float(txt):
        txt = 'https://arxiv.org/abs/' + txt.strip()
    # An ID with trailing characters: only the first 10 characters are kept.
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):
        txt = 'https://arxiv.org/abs/' + txt[:10]
    if not txt.startswith('https://arxiv.org'):
        return txt, None

    # <-------------- inspect format ------------->
    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # give the UI time to refresh

    url_ = txt  # e.g. https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
        return msg, None

    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    if 'v' in arxiv_id:
        arxiv_id = arxiv_id[:10]  # drop the version suffix
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf:
        return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id + '.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
        proxies, = get_conf('proxies')
        r = requests.get(url_tar, proxies=proxies)
        # Fail before writing: otherwise an HTTP error page would be stored as
        # the ".tar" and then be picked up as a cache hit on every later run.
        r.raise_for_status()
        with open(dst, 'wb') as f:
            f.write(r.content)

    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id


# ========================================= Plugin entry point =====================================================
@CatchException
def 更好的Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Translate a whole LaTeX project into Chinese (plugin entry point).

    Generator that yields UI refreshes. It checks the dependencies, resolves
    ``txt`` (arXiv ID/URL or local folder), copies the project to a work
    folder and, unless ``merge_translate_zh.tex`` already exists there, runs
    ``ProcessLaTeXMain`` and offers its result file for download.

    Args:
        txt: User input: arXiv ID, arxiv.org URL, or a local project path.
        llm_kwargs: Forwarded to ``ProcessLaTeXMain``.
        plugin_kwargs: Plugin options; an empty ``"advanced_arg"`` is removed.
        chatbot: Chat log used for all user-facing messages.
        history: Conversation history; reset to ``[]`` once input is resolved.
        system_prompt: Forwarded to ``ProcessLaTeXMain``.
        web_port: Not used by this function.
    """
    import subprocess

    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky, azwphy。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if plugin_kwargs.get("advanced_arg") == "":
        plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    # NOTE(review): this partial is built but not passed to anything below;
    # presumably it is meant to be handed to ProcessLaTeXMain -- confirm.
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    # The check is done inline: import_requirements() is a generator, so the
    # previous bare call never executed its body.
    try:
        import pylatexenc  # noqa: F401
    except ImportError:
        report_execption(
            chatbot, history,
            a=f"解析项目: {txt}",
            b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pylatexenc```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    try:
        # run() waits for the probe to exit, so no un-reaped child is left behind.
        subprocess.run(['pdflatex', '-version'])
    except OSError:
        chatbot.append([f"解析项目: {txt}",
                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- resolve arXiv input ------------->
    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
    if txt.endswith('.pdf'):
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    # NOTE(review): arxiv_id is deliberately(?) not forwarded, so the work
    # folder is always a fresh gpt_log/<time> folder -- confirm this is intended.
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
        res_test_file = yield from ProcessLaTeXMain(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                                    chatbot, history, system_prompt)
        promote_file_to_downloadzone(file=res_test_file, chatbot=chatbot)

    # TODO: the remaining steps were left commented out in the original and
    # are not implemented yet:
    #   1. compile the PDF (a `编译Latex(...)` step over the work folder),
    #   2. zip the work folder with zip_result(project_folder),
    #   3. report success/failure in the chatbot and offer the archive through
    #      promote_file_to_downloadzone,
    #   4. return the compile status.
Class | 类型
函数插件
Feature Request | 功能请求
基本思路是:用pylatexenc包的parser工具将LaTeX代码转为抽象语法树(AST),通过遍历整个抽象语法树找到需要翻译的纯文本,并记录其在AST中的位置(pos)。把文本丢给ChatGLM翻译,再将得到的代码重新构建为latex代码,导言区加入ctex包,直接编译即可。
尝试开发中的插件代码如下(其中依赖 `pylatexenc==3.0a19`):
- `crazy_functions/latex_fns/latex_parser_ver.py`
- `crazy_functions/TeX论文翻译`
请注意,部分代码未完成。遇到的主要问题为:
每个单词都有一个pos位置信息,所以翻译后的片段也必须包含这种位置信息。例如,`\emph{}` 用于加粗时会将一句话分割为三个部分。如果要保证翻译后的文本还能在适当的位置加粗,则必须让这句话的每一个片段都有对应的翻译(而不是只有整段话的翻译)。为了保持上下文的连贯性,我试图使用这样一种方法:按照token数限制将文本分割为片段,每一段都以句点结束;把整段话提供给GLM,并让它只翻译其中的某一片段;对每一个片段都重复这个操作。但无论怎么写prompt,GLM似乎都无法理解我的意图,所以写issue向各位求助。
最后还有关于 `pylatexenc` 包的一个问题:该包不支持将针对AST的修改重构为LaTeX代码。我能想到的替代方案为,按照文本顺序将收集得到的英文文本逐一替换为相应的中文文本。这个思路应该是比较可行的。最主要的问题还是如何连贯地翻译。
The text was updated successfully, but these errors were encountered: