Skip to content

Commit

Permalink
1.8 OpenAI 格式化输出
Browse files: browse the repository at this point in the history
1. OpenAI SDK 更新到 1.42.0
2. OpenAI 接口支持格式化输出
3. OpenAI 切换为局部代理
4. 翻译长文本的截断段落长度可配置
  • Loading branch information
lyy289065406 committed Aug 28, 2024
1 parent 7953877 commit da5c380
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 67 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# For a discussion on single-sourcing the version across setup.py and the
# project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='1.7', # Required. eg. 1.2.3
version='1.8', # Required. eg. 1.2.3

# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
Expand Down Expand Up @@ -143,7 +143,7 @@
#
# For an analysis of "install_requires" vs pip's requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=['py-color-log>=1.0.4', 'requests>=2.22.0', 'openai==0.27.2', 'tencentcloud-sdk-python==3.0.681'], # Optional
install_requires=['py-color-log>=1.0.4', 'requests>=2.22.0', 'openai==1.42.0', 'tencentcloud-sdk-python==3.0.681'], # Optional

# List additional groups of dependencies here (e.g. development
# dependencies). Users will be able to install these using the "extras"
Expand Down
1 change: 1 addition & 0 deletions src/transgpt/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
CHARSET = 'UTF-8'
CRLF = "\n"
DOUBLE_CRLF = "\n\n"
LOCALHOST = '127.0.0.1'
6 changes: 4 additions & 2 deletions src/transgpt/_trans_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@ class BaseTranslation :

__metaclass__ = ABCMeta # 定义为抽象类

def __init__(self, api_id, api_key) -> None :
def __init__(self, api_id, api_key, cut_len=500) -> None :
"""
翻译文本
:param api_id : 接口 API_ID / APP_ID (ChatGPT 不需要)
:param api_key : 接口 API_KEY / SECRET_KEY
:param cut_len : 自动切割长文本的每一段长度(取决于 API 接口限制每次翻译的字数)
"""
self.api_id = api_id
self.api_key = api_key
self.cut_len = cut_len


def translate(self, content, from_lang='', to_lang='', savepath='', oncesave=False, args={}) -> str :
Expand Down Expand Up @@ -114,7 +116,7 @@ def _save_trans(self, content, savepath) :

@abstractmethod
def _len_limit(self) :
return 500 # API 接口限制每次翻译的字数
return self.cut_len


@abstractmethod
Expand Down
9 changes: 2 additions & 7 deletions src/transgpt/trans_baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

class BaiduTranslation(BaseTranslation) :

def __init__(self, api_id, api_key, api_url=BAIDU_API_URL) -> None :
BaseTranslation.__init__(self, api_id, api_key)
def __init__(self, api_id, api_key, api_url=BAIDU_API_URL, cut_len=2000) -> None :
BaseTranslation.__init__(self, api_id, api_key, cut_len)
self.api_url = api_url


Expand Down Expand Up @@ -71,8 +71,3 @@ def _to_sign(self, data) :
)).encode(encoding=CHARSET)
).hexdigest()
return salt, sign


def _len_limit(self) :
return 2000

76 changes: 31 additions & 45 deletions src/transgpt/trans_chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,44 @@

import os
import time
import openai
import httpx
from openai import OpenAI
from ._settings import *
from ._trans_base import BaseTranslation

HTTP_PROXY = "HTTP_PROXY"
HTTPS_PROXY = "HTTPS_PROXY"

# GPT 接口模型定义 https://platform.openai.com/docs/models/
CHATGPT_35_TURBO = "gpt-3.5-turbo" # 16K
CHATGPT_4 = "gpt-4" # 8K
CHATGPT_4_TRUBO = "gpt-4-turbo" # 128K
CHATGPT_4o = "gpt-4o" # 128K(静态模型,适合翻译)
CHATGPT_4o_LATEST = "chatgpt-4o-latest" # 128K(动态模型,价格高)
CHATGPT_4o_MINI = "gpt-4o-mini" # 128K(翻译较弱)
CHATGPT_35_TURBO = "gpt-3.5-turbo" # 16K
CHATGPT_4 = "gpt-4" # 8K
CHATGPT_4_TRUBO = "gpt-4-turbo" # 128K
CHATGPT_4o = "gpt-4o" # 128K(静态模型,适合翻译)
CHATGPT_4o_20240806 = "gpt-4o-2024-08-06" # 128K(静态模型,100% 格式化输出)
CHATGPT_4o_LATEST = "chatgpt-4o-latest" # 128K(动态模型,价格高)
CHATGPT_4o_MINI = "gpt-4o-mini" # 128K(翻译较弱)

HTTP_PROXY = "HTTP_PROXY"
HTTPS_PROXY = "HTTPS_PROXY"

ARG_ROLE = 'role'
ARG_OPENAI_MODEL = 'openai_model'
ARG_PROXY_IP = 'proxy_ip'
ARG_PROXY_PORT = 'proxy_port'
ARG_RSP_FORMAT = 'response_format'

class ChatgptTranslation(BaseTranslation) :

RETRY = 3
RETRY_WAIT_SECONDS = 30

def __init__(self, openai_key, openai_model=CHATGPT_4o_MINI, proxy_ip='127.0.0.1', proxy_port=0) :
BaseTranslation.__init__(self, '', openai_key)
openai.api_key = openai_key
def __init__(self, openai_key, openai_model=CHATGPT_4o_MINI, proxy_ip='127.0.0.1', proxy_port=0, cut_len=500, response_format=None) :
BaseTranslation.__init__(self, '', openai_key, cut_len)
proxy = f"http://{proxy_ip}:{proxy_port}" if proxy_port > 0 else ""
proxies = { "http://": proxy, "https://": proxy, }
http_cli = httpx.Client(proxies=proxies) if proxy else None

self.ai_cli = OpenAI(api_key=openai_key, http_client=http_cli)
self.response_format = response_format
self.model = openai_model or CHATGPT_4o_MINI
self.proxy = f"http://{proxy_ip}:{proxy_port}" if proxy_port > 0 else ""


def _translate(self, segment, from_lang='英文', to_lang='中文', args={}) :
Expand All @@ -56,49 +64,27 @@ def _translate(self, segment, from_lang='英文', to_lang='中文', args={}) :


def _ask_gpt(self, role_setting, segment) :
self._enable_proxy()
msg = [
role_setting,
{ "role": "user", "content": segment }
]
rsp = self._wait_for_ask(msg)
rst = rsp.get("choices")[0]["message"]["content"]
self._disable_proxy()
return rst
answer = self._wait_for_ask(msg)
return answer


def _wait_for_ask(self, question) :
rsp = ""
answer = ""
for i in range(self.RETRY) :
try :
rsp = openai.ChatCompletion.create(
rsp = self.ai_cli.chat.completions.create(
model=self.model,
messages=question
messages=question,
response_format=self.response_format
)
answer = rsp.choices[0].message.content
break

except Exception as e:
rsp = {
'choices': [{
'message': {
'content': f"[ERROR] ChatGPT No Response: {e}"
}
}]
}
except Exception as e :
answer = f"[ERROR] ChatGPT No Response: {e}"
time.sleep(self.RETRY_WAIT_SECONDS)
return rsp


def _enable_proxy(self) :
os.environ[HTTP_PROXY] = self.proxy
os.environ[HTTPS_PROXY] = self.proxy


def _disable_proxy(self) :
os.environ[HTTP_PROXY] = ""
os.environ[HTTPS_PROXY] = ""


def _len_limit(self) :
return 500

return answer
8 changes: 2 additions & 6 deletions src/transgpt/trans_tencent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

class TencentTranslation(BaseTranslation) :

def __init__(self, api_id, api_key, region=GZ_REGION) :
BaseTranslation.__init__(self, api_id, api_key)
def __init__(self, api_id, api_key, region=GZ_REGION, cut_len=2000) :
BaseTranslation.__init__(self, api_id, api_key, cut_len)
cred = Credential(api_id, api_key)
self.client = TmtClient(cred, region)

Expand All @@ -43,8 +43,4 @@ def _translate(self, segment, from_lang='en', to_lang='zh', args={}) :
rsp = self.client.TextTranslate(req)
return rsp.TargetText


def _len_limit(self) :
return 2000


13 changes: 8 additions & 5 deletions src/transgpt/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
def trans(content, api_id, api_key,
from_lang='', to_lang='',
platform=TENCENT, savepath='', oncesave=False,
cut_len=500,
args={}
) -> str:
"""
Expand All @@ -32,6 +33,7 @@ def trans(content, api_id, api_key,
:param platform : 翻译平台,目前支持: chatgpt, baidu, tencent(默认)
:param savepath : 翻译文本保存的位置,若为空则不保存(可通过返回值获取)
:param oncesave : 是否一次性保存翻译文本(对于超长文本,内部会进行分段翻译,为了避免网络异常导致已翻译文本丢失,此项默认关闭)
:param cut_len : 自动切割长文本的每一段长度(取决于 API 接口限制每次翻译的字数)
:param args : hash 其他参数表
api_url : 仅百度翻译有用:接口 API 路径
region : 仅腾讯翻译有用:翻译服务器所在的区域
Expand All @@ -46,17 +48,18 @@ def trans(content, api_id, api_key,
log.info(f"正在使用 [{platform}] 翻译文本 ...")
if platform == BAIDU :
api_url = args.get(ARG_API_URL) or BAIDU_API_URL
client = BaiduTranslation(api_id, api_key, api_url)
client = BaiduTranslation(api_id, api_key, api_url, cut_len)

elif platform == CHATGPT :
openai_model = args.get(ARG_OPENAI_MODEL) or CHATGPT_35_TURBO
proxy_ip = args.get(ARG_PROXY_IP) or '127.0.0.1'
openai_model = args.get(ARG_OPENAI_MODEL) or CHATGPT_4o_MINI
proxy_ip = args.get(ARG_PROXY_IP) or LOCALHOST
proxy_port = args.get(ARG_PROXY_PORT) or 0
client = ChatgptTranslation(api_key, openai_model, proxy_ip, proxy_port)
response_format = args.get(ARG_RSP_FORMAT)
client = ChatgptTranslation(api_key, openai_model, proxy_ip, proxy_port, cut_len, response_format)

else :
region = args.get(ARG_REGION) or GZ_REGION
client = TencentTranslation(api_id, api_key, region)
client = TencentTranslation(api_id, api_key, region, cut_len)

trans_content = client.translate(content, from_lang, to_lang, savepath, oncesave, args)
log.info(f"文本翻译完成")
Expand Down

0 comments on commit da5c380

Please sign in to comment.