Skip to content

Commit

Permalink
Merge pull request #1754 from hlohaus/goo
Browse files Browse the repository at this point in the history
Add .har file support for OpenaiChat
  • Loading branch information
hlohaus authored Mar 25, 2024
2 parents bc060e5 + 92358bd commit cf3f8cc
Show file tree
Hide file tree
Showing 13 changed files with 243 additions and 79 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ image_url = response.data[0].url

### Webview GUI

Open the GUI in a window of your OS. Runs on a local/static/ssl server with a js api. Supports login into the OpenAI Chat, Image Upload and streamed Text Generation.
Open the GUI in a window of your OS. Runs on a local/static/ssl server and uses a JavaScript API.
Supports login into the OpenAI Chat, Image Upload and streamed Text Generation.

Supports all platforms, but only Linux has been tested so far.

Expand Down
16 changes: 12 additions & 4 deletions g4f/Provider/HuggingChat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import json

import requests
from aiohttp import ClientSession, BaseConnector

from ..typing import AsyncResult, Messages
Expand All @@ -14,19 +14,27 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin):
working = True
default_model = "meta-llama/Llama-2-70b-chat-hf"
models = [
"google/gemma-7b-it",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"google/gemma-7b-it",
"meta-llama/Llama-2-70b-chat-hf",
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"codellama/CodeLlama-34b-Instruct-hf",
"mistralai/Mistral-7B-Instruct-v0.2",
"openchat/openchat-3.5-0106",
"codellama/CodeLlama-70b-Instruct-hf"
]
model_aliases = {
"openchat/openchat_3.5": "openchat/openchat-3.5-1210",
"openchat/openchat_3.5": "openchat/openchat-3.5-0106",
}

@classmethod
def get_models(cls):
    # Lazily populate and cache the model list from the HuggingChat site.
    if not cls.models:
        url = f"{cls.url}/__data.json"
        # NOTE(review): __data.json appears to be SvelteKit's serialized page
        # data: "nodes"[0]["data"] is a flat list in which values reference
        # other entries by integer index. Presumably data[0]["models"] is the
        # index of the model-index list, each entry of which has a "name"
        # index pointing at the model id string — verify against the live
        # endpoint, this format is undocumented and may change.
        data = requests.get(url).json()["nodes"][0]["data"]
        models = [data[key]["name"] for key in data[data[0]["models"]]]
        cls.models = [data[key] for key in models]
    return cls.models

@classmethod
async def create_async_generator(
cls,
Expand Down
72 changes: 12 additions & 60 deletions g4f/Provider/needs_auth/OpenaiChat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
import time
from aiohttp import ClientWebSocketResponse

try:
from py_arkose_generator.arkose import get_values_for_request
has_arkose_generator = True
except ImportError:
has_arkose_generator = False

try:
import webview
has_webview = True
Expand All @@ -35,6 +29,7 @@
from ...requests.aiohttp import StreamSession
from ...image import to_image, to_bytes, ImageResponse, ImageRequest
from ...errors import MissingRequirementsError, MissingAuthError, ProviderNotWorkingError
from ..openai.har_file import getArkoseAndAccessToken
from ... import debug

class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
Expand Down Expand Up @@ -353,18 +348,6 @@ async def create_async_generator(
timeout=timeout
) as session:
api_key = kwargs["access_token"] if "access_token" in kwargs else api_key
if cls._headers is None or cls._expires is None or time.time() > cls._expires:
if cls._headers is None:
cookies = get_cookies("chat.openai.com", False) if cookies is None else cookies
api_key = cookies["access_token"] if "access_token" in cookies else api_key
if api_key is None:
try:
await cls.webview_access_token() if has_webview else None
except Exception as e:
if debug.logging:
print(f"Use webview failed: {e}")
else:
api_key = cls._api_key if api_key is None else api_key

if api_key is not None:
cls._create_request_args(cookies)
Expand All @@ -380,14 +363,12 @@ async def create_async_generator(
if debug.logging:
print("OpenaiChat: Load default_model failed")
print(f"{e.__class__.__name__}: {e}")

arkose_token = None
if cls.default_model is None:
login_url = os.environ.get("G4F_LOGIN_URL")
if login_url:
yield f"Please login: [ChatGPT]({login_url})\n\n"
try:
cls.browse_access_token(proxy)
except MissingRequirementsError:
raise MissingAuthError(f'Missing "access_token". Add a "api_key" please')
arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy)
cls._create_request_args(cookies)
cls._set_api_key(api_key)
cls.default_model = cls.get_model(await cls.get_default_model(session, cls._headers))

async with session.post(
Expand All @@ -402,9 +383,10 @@ async def create_async_generator(
need_arkose = data["arkose"]["required"]
chat_token = data["token"]

if need_arkose and not has_arkose_generator:
raise ProviderNotWorkingError("OpenAI Plus Subscriber are not working")
raise MissingRequirementsError('Install "py-arkose-generator" package')
if need_arkose and arkose_token is None:
arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy)
cls._create_request_args(cookies)
cls._set_api_key(api_key)

try:
image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None
Expand Down Expand Up @@ -439,8 +421,7 @@ async def create_async_generator(
**cls._headers
}
if need_arkose:
raise ProviderNotWorkingError("OpenAI Plus Subscriber are not working")
headers["OpenAI-Sentinel-Arkose-Token"] = await cls.get_arkose_token(session, cls._headers, blob)
headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token
headers["OpenAI-Sentinel-Chat-Requirements-Token"] = chat_token

async with session.post(
Expand Down Expand Up @@ -491,7 +472,7 @@ async def iter_messages_chunk(
):
yield chunk
finally:
await ws.aclose()
await ws.aclose() if hasattr(ws, "aclose") else await ws.close()
break
async for chunk in cls.iter_messages_line(session, message, fields):
if fields.finish_reason is not None:
Expand Down Expand Up @@ -611,35 +592,6 @@ def browse_access_token(cls, proxy: str = None, timeout: int = 1200) -> None:
finally:
driver.close()

@classmethod
async def get_arkose_token(cls, session: StreamSession, headers: dict, blob: str) -> str:
"""
Obtain an Arkose token for the session.
Args:
session (StreamSession): The session object.
Returns:
str: The Arkose token.
Raises:
RuntimeError: If unable to retrieve the token.
"""
config = {
"pkey": "35536E1E-65B4-4D96-9D97-6ADB7EFF8147",
"surl": "https://tcr9i.chat.openai.com",
"headers": headers,
"site": cls.url,
"data": {"blob": blob}
}
args_for_request = get_values_for_request(config)
async with session.post(**args_for_request) as response:
await raise_for_status(response)
decoded_json = await response.json()
if "token" in decoded_json:
return decoded_json["token"]
raise RuntimeError(f"Response: {decoded_json}")

@classmethod
async def fetch_access_token(cls, session: StreamSession, headers: dict):
async with session.get(
Expand Down
Empty file added g4f/Provider/openai/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions g4f/Provider/openai/crypt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import json
import base64
import hashlib
import random
from Crypto.Cipher import AES

def pad(data: str) -> bytes:
    """Encode *data* and apply PKCS#7 padding to a 16-byte block boundary.

    Each padding byte carries the padding length, so an exact multiple of
    16 bytes gains a full extra block of value 16.
    """
    raw = data.encode()
    pad_len = 16 - len(raw) % 16
    return raw + bytes([pad_len]) * pad_len

def encrypt(data, key):
    """Encrypt *data* with AES-256-CBC into a CryptoJS-style JSON envelope.

    Key material (32-byte key + 16-byte IV) is derived from *key* plus a
    random 8-letter salt via iterated MD5 (OpenSSL EVP_BytesToKey style).
    Returns a JSON string with base64 ciphertext ("ct"), hex IV ("iv") and
    hex-encoded salt ("s").
    """
    # Salt: 8 random lowercase ASCII letters.
    salt = "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(8))

    # Derive 48 bytes (96 hex chars) of key material by chained MD5 hashing:
    # each round hashes previous-digest || key || salt.
    key_material = ""
    digest = b""
    for _ in range(3):
        digest = hashlib.md5(digest + key.encode() + salt.encode()).digest()
        key_material += digest.hex()

    cipher = AES.new(
        bytes.fromhex(key_material[:64]),   # first 32 bytes -> AES-256 key
        AES.MODE_CBC,
        bytes.fromhex(key_material[64:96]), # next 16 bytes -> IV
    )
    ciphertext = cipher.encrypt(pad(data))

    return json.dumps(
        {
            "ct": base64.b64encode(ciphertext).decode(),
            "iv": key_material[64:96],
            "s": salt.encode().hex(),
        }
    )

def unpad(data: bytes) -> bytes:
    """Strip PKCS#7 padding: the last byte gives the number of pad bytes."""
    pad_len = data[-1]
    return data[:len(data) - pad_len]

def decrypt(data: str, key: str):
    """Decrypt a CryptoJS-style envelope produced by :func:`encrypt`.

    *data* is a base64-wrapped JSON object with "ct" (base64 ciphertext),
    "iv" (hex) and "s" (hex salt). Key material is rebuilt with the same
    iterated-MD5 derivation used by :func:`encrypt`.

    Returns the unpadded plaintext string when it looks like the expected
    arkose payload (starts with '[{"key":'); otherwise falls through and
    implicitly returns None — callers must handle that.
    """
    envelope = json.loads(base64.b64decode(data))
    ciphertext = base64.b64decode(envelope["ct"])
    iv = bytes.fromhex(envelope["iv"])
    salt = bytes.fromhex(envelope["s"])

    # Rebuild 96 hex chars of key material from key + recorded salt.
    key_material = ""
    digest = b""
    for _ in range(3):
        digest = hashlib.md5(digest + key.encode() + salt).digest()
        key_material += digest.hex()

    cipher = AES.new(bytes.fromhex(key_material[:64]), AES.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    if plaintext.startswith(b'[{"key":'):
        return unpad(plaintext).decode()
124 changes: 124 additions & 0 deletions g4f/Provider/openai/har_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import base64
import json
import os
import re
import time
import uuid
import random
from urllib.parse import unquote
from copy import deepcopy

from .crypt import decrypt, encrypt
from ...requests import StreamSession

arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk = None
accessToken = None

class arkReq:
    # Container for one arkose request recorded in a .har capture: the
    # URL, headers, body and cookies needed to replay it, plus the
    # decrypted browser fingerprint ("bx") and the user-agent string.
    def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
        self.arkURL = arkURL          # endpoint the arkose request was sent to
        self.arkBx = arkBx            # decrypted "bx" fingerprint payload
        self.arkHeader = arkHeader    # request headers (cookie/length stripped)
        self.arkBody = arkBody        # form body fields (minus "rnd")
        self.arkCookies = arkCookies  # cookies recorded with the request
        self.userAgent = userAgent    # user-agent used during the recording

def readHAR():
    """Scan the working directory tree for .har files and extract both the
    recorded arkose request template and the OpenAI access token.

    Returns:
        tuple: (arkReq, str) — the last arkose request parsed and the
        access token found in the session response.

    Raises:
        RuntimeError: if no .har file exists, if no arkose request is
            recorded, or if no access token is found.
    """
    harPath = []
    chatArks = []
    accessToken = None
    for root, _, files in os.walk("./"):
        for file in files:
            if file.endswith(".har"):
                harPath.append(os.path.join(root, file))
    if not harPath:
        raise RuntimeError("No .har file found")
    for path in harPath:
        with open(path, 'r') as file:
            try:
                harFile = json.load(file)
            except json.JSONDecodeError:
                # Not a valid HAR/JSON file; skip it.
                continue
        for v in harFile['log']['entries']:
            if arkPreURL in v['request']['url']:
                chatArks.append(parseHAREntry(v))
            elif v['request']['url'] == sessionUrl:
                accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
    if not chatArks:
        # Bug fix: the original constructed this RuntimeError without
        # raising it, so chatArks.pop() below crashed with IndexError.
        raise RuntimeError("No arkose requests found in .har files")
    if not accessToken:
        # Bug fix: same missing `raise` on the access-token check.
        raise RuntimeError("No accessToken found in .har files")
    return chatArks.pop(), accessToken

def parseHAREntry(entry) -> arkReq:
    """Build an arkReq template from a single HAR log entry.

    Strips pseudo-headers, content-length and cookie headers, drops the
    volatile "rnd" body field, and decrypts the recorded "bda" payload
    back into the plaintext "bx" fingerprint.
    """
    request = entry['request']
    headers = {
        h['name'].lower(): h['value']
        for h in request['headers']
        if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')
    }
    body = {
        p['name']: unquote(p['value'])
        for p in request['postData']['params']
        if p['name'] not in ['rnd']
    }
    cookies = [
        {'name': c['name'], 'value': c['value'], 'expires': c['expires']}
        for c in request['cookies']
    ]
    parsed = arkReq(
        arkURL=request['url'],
        arkBx="",
        arkHeader=headers,
        arkBody=body,
        arkCookies=cookies,
        userAgent=headers.get('user-agent', ''),
    )
    # The recorded "bda" is encrypted with (user-agent + x-ark-esync-value)
    # as the key; reverse that to recover the plaintext fingerprint.
    window = parsed.arkHeader['x-ark-esync-value']
    parsed.arkBx = decrypt(parsed.arkBody["bda"], parsed.userAgent + window)
    return parsed

def genArkReq(chatArk: arkReq) -> arkReq:
    """Clone the recorded arkose request and refresh its dynamic fields.

    Re-encrypts the fingerprint for the current time window, regenerates
    the "rnd" field, and flattens the cookie list into a name->value dict
    ready to send.
    """
    if not chatArk:
        raise RuntimeError("No .har file with arkose found")

    newArk: arkReq = deepcopy(chatArk)
    if newArk is None or not newArk.arkBody or not newArk.arkHeader:
        raise RuntimeError("The .har file is not valid")

    # Fresh encrypted fingerprint + matching esync time window.
    bda, bw = getBDA(newArk)
    newArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
    newArk.arkBody['rnd'] = str(random.random())
    newArk.arkHeader['x-ark-esync-value'] = bw
    newArk.arkCookies = {c['name']: c['value'] for c in newArk.arkCookies}
    return newArk

async def sendRequest(tmpArk: arkReq, proxy: str = None):
    """POST the prepared arkose request and return the resulting token.

    Args:
        tmpArk: the refreshed request template from genArkReq().
        proxy: optional https proxy URL.

    Raises:
        RuntimeError: if the response contains no usable arkose token.
    """
    async with StreamSession(headers=tmpArk.arkHeader, cookies=tmpArk.arkCookies, proxies={"https": proxy}) as session:
        async with session.post(tmpArk.arkURL, data=tmpArk.arkBody) as response:
            arkose = (await response.json()).get("token")
    # Bug fix: the original *returned* a RuntimeError instance instead of
    # raising it, and the substring test crashed with TypeError when the
    # response had no "token" (arkose is None).
    if arkose is None or "sup=1|rid=" not in arkose:
        raise RuntimeError("No valid arkose token generated")
    return arkose

def getBDA(arkReq: arkReq):
    """Refresh the dynamic fields of the recorded fingerprint and re-encrypt it.

    Returns:
        tuple: (encrypted fingerprint string, esync time-window string "bw").
    """
    fingerprint = arkReq.arkBx

    # Replace the "n" value (base64 timestamp) with a fresh one.
    fingerprint = re.sub(
        r'"key":"n","value":"\S*?"',
        f'"key":"n","value":"{getN()}"',
        fingerprint,
    )
    # Swap the recorded device UUID (key "4b4b269e68") for a new random one.
    match = re.search(r'"key":"4b4b269e68","value":"(\S*?)"', fingerprint)
    if match:
        fingerprint = fingerprint.replace(match.group(1), str(uuid.uuid4()))

    bw = getBw(getBt())
    # Encryption key is user-agent + current time window, mirroring what
    # the arkose client-side script does.
    return encrypt(fingerprint, arkReq.userAgent + bw), bw

def getBt() -> int:
    """Return the current Unix timestamp truncated to whole seconds."""
    now = time.time()
    return int(now)

def getBw(bt: int) -> str:
    """Round the timestamp *bt* down to its 6-hour (21600 s) window, as a string."""
    window = (bt // 21600) * 21600
    return str(window)

def getN() -> str:
    """Return the current Unix timestamp, base64-encoded as ASCII text."""
    seconds = str(int(time.time()))
    return base64.b64encode(seconds.encode()).decode()

async def getArkoseAndAccessToken(proxy: str):
    """Return (arkose_token, access_token, cookies) for an OpenAI request.

    On first call, reads the request template and access token from local
    .har files and caches them in module globals; every call then replays
    a freshly re-encrypted arkose request.
    """
    global chatArk, accessToken
    if chatArk is None or accessToken is None:
        chatArk, accessToken = readHAR()
    freshReq = genArkReq(chatArk)
    arkose = await sendRequest(freshReq, proxy)
    return arkose, accessToken, freshReq.arkCookies
4 changes: 2 additions & 2 deletions g4f/gui/client/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@
<div class="box input-box">
<textarea id="message-input" placeholder="Ask a question" cols="30" rows="10"
style="white-space: pre-wrap;resize: none;"></textarea>
<label class="file-label" for="image" title="Works with Bing, Gemini, OpenaiChat and You">
<label class="file-label image-label" for="image" title="Works with Bing, Gemini, OpenaiChat and You">
<input type="file" id="image" name="image" accept="image/*" required/>
<i class="fa-regular fa-image"></i>
</label>
<label class="file-label" for="camera">
<label class="file-label image-label" for="camera">
<input type="file" id="camera" name="camera" accept="image/*" capture="camera" required/>
<i class="fa-solid fa-camera"></i>
</label>
Expand Down
Loading

0 comments on commit cf3f8cc

Please sign in to comment.