change to httpx
MorvanZhou committed Nov 29, 2023
1 parent 6cd0d58 commit e0c2805
Showing 8 changed files with 165 additions and 53 deletions.
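
This commit replaces the blocking requests client with the asynchronous httpx client in the file-fetching and link-preview code, strips HTML tags before indexing and snippet generation, and updates the tests to mock httpx instead of hitting the network. A minimal sketch of the requests-to-httpx migration pattern applied here (illustrative function and error handling, not the exact project code):

import httpx


async def fetch_bytes(url: str) -> bytes:
    # Old blocking style was roughly: response = requests.get(url, timeout=5)
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, timeout=5.0)
        except httpx.HTTPError as e:
            # Base class covering ConnectTimeout, ReadTimeout, ConnectError, etc.
            raise RuntimeError(f"failed to get {url}: {e}") from e
    if response.status_code != 200:
        raise RuntimeError(f"unexpected status {response.status_code} for {url}")
    return response.content
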
6 changes: 0 additions & 6 deletions src/rethink/dist-local/css/app.09f0dec8.css
@@ -3175,7 +3175,6 @@ img[data-v-4f42ee16] {
.at-search-result[data-v-5527cb1e]:hover {
background-color: #eeeeee;
}

.result-hl[data-v-5527cb1e] {
font-size: 0.8em;
color: #666;
@@ -3196,15 +3195,12 @@ img[data-v-4f42ee16] {
height: 100%;
margin: 0 auto;
}

.circle-bg[data-v-1bc9a6f2] {
background: #FEFEFE;
}

.circle[data-v-1bc9a6f2] {
height: 50px;
}

.dots[data-v-1bc9a6f2] {
width: 60px;
}
@@ -3219,11 +3215,9 @@ img[data-v-4f42ee16] {
max-height: 400px;
overflow-y: auto;
}

.at-search-height-sm[data-v-e789ed84] {
max-height: 600px !important;
}

.at-search-result-group-label[data-v-e789ed84] {
font-weight: 500;
font-size: 0.9em;
10 changes: 5 additions & 5 deletions src/rethink/dist-local/js/app.js

Large diffs are not rendered by default.

42 changes: 28 additions & 14 deletions src/rethink/models/files/upload.py
@@ -7,8 +7,8 @@
import zipfile
from typing import List, Tuple, Optional

import httpx
import pymongo.errors
import requests
from bson import ObjectId
from bson.tz_util import utc
from fastapi import UploadFile
@@ -630,20 +630,34 @@ async def fetch_image_vditor(uid: str, url: str) -> Tuple[str, const.Code]:
return "", code
if await models.user.user_space_not_enough(u=u):
return "", const.Code.USER_SPACE_NOT_ENOUGH
async with httpx.AsyncClient() as client:
try:
response = await client.get(
url=url,
headers=models.utils.ASYNC_CLIENT_HEADERS,
timeout=5.
)
except (
httpx.ConnectTimeout,
RuntimeError,
httpx.ConnectError,
httpx.ReadTimeout,
httpx.HTTPError
) as e:
logger.info(f"failed to get {url}: {e}")
return "", const.Code.FILE_OPEN_ERROR
if response.status_code != 200:
return "", const.Code.FILE_OPEN_ERROR

content = response.content

file = UploadFile(
filename=url.split("/")[-1],
file=io.BytesIO(content),
headers=Headers(response.headers),
size=len(content)
)

try:
r = requests.get(url)
except requests.exceptions.RequestException:
return url, const.Code.OK

if r.status_code != 200:
return "", const.Code.FILE_OPEN_ERROR
file = UploadFile(
filename=url.split("/")[-1],
file=io.BytesIO(r.content),
headers=Headers(r.headers),
size=len(r.content)
)
res = await file_ops.save_upload_files(
uid=uid,
files=[file],
9 changes: 9 additions & 0 deletions src/rethink/models/search_engine/engine.py
@@ -4,6 +4,7 @@
from typing import List, Tuple, Sequence

from rethink import const
from rethink.models.utils import strip_html_tags


@dataclass
@@ -12,6 +13,10 @@ class SearchDoc:
title: str
body: str

def __post_init__(self):
self.title = strip_html_tags(self.title)
self.body = strip_html_tags(self.body)


@dataclass
class RestoreSearchDoc:
@@ -23,6 +28,10 @@ class RestoreSearchDoc:
disabled: bool
inTrash: bool

def __post_init__(self):
self.title = strip_html_tags(self.title)
self.body = strip_html_tags(self.body)


@dataclass
class SearchResult:
46 changes: 37 additions & 9 deletions src/rethink/models/utils.py
@@ -3,6 +3,8 @@
import math
import re
import uuid
from html.parser import HTMLParser
from io import StringIO
from typing import Tuple

import httpx
@@ -68,7 +70,7 @@ def preprocess_md(md: str, snippet_len: int = 200) -> Tuple[str, str, str]:
title, body = split_title_body(fulltext=md)
title = md2txt(title.strip())
body = md2txt(body.strip())
snippet = body[:snippet_len]
snippet = strip_html_tags(body)[:snippet_len]
return title, body, snippet


@@ -156,14 +158,14 @@ def contain_only_http_link(md: str) -> str:

async def get_title_description_from_link(url: str, language: str) -> Tuple[str, str]:
if language == const.Language.ZH.value:
title = "网址没发现标题"
description = "网址没发现描述"
no_title = "网址没发现标题"
no_description = "网址没发现描述"
elif language == const.Language.EN.value:
title = "No title found"
description = "No description found"
no_title = "No title found"
no_description = "No description found"
else:
title = "No title found"
description = "No description found"
no_title = "No title found"
no_description = "No description found"
async with httpx.AsyncClient() as client:
try:
response = await client.get(
@@ -179,14 +181,15 @@ async def get_title_description_from_link(url: str, language: str) -> Tuple[str,
httpx.HTTPError
) as e:
logger.info(f"failed to get {url}: {e}")
return title, description
return no_title, no_description
if response.status_code in [302, 301]:
url = response.headers["Location"]
return await get_title_description_from_link(url=url, language=language)
if response.status_code != 200:
return title, description
return no_title, no_description
html = response.text

title, description = "", ""
found = re.search(r'<meta[^>]*name="title"[^>]*content="([^"]*)"[^>]*>', html, re.DOTALL)
if found is None:
found = re.search(r'<meta[^>]*content="([^"]*)"[^>]*name="title"[^>]*>', html, re.DOTALL)
@@ -200,4 +203,29 @@ async def get_title_description_from_link(url: str, language: str) -> Tuple[str,
found = re.search(r'<meta[^>]*content="([^"]*)"[^>]*name="description"[^>]*>', html, re.DOTALL)
if found:
description = found.group(1).strip()[:400]
if title == "":
title = no_title
if description == "":
description = no_description
return title, description


class MLStripper(HTMLParser):
def __init__(self):
super().__init__()
self.reset()
self.strict = False
self.convert_charrefs = True
self.text = StringIO()

def handle_data(self, d):
self.text.write(d)

def get_data(self):
return self.text.getvalue()


def strip_html_tags(html):
s = MLStripper()
s.feed(html)
return s.get_data()
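
The strip_html_tags helper above is what SearchDoc, RestoreSearchDoc, and the snippet generation now call to drop inline markup before text reaches the search index. A minimal usage sketch (illustrative input string):

from rethink.models.utils import strip_html_tags

# Only the text nodes survive; tags and attributes are discarded.
assert strip_html_tags("<p>Hello <b>world</b>!</p>") == "Hello world!"
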
7 changes: 6 additions & 1 deletion tests/test_api.py
@@ -6,6 +6,7 @@
import unittest
from pathlib import Path
from typing import Dict
from unittest.mock import patch
from zipfile import ZipFile

from PIL import Image
@@ -513,7 +514,11 @@ def test_upload_image(self):
f1.close()
shutil.rmtree("temp", ignore_errors=True)

def test_put_quick_node(self):
@patch(
"rethink.models.utils.httpx.AsyncClient.get",
return_value=Response(200, content="<title>百度一下</title>".encode("utf-8"))
)
def test_put_quick_node(self, mocker):
resp = self.client.put(
"/api/node/quick",
json={
25 changes: 18 additions & 7 deletions tests/test_models_local.py
@@ -4,8 +4,9 @@
from io import BytesIO
from pathlib import Path
from textwrap import dedent
from unittest.mock import patch

import requests
import httpx
from PIL import Image
from bson import ObjectId
from bson.tz_util import utc
@@ -351,7 +352,7 @@ async def test_files_upload_process(self):
"startAt": now,
"running": True,
"obsidian": {},
"problemFiles": [],
"msg": "",
"code": const.Code.OK.value,
}
res = await models.database.COLL.import_data.insert_one(doc)
@@ -409,22 +410,32 @@ async def test_upload_image_vditor(self):
u, code = await models.user.get(self.uid)
self.assertEqual(used_space + size, u["usedSpace"])

async def test_fetch_image_vditor(self):
@patch(
"rethink.models.files.upload.httpx.AsyncClient.get",
)
async def test_fetch_image_vditor(self, mock_get):
f = open(Path(__file__).parent.parent / "img" / "phone-notes.png", "rb")
mock_get.return_value = httpx.Response(
200,
content=f.read(),
headers={"content-type": "image/png"}
)

u, code = await models.user.get(self.uid)
used_space = u["usedSpace"]

url = "https://rethink.run/favicon.ico"
url = "https://rethink.run/favicon.png"
new_url, code = await models.files.fetch_image_vditor(self.uid, url)
self.assertEqual(const.Code.OK, code)
self.assertTrue(new_url.endswith(".ico"))
self.assertTrue(new_url.endswith(".png"))
self.assertTrue(new_url.startswith("/"))
local_file = Path(__file__).parent / "tmp" / ".data" / new_url[1:]
self.assertTrue(local_file.exists())
local_file.unlink()

u, code = await models.user.get(self.uid)
r = requests.get(url)
self.assertEqual(used_space + len(r.content), u["usedSpace"])
self.assertEqual(used_space + f.tell(), u["usedSpace"])
f.close()

async def test_update_used_space(self):
u, code = await models.user.get(self.uid)
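
The tests above patch httpx.AsyncClient.get and return a prebuilt httpx.Response, so no network access is needed. The same pattern reduced to a self-contained example (hypothetical fetch_text helper; relies on unittest.mock substituting an AsyncMock for the async method on Python 3.8+):

import asyncio
from unittest.mock import patch

import httpx


async def fetch_text(url: str) -> str:
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        return resp.text


@patch("httpx.AsyncClient.get", return_value=httpx.Response(200, content=b"ok"))
def test_fetch_text(mock_get):
    # The patched async method returns the canned response without any I/O.
    assert asyncio.run(fetch_text("https://example.com")) == "ok"
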
73 changes: 62 additions & 11 deletions tests/test_models_utils.py
@@ -1,5 +1,8 @@
import unittest
from textwrap import dedent
from unittest.mock import patch

import httpx

from rethink import const, config
from rethink.models import utils
@@ -92,18 +95,66 @@ def setUpClass(cls) -> None:
def tearDownClass(cls) -> None:
config.get_settings.cache_clear()

@unittest.skip("skip outer connection test")
async def test_get_title_description_from_link(self):
for url, res in [
("https://github.com/MorvanZhou/rethink", True),
# ("https://zhuanlan.zhihu.com/p/610939462?utm_id=0", True),
("https://waqwe12f2f2fa.fffffffff", False),
("https://baidu.com", True),
("https://rethink.run", True),
("https://rethink.run/about", True),
("https://baidu.com/wqwqqqqq", False),
("https://mp.weixin.qq.com/s/jbB0GXbjHpFR8m1-6TSASw", True),
# @unittest.skip("skip outer connection test")
@patch(
"rethink.models.utils.httpx.AsyncClient.get",
)
async def test_get_title_description_from_link(self, mock_get):
for url, content, res in [
(
"https://github.com/MorvanZhou/rethink",
"<title>MorvanZhou/rethink: Rethink: a note taking web app</title>"
"""<meta name="description" content="Rethink: a note taking web app. Contribute to
MorvanZhou/rethink development by creating an account on GitHub.">""",
True
),
(
"https://zhuanlan.zhihu.com/p/610939462?utm_id=0",
"""<head>
<meta charSet="utf-8"/>
<title data-rh="true">python的httpx库如何使用 - 知乎</title>
<meta data-rh="true" name="description" content="httpx是一个基于Python的异步HTTP客户端库,
可以用于发送HTTP请求和接收HTTP响应。以下是一些httpx库的基本使用方法:
发送HTTP GET请求import httpx async with httpx.AsyncClient() as client: response = await…"/>""",
True
),
(
"https://waqwe12f2f2fa.fffffffff",
"",
False
),
(
"https://baidu.com",
"""<title>百度一下,你就知道</title>
<meta name="description" content="全球领先的中文搜索引擎、
致力于让网民更便捷地获取信息,找到所求。百度超过千亿的中文网页数据库,可以瞬间找到相关的搜索结果。">""",
True
),
(
"https://rethink.run",
"""<meta content="Rethink" name="title"><title>rethink</title><meta content="Rethink: think differently" name="description">""",
True
),
(
"https://baidu.com/wqwqqqqq",
"",
False
),
(
"https://mp.weixin.qq.com/s/jbB0GXbjHpFR8m1-6TSASw",
"""<title></title><meta name="description" content="" />""",
False),
]:
if res:
mock_get.return_value = httpx.Response(
status_code=200,
content=content.encode("utf-8"),
)
else:
mock_get.return_value = httpx.Response(
status_code=404,
content=content.encode("utf-8"),
)
title, desc = await utils.get_title_description_from_link(
url, language=const.Language.EN.value)
if res:
