Merge branch 'sqzw-x:master' into master
northsea4 authored Feb 21, 2024
2 parents 9cb15e5 + 2930694 commit 6cc0b97
Showing 8 changed files with 67 additions and 78 deletions.
49 changes: 19 additions & 30 deletions changelog.md
@@ -1,38 +1,27 @@
## Important
This release removes cloudscraper and upgrades curl-cffi to 0.6.0b9.

This change is intended to resolve problems with curl requests, but it has not been thoroughly tested.
If network requests break, you can temporarily roll back to version 120240207 and file an issue.

The code and its dependencies are now fully compatible with Python 3.8, so a Python 3.8 build can be made to run on Windows 7.
Since both are no longer supported upstream, no official build is provided.
## Added
* guochan (Chinese domestic) scraping improvements by @kikyou18
* Sites: hscangku / cableav by @kikyou18
* UI: hscangku & cableav in site-specific scraping
* macOS img build by @
## Fixed
* airavcc xpath broken
* fc2 title fetched incompletely
* javdbid output logic
* disable tv.dmm api
* unknown actors written to nfo by @kikyou18
* removing a tag removed only part of it
* dmm search page title xpath
* Crop image - Open image: processing result saved to the movie directory

<details>
<summary>Full Changelog</summary>

36927a8 Fix: tv.dmm api invalid (#80)
2cf04d9 Merge pull request #84 from sqzw-x/guochan
6ebe99e Merge PR #83
e6198d8 Opt: javdbid output logic
4c0b778 Formatting
51dd142 Merge pull request #73 from kikiyou18/master
ba58501 Fix: PR compliance revisions
55ecbdb Merge branch 'sqzw-x:master' into master
a046c69 Fix: removing a tag removed only part of it (#78)
edd03b7 Feat: add hscangku and cableav crawlers
bada9e6 Fix: guochan crawlers remove useless characters; number recognition adjustment
b97dd3b Fix: madouqu subtle parameter adjustment
96f68ef Fix: 7mmtv get more extrafanart pics
0da9440 Chore: guochan data collation
9c3b28e Fix: non-javdb site writes irrelevant searchid field
cfa96c8 Fix: madouqu custom website;more actor matching attempts
192c7e0 Fix: not match unknown actor
e38989d Fix: mmtv custom website;try more actor matching rules
fc88133 Fix: erroneous spaces written to nfo file; write manually set actors to nfo file
8d6cd7e Fix: write custom actor names to nfo file
443d647 Fix: fc2 title (#72)
1d359ba Fix: airav_cc xpath (#70)
15a06ba feat(web)!: del cloudscraper; bump curl-cffi to 0.6.0b9
ca38e46 fix(nfo): python3.8 unsupported with expression
b314755 fix(dmm): title xpath (#90)
edd43a4 CI: refine macos build
5af0b14 fix(dmm): wrong comment (#80)
9f2315a Fix: save cropped image to the original directory (#86)
eb8207b UI: add new sites; remove hdouban

</details>
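The practical effect of the cloudscraper removal is that Cloudflare-fronted sites are now fetched through curl-cffi's browser TLS impersonation instead of cloudscraper's challenge solver. Below is a minimal sketch of that request path, assuming only that curl-cffi 0.6.0b9 is installed; the session setup and error handling are illustrative, not the project's exact code (see src/models/base/web.py in this commit for the real implementation).

```python
# Minimal sketch: fetch a page with curl_cffi browser impersonation,
# standing in for the removed cloudscraper.
import curl_cffi.requests

session = curl_cffi.requests.Session()

def fetch(url: str, timeout: int = 10) -> str:
    # impersonate makes the TLS fingerprint match a real browser, which is
    # what lets many Cloudflare-protected sites answer without a challenge.
    response = session.get(url, timeout=timeout, impersonate="chrome120")
    if response.status_code > 299:
        raise RuntimeError(f"{response.status_code} {url}")
    response.encoding = 'utf-8'
    return response.text

if __name__ == '__main__':
    print(fetch("https://example.com")[:200])
```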
4 changes: 2 additions & 2 deletions config.ini.default
@@ -1,6 +1,6 @@
 [modified_time]
-modified_time = 2024-02-07 10:33:32
-version = 120240207
+modified_time = 2024-02-20 19:09:43
+version = 120240220
 
 [media]
 media_path =
4 changes: 2 additions & 2 deletions requirements-mac.txt
@@ -1,6 +1,6 @@
 lxml==4.9.2
 pyquery==1.4.1
-cloudscraper==1.2.71
+# cloudscraper==1.2.71
 requests==2.24.0
 beautifulsoup4==4.9.3
 Pillow==9.4.0
@@ -14,5 +14,5 @@ opencv-contrib-python-headless==4.7.0.68
 deepl-translate==1.2.0
 ping3==4.0.4
 oshash==0.1.1
-curl-cffi==0.5.10
+curl-cffi==0.6.0b9
 AppKit
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,6 +1,6 @@
 lxml==4.9.2
 pyquery==1.4.1
-cloudscraper==1.2.71
+# cloudscraper==1.2.71
 requests==2.24.0
 beautifulsoup4==4.9.3
 Pillow==9.4.0
@@ -13,4 +13,4 @@ opencv-contrib-python-headless==4.7.0.68
 deepl-translate==1.2.0
 ping3==4.0.4
 oshash==0.1.1
-curl-cffi==0.5.10
+curl-cffi==0.6.0b9
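Since 0.6.0b9 is a pre-release pin, it can be worth confirming at runtime that the environment actually picked it up. A generic check (not part of this repo), using only the standard library:

```python
# Verify the installed curl-cffi matches the pinned pre-release.
from importlib.metadata import version  # stdlib since Python 3.8

print(version("curl_cffi"))  # expected: 0.6.0b9
```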
76 changes: 38 additions & 38 deletions src/models/base/web.py
@@ -9,7 +9,7 @@
 from threading import Lock
 from urllib.parse import quote
 
-import cloudscraper
+# import cloudscraper
 import curl_cffi.requests
 import requests
 import urllib3.util.connection as urllib3_cn
@@ -45,8 +45,8 @@ def __init__(self):
         self.session_g = requests.Session()
         self.session_g.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
         self.session_g.mount('http://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
-        self.scraper = cloudscraper.create_scraper(
-            browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False})  # returns a CloudScraper instance
+        # self.scraper = cloudscraper.create_scraper(
+        #     browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False})  # returns a CloudScraper instance
         self.lock = Lock()
         self.pool = ThreadPoolExecutor(32)
         self.curl_session = curl_cffi.requests.Session()
@@ -157,39 +157,39 @@ def post_html(self, url: str, data=None, json=None, headers=None, cookies=None,
         signal.add_log(f"🔴 请求失败!{error_info}")
         return False, error_info
 
-    def scraper_html(self, url: str, proxies=True, cookies=None, headers=None):
-        # 获取代理信息
-        is_docker = config.is_docker
-        timeout = config.timeout
-        retry_times = config.retry
-        if is_docker:
-            return self.get_html(url, proxies=proxies, cookies=cookies)
-        if proxies:
-            proxies = config.proxies
-        else:
-            proxies = {
-                "http": None,
-                "https": None,
-            }
-
-        signal.add_log(f'🔎 Scraper请求 {url}')
-        for i in range(retry_times):
-            try:
-                with self.scraper.get(url, headers=headers, proxies=proxies, cookies=cookies, timeout=timeout) as f:
-                    response = f
-
-                if response.status_code > 299:
-                    error_info = f"{response.status_code} {url} {str(f.cookies).replace('<RequestsCookieJar[', '').replace(']>', '')}"
-                    return False, error_info
-                else:
-                    signal.add_log(f'✅ Scraper成功 {url}')
-                    response.encoding = 'utf-8'
-                    return True, f.text
-            except Exception as e:
-                error_info = '%s\nError: %s' % (url, e)
-                signal.add_log('🔴 重试 [%s/%s] %s' % (i + 1, retry_times, error_info))
-        signal.add_log(f"🔴 请求失败!{error_info}")
-        return False, error_info
+    # def scraper_html(self, url: str, proxies=True, cookies=None, headers=None):
+    #     # 获取代理信息
+    #     is_docker = config.is_docker
+    #     timeout = config.timeout
+    #     retry_times = config.retry
+    #     if is_docker:
+    #         return self.get_html(url, proxies=proxies, cookies=cookies)
+    #     if proxies:
+    #         proxies = config.proxies
+    #     else:
+    #         proxies = {
+    #             "http": None,
+    #             "https": None,
+    #         }
+    #
+    #     signal.add_log(f'🔎 Scraper请求 {url}')
+    #     for i in range(retry_times):
+    #         try:
+    #             with self.scraper.get(url, headers=headers, proxies=proxies, cookies=cookies, timeout=timeout) as f:
+    #                 response = f
+    #
+    #             if response.status_code > 299:
+    #                 error_info = f"{response.status_code} {url} {str(f.cookies).replace('<RequestsCookieJar[', '').replace(']>', '')}"
+    #                 return False, error_info
+    #             else:
+    #                 signal.add_log(f'✅ Scraper成功 {url}')
+    #                 response.encoding = 'utf-8'
+    #                 return True, f.text
+    #         except Exception as e:
+    #             error_info = '%s\nError: %s' % (url, e)
+    #             signal.add_log('🔴 重试 [%s/%s] %s' % (i + 1, retry_times, error_info))
+    #     signal.add_log(f"🔴 请求失败!{error_info}")
+    #     return False, error_info
 
     def _get_filesize(self, url):
         proxies = config.proxies
@@ -312,7 +312,7 @@ def curl_html(self, url, headers=None, proxies=True, cookies=None):
         for i in range(int(retry_times)):
             try:
                 response = self.curl_session.get(url_encode(url), headers=headers, cookies=cookies, proxies=proxies,
-                                                 impersonate="edge99")
+                                                 impersonate="chrome120")
                 if 'amazon' in url:
                     response.encoding = 'Shift_JIS'
                 else:
@@ -334,7 +334,7 @@
 web = WebRequests()
 get_html = web.get_html
 post_html = web.post_html
-scraper_html = web.scraper_html
+scraper_html = web.curl_html
 multi_download = web.multi_download
 curl_html = web.curl_html

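Two related changes land in this file: the impersonation target moves from edge99 to chrome120, and the module-level alias scraper_html is repointed at curl_html, so every call site that still imports scraper_html transparently gets the curl-cffi implementation. A hedged sketch of that aliasing pattern (the class body and caller below are illustrative stand-ins, not the project's code):

```python
# Sketch: repointing a module-level alias keeps legacy call sites working
# after the underlying implementation is swapped out.
class WebRequests:
    def curl_html(self, url):
        # The real method does retries, proxies and browser impersonation;
        # this stub just mimics its (ok, text) return shape.
        return True, "<html>...</html>"

web = WebRequests()
curl_html = web.curl_html
scraper_html = web.curl_html  # old public name, new implementation

# A caller written against the old cloudscraper-era API needs no changes:
ok, html = scraper_html("https://example.com")
```

Keeping the old name as an alias avoids touching every crawler module in the same commit; callers can migrate to curl_html later.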
4 changes: 2 additions & 2 deletions src/models/config/config_generated.py
@@ -10,8 +10,8 @@ class GeneratedConfig:
     Also used as default configuration
     """
     # modified_time
-    modified_time = r'2024-02-07 10:33:32'
-    version = 120240207
+    modified_time = r'2024-02-20 19:09:43'
+    version = 120240220
 
     # media
     media_path = r''
2 changes: 1 addition & 1 deletion src/models/config/config_manual.py
@@ -1,6 +1,6 @@
 class ManualConfig:
     # 设置软件版本
-    local_version = 120240207
+    local_version = 120240220
 
     # 定义配置值类型
     INT_KEY = [
2 changes: 1 addition & 1 deletion src/models/core/nfo.py
@@ -108,7 +108,7 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal
     if not os.path.exists(folder_new_path):
         os.makedirs(folder_new_path)
     delete_file(nfo_new_path)  # 避免115出现重复文件
-    with (open(nfo_new_path, "wt", encoding='UTF-8') as code):
+    with open(nfo_new_path, "wt", encoding='UTF-8') as code:
         print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
         print("<movie>", file=code)
 
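This is the Python 3.8 compatibility fix noted in the changelog ("fix(nfo): python3.8 unsupported with expression"): an `as` clause inside parentheses in a `with` statement is a SyntaxError on Python 3.8 and only parses under the newer PEG-based parser (officially supported from Python 3.10). A minimal illustration, with a hypothetical movie.nfo path:

```python
# On Python 3.8 the parenthesized form fails to parse:
#     with (open("movie.nfo", "wt", encoding="UTF-8") as code):  # SyntaxError
#         ...
# Dropping the parentheses is the 3.8-compatible spelling:
with open("movie.nfo", "wt", encoding="UTF-8") as code:
    print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
    print("<movie>", file=code)
    print("</movie>", file=code)
```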
