Commit
Merge PR sqzw-x#83
* Fix: write custom actor names to nfo file

* Fix: stray space written to nfo file; write manually-set actors to nfo file

* Fix: mmtv custom website; try more actor matching rules

* Fix: do not match unknown actor

* Fix: madouqu custom website; more actor matching attempts

* Fix: non-javdb site writes irrelevant searchid field

* Chore: guochan data collation

* Fix: 7mmtv get more extrafanart pics

* Fix: madouqu subtle parameter adjustment

* Fix: guochan crawlers remove useless characters; number recognition a…

* Feat: add hscangku and cableav crawlers

* Merge branch 'sqzw-x:master' into master

* Fix: PR compliance revisions

* Merge pull request sqzw-x#73 from kikiyou18/master

* Opt: javdbid output logic
sqzw-x authored Feb 7, 2024
1 parent a046c69 commit 6ebe99e
Showing 12 changed files with 614 additions and 243 deletions.
2 changes: 2 additions & 0 deletions src/controllers/main_window/main_window.py
@@ -2061,6 +2061,8 @@ def _netResult(self):
             'mdtv': ['https://www.mdpjzip.xyz', ''],
             'madouqu': ['https://madouqu.com', ''],
             'cnmdb': ['https://cnmdb.net', ''],
+            'hscangku': ['https://hscangku.net', ''],
+            'cableav': ['https://cableav.tv', ''],
             'lulubar': ['https://lulubar.co', ''],
             'love6': ['https://love6.tv', ''],
             'yesjav': ['http://www.yesjav.info', ''],
4 changes: 4 additions & 0 deletions src/models/config/config_manual.py
@@ -67,6 +67,8 @@ class ManualConfig:
         'lulubar',
         'madouqu',
         'mdtv',
+        'hscangku',
+        'cableav',
         'mgstage',
         'mywife',
         'prestige',
@@ -513,6 +515,8 @@ class ManualConfig:
         'mdtv': 'mdtv',
         'mdpjzip': 'mdtv',
         'madouqu': 'madouqu',
+        'hsck': 'hscangku',
+        'cableav': 'cableav',
         'mgstage': 'mgstage',
         '7mmtv': '7mmtv',
         'bb9711': '7mmtv',
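This second table maps URL keywords to crawler names, so 'hsck' (presumably an alternate domain keyword for hscangku) and 'cableav' each resolve to their crawler. A minimal sketch of how such an alias table can be consulted, assuming a hypothetical helper crawler_for_url that is not part of this diff:

# Hypothetical helper; the real lookup in ManualConfig's callers may differ.
def crawler_for_url(url, alias_map):
    # return the crawler name for the first keyword found in the URL, else ''
    for keyword, crawler in alias_map.items():
        if keyword in url:
            return crawler
    return ''

print(crawler_for_url('https://cableav.tv/abc', {'hsck': 'hscangku', 'cableav': 'cableav'}))  # cableav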
10 changes: 8 additions & 2 deletions src/models/core/crawler.py
@@ -11,7 +11,7 @@
 from models.core.flags import Flags
 from models.crawlers import airav_cc_new, airav_new, avsex, avsox, cnmdb, dahlia, dmm, faleno, fantastica, fc2, fc2club, \
     fc2hub, freejavbt, getchu, getchu_dmm, giga, hdouban, iqqtv_new, jav321, javbus, javdb, javlibrary_new, kin8, love6, \
-    lulubar, madouqu, mdtv, mgstage, mmtv, mywife, official, prestige, theporndb, xcity
+    lulubar, madouqu, mdtv, mgstage, mmtv, mywife, official, prestige, theporndb, xcity, hscangku, cableav
 from models.entity.enums import FileMode


@@ -124,7 +124,7 @@ def _call_crawler(json_data, website, language, file_number, short_number, mosai
     elif website == 'mgstage':
         json_data = json.loads(mgstage.main(file_number, appoint_url, log_info, req_web, language, short_number))
     elif website == '7mmtv':
-        json_data = json.loads(mmtv.main(file_number, appoint_url, log_info, req_web, language))
+        json_data = json.loads(mmtv.main(file_number, appoint_url, log_info, req_web, language, file_path))
     elif website == 'fc2':
         json_data = json.loads(fc2.main(file_number, appoint_url, log_info, req_web, language))
     elif website == 'fc2hub':
@@ -137,6 +137,12 @@ def _call_crawler(json_data, website, language, file_number, short_number, mosai
     elif website == 'madouqu':
         json_data = json.loads(
             madouqu.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number))
+    elif website == 'hscangku':
+        json_data = json.loads(
+            hscangku.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number))
+    elif website == 'cableav':
+        json_data = json.loads(
+            cableav.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number))
     elif website == 'getchu':
         json_data = json.loads(getchu.main(file_number, appoint_url, log_info, req_web, language))
     elif website == 'getchu_dmm':
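Both new branches follow the crawler contract used throughout this function: each crawler module exposes a main() that returns a JSON string, which _call_crawler() parses immediately. A minimal sketch of that round trip, assuming the same names that are in scope above:

import json

# sketch only; mirrors the hscangku branch above
raw = hscangku.main(file_number, appoint_url, log_info, req_web,
                    language, file_path, appoint_number)
json_data = json.loads(raw)
# parsed shape (see cableav.py below for the producing side):
# {'hscangku': {'zh_cn': {...}, 'zh_tw': {...}, 'jp': {...}}}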
40 changes: 26 additions & 14 deletions src/models/core/nfo.py
@@ -81,8 +81,10 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal
     nfo_title = config.naming_media
     if not number:
         number = title
+    # The default Emby title template is [number title]; for guochan files where number and title are identical, one copy must be dropped, including its space, otherwise the guochan nfo title gains an extra space
+    # When reading the nfo title back, the leading number and space are stripped so only the title is shown; if number equals title, stripping the number would strip the whole title and the read fails, see line 426
     if number == title and 'number' in nfo_title and 'title' in nfo_title:
-        nfo_title = nfo_title.replace('originaltitle', '').replace('title', '')
+        nfo_title = nfo_title.replace('originaltitle', '').replace('title', '').strip()
     first_letter = get_number_first_letter(number)

     # handle actors
@@ -106,7 +108,7 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal
     if not os.path.exists(folder_new_path):
         os.makedirs(folder_new_path)
     delete_file(nfo_new_path)  # avoid duplicate files on 115
-    with open(nfo_new_path, "wt", encoding='UTF-8') as code:
+    with (open(nfo_new_path, "wt", encoding='UTF-8') as code):
         print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
         print("<movie>", file=code)
@@ -205,18 +207,27 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal
         if 'country,' in nfo_include_new:
             print(f" <countrycode>{country}</countrycode>", file=code)

-        # output actors
+        # initialize actor_list
+        actor_list = []
+        # output male and female actors
         if 'actor_all,' in nfo_include_new:
             actor = all_actor
-        if actor and actor != '未知演员' and actor != '未知演員' and 'actor,' in nfo_include_new:
+        # when there are actor names, write them
+        if 'actor,' in nfo_include_new and actor:
             actor_list = actor.split(',')  # string to list
             actor_list = [actor.strip() for actor in actor_list if actor.strip()]  # drop blanks
-            if actor_list:
-                for each in actor_list:
-                    print(" <actor>", file=code)
-                    print(" <name>" + each + "</name>", file=code)
-                    print(" <type>Actor</type>", file=code)
-                    print(" </actor>", file=code)
+        # when there are none, use the unknown-actor value from the file naming settings as the actor name; the default value and an empty value are not written to the NFO
+        elif 'actor,' in nfo_include_new and config.actor_no_name not in ["未知演员", '未知演員', '']:
+            actor = config.actor_no_name
+            actor_list = actor.split(',')  # string to list
+            actor_list = [actor.strip() for actor in actor_list if actor.strip()]  # drop blanks
+            signal.add_log(f'⛑️ 无演员名, 使用手动命名 写入NFO {config.actor_no_name}')
+        if actor_list:
+            for each in actor_list:
+                print(" <actor>", file=code)
+                print(" <name>" + each + "</name>", file=code)
+                print(" <type>Actor</type>", file=code)
+                print(" </actor>", file=code)

         # output director
         if director and 'director,' in nfo_include_new:
@@ -318,10 +329,11 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal
print(" <website>" + website + "</website>", file=code)

# javdb id 输出, 没有时使用番号搜索页
if 'javdbid' in json_data_nfo and json_data_nfo['javdbid']:
print(" <javdbid>" + json_data_nfo["javdbid"] + "</javdbid>", file=code)
else:
print(" <javdbsearchid>" + number + "</javdbsearchid>", file=code)
if "国产" not in json_data_nfo['mosaic'] and "國產" not in json_data_nfo['mosaic']:
if 'javdbid' in json_data_nfo and json_data_nfo['javdbid']:
print(" <javdbid>" + json_data_nfo["javdbid"] + "</javdbid>", file=code)
else:
print(" <javdbsearchid>" + number + "</javdbsearchid>", file=code)
print("</movie>", file=code)
json_data['logs'] += "\n 🍀 Nfo done! (new)(%ss)" % get_used_time(start_time)
return True
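Read as a standalone function, the reworked actor block prefers scraped names, falls back to the user-configured unknown-actor name, and writes nothing when that setting is still the default placeholder or empty. A minimal sketch of the same decision logic, reusing the names from the diff:

def resolve_actor_list(actor, actor_no_name, nfo_include_new):
    # returns the names to emit as <actor> nodes; an empty list writes nothing
    if 'actor,' not in nfo_include_new:
        return []
    if not actor:
        if actor_no_name in ['未知演员', '未知演員', '']:
            return []  # default placeholder or blank: skip entirely
        actor = actor_no_name  # manual fallback from the naming settings
    return [n.strip() for n in actor.split(',') if n.strip()]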
176 changes: 176 additions & 0 deletions src/models/crawlers/cableav.py
@@ -0,0 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
import time

import urllib3
import zhconv
from lxml import etree

from models.base.web import curl_html
from models.config.config import config
from models.crawlers.guochan import get_extra_info, get_number_list

urllib3.disable_warnings()  # yapf: disable


# import traceback

def get_actor_photo(actor):
    actor = actor.split(',')
    data = {}
    for i in actor:
        actor_photo = {i: ''}
        data.update(actor_photo)
    return data


def get_detail_info(html, number, file_path):
    title_h1 = html.xpath('//div[@class="entry-content "]/p/text()')
    title = title_h1[0].replace(number + ' ', '').strip() if title_h1 else number
    actor = get_extra_info(title, file_path, info_type="actor")
    tmp_tag = html.xpath('//header//div[@class="categories-wrap"]/a/text()')
    # convert the tag to simplified Chinese
    tag = zhconv.convert(tmp_tag[0], 'zh-cn') if tmp_tag else ''
    cover_url = html.xpath('//meta[@property="og:image"]/@content')
    cover_url = cover_url[0] if cover_url else ''

    return number, title, actor, cover_url, tag


def get_real_url(html, number_list):
    item_list = html.xpath('//h3[contains(@class,"title")]//a[@href and @title]')
    for each in item_list:
        # href="https://cableav.tv/Xq1Sg3SvZPk/"
        detail_url = each.get('href')
        title = each.xpath('text()')[0]
        if title and detail_url:
            for n in number_list:
                temp_n = re.sub(r'[\W_]', '', n).upper()
                temp_title = re.sub(r'[\W_]', '', title).upper()
                if temp_n in temp_title:
                    return True, n, title, detail_url
    return False, '', '', ''


def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file_path='', appoint_number=''):
    start_time = time.time()
    website_name = 'cableav'
    req_web += '-> %s' % website_name
    title = ''
    cover_url = ''
    web_info = '\n '
    log_info += ' \n 🌐 cableav'
    debug_info = ''
    real_url = appoint_url
    cableav_url = getattr(config, 'cableav_website', 'https://cableav.tv')

    try:
        if not real_url:
            # build candidate search terms from the number and filename
            number_list, filename_list = get_number_list(number, appoint_number, file_path)
            n_list = number_list[:1] + filename_list
            for each in n_list:
                real_url = f'{cableav_url}/?s={each}'
                # real_url = 'https://cableav.tv/s?s=%E6%9F%9A%E5%AD%90%E7%8C%AB'
                debug_info = f'请求地址: {real_url} '
                log_info += web_info + debug_info
                result, response = curl_html(real_url)
                if not result:
                    debug_info = '网络请求错误: %s' % response
                    log_info += web_info + debug_info
                    raise Exception(debug_info)
                search_page = etree.fromstring(response, etree.HTMLParser())
                result, number, title, real_url = get_real_url(search_page, n_list)
                # real_url = 'https://cableav.tv/hyfaqwfjhio'
                if result:
                    break
            else:
                debug_info = '没有匹配的搜索结果'
                log_info += web_info + debug_info
                raise Exception(debug_info)

        debug_info = f'番号地址: {real_url} '
        log_info += web_info + debug_info
        result, response = curl_html(real_url)

        if not result:
            debug_info = '没有找到数据 %s ' % response
            log_info += web_info + debug_info
            raise Exception(debug_info)

        detail_page = etree.fromstring(response, etree.HTMLParser())
        number, title, actor, cover_url, tag = get_detail_info(detail_page, number, file_path)
        actor_photo = get_actor_photo(actor)

        try:
            dic = {
                'number': number,
                'title': title,
                'originaltitle': title,
                'actor': actor,
                'outline': '',
                'originalplot': '',
                'tag': tag,
                'release': '',
                'year': '',
                'runtime': '',
                'score': '',
                'series': '',
                'country': 'CN',
                'director': '',
                'studio': '',
                'publisher': '',
                'source': 'cableav',
                'website': real_url,
                'actor_photo': actor_photo,
                'cover': cover_url,
                'poster': '',
                'extrafanart': '',
                'trailer': '',
                'image_download': False,
                'image_cut': 'no',
                'log_info': log_info,
                'error_info': '',
                'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
                'mosaic': '国产',
                'wanted': '',
            }
            debug_info = '数据获取成功!'
            log_info += web_info + debug_info
            dic['log_info'] = log_info
        except Exception as e:
            debug_info = '数据生成出错: %s' % str(e)
            log_info += web_info + debug_info
            raise Exception(debug_info)

    except Exception as e:
        # print(traceback.format_exc())
        debug_info = str(e)
        dic = {
            'title': '',
            'cover': '',
            'website': '',
            'log_info': log_info,
            'error_info': debug_info,
            'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
        }
    dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=False,
        indent=4,
        separators=(',', ': '),
    )
    return js


if __name__ == '__main__':
    # yapf: disable
    # print(main('SSN010'))
    # print(main('國產AV 麻豆傳媒 MD0312 清純嫩穴賣身葬父 露露', file_path='國產AV 麻豆傳媒 MD0312 清純嫩穴賣身葬父 露露'))
    # print(main('國產AV 大象傳媒 DA002 性感魅惑色兔兔 李娜娜', file_path='國產AV 大象傳媒 DA002 性感魅惑色兔兔 李娜娜'))
    # print(main('韓國高端攝影頂 Yeha 私拍福利', file_path='韓國高端攝影頂 Yeha 私拍福利'))
    print(main('EMTC-005', file_path='國產AV 愛神傳媒 EMTC005 怒操高冷社長秘書 米歐'))
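get_real_url() matches search results loosely: both the candidate number and each result title are stripped of non-word characters and underscores, then uppercased, before the substring test. That is how 'EMTC-005' in the example above can match a title containing 'EMTC005'. A quick self-contained illustration:

import re

def normalize(s):
    # same transform as in get_real_url above
    return re.sub(r'[\W_]', '', s).upper()

assert normalize('EMTC-005') == 'EMTC005'
assert normalize('EMTC-005') in normalize('國產AV 愛神傳媒 EMTC005 怒操高冷社長秘書 米歐')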