Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix addic7ed subliminal subtitle provider #10312

Merged
merged 4 commits into from
Feb 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- Fix mass update status page, start a new snatch when changing status to failed. ([10213](https://github.com/pymedusa/Medusa/pull/10213))
- Fix changing process method in manual postprocessing. ([10220](https://github.com/pymedusa/Medusa/pull/10220))
- Fix saving season posters / banners when using tvdb. ([10251](https://github.com/pymedusa/Medusa/pull/10251))
- Fix Addic7ed.com subtitle provider ([10312](https://github.com/pymedusa/Medusa/pull/10312))

-----

Expand Down
11 changes: 6 additions & 5 deletions medusa/init/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,16 @@ def _configure_subliminal():
basename = __name__.split('.')[0]

# Unregister
# for name in ('legendastv = subliminal.providers.legendastv:LegendasTVProvider',):
# provider_manager.internal_extensions.remove(name)
# provider_manager.registered_extensions.append(name)
# provider_manager.unregister(name)
for name in ('addic7ed = subliminal.providers.addic7ed:Addic7edProvider',):
provider_manager.internal_extensions.remove(name)
provider_manager.registered_extensions.append(name)
provider_manager.unregister(name)

# Register
for name in ('napiprojekt = subliminal.providers.napiprojekt:NapiProjektProvider',
'subtitulamos = {basename}.subtitle_providers.subtitulamos:SubtitulamosProvider'.format(basename=basename),
'wizdom = {basename}.subtitle_providers.wizdom:WizdomProvider'.format(basename=basename)):
'wizdom = {basename}.subtitle_providers.wizdom:WizdomProvider'.format(basename=basename),
'addic7ed = {basename}.subtitle_providers.addic7ed:Addic7edProvider'.format(basename=basename)):
provider_manager.register(name)

refiner_manager.register('release = {basename}.refiners.release:refine'.format(basename=basename))
Expand Down
312 changes: 312 additions & 0 deletions medusa/subtitle_providers/addic7ed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
# -*- coding: utf-8 -*-
"""Custom subliminal addic7ed.com subtitle provider module."""

import hashlib
import logging
import re

from babelfish import Language

from guessit import guessit

from requests import Session

from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.exceptions import ConfigurationError, DownloadLimitExceeded
from subliminal.matches import guess_matches
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal.subtitle import Subtitle, fix_line_ending
from subliminal.utils import sanitize
from subliminal.video import Episode

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')

#: Series cell matching regex. This is a *bytes* pattern; it matches one
#: ``<td class="vr">...</td>`` cell at a time (non-greedy), and ``re.DOTALL``
#: lets a cell span several lines.
show_cells_re = re.compile(b'<td class="vr">.*?</td>', re.DOTALL)

#: Series header parsing regex. Captures the series name in the ``series`` group
#: and, when the text ends with a four-digit year in parentheses, that year in
#: the ``year`` group.
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')


class Addic7edSubtitle(Subtitle):
    """Subtitle entry listed on an addic7ed.com show page."""

    provider_name = 'addic7ed'

    def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
                 download_link):
        """Store the episode metadata and the download link of the subtitle."""
        super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
        self.series, self.season, self.episode = series, season, episode
        self.title, self.year = title, year
        self.version = version
        self.download_link = download_link

    @property
    def id(self):
        """Return the identifier of the subtitle, which is its download link."""
        return self.download_link

    @property
    def info(self):
        """Return a one-line description: series, optional year, SxxEyy, optional title and version."""
        template = '{series}{yopen}{year}{yclose} s{season:02d}e{episode:02d}{topen}{title}{tclose}{version}'

        # The year is wrapped in parentheses only when there is one to show.
        if self.year:
            year, year_open, year_close = self.year, ' (', ')'
        else:
            year, year_open, year_close = '', '', ''

        # Separators are emitted only next to a non-empty title / version.
        title_open = ' - ' if self.title else ''
        title_close = ' - ' if self.version else ''

        return template.format(
            series=self.series, season=self.season, episode=self.episode, title=self.title, year=year,
            version=self.version, yopen=year_open, yclose=year_close, topen=title_open, tclose=title_close
        )

    def get_matches(self, video):
        """Return the set of properties of `video` that this subtitle matches."""
        # Properties known from the show page itself.
        known = {
            'title': self.series,
            'season': self.season,
            'episode': self.episode,
            'episode_title': self.title,
            'year': self.year,
            'release_group': self.version,
        }
        found = guess_matches(video, known)

        release = self.version
        if release:
            # The resolution matches when it appears verbatim in the version text.
            if video.resolution and video.resolution in release.lower():
                found.add('resolution')
            # Any other property guessit can extract from the version text.
            found |= guess_matches(video, guessit(release, {'type': 'episode'}), partial=True)

        return found


class Addic7edProvider(Provider):
    """Addic7ed Provider.

    No login form is submitted. When credentials are configured, every request
    carries the ``wikisubtitlesuser`` / ``wikisubtitlespass`` cookies built in
    :meth:`__init__`.
    """

    languages = {Language('por', 'BR')} | {Language(lang) for lang in [
        'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg',
        'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus',
        'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
    ]}
    video_types = (Episode,)
    server_url = 'http://www.addic7ed.com/'
    subtitle_class = Addic7edSubtitle

    def __init__(self, username=None, password=None):
        """Store the credentials and build the authentication cookies.

        :param username: addic7ed user, sent as the ``wikisubtitlesuser`` cookie.
        :param password: plain-text password; only its MD5 hex digest is kept.
        :raise ConfigurationError: if only one of `username` / `password` is given.
        """
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        # Hash only when a password was supplied: the defaults (no credentials)
        # previously crashed on ``None.encode``.
        self.password = hashlib.md5(password.encode('utf-8')).hexdigest() if password else None
        self.logged_in = False
        self.session = None

        # Without credentials no cookies are sent at all, rather than cookies holding ``None``.
        if self.username and self.password:
            self.cookies = {'wikisubtitlesuser': self.username, 'wikisubtitlespass': self.password}
        else:
            self.cookies = {}

    def initialize(self):
        """Initialize Addic7edProvider provider."""
        self.session = Session()
        self.session.headers['User-Agent'] = self.user_agent

        # login: nothing is posted, the cookies sent with each request carry the credentials
        if self.username and self.password:
            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        """Terminate the provider and release the HTTP session."""
        # logout
        logger.debug('Logged out')
        self.logged_in = False

        # Close the session opened by initialize(), if any, so its connections are released.
        session = getattr(self, 'session', None)
        if session is not None:
            session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _get_show_ids(self):
        """Get the ``dict`` of show ids per series by querying the `shows.php` page.

        :return: show id per sanitized series name.
        :rtype: dict
        """
        # get the show page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + 'shows.php', timeout=20, cookies=self.cookies)
        r.raise_for_status()

        # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
        # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
        # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
        show_cells = re.findall(show_cells_re, r.content)
        if show_cells:
            soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
        else:
            # If RegEx fails, fall back to original r.content and use 'html.parser'
            soup = ParserBeautifulSoup(r.content, ['html.parser'])

        # populate the show ids; the href looks like "/show/<id>", so [6:] keeps only the id
        show_ids = {}
        for show in soup.select('td.vr > h3 > a[href^="/show/"]'):
            show_ids[sanitize(show.text)] = int(show['href'][6:])
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def _search_show_id(self, series, year=None):
        """Search the show id from the `series` and `year`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :return: the show id, if found.
        :rtype: int
        """
        # addic7ed doesn't support search with quotes
        series = series.replace("'", ' ')

        # build the params
        series_year = '%s %d' % (series, year) if year is not None else series
        params = {'search': series_year, 'Submit': 'Search'}

        # make the search
        logger.info('Searching show ids with %r', params)
        r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10, cookies=self.cookies)
        r.raise_for_status()
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # get the suggestion
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if sanitize(suggestion[0].i.text.replace("'", ' ')) != sanitize(series_year):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id

    def get_show_id(self, series, year=None, country_code=None):
        """Get the best matching show id for `series`, `year` and `country_code`.

        First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.

        :param str series: series of the episode.
        :param year: year of the series, if any.
        :type year: int
        :param country_code: country code of the series, if any.
        :type country_code: str
        :return: the show id, if found.
        :rtype: int
        """
        series_sanitized = sanitize(series).lower()
        show_ids = self._get_show_ids()
        show_id = None

        # attempt with country
        if country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

        # search as last resort (by series name only)
        if not show_id:
            logger.warning('Series %s not found in show ids', series)
            show_id = self._search_show_id(series)

        return show_id

    def query(self, show_id, series, season, year=None, country=None):
        """Query provider to get all subtitles for a specific show + season.

        :param int show_id: addic7ed id of the show.
        :param series: not used for the request; the series name is read from the page header.
        :param int season: season to request.
        :param year: not used for the request; the year is read from the page header.
        :param country: unused.
        :return: the completed subtitles found on the season page.
        :rtype: list
        """
        # get the page of the season of the show
        logger.info('Getting the page of show id %d, season %d', show_id, season)
        r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=60,
                             cookies=self.cookies)
        r.raise_for_status()

        if not r.content:
            # Provider returns a status of 304 Not Modified with an empty content
            # raise_for_status won't raise exception for that status code
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # The series name and year come from the page header, minus its last 10 characters
        # (presumably a trailing " Subtitles" suffix -- TODO confirm against the live markup).
        match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
        series = match.group('series')
        year = int(match.group('year')) if match.group('year') else None

        # loop over subtitle rows
        subtitles = []
        for row in soup.select('tr.epeven'):
            cells = row('td')

            # ignore incomplete subtitles
            status = cells[5].text
            if status != 'Completed':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # read the item
            language = Language.fromaddic7ed(cells[3].text)
            hearing_impaired = bool(cells[6].text)
            # hrefs start with "/", which server_url already ends with, hence the [1:]
            page_link = self.server_url + cells[2].a['href'][1:]
            row_season = int(cells[0].text)
            episode = int(cells[1].text)
            title = cells[2].text
            version = cells[4].text
            download_link = cells[9].a['href'][1:]

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, row_season, episode, title,
                                           year, version, download_link)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """List the subtitles available for `video` in any of `languages`.

        :param video: episode to search subtitles for.
        :param languages: languages to keep.
        :return: matching subtitles, or an empty list when the show is unknown.
        :rtype: list
        """
        # lookup show_id, trying the main series name first and then the alternative ones
        titles = [video.series] + video.alternative_series
        show_id = None
        for title in titles:
            show_id = self.get_show_id(title, video.year)
            if show_id is not None:
                break

        if show_id is None:
            logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
            return []

        # query for subtitles with the show_id, keeping the requested episode and languages only
        return [s for s in self.query(show_id, title, video.season, video.year)
                if s.language in languages and s.episode == video.episode]

    def download_subtitle(self, subtitle):
        """Download the content of `subtitle` and store it on ``subtitle.content``.

        :param subtitle: subtitle to download.
        :raise DownloadLimitExceeded: if the provider answers with an HTML page instead of a subtitle.
        """
        # download the subtitle, authenticated with the same cookies as every other request
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
                             timeout=20, cookies=self.cookies)
        r.raise_for_status()

        if not r.content:
            # Provider returns a status of 304 Not Modified with an empty content
            # raise_for_status won't raise exception for that status code
            logger.debug('Unable to download subtitle. No data returned from provider')
            return

        # detect download limit exceeded: compare the media type only, so a missing header
        # or a "; charset=..." suffix cannot defeat the check
        content_type = r.headers.get('Content-Type', '')
        if content_type.split(';')[0].strip().lower() == 'text/html':
            raise DownloadLimitExceeded

        subtitle.content = fix_line_ending(r.content)
13 changes: 12 additions & 1 deletion themes-default/slim/src/components/config-subtitles.vue
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,18 @@

<div class="col-xs-12 col-md-10">
<fieldset class="component-group-list" style="margin-left: 50px; margin-top:36px;">
<config-textbox v-model="subtitles.providerLogins.addic7ed.user" label="Addic7ed User Name" id="addic7ed_username" />
<config-textbox v-model="subtitles.providerLogins.addic7ed.user" label="Addic7ed User ID" id="addic7ed_username" />
<div v-if="subtitles.providerLogins.addic7ed.user" style="margin-bottom: 4rem;">
<p style="color: red">To bypass addic7ed captcha protection we authenticate using a set cookie. The cookie requires your user id and password.</p>
<span>You can find your user id by following these steps</span>
<ul>
<li>Navigate and login on addic7ed.com</li>
<li>Click on My Profile</li>
<li>Click on your own username</li>
<li>Your user id should now be visible in the address bar</li>
<li>For example: https://www.addic7ed.com/user/12345</li>
</ul>
</div>
<config-textbox type="password" v-model="subtitles.providerLogins.addic7ed.pass" label="Addic7ed Password" id="addic7ed_password" />

<config-textbox v-model="subtitles.providerLogins.opensubtitles.user" label="Opensubtitles User Name" id="opensubtitles_username" />
Expand Down
4 changes: 2 additions & 2 deletions themes-default/slim/src/components/subtitle-search.vue
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ export default {
this.displayQuestion = false;
this.loading = true;
this.loadingMessage = 'Searching for subtitles... ';
apiRoute('home/manualSearchSubtitles', { params: subtitleParams })
apiRoute('home/manualSearchSubtitles', { params: subtitleParams, timeout: 120000 })
.then(response => {
if (response.data.result === 'success') {
this.subtitles.push(...response.data.subtitles);
Expand Down Expand Up @@ -263,7 +263,7 @@ export default {
this.loadingMessage = 'downloading subtitle... ';
this.loading = true;

apiRoute('home/manualSearchSubtitles', { params })
apiRoute('home/manualSearchSubtitles', { params, timeout: 120000 })
.then(response => {
if (response.data.result === 'success') {
// Update the show, as we have new information (subtitles)
Expand Down
Loading