From 96e206ca5e2eca549b75245b8f1b4afd4a6ee085 Mon Sep 17 00:00:00 2001 From: Chris Caron Date: Sun, 1 Feb 2015 16:06:45 -0500 Subject: [PATCH] see https://github.com/Diaoul/subliminal/pull/404 for details on these changes --- Subliminal/subliminal/providers/__init__.py | 4 ---- Subliminal/subliminal/providers/addic7ed.py | 19 +++++++--------- .../subliminal/providers/opensubtitles.py | 10 ++++----- Subliminal/subliminal/providers/podnapisi.py | 10 ++++----- .../subliminal/providers/tvsubtitles.py | 22 +++++++++---------- Subliminal/subliminal/subtitle.py | 21 ++++++++++++++++++ 6 files changed, 49 insertions(+), 37 deletions(-) diff --git a/Subliminal/subliminal/providers/__init__.py b/Subliminal/subliminal/providers/__init__.py index 4949428..581799b 100644 --- a/Subliminal/subliminal/providers/__init__.py +++ b/Subliminal/subliminal/providers/__init__.py @@ -4,10 +4,6 @@ from ..video import Episode, Movie from .. import __version__ from random import randint -import re - -#: The following characters are always stripped -IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') # Agent List AGENT_LIST = ( diff --git a/Subliminal/subliminal/providers/addic7ed.py b/Subliminal/subliminal/providers/addic7ed.py index 9a0fb37..d6cfc5d 100644 --- a/Subliminal/subliminal/providers/addic7ed.py +++ b/Subliminal/subliminal/providers/addic7ed.py @@ -6,11 +6,9 @@ import charade import requests from . import Provider -from . import IGNORED_CHARACTERS_RE -from .. 
import __version__ from ..cache import region from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode @@ -80,7 +78,7 @@ def get(self, url, params=None): """ try: - r = self.session.get(self.server + url, params=params, timeout=30) + r = self.session.get(self.server + url, params=params, timeout=10) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') if r.status_code != 200: @@ -98,8 +96,7 @@ def get_show_ids(self): soup = self.get('/shows.php') show_ids = {} for html_show in soup.select('td.version > h3 > a[href^="/show/"]'): - show_ids[ - IGNORED_CHARACTERS_RE.sub('', html_show.string).lower()] = \ + show_ids[sanitize_string(html_show.string)] = \ int(html_show['href'][6:]) return show_ids @@ -124,11 +121,11 @@ def find_show_id(self, series): def query(self, series, season): show_ids = self.get_show_ids() - _series = IGNORED_CHARACTERS_RE.sub('', series).lower() - if _series in show_ids: - show_id = show_ids[_series] + sanitized_series = sanitize_string(series) + if sanitized_series in show_ids: + show_id = show_ids[sanitized_series] else: - show_id = self.find_show_id(_series) + show_id = self.find_show_id(sanitized_series) if show_id is None: return [] params = {'show_id': show_id, 'season': season} @@ -155,7 +152,7 @@ def list_subtitles(self, video, languages): def download_subtitle(self, subtitle): try: - r = self.session.get(self.server + subtitle.download_link, timeout=30, + r = self.session.get(self.server + subtitle.download_link, timeout=10, headers={'Referer': self.server + subtitle.referer}) except requests.Timeout: raise ProviderNotAvailable('Timeout after 10 seconds') diff --git a/Subliminal/subliminal/providers/opensubtitles.py b/Subliminal/subliminal/providers/opensubtitles.py index 9f75fe4..1efaf08 100644 --- 
a/Subliminal/subliminal/providers/opensubtitles.py +++ b/Subliminal/subliminal/providers/opensubtitles.py @@ -10,10 +10,10 @@ import charade import guessit from . import Provider -from . import IGNORED_CHARACTERS_RE from .. import __version__ from ..exceptions import ProviderError, ProviderNotAvailable, InvalidSubtitle from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie @@ -52,8 +52,8 @@ def compute_matches(self, video): if isinstance(video, Episode) and self.movie_kind == 'episode': # series if video.series and \ - IGNORED_CHARACTERS_RE.sub('', self.series_name).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.series).lower(): + sanitize_string(self.series_name) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.series_season == video.season: @@ -81,8 +81,8 @@ def compute_matches(self, video): matches.add('imdb_id') # title if video.title and \ - IGNORED_CHARACTERS_RE.sub('', self.movie_name).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.title).lower(): + sanitize_string(self.movie_name) == \ + sanitize_string(video.title): matches.add('title') return matches diff --git a/Subliminal/subliminal/providers/podnapisi.py b/Subliminal/subliminal/providers/podnapisi.py index 2de0b1f..0148020 100644 --- a/Subliminal/subliminal/providers/podnapisi.py +++ b/Subliminal/subliminal/providers/podnapisi.py @@ -12,9 +12,9 @@ import guessit import requests from . import Provider -from . 
import IGNORED_CHARACTERS_RE from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches +from ..subtitle import sanitize_string from ..video import Episode, Movie @@ -46,8 +46,8 @@ def compute_matches(self, video): if isinstance(video, Episode): # series if video.series and \ - IGNORED_CHARACTERS_RE.sub('', self.series).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.series).lower(): + sanitize_string(self.series) == \ + sanitize_string(video.series): matches.add('series') # season if video.season and self.season == video.season: @@ -62,8 +62,8 @@ def compute_matches(self, video): elif isinstance(video, Movie): # title if video.title and \ - IGNORED_CHARACTERS_RE.sub('', self.title).lower() == \ - IGNORED_CHARACTERS_RE.sub('', video.title).lower(): + sanitize_string(self.title) == \ + sanitize_string(video.title): matches.add('title') # year if video.year and self.year == video.year: diff --git a/Subliminal/subliminal/providers/tvsubtitles.py b/Subliminal/subliminal/providers/tvsubtitles.py index 8b00595..615b7aa 100644 --- a/Subliminal/subliminal/providers/tvsubtitles.py +++ b/Subliminal/subliminal/providers/tvsubtitles.py @@ -10,10 +10,9 @@ import charade import requests from . import Provider -from . 
import IGNORED_CHARACTERS_RE from ..cache import region from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError -from ..subtitle import Subtitle, is_valid_subtitle +from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string from ..video import Episode IGNORE_DATEMATCH=re.compile('^(.*)[ \t0-9-._)(]*$') @@ -107,15 +106,14 @@ def find_show_id(self, series): logger.debug('Searching series %r', data) soup = self.request('/search.php', data=data, method='POST') links = soup.select('div.left li div a[href^="/tvshow-"]') - _series = IGNORE_DATEMATCH.match( - IGNORED_CHARACTERS_RE.sub('', series)\ - .replace('.', ' ').strip().lower(), + sanitized_series = IGNORE_DATEMATCH.match( + sanitize_string(series).replace('.', ' ').strip(), ) - if not _series: - _series = IGNORED_CHARACTERS_RE.sub('', series)\ - .replace('.', ' ').strip().lower() + if not sanitized_series: + sanitized_series = sanitize_string(series)\ + .replace('.', ' ').strip() else: - _series = _series.group(1) + sanitized_series = sanitized_series.group(1) if not links: logger.info('Series %r not found', series) @@ -127,15 +125,15 @@ def find_show_id(self, series): logger.warning('Could not parse %r', link.string) continue show = IGNORE_DATEMATCH.match( - IGNORED_CHARACTERS_RE.sub('', match.group('series'))\ - .replace('.', ' ').strip().lower(), + sanitize_string(match.group('series'))\ + .replace('.', ' ').strip(), ) if not show: logger.warning('Could not postparse %r', match.group('series')) continue show = show.group(1) - if show == _series: + if show == sanitized_series: return int(link['href'][8:-5]) return int(links[0]['href'][8:-5]) diff --git a/Subliminal/subliminal/subtitle.py b/Subliminal/subliminal/subtitle.py index f4a44a1..af90865 100644 --- a/Subliminal/subliminal/subtitle.py +++ b/Subliminal/subliminal/subtitle.py @@ -4,11 +4,15 @@ import os.path import babelfish import pysrt +import re from .video import Episode, Movie logger = logging.getLogger(__name__) +#: The 
following characters are always stripped +IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]') + class Subtitle(object): """Base class for subtitle @@ -85,6 +89,23 @@ def __repr__(self): return '<%s [%s]>' % (self.__class__.__name__, self.language) +def sanitize_string(str_in): + """ + Sanitizes a string passed into it by eliminating characters that might + otherwise cause issues when attempting to locate a match on websites by + stripping out any special characters and forcing a consistent string that + can be used for caching too. + + :param string str_in: the string to sanitize + :return: sanitized string + :rtype: string + """ + if not isinstance(str_in, basestring): + # handle int, float, etc + str_in = str(str_in) + + return IGNORED_CHARACTERS_RE.sub('', str_in).lower().strip() + def get_subtitle_path(video_path, language=None): """Create the subtitle path from the given `video_path` and `language`