Skip to content

Commit

Permalink
see Diaoul/subliminal#404 for details on these changes
Browse files Browse the repository at this point in the history
  • Loading branch information
caronc committed Feb 1, 2015
1 parent 3c6928b commit 96e206c
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 37 deletions.
4 changes: 0 additions & 4 deletions Subliminal/subliminal/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
from ..video import Episode, Movie
from .. import __version__
from random import randint
import re

#: The following characters are always stripped
IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]')

# Agent List
AGENT_LIST = (
Expand Down
19 changes: 8 additions & 11 deletions Subliminal/subliminal/providers/addic7ed.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
import charade
import requests
from . import Provider
from . import IGNORED_CHARACTERS_RE
from .. import __version__
from ..cache import region
from ..exceptions import ProviderConfigurationError, ProviderNotAvailable, InvalidSubtitle
from ..subtitle import Subtitle, is_valid_subtitle
from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string
from ..video import Episode


Expand Down Expand Up @@ -80,7 +78,7 @@ def get(self, url, params=None):
"""
try:
r = self.session.get(self.server + url, params=params, timeout=30)
r = self.session.get(self.server + url, params=params, timeout=10)
except requests.Timeout:
raise ProviderNotAvailable('Timeout after 10 seconds')
if r.status_code != 200:
Expand All @@ -98,8 +96,7 @@ def get_show_ids(self):
soup = self.get('/shows.php')
show_ids = {}
for html_show in soup.select('td.version > h3 > a[href^="/show/"]'):
show_ids[
IGNORED_CHARACTERS_RE.sub('', html_show.string).lower()] = \
show_ids[sanitize_string(html_show.string)] = \
int(html_show['href'][6:])
return show_ids

Expand All @@ -124,11 +121,11 @@ def find_show_id(self, series):

def query(self, series, season):
show_ids = self.get_show_ids()
_series = IGNORED_CHARACTERS_RE.sub('', series).lower()
if _series in show_ids:
show_id = show_ids[_series]
sanitized_series = sanitize_string(series)
if sanitized_series in show_ids:
show_id = show_ids[sanitized_series]
else:
show_id = self.find_show_id(_series)
show_id = self.find_show_id(sanitized_series)
if show_id is None:
return []
params = {'show_id': show_id, 'season': season}
Expand All @@ -155,7 +152,7 @@ def list_subtitles(self, video, languages):

def download_subtitle(self, subtitle):
try:
r = self.session.get(self.server + subtitle.download_link, timeout=30,
r = self.session.get(self.server + subtitle.download_link, timeout=10,
headers={'Referer': self.server + subtitle.referer})
except requests.Timeout:
raise ProviderNotAvailable('Timeout after 10 seconds')
Expand Down
10 changes: 5 additions & 5 deletions Subliminal/subliminal/providers/opensubtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
import charade
import guessit
from . import Provider
from . import IGNORED_CHARACTERS_RE
from .. import __version__
from ..exceptions import ProviderError, ProviderNotAvailable, InvalidSubtitle
from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches
from ..subtitle import sanitize_string
from ..video import Episode, Movie


Expand Down Expand Up @@ -52,8 +52,8 @@ def compute_matches(self, video):
if isinstance(video, Episode) and self.movie_kind == 'episode':
# series
if video.series and \
IGNORED_CHARACTERS_RE.sub('', self.series_name).lower() == \
IGNORED_CHARACTERS_RE.sub('', video.series).lower():
sanitize_string(self.series_name) == \
sanitize_string(video.series):
matches.add('series')
# season
if video.season and self.series_season == video.season:
Expand Down Expand Up @@ -81,8 +81,8 @@ def compute_matches(self, video):
matches.add('imdb_id')
# title
if video.title and \
IGNORED_CHARACTERS_RE.sub('', self.movie_name).lower() == \
IGNORED_CHARACTERS_RE.sub('', video.title).lower():
sanitize_string(self.movie_name) == \
sanitize_string(video.title):
matches.add('title')
return matches

Expand Down
10 changes: 5 additions & 5 deletions Subliminal/subliminal/providers/podnapisi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import guessit
import requests
from . import Provider
from . import IGNORED_CHARACTERS_RE
from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError
from ..subtitle import Subtitle, is_valid_subtitle, compute_guess_matches
from ..subtitle import sanitize_string
from ..video import Episode, Movie


Expand Down Expand Up @@ -46,8 +46,8 @@ def compute_matches(self, video):
if isinstance(video, Episode):
# series
if video.series and \
IGNORED_CHARACTERS_RE.sub('', self.series).lower() == \
IGNORED_CHARACTERS_RE.sub('', video.series).lower():
sanitize_string(self.series) == \
sanitize_string(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
Expand All @@ -62,8 +62,8 @@ def compute_matches(self, video):
elif isinstance(video, Movie):
# title
if video.title and \
IGNORED_CHARACTERS_RE.sub('', self.title).lower() == \
IGNORED_CHARACTERS_RE.sub('', video.title).lower():
sanitize_string(self.title) == \
sanitize_string(video.title):
matches.add('title')
# year
if video.year and self.year == video.year:
Expand Down
22 changes: 10 additions & 12 deletions Subliminal/subliminal/providers/tvsubtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
import charade
import requests
from . import Provider
from . import IGNORED_CHARACTERS_RE
from ..cache import region
from ..exceptions import InvalidSubtitle, ProviderNotAvailable, ProviderError
from ..subtitle import Subtitle, is_valid_subtitle
from ..subtitle import Subtitle, is_valid_subtitle, sanitize_string
from ..video import Episode

IGNORE_DATEMATCH=re.compile('^(.*)[ \t0-9-._)(]*$')
Expand Down Expand Up @@ -107,15 +106,14 @@ def find_show_id(self, series):
logger.debug('Searching series %r', data)
soup = self.request('/search.php', data=data, method='POST')
links = soup.select('div.left li div a[href^="/tvshow-"]')
_series = IGNORE_DATEMATCH.match(
IGNORED_CHARACTERS_RE.sub('', series)\
.replace('.', ' ').strip().lower(),
sanitized_series = IGNORE_DATEMATCH.match(
sanitize_string(series).replace('.', ' ').strip(),
)
if not _series:
_series = IGNORED_CHARACTERS_RE.sub('', series)\
.replace('.', ' ').strip().lower()
if not sanitized_series:
sanitized_series = sanitize_string(series)\
.replace('.', ' ').strip()
else:
_series = _series.group(1)
sanitized_series = sanitized_series.group(1)

if not links:
logger.info('Series %r not found', series)
Expand All @@ -127,15 +125,15 @@ def find_show_id(self, series):
logger.warning('Could not parse %r', link.string)
continue
show = IGNORE_DATEMATCH.match(
IGNORED_CHARACTERS_RE.sub('', match.group('series'))\
.replace('.', ' ').strip().lower(),
sanitize_string(match.group('series'))\
.replace('.', ' ').strip(),
)
if not show:
logger.warning('Could not postparse %r', match.group('series'))
continue
show = show.group(1)

if show == _series:
if show == sanitized_series:
return int(link['href'][8:-5])
return int(links[0]['href'][8:-5])

Expand Down
21 changes: 21 additions & 0 deletions Subliminal/subliminal/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
import os.path
import babelfish
import pysrt
import re
from .video import Episode, Movie


logger = logging.getLogger(__name__)

#: The following characters are always stripped.
#: The character class matches any single ``!``, ``@``, ``#``, ``$``,
#: single quote or double quote; ``sanitize_string`` removes every match.
IGNORED_CHARACTERS_RE = re.compile('[!@#$\'"]')


class Subtitle(object):
"""Base class for subtitle
Expand Down Expand Up @@ -85,6 +89,23 @@ def __repr__(self):
return '<%s [%s]>' % (self.__class__.__name__, self.language)


def sanitize_string(str_in):
    """Return a normalized form of `str_in` for matching and caching.

    Sanitizes the value passed in by stripping out the special characters
    matched by `IGNORED_CHARACTERS_RE`, lowercasing the result and removing
    leading and trailing whitespace. This yields a consistent string that can
    be compared against names found on provider websites and used as a cache
    key.

    :param string str_in: the string to sanitize; non-string values (int,
        float, etc) are converted with `str` first
    :return: sanitized string
    :rtype: string

    """
    # `basestring` only exists on Python 2 and raises NameError on Python 3.
    # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
    # Python 3, so the same check works on both interpreters.
    if not isinstance(str_in, (str, type(u''))):
        # handle int, float, etc
        str_in = str(str_in)

    return IGNORED_CHARACTERS_RE.sub('', str_in).lower().strip()

def get_subtitle_path(video_path, language=None):
"""Create the subtitle path from the given `video_path` and `language`
Expand Down

0 comments on commit 96e206c

Please sign in to comment.