Add field identifier to provider cache tables. (#7305)
* Add field `identifier` to provider cache tables.
Currently the URL is used as the identifier by default. In the future this can be overridden per provider.

* Added methods that can create an identifier.
The method can be overridden per provider.

* Added identifier override for Binsearch
* Fix checking/storing by identifier

* Make add_cache_entry() only take SearchResult objects

* Add Jackett support

* Update CHANGELOG.md
p0psicles authored and medariox committed Nov 3, 2019
1 parent bd176a3 commit c6174b1
Showing 7 changed files with 114 additions and 52 deletions.
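
A minimal sketch of the pattern this commit introduces, assuming the `_get_identifier()` hook that the diff below adds to `GenericProvider`: the cache keys entries on whatever the provider returns from this hook, defaulting to the result URL. The `ExampleProvider` class and the result objects here are hypothetical and only illustrate how a provider swaps in a different identifier.

```python
from collections import namedtuple

# Hypothetical stand-in for a provider search result (name/url/size as in the diff).
Result = namedtuple('Result', 'name url size')


class GenericProvider(object):
    """Trimmed illustration of the base-class hook added in this commit."""

    @staticmethod
    def _get_identifier(item):
        # Default: the cache keys results by their URL.
        return item.url


class ExampleProvider(GenericProvider):
    """Hypothetical provider whose download URLs are not stable between searches."""

    @staticmethod
    def _get_identifier(item):
        # Override: build a stable key from release name and size instead.
        return '{name}_{size}'.format(name=item.name, size=item.size)


result = Result('Show.S01E01.720p.WEB', 'https://indexer/dl/abc123', 1200000000)
print(GenericProvider._get_identifier(result))  # https://indexer/dl/abc123
print(ExampleProvider._get_identifier(result))  # Show.S01E01.720p.WEB_1200000000
```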
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -22,6 +22,7 @@
- Fixed provider btdb due date format change in layout ([#7250](https://github.com/pymedusa/Medusa/pull/7250))
- Fixed exception when there is no anime XML ([#7256](https://github.com/pymedusa/Medusa/pull/7256))
- Fixed BTDB manual search & updated Xthor domain ([#7303](https://github.com/pymedusa/Medusa/pull/7303))
- Fixed duplicate manual search results for providers without unique URLs ([#7305](https://github.com/pymedusa/Medusa/pull/7305))

-----

4 changes: 1 addition & 3 deletions medusa/classes.py
@@ -225,9 +225,7 @@ def add_result_to_cache(self, cache):
# FIXME: Added repr parsing, as that prevents the logger from throwing an exception.
# This can happen when there are unicode decoded chars in the release name.
log.debug('Adding item from search to cache: {release_name!r}', release_name=self.name)
return cache.add_cache_entry(self.name, self.url, self.seeders,
self.leechers, self.size, self.pubdate, parsed_result=self.parsed_result)
return None
return cache.add_cache_entry(self, parsed_result=self.parsed_result)

def create_episode_object(self):
"""Use this result to create an episode segment out of it."""
45 changes: 32 additions & 13 deletions medusa/databases/cache_db.py
@@ -37,6 +37,28 @@ def execute(self):
else:
self.connection.action(query[0], query[1:])

def _get_provider_tables(self):
return self.connection.select(
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT IN ('lastUpdate',"
" 'lastSearch', 'scene_names', 'network_timezones', 'scene_exceptions_refresh',"
" 'db_version', 'scene_exceptions', 'last_update');")

def clear_provider_tables(self):
for provider in self._get_provider_tables():
self.connection.action("DELETE FROM '{name}';".format(name=provider['name']))

def drop_provider_tables(self):
for provider in self._get_provider_tables():
self.connection.action("DROP TABLE '{name}';".format(name=provider['name']))

def inc_major_version(self):
major_version, minor_version = self.connection.version
major_version += 1
self.connection.action('UPDATE db_version SET db_version = ?;', [major_version])
log.info('[CACHE-DB] Updated major version to: {}.{}', *self.connection.version)

return self.connection.version


class AddSceneExceptions(InitialSchema):
def test(self):
@@ -182,19 +204,16 @@ def execute(self):
self.clear_provider_tables()
self.inc_major_version()

def clear_provider_tables(self):
providers = self.connection.select(
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT IN ('lastUpdate',"
" 'lastSearch', 'scene_names', 'network_timezones', 'scene_exceptions_refresh',"
" 'db_version', 'scene_exceptions', 'last_update');")

for provider in providers:
self.connection.action("DELETE FROM '{name}';".format(name=provider['name']))
class AddProviderTablesIdentifier(ClearProviderTables):
"""Add new pk field `identifier`."""

def inc_major_version(self):
major_version, minor_version = self.connection.version
major_version += 1
self.connection.action('UPDATE db_version SET db_version = ?;', [major_version])
log.info('[CACHE-DB] Updated major version to: {}.{}', *self.connection.version)
def test(self):
"""Test if the version is at least 3."""
return self.connection.version >= (3, None)

return self.connection.version
def execute(self):
utils.backup_database(self.connection.path, self.connection.version)

self.drop_provider_tables()
self.inc_major_version()
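
For context, a self-contained sqlite3 sketch of what the new `_get_provider_tables()` / `drop_provider_tables()` helpers effectively do: any table not in the fixed list of schema tables is treated as a provider cache table and dropped, after which the major-version bump causes the tables to be recreated with the new `identifier` column. The table names used in the demo are stand-ins.

```python
import sqlite3

# Tables that belong to the cache DB schema itself rather than to a provider.
NON_PROVIDER_TABLES = (
    'lastUpdate', 'lastSearch', 'scene_names', 'network_timezones',
    'scene_exceptions_refresh', 'db_version', 'scene_exceptions', 'last_update',
)


def drop_provider_tables(connection):
    """Drop every per-provider cache table, mirroring the migration above."""
    placeholders = ', '.join('?' * len(NON_PROVIDER_TABLES))
    rows = connection.execute(
        "SELECT name FROM sqlite_master WHERE type='table' "
        'AND name NOT IN ({0})'.format(placeholders),
        NON_PROVIDER_TABLES,
    ).fetchall()
    for (name,) in rows:
        connection.execute('DROP TABLE "{0}"'.format(name))


con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE db_version (db_version NUMERIC, db_minor_version NUMERIC)')
con.execute('CREATE TABLE some_provider (name TEXT, url TEXT)')  # stand-in provider table
drop_provider_tables(con)
print([row[0] for row in con.execute("SELECT name FROM sqlite_master WHERE type='table'")])
# ['db_version']
```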
9 changes: 9 additions & 0 deletions medusa/providers/generic_provider.py
@@ -781,6 +781,15 @@ def _get_title_and_url(self, item):

return title, url

@staticmethod
def _get_identifier(item):
"""
Return the identifier for the item.
By default this is the url. Providers can override this when needed.
"""
return item.url

@property
def recent_results(self):
"""Return recent RSS results from provier."""
9 changes: 9 additions & 0 deletions medusa/providers/nzb/binsearch.py
@@ -288,5 +288,14 @@ def _get_size(self, item):
"""
return item.get('size', -1)

@staticmethod
def _get_identifier(item):
"""
Return the identifier for the item.
By default this is the url. Providers can override this when needed.
"""
return '{name}_{size}'.format(name=item.name, size=item.size)


provider = BinSearchProvider()
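
A small illustration of the duplicate problem the name-and-size override addresses for Binsearch and, below, Torznab/Jackett. The assumption here (labelled as such) is that the same release can come back with a different download URL on each search, so a URL-keyed cache stores it twice, while the name-and-size identifier maps both hits to one row; the result objects are hypothetical.

```python
from collections import namedtuple

Result = namedtuple('Result', 'name url size')  # hypothetical search-result stand-in

# The same release seen in two consecutive searches, with a URL that differs each time.
first_pass = Result('Show.S01E01.720p.WEB', 'https://indexer/dl/abc123', 1200000000)
second_pass = Result('Show.S01E01.720p.WEB', 'https://indexer/dl/def456', 1200000000)

url_keys = {first_pass.url, second_pass.url}
identifier_keys = {'{0}_{1}'.format(r.name, r.size) for r in (first_pass, second_pass)}

print(len(url_keys))         # 2 -> the release would be cached (and shown) twice
print(len(identifier_keys))  # 1 -> one cache row, no duplicate manual search result
```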
9 changes: 9 additions & 0 deletions medusa/providers/torrent/torznab/torznab.py
@@ -226,6 +226,15 @@ def get_providers_list(providers):
"""Return custom rss torrent providers."""
return [TorznabProvider(custom_provider) for custom_provider in providers]

@staticmethod
def _get_identifier(item):
"""
Return the identifier for the item.
By default this is the url. Providers can override this when needed.
"""
return '{name}_{size}'.format(name=item.name, size=item.size)

def image_name(self):
"""
Check if we have an image for this provider already.
89 changes: 53 additions & 36 deletions medusa/tv/cache.py
@@ -51,7 +51,8 @@ def __init__(self, provider_id):
log.debug('Creating cache table for provider {0}', provider_id)
self.action(
'CREATE TABLE [{name}]'
' (name TEXT,'
' (identifier TEXT,'
' name TEXT,'
' season NUMERIC,'
' episodes TEXT,'
' indexer NUMERIC,'
@@ -63,27 +64,27 @@
' date_added NUMERIC)'.format(name=provider_id))
else:
sql_results = self.select(
'SELECT url, COUNT(url) AS count '
'SELECT identifier, COUNT(identifier) AS count '
'FROM [{name}] '
'GROUP BY url '
'GROUP BY identifier '
'HAVING count > 1'.format(name=provider_id)
)
for duplicate in sql_results:
self.action(
'DELETE FROM [{name}] '
'WHERE url = ?'.format(name=provider_id),
[duplicate['url']]
'WHERE identifier = ?'.format(name=provider_id),
[duplicate['identifier']]
)

# remove wrong old index
self.action('DROP INDEX IF EXISTS idx_url')

# add unique index if one does not exist to prevent further dupes
log.debug('Creating UNIQUE URL index for {0}', provider_id)
log.debug('Creating UNIQUE IDENTIFIER index for {0}', provider_id)
self.action(
'CREATE UNIQUE INDEX '
'IF NOT EXISTS idx_url_{name} '
'ON [{name}] (url)'.format(name=provider_id)
'IF NOT EXISTS idx_identifier_{name} '
'ON [{name}] (identifier)'.format(name=provider_id)
)

# add release_group column to table if missing
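
A self-contained sqlite3 sketch of the clean-up logic in this hunk, under the assumption that an existing cache table already holds rows sharing an identifier: every identifier that occurs more than once has all of its rows deleted (matching the GROUP BY/HAVING query above), and the UNIQUE index then blocks further duplicates. Table and row values are stand-ins.

```python
import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE provider_cache (identifier TEXT, name TEXT, url TEXT)')
con.executemany(
    'INSERT INTO provider_cache VALUES (?, ?, ?)',
    [
        ('Show.S01E01_1200', 'Show.S01E01', 'https://indexer/dl/abc'),
        ('Show.S01E01_1200', 'Show.S01E01', 'https://indexer/dl/def'),  # duplicate identifier
        ('Show.S01E02_1150', 'Show.S01E02', 'https://indexer/dl/ghi'),
    ],
)

# Delete every row whose identifier occurs more than once, as the cache __init__ does.
duplicates = con.execute(
    'SELECT identifier, COUNT(identifier) AS count FROM provider_cache '
    'GROUP BY identifier HAVING count > 1'
).fetchall()
for identifier, _count in duplicates:
    con.execute('DELETE FROM provider_cache WHERE identifier = ?', [identifier])

# The unique index now prevents new duplicate identifiers from being inserted.
con.execute('CREATE UNIQUE INDEX idx_identifier_provider_cache ON provider_cache (identifier)')
print(con.execute('SELECT COUNT(*) FROM provider_cache').fetchone()[0])  # 1
```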
@@ -197,6 +198,14 @@ def _check_item_auth(self, title, url):
"""Check item auth."""
return True

def _get_identifier(self, item):
"""
Return the identifier for the item.
By default this is the url. Providers can override this when needed.
"""
return self.provider._get_identifier(item)

def update_cache(self, search_start_time):
"""Update provider cache."""
# check if we should update
@@ -258,13 +267,10 @@ def update_cache_manual_search(self, manual_data=None):

results = []
try:
for item in manual_data:
for search_result in manual_data:
log.debug('Adding to cache item found in manual search: {0}',
item.name)
result = self.add_cache_entry(
item.name, item.url, item.seeders,
item.leechers, item.size, item.pubdate
)
search_result.name)
result = self.add_cache_entry(search_result)
if result is not None:
results.append(result)
except Exception as error:
@@ -298,24 +304,27 @@ def _translate_link_url(url):

def _parse_item(self, item):
"""Parse item to create cache entry."""
title, url = self._get_title_and_url(item)
seeders, leechers = self._get_result_info(item)
size = self._get_size(item)
pubdate = self._get_pubdate(item)
search_result = self.provider.get_result()

title, url = self._get_title_and_url(item)
self._check_item_auth(title, url)
title = self._translate_title(title)
url = self._translate_link_url(url)
search_result.name = title
search_result.url = url

if title and url:
title = self._translate_title(title)
url = self._translate_link_url(url)
seeders, leechers = self._get_result_info(item)
search_result.seeders = seeders
search_result.leechers = leechers

return self.add_cache_entry(title, url, seeders,
leechers, size, pubdate)
search_result.size = self._get_size(item)
search_result.pubdate = self._get_pubdate(item)

if title and url:
return self.add_cache_entry(search_result)
else:
log.debug('The data returned from the {0} feed is incomplete,'
' this result is unusable', self.provider.name)
return None

@property
def updated(self):
@@ -376,10 +385,11 @@ def should_update(self, scheduler_start_time):

return True

def add_cache_entry(self, name, url, seeders, leechers, size, pubdate, parsed_result=None):
def add_cache_entry(self, search_result, parsed_result=None):
"""Add item into cache database."""
try:
# Use the already passed parsed_result of possible.
# Use the already passed parsed_result if possible.
name = search_result.name
parse_result = parsed_result or NameParser().parse(name)
except (InvalidNameException, InvalidShowException) as error:
log.debug('{0}', error)
@@ -418,42 +428,49 @@ def add_cache_entry(self, name, url, seeders, leechers, size, pubdate, parsed_re
# Store proper_tags as proper1|proper2|proper3
proper_tags = '|'.join(parse_result.proper_tags)

if not self.item_in_cache(url):
identifier = self._get_identifier(search_result)
url = search_result.url
seeders = search_result.seeders
leechers = search_result.leechers
size = search_result.size
pubdate = search_result.pubdate

if not self.item_in_cache(identifier):
log.debug('Added item: {0} to cache: {1} with url: {2}', name, self.provider_id, url)
return [
'INSERT INTO [{name}] '
' (name, season, episodes, indexerid, url, time, quality, '
' (identifier, name, season, episodes, indexerid, url, time, quality, '
' release_group, version, seeders, leechers, size, pubdate, '
' proper_tags, date_added, indexer ) '
'VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'.format(
'VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'.format(
name=self.provider_id
),
[name, season, episode_text, parse_result.series.series_id, url,
[identifier, name, season, episode_text, parse_result.series.series_id, url,
cur_timestamp, quality, release_group, version,
seeders, leechers, size, pubdate, proper_tags, cur_timestamp, parse_result.series.indexer]
]
else:
log.debug('Updating item: {0} to cache: {1}', name, self.provider_id)
return [
'UPDATE [{name}] '
'SET name=?, season=?, episodes=?, indexer=?, indexerid=?, '
'SET name=?, url=?, season=?, episodes=?, indexer=?, indexerid=?, '
' time=?, quality=?, release_group=?, version=?, '
' seeders=?, leechers=?, size=?, pubdate=?, proper_tags=? '
'WHERE url=?'.format(
'WHERE identifier=?'.format(
name=self.provider_id
),
[name, season, episode_text, parse_result.series.indexer, parse_result.series.series_id,
[name, url, season, episode_text, parse_result.series.indexer, parse_result.series.series_id,
cur_timestamp, quality, release_group, version,
seeders, leechers, size, pubdate, proper_tags, url]
seeders, leechers, size, pubdate, proper_tags, identifier]
]

def item_in_cache(self, url):
def item_in_cache(self, identifier):
"""Check if the url is already available for the specific provider."""
cache_db_con = self._get_db()
return cache_db_con.select(
'SELECT COUNT(url) as count '
'FROM [{provider}] '
'WHERE url=?'.format(provider=self.provider_id), [url]
'WHERE identifier=?'.format(provider=self.provider_id), [identifier]
)[0]['count']

def find_needed_episodes(self, episodes, forced_search=False, down_cur_quality=False):
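
To round out the `add_cache_entry()` signature change above, a hedged sketch of the before/after call pattern: callers now pass a single result object carrying `name`, `url`, `seeders`, `leechers`, `size` and `pubdate` (attribute names taken from the diff), and the cache derives the row key via the provider's `_get_identifier()` hook. The `FakeSearchResult` class and the `cache` argument are simplified stand-ins, not Medusa's real classes.

```python
class FakeSearchResult(object):
    """Hypothetical stand-in for medusa.classes.SearchResult."""

    def __init__(self, name, url, seeders=-1, leechers=-1, size=-1, pubdate=None):
        self.name = name
        self.url = url
        self.seeders = seeders
        self.leechers = leechers
        self.size = size
        self.pubdate = pubdate
        self.parsed_result = None


def add_old_style(cache, result):
    # Before this commit: every field travelled as a separate positional argument.
    return cache.add_cache_entry(result.name, result.url, result.seeders,
                                 result.leechers, result.size, result.pubdate)


def add_new_style(cache, result):
    # After this commit: the whole result object is handed over, and the cache
    # builds the row key from provider._get_identifier(result).
    return cache.add_cache_entry(result, parsed_result=result.parsed_result)
```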
