Fixed Zamunda provider search #8819

Open · wants to merge 4 commits into base: develop

Changes from 1 commit
37 changes: 19 additions & 18 deletions sickchill/oldbeard/providers/zamunda.py
@@ -1,4 +1,5 @@
import re
+import time
from urllib.parse import quote_plus

from requests.utils import dict_from_cookiejar
@@ -31,6 +32,7 @@ def __init__(self):

self.url = self.urls["base_url"]
self.categories = "&c7=1&c33=1&c55=1"
+self.pattern = r'<table[^>]*id=["\']zbtable["\'][^>]*>'

self.cache = tvcache.TVCache(self, min_time=30) # only poll Zamunda every 30 minutes max
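As an aside, a quick standalone check (with made-up sample tags, not taken from the site) of what the new pattern buys over the old exact-substring lookup: it matches the results table regardless of attribute order or quote style.

import re

pattern = r'<table[^>]*id=["\']zbtable["\'][^>]*>'
samples = [
    '<table align="center" id="zbtable">',  # the markup the old index() lookup expected
    "<table id='zbtable' border=1>",        # reordered, single-quoted variant
]
for tag in samples:
    print(bool(re.search(pattern, tag, re.IGNORECASE)))  # True for both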

@@ -50,18 +52,18 @@ def login(self):

change_language_response = self.get_url(self.urls["change_language"], returns="text")
if not change_language_response:
-logger.warning(_("Unable to connect to provider"))
+logger.warning("Unable to connect to provider")
return False

login_params = {"username": self.username, "password": self.password}

response = self.get_url(self.urls["login"], post_data=login_params, returns="text")
if not response:
-logger.warning(_("Unable to connect to provider"))
+logger.warning("Unable to connect to provider")
return False

if re.search("Login failed!", response):
-logger.warning(_("Invalid username or password. Check your settings"))
+logger.warning("Invalid username or password. Check your settings")
return False

return True
@@ -78,34 +80,35 @@ def search(self, search_strings):
for mode in search_strings:
items = []

-logger.debug(_("Search Mode: {mode}").format(mode=mode))
+logger.debug(f"Search Mode: {mode}")
for search_string in {*search_strings[mode]}:
search_url = self.urls["search"] % quote_plus(search_string) + self.categories
-logger.debug(_("Search String: {search_string}").format(search_string=search_string))
+logger.debug(f"Search String: {search_string}")

data = self.get_url(search_url, returns="text")
if not data or "Flood protection is activated. Please, wait few seconds to continue." in data:
-logger.debug(_("Flood protection is activated"))
+logger.debug("Flood protection is activated")
continue

if data.find("Sorry, nothing found") != -1:
-logger.debug(_("Data returned from provider does not contain any torrents"))
+logger.debug("Data returned from provider does not contain any torrents")
continue

# Search result page contains some invalid html that prevents html parser from returning all data.
# We cut everything before the table that contains the data we are interested in thus eliminating
# the invalid html portions
-                try:
-                    index = data.lower().index('<table align="center" id="zbtable"')
-                except ValueError:
-                    logger.debug(_("Could not find table of torrents zbtable"))
+                match = re.search(self.pattern, data, re.IGNORECASE)
+                if match:
+                    index = match.start()
+                else:
+                    logger.debug("Table not found")
continue

data = data[index:]

with BS4Parser(data) as html:
if not html:
-logger.debug(_("No html data parsed from provider"))
+logger.debug("No html data parsed from provider")
continue

torrent_rows = []
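The comment above explains the approach: locate the start of the results table and drop everything before it so the parser only sees well-formed markup. A minimal sketch of that flow, using a made-up response snippet and plain BeautifulSoup in place of the project's BS4Parser wrapper:

import re
from bs4 import BeautifulSoup

pattern = r'<table[^>]*id=["\']zbtable["\'][^>]*>'
data = '<html><bad<markup><table align="center" id="zbtable"><tr><td>row</td></tr></table>'

match = re.search(pattern, data, re.IGNORECASE)
if match:
    # Cut everything before the results table, then parse only that part.
    html = BeautifulSoup(data[match.start():], "html.parser")
    rows = html.find("table", id="zbtable")("tr")
    print(len(rows))  # 1 row in this sample
else:
    print("Table not found")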
@@ -114,7 +117,7 @@ def search(self, search_strings):
torrent_rows = torrent_table("tr")

if not torrent_rows:
-logger.debug(_("Could not find results in returned data"))
+logger.debug("Could not find results in returned data")
continue

# Skip column headers
@@ -144,11 +147,7 @@ def search(self, search_strings):
# Filter unseeded torrent
if seeders < self.minseed or leechers < self.minleech:
if mode != "RSS":
-                                logger.debug(
-                                    _("Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})").format(
-                                        title, seeders, leechers
-                                    )
-                                )
+                                logger.debug(f"Discarding torrent because it doesn't meet the minimum seeders or leechers: {title} (S:{seeders} L:{leechers})")
continue

item = {"title": title, "link": self.url + download_url, "size": size, "seeders": seeders, "leechers": leechers, "hash": ""}
@@ -159,5 +158,7 @@ def search(self, search_strings):
items.sort(key=lambda d: try_int(d.get("seeders", 0)), reverse=True)

results += items
logger.debug("Waiting to prevent flood protection...")
time.sleep(2)
Contributor


🛠️ Refactor suggestion

Consider improving the flood protection mechanism.

The current implementation has two potential issues:

  1. The 2-second delay might be insufficient for some scenarios
  2. The delay at the end of the search won't help prevent flood protection during the search loop

Consider moving the delay inside the search loop and making the delay time configurable.

             for search_string in {*search_strings[mode]}:
+                logger.debug("Waiting to prevent flood protection...")
+                time.sleep(self.flood_protection_delay)  # Add this as a configurable class attribute
                 search_url = self.urls["search"] % quote_plus(search_string) + self.categories
                 logger.debug(f"Search String: {search_string}")

                 data = self.get_url(search_url, returns="text")
-        logger.debug("Waiting to prevent flood protection...")
-        time.sleep(2)

Also, add the delay configuration in __init__:

def __init__(self):
    super().__init__("Zamunda")
    self.flood_protection_delay = 2  # seconds


return results
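To round out the suggestion above, here is a rough, self-contained illustration of the idea: sleep before each request inside the loop, with the interval kept as a single configurable attribute. The class, method, and attribute names below are illustrative only and are not part of the PR.

import time

class ThrottledProvider:
    def __init__(self, flood_protection_delay=2):
        # Seconds to wait before each request; tune per tracker.
        self.flood_protection_delay = flood_protection_delay

    def search(self, search_strings):
        results = []
        for search_string in search_strings:
            # Pause before every request so back-to-back queries
            # do not trigger the site's flood protection.
            time.sleep(self.flood_protection_delay)
            results.append(f"queried: {search_string}")
        return results

Keeping the delay as an attribute also makes it easy to raise for trackers with stricter rate limits without touching the search logic.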