Gfycat API #769

Merged: 1 commit, Feb 12, 2023
15 changes: 13 additions & 2 deletions .pre-commit-config.yaml
@@ -2,13 +2,18 @@
 # See https://pre-commit.com/hooks.html for more hooks

 repos:
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.12.1
+    hooks:
+      - id: validate-pyproject
+
   - repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 23.1.0
     hooks:
       - id: black

   - repo: https://github.com/pycqa/isort
-    rev: 5.11.4
+    rev: 5.12.0
     hooks:
       - id: isort
         name: isort (python)
@@ -23,3 +28,9 @@ repos:
     rev: v0.12.0
     hooks:
       - id: markdownlint
+
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: 1.13.0
+    hooks:
+      - id: blacken-docs
+        additional_dependencies: [black>=23.1.0]
2 changes: 1 addition & 1 deletion bdfr/__main__.py
@@ -82,7 +82,7 @@ def _check_version(context, param, value):
     if not value or context.resilient_parsing:
         return
     current = __version__
-    latest = requests.get("https://pypi.org/pypi/bdfr/json").json()["info"]["version"]
+    latest = requests.get("https://pypi.org/pypi/bdfr/json", timeout=10).json()["info"]["version"]
     print(f"You are currently using v{current} the latest is v{latest}")
     context.exit()
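This is the first of many hunks in this PR that add `timeout=10` to a bare `requests` call. A minimal sketch of what the parameter changes, reusing the PyPI endpoint from this hunk (the fallback value is illustrative, not part of the PR):

```python
import requests

try:
    # Without a timeout, an unresponsive server can stall this call forever;
    # with timeout=10 it raises after ten seconds instead.
    latest = requests.get("https://pypi.org/pypi/bdfr/json", timeout=10).json()["info"]["version"]
except requests.exceptions.Timeout:
    latest = "unknown"  # illustrative fallback so the caller can degrade gracefully
```

The value is in seconds and, per the `requests` documentation, applies separately to the connect and read phases.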
2 changes: 1 addition & 1 deletion bdfr/downloader.py
@@ -26,7 +26,7 @@

 def _calc_hash(existing_file: Path):
     chunk_size = 1024 * 1024
-    md5_hash = hashlib.md5()
+    md5_hash = hashlib.md5(usedforsecurity=False)
     with existing_file.open("rb") as file:
         chunk = file.read(chunk_size)
         while chunk:
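The `usedforsecurity=False` flag, available since Python 3.9, marks the digest as non-cryptographic so that FIPS-restricted interpreters do not reject MD5; here the hash only de-duplicates downloaded files. A self-contained sketch of the chunked hashing loop above (the file path is a placeholder):

```python
import hashlib

md5_hash = hashlib.md5(usedforsecurity=False)  # de-duplication only, not a security use
with open("downloaded_file.bin", "rb") as file:  # placeholder path
    # Read in 1 MiB chunks so large files never sit in memory whole.
    for chunk in iter(lambda: file.read(1024 * 1024), b""):
        md5_hash.update(chunk)
print(md5_hash.hexdigest())
```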
4 changes: 3 additions & 1 deletion bdfr/oauth2.py
@@ -26,7 +26,9 @@ def __init__(self, wanted_scopes: set[str], client_id: str, client_secret: str):
     @staticmethod
     def _check_scopes(wanted_scopes: set[str]):
         response = requests.get(
-            "https://www.reddit.com/api/v1/scopes.json", headers={"User-Agent": "fetch-scopes test"}
+            "https://www.reddit.com/api/v1/scopes.json",
+            headers={"User-Agent": "fetch-scopes test"},
+            timeout=10,
         )
         known_scopes = [scope for scope, data in response.json().items()]
         known_scopes.append("*")
4 changes: 2 additions & 2 deletions bdfr/resource.py
@@ -49,7 +49,7 @@ def download(self, download_parameters: Optional[dict] = None):
         self.create_hash()

     def create_hash(self):
-        self.hash = hashlib.md5(self.content)
+        self.hash = hashlib.md5(self.content, usedforsecurity=False)

     def _determine_extension(self) -> Optional[str]:
         extension_pattern = re.compile(r".*(\..{3,5})$")
@@ -68,7 +68,7 @@ def http_download(url: str, download_parameters: dict) -> Optional[bytes]:
         max_wait_time = 300
         while True:
             try:
-                response = requests.get(url, headers=headers)
+                response = requests.get(url, headers=headers, timeout=10)
                 if re.match(r"^2\d{2}", str(response.status_code)) and response.content:
                     return response.content
                 elif response.status_code in (408, 429):
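The `timeout=10` in `http_download` sits inside a retry loop that the hunk only partially shows. A hedged sketch of that loop's shape; the backoff step and the error type are assumptions, since they fall outside the visible context:

```python
import time

import requests

def fetch_with_retry(url: str, headers: dict, wait_time: int = 60) -> bytes:
    max_wait_time = 300
    while True:
        response = requests.get(url, headers=headers, timeout=10)
        if response.ok and response.content:
            return response.content
        if response.status_code in (408, 429) and wait_time < max_wait_time:
            time.sleep(wait_time)  # assumed backoff step, not shown in the diff
            wait_time *= 2
            continue
        raise RuntimeError(f"Unrecoverable response {response.status_code} from {url}")
```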
13 changes: 12 additions & 1 deletion bdfr/site_downloaders/base_downloader.py
@@ -28,10 +28,21 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     @staticmethod
     def retrieve_url(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
         try:
-            res = requests.get(url, cookies=cookies, headers=headers)
+            res = requests.get(url, cookies=cookies, headers=headers, timeout=10)
         except requests.exceptions.RequestException as e:
             logger.exception(e)
             raise SiteDownloaderError(f"Failed to get page {url}")
         if res.status_code != 200:
             raise ResourceNotFound(f"Server responded with {res.status_code} to {url}")
         return res
+
+    @staticmethod
+    def post_url(url: str, cookies: dict = None, headers: dict = None, payload: dict = None) -> requests.Response:
+        try:
+            res = requests.post(url, cookies=cookies, headers=headers, json=payload, timeout=10)
+        except requests.exceptions.RequestException as e:
+            logger.exception(e)
+            raise SiteDownloaderError(f"Failed to post to {url}")
+        if res.status_code != 200:
+            raise ResourceNotFound(f"Server responded with {res.status_code} to {url}")
+        return res
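The new `post_url` mirrors `retrieve_url` but sends `payload` as a JSON body via `requests.post(..., json=payload)`. This is the call shape the Gfycat downloader uses further down in this PR:

```python
from bdfr.site_downloaders.gfycat import Gfycat

# Values elided here; the real headers and access key appear in gfycat.py below.
response = Gfycat.post_url(
    "https://weblogin.gfycat.com/oauth/webtoken",
    headers={"content-type": "text/plain;charset=UTF-8"},
    payload={"access_key": "..."},
)
token = response.json()["access_token"]
```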
2 changes: 1 addition & 1 deletion bdfr/site_downloaders/gallery.py
@@ -42,7 +42,7 @@ def _get_links(id_dict: list[dict]) -> list[str]:
             possible_extensions = (".jpg", ".png", ".gif", ".gifv", ".jpeg")
             for extension in possible_extensions:
                 test_url = f"https://i.redd.it/{image_id}{extension}"
-                response = requests.head(test_url)
+                response = requests.head(test_url, timeout=10)
                 if response.status_code == 200:
                     out.append(test_url)
                     break
43 changes: 36 additions & 7 deletions bdfr/site_downloaders/gfycat.py
@@ -5,7 +5,7 @@
 import re
 from typing import Optional

-from bs4 import BeautifulSoup
+from cachetools import TTLCache, cached
 from praw.models import Submission

 from bdfr.exceptions import SiteDownloaderError
@@ -21,25 +21,54 @@ def __init__(self, post: Submission):
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         return super().find_resources(authenticator)

+    @staticmethod
+    @cached(cache=TTLCache(maxsize=5, ttl=3420))
+    def _get_auth_token() -> str:
+        headers = {
+            "content-type": "text/plain;charset=UTF-8",
+            "host": "weblogin.gfycat.com",
+            "origin": "https://gfycat.com",
+        }
+        payload = {"access_key": "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"}
+        token = json.loads(
+            Gfycat.post_url("https://weblogin.gfycat.com/oauth/webtoken", headers=headers, payload=payload).text
+        )["access_token"]
+        return token
+
     @staticmethod
     def _get_link(url: str) -> set[str]:
         gfycat_id = re.match(r".*/(.*?)(?:/?|-.*|\..{3,4})$", url).group(1)
         url = "https://gfycat.com/" + gfycat_id

         response = Gfycat.retrieve_url(url)
         if re.search(r"(redgifs|gifdeliverynetwork)", response.url):
-            url = url.lower()  # Fixes error with old gfycat/redgifs links
+            url = url.lower()
             return Redgifs._get_link(url)

-        soup = BeautifulSoup(response.text, "html.parser")
-        content = soup.find("script", attrs={"data-react-helmet": "true", "type": "application/ld+json"})
+        auth_token = Gfycat._get_auth_token()
+        if not auth_token:
+            raise SiteDownloaderError("Unable to retrieve Gfycat API token")
+
+        headers = {
+            "referer": "https://gfycat.com/",
+            "origin": "https://gfycat.com",
+            "content-type": "application/json",
+            "Authorization": f"Bearer {auth_token}",
+        }
+        content = Gfycat.retrieve_url(f"https://api.gfycat.com/v1/gfycats/{gfycat_id}", headers=headers)
+
+        if content is None:
+            raise SiteDownloaderError("Could not read the API source")

         try:
-            out = json.loads(content.contents[0])["video"]["contentUrl"]
+            response_json = json.loads(content.text)
+        except json.JSONDecodeError as e:
+            raise SiteDownloaderError(f"Received data was not valid JSON: {e}")
+
+        try:
+            out = response_json["gfyItem"]["mp4Url"]
         except (IndexError, KeyError, AttributeError) as e:
             raise SiteDownloaderError(f"Failed to download Gfycat link {url}: {e}")
-        except json.JSONDecodeError as e:
-            raise SiteDownloaderError(f"Did not receive valid JSON data: {e}")
         return {
             out,
         }
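The `@cached(cache=TTLCache(maxsize=5, ttl=3420))` decorator memoizes the token for 3420 seconds (57 minutes), presumably to stay safely inside a roughly one-hour token lifetime; the lifetime itself is an assumption, only the TTL is in the diff. A self-contained sketch of the caching behaviour:

```python
from cachetools import TTLCache, cached

@cached(cache=TTLCache(maxsize=5, ttl=3420))
def get_token() -> str:
    print("fetching a fresh token")  # runs only on a cache miss
    return "example-token"           # stand-in for the real web request

get_token()  # cache miss: prints and stores the result
get_token()  # served from the cache; nothing is fetched until 3420 s pass
```

Since `_get_auth_token` takes no arguments, all calls share a single cache key, so `maxsize=5` is effectively headroom.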
15 changes: 9 additions & 6 deletions bdfr/site_downloaders/redgifs.py
@@ -6,6 +6,7 @@
 from typing import Optional

 import requests
+from cachetools import TTLCache, cached
 from praw.models import Submission

 from bdfr.exceptions import SiteDownloaderError
@@ -22,6 +23,12 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         media_urls = self._get_link(self.post.url)
         return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]

+    @staticmethod
+    @cached(cache=TTLCache(maxsize=5, ttl=82080))
+    def _get_auth_token() -> str:
+        token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
+        return token
+
     @staticmethod
     def _get_id(url: str) -> str:
         try:
@@ -38,7 +45,7 @@ def _get_id(url: str) -> str:
     def _get_link(url: str) -> set[str]:
         redgif_id = Redgifs._get_id(url)

-        auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
+        auth_token = Redgifs._get_auth_token()
         if not auth_token:
             raise SiteDownloaderError("Unable to retrieve Redgifs API token")

@@ -48,7 +55,6 @@ def _get_link(url: str) -> set[str]:
             "content-type": "application/json",
             "Authorization": f"Bearer {auth_token}",
         }
-
         content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers)

         if content is None:
@@ -62,7 +68,7 @@ def _get_link(url: str) -> set[str]:
         out = set()
         try:
             if response_json["gif"]["type"] == 1:  # type 1 is a video
-                if requests.get(response_json["gif"]["urls"]["hd"], headers=headers).ok:
+                if requests.head(response_json["gif"]["urls"]["hd"], headers=headers, timeout=10).ok:
                     out.add(response_json["gif"]["urls"]["hd"])
                 else:
                     out.add(response_json["gif"]["urls"]["sd"])
@@ -80,7 +86,4 @@ def _get_link(url: str) -> set[str]:
         except (KeyError, AttributeError):
             raise SiteDownloaderError("Failed to find JSON data in page")

-        # Update subdomain if old one is returned
-        out = {re.sub("thumbs2", "thumbs3", link) for link in out}
-        out = {re.sub("thumbs3", "thumbs4", link) for link in out}
         return out
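Two independent improvements sit in this file: the temporary token is now cached for 82080 seconds (22.8 hours, presumably just under what we assume is a 24-hour validity window), and the HD-availability probe switched from GET to HEAD. A sketch of why the probe change matters; the URL is a placeholder:

```python
import requests

hd_url = "https://media.redgifs.com/Example.mp4"  # placeholder URL

# HEAD returns only status and headers, so checking .ok no longer downloads
# the entire HD file that would then be fetched a second time anyway.
hd_available = requests.head(hd_url, timeout=10).ok
```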
2 changes: 1 addition & 1 deletion bdfr/site_downloaders/vidble.py
@@ -37,7 +37,7 @@ def get_links(url: str) -> set[str]:
         if not re.search(r"vidble.com/(show/|album/|watch\?v)", url):
             url = re.sub(r"/(\w*?)$", r"/show/\1", url)

-        page = requests.get(url)
+        page = requests.get(url, timeout=10)
         soup = bs4.BeautifulSoup(page.text, "html.parser")
         content_div = soup.find("div", attrs={"id": "ContentPlaceHolder1_divContent"})
         images = content_div.find_all("img")
13 changes: 7 additions & 6 deletions pyproject.toml
@@ -25,12 +25,13 @@ classifiers = [
 dependencies = [
     "appdirs>=1.4.4",
     "beautifulsoup4>=4.10.0",
+    "cachetools>=5.3.0",
     "click>=8.0.0",
     "dict2xml>=1.7.0",
     "praw>=7.2.0",
     "pyyaml>=5.4.1",
-    "requests>=2.25.1",
-    "yt-dlp>=2022.11.11",
+    "requests>=2.28.2",
+    "yt-dlp>=2023.1.6",
 ]
 dynamic = ["version"]
@@ -41,11 +42,11 @@ data-files = {"config" = ["bdfr/default_config.cfg",]}

 [project.optional-dependencies]
 dev = [
-    "black>=22.12.0",
+    "black>=23.1.0",
     "Flake8-pyproject>=1.2.2",
-    "isort>=5.11.4",
-    "pre-commit>=2.20.0",
-    "pytest>=7.1.0",
+    "isort>=5.12.0",
+    "pre-commit>=3.0.4",
+    "pytest>=7.2.1",
     "tox>=3.27.1",
 ]
5 changes: 1 addition & 4 deletions tests/site_downloaders/test_direct.py
@@ -14,10 +14,7 @@
     ("test_url", "expected_hash"),
     (
         ("https://i.redd.it/q6ebualjxzea1.jpg", "6ec154859c777cb401132bb991cb3635"),
-        (
-            "https://file-examples.com/wp-content/uploads/2017/11/file_example_MP3_700KB.mp3",
-            "35257826e20227a8a57d0e5a410e03c7",
-        ),
+        ("https://filesamples.com/samples/audio/mp3/sample3.mp3", "d30a2308f188cbb11d74cf20c357891c"),
     ),
 )
 def test_download_resource(test_url: str, expected_hash: str):
7 changes: 7 additions & 0 deletions tests/site_downloaders/test_gfycat.py
@@ -9,6 +9,13 @@
 from bdfr.site_downloaders.gfycat import Gfycat


+@pytest.mark.online
+def test_auth_cache():
+    auth1 = Gfycat._get_auth_token()
+    auth2 = Gfycat._get_auth_token()
+    assert auth1 == auth2
+
+
 @pytest.mark.online
 @pytest.mark.parametrize(
     ("test_url", "expected_url"),
7 changes: 7 additions & 0 deletions tests/site_downloaders/test_redgifs.py
@@ -10,6 +10,13 @@
 from bdfr.site_downloaders.redgifs import Redgifs


+@pytest.mark.online
+def test_auth_cache():
+    auth1 = Redgifs._get_auth_token()
+    auth2 = Redgifs._get_auth_token()
+    assert auth1 == auth2
+
+
 @pytest.mark.parametrize(
     ("test_url", "expected"),
     (
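Both new tests call the live APIs, hence `@pytest.mark.online`. An offline variant of the same assertion is possible with a throwaway function and a short TTL; this sketch is entirely illustrative and not part of the PR:

```python
import time

from cachetools import TTLCache, cached

calls = []

@cached(cache=TTLCache(maxsize=1, ttl=0.1))
def fake_token() -> int:
    calls.append(1)  # records each real invocation
    return len(calls)

def test_ttl_cache_offline():
    assert fake_token() == fake_token()  # second call is served from the cache
    time.sleep(0.2)                      # let the 0.1 s TTL lapse
    assert fake_token() == 2             # cache expired, so the body ran again
```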