Skip to content

Commit

Permalink
Code refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
OperaVaria committed Jul 11, 2024
1 parent 0e452eb commit 4bcad74
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 22 deletions.
4 changes: 2 additions & 2 deletions collect_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def scrape(animal_name, query, subreddits, keys):
else:
post_list = reddit_list + pexels_list + pixabay_list + unsplash_list
# Pickle data.
pfile_path = Path(__file__).parents[0].resolve() / f"data/{animal_name}_data.p"
pfile_path = (Path(__file__).parents[0].resolve()).joinpath(f"data/{animal_name}_data.p")
with open(pfile_path, "wb") as file:
pickle.dump(post_list, file)
# Print number of items.
Expand All @@ -47,7 +47,7 @@ def scrape(animal_name, query, subreddits, keys):
def main():
"""Image scraping main function."""
# Load login information file.
keys_path = Path(__file__).parents[0].resolve() / "scraping/auth/keys.yaml"
keys_path = (Path(__file__).parents[0].resolve()).joinpath("scraping/auth/keys.yaml")
with open(keys_path, "r", encoding="utf-8") as keys_file:
keys = safe_load(keys_file)
# Collect capybara content.
Expand Down
8 changes: 3 additions & 5 deletions config/settings.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
""" Flask and Flask extensions configuration file. """

# Built-in imports:
# Imports:
from datetime import timedelta
from pathlib import Path

# CacheLib import:
from cachelib.file import FileSystemCache

# Minify settings:
bypass = ["robots_txt", "humans_txt", "sitemap_xml"]

# Session settings:
sessions_path = Path(__file__).parents[1].resolve() / "flask_session"
SESSIONS_PATH = (Path(__file__).parents[1].resolve()).joinpath("flask_session/")
SESSION_TYPE = "cachelib"
SESSION_CACHELIB = FileSystemCache(threshold=500, cache_dir=sessions_path)
SESSION_CACHELIB = FileSystemCache(threshold=500, cache_dir=SESSIONS_PATH)
SESSION_PERMANENT = True
PERMANENT_SESSION_LIFETIME = timedelta(hours=24)
SESSION_COOKIE_SECURE = True
Expand Down
13 changes: 9 additions & 4 deletions scraping/reddit_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@ def reddit_collect(sub_red_name, keys):
# Scrape submissions, create Post objects, append to list.
for submission in subreddit.hot(limit=30):
if submission.url.lower().endswith((".png", ".jpg", ".jpeg", ".gif")):
post = Post(submission.title, f"r/{submission.subreddit.display_name}", submission.author,
f"https://www.reddit.com/r/{submission.subreddit.display_name}/",
f"https://www.reddit.com/user/{submission.author}/",
f"https://www.reddit.com{submission.permalink}", submission.url)
post = Post(
submission.title,
f"r/{submission.subreddit.display_name}",
submission.author,
f"https://www.reddit.com/r/{submission.subreddit.display_name}/",
f"https://www.reddit.com/user/{submission.author}/",
f"https://www.reddit.com{submission.permalink}",
submission.url,
)
reddit_post_list.append(post)
# Return list.
return reddit_post_list
Expand Down
20 changes: 16 additions & 4 deletions scraping/unsplash_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@ def unsplash_collect(query, keys):
# API request.
url = "https://api.unsplash.com/search/photos"
headers = {"Accept-Version": "v1"}
params = {"client_id": keys["unsplash"]["access_key"], "query": query, "per_page": 20, "content_filter": "high"}
res = requests.get(url = url, headers = headers, params = params, timeout = 60)
params = {
"client_id": keys["unsplash"]["access_key"],
"query": query,
"per_page": 20,
"content_filter": "high",
}
res = requests.get(url=url, headers=headers, params=params, timeout=60)
# Error handling.
if res.ok is not True:
return res.raise_for_status()
Expand All @@ -25,8 +30,15 @@ def unsplash_collect(query, keys):
unsplash_list = []
# Scrape response, create Post objects, append to list.
for result in res_json_data["results"]:
result = Post(result["description"], "Unsplash", result["user"]["name"], "https://unsplash.com/",
result["user"]["links"]["html"], result["links"]["html"], result["urls"]["regular"])
result = Post(
result["description"],
"Unsplash",
result["user"]["name"],
"https://unsplash.com/",
result["user"]["links"]["html"],
result["links"]["html"],
result["urls"]["regular"],
)
unsplash_list.append(result)
# Return list.
return unsplash_list
Expand Down
15 changes: 8 additions & 7 deletions serotonin_flask_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@
__copyright__ = "Copyright © 2024, Csaba Latosinszky"

"""
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
version.
This program is free software: you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not,
see <https://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License along with this program.
If not, see <https://www.gnu.org/licenses/>.
"""

# Built-in imports:
Expand Down

0 comments on commit 4bcad74

Please sign in to comment.