Skip to content

Commit

Permalink
Update flickr_scraper.py (#31)
Browse files (browse the repository at this point in the history)
  • Loading branch information
glenn-jocher authored Jun 22, 2024
1 parent 71d636e commit 55527df
Showing 1 changed file with 10 additions and 25 deletions.
35 changes: 10 additions & 25 deletions flickr_scraper.py
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
# Generated by Glenn Jocher (glenn.jocher@ultralytics.com) for https://github.com/ultralytics

import argparse
import os
import time
from pathlib import Path

from flickrapi import FlickrAPI

Expand All @@ -26,55 +26,40 @@ def get_urls(search="honeybees on flowers", n=10, download=False):
)

if download:
dir = os.getcwd() + os.sep + "images" + os.sep + search.replace(" ", "_") + os.sep # save directory
if not os.path.exists(dir):
os.makedirs(dir)
dir_path = Path.cwd() / "images" / search.replace(" ", "_")
dir_path.mkdir(parents=True, exist_ok=True)

urls = []
for i, photo in enumerate(photos):
if i <= n:
try:
# construct url https://www.flickr.com/services/api/misc.urls.html
url = photo.get("url_o") # original size
if url is None:
url = (
f"https://farm{photo.get('farm')}.staticflickr.com/{photo.get('server')}/"
f"{photo.get('id')}_{photo.get('secret')}_b.jpg"
)
url = f"https://farm{photo.get('farm')}.staticflickr.com/{photo.get('server')}/{photo.get('id')}_{photo.get('secret')}_b.jpg"

# download
if download:
download_uri(url, dir)
download_uri(url, dir_path)

urls.append(url)
print("%g/%g %s" % (i, n, url))
print(f"{i}/{n} {url}")
except Exception:
print("%g/%g error..." % (i, n))
print(f"{i}/{n} error...")

else:
# import pandas as pd
# urls = pd.Series(urls)
# urls.to_csv(search + "_urls.csv")
print("Done. (%.1fs)" % (time.time() - t) + ("\nAll images saved to %s" % dir if download else ""))
print(f"Done. ({time.time() - t:.1f}s)" + (f"\nAll images saved to {dir_path}" if download else ""))
break


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--search", nargs="+", default="honeybees on flowers", help="flickr search term")
# parser.add_argument('--search', type=str, default='honeybees on flowers', help='flickr search term')
parser.add_argument("--search", nargs="+", default=["honeybees on flowers"], help="flickr search term")
parser.add_argument("--n", type=int, default=10, help="number of images")
parser.add_argument("--download", action="store_true", help="download images")
opt = parser.parse_args()

print(f"nargs {opt.search}")
# Check key
help_url = "https://www.flickr.com/services/apps/create/apply"
assert key and secret, f"Flickr API key required in flickr_scraper.py L11-12. To apply visit {help_url}"

for search in opt.search:
get_urls(
search=search, # search term
n=opt.n, # max number of images
download=opt.download,
) # download images
get_urls(search=search, n=opt.n, download=opt.download)

0 comments on commit 55527df

Please sign in to comment.