From 68c6062ceb8bd80dd94758e2f54edb3dd3c44553 Mon Sep 17 00:00:00 2001 From: Ujjwal Verma Date: Thu, 4 Apr 2019 00:56:57 +0530 Subject: [PATCH] Remove usage of urlembed service --- handlers/linksave_handler.py | 8 ++++++-- util/savelinkhelper.py | 24 +++++++++++------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/handlers/linksave_handler.py b/handlers/linksave_handler.py index e8d6fe2..63d4afc 100644 --- a/handlers/linksave_handler.py +++ b/handlers/linksave_handler.py @@ -36,7 +36,12 @@ def execute(cls, slack_wrapper, args, timestamp, channel_id, user_id, user_is_ad slack_wrapper.post_message(channel_id, "Save Link failed: Unable to extract URL", timestamp) return - url_data = unfurl(url.group()) + try: + url_data = unfurl(url.group()) + except requests.exceptions.Timeout as e: + slack_wrapper.post_message(channel_id, "Save Link failed: Request timed out", timestamp) + log.error(e) + return data = { "options[staticman-token]": LINKSAVE_CONFIG["staticman-token"], @@ -44,7 +49,6 @@ def execute(cls, slack_wrapper, args, timestamp, channel_id, user_id, user_is_ad "fields[link]": url.group(), "fields[excerpt]": url_data["desc"], "fields[category]": args[0], - "fields[content]": url_data["content"], "fields[header][overlay_image]": url_data["img"], "fields[user]": profile_details["display_name"] or profile_details["real_name"] } diff --git a/util/savelinkhelper.py b/util/savelinkhelper.py index 6ad29f0..38a94c7 100644 --- a/util/savelinkhelper.py +++ b/util/savelinkhelper.py @@ -10,7 +10,7 @@ def get_title(soup: BeautifulSoup): title = soup.find("meta", property=re.compile("title", re.I)) or \ - soup.find("meta", attrs={"name": re.compile("title", re.I)}) + soup.find("meta", attrs={"name": re.compile("title", re.I)}) if title: title = title["content"] else: @@ -22,32 +22,30 @@ def get_title(soup: BeautifulSoup): def get_desc(soup: BeautifulSoup): desc = soup.find("meta", property=re.compile("desc", re.I)) or \ - soup.find("meta", attrs={"name": re.compile("desc", re.I)}) + soup.find("meta", attrs={"name": re.compile("desc", re.I)}) if desc: return desc["content"].strip() return "" -def get_content(url: str): - resp = requests.get("https://urlembed.com/json/url/{}".format(url)) - if not resp.ok: - return "", "" - resp = resp.json() - content = BeautifulSoup(resp["content"], "html.parser").prettify() - return content, resp["url"] # resp["url"] is image's URL +def get_img(soup: BeautifulSoup): + img = soup.find("meta", property=re.compile("image", re.I)) or \ + soup.find("meta", attrs={"name": re.compile("image", re.I)}) + if img: + return img["content"].strip() + + return "" def unfurl(url: str): - resp = requests.get(url).text + resp = requests.get(url, timeout=15).text soup = BeautifulSoup(resp, "html.parser") - content, img = get_content(url) details = { "title": get_title(soup), "desc": get_desc(soup), - "content": content, - "img": img + "img": get_img(soup) } return details