From c6c6142e5162ee7998f1c21f643488304fcb6ac5 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 16:04:16 +0900 Subject: [PATCH 1/9] =?UTF-8?q?220=EB=8F=99=20=EC=8B=9D=EB=8B=B9=20?= =?UTF-8?q?=EC=A0=84=ED=99=94=EB=B2=88=ED=98=B8=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawlers/snuco_crawler.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index a66816e..ad9035b 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -1,15 +1,16 @@ import asyncio import datetime import re + from pytz import timezone from crawlers.base_crawler import ( + FindParenthesisHash, + FindPrice, + Meal, MealNormalizer, RestaurantCrawler, - Meal, text_normalizer, - FindPrice, - FindParenthesisHash, ) @@ -77,6 +78,7 @@ class SnucoRestaurantCrawler(RestaurantCrawler): "8805545": "3식당", "8801939": "302동식당", "8898955": "301동식당", + "8871123": "220동식당", } except_restaurant_list = ["기숙사식당"] # snudorm에서 처리 From 3f166fa1486954bad6f25578edbbffbb68592d97 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 16:21:16 +0900 Subject: [PATCH 2/9] Remove dry-run argument in black command --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 516c4e8..ee172f3 100644 --- a/Makefile +++ b/Makefile @@ -5,5 +5,5 @@ default: .PHONY: lint lint: - black --check . + black . pylint --recursive=yes . From e892bc8558542c80dede868150b48b088a289ab2 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 16:21:48 +0900 Subject: [PATCH 3/9] Add http exception handling logic for vet restaurant --- crawlers/base_crawler.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/crawlers/base_crawler.py b/crawlers/base_crawler.py index fa34c24..fae00a6 100644 --- a/crawlers/base_crawler.py +++ b/crawlers/base_crawler.py @@ -1,11 +1,12 @@ -from abc import ABCMeta, abstractmethod -import re import datetime -from bs4 import BeautifulSoup -from pytz import timezone -import urllib3 import json +import re +from abc import ABCMeta, abstractmethod + import aiohttp +import urllib3 +from bs4 import BeautifulSoup +from pytz import timezone def text_normalizer(text, only_letters=False): @@ -189,15 +190,20 @@ async def run(self, url=None, **kwargs): urllib3.disable_warnings() if url is None: url = self.url - async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: - async with session.get(url) as response: - try: + try: + async with aiohttp.ClientSession( + headers=self.headers, connector=aiohttp.TCPConnector(ssl=False) + ) as session: + async with session.get(url) as response: + if response.status != 200: + print(f"Failed to fetch {url}: Status code {response.status}") + return html = await response.read() # html = await response.text() soup = BeautifulSoup(html, "html.parser") self.crawl(soup, **kwargs) - except Exception as e: - print(f"Error in Run: {str(e)}") + except Exception as e: + print(f"Error in Run: {str(e)}") def normalize(self, meal, **kwargs): for normalizer_cls in self.normalizer_classes: From dc0cda0d53d6ad62682827ab5fed2105f8a62ab3 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:12:21 +0900 Subject: [PATCH 4/9] fix: snuco base url --- crawlers/snuco_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index ad9035b..e5af4ca 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -49,7 +49,7 @@ def normalize(self, meal, **kwargs): class SnucoRestaurantCrawler(RestaurantCrawler): - url = "https://snuco.snu.ac.kr/ko/foodmenu" + url = "https://snuco.snu.ac.kr/ko/foodmenu/" normalizer_classes = [ FindPrice, FindParenthesisHash, From d387982dc252439323d3fb7d344a36ea3ca181f4 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:12:38 +0900 Subject: [PATCH 5/9] Add url debugging msg --- crawlers/base_crawler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crawlers/base_crawler.py b/crawlers/base_crawler.py index fae00a6..644131a 100644 --- a/crawlers/base_crawler.py +++ b/crawlers/base_crawler.py @@ -192,7 +192,8 @@ async def run(self, url=None, **kwargs): url = self.url try: async with aiohttp.ClientSession( - headers=self.headers, connector=aiohttp.TCPConnector(ssl=False) + headers=self.headers, + connector=aiohttp.TCPConnector(ssl=False), ) as session: async with session.get(url) as response: if response.status != 200: @@ -204,6 +205,7 @@ async def run(self, url=None, **kwargs): self.crawl(soup, **kwargs) except Exception as e: print(f"Error in Run: {str(e)}") + print(f"URL: {url}") def normalize(self, meal, **kwargs): for normalizer_cls in self.normalizer_classes: From 94360a69ef74dc013eb8855841792d75624dc958 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:13:19 +0900 Subject: [PATCH 6/9] Add error msg in slack message for fail alert --- handler.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/handler.py b/handler.py index b8cc7f4..ef8eb0a 100644 --- a/handler.py +++ b/handler.py @@ -1,20 +1,22 @@ -import pymysql -import os +import argparse +import asyncio import datetime -from pytz import timezone +import os from itertools import compress -import asyncio -import argparse + +import pymysql +from pytz import timezone + from crawlers.base_crawler import text_normalizer -from crawlers.vet_crawler import VetRestaurantCrawler -from crawlers.snudorm_crawler import SnudormRestaurantCrawler from crawlers.snuco_crawler import SnucoRestaurantCrawler +from crawlers.snudorm_crawler import SnudormRestaurantCrawler +from crawlers.vet_crawler import VetRestaurantCrawler from slack import ( - send_new_restaurants_message, + _send_slack_message, send_deleted_menus_message, - send_new_menus_message, send_edited_menus_message, - _send_slack_message, + send_new_menus_message, + send_new_restaurants_message, ) @@ -209,7 +211,7 @@ def crawl(event, context): except Exception as e: siksha_db.rollback() print(e) - _send_slack_message("Crawling has been failed") + _send_slack_message(f"Crawling has been failed: {str(e)}") return "Crawling has been failed" finally: cursor.close() From 02178a98d2a849dbe8056af9f0d820ebed7bfa86 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:13:33 +0900 Subject: [PATCH 7/9] Fix slack channel --- slack.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/slack.py b/slack.py index 179b399..7714860 100644 --- a/slack.py +++ b/slack.py @@ -1,14 +1,19 @@ -import requests import os +import requests + def _send_slack_message(message: str): slack_token = os.environ.get("SLACK_TOKEN") - if not slack_token: - return - body = {"channel": slack_token, "text": message} - headers = {"Authorization": f'Bearer {os.environ["SLACK_TOKEN"]}'} - requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100) + slack_channel = os.environ["SLACK_CHANNEL"] + body = {"channel": slack_channel, "text": message} + headers = {"Authorization": f"Bearer {slack_token}"} + try: + res = requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100) + res.raise_for_status() + except Exception as e: + print(f"Failed to send Slack message: {str(e)}") + print(f"Response: {e.response.text if e.response else 'No response'}") def send_deleted_menus_message(menus: list): From 28faf39d20e953a086433bae0275ab84a9382580 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:19:33 +0900 Subject: [PATCH 8/9] fix: snuco baseurl --- crawlers/snuco_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index e5af4ca..a5d2ad3 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -49,7 +49,7 @@ def normalize(self, meal, **kwargs): class SnucoRestaurantCrawler(RestaurantCrawler): - url = "https://snuco.snu.ac.kr/ko/foodmenu/" + url = "https://snuco.snu.ac.kr/foodmenu/" normalizer_classes = [ FindPrice, FindParenthesisHash, From 1400bf2ca2197d30695c914dd9a3561a74ca9945 Mon Sep 17 00:00:00 2001 From: GoGiants1 Date: Mon, 29 Jul 2024 17:52:22 +0900 Subject: [PATCH 9/9] add logic for checking slack token --- slack.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/slack.py b/slack.py index 7714860..cfe5b40 100644 --- a/slack.py +++ b/slack.py @@ -6,6 +6,9 @@ def _send_slack_message(message: str): slack_token = os.environ.get("SLACK_TOKEN") slack_channel = os.environ["SLACK_CHANNEL"] + if not slack_token: + print("No Slack token provided. Skipping sending message.") + return body = {"channel": slack_channel, "text": message} headers = {"Authorization": f"Bearer {slack_token}"} try: