From 2f1eb5bf833921cc0329ba1c75d6974406da0da2 Mon Sep 17 00:00:00 2001 From: Hyungwook Choi <69342392+GoGiants1@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:30:24 +0900 Subject: [PATCH] =?UTF-8?q?=08Fix:=20220=EB=8F=99=20=EC=8B=9D=EB=8B=B9=20I?= =?UTF-8?q?dentifier=20=EC=B6=94=EA=B0=80=20&=20=EC=8A=AC=EB=9E=99=20?= =?UTF-8?q?=EB=85=B8=ED=8B=B0=20=EB=B2=84=EA=B7=B8=20(#86)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 220동 식당 전화번호 추가 * Remove dry-run argument in black command * Add http exception handling logic for vet restaurant * fix: snuco base url * Add url debugging msg * Add error msg in slack message for fail alert * Fix slack channel * fix: snuco baseurl * add logic for checking slack token --- Makefile | 2 +- crawlers/base_crawler.py | 28 ++++++++++++++++++---------- crawlers/snuco_crawler.py | 10 ++++++---- handler.py | 24 +++++++++++++----------- slack.py | 16 ++++++++++++---- 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 516c4e8..ee172f3 100644 --- a/Makefile +++ b/Makefile @@ -5,5 +5,5 @@ default: .PHONY: lint lint: - black --check . + black . pylint --recursive=yes . diff --git a/crawlers/base_crawler.py b/crawlers/base_crawler.py index fa34c24..644131a 100644 --- a/crawlers/base_crawler.py +++ b/crawlers/base_crawler.py @@ -1,11 +1,12 @@ -from abc import ABCMeta, abstractmethod -import re import datetime -from bs4 import BeautifulSoup -from pytz import timezone -import urllib3 import json +import re +from abc import ABCMeta, abstractmethod + import aiohttp +import urllib3 +from bs4 import BeautifulSoup +from pytz import timezone def text_normalizer(text, only_letters=False): @@ -189,15 +190,22 @@ async def run(self, url=None, **kwargs): urllib3.disable_warnings() if url is None: url = self.url - async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: - async with session.get(url) as response: - try: + try: + async with aiohttp.ClientSession( + headers=self.headers, + connector=aiohttp.TCPConnector(ssl=False), + ) as session: + async with session.get(url) as response: + if response.status != 200: + print(f"Failed to fetch {url}: Status code {response.status}") + return html = await response.read() # html = await response.text() soup = BeautifulSoup(html, "html.parser") self.crawl(soup, **kwargs) - except Exception as e: - print(f"Error in Run: {str(e)}") + except Exception as e: + print(f"Error in Run: {str(e)}") + print(f"URL: {url}") def normalize(self, meal, **kwargs): for normalizer_cls in self.normalizer_classes: diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index a66816e..a5d2ad3 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -1,15 +1,16 @@ import asyncio import datetime import re + from pytz import timezone from crawlers.base_crawler import ( + FindParenthesisHash, + FindPrice, + Meal, MealNormalizer, RestaurantCrawler, - Meal, text_normalizer, - FindPrice, - FindParenthesisHash, ) @@ -48,7 +49,7 @@ def normalize(self, meal, **kwargs): class SnucoRestaurantCrawler(RestaurantCrawler): - url = "https://snuco.snu.ac.kr/ko/foodmenu" + url = "https://snuco.snu.ac.kr/foodmenu/" normalizer_classes = [ FindPrice, FindParenthesisHash, @@ -77,6 +78,7 @@ class SnucoRestaurantCrawler(RestaurantCrawler): "8805545": "3식당", "8801939": "302동식당", "8898955": "301동식당", + "8871123": "220동식당", } except_restaurant_list = ["기숙사식당"] # snudorm에서 처리 diff --git a/handler.py b/handler.py index b8cc7f4..ef8eb0a 100644 --- a/handler.py +++ b/handler.py @@ -1,20 +1,22 @@ -import pymysql -import os +import argparse +import asyncio import datetime -from pytz import timezone +import os from itertools import compress -import asyncio -import argparse + +import pymysql +from pytz import timezone + from crawlers.base_crawler import text_normalizer -from crawlers.vet_crawler import VetRestaurantCrawler -from crawlers.snudorm_crawler import SnudormRestaurantCrawler from crawlers.snuco_crawler import SnucoRestaurantCrawler +from crawlers.snudorm_crawler import SnudormRestaurantCrawler +from crawlers.vet_crawler import VetRestaurantCrawler from slack import ( - send_new_restaurants_message, + _send_slack_message, send_deleted_menus_message, - send_new_menus_message, send_edited_menus_message, - _send_slack_message, + send_new_menus_message, + send_new_restaurants_message, ) @@ -209,7 +211,7 @@ def crawl(event, context): except Exception as e: siksha_db.rollback() print(e) - _send_slack_message("Crawling has been failed") + _send_slack_message(f"Crawling has been failed: {str(e)}") return "Crawling has been failed" finally: cursor.close() diff --git a/slack.py b/slack.py index 179b399..cfe5b40 100644 --- a/slack.py +++ b/slack.py @@ -1,14 +1,22 @@ -import requests import os +import requests + def _send_slack_message(message: str): slack_token = os.environ.get("SLACK_TOKEN") + slack_channel = os.environ["SLACK_CHANNEL"] if not slack_token: + print("No Slack token provided. Skipping sending message.") return - body = {"channel": slack_token, "text": message} - headers = {"Authorization": f'Bearer {os.environ["SLACK_TOKEN"]}'} - requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100) + body = {"channel": slack_channel, "text": message} + headers = {"Authorization": f"Bearer {slack_token}"} + try: + res = requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100) + res.raise_for_status() + except Exception as e: + print(f"Failed to send Slack message: {str(e)}") + print(f"Response: {e.response.text if e.response else 'No response'}") def send_deleted_menus_message(menus: list):