Fix: Add 220동 식당 identifier & fix Slack notification bug #86

Merged 9 commits on Jul 29, 2024
2 changes: 1 addition & 1 deletion Makefile
@@ -5,5 +5,5 @@ default:

.PHONY: lint
lint:
black --check .
black .
pylint --recursive=yes .
28 changes: 18 additions & 10 deletions crawlers/base_crawler.py
@@ -1,11 +1,12 @@
from abc import ABCMeta, abstractmethod
import re
import datetime
from bs4 import BeautifulSoup
from pytz import timezone
import urllib3
import json
import re
from abc import ABCMeta, abstractmethod

import aiohttp
import urllib3
from bs4 import BeautifulSoup
from pytz import timezone


def text_normalizer(text, only_letters=False):
@@ -189,15 +190,22 @@ async def run(self, url=None, **kwargs):
urllib3.disable_warnings()
if url is None:
url = self.url
async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session:
async with session.get(url) as response:
try:
try:
async with aiohttp.ClientSession(
headers=self.headers,
connector=aiohttp.TCPConnector(ssl=False),
) as session:
async with session.get(url) as response:
if response.status != 200:
print(f"Failed to fetch {url}: Status code {response.status}")
return
html = await response.read()
# html = await response.text()
soup = BeautifulSoup(html, "html.parser")
self.crawl(soup, **kwargs)
except Exception as e:
print(f"Error in Run: {str(e)}")
except Exception as e:
print(f"Error in Run: {str(e)}")
print(f"URL: {url}")

def normalize(self, meal, **kwargs):
for normalizer_cls in self.normalizer_classes:
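The reworked run() moves the try block outside the ClientSession and adds a status check, so connection failures and non-200 responses are reported together with the offending URL. Below is a minimal, self-contained sketch of that pattern; fetch_soup and the example URL are illustrative and not part of the repository.

```python
# Minimal sketch of the error-handling pattern in run(); fetch_soup and the
# example URL are hypothetical, not repository code.
import asyncio

import aiohttp
from bs4 import BeautifulSoup


async def fetch_soup(url, headers=None):
    try:
        async with aiohttp.ClientSession(
            headers=headers,
            connector=aiohttp.TCPConnector(ssl=False),
        ) as session:
            async with session.get(url) as response:
                if response.status != 200:
                    print(f"Failed to fetch {url}: Status code {response.status}")
                    return None
                html = await response.read()
                return BeautifulSoup(html, "html.parser")
    except Exception as e:
        # Session/connection failures are caught here too, with the URL attached.
        print(f"Error in Run: {str(e)}")
        print(f"URL: {url}")
        return None


if __name__ == "__main__":
    asyncio.run(fetch_soup("https://example.com"))
```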
10 changes: 6 additions & 4 deletions crawlers/snuco_crawler.py
@@ -1,15 +1,16 @@
import asyncio
import datetime
import re

from pytz import timezone

from crawlers.base_crawler import (
FindParenthesisHash,
FindPrice,
Meal,
MealNormalizer,
RestaurantCrawler,
Meal,
text_normalizer,
FindPrice,
FindParenthesisHash,
)


@@ -48,7 +49,7 @@ def normalize(self, meal, **kwargs):


class SnucoRestaurantCrawler(RestaurantCrawler):
url = "https://snuco.snu.ac.kr/ko/foodmenu"
url = "https://snuco.snu.ac.kr/foodmenu/"
normalizer_classes = [
FindPrice,
FindParenthesisHash,
@@ -77,6 +78,7 @@ class SnucoRestaurantCrawler(RestaurantCrawler):
"8805545": "3식당",
"8801939": "302동식당",
"8898955": "301동식당",
"8871123": "220동식당",
}
except_restaurant_list = ["기숙사식당"]  # handled by snudorm

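The new "8871123": "220동식당" entry extends what looks like an identifier-to-restaurant-name table, so menus crawled under that identifier can be attributed to 220동식당. A hedged sketch of how such a lookup might be consulted; the dict name and helper below are illustrative, not the crawler's actual attribute names.

```python
# Illustrative only: the real mapping lives on SnucoRestaurantCrawler;
# RESTAURANT_BY_CODE and resolve_restaurant are hypothetical names.
from typing import Optional

RESTAURANT_BY_CODE = {
    "8805545": "3식당",
    "8801939": "302동식당",
    "8898955": "301동식당",
    "8871123": "220동식당",  # entry added by this PR
}


def resolve_restaurant(identifier: str) -> Optional[str]:
    """Return the restaurant name for a crawled identifier, if known."""
    return RESTAURANT_BY_CODE.get(identifier)


assert resolve_restaurant("8871123") == "220동식당"
```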
24 changes: 13 additions & 11 deletions handler.py
@@ -1,20 +1,22 @@
import pymysql
import os
import argparse
import asyncio
import datetime
from pytz import timezone
import os
from itertools import compress
import asyncio
import argparse

import pymysql
from pytz import timezone

from crawlers.base_crawler import text_normalizer
from crawlers.vet_crawler import VetRestaurantCrawler
from crawlers.snudorm_crawler import SnudormRestaurantCrawler
from crawlers.snuco_crawler import SnucoRestaurantCrawler
from crawlers.snudorm_crawler import SnudormRestaurantCrawler
from crawlers.vet_crawler import VetRestaurantCrawler
from slack import (
send_new_restaurants_message,
_send_slack_message,
send_deleted_menus_message,
send_new_menus_message,
send_edited_menus_message,
_send_slack_message,
send_new_menus_message,
send_new_restaurants_message,
)


@@ -209,7 +211,7 @@ def crawl(event, context):
except Exception as e:
siksha_db.rollback()
print(e)
_send_slack_message("Crawling has been failed")
_send_slack_message(f"Crawling has been failed: {str(e)}")
return "Crawling has been failed"
finally:
cursor.close()
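With the message change, the failure alert now carries the exception text, so the Slack channel shows why crawling failed rather than only that it failed. A minimal sketch of the rollback-and-notify pattern; run_crawl, work, siksha_db, and cursor are placeholders standing in for the handler's real objects.

```python
# Sketch of the failure path in crawl(); all names here are placeholders,
# not the handler's actual objects.
def run_crawl(siksha_db, cursor, work, send_slack_message):
    try:
        work(cursor)          # crawl and write menu rows
        siksha_db.commit()
        return "Crawling has been done"
    except Exception as e:
        siksha_db.rollback()
        print(e)
        # PR change: include the exception text in the alert.
        send_slack_message(f"Crawling has been failed: {str(e)}")
        return "Crawling has been failed"
    finally:
        cursor.close()
```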
16 changes: 12 additions & 4 deletions slack.py
@@ -1,14 +1,22 @@
import requests
import os

import requests


def _send_slack_message(message: str):
slack_token = os.environ.get("SLACK_TOKEN")
slack_channel = os.environ["SLACK_CHANNEL"]
if not slack_token:
print("No Slack token provided. Skipping sending message.")
return
body = {"channel": slack_token, "text": message}
headers = {"Authorization": f'Bearer {os.environ["SLACK_TOKEN"]}'}
requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100)
body = {"channel": slack_channel, "text": message}
headers = {"Authorization": f"Bearer {slack_token}"}
try:
res = requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100)
res.raise_for_status()
except Exception as e:
print(f"Failed to send Slack message: {str(e)}")
print(f"Response: {e.response.text if e.response else 'No response'}")


def send_deleted_menus_message(menus: list):
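The rewritten helper posts to the channel named by SLACK_CHANNEL (the old body passed the token as the channel field), skips sending when no token is configured, and surfaces HTTP errors instead of ignoring them. A small usage sketch, assuming both environment variables are set; the token and channel values below are placeholders.

```python
# Usage sketch: the environment values are placeholders, not real credentials.
import os

os.environ.setdefault("SLACK_TOKEN", "xoxb-your-bot-token")
os.environ.setdefault("SLACK_CHANNEL", "#siksha-crawler-alerts")

from slack import _send_slack_message

# With a valid token the message lands in SLACK_CHANNEL; with no token the
# function now prints a notice and returns instead of posting a malformed body.
_send_slack_message("Crawling has been failed: test alert")
```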