diff --git a/crawlers/base_crawler.py b/crawlers/base_crawler.py index 82dd91a..f33f34b 100644 --- a/crawlers/base_crawler.py +++ b/crawlers/base_crawler.py @@ -42,7 +42,19 @@ class Meal: BR = "BR" LU = "LU" DN = "DN" - type_handler = {BR: BR, LU: LU, DN: DN, "아침": BR, "점심": LU, "저녁": DN, "중식": LU, "석식": DN} + type_handler = { + BR: BR, + LU: LU, + DN: DN, + "아침": BR, + "점심": LU, + "저녁": DN, + "중식": LU, + "석식": DN, + "breakfast": BR, + "lunch": LU, + "dinner": DN, + } def __init__(self, restaurant="", name="", date=None, type="", price=None, etc=None): self.set_restaurant(restaurant) diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index e7c80f7..a2a5f60 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -159,11 +159,7 @@ def crawl(self, soup, **kwargs): table = soup.find("table", {"class": "menu-table"}) if not table: return - ths = table.select("thead > tr > th") trs = table.tbody.find_all("tr", recursive=False) - types = [] - for th in ths[1:]: - types.append(th.text) for tr in trs: tds = tr.find_all("td", recursive=False) @@ -173,7 +169,11 @@ def crawl(self, soup, **kwargs): for except_restaurant_name in self.except_restaurant_name_list ): continue + for col_idx, td in enumerate(tds[1:]): + # meal type이 더 이상 ths에 포함되지 않고 tds 내부로 이동. + meal_type = td["class"][0] + # td.text에서 식단을 한번에 가져오는 것으로 변경 names = td.text.split("\n") restaurant = text_normalizer(row_restaurant) @@ -186,7 +186,7 @@ def crawl(self, soup, **kwargs): filtered_names = self.filter_menu_names(names) for name in filtered_names: - meal = Meal(restaurant, name, date, types[col_idx]) + meal = Meal(restaurant, name, date, meal_type) meal = self.normalize(meal) if self.is_meal_name_when_normalized(meal.name): @@ -210,6 +210,7 @@ def crawl(self, soup, **kwargs): if not next_line_merged and self.is_next_line_keyword(last_meal): last_meal = self.combine(last_meal, meal) next_line_merged = True + else: delimiter = self.get_multi_line_delimiter(last_meal) # delimiter에 해당하는 경우에는 여기 걸림 @@ -228,7 +229,7 @@ def crawl(self, soup, **kwargs): next_line_merged = False elif self.get_multi_line_delimiter(last_meal) is None: if meal.restaurant != restaurant: - meal = Meal(row_restaurant, name, date, types[col_idx]) + meal = Meal(row_restaurant, name, date, meal_type) meal = self.normalize(meal) restaurant = meal.restaurant self.found_meal(last_meal)