-
Notifications
You must be signed in to change notification settings - Fork 525
/
nytimes.py
24 lines (21 loc) · 1007 Bytes
/
nytimes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
class NYTimes(AbstractScraper):
    """Scraper registered for the host ``cooking.nytimes.com``."""

    @classmethod
    def host(cls):
        """Return the hostname this scraper is responsible for."""
        return "cooking.nytimes.com"

    def ingredient_groups(self):
        """
        Delegate ingredient grouping to ``group_ingredients``.

        The NYTimes site appears to auto-generate its CSS class names, so
        each one ends in a string of random characters. An exact class-name
        match would likely break quickly, so both selectors match on a
        stable fragment of the class name instead.

        For example, ``h3[class*='ingredientgroup_name']`` matches any h3
        element whose class attribute contains ``ingredientgroup_name``
        at least once, anywhere in the attribute value.

        See https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors

        Returns whatever ``group_ingredients`` produces for this page.
        """
        # Substring (`*=`) attribute selectors tolerate the random suffix
        # appended to each generated class name.
        group_heading_selector = "h3[class*='ingredientgroup_name']"
        group_item_selector = "li[class*='ingredient_ingredient']"

        return group_ingredients(
            self.ingredients(),
            self.soup,
            group_heading_selector,
            group_item_selector,
        )