Skip to content

Commit

Permalink
Merge branch 'hhursev:main' into grouping-simplyquinoa
Browse files Browse the repository at this point in the history
  • Loading branch information
jknndy authored Oct 23, 2024
2 parents a093871 + 4f86ebe commit 1740798
Show file tree
Hide file tree
Showing 62 changed files with 22,045 additions and 3,417 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:

jobs:
coverage:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:

jobs:
linters:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v3
- name: Set up Python
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Check that the release commit can be found in a release branch
run: git branch main v14 --contains ${{ github.sha }} | egrep '.+'
- name: Set up Python
uses: actions/setup-python@v4
with:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/test_publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ on: workflow_dispatch

jobs:
test_publish:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v3
- name: Check that the release commit can be found in a release branch
run: git branch main v14 --contains ${{ github.sha }} | egrep '.+'
- name: Set up Python
uses: actions/setup-python@v4
with:
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-13, windows-latest]
os: [ubuntu-24.04, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
include:
# set toxenv to workaround-darwin on macos (check tox.ini)
- toxenv: py
- os: macos-13
- os: macos-latest
toxenv: workaround-darwin
runs-on: ${{ matrix.os }}
steps:
Expand All @@ -29,6 +29,7 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: pip
- name: Run Tests
run: |
pip install tox
Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ Scrapers available for:
- `https://kochbucher.com/ <https://kochbucher.com/>`_
- `http://koket.se/ <http://koket.se>`_
- `https://kristineskitchenblog.com/ <https://kristineskitchenblog.com>`_
- `https://krollskorner.com/ <https://krollskorner.com/>`_
- `https://kuchnia-domowa.pl/ <https://www.kuchnia-domowa.pl/>`_
- `https://kuchynalidla.sk/ <https://www.kuchynalidla.sk/>`_
- `https://www.kwestiasmaku.com/ <https://www.kwestiasmaku.com/>`_
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@
from .kochbucher import Kochbucher
from .koket import Koket
from .kristineskitchenblog import KristinesKitchenBlog
from .krollskorner import KrollsKorner
from .kuchniadomowa import KuchniaDomowa
from .kuchynalidla import KuchynaLidla
from .kwestiasmaku import KwestiaSmaku
Expand Down Expand Up @@ -521,6 +522,7 @@
KitchenAidAustralia.host(): KitchenAidAustralia,
KitchenDreaming.host(): KitchenDreaming,
KristinesKitchenBlog.host(): KristinesKitchenBlog,
KrollsKorner.host(): KrollsKorner,
KuchynaLidla.host(): KuchynaLidla,
LittleSunnyKitchen.host(): LittleSunnyKitchen,
LeitesCulinaria.host(): LeitesCulinaria,
Expand Down
4 changes: 2 additions & 2 deletions recipe_scrapers/_schemaorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ def yields(self):
yield_data = self.data.get("recipeYield") or self.data.get("yield")
if yield_data and isinstance(yield_data, list):
yield_data = yield_data[0]
recipe_yield = str(yield_data)
return get_yields(recipe_yield)
if yield_data:
return get_yields(str(yield_data))

def image(self):
image = self.data.get("image")
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ def get_yields(element):
serve_text = element
else:
serve_text = element.get_text()
if not serve_text:
raise ValueError("Cannot extract yield information from empty string")

if SERVE_REGEX_TO.search(serve_text):
serve_text = serve_text.split(SERVE_REGEX_TO.split(serve_text, 2)[1], 2)[1]
Expand Down
35 changes: 35 additions & 0 deletions recipe_scrapers/krollskorner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class KrollsKorner(AbstractScraper):
    """Scraper for recipes published on krollskorner.com.

    Author, ingredient groups and equipment are read from the ``wprm-*``
    recipe-card markup of the page.
    """

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "krollskorner.com"

    def author(self):
        """Return the author name shown on the recipe card.

        Falls back to the schema.org author when the recipe card has no
        author link, rather than failing with ``AttributeError`` on ``None``.
        """
        author_tag = self.soup.select_one(
            ".wprm-recipe-details.wprm-recipe-author.wprm-block-text-normal a"
        )
        if author_tag is None:
            # select_one() returns None when nothing matches the selector.
            return self.schema.author()
        return author_tag.get_text(strip=True)

    def ingredient_groups(self):
        """Delegate to ``group_ingredients`` with this site's heading and item selectors."""
        return group_ingredients(
            self.ingredients(),
            self.soup,
            ".wprm-recipe-ingredient-group h4",
            ".wprm-recipe-ingredient",
        )

    def equipment(self):
        """Return the equipment names listed in the recipe card.

        Returns ``None`` when the page has no equipment container.
        """
        equipment_container = self.soup.select_one(".wprm-recipe-equipment-container")
        if not equipment_container:
            return None

        # Look each name element up once; items without a name are skipped.
        name_tags = (
            item.select_one(".wprm-recipe-equipment-name")
            for item in equipment_container.select(".wprm-recipe-equipment-item")
        )
        equipment_items = [tag.get_text(strip=True) for tag in name_tags if tag]
        return get_equipment(equipment_items)
6 changes: 5 additions & 1 deletion recipe_scrapers/maangchi.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from ._abstract import AbstractScraper
from ._utils import normalize_string

Expand All @@ -8,7 +10,9 @@ def host(cls):
return "maangchi.com"

def ingredients(self):
before = self.soup.find("h2", string="Ingredients").find_all_next("li")
before = self.soup.find("h2", string=re.compile(r"Ingredients")).find_all_next(
"li"
)
after = self.soup.find("h2", string="Directions").find_all_previous("li")
list_before = [normalize_string(b.get_text()) for b in before]
list_after = [normalize_string(a.get_text()) for a in after]
Expand Down
9 changes: 9 additions & 0 deletions recipe_scrapers/thehappyfoodie.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import normalize_string


Expand Down Expand Up @@ -27,3 +28,11 @@ def ingredients(self):
)

return [normalize_string(f"{amount} {name}") for amount, name in ingredients]

def ingredient_groups(self):
    """Delegate to ``group_ingredients`` with this site's heading and row selectors."""
    heading_selector = ".heading th"
    row_selector = ".hf-ingredients__single-group tr:not(.heading, .spacer)"
    return group_ingredients(
        self.ingredients(), self.soup, heading_selector, row_selector
    )
20 changes: 20 additions & 0 deletions recipe_scrapers/thekitchenmagpie.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import csv_to_tags, get_equipment


class TheKitchenMagPie(AbstractScraper):
    """Scraper for recipes published on thekitchenmagpie.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper handles."""
        return "thekitchenmagpie.com"

    def ingredient_groups(self):
        """Delegate to ``group_ingredients`` with this site's heading and item selectors."""
        heading_selector = ".wprm-recipe-ingredient-group h4"
        item_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, item_selector
        )

    def equipment(self):
        """Return the equipment parsed from the "Equipment" entry of the details list.

        Returns ``None`` when the details container, the "Equipment" ``dt``
        label, or the ``dd`` sibling that follows it is missing.
        """
        details = self.soup.select_one(".wprm-recipe-details-container")
        if not details:
            return None

        label = details.find("dt", string="Equipment")
        if not label:
            return None

        value = label.find_next_sibling("dd")
        if not value:
            return None

        # The dd text is passed through csv_to_tags before get_equipment.
        return get_equipment(csv_to_tags(value.text))
13 changes: 13 additions & 0 deletions recipe_scrapers/thekitchn.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
from ._abstract import AbstractScraper
from ._exceptions import StaticValueException
from ._grouping_utils import group_ingredients


class TheKitchn(AbstractScraper):
    """Scraper for recipes published on thekitchn.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper handles."""
        return "thekitchn.com"

    def ingredient_groups(self):
        """Delegate to ``group_ingredients`` with this site's group-name and item selectors."""
        heading_selector = ".Recipe__ingredientsGroupName"
        item_selector = ".Recipe__ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, item_selector
        )

    def site_name(self):
        """Signal the fixed site name by raising ``StaticValueException``."""
        raise StaticValueException(return_value="The Kitchn")
9 changes: 9 additions & 0 deletions recipe_scrapers/themodernproper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
from ._abstract import AbstractScraper
from ._exceptions import ElementNotFoundInHtml
from ._grouping_utils import group_ingredients


class TheModernProper(AbstractScraper):
@classmethod
def host(cls):
return "themodernproper.com"

def ingredient_groups(self):
    """Delegate to ``group_ingredients`` with this site's list-title and item selectors."""
    heading_selector = ".recipe-ingredients__list-title"
    item_selector = ".recipe-ingredients__item"
    return group_ingredients(
        self.ingredients(), self.soup, heading_selector, item_selector
    )

def nutrients(self):
container = self.schema.nutrients()
if not container:
Expand Down
9 changes: 9 additions & 0 deletions recipe_scrapers/therecipecritic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients


class Therecipecritic(AbstractScraper):
Expand All @@ -8,3 +9,11 @@ def host(cls):

def author(self):
return "The Recipe Critic"

def ingredient_groups(self):
    """Delegate to ``group_ingredients`` with the ``wprm`` heading and item selectors."""
    heading_selector = ".wprm-recipe-ingredient-group h4"
    item_selector = ".wprm-recipe-ingredient"
    return group_ingredients(
        self.ingredients(), self.soup, heading_selector, item_selector
    )
9 changes: 9 additions & 0 deletions recipe_scrapers/thevintagemixer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ._abstract import AbstractScraper
from ._exceptions import StaticValueException
from ._grouping_utils import group_ingredients


class TheVintageMixer(AbstractScraper):
Expand All @@ -9,3 +10,11 @@ def host(cls):

def site_name(self):
raise StaticValueException(return_value="Vintage Mixer")

def ingredient_groups(self):
    """Delegate to ``group_ingredients`` with the ``wprm`` heading and item selectors."""
    heading_selector = ".wprm-recipe-ingredient-group h4"
    item_selector = ".wprm-recipe-ingredient"
    return group_ingredients(
        self.ingredients(), self.soup, heading_selector, item_selector
    )
9 changes: 9 additions & 0 deletions recipe_scrapers/thewoksoflife.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients


class Thewoksoflife(AbstractScraper):
    """Scraper for recipes published on thewoksoflife.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper handles."""
        return "thewoksoflife.com"

    def ingredient_groups(self):
        """Delegate to ``group_ingredients`` with the ``wprm`` heading and item selectors."""
        heading_selector = ".wprm-recipe-ingredient-group h4"
        item_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, item_selector
        )
14 changes: 14 additions & 0 deletions recipe_scrapers/thinlicious.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class Thinlicious(AbstractScraper):
    """Scraper for recipes published on thinlicious.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper handles."""
        return "thinlicious.com"

    def ingredient_groups(self):
        """Delegate to ``group_ingredients`` with this site's group-name and item selectors."""
        heading_selector = ".wprm-recipe-group-name"
        item_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, item_selector
        )

    def equipment(self):
        """Pass the text of every equipment-name element to ``get_equipment``."""
        name_tags = self.soup.select(".wprm-recipe-equipment-name")
        # Kept as a lazy generator, matching what get_equipment received before.
        names = (tag.get_text() for tag in name_tags)
        return get_equipment(names)
31 changes: 25 additions & 6 deletions recipe_scrapers/usapears.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ class USAPears(AbstractScraper):
def host(cls):
return "usapears.org"

def author(self):
    """Return the recipe author.

    Prefers the schema.org author. Otherwise falls back to the page's
    ``twitter:data1`` meta content, but only when a ``twitter:label1`` meta
    tag with content "Written by" is also present. Returns ``None`` when
    neither source yields an author.
    """
    schema_author = self.schema.author()
    if schema_author:
        return schema_author

    value_tag = self.soup.find("meta", {"name": "twitter:data1", "content": True})
    label_tag = self.soup.find(
        "meta", {"name": "twitter:label1", "content": "Written by"}
    )
    if not (value_tag and label_tag):
        return None
    return value_tag["content"]

def total_time(self):
total_time = 0
recipe_legends = self.soup.find_all("div", {"class": "recipe-legend"})
Expand Down Expand Up @@ -65,10 +75,19 @@ def nutrients(self):
return results

def ratings(self):
try:
ratings = self.schema.ratings()
if ratings > 0:
return ratings
except Exception:
pass
rating_elements = self.soup.find_all("p", {"class": "comment-rating"})
if not rating_elements:
return None

total_rating = 0
for element in rating_elements:
img = element.find("img", {"src": True})
if not img:
continue
match = re.search(r"(\d+)-star\.svg", img["src"])
if match:
total_rating += int(match.group(1))

if len(rating_elements) > 0:
return round(total_rating / len(rating_elements), 2)
return None
8 changes: 8 additions & 0 deletions tests/library/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
get_minutes,
get_nutrition_keys,
get_url_slug,
get_yields,
url_path_to_dict,
)

Expand Down Expand Up @@ -168,3 +169,10 @@ def test_get_nutrition_keys(self):
"cholesterolContent",
]
self.assertEqual((expected_order), (nutrition_keys))

def test_get_yields(self):
self.assertEqual("5 servings", get_yields("5"))

def test_get_yields_empty_string(self):
with self.assertRaises(ValueError):
get_yields("")
Loading

0 comments on commit 1740798

Please sign in to comment.