Skip to content

Commit

Permalink
Adding canonical_url for all scrapers & associated updates: batch two
Browse files: browse the repository at this point in the history
  • Loading branch information
jknndy authored Oct 11, 2023
1 parent 6d3694a commit 6383f3b
Show file tree
Hide file tree
Showing 120 changed files with 65,493 additions and 68,351 deletions.
2 changes: 0 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ Scrapers available for:
- `https://forktospoon.com/ <https://forktospoon.com/>`_
- `https://fredriksfika.allas.se/ <https://fredriksfika.allas.se/>`_
- `https://www.750g.com <https://www.750g.com>`_
- `https://geniuskitchen.com/ <https://geniuskitchen.com>`_
- `https://www.gesund-aktiv.com/ <https://www.gesund-aktiv.com>`_
- `https://giallozafferano.it/ <https://giallozafferano.it>`_
- `https://gimmesomeoven.com/ <https://www.gimmesomeoven.com/>`_
Expand All @@ -182,7 +181,6 @@ Scrapers available for:
- `https://www.hassanchef.com/ <https://www.hassanchef.com/>`_
- `https://headbangerskitchen.com/ <https://www.headbangerskitchen.com/>`_
- `https://www.heb.com/ <https://www.heb.com/recipe/landing>`_
- `https://heinzbrasil.com.br/ <https://heinzbrasil.com.br>`_
- `https://hellofresh.com/ <https://hellofresh.com>`_
- `https://hellofresh.co.uk/ <https://hellofresh.co.uk>`_
- `https://www.hellofresh.de/ <https://www.hellofresh.de/>`_
Expand Down
4 changes: 0 additions & 4 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
from .forktospoon import ForkToSpoon
from .franzoesischkochen import FranzoesischKochen
from .g750g import G750g
from .geniuskitchen import GeniusKitchen
from .gesundaktiv import GesundAktiv
from .giallozafferano import GialloZafferano
from .gimmesomeoven import GimmeSomeOven
Expand All @@ -104,7 +103,6 @@
from .hassanchef import HassanChef
from .headbangerskitchen import HeadbangersKitchen
from .heb import HEB
from .heinzbrasil import HeinzBrasil
from .hellofresh import HelloFresh
from .herseyland import HerseyLand
from .homechef import HomeChef
Expand Down Expand Up @@ -370,7 +368,6 @@
ForksOverKnives.host(): ForksOverKnives,
FranzoesischKochen.host(): FranzoesischKochen,
G750g.host(): G750g,
GeniusKitchen.host(): GeniusKitchen,
GialloZafferano.host(): GialloZafferano,
GimmeSomeOven.host(): GimmeSomeOven,
Globo.host(): Globo,
Expand All @@ -386,7 +383,6 @@
HandleTheHeat.host(): HandleTheHeat,
HassanChef.host(): HassanChef,
HeadbangersKitchen.host(): HeadbangersKitchen,
HeinzBrasil.host(): HeinzBrasil,
HelloFresh.host(): HelloFresh,
HelloFresh.host(domain="at"): HelloFresh,
HelloFresh.host(domain="be"): HelloFresh,
Expand Down
26 changes: 0 additions & 26 deletions recipe_scrapers/geniuskitchen.py

This file was deleted.

3 changes: 0 additions & 3 deletions recipe_scrapers/greatbritishchefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ def title(self):
def total_time(self):
return self.schema.total_time()

def yields(self):
return self.schema.yields()

def image(self):
return self.schema.image()

Expand Down
23 changes: 13 additions & 10 deletions recipe_scrapers/heb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,30 @@ def host(cls, domain="com"):
return f"heb.{domain}"

def title(self):
return self.soup.find("h1", {"class": "title"}).get_text()
title_tag = self.soup.find("h1", {"data-qe-id": "recipeTitle"})
return title_tag.get_text()

def total_time(self):
minutes_tag = self.soup.find("div", {"itemprop": "totalTime"})
return get_minutes(minutes_tag.parent.get_text())
total_time_tag = self.soup.find("span", {"data-qe-id": "recipeTotalTime"})
return get_minutes(total_time_tag.get_text())

def yields(self):
yields_tag = self.soup.find("div", {"itemprop": "recipeYield"})
return get_yields(yields_tag.parent.get_text())
yields_tag = self.soup.find("p", {"data-qe-id": "recipeServingSize"})
return get_yields(yields_tag.get_text())

def ingredients(self):
ingredients_container = self.soup.find(class_="ingredientswrapper")
ingredients = ingredients_container.findAll("div", {"class": "recipestepstxt"})
ingredients_container = self.soup.find(
"div", {"data-qe-id": "recipeIngredientsContainer"}
)
ingredients = ingredients_container.findAll("li")

return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

def _instructions_list(self):
instructions_container = self.soup.find("div", {"class": "instructions"})
instructions = instructions_container.findAll(
"span", {"class": "instructiontxt"}
instructions_container = self.soup.find(
"div", {"data-qe-id": "recipeInstructionsContainer"}
)
instructions = instructions_container.findAll("li")
return [
normalize_string(instruction.get_text()) for instruction in instructions
]
Expand Down
36 changes: 0 additions & 36 deletions recipe_scrapers/heinzbrasil.py

This file was deleted.

4 changes: 1 addition & 3 deletions recipe_scrapers/joyfoodsunshine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def host(cls):
return "joyfoodsunshine.com"

def author(self):
return self.soup.find("span", {"class": "entry-author-name"}).get_text()
return self.schema.author()

def title(self):
return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text()
Expand All @@ -26,8 +26,6 @@ def yields(self):
)
)

return self.schema.ingredients()

def image(self):
# span class = wprm-recipe-image find src
return self.schema.image()
Expand Down
3 changes: 2 additions & 1 deletion recipe_scrapers/latelierderoxane.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def title(self):

def description(self):
div = self.soup.find("div", {"class": "bloc_chapeau bloc_blog"})
return div.find("p").get_text()
cleaned_description = div.find("p").get_text()
return normalize_string(cleaned_description)

def total_time(self):
return get_minutes(self.get_bloc_temps_value_by_index(0))
Expand Down
4 changes: 3 additions & 1 deletion recipe_scrapers/lecker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import SchemaOrgException
from ._utils import normalize_string


class Lecker(AbstractScraper):
Expand Down Expand Up @@ -85,4 +86,5 @@ def cuisine(self):
return None

def description(self):
return self.schema.description()
cleaned_description = self.schema.description()
return normalize_string(cleaned_description)
4,240 changes: 0 additions & 4,240 deletions tests/test_data/geniuskitchen.testhtml

This file was deleted.

Loading

0 comments on commit 6383f3b

Please sign in to comment.