Skip to content

Commit

Permalink
10 new sites (#1258)
Browse files Browse the repository at this point in the history
* abeautifulmess

* barefootinthepines & betterfoodguru

* beyondfrosting

* Update beyondfrosting.py

* bitsofcarey & brokenovenbaking

* cakemehometonight

* cambreabakes

* colleenchristensennutrition

* cookiesandcups

* Update cookiesandcups.py

* update abeautifulmess

* improvements

* Update recipe_scrapers/abeautifulmess.py

Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com>

* Update recipe_scrapers/barefootinthepines.py

Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com>

* fix brokenoven equipment

* bitsofcarey note improvements

* cookiesandcups author

* improve nutrients output

* Update barefootinthepines.py

---------

Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com>
  • Loading branch information
jknndy and jayaddison authored Oct 15, 2024
1 parent 803aee1 commit ca3081c
Show file tree
Hide file tree
Showing 54 changed files with 44,474 additions and 0 deletions.
10 changes: 10 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Scrapers available for:
- `https://101cookbooks.com/ <https://101cookbooks.com/>`_
- `https://15gram.be <https://15gram.be>`_
- `https://www.750g.com <https://www.750g.com>`_
- `https://abeautifulmess.com/ <https://abeautifulmess.com/>`_
- `https://aberlehome.com/ <https://aberlehome.com>`_
- `https://abuelascounter.com/ <https://abuelascounter.com>`_
- `https://www.acouplecooks.com <https://acouplecooks.com/>`_
Expand Down Expand Up @@ -108,14 +109,18 @@ Scrapers available for:
- `https://bakingmischief.com/ <https://bakingmischief.com/>`_
- `https://barefeetinthekitchen.com/ <https://barefeetinthekitchen.com/>`_
- `https://barefootcontessa.com/ <https://barefootcontessa.com>`_
- `https://barefootinthepines.com/ <https://barefootinthepines.com/>`_
- `https://bbc.com/ <https://bbc.com/food/recipes>`_
- `.co.uk <https://bbc.co.uk/food/recipes>`__
- `https://bbcgoodfood.com/ <https://bbcgoodfood.com>`_
- `https://bestrecipes.com.au/ <https://bestrecipes.com.au>`_
- `https://betterfoodguru.com/ <https://betterfoodguru.com/>`_
- `https://bettybossi.ch/ <https://bettybossi.ch>`_
- `https://bettycrocker.com/ <https://bettycrocker.com>`_
- `https://beyondfrosting.com/ <https://beyondfrosting.com/>`_
- `https://biancazapatka.com/ <https://biancazapatka.com>`_
- `https://bigoven.com/ <https://bigoven.com>`_
- `https://bitsofcarey.com/ <https://bitsofcarey.com/>`_
- `https://blueapron.com/ <https://blueapron.com>`_
- `https://bluejeanchef.com/ <https://bluejeanchef.com/>`_
- `https://www.bodybuilding.com/ <https://www.bodybuilding.com/>`_
Expand All @@ -125,8 +130,11 @@ Scrapers available for:
- `https://bowlofdelicious.com/ <https://bowlofdelicious.com/>`_
- `https://breadtopia.com/ <https://breadtopia.com/>`_
- `https://briceletbaklava.ch/ <https://briceletbaklava.ch/>`_
- `https://brokenovenbaking.com/ <https://brokenovenbaking.com/>`_
- `https://budgetbytes.com/ <https://budgetbytes.com>`_
- `https://cafedelites.com/ <https://cafedelites.com/>`_
- `https://cakemehometonight.com/ <https://cakemehometonight.com/>`_
- `https://cambreabakes.com/ <https://cambreabakes.com/>`_
- `https://carlsbadcravings.com/ <https://carlsbadcravings.com/>`_
- `https://castironketo.net/ <https://castironketo.net/>`_
- `https://cdkitchen.com/ <https://cdkitchen.com/>`_
Expand All @@ -136,10 +144,12 @@ Scrapers available for:
- `https://chefsavvy.com/ <https://chefsavvy.com/>`_
- `https://claudia.abril.com.br/ <https://claudia.abril.com.br>`_
- `https://closetcooking.com/ <https://closetcooking.com>`_
- `https://colleenchristensennutrition.com/ <https://colleenchristensennutrition.com/>`_
- `https://comidinhasdochef.com/ <https://comidinhasdochef.com/>`_
- `https://cook-talk.com/ <https://cook-talk.com/>`_
- `https://cookeatshare.com/ <https://cookeatshare.com/>`_
- `https://cookieandkate.com/ <https://cookieandkate.com/>`_
- `https://cookiesandcups.com/ <https://cookiesandcups.com/>`_
- `https://cooking.nytimes.com/ <https://cooking.nytimes.com>`_
- `https://cookingcircle.com/ <https://cookingcircle.com/>`_
- `https://cookinglight.com/ <https://cookinglight.com/>`_
Expand Down
20 changes: 20 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
from ._factory import SchemaScraperFactory
from ._utils import get_host_name
from .abeautifulmess import ABeautifulMess
from .aberlehome import AberleHome
from .abril import Abril
from .abuelascounter import AbuelasCounter
Expand Down Expand Up @@ -62,13 +63,17 @@
from .bakingsense import BakingSense
from .barefeetinthekitchen import BarefeetInTheKitchen
from .barefootcontessa import BareFootContessa
from .barefootinthepines import BarefootInThePines
from .bbcfood import BBCFood
from .bbcgoodfood import BBCGoodFood
from .bestrecipes import BestRecipes
from .betterfoodguru import BetterFoodGuru
from .bettybossi import BettyBossi
from .bettycrocker import BettyCrocker
from .beyondfrosting import BeyondFrosting
from .biancazapatka import BiancaZapatka
from .bigoven import BigOven
from .bitsofcarey import BitsOfCarey
from .blueapron import BlueApron
from .bluejeanchef import BlueJeanChef
from .bodybuilding import Bodybuilding
Expand All @@ -77,8 +82,11 @@
from .bowlofdelicious import BowlOfDelicious
from .breadtopia import Breadtopia
from .briceletbaklava import BricelEtBaklava
from .brokenovenbaking import BrokenOvenBaking
from .budgetbytes import BudgetBytes
from .cafedelites import CafeDelites
from .cakemehometonight import CakeMeHomeTonight
from .cambreabakes import CambreaBakes
from .carlsbadcravings import CarlsBadCravings
from .castironketo import CastIronKeto
from .cdkitchen import CdKitchen
Expand All @@ -87,9 +95,11 @@
from .chefnini import Chefnini
from .chefsavvy import ChefSavvy
from .closetcooking import ClosetCooking
from .colleenchristensennutrition import ColleenChristensenNutrition
from .comidinhasdochef import ComidinhasDoChef
from .cookeatshare import CookEatShare
from .cookieandkate import CookieAndKate
from .cookiesandcups import CookiesAndCups
from .cookingcircle import CookingCircle
from .cookinglight import CookingLight
from .cookpad import CookPad
Expand Down Expand Up @@ -396,6 +406,7 @@
from .zenbelly import ZenBelly

SCRAPERS = {
ABeautifulMess.host(): ABeautifulMess,
ACoupleCooks.host(): ACoupleCooks,
AFlavorJournal.host(): AFlavorJournal,
ALittleBitYummy.host(): ALittleBitYummy,
Expand Down Expand Up @@ -429,11 +440,15 @@
BakingMischief.host(): BakingMischief,
BareFootContessa.host(): BareFootContessa,
BarefeetInTheKitchen.host(): BarefeetInTheKitchen,
BarefootInThePines.host(): BarefootInThePines,
BestRecipes.host(): BestRecipes,
BetterFoodGuru.host(): BetterFoodGuru,
BettyBossi.host(): BettyBossi,
BettyCrocker.host(): BettyCrocker,
BeyondFrosting.host(): BeyondFrosting,
BiancaZapatka.host(): BiancaZapatka,
BigOven.host(): BigOven,
BitsOfCarey.host(): BitsOfCarey,
BlueApron.host(): BlueApron,
BlueJeanChef.host(): BlueJeanChef,
Bodybuilding.host(): Bodybuilding,
Expand All @@ -442,8 +457,11 @@
BongEats.host(): BongEats,
Breadtopia.host(): Breadtopia,
BricelEtBaklava.host(): BricelEtBaklava,
BrokenOvenBaking.host(): BrokenOvenBaking,
BudgetBytes.host(): BudgetBytes,
CafeDelites.host(): CafeDelites,
CakeMeHomeTonight.host(): CakeMeHomeTonight,
CambreaBakes.host(): CambreaBakes,
CarlsBadCravings.host(): CarlsBadCravings,
CastIronKeto.host(): CastIronKeto,
CdKitchen.host(): CdKitchen,
Expand All @@ -452,11 +470,13 @@
Chefkoch.host(): Chefkoch,
Chefnini.host(): Chefnini,
ClosetCooking.host(): ClosetCooking,
ColleenChristensenNutrition.host(): ColleenChristensenNutrition,
ComidinhasDoChef.host(): ComidinhasDoChef,
CookEatShare.host(): CookEatShare,
CookPad.host(): CookPad,
CookTalk.host(): CookTalk,
CookieAndKate.host(): CookieAndKate,
CookiesAndCups.host(): CookiesAndCups,
CookingCircle.host(): CookingCircle,
CookingLight.host(): CookingLight,
CooksCountry.host(): CooksCountry,
Expand Down
28 changes: 28 additions & 0 deletions recipe_scrapers/abeautifulmess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class ABeautifulMess(AbstractScraper):
    """Scraper for recipes published on abeautifulmess.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "abeautifulmess.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        heading_selector = ".wprm-recipe-ingredient-group h4"
        ingredient_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, ingredient_selector
        )

    def equipment(self):
        """Return the equipment names listed in the recipe card.

        Returns ``None`` when the page has no equipment container; otherwise
        the text of every equipment-name element is handed to
        ``get_equipment``.
        """
        container = self.soup.select_one(".wprm-recipe-equipment-container")
        if container:
            names = []
            for node in container.select(".wprm-recipe-equipment-name"):
                names.append(node.get_text())
            return get_equipment(names)
        return None
57 changes: 57 additions & 0 deletions recipe_scrapers/barefootinthepines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients


class BarefootInThePines(AbstractScraper):
    """Scraper for recipes published on barefootinthepines.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "barefootinthepines.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        return group_ingredients(
            self.ingredients(),
            self.soup,
            ".mv-create-ingredients h4",
            ".mv-create-ingredients ul li",
        )

    def nutrients(self):
        """Return the nutrition facts keyed by standardized nutrient names.

        Each nutrition item on the page is rendered as a label followed by
        its value. The label is lower-cased, stripped of a trailing colon and
        translated through ``nutrition_label_mapping``; items whose label is
        not in the mapping are dropped.

        Returns:
            A dict mapping standardized nutrient names to the value text, or
            ``None`` when the page has no nutrition box.
        """
        nutrition_section = self.soup.select_one(".mv-create-nutrition-box")
        if not nutrition_section:
            return None

        nutrition_label_mapping = {
            "calories": "calories",
            "carbohydrates": "carbohydrateContent",
            "cholesterol": "cholesterolContent",
            "total fat": "fatContent",
            "fiber": "fiberContent",
            "protein": "proteinContent",
            "saturated fat": "saturatedFatContent",
            "serving size": "servingSize",
            "sodium": "sodiumContent",
            "sugar": "sugarContent",
            "trans fat": "transFatContent",
            "unsaturated fat": "unsaturatedFatContent",
        }

        standardized_nutrition_data = {}
        for item in nutrition_section.select(".mv-create-nutrition-item"):
            # Look the label up once per item instead of once per use.
            label_element = item.select_one(".mv-create-nutrition-label")
            if not label_element:
                continue

            label_text = label_element.get_text(strip=True)
            # Remove only the first occurrence of the label so that a value
            # which happens to contain the label text is left intact.
            value = item.get_text(strip=True).replace(label_text, "", 1).strip()

            custom_label = label_text.lower().rstrip(":")
            if custom_label in nutrition_label_mapping:
                standardized_nutrition_data[
                    nutrition_label_mapping[custom_label]
                ] = value

        return standardized_nutrition_data
16 changes: 16 additions & 0 deletions recipe_scrapers/betterfoodguru.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients


class BetterFoodGuru(AbstractScraper):
    """Scraper for recipes published on betterfoodguru.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "betterfoodguru.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        heading_selector = ".wprm-recipe-ingredient-group h4"
        ingredient_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, ingredient_selector
        )
28 changes: 28 additions & 0 deletions recipe_scrapers/beyondfrosting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class BeyondFrosting(AbstractScraper):
    """Scraper for recipes published on beyondfrosting.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "beyondfrosting.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        return group_ingredients(
            self.ingredients(),
            self.soup,
            ".tasty-recipes-ingredients-body p strong",
            ".tasty-recipes-ingredients-body ul li",
        )

    def equipment(self):
        """Return the equipment names from the recipe's equipment link cards.

        For every equipment link, the text of the next ``<p>`` element that
        follows it in the document is used as the equipment name. Paragraphs
        mentioning "affiliate link" are ignored, and links with no following
        paragraph are skipped rather than raising ``AttributeError``.
        """
        equipment_items = self.soup.select(
            ".tasty-recipes-equipment .tasty-link-card a.tasty-link"
        )

        equipment_list = []
        for item in equipment_items:
            # find_next() returns None when no <p> follows the link.
            paragraph = item.find_next("p")
            if paragraph is None:
                continue

            text = paragraph.get_text(strip=True)
            if "affiliate link" in text.lower():
                continue

            equipment_list.append(text)

        return get_equipment(equipment_list)
38 changes: 38 additions & 0 deletions recipe_scrapers/bitsofcarey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class BitsOfCarey(AbstractScraper):
    """Scraper for recipes published on bitsofcarey.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "bitsofcarey.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        return group_ingredients(
            self.ingredients(),
            self.soup,
            ".wprm-recipe-ingredient-group h4",
            ".wprm-recipe-ingredient",
        )

    def equipment(self):
        """Return the equipment names, each annotated with its note if any.

        An item that carries a non-empty note is rendered as
        ``"<name> (note: <note text>)"``. Items without a name element are
        skipped rather than raising ``AttributeError``.
        """
        equipment_items = self.soup.select(
            ".wprm-recipe-equipment-container .wprm-recipe-equipment-item"
        )
        equipment_list = []

        for item in equipment_items:
            name_element = item.select_one(".wprm-recipe-equipment-name")
            if name_element is None:
                # Nothing to report for an item that has no name.
                continue

            name = name_element.get_text(strip=True)

            note = item.select_one(".wprm-recipe-equipment-notes")
            note_text = note.get_text(strip=True) if note is not None else ""
            if note_text:
                # Drop the note text from the name before re-attaching it in
                # a labelled form, so it is not reported twice.
                name = name.replace(note_text, "").strip()
                name += f" (note: {note_text})"

            equipment_list.append(name)

        return get_equipment(equipment_list)
28 changes: 28 additions & 0 deletions recipe_scrapers/brokenovenbaking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class BrokenOvenBaking(AbstractScraper):
    """Scraper for recipes published on brokenovenbaking.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "brokenovenbaking.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        return group_ingredients(
            self.ingredients(),
            self.soup,
            ".wprm-recipe-ingredient-group h4",
            ".wprm-recipe-ingredient",
        )

    def equipment(self):
        """Return the equipment names listed in the recipe card.

        The first name element of each equipment item is used. Items without
        a name element are skipped rather than raising ``AttributeError``.
        """
        equipment_items = self.soup.select(
            ".wprm-recipe-equipment-container .wprm-recipe-equipment-item"
        )
        name_elements = (
            item.select_one(".wprm-recipe-equipment-name")
            for item in equipment_items
        )
        equipment_list = [
            element.get_text() for element in name_elements if element is not None
        ]

        return get_equipment(equipment_list)
25 changes: 25 additions & 0 deletions recipe_scrapers/cakemehometonight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from ._abstract import AbstractScraper
from ._grouping_utils import group_ingredients
from ._utils import get_equipment


class CakeMeHomeTonight(AbstractScraper):
    """Scraper for recipes published on cakemehometonight.com."""

    @classmethod
    def host(cls):
        """Return the domain this scraper is registered for."""
        return "cakemehometonight.com"

    def ingredient_groups(self):
        """Return the ingredients grouped under the recipe card's headings.

        Delegates to ``group_ingredients``, passing the CSS selectors for the
        group headings and for the individual ingredient entries.
        """
        heading_selector = ".wprm-recipe-ingredient-group h4"
        ingredient_selector = ".wprm-recipe-ingredient"
        return group_ingredients(
            self.ingredients(), self.soup, heading_selector, ingredient_selector
        )

    def equipment(self):
        """Return the equipment names listed in the recipe card.

        The whitespace-stripped text of every equipment-name element is
        collected and handed to ``get_equipment``.
        """
        name_nodes = self.soup.select(
            ".wprm-recipe-equipment-container .wprm-recipe-equipment-list .wprm-recipe-equipment-item .wprm-recipe-equipment-name"
        )
        names = []
        for node in name_nodes:
            names.append(node.get_text(strip=True))

        return get_equipment(names)
Loading

0 comments on commit ca3081c

Please sign in to comment.