In [12]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

V1: Take 5 recipes from the same blog and aggregate their ingredients into one list (without combining them).

In [49]:
# One links CSV per blog (file names as given). Only one blog is worked on at
# a time, so read just the selected file rather than loading every CSV and
# overwriting `df` each time.
LINK_FILES = {
    'minimalistbaker': 'minimalistbaker_links.csv',
    'skinnytaste': 'skinnytaste_links.csv',
    'halfbakedharvest': 'halfbakedharvest_links.csv',
}

# Change this key to switch blogs; 'halfbakedharvest' leaves `df` holding the
# same data as reading the three files in sequence did.
BLOG = 'halfbakedharvest'

df = pd.read_csv(LINK_FILES[BLOG])

Unfortunately, the "Print"-link approach used further below doesn't work for Half Baked Harvest (HBH), so here is a workaround.

In [125]:
# ===============================================
# HBH WORKAROUND
# Find the text node in the page that mentions `wprmpuc_recipe_` and parse
# the value assigned after its first '=' as JSON.
# Also doesn't work for pinch of yum
# NOPE url = "https://pinchofyum.com/30-minute-vegetarian-meatballs"
# ===============================================

import json

url = "https://www.halfbakedharvest.com/one-pan-four-cheese-sun-dried-tomato-and-spinach-drunken-pasta-bake/"

# Bound the wait and fail on HTTP errors instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

searched_word = 'wprmpuc_recipe_'
# Escape the word so it is always matched literally, whatever it contains.
results = soup.body.find_all(string=re.compile(re.escape(searched_word)))
print('Found the word "{0}" {1} times'.format(searched_word, len(results)))
if not results:
    raise LookupError('No text containing "{0}" found at {1}'.format(searched_word, url))

# Everything after the first '=' is expected to start with the JSON payload.
_, equals_sign, assigned_text = str(results[0]).partition('=')
if not equals_sign:
    raise LookupError('No "=" assignment found in the text containing "{0}"'.format(searched_word))
assigned_text = assigned_text.lstrip()

# raw_decode parses one complete JSON value and reports where it ends, so
# '=' or ';' characters inside string values cannot truncate the payload
# (splitting the text on those characters would).
info_dict, json_end = json.JSONDecoder().raw_decode(assigned_text)
clean_result = assigned_text[:json_end]
# info_dict
Found the word "wprmpuc_recipe_" 1 times
In [127]:
# Locate a recipe page's "Print" link: search the body for text containing
# "Print" and show the href of the first match that sits inside a link.
#
# Other recipe URLs kept for reference
# (NOTE(review): presumably pages this approach was tried on -- confirm).
# url = "https://www.gimmesomeoven.com/poblano-white-chicken-chili/"
# url = "https://www.skinnytaste.com/lentil-soup-with-butternut-and-kale/"
# url = "https://minimalistbaker.com/orange-cranberry-crisp-gluten-free-easy/"
# url = "https://www.twopeasandtheirpod.com/magic-cookie-bars/"
# url = "https://thedefineddish.com/miso-roasted-chicken/"
# url = "https://www.ambitiouskitchen.com/coconut-curried-brown-rice/"
# url = "https://whatsgabycooking.com/chicken-larb-bowls/"
url = "https://paleomg.com/paleo-blueberry-chai-muffins/"

# Bound the wait and fail on HTTP errors instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

searched_word = 'Print'
# Escape the word so it is always matched literally, whatever it contains.
results = soup.body.find_all(string=re.compile(re.escape(searched_word)))
print('Found the word "{0}" {1} times'.format(searched_word, len(results)))

# Not every "Print" text node is directly inside a link, so keep only the
# matches whose parent tag carries an href, in document order.
print_links = [
    node.parent['href']
    for node in results
    if node.parent is not None and node.parent.get('href') is not None
]
if not print_links:
    # LookupError is the common base of the IndexError/KeyError that plain
    # indexing would raise here.
    raise LookupError('No link containing "{0}" found at {1}'.format(searched_word, url))

print_links[0]
Found the word "Print" 3 times
Out[127]:
'https://paleomg.com/paleo-blueberry-chai-muffins/print/72033/'
In [ ]: