How To Scrape Website If It Has Load More Button To Load More Content On The Page?
from selenium import webdriver import time driver = webdriver.Chrome(executable_path=r'C:\Users\gkhat\Downloads\chromedriver.exe') driver.get('https://www.allrecipes.com/recipes/2
Solution 1:
I tried the code below. It works, but I am not sure it is the best way to do this. FYI, I dismissed the email pop-ups
manually; you will need to find a way to handle them in code.
from selenium import webdriver
import time
from selenium.common.exceptions import StaleElementReferenceException
# Print recipe titles from the Allrecipes Indian-cuisine listing, clicking the
# "load more" button repeatedly so that cards added afterwards are printed too.
driver = webdriver.Chrome(executable_path="path")
try:
    driver.maximize_window()
    driver.implicitly_wait(10)
    driver.get("https://www.allrecipes.com/recipes/233/world-cuisine/asian/indian/")

    # Cards found on the page before the button is clicked.
    recipes = driver.find_elements_by_class_name("card__detailsContainer")
    for rec in recipes:
        name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
        print(name)

    loadmore = driver.find_element_by_id("category-page-list-related-load-more-button")
    # Number of "recipeCard__detailsContainer" cards already printed.
    j = 0
    try:
        while loadmore.is_displayed():
            loadmore.click()
            # Fixed pause to give the new batch time to render.
            time.sleep(5)
            # Cards looked up after a click use a different class name than the
            # initial ones (presumably the markup of the appended cards differs
            # -- confirm against the live page).
            lrec = driver.find_elements_by_class_name("recipeCard__detailsContainer")
            # Only the cards that appeared since the previous iteration.
            newlist = lrec[j:]
            for rec in newlist:
                name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
                print(name)
            # The next slice must start exactly at the current length; the
            # previous "len(lrec) + 1" skipped the first card of every new batch.
            j = len(lrec)
            time.sleep(5)
    except StaleElementReferenceException:
        # Deliberate best-effort: a stale button reference is treated as the
        # end of the listing (presumably the button is removed or replaced
        # once nothing is left to load -- confirm).
        pass
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
Solution 2:
There is actually a JSON endpoint that returns the data. However, the JSON carries the content as HTML, so you just need to parse that.
Note: You can change the chunk size so you can get more than 24 items per "page"
import requests
from bs4 import BeautifulSoup
# Page through the Allrecipes "aggregate-load-more" endpoint and print the
# title of every recipe card found in each response.
size = 24  # items requested per page (sent as both "size" and "pagesize")
page = 0
hasNext = True

# The URL only varies in {page} and {size}, so build the template once.
url_template = 'https://www.allrecipes.com/element-api/content-proxy/aggregate-load-more?sourceFilter%5B%5D=alrcom&id=cms%2Fonecms_posts_alrcom_2007692&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_231026&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247233&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_246179&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_256599&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247204&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_34591&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_245131&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_220560&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_212721&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_236563&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189766&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8188886&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189135&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2052087&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_7986932&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2040338&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_280310&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_228957&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_46822&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_72349&page={page}&orderBy=Popularity30Days&docTypeFilter%5B%5D=content-type-recipe&docTypeFilter%5B%5D=content-type-gallery&size={size}&pagesize={size}&x-ssst=iTv629LHnNxfbQ1iVslBTZJTH69zVWEa&variant=food'

while hasNext:
    page += 1
    print('\tPage: %s' % page)
    url = url_template.format(size=size, page=page)

    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of a confusing JSON failure.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    jsonData = response.json()

    # The response's own flag decides whether another page is requested.
    hasNext = jsonData['hasNext']

    # The 'html' field holds markup, so it is parsed like a normal page.
    soup = BeautifulSoup(jsonData['html'], 'html.parser')
    cardTitles = soup.find_all('h3', {'class': 'recipeCard__title'})
    for title in cardTitles:
        print(title.text.strip())
Post a Comment for "How To Scrape Website If It Has Load More Button To Load More Content On The Page?"