Skip to content Skip to sidebar Skip to footer

How To Scrape Website If It Has Load More Button To Load More Content On The Page?

from selenium import webdriver import time driver = webdriver.Chrome(executable_path=r'C:\Users\gkhat\Downloads\chromedriver.exe') driver.get('https://www.allrecipes.com/recipes/2

Solution 1:

I tried the code below for this. It works, but I am not sure it is the best way to do it. Note that I dismissed the email sign-up pop-ups manually; you will need to find a way to handle them in code.

# Scrape recipe names from allrecipes.com, clicking the "Load More" button
# repeatedly until it disappears (or goes stale), printing only the newly
# loaded cards after each click.
from selenium import webdriver
import time
from selenium.common.exceptions import StaleElementReferenceException

driver = webdriver.Chrome(executable_path="path")
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://www.allrecipes.com/recipes/233/world-cuisine/asian/indian/")

# Print the recipe names that are already visible before any click.
receipes = driver.find_elements_by_class_name("card__detailsContainer")
for rec in receipes:
    name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
    print(name)

loadmore = driver.find_element_by_id("category-page-list-related-load-more-button")
j = 0  # count of recipe cards already printed
try:
    while loadmore.is_displayed():
        loadmore.click()
        time.sleep(5)  # give the newly loaded cards time to render
        lrec = driver.find_elements_by_class_name("recipeCard__detailsContainer")
        # Only print the cards added by this click.
        newlist = lrec[j:]
        for rec in newlist:
            name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
            print(name)
        # BUG FIX: was `len(lrec) + 1`, which silently skipped one card
        # on every subsequent iteration.
        j = len(lrec)
        time.sleep(5)
except StaleElementReferenceException:
    # The button element goes stale once all content is loaded; treat as done.
    pass
driver.quit()

Solution 2:

Actually, there is a JSON endpoint that returns the data. However, the JSON embeds the content as an HTML fragment, so you just need to parse that fragment.

Note: You can change the chunk size so you can get more than 24 items per "page"

# Fetch recipe titles via the site's "load more" JSON endpoint instead of a
# browser. The endpoint paginates; each response carries a `hasNext` flag and
# an `html` payload that we parse with BeautifulSoup.
import requests
from bs4 import BeautifulSoup

size = 24  # items per request; raise this to get more than 24 per "page"
page = 0

hasNext = True
while hasNext == True:
    page += 1
    print('\tPage: %s' %page)
    url = 'https://www.allrecipes.com/element-api/content-proxy/aggregate-load-more?sourceFilter%5B%5D=alrcom&id=cms%2Fonecms_posts_alrcom_2007692&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_231026&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247233&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_246179&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_256599&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247204&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_34591&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_245131&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_220560&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_212721&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_236563&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189766&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8188886&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189135&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2052087&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_7986932&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2040338&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_280310&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_228957&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_46822&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_72349&page={page}&orderBy=Popularity30Days&docTypeFilter%5B%5D=content-type-recipe&docTypeFilter%5B%5D=content-type-gallery&size={size}&pagesize={size}&x-ssst=iTv629LHnNxfbQ1iVslBTZJTH69zVWEa&variant=food'.format(size=size, page=page)
    jsonData = requests.get(url).json()

    # Stop looping once the API says there are no further pages.
    hasNext = jsonData['hasNext']

    # The JSON payload embeds the cards as an HTML fragment; parse it.
    soup = BeautifulSoup(jsonData['html'], 'html.parser')
    cardTitles = soup.find_all('h3',{'class':'recipeCard__title'})
    for title in cardTitles:
        print(title.text.strip())

Post a Comment for "How To Scrape Website If It Has Load More Button To Load More Content On The Page?"