How To Scrape Website If It Has Load More Button To Load More Content On The Page?

August 21, 2024 Post a Comment

from selenium import webdriver import time driver = webdriver.Chrome(executable_path=r'C:\Users\gkhat\Downloads\chromedriver.exe') driver.get('https://www.allrecipes.com/recipes/2

Solution 1:

I tried the code below. It works, but I am not sure it is the best way to do it. Note that I dismissed the email sign-up pop-ups manually; you will need to find a way to handle them in your own script.

from selenium import webdriver
import time
from selenium.common.exceptions import StaleElementReferenceException

# Print every recipe name on the Allrecipes Indian-cuisine listing page,
# clicking the "load more" button repeatedly until it is no longer displayed.
driver = webdriver.Chrome(executable_path="path")
try:
    driver.maximize_window()
    driver.implicitly_wait(10)
    driver.get("https://www.allrecipes.com/recipes/233/world-cuisine/asian/indian/")

    # Cards that are present on the initial page load.
    receipes = driver.find_elements_by_class_name("card__detailsContainer")
    for rec in receipes:
        name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
        print(name)

    loadmore = driver.find_element_by_id("category-page-list-related-load-more-button")
    # Number of "load more" cards already printed; used to slice off only the
    # cards added by the most recent click.
    j = 0
    try:
        while loadmore.is_displayed():
            loadmore.click()
            time.sleep(5)  # give the newly requested cards time to render
            lrec = driver.find_elements_by_class_name("recipeCard__detailsContainer")
            newlist = lrec[j:]
            for rec in newlist:
                name = rec.find_element_by_tag_name("h3").get_attribute("innerText")
                print(name)
            # Exactly len(lrec) cards have been printed so far; using
            # len(lrec) + 1 here would skip the first card of the next batch.
            j = len(lrec)
            time.sleep(5)
    except StaleElementReferenceException:
        # Best-effort stop condition: presumably the button is removed or
        # re-rendered once there is nothing left to load, which invalidates
        # the cached element reference -- TODO confirm against the live page.
        pass
finally:
    # Always shut the browser down, even if scraping fails part-way through.
    driver.quit()

Solution 2:

There is actually a JSON endpoint that returns the data. However, the JSON response carries the content as HTML, so you just need to parse that.

Note: You can change the chunk size to get more than 24 items per "page".

import requests
from bs4 import BeautifulSoup

# Page through the site's "load more" JSON endpoint and print each recipe
# title. Every response carries an HTML fragment under 'html' and a 'hasNext'
# flag that says whether another page is available.
size = 24  # items requested per page (sent as both `size` and `pagesize`)
page = 0

hasNext = True
while hasNext:
    page += 1
    print('\tPage: %s' %page)
    url = 'https://www.allrecipes.com/element-api/content-proxy/aggregate-load-more?sourceFilter%5B%5D=alrcom&id=cms%2Fonecms_posts_alrcom_2007692&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_231026&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247233&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_246179&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_256599&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_247204&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_34591&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_245131&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_220560&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_212721&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_236563&excludeIds%5B%5D=cms%2Fallrecipes_recipe_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189766&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8188886&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_8189135&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2052087&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_7986932&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_2040338&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_280310&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_142967&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_14565&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_228957&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_46822&excludeIds%5B%5D=cms%2Fonecms_posts_alrcom_72349&page={page}&orderBy=Popularity30Days&docTypeFilter%5B%5D=content-type-recipe&docTypeFilter%5B%5D=content-type-gallery&size={size}&pagesize={size}&x-ssst=iTv629LHnNxfbQ1iVslBTZJTH69zVWEa&variant=food'.format(size=size, page=page)
    jsonData = requests.get(url).json()

    # The endpoint itself reports whether another page exists; the loop ends
    # after the page for which this comes back falsy has been printed.
    hasNext = jsonData['hasNext']

    # The recipe cards arrive as an HTML fragment inside the JSON payload.
    soup = BeautifulSoup(jsonData['html'], 'html.parser')
    cardTitles = soup.find_all('h3',{'class':'recipeCard__title'})
    for title in cardTitles:
        print(title.text.strip())

Python Developer

How To Scrape Website If It Has Load More Button To Load More Content On The Page?

Solution 1:

Solution 2:

Post a Comment for "How To Scrape Website If It Has Load More Button To Load More Content On The Page?"