Skip to content

Instantly share code, notes, and snippets.

@LCPallares
Last active August 5, 2024 22:40
Show Gist options
  • Save LCPallares/2bd85d5da472655be267cc2956936f54 to your computer and use it in GitHub Desktop.
Save LCPallares/2bd85d5da472655be267cc2956936f54 to your computer and use it in GitHub Desktop.
analyst_amazon_bestsellers
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime
def _element_text(element, default=""):
    """Return the stripped text of a parsed HTML element, or *default* if it is None."""
    return element.text.strip() if element is not None else default


def _to_float(text, default=0.0):
    """Parse *text* as a float, ignoring thousands separators; *default* on failure."""
    try:
        return float(text.replace(",", ""))
    except ValueError:
        return default


def scrape_amazon_bestsellers():
    """Scrape the Amazon book best-sellers page into a list of dicts.

    Every ``div`` with id ``gridItemRoot`` on the page yields one record.
    Fields whose markup cannot be found fall back to ``""`` (text fields),
    ``0.0`` (price, score) or ``0`` (review count) instead of aborting the
    whole scrape.

    Returns:
        A list of dicts with the keys ``rank``, ``title``, ``author``,
        ``price``, ``score``, ``type_cover``, ``numbers_reviews`` and
        ``date_scraped`` (formatted ``YYYY-MM-DD``). The list is also
        printed to stdout before being returned.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.amazon.com/best-sellers-books-Amazon/zgbs/books/"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Computed once so every record of a run carries the same date.
    date_scraped = datetime.now().strftime("%Y-%m-%d")

    books = []
    for item in soup.find_all("div", id="gridItemRoot"):
        rank = _element_text(item.find("span", class_="zg-bdg-text")).replace("#", "")

        # The title is read from the second "a-link-normal" anchor of the item.
        links = item.find_all("a", class_="a-link-normal")
        title = _element_text(links[1]) if len(links) > 1 else ""

        author = _element_text(item.find("div", class_="a-row a-size-small"))

        # Only the first space-separated token of the rating text is numeric.
        score_text = _element_text(item.find("span", class_="a-icon-alt"))
        score = _to_float(score_text.split(" ")[0])

        # NOTE(review): this class name looks auto-generated and may change
        # when Amazon redeploys the page - confirm it still matches.
        price_text = _element_text(
            item.find("span", class_="_cDEzb_p13n-sc-price_3mJ9Z")
        )
        # Drop a leading currency symbol (e.g. "$") when there is one.
        if price_text and not price_text[0].isdigit():
            price_text = price_text[1:]
        price = _to_float(price_text)

        cover_el = item.find(
            "span", class_="a-size-small a-color-secondary a-text-normal"
        )
        type_cover = cover_el.text if cover_el is not None else ""

        # NOTE(review): this takes the first span carrying the "a-size-small"
        # class; confirm that element really is the review count.
        reviews_text = _element_text(
            item.find("span", class_="a-size-small")
        ).replace(",", "")
        numbers_reviews = int(reviews_text) if reviews_text.isdigit() else 0

        books.append({
            "rank": rank,
            "title": title,
            "author": author,
            "price": price,
            "score": score,
            "type_cover": type_cover,
            "numbers_reviews": numbers_reviews,
            "date_scraped": date_scraped,
        })

    print(books)
    return books
def save_to_csv(books, filename):
    """Write a list of record dicts to *filename* as UTF-8 CSV.

    The header row is taken from the keys of the first record, so all
    records are expected to share those keys.

    Args:
        books: Sequence of dicts, one per CSV row.
        filename: Path of the CSV file to create or overwrite.

    If *books* is empty there is no header to derive, so nothing is written
    and any existing file at *filename* is left untouched.
    """
    if not books:
        return

    fieldnames = list(books[0])
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(books)
if __name__ == "__main__":
    # Script entry point: scrape the best-sellers page and store the result.
    bestsellers = scrape_amazon_bestsellers()
    if not bestsellers:
        # Nothing was parsed, so there is no header row to write; stop with
        # a non-zero exit status rather than failing inside save_to_csv.
        raise SystemExit("No books were scraped; nothing was saved.")
    save_to_csv(bestsellers, "amazon_bestsellers.csv")
    print(f"Scraped {len(bestsellers)} books and saved to amazon_bestsellers.csv")
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment