Skip to content

Instantly share code, notes, and snippets.

@LCPallares
Last active August 5, 2024 22:24
Show Gist options
  • Save LCPallares/52f1bf4cfa44ae8458484ed1579d5c99 to your computer and use it in GitHub Desktop.
Save LCPallares/52f1bf4cfa44ae8458484ed1579d5c99 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime
def _node_text(node, default=""):
    """Return the stripped text of a parsed HTML node, or ``default`` if it is None."""
    return node.text.strip() if node is not None else default


def _to_float(text, default=0.0):
    """Convert ``text`` to a float, returning ``default`` when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        return default


def scrape_amazon_bestsellers():
    """Scrape the Amazon best-sellers books page into a list of dicts.

    Each ``div`` with id ``gridItemRoot`` on the page yields one dict with
    the keys ``rank``, ``title``, ``author``, ``price``, ``score``,
    ``type_cover``, ``numbers_reviews`` and ``date_scraped``. Fields whose
    markup is missing fall back to an empty string (text fields), ``0.0``
    (``price``, ``score``) or ``0`` (``numbers_reviews``) instead of
    aborting the whole scrape.

    The resulting list is printed and then returned.

    Returns:
        A list of dicts, one per grid item found on the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = "https://www.amazon.com/best-sellers-books-Amazon/zgbs/books/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error status instead of silently parsing an error page.
    response.raise_for_status()

    # Parse the body of the HTTP response (not the ``requests`` module itself).
    soup = BeautifulSoup(response.content, 'html.parser')

    # Computed once so every row of a single run carries the same date.
    date_scraped = datetime.now().strftime("%Y-%m-%d")

    books = []
    for item in soup.find_all("div", id="gridItemRoot"):
        rank = _node_text(item.find("span", class_="zg-bdg-text")).replace("#", "")

        # The title is taken from the second "a-link-normal" anchor of the item.
        links = item.find_all("a", class_="a-link-normal")
        title = links[1].text.strip() if len(links) > 1 else ""

        author = _node_text(item.find("div", class_="a-row a-size-small"))

        # Keep only the first space-separated token of the rating text.
        score_node = item.find("span", class_="a-icon-alt")
        score = _to_float(score_node.text.split(" ")[0]) if score_node is not None else 0.0

        # Drop the leading currency symbol and any thousands separators.
        price_node = item.find("span", class_="_cDEzb_p13n-sc-price_3mJ9Z")
        if price_node is not None:
            price = _to_float(price_node.text.strip()[1:].replace(",", ""))
        else:
            price = 0.0

        cover_node = item.find("span", class_="a-size-small a-color-secondary a-text-normal")
        type_cover = cover_node.text if cover_node is not None else ""

        # NOTE(review): this selector takes the first span carrying the
        # "a-size-small" class; confirm against the live markup that it is
        # the review count and not another small-text span.
        reviews_text = _node_text(item.find("span", class_="a-size-small")).replace(",", "")
        numbers_reviews = int(reviews_text) if reviews_text.isdigit() else 0

        books.append({
            "rank": rank,
            "title": title,
            "author": author,
            "price": price,
            "score": score,
            "type_cover": type_cover,
            "numbers_reviews": numbers_reviews,
            "date_scraped": date_scraped,
        })

    print(books)
    return books
def save_to_csv(books, filename):
    """Write book records to ``filename`` as a UTF-8 CSV file with a header row.

    Args:
        books: Sequence of dicts; the keys of the first dict become the
            CSV columns, in that dict's order.
        filename: Path of the CSV file to create or overwrite.

    When ``books`` is empty there are no column names to derive, so the
    function returns without creating or modifying the file.
    """
    if not books:
        return

    fieldnames = list(books[0].keys())
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(books)
if __name__ == "__main__":
    # Script entry point: scrape once, persist the rows, then report the count.
    scraped_books = scrape_amazon_bestsellers()
    save_to_csv(books=scraped_books, filename="amazon_bestsellers.csv")
    print(f"Scraped {len(scraped_books)} books and saved to amazon_bestsellers.csv")
import plotly.io as pio
# Assumes 'fig' is your Plotly figure, defined before this snippet runs.
html_string = pio.to_html(fig, full_html=False, include_plotlyjs='cdn')
# Optional: save the markup to a file. The encoding is given explicitly so
# non-ASCII text in the figure is written as UTF-8 regardless of the
# platform's default encoding.
with open('mi_grafico.html', 'w', encoding='utf-8') as f:
    f.write(html_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment