Last active
February 19, 2024 19:10
-
-
Save Dalboz/6574985f278914027a77ac592e1b6d79 to your computer and use it in GitHub Desktop.
Scraping de precios para productos del DIA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
# Script para scrappear productos de los Supermercados DIA | |
# Necesita tener instalado el parser lxml: sudo apt-get install python3-lxml
from bs4 import BeautifulSoup | |
from requests import get | |
import locale | |
import re | |
import sys | |
import argparse | |
def fetch_data(product_name):
    """Download the DIA search-results page for a product.

    Args:
        product_name: Search term as typed by the user. It may contain
            spaces, accents or reserved URL characters such as ``&``.

    Returns:
        The body of the HTTP response decoded as text.

    Raises:
        requests.exceptions.RequestException: If the connection fails, no
            response arrives within the timeout, or the server answers
            with an HTTP 4xx/5xx status.
    """
    # Hand the term to requests via ``params`` so it is percent-encoded;
    # interpolating it raw into the URL lets characters like '&' or '#'
    # truncate or alter the query.
    response = get(
        'https://www.dia.es/search',
        params={'q': product_name},
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=30,
    )
    # Fail loudly instead of handing an error page to the HTML parser,
    # which would silently yield an empty listing.
    response.raise_for_status()
    return response.text
def parse_data(response):
    """Extract the product-card elements from a search-results page.

    Args:
        response: HTML text of the search-results page.

    Returns:
        A 3-tuple ``(productos, precios, precios_ume)`` holding the ``<p>``
        elements found for product names, active prices and per-unit
        prices, in that order.
    """
    document = BeautifulSoup(response, "lxml")
    # One CSS class per returned collection, in return order.
    css_classes = (
        "search-product-card__product-name",
        "search-product-card__active-price",
        "search-product-card__price-per-unit",
    )
    productos, precios, precios_ume = (
        document.find_all("p", class_=css_class) for css_class in css_classes
    )
    return productos, precios, precios_ume
def print_results(productos, precios, precios_ume):
    """Print the scraped products as ' ; '-separated rows on stdout.

    A header row is printed first. Then, for each product, the columns are:
    name, unit price, currency, price per unit of measure, currency,
    quantity (unit price divided by per-unit price, rounded to 4 decimals)
    and the unit of measure.

    Args:
        productos: Elements whose ``.text`` is the product name.
        precios: Elements whose ``.text`` looks like ``"<amount> <currency>"``.
        precios_ume: Elements whose ``.text`` looks like
            ``"(<amount> <currency>/<unit>)"``.

    The three sequences are walked in lockstep with ``zip``, so rows are
    paired purely by position and the shortest sequence bounds the output.
    Amounts are parsed with ``locale.atof``, i.e. according to the locale
    that is active when this function runs.

    Rows whose price texts cannot be parsed (missing fields, non-numeric
    amounts, or a zero per-unit price) are reported on stderr and skipped,
    so one odd product card no longer aborts the whole listing.
    """
    sep = ';'
    print('Producto', sep, 'Precio unitario', sep, 'Moneda', sep,
          'Precio UME', sep, 'Moneda', sep, 'Cantidad', sep, 'UME')
    for producto, precio, precio_ume in zip(productos, precios, precios_ume):
        producto_str = producto.text
        try:
            price_components = precio.text.split()
            # Drop the surrounding parentheses, then split "<amount> <cur>/<unit>".
            ume = precio_ume.text.replace("(", "").replace(")", "").split()
            unidad = ume[1].split("/")
            cantidad = round(
                locale.atof(price_components[0]) / locale.atof(ume[0]), 4
            )
            row = (
                producto_str,
                price_components[0],
                price_components[1],
                ume[0],
                # The per-unit column reuses the currency of the unit price.
                price_components[1],
                cantidad,
                unidad[1],
            )
        except (IndexError, ValueError, ZeroDivisionError) as error:
            print(
                f'Skipping {producto_str!r}: cannot parse prices ({error!r})',
                file=sys.stderr,
            )
            continue
        # ' ; ' reproduces print()'s default single-space separator around
        # each ';' argument, keeping the original output format.
        print(*row, sep=' ; ')
def main():
    """Command-line entry point: search DIA for a product and print prices.

    Reads the product name from the command line, switches the process
    locale to ``es_ES.UTF-8`` (the locale in effect when ``print_results``
    parses amounts with ``locale.atof``), then fetches, parses and prints
    the search results.
    """
    cli = argparse.ArgumentParser(description='Scrape product information from Supermercados DIA.')
    cli.add_argument('product_name', help='Name of the product to search for')
    product_name = cli.parse_args().product_name

    locale.setlocale(locale.LC_ALL, "es_ES.UTF-8")

    html = fetch_data(product_name)
    print_results(*parse_data(html))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment