Skip to content

Instantly share code, notes, and snippets.

@Dalboz
Last active February 19, 2024 19:10
Show Gist options
  • Save Dalboz/6574985f278914027a77ac592e1b6d79 to your computer and use it in GitHub Desktop.
Save Dalboz/6574985f278914027a77ac592e1b6d79 to your computer and use it in GitHub Desktop.
Scraping de precios para productos del DIA
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Script to scrape products from DIA supermarkets
# Requires the lxml parser to be installed: sudo apt-get install python3-lxml
from bs4 import BeautifulSoup
from requests import get
import locale
import re
import sys
import argparse
def fetch_data(product_name, timeout=10):
    """Download the DIA search results page for *product_name*.

    Args:
        product_name: Search term. It is sent as the ``q`` query parameter
            so that spaces and reserved characters (``&``, ``#``, ...) are
            URL-encoded instead of corrupting the query string.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request could block forever.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP 4xx/5xx status.
    """
    response = get(
        'https://www.dia.es/search',
        params={'q': product_name},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors rather than handing an error page to the parser.
    response.raise_for_status()
    return response.text
def parse_data(response):
    """Collect the product-name, price and price-per-unit nodes from the HTML.

    Args:
        response: HTML text of a search results page.

    Returns:
        A 3-tuple ``(productos, precios, precios_ume)`` holding the ``<p>``
        elements found for each of the three CSS classes. The three lookups
        are independent, so nothing here guarantees equal lengths.
    """
    document = BeautifulSoup(response, "lxml")
    css_classes = (
        "search-product-card__product-name",
        "search-product-card__active-price",
        "search-product-card__price-per-unit",
    )
    productos, precios, precios_ume = (
        document.find_all("p", class_=css_class) for css_class in css_classes
    )
    return productos, precios, precios_ume
def print_results(productos, precios, precios_ume):
    """Print a header and one ``;``-separated row per product to stdout.

    Each row contains: product name, price, currency, price per unit of
    measure, currency, quantity (price divided by price per unit, rounded to
    4 decimals) and the unit of measure.

    Args:
        productos: Elements whose ``.text`` is the product name.
        precios: Elements whose ``.text`` looks like ``"<amount> <currency>"``.
        precios_ume: Elements whose ``.text`` looks like
            ``"(<amount> <currency>/<unit>)"``.

    The three sequences are walked in lockstep; iteration stops at the
    shortest one. Numbers are parsed with ``locale.atof``, so the active
    locale decides which decimal separator is accepted.

    A row whose texts do not match the expected shape (missing token,
    unparsable number, zero price per unit) is reported on stderr and
    skipped, so a single odd entry does not abort the whole listing.
    """
    sep = ';'
    print('Producto', sep, 'Precio unitario', sep, 'Moneda', sep, 'Precio UME', sep, 'Moneda', sep, 'Cantidad', sep, 'UME')
    for producto, precio, precio_ume in zip(productos, precios, precios_ume):
        # Collapse surrounding/embedded whitespace so each product stays on one row.
        producto_str = " ".join(producto.text.split())
        try:
            importe, moneda = precio.text.split()[:2]
            ume = precio_ume.text.replace("(", "").replace(")", "").split()
            # ume[1] is "<currency>/<unit>"; keep only the unit part.
            unidad = ume[1].split("/")[1]
            cantidad = round(locale.atof(importe) / locale.atof(ume[0]), 4)
        except (IndexError, ValueError, ZeroDivisionError) as error:
            print(f'Skipping {producto_str!r}: {error}', file=sys.stderr)
            continue
        print(producto_str, sep, importe, sep, moneda, sep, ume[0], sep, moneda, sep, cantidad, sep, unidad)
def main():
    """Command-line entry point: search for a product and print the results.

    Reads the product name from the command line, switches the process
    locale to ``es_ES.UTF-8``, downloads the search page, extracts the
    product data and prints it.
    """
    cli = argparse.ArgumentParser(description='Scrape product information from Supermercados DIA.')
    cli.add_argument('product_name', help='Name of the product to search for')
    options = cli.parse_args()

    # print_results parses prices with locale.atof, which follows the active
    # locale, so the locale has to be set before any price is converted.
    locale.setlocale(locale.LC_ALL, "es_ES.UTF-8")

    html = fetch_data(options.product_name)
    print_results(*parse_data(html))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment