Skip to content

Instantly share code, notes, and snippets.

@RamonWill
Created July 28, 2020 11:15
Show Gist options
  • Save RamonWill/5572bb49916c87d2f3227bf6ff26f5a3 to your computer and use it in GitHub Desktop.
Save RamonWill/5572bb49916c87d2f3227bf6ff26f5a3 to your computer and use it in GitHub Desktop.
This is from a video tutorial on how to create a web scraper for morrisons.com using OOP concepts. It also shows you how to store the scraped data in a CSV file and a database.
import sqlite3
from bs4 import BeautifulSoup
import requests as re
import pandas as pd
# This code is from my youtube video: https://www.youtube.com/watch?v=ii7CfpdRPYA
def main():
    """Scrape the Morrisons "fresh" listing and persist the products.

    The products found on the page are inserted into the ``products`` table
    of a local SQLite database (created on first use) and then written to
    ``morrisons.csv`` through a pandas DataFrame.
    """
    url = "https://groceries.morrisons.com/browse/fresh-176739"
    page = MorrisonsWebpage(url)
    page_products = page.get_products()

    # NOTE: the file name spelling is kept as-is so existing databases are
    # still picked up.
    conn = sqlite3.connect("MorrionsProduct.db")
    try:
        # Using the connection as a context manager commits when the block
        # succeeds and rolls back if any statement raises, so a failed run
        # never leaves a half-written batch behind.
        with conn:
            conn.execute(""" CREATE TABLE IF NOT EXISTS products(
                id INTEGER PRIMARY KEY,
                name VARCHAR(250) NOT NULL,
                url VARCHAR(250) NOT NULL,
                price REAL,
                rating REAL)""")
            for product in page_products:
                print(f"Inserting {product}...")
                # vars(product) supplies the :name/:url/:price/:rating
                # named parameters; the null id lets SQLite assign the key.
                conn.execute("""INSERT INTO products VALUES(
                    null, :name, :url, :price, :rating)""", vars(product))
    finally:
        # Always release the database handle, even when an insert fails.
        conn.close()

    df = pd.DataFrame([vars(product) for product in page_products])
    df.to_csv("morrisons.csv")
class MorrisonsProduct:
    """A single product scraped from a listing page.

    Two products are considered the same item when both their name and
    their URL match; price and rating do not take part in equality or
    hashing.
    """

    def __init__(self, name, url, price, rating):
        """Store the product's name, URL, price and rating as attributes."""
        self.name = name
        self.url = url
        self.price = price
        self.rating = rating

    def _key(self):
        """Return the (name, url) pair that identifies this product."""
        return (self.name, self.url)

    def __repr__(self):
        """Return a short label containing the product name."""
        return f"Product: {self.name}"

    def __eq__(self, other):
        """Return True when *other* is the same kind of product with the same key."""
        if not isinstance(other, type(self)):
            return False
        return self._key() == (other.name, other.url)

    def __hash__(self):
        """Hash on the same (name, url) pair that equality uses."""
        return hash(self._key())
class MorrisonsWebpage:
    """Scrape the product tiles of one groceries.morrisons.com listing page.

    Creating an instance validates the URL, downloads the page and turns
    every ``div`` with class ``fop-contentWrapper`` into a
    ``MorrisonsProduct``. Products are collected in the ``products`` set, so
    entries that compare equal are stored only once.
    """

    _BASE_URL = "https://groceries.morrisons.com"
    # Seconds to wait for the server before giving up instead of hanging.
    _REQUEST_TIMEOUT = 30
    # Position of the numeric score inside the rating element's title text.
    # NOTE(review): assumes an 8-character prefix followed by the score -
    # confirm against the live markup.
    _RATING_SLICE = slice(8, 13)

    def __init__(self, url):
        """Download and parse *url*.

        Raises:
            AttributeError: if *url* does not contain the Morrisons
                groceries base URL (exception type kept for existing
                callers).
        """
        if self._BASE_URL + "/" not in url:
            msg = "url must contain https://groceries.morrisons.com/"
            raise AttributeError(msg)
        self._url = url
        self._page_element = None
        self.products = set()
        self._create_soup_element()
        self._extract_products()

    def get_products(self):
        """Return the scraped products as a new list (order is arbitrary)."""
        return list(self.products)

    def _create_soup_element(self):
        """Fetch the page and store its product tile elements.

        Raises the HTTP library's error when the request times out or the
        server answers with an error status, so an error page is never
        mistaken for an empty listing.
        """
        # ``re`` is the file's alias for the ``requests`` package, not the
        # standard-library regex module.
        response = re.get(self._url, timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        self._page_element = soup.find_all("div", class_="fop-contentWrapper")

    def _create_product(self, product):
        """Build a ``MorrisonsProduct`` from one product tile element."""
        title = product.h4["title"]
        link = self._BASE_URL + product.a["href"]
        price = self._parse_price(product)
        rating = self._parse_rating(product)
        return MorrisonsProduct(title, link, price, rating)

    def _parse_price(self, product):
        """Return the tile's price in pounds, or None when no price is shown.

        An offer price takes precedence over the regular price. Text ending
        in ``p`` is read as pence and converted to pounds; otherwise a
        leading currency symbol is dropped before conversion.

        Raises:
            ValueError: if the price text is not numeric after clean-up.
        """
        price_tag = product.find("span", class_="fop-price price-offer")
        if price_tag is None:
            price_tag = product.find("span", class_="fop-price")
        if price_tag is None:
            return None

        # get_text() copes with nested markup and surrounding whitespace,
        # both of which break a plain ``.string`` lookup.
        text = price_tag.get_text(strip=True)
        if not text:
            return None

        if text.endswith("p"):
            return float(text[:-1]) / 100
        # Drop the currency symbol only when one is actually present.
        if not (text[0].isdigit() or text[0] == "."):
            text = text[1:]
        return float(text)

    def _parse_rating(self, product):
        """Return the tile's rating as a float, or None when it has none.

        Raises:
            ValueError: if the expected slice of the title is not numeric.
        """
        rating_tag = product.find("span", class_="fop-rating-inner")
        if rating_tag is None:
            return None
        title = rating_tag.get("title")
        if not title:
            # A rating element without a title carries no score to read.
            return None
        return float(title[self._RATING_SLICE])

    def _extract_products(self):
        """Convert every stored tile element and add it to ``products``."""
        for element in self._page_element:
            self.products.add(self._create_product(element))
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment