Last active
September 1, 2022 06:23
-
-
Save yogeshsinghgit/31241ded459d53517d839e5c7738d766 to your computer and use it in GitHub Desktop.
Web Scraping using the Python BeautifulSoup Library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape quotes and their authors from a web page into a text file.

Downloads the page at ``url``, prints its HTML in prettified form, collects
the text of the ``text`` and ``author`` elements found inside every element
with class ``quote``, and writes one ``(quote, author)`` tuple per line to
the file ``quotes_data`` in the current working directory.
"""
import sys

import requests
from bs4 import BeautifulSoup

url = "https://quotes.toscrape.com/"

# Requests waits forever by default; a timeout keeps the script from hanging
# on an unresponsive server.
page = requests.get(url, timeout=10)

# Lists holding the extracted data. Created before the status check so both
# names exist even when the request does not succeed.
quotes = []
authors = []

if page.status_code == 200:
    soup = BeautifulSoup(page.content, "html.parser")

    # prettify() renders the parsed document with one tag per line, which is
    # handy for inspecting the page structure.
    print(soup.prettify())

    # Scrape the quote data: each match is one quote container.
    quotes_data = soup.find_all(class_="quote")

    for quote in quotes_data:
        text_tag = quote.find(class_="text")
        author_tag = quote.find(class_="author")
        # find() returns None when nothing matches. Skip incomplete
        # containers so the two lists stay the same length and in step.
        if text_tag is None or author_tag is None:
            continue
        quotes.append(text_tag.get_text())
        authors.append(author_tag.get_text())

    # Store the data. An explicit UTF-8 encoding avoids a platform-dependent
    # default that may be unable to encode non-ASCII characters in the text.
    with open("quotes_data", "w", encoding="utf-8") as file:
        for pair in zip(quotes, authors):
            file.write(str(pair) + "\n")
else:
    # Report the failure instead of finishing silently with no output file.
    print(
        f"Request to {url} failed with status code {page.status_code}",
        file=sys.stderr,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment