Skip to content

Instantly share code, notes, and snippets.

@ashutoshkrris
Last active April 16, 2022 08:55
Show Gist options
  • Save ashutoshkrris/d1474273e3abfa3e6aa72fdad8f13b76 to your computer and use it in GitHub Desktop.
Save ashutoshkrris/d1474273e3abfa3e6aa72fdad8f13b76 to your computer and use it in GitHub Desktop.
import csv
import json
from bs4 import BeautifulSoup
from selenium import webdriver
# Shared Chrome WebDriver, created as soon as this module is imported; the
# driver binary is looked up at the relative path "chromedriver.exe".
# NOTE(review): newer Selenium releases appear to have replaced the
# `executable_path` keyword with a Service object - confirm against the
# installed Selenium version.
BROWSER = webdriver.Chrome(executable_path="chromedriver.exe")
# Upper bound on how many table rows data_scraper() keeps per column.
TOTAL_PERSONS = 100
def _cell_texts(cells, start, limit):
    """Return the stripped text of up to ``limit`` cells, beginning at ``start``."""
    return [cell.get_text().strip() for cell in cells[start:start + limit]]


def data_scraper():
    """Scrape the Bloomberg billionaires table into parallel column lists.

    Loads https://www.bloomberg.com/billionaires/ with the module-level
    ``BROWSER``, parses the rendered HTML with BeautifulSoup and extracts at
    most ``TOTAL_PERSONS`` entries for every column.

    Returns:
        dict: maps ``"ranks"``, ``"names"``, ``"links"``, ``"worths"``,
        ``"last_changes"``, ``"ytds"``, ``"countries"`` and ``"industries"``
        to lists of strings. ``"links"`` holds the ``href`` of the first
        ``<a>`` inside each name cell with every ``"./"`` removed.

    Raises:
        TypeError: if one of the kept name cells contains no ``<a>`` element.
    """
    try:
        BROWSER.get("https://www.bloomberg.com/billionaires/")
        html_source = BROWSER.page_source
    finally:
        # Always release the browser, even when loading the page fails;
        # quit() ends the whole driver session rather than just the window.
        BROWSER.quit()

    soup = BeautifulSoup(html_source, 'html.parser')

    def column(css_class, start):
        # Text of up to TOTAL_PERSONS <div> cells matching css_class.
        cells = soup.find_all('div', class_=css_class)
        return _cell_texts(cells, start, TOTAL_PERSONS)

    # Name cells are kept as elements because both the text and the link are
    # read from them. Truncating once keeps names and links the same length
    # (previously links covered every row on the page).
    name_cells = soup.find_all(
        'div', class_='table-cell t-name')[:TOTAL_PERSONS]
    names = _cell_texts(name_cells, 0, TOTAL_PERSONS)
    links = [cell.find('a')['href'].replace("./", "") for cell in name_cells]

    # For the columns below the first match is skipped, as in the original
    # slicing - presumably it is the table header cell; verify against the
    # live page markup.
    return {
        "ranks": column('table-cell t-rank', 0),
        "names": names,
        "links": links,
        "worths": column('table-cell active t-nw', 1),
        "last_changes": column('t-lcd', 1),
        "ytds": column('t-ycd', 1),
        "countries": column('table-cell t-country', 1),
        "industries": column('table-cell t-industry', 1),
    }
if __name__ == '__main__':
    # Script entry point: run the scraper once and dump the resulting dict.
    print(data_scraper())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment