Skip to content

Instantly share code, notes, and snippets.

@rye761
Created April 23, 2019 02:27
Show Gist options
  • Save rye761/62900e274560155215d83c1c21582433 to your computer and use it in GitHub Desktop.
Save rye761/62900e274560155215d83c1c21582433 to your computer and use it in GitHub Desktop.
Social Blade scrape
import urllib2
import csv
import re
from bs4 import BeautifulSoup
# Scrape the Social Blade "top 50 most viewed" ranking page and write each
# channel's username, upload count and view count to topyoutubers.csv,
# echoing every row to stdout as it is written.
rank_page = 'https://socialblade.com/youtube/top/50/mostviewed'
# The request carries an explicit User-Agent header; the value here is a
# placeholder to be replaced with a real browser string.
request = urllib2.Request(rank_page, headers={'User-Agent': 'your user-agent'})
page = urllib2.urlopen(request)
try:
    soup = BeautifulSoup(page, 'html.parser')
finally:
    # Release the HTTP connection even when parsing fails.
    page.close()

# The ranking container is located purely by its inline style attribute, so
# fail with a clear message (instead of an AttributeError on None) when the
# page layout no longer matches.
container = soup.find('div', attrs={'style': 'float: right; width: 900px;'})
if container is None:
    raise RuntimeError('ranking container not found on ' + rank_page)

# Only direct child <div>s are considered and the first four are skipped --
# presumably header/ad rows rather than channel entries; verify against the page.
channels = container.find_all('div', recursive=False)[4:]

# 'wb' is the mode the Python 2 csv module expects; the with-block guarantees
# the file is closed even if a lookup below raises part-way through the loop.
with open('topyoutubers.csv', 'wb') as csv_file:
    writer = csv.writer(csv_file)
    # write title row
    writer.writerow(['Username', 'Uploads', 'Views'])
    for channel in channels:
        username = channel.find(
            'div',
            attrs={'style': 'float: left; width: 350px; line-height: 25px;'},
        ).a.text.strip()
        uploads = channel.find(
            'div',
            attrs={'style': 'float: left; width: 80px;'},
        ).span.text.strip()
        # Views are read from the second 150px-wide column of the row.
        views = channel.find_all(
            'div',
            attrs={'style': 'float: left; width: 150px;'},
        )[1].span.text.strip()
        print(username + ' ' + uploads + ' ' + views)
        # Cells are encoded to UTF-8 bytes before being handed to csv.writer.
        writer.writerow([field.encode('utf-8') for field in (username, uploads, views)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment