Last active
February 23, 2019 16:06
-
-
Save gugarosa/8f5ea5a7db53b689f58b1ac85f7d4dcb to your computer and use it in GitHub Desktop.
Requests the source code of a YouTube channel page and scrapes any data you need from it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Poll a YouTube channel page and log one scraped value per cycle.

Every ``sleep_time`` seconds the page at ``url`` is downloaded, the first
``<span>`` carrying ``count_class`` is located, and its text is printed and
appended to ``output_path``. A failed request or a missing tag is reported
and skipped so that a single bad cycle does not stop the poller.
"""

import sys
import time
from typing import Optional

import requests
from bs4 import BeautifulSoup

# Youtube channel URL
url = 'https://www.youtube.com/channel/UCTCykZFeSbgMuL2ZzhSyVzg'

# Sleeping time (seconds between two consecutive requests)
sleep_time = 5

# Seconds to wait on a single request before giving up; without a timeout
# requests.get() may block forever on a stalled connection.
request_timeout = 10

# CSS class of the <span> to scrape (you can modify to whichever tag you need)
count_class = 'yt-subscription-button-subscriber-count-branded-horizontal'

# File that receives one scraped value per line
output_path = 'out.txt'


def fetch_count(page_url: str) -> Optional[str]:
    """Download *page_url* and return the text of the first matching span.

    Args:
        page_url: Address of the page to request.

    Returns:
        The text of the first ``<span>`` whose class is ``count_class``, or
        ``None`` when the page contains no such tag.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(page_url, timeout=request_timeout)
    # Do not parse error pages (4xx/5xx) as if they were the channel page.
    page.raise_for_status()

    soup = BeautifulSoup(page.text, 'html.parser')
    # find() yields the first match or None, avoiding an IndexError on a miss.
    span = soup.find('span', class_=count_class)
    if span is None:
        return None
    return span.get_text()


def append_count(count: str, path: str = output_path) -> None:
    """Append *count* as a single line to the file at *path*."""
    # The context manager closes the file even if write() raises; the explicit
    # encoding keeps non-ASCII text from failing on non-UTF-8 default locales.
    with open(path, 'a', encoding='utf-8') as out_file:
        out_file.write(f'{count}\n')


def main() -> None:
    """Run the request / record / sleep cycle until interrupted."""
    while True:
        try:
            count = fetch_count(url)
        except requests.RequestException as err:
            # Network hiccups are expected for a long-running poller:
            # report the failure and try again on the next cycle.
            print(f'Request failed: {err}', file=sys.stderr)
        else:
            if count is None:
                print(f'No <span> with class {count_class!r} found',
                      file=sys.stderr)
            else:
                print(f'Subscribed: {count}')
                append_count(count)

        # Sleeping until next request
        print('Sleeping till next request ...')
        time.sleep(sleep_time)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment