Created
December 31, 2017 15:51
-
-
Save bilelmoussaoui/6c5c64c84625e8a2abd19fc5d1ac8a13 to your computer and use it in GitHub Desktop.
eztv torrent scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
# Search-results page that is downloaded and scraped for torrents.
url = "https://eztv.yt/search/avengers-assemble"

# Season number used to build the episode codes (e.g. 4 -> "S04").
season_nr = 4

# First and last episode of the wanted range; both ends are included.
start_episode = 1
end_episode = 13
def get_episodes(season_nr, start_episode, end_episode):
    """Return the episode codes of a season, e.g. ``["S04E01", "S04E02"]``.

    Args:
        season_nr: Season number; zero-padded to two digits after ``S``.
        start_episode: First episode number of the range (inclusive).
        end_episode: Last episode number of the range (inclusive).

    Returns:
        A list of ``SxxEyy`` strings, one per episode in the range. The list
        is empty when ``start_episode`` is greater than ``end_episode``.
    """
    # Episode numbers are padded to at least two digits ("E05", not "E5"),
    # and to more when the last episode number itself needs more digits.
    width = max(2, len(str(end_episode)))
    season = "S{}".format(str(season_nr).zfill(2))
    return [
        "{}E{}".format(season, str(episode).zfill(width))
        for episode in range(start_episode, end_episode + 1)
    ]
def read_page(page_url):
    """Download ``page_url`` and return the body of the response.

    Args:
        page_url: Address of the page to fetch.

    Returns:
        ``req.content`` of the response when the status code is 200.

    Raises:
        SystemExit: When the request fails or the status code is not 200.
    """
    error_message = "Couldn't download the html page"
    try:
        # Without a timeout the call can block forever on a stalled server.
        req = requests.get(page_url, timeout=30)
    except requests.RequestException:
        raise SystemExit(error_message)
    if req.status_code == 200:
        return req.content
    # ``raise SystemExit`` instead of ``exit()``: the latter is injected by
    # the ``site`` module and is not guaranteed to exist.
    raise SystemExit(error_message)
def _size_to_bytes(size):
    """Convert a human-readable size such as ``"1.2 GB"`` to a byte count.

    Returns ``-1.0`` when the text cannot be parsed, so that an unreadable
    size never wins against a readable one.
    """
    import re  # local import: keeps this block independent of the file header

    multipliers = {
        "": 1,
        "K": 1024,
        "M": 1024 ** 2,
        "G": 1024 ** 3,
        "T": 1024 ** 4,
    }
    match = re.match(
        r"\s*(\d+(?:\.\d+)?)\s*([KMGT]?)I?B",
        str(size).replace(",", ""),
        re.IGNORECASE,
    )
    if match is None:
        return -1.0
    return float(match.group(1)) * multipliers[match.group(2).upper()]


def get_best_results(episodes_info):
    """Pick the largest torrent for every episode.

    Args:
        episodes_info: Mapping of episode code to a list of table-row
            elements. Each row must offer ``find_all('td')``; the size text
            is read from the fourth-to-last cell and the magnet anchor
            (``<a class="magnet">``) from the fifth-to-last cell.

    Returns:
        A dict mapping each episode code to ``{'size': ..., 'link': ...}``
        where ``size`` is the size text as found in the row and ``link`` is
        the magnet ``href`` of that same row. Episodes without any usable
        row are left out.
    """
    best_results = {}
    for episode_nr, results in episodes_info.items():
        best = None  # (size in bytes, size text, magnet href) of the winner
        for tr in results:
            td_list = tr.find_all('td')
            if len(td_list) < 5:
                continue  # row does not have the expected cells
            magnet = td_list[-5].find('a', 'magnet')
            size_cell = td_list[-4].contents
            if magnet is None or not size_cell:
                continue  # no magnet link or no size text in this row
            size = size_cell[0]
            # Compare numerically: as plain strings "900 MB" > "1.2 GB".
            size_in_bytes = _size_to_bytes(size)
            # Strict ">" keeps the earliest row when sizes are equal.
            if best is None or size_in_bytes > best[0]:
                best = (size_in_bytes, size, magnet['href'])
        if best is not None:
            best_results[episode_nr] = {
                'size': best[1],
                'link': best[2],
            }
    return best_results
def load_episodes(page_url):
    """Scrape ``page_url`` and return the chosen torrent for each episode.

    The page is downloaded with ``read_page`` and every
    ``<tr class="forum_header_border">`` row is inspected. A row belongs to
    an episode when the ``title`` attribute of any of its links contains the
    episode code (case-insensitive). The episode codes come from
    ``get_episodes`` using the module-level ``season_nr``,
    ``start_episode`` and ``end_episode``.

    Args:
        page_url: Address of the search-results page.

    Returns:
        The result of ``get_best_results`` for the collected rows, or an
        empty dict when the downloaded page is empty.
    """
    html = read_page(page_url)
    if not html:
        return {}

    soup = BeautifulSoup(html, 'html.parser')
    episodes = get_episodes(season_nr, start_episode, end_episode)
    episodes_info = {}
    for tr in soup.find_all('tr', 'forum_header_border'):
        # Lower-cased titles of all links in this row; links without a
        # title attribute are ignored.
        titles = [
            title.lower()
            for title in (link.get('title') for link in tr.find_all('a'))
            if title
        ]
        for episode in episodes:
            code = episode.lower()
            # Add the row once per episode, even when several links in the
            # same row mention the episode code.
            if any(code in title for title in titles):
                episodes_info.setdefault(episode, []).append(tr)
    return get_best_results(episodes_info)
# Scrape the configured page and print one line per episode that was found.
episodes = load_episodes(url)
for episode_nr, info in episodes.items():
    print("{} - Size: {} - Magnet: {}".format(episode_nr, info["size"], info['link']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment