Skip to content

Instantly share code, notes, and snippets.

@bilelmoussaoui
Created December 31, 2017 15:51
Show Gist options
  • Save bilelmoussaoui/6c5c64c84625e8a2abd19fc5d1ac8a13 to your computer and use it in GitHub Desktop.
Save bilelmoussaoui/6c5c64c84625e8a2abd19fc5d1ac8a13 to your computer and use it in GitHub Desktop.
eztv torrent scraper
from bs4 import BeautifulSoup
import requests
# Search-results page that is downloaded and scanned for torrent rows.
url = "https://eztv.yt/search/avengers-assemble"
# Season and inclusive episode range used to build the episode codes
# that are looked for in the link titles of the page.
season_nr = 4
start_episode = 1
end_episode = 13
def get_episodes(season_nr, start_episode, end_episode):
    """Return the episode codes (e.g. ``S04E01``) for an inclusive range.

    Args:
        season_nr: Season number; zero-padded to at least two digits.
        start_episode: First episode number of the range (inclusive).
        end_episode: Last episode number of the range (inclusive).

    Returns:
        A list of strings of the form ``S<season>E<episode>``, one per
        episode, in ascending order. The list is empty when
        ``start_episode`` is greater than ``end_episode``.
    """
    # Pad to at least two digits: with a single-digit end_episode the old
    # width of len(str(end_episode)) produced codes such as "S04E5", which
    # do not match "S04E05" and are a substring of "S04E50".
    width = max(2, len(str(end_episode)))
    season = "S{}".format(str(season_nr).zfill(2))
    return [
        "{}E{}".format(season, str(number).zfill(width))
        for number in range(start_episode, end_episode + 1)
    ]
def read_page(page_url, timeout=30):
    """Download a page and return its raw body.

    Args:
        page_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as bytes when the server answers with HTTP 200.

    Raises:
        SystemExit: If the request fails or the status code is not 200.
    """
    failure = "Couldn't download the html page"
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as error:
        raise SystemExit(failure) from error
    if response.status_code == 200:
        return response.content
    # Same effect as the site-provided exit(), which is not guaranteed to
    # exist (e.g. when Python runs with -S).
    raise SystemExit(failure)
# Multipliers for the unit suffix of a human-readable size.
_UNIT_FACTORS = {
    "B": 1,
    "KB": 1024,
    "KIB": 1024,
    "MB": 1024 ** 2,
    "MIB": 1024 ** 2,
    "GB": 1024 ** 3,
    "GIB": 1024 ** 3,
    "TB": 1024 ** 4,
    "TIB": 1024 ** 4,
}


def _size_in_bytes(size_text):
    """Convert a size such as ``"1.2 GB"`` to bytes; -1 if it cannot be parsed."""
    parts = str(size_text).replace(",", "").split()
    if len(parts) != 2:
        return -1
    amount, unit = parts
    try:
        return float(amount) * _UNIT_FACTORS[unit.upper()]
    except (ValueError, KeyError):
        return -1


def get_best_results(episodes_info):
    """Pick the largest release for every episode.

    Args:
        episodes_info: Mapping of episode code to a list of table rows.
            Each row must offer ``find_all('td')``; the fifth cell from
            the end is searched for the magnet anchor and the fourth cell
            from the end holds the size text.
            NOTE(review): the size is assumed to look like "<number> <unit>"
            (e.g. "350.2 MB") -- confirm against the live page.

    Returns:
        A dict mapping each episode code to ``{'size': ..., 'link': ...}``
        describing its largest row. The first row wins on ties. Episodes
        without any usable row are left out.
    """
    best_results = {}
    for episode_nr, results in episodes_info.items():
        best = None  # (size in bytes, size as shown on the page, magnet link)
        for tr in results:
            td_list = tr.find_all('td')
            if len(td_list) < 5:
                continue  # not a torrent row; the cells read below are missing
            size_cell = td_list[-4]
            magnet = td_list[-5].find('a', 'magnet')
            if magnet is None or not size_cell.contents:
                continue
            link = magnet.get('href')
            if not link:
                continue
            size = size_cell.contents[0]
            # Compare numerically: as plain strings "900 MB" sorts above "1.2 GB".
            weight = _size_in_bytes(size)
            if best is None or weight > best[0]:
                best = (weight, size, link)
        if best is not None:
            # Report the link that belongs to the chosen size, not the last
            # link that happened to be seen.
            best_results[episode_nr] = {
                'size': best[1],
                'link': best[2],
            }
    return best_results
def load_episodes(page_url):
    """Download a search page and return the best release per episode.

    The episode codes to look for are built from the module-level
    ``season_nr``, ``start_episode`` and ``end_episode`` values.

    Args:
        page_url: URL of the search-results page to scan.

    Returns:
        The result of ``get_best_results`` for the matching rows, or an
        empty dict when the downloaded page is empty.
    """
    html = read_page(page_url)
    if not html:
        return {}

    soup = BeautifulSoup(html, 'html.parser')
    # Lower-case each code once instead of on every comparison.
    needles = [
        (code, code.lower())
        for code in get_episodes(season_nr, start_episode, end_episode)
    ]

    episodes_info = {}
    for tr in soup.find_all('tr', 'forum_header_border'):
        titles = [
            title.lower()
            for title in (anchor.get('title') for anchor in tr.find_all('a'))
            if title
        ]
        for code, needle in needles:
            # A row is recorded at most once per episode, even when several
            # of its links carry a matching title.
            if any(needle in title for title in titles):
                episodes_info.setdefault(code, []).append(tr)
    return get_best_results(episodes_info)
# Script body: scrape the configured search page and print one line per
# episode with the chosen size and magnet link.
episodes = load_episodes(url)
for code in episodes:
    details = episodes[code]
    print("{} - Size: {} - Magnet: {}".format(code, details["size"], details['link']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment