Created
January 7, 2018 00:42
-
-
Save Evantm/bf3fbd84e88dc04174b96d5b9d75600a to your computer and use it in GitHub Desktop.
Command line python script that scrapes TRU Bookstore then searches Libgen for that book.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from urllib.parse import parse_qs, urlparse

from bs4 import BeautifulSoup
from requests import get
# Pass your class details on the command line. The script looks the class up
# on the TRU bookstore site to get the textbook's ISBN,
# searches Libgen with that ISBN to find the book's MD5,
# and prints the Libgen download link built from that MD5.
def make_request(url):
    """Fetch *url* with browser-like headers and return the parsed response.

    The headers (including a fixed ``Host`` of ``thebookstore.tru.ca``) mimic
    a desktop Chrome request, presumably so the bookstore serves the same
    content it would serve a browser.

    Args:
        url: Full URL of the bookstore page to request.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.
    """
    headers = {
        'Host': 'thebookstore.tru.ca',
        'Connection': 'keep-alive',
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        'Accept': "*/*",
        'Referer': 'https://thebookstore.tru.ca/buy_courselisting.asp?control=course&course=38044&term=181',
        # 'br' is deliberately not advertised: requests can only decode
        # Brotli when an optional brotli package is installed, and an
        # undecoded body would parse as garbage.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-CA,en-GB;q=0.9,en-US;q=0.8,en;q=0.7',
        # Header values must be strings; an int here is rejected by requests.
        'DNT': '1',
    }
    # A timeout keeps the script from hanging forever on a stalled server.
    response = get(url, headers=headers, timeout=30)
    return BeautifulSoup(response.text, 'html.parser')
def get_download_link(search_term):
    """Search Libgen for *search_term* and return a download URL.

    The search results page is scanned for the first link whose query string
    carries an ``md5`` parameter; that MD5 is substituted into the Libgen
    ``get.php`` URL.

    Args:
        search_term: Text to search for (the caller passes an ISBN).

    Returns:
        The ``http://gen.lib.rus.ec/get.php?md5=...`` URL as a string, or
        ``-1`` when no result link with an ``md5`` parameter is found.
    """
    search_url = 'http://gen.lib.rus.ec/search.php'
    md5link = 'http://gen.lib.rus.ec/get.php?md5={}'
    # Passing the query as params lets requests URL-encode the search term.
    params = {
        'req': search_term,
        'lg_topic': 'libgen',
        'open': 0,
        'view': 'simple',
        'res': 25,
        'phrase': 0,
        'column': 'def',
    }
    response = get(search_url, params=params, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    # href=True skips anchors that have no href attribute at all.
    for anchor in soup.find_all('a', href=True):
        # Parse the query string so the md5 value is found regardless of
        # where the parameter sits among the others.
        md5_values = parse_qs(urlparse(anchor['href']).query).get('md5')
        if md5_values:
            return md5link.format(md5_values[0])
    return -1
def get_course_number(subj_num, term, num):
    """Return the bookstore id of course *num* within a department.

    Requests the department listing for ``subj_num`` and ``term`` and scans
    its ``course`` elements for one whose ``name`` attribute equals ``num``.

    Args:
        subj_num: Department id used in the ``dept`` query parameter.
        term: Term code used in the ``term`` query parameter.
        num: Course number to match against each course's ``name``.

    Returns:
        The ``id`` attribute of the first matching course, or ``None`` when
        no course matches.
    """
    listing_url = "https://thebookstore.tru.ca/textbooks_xml.asp?control=department&dept={}&term={}".format(subj_num, term)
    listing = make_request(listing_url)
    matching_ids = (
        course['id']
        for course in listing.findAll('course')
        if course['name'] == num
    )
    return next(matching_ids, None)
def get_subject_id(subject, term, campus):
    """Return the bookstore id of the department abbreviated *subject*.

    Requests the campus listing for ``campus`` and ``term`` and scans its
    ``department`` elements for one whose ``abrev`` attribute equals
    ``subject``.

    Args:
        subject: Department abbreviation to look for.
        term: Term code used in the ``term`` query parameter.
        campus: Campus id used in the ``campus`` query parameter.

    Returns:
        The ``id`` attribute of the first matching department, or ``None``
        when no department matches.
    """
    campus_url = "https://thebookstore.tru.ca/textbooks_xml.asp?control=campus&campus={}&term={}".format(campus, term)
    campus_page = make_request(campus_url)
    for department in campus_page.findAll('department'):
        if department['abrev'] != subject:
            continue
        return department['id']
    return None
def get_section_id(course_id, term, section_num):
    """Return the bookstore id of a course section.

    Requests the course listing for ``course_id`` and ``term``. When the
    listing holds exactly one ``section`` element, its id is returned without
    checking the name; otherwise the first section whose ``name`` attribute
    equals ``section_num`` is used.

    Args:
        course_id: Course id used in the ``course`` query parameter.
        term: Term code used in the ``term`` query parameter.
        section_num: Section name to match when several sections exist.

    Returns:
        The ``id`` attribute of the chosen section, or ``None`` when several
        (or zero) sections exist and none matches.
    """
    course_url = "https://thebookstore.tru.ca/textbooks_xml.asp?control=course&course={}&term={}".format(course_id, term)
    found = make_request(course_url).findAll('section')
    # A lone section is taken as-is, whatever its name.
    if len(found) == 1:
        (only_section,) = found
        return only_section['id']
    return next(
        (entry['id'] for entry in found if entry['name'] == section_num),
        None,
    )
def get_isbn(section, term):
    """Return the ISBN of the first textbook listed for a section.

    Requests the section's book listing and probes ``input`` elements with
    ids ``isbn-1`` through ``isbn-9``, returning the ``value`` of the first
    one present.

    Args:
        section: Section id used in the ``section`` query parameter.
        term: Term code used in the ``term`` query parameter.

    Returns:
        The ISBN string, or a sentinel when no ISBN input is found:
        ``-1`` if a ``sku-new-1`` input exists (a book is listed but not in
        stock), ``-2`` if it does not (no textbooks for this course).
    """
    # The parameter separator must be '&section='; an entity-decoded
    # '§ion' would drop the section parameter from the request entirely.
    soup = make_request("https://thebookstore.tru.ca/textbooks_xml.asp?control=section&section={}&term={}".format(section, term))
    for i in range(1, 10):
        isbn = soup.find("input", {"id": "isbn-{}".format(i)})
        if isbn is not None:
            return isbn.attrs['value']
    # The bookstore omits the ISBN when a book is out of stock but still
    # exposes its SKU, so the SKU tells "out of stock" apart from "no book".
    if soup.find("input", {"id": "sku-new-1"}) is not None:
        return -1  # No textbooks in stock
    return -2  # No textbooks for this course
def main(Subj, Num, Sect, DateCode, Campus):
    """Resolve a class to a Libgen download link for its textbook.

    Walks the bookstore hierarchy (subject -> course -> section) to find the
    textbook ISBN, then searches Libgen with that ISBN.

    Args:
        Subj: Subject abbreviation, e.g. ``'COMP'``.
        Num: Course number, e.g. ``'2680'``.
        Sect: Section name, e.g. ``'01'``.
        DateCode: Bookstore term code, e.g. ``181``.
        Campus: Bookstore campus id, e.g. ``76``.

    Returns:
        The download URL as a string, or one of the messages
        ``'No Textbooks in stock'``, ``'No Textbooks for this Course'`` or
        ``'No books found on libgen'``.
    """
    subject_id = get_subject_id(Subj, DateCode, Campus)
    course_id = get_course_number(subject_id, DateCode, Num)
    section_id = get_section_id(course_id, DateCode, Sect)
    isbn = get_isbn(section_id, DateCode)
    # Handle the ISBN sentinels before touching Libgen, so no search is
    # issued for "-1" or "-2".
    if isbn == -1:
        return 'No Textbooks in stock'
    if isbn == -2:
        return 'No Textbooks for this Course'
    link = get_download_link(isbn)
    if link == -1:
        return 'No books found on libgen'
    return link
if __name__ == '__main__':
    # Command-line entry point: expects Subj Num Section DateCode Campus.
    # Example values:
    #   Subj = 'COMP'
    #   Num = '1130'
    #   Sect = '01'
    #   DateCode = 181
    #   Campus = 76  # This is Kamloops
    if len(sys.argv) < 6:
        # Only a missing argument is a usage error.
        print('Improper Usage \n Usage: python textbook.py Subj Num Section DateCode Campus \n Example: python textbook.py COMP 2680 01 181 76')
        sys.exit(1)
    Subj, Num, Sect, DateCode, Campus = sys.argv[1:6]
    try:
        print(main(Subj, Num, Sect, DateCode, Campus))
    except Exception as exc:
        # Network and parsing failures are reported as what they are instead
        # of being mislabelled as bad usage; Ctrl-C is no longer swallowed.
        sys.exit('Lookup failed: {}'.format(exc))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment