Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pristanna/86e3c452195823005ff60c6376b113e2 to your computer and use it in GitHub Desktop.
Save pristanna/86e3c452195823005ff60c6376b113e2 to your computer and use it in GitHub Desktop.
Python Script to Download Springer Textbooks
import pandas as pd, os, shutil
# Sort already-downloaded PDFs in download/ into one sub-folder per
# "English Package Name" listed in the Springer spreadsheet.
df = pd.read_excel("Free+English+textbooks.xlsx")

# Step 1: create one folder per package name.
# dropna() skips rows without a package name; a NaN here would make the path
# construction fail and later produce a bogus "download/nan" target.
for cat in df["English Package Name"].dropna().unique():
    try:
        # makedirs also creates the parent "download" folder when it is
        # missing, and exist_ok=True makes re-running the script harmless.
        os.makedirs(f"download/{cat}", exist_ok=True)
    except OSError as err:
        # Report the real cause (permissions, invalid name, ...) and carry on
        # with the remaining folders.
        print(f"Could not create folder download/{cat}: {err}")

# Step 2: move every book into the folder of its package.
for _, row in df.iterrows():
    category = row.loc["English Package Name"]
    if pd.isna(category):
        # No package name means there is no folder to move the book into.
        continue

    # Same naming scheme as the download step: "<title>_<edition>", with
    # '/' and ':' replaced because they are not safe in file names.
    title = f"{row.loc['Book Title']}_{row.loc['Edition']}"
    file_name = title.replace('/', '-').replace(':', '-')

    source = f"download/{file_name}.pdf"
    target = f"download/{category}/{file_name}.pdf"
    try:
        shutil.move(source, target)
    except FileNotFoundError:
        # Most commonly the book was never downloaded (or was already moved
        # by an earlier run); keep going with the remaining rows.
        print(f"File not found: {source}")
    except OSError as err:
        print(f"Could not move {source}: {err}")
import pandas as pd
import requests
#import wget
#import urllib.request
import shlex

# Read the Springer spreadsheet and print, for every book, a ready-to-run
# `wget` command that downloads its PDF.
df = pd.read_excel("Free+English+textbooks.xlsx")
print(df.head(10))

# Loop through the Excel list, one row per book.
for _, row in df.iterrows():
    # "<title>_<edition>", with '/' and ':' replaced because they are not
    # safe in file names.
    title = f"{row.loc['Book Title']}_{row.loc['Edition']}"
    file_name = title.replace('/', '-').replace(':', '-')
    url = f"{row.loc['OpenURL']}"

    try:
        # The final URL after redirects is read from r.url below. The timeout
        # keeps one stalled connection from hanging the whole run.
        r = requests.get(url, timeout=30)
    except requests.RequestException as err:
        # Printed as a shell comment so the output stays pasteable into bash.
        print(f"# skipped {file_name}: {err}")
        continue

    # Rewrite only the "/book/" path segment (first occurrence) instead of
    # every occurrence of the substring "book" anywhere in the URL.
    download_url = f"{r.url.replace('/book/', '/content/pdf/', 1)}.pdf"
    print(download_url)

    # The original author noted that downloading directly with wget.download
    # and urllib.request.urlretrieve did not work, so the script only prints a
    # command for bash and the download is then done with the normal wget
    # command. shlex.quote keeps the command valid when a title contains
    # spaces, apostrophes or other shell metacharacters.
    print(f"wget {shlex.quote(download_url)} -O {shlex.quote(file_name + '.pdf')}")
@pristanna
Copy link
Author

pristanna commented May 20, 2020

Download All Free Textbooks from Springer using Python

A Step-by-Step Guide to Download Multiple Files Using Python

by Joe T. Santhanavanich

Article: https://towardsdatascience.com/download-all-free-textbooks-from-springer-using-python-bd0b10e0ccc

Script for classification: https://gist.github.com/juanluisrto/66e2bf157ab32719210cdcac2327f3a3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment