Last active
January 24, 2024 14:08
-
-
Save nixsiow/2874e0582694d3767b837d4ea9aebe46 to your computer and use it in GitHub Desktop.
Run this Python script to download all the MagPi issues to your current working directory. It can be customised to download other PDF files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## ======================================================== ## | |
## PDF files auto downloader. V0.1.4 | |
## date: 16/10/2016 | |
## MIT license (Free to use, just tell them I say hi) | |
## Build by Nix Siow | |
## Visit http://nixsiow.com | |
## Email: nixsiow@hotmail.com | |
## or @nixsiow on Github or Twitter | |
## ======================================================== ## | |
## ========== Modules ========== ## | |
from urllib import urlopen # open webpage | |
from re import findall # regex | |
import wget # downloader, 'pip install wget' if havent already | |
## ========== Defines ========== ##
# Human-readable name of what is being downloaded; substituted into the banner.
purpose = "MagPi Magazine"
# Initial value of the download-status flag.
download_success = False  # dummy start
# Base URL that each extracted file name is appended to.
prefix = "https://www.raspberrypi.org/magpi-issues/"
# Captures the href of every table-cell link in the directory listing
# (view-source:https://www.raspberrypi.org/magpi-issues/).
# The group uses [^"]* instead of a greedy .* so the capture stops at the
# closing quote of the href even when the tag has further quoted attributes.
regex = r'<td><a href="([^"]*)".*></td>'
## ========== Log Messages ========== ##
# All templates below are %-formatted by their users; the placeholders are
# noted next to each one.

# Banner; %s is the download purpose.
author_msg = '''
************************************************************
%s auto downloader - built by Nix Siow
Visit http://nixsiow.com or @nixsiow on Github or Twitter
Email: nixsiow@hotmail.com
************************************************************
'''
# Shown once before the first download; %d is the number of files found.
startup_msg = '''
Download process is starting soon ...
Press 'Ctrl+c' anytime while it's running to terminate program.
There are total %d files waiting to be download. Please be patient ;)
---------------------------------------------------------------------
'''
# Shown after each file; %s is the file name, %d the number of files left.
success_msg = '''
Awesome! %s is successfully downloaded!
Another %d more to go. Yay!
--------------------- NEXT DOWNLOAD ---------------------
'''
# Shown before each retry; %s is the file name.
failure_msg = '''
[ERROR DETECTED]
Re-downloading %s now. Please wait ...
--------------------- RETRYING ---------------------
'''
# Prompt used after Ctrl+C; deliberately ends without a newline so the
# answer is typed on the same line.
user_input_msg = '''
------------------------------------------
Do you wish to quit or skip to next one?
'Yes' to quit.
'Next' to skip current download.
Your response: '''
# Shown when the main loop aborts.
error_msg = '''
Either you chose to quit or something went wrong!
Program ending ...
Bye!
'''
# Shown after every file has been processed.
final_msg = '''
Thanks for using this program.
Please check your current working directory for any downloaded file.
'''
## ========== FUNCTIONS ========== ## | |
# Manually get rid of irrelevant items in the list
def housekeeping(files_list):
    """Remove the first and the last entry of ``files_list`` in place.

    Slice deletion is used so that an empty or single-element list is
    handled without raising ``IndexError``.

    Args:
        files_list: Mutable list of extracted link targets; modified in place.

    Returns:
        None.
    """
    # CUSTOMIZE here for other download purpose
    del files_list[:1]   # remove first (no-op on an empty list)
    del files_list[-1:]  # remove last (no-op when nothing is left)
def _quit_or_skip():
    """Ask the user whether to quit the program or skip the current file.

    Keeps prompting until a recognised answer is given. Returns normally
    when the user chooses to skip ("next" or "no").

    Raises:
        SystemExit: If the user answers "yes".
    """
    while True:
        answer = raw_input(user_input_msg).lower()
        if answer == "yes":
            raise SystemExit
        if answer in ("next", "no"):
            print("Skipping to the next download")
            return
        print("Please enter a valid option. Try again.")


def downloader(file):
    """Download ``prefix + file`` with wget, retrying until it succeeds.

    Progress messages are printed along the way. Pressing Ctrl+C during any
    attempt (first or retry) opens the quit-or-skip prompt. Any other error
    triggers another attempt; there is no retry limit.

    Args:
        file: File name, relative to ``prefix``, to download.

    Raises:
        SystemExit: If the user answers "yes" at the quit prompt.
    """
    full_path_url = prefix + file  # full path url is needed to dl properly
    print("Downloading %s ..." % file)
    is_retry = False
    while True:
        if is_retry:
            print(failure_msg % file)
        try:
            wget.download(full_path_url)
            return  # downloaded, move on to the next file
        except KeyboardInterrupt:
            _quit_or_skip()
            return  # user chose to skip this file
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit are not Exception subclasses, so the loop can
            # always be interrupted.
            if is_retry:
                print("\nOpss... Something went wrong again.")
            is_retry = True
# setup to extract html
target_url = "https://www.raspberrypi.org/magpi-issues/"
target_web_page = urlopen(target_url)
try:
    target_html = target_web_page.read()  # extract content as strings
finally:
    target_web_page.close()  # close web server connection, even if read() fails
# regex to extract each url
files_list = findall(regex, target_html)
# cleaning up data
housekeeping(files_list)
## ========== Main program start here ========== ##
# self advertise
print(author_msg % purpose)  # change the purpose for other dl purpose
# startup msg
total_files = len(files_list)
print(startup_msg % total_files)
# loop through all data in data list
# enumerate() gives the 1-based position directly, so the remaining count is
# right even when the same file name appears more than once in the list.
for position, each_file in enumerate(files_list, 1):
    try:
        downloader(each_file)
        left = total_files - position  # how many left
        print(success_msg % (each_file, left))
    except BaseException:
        # Deliberately broad: error_msg covers both "you chose to quit"
        # (SystemExit / Ctrl+C) and unexpected failures, so all of them end
        # the program here.
        print(error_msg)
        raise SystemExit
# Final msg after task complete
print(final_msg)
## ========== Main program end here ========== ##
I tried to run this script in Spyder, but all the print statements were missing parentheses. That was easy to correct, but now Spyder returns a syntax error on line 127 that says:
try:
^
SyntaxError: invalid syntax
I'm a noob on Python. What is the solution for that SyntaxError?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried to run this on my WSL Ubuntu image in Windows, but it doesn't seem to work for me. It 'runs', but it prints a ton of 'command not found' errors. I just run it using this command: ./Auto_downloader.py. Does this still work?