Skip to content

Instantly share code, notes, and snippets.

@YourFriendCaspian
Created November 6, 2022 02:00
Show Gist options
  • Save YourFriendCaspian/a5622c50b8bd55edd2b746ba87c0cfd4 to your computer and use it in GitHub Desktop.
Save YourFriendCaspian/a5622c50b8bd55edd2b746ba87c0cfd4 to your computer and use it in GitHub Desktop.
Script to download all MagPi PDFs - skips existing PDFs - pip install bs4 requests
'''
Download all MagPis available
'''
#!/usr/bin/env python3
# pip install bs4 requests
import ntpath
import os
import sys
import urllib.request
from pathlib import Path
import requests
from bs4 import BeautifulSoup
# Colored output
class Color:
    '''
    ANSI escape sequences used to colorize command-line output.

    Emit one of the color codes before the text and ``END`` after it so
    the terminal returns to its default rendering.
    '''
    GREEN = '\033[92m'  # SGR 92: bright green foreground
    RED = '\033[91m'    # SGR 91: bright red foreground
    END = '\033[0m'     # SGR 0: reset all attributes
# URL Request
def request(magpi_url):
    '''
    Fetch a URL and return the response body as text.

    magpi_url: address of the page to retrieve.

    Returns the body decoded with the charset announced in the response
    headers, falling back to UTF-8 when none is declared.

    Raises whatever ``urllib.request.urlopen`` raises on failure
    (``urllib.error.URLError`` / ``HTTPError``), and ``UnicodeDecodeError``
    if the body does not match the charset.
    '''
    # The browser-like User-Agent is attached to this request only, rather
    # than installing a process-wide opener as a side effect.
    # NOTE(review): presumably the site rejects urllib's default agent.
    page_request = urllib.request.Request(
        magpi_url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(page_request) as response:
        charset = response.headers.get_content_charset() or 'utf-8'
        return response.read().decode(charset)
# Get all released issues
def get_issues(magpi_url, magpi_ext):
    '''
    Collect the download links found on the index page.

    magpi_url: URL of the index page; also the base for relative links.
    magpi_ext: suffix a link must end with to be kept (e.g. ".pdf").

    Returns a list of absolute URLs, in page order.
    '''
    from urllib.parse import urljoin

    soup = BeautifulSoup(request(magpi_url), 'html.parser')
    issues = []
    for node in soup.find_all('a'):
        href = node.get('href')
        # Anchors without an href (e.g. <a name="...">) yield None; skip
        # them instead of failing on None.endswith().
        if not href or not href.endswith(magpi_ext):
            continue
        # urljoin gives the same result as concatenation for plain relative
        # names, and also handles absolute and root-relative hrefs.
        issues.append(urljoin(magpi_url, href))
    return issues
def download(magpi_url, filename, timeout=30):
    '''
    Download ``magpi_url`` to ``filename`` while drawing a progress bar.

    magpi_url: URL of the file to fetch.
    filename:  destination path (str or Path).
    timeout:   seconds passed to ``requests.get`` as its timeout.

    The data is streamed into ``<filename>.part`` and only renamed to
    ``filename`` once the transfer has finished, so a failed or interrupted
    download never leaves a truncated file under the final name.

    Raises ``requests.RequestException`` on network or HTTP errors
    (non-2xx responses raise via ``raise_for_status``) and ``OSError`` on
    file-system errors.
    '''
    target = Path(filename)
    partial = target.with_name(target.name + '.part')
    try:
        with requests.get(magpi_url, stream=True, timeout=timeout) as response:
            # Do not save an HTML error page under a .pdf name.
            response.raise_for_status()
            header = response.headers.get('content-length')
            # Missing, malformed or zero length: download without a bar.
            total = int(header) if header and header.isdigit() else 0
            chunk_size = max(total // 1000, 1024 * 1024)
            downloaded = 0
            with open(partial, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    if not total:
                        continue
                    downloaded += len(data)
                    # Decoded bytes can exceed Content-Length when the body
                    # is compressed; clamp so the bar stays 50 cells wide.
                    done = min(50, 50 * downloaded // total)
                    sys.stdout.write('\r[{}{}]'.format(
                        '█' * done, '.' * (50 - done)))
                    sys.stdout.flush()
        os.replace(partial, target)
    except BaseException:
        # Includes KeyboardInterrupt: remove the partial file so the next
        # run does not mistake it for a finished download, then re-raise.
        try:
            os.remove(partial)
        except OSError:
            pass  # best-effort cleanup; the partial file may not exist yet
        raise
    sys.stdout.write('\n')
def _ask_destination():
    '''
    Prompt until the user names an existing directory or accepts the default.

    Returns a ``Path``. An empty answer selects ``~/Downloads``, which is
    created if it is missing.
    '''
    while True:
        answer = input("Choose download path. Default is ~/Downloads\n: ").strip()
        if not answer:
            default = Path.home() / "Downloads"
            default.mkdir(parents=True, exist_ok=True)
            return default
        # The prompt itself suggests "~", so expand it before checking.
        chosen = Path(answer).expanduser()
        # A regular file is not a usable destination, so test for a
        # directory rather than mere existence.
        if chosen.is_dir():
            return chosen
        print("Path does not exist or is not a directory. Try again.")


def main():
    '''
    Ask for a destination folder, then download every issue not already there.
    '''
    # Directory
    dest = _ask_destination()
    # MagPi
    magpi_url = "https://www.raspberrypi.org/magpi-issues/"
    magpi_ext = ".pdf"
    for issue in get_issues(magpi_url, magpi_ext):
        basename = ntpath.basename(issue)
        file = Path(dest, basename)
        if file.exists():
            print(Color.RED, "Exists:", basename, Color.END)
            continue
        print("\n", Color.GREEN, "Downloading", basename, Color.END)
        download(issue, file)
if __name__ == '__main__':
    # Script entry point: Ctrl-C ends the run quietly with exit status 0
    # instead of printing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        raise SystemExit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment