Last active: July 24, 2016 03:18
-
-
Save streeter/8fedecdeec3d0c7f960a to your computer and use it in GitHub Desktop.
Scrape the backprint.com event photo site and download the largest sized images from guessable URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import os
import re

import requests
event = 138191 | |
bibs = (496, ) | |
url = 'http://www.backprint.com/facchinophotography/{event}/{bib}' | |
thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg') | |
for bib in bibs: | |
try: | |
res = requests.get(url.format(event=event, bib=bib)) | |
res.raise_for_status() | |
except requests.exceptions.HTTPError: | |
print('[ERROR] Unable to get info for bib {}'.format(event, bib)) | |
continue | |
# Get all the thumbnail links | |
strings = thumb_pattern.findall(res.content) | |
if not strings: | |
print('[ERROR] Unable to find any matches for the thumbnail pattern!') | |
continue | |
# Create a list of all the large images. There are images at | |
# 'f.jpg', 'h.jpg', and 't.jpg', the size specifier is case-insenstive. | |
images = [thumb.replace('t.jpg', 'h.jpg') for thumb in strings] | |
# Create a directory to store in | |
photo_dir = 'photos_{}'.format(bib) | |
if not os.path.isdir(photo_dir): | |
os.makedirs(photo_dir) | |
for image_url in images: | |
filename = os.path.basename(image_url) | |
try: | |
r = requests.get(image_url) | |
r.raise_for_status() | |
except requests.exceptions.HTTPError: | |
print('[ERROR] Unable to download the image: {}'.format(image_url)) | |
continue | |
with open(os.path.join(photo_dir, filename), 'wb') as fd: | |
for chunk in r.iter_content(512): | |
fd.write(chunk) | |
print('Downloaded ' + filename) |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.