Save kiwiholmberg/667857e965cc2bf74380 to your computer and use it in GitHub Desktop.
Python script to download all your dayviews.com (aka bilddagboken.se) images.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# encoding: utf-8
"""
Python script to download all your dayviews.com (aka bilddagboken.se) images.

Names each file after the date it belongs to, plus a random string to avoid
name collisions.  Run with 'python download.py'.

Depends on the third-party `requests` and BeautifulSoup (v3) packages.
"""
from requests import get
from BeautifulSoup import BeautifulSoup, SoupStrainer
from time import sleep
import re, datetime, hashlib, os
# Captures (day, month-name, year) from page titles such as
# "Söndag 5 februari 2006 ..." -> groups ('5', 'februari', '2006').
date_pattern = re.compile(r"^\S+\s(\d+)\s(\S+)\s(\d{4})\s.+$")
# dayviews.com renders dates with SWEDISH month names, so the lookup list must
# be Swedish regardless of the machine's locale.  The previous
# strftime('%B')-based version produced locale-dependent (usually English)
# names, which made months.index(...) raise ValueError on the site's dates.
months = ['januari', 'februari', 'mars', 'april', 'maj', 'juni', 'juli',
          'augusti', 'september', 'oktober', 'november', 'december']
def download(): | |
url = ''#put url to latest image here i.e. "http://dayviews.com/username/11111137/" | |
i = 0 | |
while True: | |
print 'Get page %s' % url | |
r = get(url).content | |
soup = BeautifulSoup(r) | |
date = soup.find(id='showContentTitle').string | |
# print date | |
matches = date_pattern.match(date).groups() # ('5', 'februari', '2006') | |
# Mongle date format. | |
month = months.index(matches[1].lower()) + 1 | |
month = '0'+str(month) if len(str(month))==1 else str(month) | |
day = matches[0] if len(matches[0])==2 else '0'+matches[0] | |
date_string = '%s-%s-%s' % (matches[2], month, day) | |
for img in soup.findAll('img', id="picture"): | |
src = img['src'] | |
fname = './images/%s_(%s)_%s' % (date_string, hashlib.sha1(os.urandom(1024)).hexdigest()[0:10], src[src.rfind('/')+1:] ) | |
print "Downloading %s --> %s" % (src, fname) | |
f = open( fname, 'wb') | |
r = get(src) | |
for chunk in r.iter_content(): | |
f.write(chunk) | |
# Save next image link | |
last_url = url | |
for link in soup.findAll('a', rel=u"fancyImgGrp"): | |
url = link["href"] | |
# If they are stil the same, exit | |
if last_url == url: | |
print "Done \r" | |
exit() | |
i += 1 | |
sleep(1) # be nice to server | |
# Script entry point: run the full download when executed directly.
if __name__ == '__main__':
    download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.