Last active
December 6, 2018 13:12
-
-
Save vanne02135/fa364bc8d28d52cbe945220c1e5db2de to your computer and use it in GitHub Desktop.
Download flickr images and retain metadata in exif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import flickr_api | |
import os | |
import random | |
import sys | |
from fractions import Fraction | |
import piexif | |
import piexif.helper | |
from iptcinfo import IPTCInfo | |
import urllib2 | |
import argparse | |
import time | |
import datetime | |
# TODO:
# * Might crash with anything else than jpg? Try videos? How about png?
# * debugging with a lot of content. At least tags and comments seem to work.
# * use https://github.com/jamesacampbell/iptcinfo3 to write caption, keywords and title

# Cache file where the flickr_api OAuth credentials are persisted between runs.
AUTH_FILENAME = "flickr_dl_auth.txt"

# %-format template used to pack the flickr metadata into the EXIF
# UserComment field of a downloaded image (see updateSavedExif).
USER_COMMENT_TEMPLATE = """flickr_dl.py converted metadata:
Title: %(title)s
Description: %(description)s
Tags: %(tags)s
Comments: %(comments)s"""
def get_auth_cache(filename): | |
# Get the auth file as specified in https://github.com/alexis-mignon/python-flickr-api/wiki/Flickr-API-Keys-and-Authentication | |
a = flickr_api.auth.AuthHandler() # creates a new AuthHandler object | |
perms = "read" # set the required permissions | |
url = a.get_authorization_url(perms) | |
print url # this is the url we need! | |
oauth_verifier = raw_input("Please enter oauth verifier code from the url above: ") | |
a.set_verifier(oauth_verifier) # copy your oauth_verifier tag here! | |
flickr_api.set_auth_handler(a) # set the AuthHandler for the session | |
a.save(filename) | |
def to_deg(value, loc):
    """Convert a decimal coordinate into a degrees/minutes/seconds tuple.

    Keyword arguments:
    value -- float GPS coordinate; the sign selects the direction from loc
    loc -- direction pair, ["S", "N"] for latitude or ["W", "E"] for longitude

    return: tuple like (25, 13, 48.343, 'N'); the direction is "" when
    value == 0 (the equator / prime meridian has no hemisphere).
    """
    if value < 0:
        loc_value = loc[0]
    elif value > 0:
        loc_value = loc[1]
    else:
        loc_value = ""
    abs_value = abs(value)
    degrees = int(abs_value)
    # Fractional degrees -> minutes, fractional minutes -> seconds.
    frac_minutes = (abs_value - degrees) * 60
    minutes = int(frac_minutes)  # renamed: the original shadowed builtin min()
    seconds = round((frac_minutes - minutes) * 60, 5)
    return (degrees, minutes, seconds, loc_value)
def change_to_rational(number):
    """Represent *number* as a rational (numerator, denominator) tuple.

    Keyword arguments: number -- int or float value
    return: tuple like (1, 2) meaning 1/2

    Going through str() gives the exact decimal the caller wrote rather
    than the binary float expansion (Fraction(0.1) would be huge).
    """
    ratio = Fraction(str(number))
    return (ratio.numerator, ratio.denominator)
def set_gps_location(file_name, lat, lng, altitude):
    """Adds GPS position as EXIF metadata.

    Keyword arguments:
    file_name -- image file to modify in place
    lat -- latitude (as float)
    lng -- longitude (as float)
    altitude -- altitude in metres (as float; negative means below sea level)
    """
    lat_deg = to_deg(lat, ["S", "N"])
    lng_deg = to_deg(lng, ["W", "E"])

    exiv_lat = (change_to_rational(lat_deg[0]), change_to_rational(lat_deg[1]), change_to_rational(lat_deg[2]))
    exiv_lng = (change_to_rational(lng_deg[0]), change_to_rational(lng_deg[1]), change_to_rational(lng_deg[2]))

    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        # EXIF spec: AltitudeRef 0 = above sea level, 1 = below sea level,
        # and GPSAltitude itself is always stored as a non-negative value.
        # (The original hard-coded AltitudeRef to 1, marking every photo
        # as below sea level.)
        piexif.GPSIFD.GPSAltitudeRef: 0 if altitude >= 0 else 1,
        piexif.GPSIFD.GPSAltitude: change_to_rational(round(abs(altitude))),
        piexif.GPSIFD.GPSLatitudeRef: lat_deg[3],
        piexif.GPSIFD.GPSLatitude: exiv_lat,
        piexif.GPSIFD.GPSLongitudeRef: lng_deg[3],
        piexif.GPSIFD.GPSLongitude: exiv_lng,
    }

    exif_dict = piexif.load(file_name)
    # Only fill in GPS data when the image has none yet; existing GPS
    # info from the camera takes precedence over the flickr metadata.
    if exif_dict["GPS"] == {}:
        exif_dict["GPS"] = gps_ifd
    exif_bytes = piexif.dump(exif_dict)
    piexif.insert(exif_bytes, file_name)
def unfoldComments(comments):
    """Flatten a list of flickr comment objects into one readable string.

    Each comment becomes "author: text"; entries are comma-separated.
    """
    parts = []
    for comment in comments:
        parts.append("%s: %s" % (comment.author.username, comment.text))
    return ", ".join(parts)
def updateSavedExif(filename, title, description, comments, tags, longitude, latitude, altitude):
    """Write flickr metadata into a locally saved image file.

    EXIF (GPS IFD plus a UserComment built from USER_COMMENT_TEMPLATE) is
    written only for formats piexif supports; IPTC fields are written for
    every file.  All failures are reported but never raised, so one broken
    image does not stop a long download run.

    Keyword arguments:
    filename -- local image file to modify in place
    title, description -- photo title and description (unicode)
    comments -- list of flickr comment objects (see unfoldComments)
    tags -- list of flickr tag objects (.text is used)
    longitude, latitude -- coordinate strings, or None/empty when no geo info
    altitude -- altitude in metres (flickr does not provide it; callers pass 0)
    """
    ext = filename.split(".")[-1].lower()
    piexif_supported_formats = ["jpg", "jpeg", "tif", "tiff"]
    if ext in piexif_supported_formats:
        try:
            # Save GPS data and the textual metadata in EXIF
            if longitude and latitude:
                set_gps_location(filename, float(latitude), float(longitude), altitude)
            exif_dict = piexif.load(filename)
            commentString = USER_COMMENT_TEMPLATE % {"title": title, "description": description, "comments": unfoldComments(comments), "tags": ", ".join([t.text for t in tags])}
            user_comment = piexif.helper.UserComment.dump(commentString)
            exif_dict["Exif"][piexif.ExifIFD.UserComment] = user_comment
            # (the original dumped exif_dict twice here; once is enough)
            exif_bytes = piexif.dump(exif_dict)
            piexif.insert(exif_bytes, filename)
        except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
            print("Warning: EXIF data transfer failed despite supported image format")
    else:
        print ("Warning: file type %s not supported by piexif" % ext)

    # Also save metadata to IPTC as follows:
    # Title -> object name
    # Description -> caption/abstract
    # tags -> keywords
    # Comments -> appended to caption/abstract
    try:
        myiptc = IPTCInfo(filename)
    except Exception:
        # no IPTC block in the file yet -- create one from scratch
        myiptc = IPTCInfo(filename, force=True)
    # IPTC fields are byte strings; drop non-ascii rather than crash.
    myiptc.data["object name"] = title.encode("ascii", "ignore")
    myiptc.data["caption/abstract"] = description.encode("ascii", "ignore")
    myiptc.data["keywords"] = [t.text.encode("ascii", "ignore") for t in tags]
    if len(comments) > 0:
        myiptc.data["caption/abstract"] += "Flickr comments:\n"
        myiptc.data["caption/abstract"] += unfoldComments(comments).encode("ascii", "ignore")
    myiptc.save()
def walkFlickr(flickrUser, year):
    """Return a list of all the user's photos taken during *year*.

    Uses flickr_api.Walker, which transparently handles result paging,
    instead of browsing pages by hand (see getPhotosPaged).
    """
    # Taken-date window: [Jan 1 of year, Jan 1 of year+1) as Unix timestamps.
    window_start = time.mktime(datetime.datetime(year, 1, 1).timetuple())
    window_end = time.mktime(datetime.datetime(year + 1, 1, 1).timetuple())
    walker = flickr_api.Walker(flickr_api.Photo.search, user_id=flickrUser.id,
                               min_taken_date=window_start,
                               max_taken_date=window_end)
    return list(walker)
def getPhotosPaged(user, args):
    """Legacy download loop using user.getPhotos() paging instead of Walker().

    Kept for reference only -- note the hard-coded debug limits below
    (only the first page, only the first 10 photos).
    """
    # Old version, where photos were fetched using user.getPhotos() and paging instead of Walker()
    p = user.getPhotos()
    nPages = p.info.pages
    print (args.year)
    print "Fetcing %d pages of photos" % nPages
    photoN = 0
    for page in [1]: #range(nPages):  NOTE(review): debug leftover -- only page 1 is fetched
        for photo in user.getPhotos(page=page+1):
            photoN += 1
            if photoN > 10:  # NOTE(review): debug leftover -- stops after 10 photos
                break
            # %c with 13 is a carriage return: the counter overwrites itself in place
            sys.stdout.write("Inspecting photo %d %c" % (photoN, 13))
            sys.stdout.flush()
            yearTaken = int(photo.taken.split('-')[0])
            if photo.media == "video" and args.download_video:
                try:
                    videourl = photo.getSizes()["Video Original"]["source"]
                except:
                    print ("Original video not available for some reason, getting Flickr Site MP4")
                    videourl = photo.getSizes()["Video Original"]["Site MP4"]
                myurl = urllib2.urlopen(videourl)
                # server suggests the filename via the Content-Disposition header
                filename = myurl.headers.dict["content-disposition"].split("=")[1]
                try:
                    open(filename, "w").write(urllib2.urlopen(myurl.url).read()) # actual data hidden in videourl's url
                except:
                    print ("Something went wrong while downloading %s from %s" % (filename, videourl))
                # TODO: how to handle video's metadata here?
            elif (args.year == None) or (yearTaken == args.year):
                # Photo path: save the image, then copy flickr metadata into it.
                photo.save(photo.id)
                pInfo = photo.getInfo()
                # TODO: some photos do not have location info, so next line needs an update
                # Also the following does assume always jpg filetype
                try:
                    updateSavedExif(str(photo.id) + ".jpg", photo.title, pInfo["description"], photo.getComments(), pInfo["tags"], pInfo["location"]["longitude"], pInfo["location"]["latitude"], 0)
                except:
                    print ("Something went wrong while updating the exif data for %s" % photo.id)
if __name__ == "__main__":
    # Command-line options.
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbosity", help="increase output verbosity", action="store_true")
    parser.add_argument("--download_video", help="download also video files", action="store_true")
    parser.add_argument("--year", help="download images taken at year", type=int)
    args = parser.parse_args()

    # Authenticate: reuse the cached credentials; if the cache file is
    # missing, run the interactive OAuth flow once and retry.
    try:
        flickr_api.set_auth_handler(AUTH_FILENAME)
        user = flickr_api.test.login()
    except IOError:
        get_auth_cache(AUTH_FILENAME)
        flickr_api.set_auth_handler(AUTH_FILENAME)
        user = flickr_api.test.login()

    #w = flickr_api.Walker(flickr_api.Photo.search, user_id=user.id, lat=38.692, lon=-90.147, radius=32, min_taken_date=1372893401.042554, max_taken_date=1373898201.042593, media='photos', per_page=500, extras='description,date_upload,date_taken,geo')

    # Walk either one year (taken-date window as Unix timestamps) or the
    # whole photostream; Walker handles flickr's result paging.
    if args.year:
        t1 = int(time.mktime(datetime.datetime(args.year, 1, 1).timetuple()))
        t2 = int(time.mktime(datetime.datetime(args.year+1, 1, 1).timetuple()))
        walked = flickr_api.Walker(flickr_api.Photo.search, user_id=user.id, min_taken_date=t1, max_taken_date=t2)
    else:
        walked = flickr_api.Walker(flickr_api.Photo.search, user_id=user.id)
    i = 0
    for photo in walked:
        i += 1
        #if i < 513:
        #    continue
        print("%4d : %s" % (i, photo.title))
        if photo.media == "video" and args.download_video:
            # Still this does not work for some videos
            try:
                videourl = photo.getSizes()["Video Original"]["source"]
            except:
                print ("Original video not available for some reason, getting Flickr Site MP4")
                videourl = photo.getSizes()["Video Original"]["Site MP4"]
            myurl = urllib2.urlopen(videourl)
            # server suggests the filename via the Content-Disposition header
            filename = myurl.headers.dict["content-disposition"].split("=")[1]
            try:
                open(filename, "w").write(urllib2.urlopen(myurl.url).read()) # actual data hidden in videourl's url
            except:
                print ("Something went wrong while downloading %s from %s" % (filename, videourl))
            # TODO: how to handle video's metadata here?
        elif photo.media == "photo":
            # Photo path: save the image, then copy flickr metadata into it.
            photo.save(photo.id)
            pInfo = photo.getInfo()
            # TODO: some photos do not have location info, so next line needs an update
            # Also the following does assume always jpg filetype
            #try:
            if "location" in pInfo.keys():
                updateSavedExif(str(photo.id) + "." + pInfo["originalformat"], photo.title, pInfo["description"], photo.getComments(), pInfo["tags"], pInfo["location"]["longitude"], pInfo["location"]["latitude"], 0)
            else:
                # no geo info available for this photo
                updateSavedExif(str(photo.id) + "." + pInfo["originalformat"], photo.title, pInfo["description"], photo.getComments(), pInfo["tags"], None, None, 0)
            #except:
            #    print ("Something went wrong while updating the exif data for %s" % photo.id)
        elif photo.media != "photo" and photo.media != "video":
            raise Exception("Unknown media: %s" % photo.media)
Open issues:
- piexif fails to handle 360 panorama images, so EXIF data for those is not transferred
- videos fail from time to time
- no error handling - sometimes Flickr response for photo data is 502 bad gateway
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Downloads all photos from a Flickr account along with most of their metadata, and writes that metadata into the EXIF/IPTC fields of the locally saved image files.