Created
June 1, 2017 15:27
-
-
Save larssono/ddc769837f0544f56581a192a2bef855 to your computer and use it in GitHub Desktop.
Get MD5 for a large number of files in Synapse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools
from multiprocessing.dummy import Pool

import pandas as pd
import synapseclient
def with_progress_bar(func, totalCalls, prefix='', postfix='', isBytes=False):
    """Wrap ``func`` so that every finished call advances a progress bar.

    :param func: Function being wrapped with the progress bar
    :param totalCalls: Total number of items/bytes when completed
    :param prefix: String printed before the progress bar
    :param postfix: String printed after the progress bar
    :param isBytes: A boolean indicating whether to convert bytes to kB, MB, GB etc.
    :returns: A callable that forwards its arguments to ``func`` and returns
        (or raises) whatever ``func`` does
    """
    from multiprocessing import Value, Lock

    # Kept as a double ('d') so the count handed to printTransferProgress has
    # the same type it always had.
    completed = Value('d', 0)
    lock = Lock()

    @functools.wraps(func)
    def progress(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        finally:
            # Advance the bar only once the call has finished (successfully or
            # not); counting on entry would show 100% while calls are still
            # running in other workers.
            with lock:
                completed.value += 1
                # Print while holding the lock so the value shown is the one
                # this call just produced and output lines do not interleave.
                synapseclient.utils.printTransferProgress(
                    completed.value, totalCalls, prefix, postfix, isBytes)

    return progress
# Log in to Synapse with the locally configured credentials.
syn = synapseclient.Synapse()
syn.login()

# Collect the ids of every file entity in the project.
ids = [f['file.id'] for f in syn.chunkedQuery("select id from file where projectId=='syn2351328'")]

# Fetch entity metadata only (downloadFile=False), reporting progress per call.
func = with_progress_bar(lambda x: syn.get(x, downloadFile=False), len(ids))

# Thread pool (multiprocessing.dummy): the work is network-bound lookups.
mp = Pool(15)
try:
    files = mp.map(func, ids)
finally:
    # Always release the worker threads, even if a lookup raised.
    mp.close()
    mp.join()

# One tab-separated line per file: id, MD5, external URL.
with open('files_and_md5sum.txt', 'w') as fp:
    for f in files:
        handle = f._file_handle
        # .get() for both fields: a handle lacking an MD5 is written as 'None'
        # instead of aborting the whole report with a KeyError.
        fp.write('%s\t%s\t%s\n' % (f.id, handle.get('contentMd5'), handle.get('externalURL')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment