Skip to content

Instantly share code, notes, and snippets.

@tpb1908
Created February 4, 2020 19:41
Show Gist options
  • Save tpb1908/891398ed2d9dae79633c916a00b50c9b to your computer and use it in GitHub Desktop.
Save tpb1908/891398ed2d9dae79633c916a00b50c9b to your computer and use it in GitHub Desktop.
MLP AVA Downloader
import os
import requests
from tqdm import tqdm
from multiprocessing.pool import ThreadPool
# All inputs and outputs are resolved relative to the current working directory.
path = os.getcwd()

# names.txt holds one video file name per line. Surrounding whitespace is
# stripped and blank lines are skipped: an empty name would otherwise turn
# into a request for the bare base URL and an attempt to open the output
# directory itself for writing.
with open(os.path.join(path, "names.txt")) as f:
    names = [name for name in (line.strip() for line in f) if name]
print("Loading {} videos".format(len(names)))

# Remote prefix that each video name is appended to.
base_url = "https://s3.amazonaws.com/ava-dataset/trainval/"

# Local directory the videos are written to. exist_ok avoids the
# check-then-create race of testing os.path.exists() first.
out_dir = os.path.join(path, "videos/")
os.makedirs(out_dir, exist_ok=True)
def fetch_video(name):
    """Download a single video from ``base_url`` into ``out_dir``.

    The response body is streamed to disk chunk by chunk so a whole video is
    never held in memory, and a tqdm progress bar tracks the bytes written.

    Args:
        name: File name of the video. It is appended to ``base_url`` to form
            the download URL and joined onto ``out_dir`` to form the local
            path.

    Returns:
        The path of the file that was written.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    target = os.path.join(out_dir, name)

    # The timeout bounds connection setup and each wait for data (not the
    # total download time), so a stalled connection cannot hang its worker
    # thread forever. The context manager releases the connection even if
    # writing fails part-way through.
    with requests.get(base_url + name, stream=True, timeout=30) as response:
        # Fail loudly instead of saving an HTTP error body as a "video".
        response.raise_for_status()

        # Content-Length is optional; without it tqdm shows a running count
        # rather than a percentage.
        content_length = response.headers.get("content-length")
        total = int(content_length) if content_length is not None else None

        with open(target, "wb") as out, tqdm(
            total=total,
            desc=name,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        ) as pbar:
            # 1 MiB chunks keep memory bounded without paying per-KiB
            # write and progress-bar overhead.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    out.write(chunk)
                    pbar.update(len(chunk))

    return target
# Download with 8 worker threads, printing each saved path as soon as its
# download completes (completion order, not the order in names.txt). The pool
# is used as a context manager so its worker threads are shut down once every
# result has been consumed; the loop must stay inside the block because
# leaving it terminates the pool.
with ThreadPool(8) as pool:
    for result in pool.imap_unordered(fetch_video, names):
        print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment