Skip to content

Instantly share code, notes, and snippets.

@lucaswerkmeister
Created August 19, 2019 22:43
Show Gist options
  • Save lucaswerkmeister/0bc668691eacd4827811feead1f41f11 to your computer and use it in GitHub Desktop.
Save lucaswerkmeister/0bc668691eacd4827811feead1f41f11 to your computer and use it in GitHub Desktop.
script to download all files in a Wikimedia Commons category
#!/bin/bash
# Download all files in a Wikimedia Commons category.
#
# This first part asks the MediaWiki API for every file in the category,
# following continuation until the listing is complete.
# Arguments: $1 - category name, without the "Category:" prefix
# Results:   titles - array with the page title of each file
#            size   - sum of the file sizes reported by the API

# Parameters sent with every request: the category's file members are used as
# a generator for prop=imageinfo, and iiprop=size requests each file's size.
declare -A original_params=(
  [action]=query
  [generator]=categorymembers
  [gcmtitle]="Category:${1:?category not specified}"
  [gcmtype]=file
  [gcmlimit]=max
  [prop]=imageinfo
  [iiprop]=size
  [format]=json
  [formatversion]=2
)
# The "continue" entries of the previous response; empty when there are none.
declare -A continue_params=()
declare -a titles=()
declare -i size=0
first_iteration=1
# Always send one request, then keep going while the API asks us to continue.
while ((first_iteration)) || ((${#continue_params[@]})); do
  first_iteration=0
  declare -a params=()
  # --data-urlencode instead of -d: values such as the category name or a
  # continuation token may contain characters (&, +, %, =) that would corrupt
  # an unencoded form body.
  for key in "${!original_params[@]}"; do
    params+=('--data-urlencode' "$key=${original_params[$key]}")
  done
  for key in "${!continue_params[@]}"; do
    params+=('--data-urlencode' "$key=${continue_params[$key]}")
  done
  # Stop on transport or HTTP errors instead of silently treating the failed
  # request as an empty, final page of results.
  if ! output=$(curl -sS --fail https://commons.wikimedia.org/w/api.php "${params[@]}"); then
    printf >&2 'Request to the Wikimedia Commons API failed.\n'
    exit 1
  fi
  # The response itself may carry an "error" object; report it and stop.
  if ! api_error=$(jq -r '.error.info? // empty' <<< "$output"); then
    printf >&2 'Could not parse the Wikimedia Commons API response.\n'
    exit 1
  fi
  if [[ -n $api_error ]]; then
    printf >&2 'Wikimedia Commons API error: %s\n' "$api_error"
    exit 1
  fi
  # Replace the continuation parameters with those of this response.
  continue_params=()
  while IFS=$'\t' read -r key value; do
    continue_params[$key]=$value
  done < <(jq -r '.continue | select(.) | to_entries | .[] | (.key + "\t" + (.value | tostring))' <<< "$output")
  # One "title<TAB>size" line per file. "// []" tolerates a response without
  # any pages (empty category); "// 0" tolerates a page without size info.
  while IFS=$'\t' read -r title file_size; do
    titles+=("$title")
    ((size+=file_size))
  done < <(jq -r '.query.pages // [] | .[] | (.title + "\t" + (.imageinfo[0].size // 0 | tostring))' <<< "$output")
done
# Check the total download size ($size, in bytes) against the space available
# on the file system holding the current directory. df is asked for POSIX
# output (-P) with a block size of one byte (-B1); the fourth column of its
# last output line is the available space.
free=$(df -PB1 . | awk '{ available = $4 } END { print available }')

# Not enough room at all: refuse to start downloading.
if ((size > free)); then
  printf >&2 'Need %d free bytes but only detected %d!\n' "$size" "$free"
  exit 1
fi

# Enough room, but the download takes a large share of it: warn and carry on.
if ((size > free / 10)); then
  printf >&2 'This download will consume more than 10%% of the remaining free space on disk (%d out of %d bytes).\n' "$size" "$free"
fi
#######################################
# Print the Special:FilePath URL for a file page title.
# The characters "%", "?" and "#" are percent-encoded: left raw in the URL
# path, "?" would start the query string and "#" the fragment, so a different
# file (or none) would be requested. Everything else is passed through as is.
# Arguments: $1 - page title, e.g. "File:Example.ogv"
# Outputs:   the URL on stdout
#######################################
commons_file_url() {
  local page=$1
  # "%" must be escaped first so the escapes added below are not re-encoded.
  page=${page//\%/%25}
  page=${page//\?/%3F}
  page=${page//\#/%23}
  printf 'https://commons.wikimedia.org/wiki/Special:FilePath/%s\n' "$page"
}

# Download every collected title, announcing progress on stderr. A failed
# download does not stop the run, but is counted so that the failure is
# reported at the end and reflected in the exit status.
declare -i failed=0
for title_index in "${!titles[@]}"; do
  title=${titles[$title_index]}
  printf >&2 '== %d/%d: %s ==\n' "$((title_index + 1))" "${#titles[@]}" "$title"
  if ! youtube-dl "$(commons_file_url "$title")"; then
    ((failed += 1))
  fi
done
if ((failed)); then
  printf >&2 '%d of %d downloads failed.\n' "$failed" "${#titles[@]}"
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment