Last active
July 15, 2024 04:51
-
-
Save cyphar/812da259e10ec4f2b41e43e5bfbdf306 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh
# Copyright (C) 2018-2024 Aleksa Sarai <cyphar@cyphar.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
############################################
# API CREDENTIALS -- ONLY CHANGE THIS PART #
############################################

# Each credential may also be supplied through an environment variable of the
# same name; the values below are only used as fallbacks.
TVDB_USERNAME="${TVDB_USERNAME:-cyphar}"
TVDB_USERKEY="${TVDB_USERKEY:-O2Y5GQF8Z5O7DONC}"
TVDB_APIKEY="${TVDB_APIKEY:-3ec8d8f8325bb9a78ebbc7f44efe5f6d}"
MOVIEDB_APIKEY="${MOVIEDB_APIKEY:-a4696f0214c1774e883b596d380d9a3f}"

###########################################################
# END OF API CREDENTIALS -- DO NOT MODIFY BELOW THIS LINE #
###########################################################

# IN_SOURCE is "1" when zsh reports that this file is being sourced (the
# evaluation context ends in ":file"), and empty when it runs as a script.
if [[ "${ZSH_EVAL_CONTEXT:-}" =~ :file$ ]]; then
	IN_SOURCE=1
else
	IN_SOURCE=
fi

# Strict mode is only enabled for script runs so that sourcing the file does
# not change the options of the calling shell.
# NOTE(review): in zsh the single-letter option -E appears to map to
# PUSHD_SILENT rather than bash's errtrace -- confirm it is intended.
[[ -n "$IN_SOURCE" ]] || set -Eeuo pipefail

# Setting DEBUG to any non-empty value turns on command tracing.
[[ -z "${DEBUG:-}" ]] || set -x

# Valid options: {auto, none, all}.
OVERRIDE="${OVERRIDE:-auto}"
# When non-empty, this index answers every "ambiguous result" prompt.
SERIES_IDX="${SERIES_IDX:-}"
# Any non-zero value skips the mkvpropedit step.
RDONLY="${RDONLY:-0}"

# FIXME TheMovieDB had a weird issue with cURL user agents.
# <https://www.themoviedb.org/talk/63fc60cde4b5760085596e63>
CURL_UA="Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"
# Report a fatal error.
#
# Arguments: all arguments are joined with spaces and written to stderr.
# Globals:   IN_SOURCE (read)
# Returns:   when IN_SOURCE is empty the current (sub)shell exits with
#            status 1; otherwise the function returns 1 so that a caller in a
#            sourced session can notice the failure without the shell dying.
function bail() {
	# printf rather than echo: the message is printed verbatim even when it
	# starts with "-n" or contains backslashes.
	printf '%s\n' "$*" >&2
	[[ -n "${IN_SOURCE:-}" ]] || exit 1
	return 1
}
# Fail early when one of the required external tools cannot be found.
# `command -v` only looks the name up; it does not run the tool.
command -v curl >/dev/null 2>&1 || bail "cURL is missing"
command -v jq >/dev/null 2>&1 || bail "jq is missing"
# Ask the user to pick one entry out of a JSON array.
#
# Arguments: $1 - JSON array of candidate entries
#            $2 - jq expression, evaluated for each entry, that yields the
#                 text shown for that entry
# Globals:   SERIES_IDX (read) - when non-empty it is printed as the answer
#            and no prompt is shown
# Outputs:   the menu and the prompt go to stderr; the chosen index (exactly
#            as typed, it is not validated) goes to stdout
function ask_user_choice() {
	local list="$1"
	local query="$2"
	local option_idx=0
	local line choice

	if [[ -n "${SERIES_IDX:-}" ]]; then
		printf '%s\n' "$SERIES_IDX"
		return
	fi

	echo "Query was ambiguous, please select which result you'd prefer:" >&2
	# One menu line per entry, numbered from 0.
	while read -r line; do
		printf '%s. %s\n' "$option_idx" "$line" >&2
		option_idx=$((option_idx + 1))
	done <<<"$(jq -rM ".[] | $query" <<<"$list")"

	printf '[0-%s]> ' "$((option_idx - 1))" >&2
	read -r choice
	printf '%s\n' "$choice"
}
# Decide which entry of a JSON result array should be used.
#
# Arguments: $1 - JSON array of results
#            $2 - jq path of the display name of an entry
#            $3 - error message used when the array is empty
#                 (default: "No entries found.")
#            $4 - jq path of the first-aired date (default: .firstAired)
#            $5 - jq path of the entry id (default: the literal text
#                 <unknown>)
# Outputs:   the chosen index on stdout: 0 for a single result, otherwise
#            whatever ask_user_choice prints
# Returns:   non-zero (after calling bail) when there are no results or the
#            length cannot be determined
function if_multiple_ask_idx() {
	local option_list="$1"
	local name="$2"
	local msg="${3:-No entries found.}"
	local first_aired="${4:-.firstAired}"
	local id="${5:-}"
	local len_option label
	local option_idx=0

	# The id is spliced into a jq string interpolation below, so the fallback
	# has to be a quoted jq string literal rather than bare text.
	[[ -n "$id" ]] || id='"<unknown>"'

	len_option="$(jq -rM 'length' <<<"$option_list")" || len_option=
	# Anything that is not a plain non-negative integer counts as "no results".
	[[ "$len_option" =~ ^[0-9]+$ ]] || len_option=0

	if (( len_option == 0 )); then
		bail "$msg"
		return 1
	fi

	# If there is more than one result, we ask the user to confirm which one
	# they actually wanted. This is necessary because recently TheTVDB has
	# added a bunch of (relatively unknown) shows with similar names that get
	# higher listings...
	if (( len_option > 1 )); then
		# This is asked for each file even if we've seen this option name
		# before. Users can use yes(1) if they want to auto-respond to all
		# requests.
		label='"\"\('"$name"')\" (id \('"$id"'), aired \('"$first_aired"'))"'
		option_idx="$(ask_user_choice "$option_list" "$label")" || return 1
	fi

	printf '%s\n' "$option_idx"
}
# Extract one component from a title of the form
# "<series> [S<digits>]E<digits> [<suffix>]".
#
# Arguments: $1 - the title to parse
#            $2 - which part to return: series, season, episode or suffix
#            $3 - optional fallback used when the requested part is empty
# Outputs:   the requested part on stdout; season and episode are printed
#            without their leading "S"/"E"
# Returns:   non-zero (after calling bail) for an unknown part name, or when
#            the part is empty and no fallback was given
function parse_title() {
	local title="$1"
	local part="$2"
	local default="${3:-}"
	local search_regex='^(.*)[[:space:]]+(S[[:digit:]]+)?(E[[:digit:]]+)[[:space:]]*(.*)?$'
	local group_ref=
	local part_prefix=
	local found

	case "$part" in
		series)
			group_ref='\1'
			;;
		season)
			group_ref='\2'
			part_prefix="S"
			;;
		episode)
			group_ref='\3'
			part_prefix="E"
			;;
		suffix)
			group_ref='\4'
			;;
		*)
			bail "Unknown title part $part."
			return 1
			;;
	esac

	# -n together with the p flag prints only when the substitution happened.
	# Without it sed would echo a non-matching title back unchanged and the
	# whole title would be mistaken for the requested part.
	found="$(sed -nE "s/$search_regex/$group_ref/p" <<<"$title")"
	if [[ -z "$found" ]]; then
		if [[ -n "$default" ]]; then
			found="$default"
		else
			bail "Series part $part ($title) didn't match regex."
			return 1
		fi
	fi

	printf '%s\n' "${found#"$part_prefix"}"
}
#####################
## TheTVDB Scraper ##
#####################

TVDB_API_ROOT="https://api.thetvdb.com"

# Print a TheTVDB bearer token, logging in if necessary.
#
# Globals:   TVDB_TOKEN (read; exported after a successful login),
#            TVDB_USERNAME, TVDB_USERKEY, TVDB_APIKEY, CURL_UA,
#            TVDB_API_ROOT (read)
# Outputs:   the token on stdout
# Returns:   non-zero (after calling bail) when the login response contains
#            no usable token
# NOTE: when this runs inside $(...), the export only affects that subshell,
# so the token is not cached for later calls made by the parent shell.
function tvdb_token() {
	local payload token

	if [[ -n "${TVDB_TOKEN:-}" ]]; then
		printf '%s\n' "$TVDB_TOKEN"
		return
	fi

	# Let jq build the request body so that quotes or backslashes inside a
	# credential cannot produce malformed JSON.
	payload="$(jq -cn \
		--arg username "$TVDB_USERNAME" \
		--arg userkey "$TVDB_USERKEY" \
		--arg apikey "$TVDB_APIKEY" \
		'{username: $username, userkey: $userkey, apikey: $apikey}')" || return 1

	token="$(curl -s -X POST -A "$CURL_UA" -H 'Content-Type: application/json' \
		-d "$payload" \
		"$TVDB_API_ROOT/login" | jq -rM '.token')" || token=

	# An empty result (request or parse failure) is as unusable as "null".
	if [[ -z "$token" || "$token" == "null" ]]; then
		bail "TheTVDB login failed"
		return 1
	fi

	export TVDB_TOKEN="$token"
	printf '%s\n' "$TVDB_TOKEN"
}
# Look a series up on TheTVDB and print one field of the selected match.
#
# Arguments: $1 - series name; "+" is treated as a word separator
#            $2 - jq path applied to the selected entry
#                 (default: .seriesName)
# Outputs:   the value of that field on stdout
# Returns:   non-zero when the token, the search or the selection fails
function tvdb_series() {
	local series="$1"
	local query="${2:-.seriesName}"
	local token series_list series_idx

	token="$(tvdb_token)" || return 1

	# -G appends the url-encoded pair to the URL as a query string, so names
	# containing "&", "#", quotes or non-ASCII text survive intact. The "+"
	# separators are turned back into spaces before encoding.
	series_list="$(curl -s -G -A "$CURL_UA" -H 'Content-Type: application/json' \
		-H "Authorization: Bearer $token" \
		--data-urlencode "name=${series//\+/ }" \
		"$TVDB_API_ROOT/search/series" | jq -rM '.data')" || return 1

	series_idx="$(if_multiple_ask_idx "$series_list" ".seriesName" "No series found for '$series'" ".firstAired")" || return 1
	jq -rM ".[$series_idx]$query" <<<"$series_list"
}
# Look an episode up on TheTVDB and print one field of it.
#
# Arguments: $1 - series name, passed on to tvdb_series
#            $2 - aired season number
#            $3 - aired episode number
#            $4 - jq path applied to the selected episode
#                 (default: .episodeName)
# Outputs:   the value of that field on stdout
# Returns:   non-zero when the token, the series lookup, the request or the
#            selection fails
function tvdb_title() {
	local series="$1"
	local season="$2"
	local episode="$3"
	local query="${4:-.episodeName}"
	local token series_id episode_list episode_idx

	token="$(tvdb_token)" || return 1
	series_id="$(tvdb_series "$series" ".id")" || return 1

	episode_list="$(curl -s -X GET -A "$CURL_UA" -H 'Content-Type: application/json' \
		-H "Authorization: Bearer $token" \
		"$TVDB_API_ROOT/series/${series_id}/episodes/query?airedSeason=${season}&airedEpisode=${episode}" | jq -rM '.data')" || return 1

	episode_idx="$(if_multiple_ask_idx "$episode_list" ".episodeName" "No episodes found for 'series=$series_id S${season}E${episode}'" ".firstAired")" || return 1
	jq -rM ".[$episode_idx]$query" <<<"$episode_list"
}
################################
## The Movie Database Scraper ##
################################

MOVIEDB_API_ROOT="https://api.themoviedb.org/3"
# Sent as the "language" query parameter of every TheMovieDB request.
# NOTE(review): LANGUAGE is also a variable name that gettext-based tools
# read from the environment -- confirm an inherited value cannot end up here
# by accident.
LANGUAGE="${LANGUAGE:-}"

# Print the API key used to authenticate TheMovieDB requests.
#
# Globals:   MOVIEDB_APIKEY (read)
# Outputs:   the key on stdout
function moviedb_token() {
	printf '%s\n' "$MOVIEDB_APIKEY"
}
# Look a TV series up on TheMovieDB and print one field of the selected
# match.
#
# Arguments: $1 - series name; "+" is treated as a word separator
#            $2 - jq path applied to the selected entry (default: .name)
# Globals:   MOVIEDB_API_ROOT, LANGUAGE, CURL_UA (read)
# Outputs:   the value of that field on stdout
# Returns:   non-zero when the search or the selection fails
function moviedb_series() {
	local series="$1"
	local query="${2:-.name}"
	local series_list series_idx

	# TODO: Support movies.
	# TODO: Support multi-page results.
	# -G appends the url-encoded pairs to the URL as a query string, so names
	# containing "&", "#", quotes or non-ASCII text survive intact. The "+"
	# separators are turned back into spaces before encoding.
	series_list="$(curl -s -G -A "$CURL_UA" -H 'Content-Type: application/json' \
		--data-urlencode "api_key=$(moviedb_token)" \
		--data-urlencode "language=$LANGUAGE" \
		--data-urlencode "query=${series//\+/ }" \
		"$MOVIEDB_API_ROOT/search/tv" | jq -rM '.results')" || return 1

	series_idx="$(if_multiple_ask_idx "$series_list" ".name" "No series found for '$series'" ".first_air_date" ".id")" || return 1
	jq -rM ".[$series_idx]$query" <<<"$series_list"
}
# Look an episode up on TheMovieDB and print one field of it.
#
# Arguments: $1 - series name, passed on to moviedb_series
#            $2 - season number
#            $3 - episode number
#            $4 - jq path applied to the episode object (default: .name)
# Globals:   MOVIEDB_API_ROOT, LANGUAGE, CURL_UA (read)
# Outputs:   the value of that field on stdout
# Returns:   non-zero when the series lookup or the request fails, or (after
#            calling bail) when the response has "success" set to false
function moviedb_title() {
	local series="$1"
	local season="$2"
	local episode="$3"
	local query="${4:-.name}"
	local series_id episode_data

	series_id="$(moviedb_series "$series" ".id")" || return 1

	episode_data="$(curl -s -X GET -A "$CURL_UA" -H 'Content-Type: application/json' \
		"$MOVIEDB_API_ROOT/tv/${series_id}/season/${season}/episode/${episode}?api_key=$(moviedb_token)&language=$LANGUAGE")" || return 1

	if [[ "$(jq -rM '.success == false' <<<"$episode_data")" == "true" ]]; then
		bail "No episode found for 'series=$series_id S${season}E${episode}'"
		return 1
	fi

	# The response is the input and the query alone is the jq program; the
	# response body must never be interpreted as jq code.
	jq -rM "$query" <<<"$episode_data"
}
##################
## Generic Code ##
##################

# Name of the scraper backend. The functions "<API>_series" and "<API>_title"
# are called through the wrappers below.
API="${API:-moviedb}"

# Forward all arguments to the series lookup of the selected backend.
#
# Globals:   API (read)
# Outputs:   whatever the backend function prints
# Returns:   the exit status of the backend function
function get_series_name() {
	"${API}_series" "$@"
}
# Forward all arguments to the episode lookup of the selected backend.
#
# Globals:   API (read) - the function "<API>_title" is called
# Outputs:   whatever the backend function prints
# Returns:   the exit status of the backend function
function get_episode_name() {
	"${API}_title" "$@"
}
# Turn a human-readable title into a dot-separated file name.
#
# Arguments: $1 - the text to convert
# Outputs:   the converted name on stdout
#
# A dash surrounded by whitespace becomes a single space, and double quotes,
# single quotes and backslashes are dropped. After that:
#   - pure-ASCII text: every run of characters other than letters, digits,
#     "-" and "." becomes a single "." (runs of "." are squeezed as well);
#   - text with non-ASCII characters: every run of spaces, "." and "/"
#     becomes a single ".".
function filename_filter() {
	local name

	# -E is required for "+" to mean "one or more"; in a basic regular
	# expression it is a literal plus sign and " - " would never match.
	name="$(sed -E "s/[[:space:]]+-[[:space:]]+/ /g;s|[\"\']||g" <<<"$1")"

	if [[ "$name" = *[![:ascii:]]* ]]; then
		# tr doesn't support utf8 so we need to do this using sed
		# NOTE(review): as written, the last four substitutions replace each
		# character with itself. The replacements may originally have been
		# different (e.g. full-width) characters -- confirm against upstream.
		sed -E 's|[ .-/]+|.|g;s|\(|(|g;s|\)|)|g;s|!|!|g;s|\?|?|g' <<<"$name"
	else
		tr -sc 'a-zA-Z0-9-.\n' '[.*]' <<<"$name"
	fi
}
# Main program: runs only when the file is executed, not when it is sourced.
# For every file argument it looks up the series and episode name, writes the
# title into Matroska files with mkvpropedit (unless skipped) and renames the
# file.
if [[ -z "$IN_SOURCE" ]]
then
	# With GNU parallel available and more than one file, re-run this script
	# once per file and exit with parallel's status.
	if ( parallel --version &>/dev/null ) && [[ "$#" -gt 1 ]]
	then
		printf '%s\0' "$@" | parallel -0 -- "$0" {}
		exit $?
	fi

	for original in "$@"
	do
		echo "Source: $original"

		# Pick a lookup language from the path when none was configured; it
		# is cleared again at the end of the iteration.
		language_autodetected=
		if [[ -z "$LANGUAGE" ]]
		then
			language_autodetected=1
			if [[ "$(realpath "$original")" = *nihongo* ]]
			then
				# FIXME Use two-letter code for now.
				# <https://www.themoviedb.org/talk/63fc60cde4b5760085596e63>
				LANGUAGE="ja"
			else
				LANGUAGE="en-US"
			fi
		fi

		is_matroska=
		if [[ "$(file -b "$original")" =~ ^Matroska ]]
		then
			is_matroska=1
		fi

		extension="${original##*.}"
		title="$(basename "$original" ".$extension")"
		# If the extension is a subtitle file, check if the previous .
		# separated section is actually a language code (in which case, bundle
		# it with the extension and remove it from the title).
		if [[ "$extension" =~ ^(srt|ass)$ && "${title##*.}" =~ ^[a-z]{2,3}$ ]]
		then
			extension="${title##*.}.$extension"
			title="${title%.*}"
		fi
		title="$(tr -s '.' ' ' <<<"$title")"

		title_series="$(parse_title "$title" series | tr ' ' '+')"
		title_season="$(parse_title "$title" season "0")"
		title_episode="$(parse_title "$title" episode)"

		proper_series_name="$(get_series_name "$title_series")"
		# A failed episode lookup is not fatal; it is handled below.
		title_suffix="$(get_episode_name "$title_series" "$title_season" "$title_episode" || echo "")"

		title_prefix="$proper_series_name "
		if [[ -n "$title_season" ]]
		then
			title_prefix+="S$title_season"
		fi
		if [[ -n "$title_episode" ]]
		then
			title_prefix+="E$title_episode"
		fi

		lookup_failed=
		if [[ -z "$title_suffix" || "$title_suffix" == "null" ]]
		then
			lookup_failed=1
			title_suffix=
			echo -n "TVDB Failed -- "
		fi
		echo "Title: \"$title_suffix\""

		# Prompt for a manual title when OVERRIDE is "all", or when the
		# lookup failed and OVERRIDE is not "none".
		override_title=
		if [[ -z "$lookup_failed" && "$OVERRIDE" == "all" ]] ||
			[[ -n "$lookup_failed" && "$OVERRIDE" != "none" ]]
		then
			echo -n "Override Title: "
			read -r override_title
			if [[ "$OVERRIDE" == "all" || "$override_title" = *[![:space:]]* ]]
			then
				title_suffix="$override_title"
			fi
		fi

		if [[ -n "$title_suffix" ]]
		then
			# Trim the ends, collapse inner whitespace runs (this needs -E
			# for "+") and turn backticks into apostrophes.
			title_suffix="$(printf '%s' "$title_suffix" | sed -E 's/^[[:space:]]*//g;s/[[:space:]]*$//g;s/[[:space:]]+/ /g;s/`/'"'"'/g')"
			full_title="$title_prefix \"$title_suffix\""
		else
			full_title="$title_prefix"
		fi

		if [[ -z "$is_matroska" ]]
		then
			echo "Not a Matroska file. Skipping mkvpropedit."
		elif [[ "$(stat -c '%h' "$original")" -gt 1 ]]
		then
			echo "File has more than one link -- assuming we cannot modify it. Skipping mkvpropedit."
		elif [[ "$RDONLY" -ne 0 ]]
		then
			echo "Running in read-only mode. Skipping mkvpropedit."
		else
			echo mkvpropedit "$original" --edit info --set "'title=$full_title'"
			mkvpropedit "$original" --edit info --set "title=$full_title"
		fi

		# NOTE(review): the new name carries no directory component, so a
		# file given with a path is moved into the current directory --
		# confirm this is intended.
		rename_file="$(filename_filter "$full_title.$extension")"
		mv -v -- "$original" "$rename_file" || echo "... not renaming the file ..."

		# A full if statement is used here because a bare "[ ... ] &&" as the
		# last command would make the loop, and with it the script, end with
		# status 1 whenever LANGUAGE was supplied by the user.
		if [[ -n "$language_autodetected" ]]
		then
			LANGUAGE=
		fi
	done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment