Skip to content

Instantly share code, notes, and snippets.

@seangeleno
Last active September 17, 2024 19:48
Show Gist options
  • Save seangeleno/0af5d76591fdd11109a095725d24b5b9 to your computer and use it in GitHub Desktop.
Save seangeleno/0af5d76591fdd11109a095725d24b5b9 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Download one video with yt-dlp and organize it on disk as
#   <domain>/<unix_timestamp>/<sentiment>/<view_count>/<title>.<ext>
#
# Usage: pass the video URL as the first argument of the script.
#
# Where the path components come from:
#   domain         - host of a link found in the video's *description* that
#                    points at one of the sites listed in the regex below
#                    (the whole link is captured as download_domain). It is
#                    not derived from the URL being downloaded.
#   unix_timestamp - a copy of yt-dlp's upload_date field, i.e. eight digits
#                    (YYYYMMDD). Despite the name it is not epoch seconds.
#   sentiment      - nothing in this command sets this field.
#                    NOTE(review): it is expected to render as the "NA"
#                    placeholder until a real value is supplied - confirm.
#   view_count     - yt-dlp's own view_count field.
# Components whose field is unavailable are rendered with yt-dlp's
# --output-na-placeholder text, which defaults to "NA".
#
# No --replace-in-metadata step is used for unix_timestamp: that option
# inserts its replacement text literally, so passing "%(upload_date)s" to it
# would overwrite the captured date with that literal string.
#
# The URL is passed as a quoted argument after "--" so the shell never
# treats angle brackets as redirections and yt-dlp never treats the URL as
# an option.
yt-dlp --parse-metadata "description:(?P<download_domain>https?://(?:[a-z]+\.)?(?P<domain>youtube\.com|youtu\.be|vimeo\.com|dailymotion\.com|twitch\.tv|fb\.watch|facebook\.com|soundcloud\.com|mixcloud\.com|bandcamp\.com|twitter\.com|instagr\.am|instagram\.com|redd\.it|reddit\.com|tiktok\.com|bit\.ly|bitchute\.com|peertube\.co|bili\.com|tinyurl\.com|goo\.gl|ow\.ly)/[^\s]+)" \
  --parse-metadata "upload_date:(?P<unix_timestamp>[0-9]{8})" \
  --output "%(domain)s/%(unix_timestamp)s/%(sentiment)s/%(view_count)s/%(title)s.%(ext)s" \
  --embed-metadata \
  -- "$1"
#!/bin/bash
#######################################
# Download a video and organize it based on its metadata.
#
# Fetches the video's metadata with yt-dlp, derives the directory layout
#   <domain>/<unix_timestamp>/<sentiment>/<views>/<title>.<ext>
# from it, then downloads the video into that layout.
#
# Arguments: $1 - URL of the video to download
# Outputs:   whatever the downloading yt-dlp call prints; an error message
#            on stderr when the metadata cannot be fetched
# Returns:   1 when the metadata cannot be fetched, otherwise the exit
#            status of the downloading yt-dlp call
# Requires:  yt-dlp and jq; GNU date for the timestamp conversion
#######################################
download_video() {
  local VIDEO_URL="$1"
  # Keep every working variable local so nothing leaks into the caller.
  local metadata upload_date unix_timestamp sentiment webpage_url domain views

  # Ask yt-dlp to print the metadata as a single JSON document on stdout.
  # An .info.json file written by yt-dlp is named from its output template
  # (title/id), not from the URL, so guessing the file name from the URL
  # does not find it; capturing stdout avoids the guess and leaves no file
  # behind.
  if ! metadata=$(yt-dlp --dump-single-json --skip-download -- "$VIDEO_URL") \
    || [[ -z "$metadata" ]]; then
    echo "Error: could not fetch metadata for $VIDEO_URL." >&2
    return 1
  fi

  # Extract and handle upload_date, convert to Unix timestamp.
  # upload_date is a UTC calendar date (YYYYMMDD), so convert it in UTC to
  # get the same value on every machine regardless of the local time zone.
  upload_date=$(jq -r '.upload_date // empty' <<<"$metadata")
  unix_timestamp=""
  if [[ "$upload_date" =~ ^[0-9]{8}$ ]]; then
    # stderr is silenced on purpose: a date(1) without -d support simply
    # falls through to the UNKNOWN_DATE placeholder below.
    unix_timestamp=$(date -u -d "$upload_date" +%s 2>/dev/null)
  fi
  if [[ -z "$unix_timestamp" ]]; then
    unix_timestamp="UNKNOWN_DATE"
  fi

  # Placeholder for sentiment analysis result (replace this with actual
  # sentiment analysis).
  sentiment="SENTIMENT_ANALYSIS_NA"

  # Extract domain, handle missing values. Only the text between the first
  # "://" and the next "/" is used, so a second URL embedded in the query
  # string cannot add extra lines to the directory name.
  webpage_url=$(jq -r '.webpage_url // empty' <<<"$metadata")
  domain=""
  if [[ "$webpage_url" == *://* ]]; then
    domain=${webpage_url#*://}
    domain=${domain%%/*}
  fi
  if [[ -z "$domain" ]]; then
    domain="UNKNOWN_DOMAIN"
  fi

  # Extract view count, handle missing values.
  views=$(jq -r '.view_count // empty' <<<"$metadata")
  if [[ -z "$views" ]]; then
    views="VIEW_COUNT_NA"
  fi

  # Create directory structure and download the video using yt-dlp.
  yt-dlp --output-na-placeholder "PLACEHOLDER_NA" \
    --output "$domain/$unix_timestamp/$sentiment/$views/%(title)s.%(ext)s" \
    -- "$VIDEO_URL"
}
#######################################
# Download the video whose URL is given as the first argument.
#
# Arguments: $1 - video URL (required)
# Outputs:   a usage line on stderr when the URL is missing or empty
# Returns:   1 when the URL is missing or empty, otherwise the exit status
#            of download_video
#######################################
download_from_argument() {
  # ${1:-} keeps the emptiness check safe even when the caller runs with
  # `set -u` and passes no argument at all.
  if [[ -z "${1:-}" ]]; then
    # Usage text is a diagnostic, so it goes to stderr rather than stdout.
    printf 'Usage: %s <video_url>\n' "$0" >&2
    return 1
  fi
  download_video "$1"
}
# Script entry point: hand the first command-line argument to the URL handler.
download_from_argument "${1}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment