Skip to content

Instantly share code, notes, and snippets.

@swenson
Created November 4, 2023 23:27
Show Gist options
  • Save swenson/efa268aa5d0d1b0c5e2db32e3e65a771 to your computer and use it in GitHub Desktop.
Save swenson/efa268aa5d0d1b0c5e2db32e3e65a771 to your computer and use it in GitHub Desktop.
Quick and dirty script to find files with missing subtitles and transcribe them with whisper.cpp -- requires ffmpeg and whisper.cpp. Run like: python3 transcribe-missing.py path/to/media/files
#!/bin/bash
# Transcribe the audio track of one media file into an English .srt sidecar.
#
# Usage: ./make-subtitles.sh path/to/media-file
#
# Run this from a whisper.cpp checkout: it invokes ./main and loads
# models/ggml-base.en.bin relative to the current directory. Requires ffmpeg.
# The result is written next to the input as "<name without extension>.en.srt".
set -euxo pipefail

if [ "$#" -lt 1 ]; then
  echo "usage: $0 path/to/media-file" >&2
  exit 2
fi

input="$1"

# Strip the extension from the file name only, so a dot in a directory name
# (e.g. "./clip" or "season.1/clip") is never mistaken for the extension.
input_dir=$(dirname -- "$input")
input_name=$(basename -- "$input")
srt_out="$input_dir/${input_name%.*}.en.srt"

# Work in a private scratch directory so concurrent runs do not clobber each
# other's temp.wav, and so nothing is left behind on success or failure.
workdir=$(mktemp -d)
trap 'rm -rf -- "$workdir"' EXIT
wav="$workdir/temp.wav"

echo "Converting audio"
# whisper.cpp expects 16 kHz mono 16-bit PCM. -nostdin keeps ffmpeg from
# reading (and swallowing) the caller's standard input.
ffmpeg -nostdin -i "$input" -ar 16000 -ac 1 -c:a pcm_s16le "$wav"

echo "Transcribing"
# With --output-srt, whisper.cpp writes "<input wav>.srt" next to the wav file.
./main -m models/ggml-base.en.bin -f "$wav" --output-srt -t 8 -ml 42
mv -- "$wav.srt" "$srt_out"

# optional: rewrite the original file to include subtitles
#echo "Adding to video file"
#ffmpeg -nostdin -i "$input" -i "$srt_out" -c copy -metadata:s:s:0 language=eng "$input_dir/${input_name%.*}.subtitled.mkv"
#!/usr/bin/env python3
import json
import os.path
import os
import subprocess as sp
import sys
import time
def has_subtitles(fname) -> bool:
    """Return True if the media file *fname* already has subtitles.

    A file counts as subtitled when either:

    * a sidecar file with the same base name and one of the known subtitle
      suffixes exists next to it, or
    * ffprobe reports at least one embedded stream whose ``codec_type`` is
      ``'subtitle'``.

    ffprobe is only invoked when no sidecar file is found.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
        FileNotFoundError: if the ffprobe executable is not on PATH.
    """
    base, _ = os.path.splitext(fname)
    sidecar_suffixes = (
        '.srt', '.en.srt', '.english.srt',
        '.stt', '.en.stt', '.english.stt',
        '.sub',
    )
    if any(os.path.exists(base + suffix) for suffix in sidecar_suffixes):
        return True

    # Capture stdout only: merging stderr into it would mix ffprobe's
    # diagnostics into the JSON document and break parsing.
    out = sp.check_output([
        'ffprobe', '-v', 'quiet', '-print_format', 'json',
        '-show_format', '-show_streams', fname,
    ])
    # Use .get() so a missing 'streams' list or a stream entry without a
    # 'codec_type' counts as "no subtitle" instead of raising KeyError.
    streams = json.loads(out).get('streams', [])
    return any(stream.get('codec_type') == 'subtitle' for stream in streams)
# File extensions (lower-case, with leading dot) treated as video files.
check_endings = {'.mkv', '.mpg', '.avi', '.mp4', '.m4v', '.mov'}


def main(argv=None) -> int:
    """Walk a media directory and transcribe every video lacking subtitles.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the root directory to
    scan recursively. Each video file for which ``has_subtitles`` returns
    False is printed and handed to ``./make-subtitles.sh``, which is resolved
    relative to the current working directory.

    Returns 0 on success and 2 when no directory argument was given.

    Raises:
        subprocess.CalledProcessError: if ``./make-subtitles.sh`` fails for a
            file; the walk stops at the first failure.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print(f'usage: {argv[0] if argv else "transcribe-missing.py"} path/to/media/files',
              file=sys.stderr)
        return 2

    for dirpath, _dirnames, fnames in os.walk(argv[1]):
        for name in fnames:
            fname = os.path.join(dirpath, name)
            _, ext = os.path.splitext(fname)
            # Compare case-insensitively so "CLIP.MKV" or "clip.Mp4" is not
            # silently skipped.
            if ext.lower() not in check_endings:
                continue
            if has_subtitles(fname):
                continue
            print(fname)
            sp.check_call(['./make-subtitles.sh', fname])
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment