Skip to content

Instantly share code, notes, and snippets.

@kolja
Created January 25, 2020 14:33
Show Gist options
  • Save kolja/06044345e7c7d0f39839db9be0c4a21f to your computer and use it in GitHub Desktop.
Save kolja/06044345e7c7d0f39839db9be0c4a21f to your computer and use it in GitHub Desktop.
Subtitle Editor
#!/usr/bin/env bash
"exec" "plk" "-Sdeps" "{:deps {org.clojure/tools.cli {:mvn/version \"0.4.2\"}}}" "-Ksf" "$0" "$@"
;; Polyglot launcher: when run by bash, the line above execs `plk`, handing it
;; the tools.cli dependency map, this file ("$0") and the caller's arguments
;; ("$@"). Because exec replaces the shell, bash never reads past that line.
;; To the ClojureScript reader the same line is just a row of string literals.
;; NOTE: keep comments out of the first two lines; bash would try to parse them.
(ns subtitles.core
(:require
[planck.http :as http]
[planck.core :as core]
[clojure.tools.cli :refer [parse-opts]]
[clojure.string :as s]))
;; Usage text printed for the -h / --help flag.
(def help "
SUBTITLE EDITOR (.srt)
----------------------
Reads a subtitle.srt file from stdin and makes changes to it
usage:
-e <ms> show subtitles earlier (milliseconds)
-l <ms> show subtitles later (milliseconds)
-t <from:to> translate subtitles from <language> to <language>
using google translate
-r <from:to> only translate subtitles that are in the range
-h display this help screen
example:
cat subtitles.srt | sub.cljs -t en:zh -r 1:100 > chinese.srt\n")
;; Option specs handed to clojure.tools.cli/parse-opts.
;; -e / -l take an integer number of milliseconds (default 0),
;; -t takes a "from:to" language pair, -r a "from-id:to-id" numeric range
;; (default: empty, which -main treats as "no range given").
(def cli-options
  (let [->int    (fn [s] (js/parseInt s))
        ;; capture groups of a full match, or () when the value does not match
        captures (fn [pattern s] (rest (re-matches pattern s)))]
    [["-e" "--earlier <milliseconds>" "show subtitles earlier <milliseconds>"
      :parse-fn ->int
      :default 0]
     ["-l" "--later <milliseconds>" "postpone subtitles <milliseconds>"
      :parse-fn ->int
      :default 0]
     ["-t" "--translate <from:to>" "translate <from:to>"
      :parse-fn (fn [s] (captures #"([\w-]+):([\w-]+)" s))]
     ["-r" "--range <from-id:to-id>" "only translate subtitles that are in the range"
      :parse-fn (fn [s] (map js/parseInt (captures #"(\d+):(\d+)" s)))
      :default []]
     ["-h" "--help"]]))
(defn to-timestamp
  "Formats a duration in milliseconds as an SRT timestamp `hh:mm:ss,mmm`.

  total - number of milliseconds since the start of the stream.

  The millisecond field is always zero-padded to three digits, as the SRT
  format requires (1005 -> \"00:00:01,005\", not \"00:00:01,5\").
  Negative totals (e.g. after shifting subtitles earlier than 0) are clamped
  to 0 so the output is always a well-formed timestamp."
  [total]
  (let [total   (max 0 total)
        ms      (rem total 1000)
        hours   (quot total 3600000)
        minutes (- (quot total 60000) (* 60 hours))
        seconds (- (quot total 1000) (* 60 minutes) (* 3600 hours))]
    (goog.string.format "%02d:%02d:%02d,%03d" hours minutes seconds ms)))
(defn to-ms
  "Converts a timestamp string of the form `hh:mm:ss,mmm` to milliseconds.
  Each captured field is multiplied by its weight in milliseconds and the
  products are summed. A string that does not match the pattern yields 0."
  [timestamp]
  (let [weights [3600000 60000 1000 1]
        fields  (rest (re-matches #"(\d{2})\:(\d{2})\:(\d{2}),(\d{1,3})" timestamp))]
    ;; fields are strings; the multiplication coerces them to numbers
    (apply + (map * weights fields))))
(defn parse
  "Folds one non-id line into the subtitle entry `el`.
  A line shaped like `<start> --> <end>` sets :time to the pair of
  millisecond values; any other line is appended to the :text vector."
  [line el]
  (let [with-time (fn [[_ & stamps]]
                    (assoc el :time (map to-ms stamps)))
        with-text (fn [_]
                    (let [so-far (or (:text el) [])]
                      (assoc el :text (conj so-far line))))]
    (condp re-matches line
      #"(.*)\s+-->\s+(.*)" :>> with-time
      #".*" :>> with-text)))
(defn agg
  "Reducing function that builds a vector of subtitle entries from lines.

  acc  - vector of entries collected so far
  line - the next input line

  A line consisting only of digits starts a new entry `{:id line}`; every
  other line is merged into the most recent entry via `parse`. Lines that
  appear before the first id (e.g. leading blank lines) are ignored, since
  there is no entry to attach them to."
  [acc line]
  (cond
    (re-matches #"^\d+$" line) (conj acc {:id line})
    ;; nothing to attach to yet; `pop` on an empty vector would throw
    (empty? acc)               acc
    ;; peek is constant-time on vectors, unlike `last`, which walks the whole vector
    :else                      (conj (pop acc) (parse line (peek acc)))))
(defn translate
  "Translates `text` from language `from` to language `to` by issuing an HTTP
  GET against translate.googleapis.com and returns the translated string,
  or nil when the response contains no translation segments.

  text - the text to translate (may span several lines / sentences)
  from - source language code, e.g. \"en\"
  to   - target language code, e.g. \"zh\"

  NOTE(review): this endpoint is unofficial. The response is assumed to be a
  JSON array whose first element is a list of segments, each segment being
  an array whose first element is the translated text -- confirm if the
  service changes. All segments are concatenated; taking only the first one
  would drop every sentence/line after the first."
  [text from to]
  ;; supported languages: https://www.labnol.org/code/19899-google-translate-languages
  (let [uri      (-> (goog.uri.utils.setPath "https://translate.googleapis.com/" "translate_a/single")
                     (goog.uri.utils.appendParamsFromMap (clj->js {:client "gtx"
                                                                   :sl from
                                                                   :tl to
                                                                   :dt "t"
                                                                   :q text})))
        parsed   (js->clj (.parse js/JSON (:body (http/get uri))))
        segments (get parsed 0)]
    (when (and (sequential? segments) (seq segments))
      (apply str (map #(get % 0) segments)))))
(defn -main
  "Script entry point.

  With -h/--help, prints the usage text and returns without touching stdin.
  Otherwise reads an .srt file from stdin, shifts every timestamp by
  (later - earlier) milliseconds and prints the result to stdout. When a
  language pair is given with -t, the text of each subtitle whose numeric id
  lies in the -r range (default: all subtitles) is replaced by its
  translation."
  [& args]
  (let [opts (:options (parse-opts args cli-options))]
    (if (:help opts)
      ;; Decide on help before reading stdin, so `-h` never blocks on input.
      (println help)
      (let [offset                 (- (get opts :later) (get opts :earlier))
            input                  (reduce agg [] (core/line-seq core/*in*))
            last-id                (js/parseInt (get (last input) :id) 10)
            [src-lang target-lang] (get opts :translate)
            rng                    (get opts :range)
            [from to]              (if (empty? rng) [0 last-id] rng)]
        (doseq [{:keys [id time text]} input]
          (println id)
          (println (to-timestamp (+ (first time) offset))
                   "-->"
                   (to-timestamp (+ (second time) offset)))
          (let [subtitle (s/join "\n" text)]
            ;; ids are read as strings; compare them numerically against the range
            (if (and src-lang target-lang (<= from (js/parseInt id 10) to))
              (println (translate subtitle src-lang target-lang))
              (println subtitle))))))))
(set! *main-cli-fn* -main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment