Skip to content

Instantly share code, notes, and snippets.

@DarrenSem
Last active September 15, 2024 16:41
Show Gist options
  • Save DarrenSem/dd66d5b1b133e35775c75abedd52fc95 to your computer and use it in GitHub Desktop.
Save DarrenSem/dd66d5b1b133e35775c75abedd52fc95 to your computer and use it in GitHub Desktop.
ytSubs.js - YouTube subtitles - English (auto-generated) CC (closed captions) - Usage: node ytSubs.js videoIdOrUrl, or Web browser BOOKMARKLET into a new window
// ytSubs.js (SEE DISCLAIMER) - via @DarrenSem https://gist.github.com/DarrenSem (22Mar2024)
// YouTube subtitles - English (auto-generated) CC (closed captions)
// Usage: node ytSubs.js videoIdOrUrl
// or Web browser BOOKMARKLET (contents open in a new window)
// ES6 clickable Bookmarklet = 3923 chars:
//javascript:void function(){"use strict";var e=String.fromCharCode;const n=!1,d=globalThis,{process:i}=d,t=console,u=async(e,n,d="text")=>fetch(e,n).then(e=>e[d]()),a=e=>{try{e=(e||"")+"";const n=e.match(/"captionTracks":.*"isTranslatable"\:.*?}]/),d=JSON.parse(`{${(n||[""])[0]}}`).captionTracks||[],i=d.map(e=>{const n=e.name;return[l(n.simpleText||n.runs&&n.runs[0].text),e.baseUrl+"&fmt=json3"]});return i}catch(n){}},r=e=>{e=new Date(e);let n=e.getDate();return isNaN(n)?null:`${(n+"").padStart(2,"0")}${e.toLocaleString("default",{month:"short"})}${e.getFullYear()}`},f=e=>{e="string"!=typeof e&&e?new URL(e):{href:e||""};const n=e&&e.href.trim()||"",d=n.replace(/(https?:\/\/)?\/*(.+?)\/?$/,"https://$2"),i=d.replace(/(https:\/\/)([-\w]+)$/,"$1youtu.be/$2"),t=i.replace(/(https:\/\/(?:youtu\.be|(?:\w+\.)?youtube\.\w+)\/)([-\w]+)$/,"$1$2/?&v=$2");try{n&&(e=new URL(t))}catch(n){}const{pathname:u,searchParams:a}=e,r=a&&a.get("v")||u&&(u.match(/^\/?(shorts|video|watch|live|stream|audio)\/([^?&\/]+)/)||[])[2];return r},l=n=>((n||"")+"").replace(/\\(?:u([a-zA-Z\d]{4})|x([a-zA-Z\d]{2})|(.))/g,(n,d,i,t)=>d?e(parseInt(d,16)):i?e(parseInt(i,16)):`${t}`),c=async e=>{const n=e&&new URL(e),d=f(n);let i="";d&&(i=await u(n));const t=new RegExp(`,"videoDetails":{"videoId":"${d}","title":"(.+?)","lengthSeconds":.+?"publishDate":"(.+?)","ownerChannelName":"([^"]+)".+?"uploadDate":"(.+?)"`),c=i.match(t),[,s,,p,h]=c||[],m=/:/.test(h)?h:`${h} 1:00`,b={id:d,title:l(s),urls:a(i),channel:o(l(p)),date:r(m)};return b},s=async e=>{const{urls:n}=e,d=n&&n[(n.findIndex(e=>e[0].endsWith(`English (auto-generated)`))+1||n.findIndex(e=>e[0].startsWith(`English (`))+1||n.findIndex(e=>e[0].startsWith(`English `))+1||1)-1];let i,t="";return d&&(t=await u(d[1])),i=JSON.parse((t||null)+""),i},p=e=>{let n=0,d="";const i=/[\s↵\x00-\x20\x7f-\x9f]+/g,t=/(?:♪|\[(INAUDIBLE|Applause|Laughter|Music|Man|Woman)\]|\((?:distorted singing|voice echoing)\))\s*/g,u=/\[\s_+\s\]/g,{events:a=[]}=e||{};return a.forEach(e=>{const{segs:a=[]}=e;let r="";if(a.forEach(e=>{const{utf8:n=""}=e,d=n.replace(/\s+/g," ").trim(),a=d.replace(i," ").replace(u,"[_]").replace(t,"");""!==a&&(r+=" "+a)}),r=r.trim(),r.length){const e=n?" ":"",i=n+1<3?"":"\n\n";d+=e+r+i,r="",n=i?0:n+1}}),d},h=(e,n)=>{if(i)return t.info(n);const d=open(URL.createObjectURL(new Blob([n],{type:"text/plain;charset=utf-8"})),"_blank");d&&!d.closed&&(d.onload=()=>{e&&(d.document.title=e)})},o=e=>{e=null==e?"":e+"";const n=e.replace(/[\u200b\u00ad\ufffc]/g,""),d=n.replace(/[\xa0\ufeff]/g," "),i=d.replace(/[\u{1F600}-\u{1F64F}]/gu,""),t=i.replace(/\s*[¸,]/g,","),u=t.replace(/[…]/g,"..."),a=u.replace(/\s*[.\uf03a\u2024]/g,"."),r=a.replace(/¡/g,"!"),f=r.replace(/⋕/g,"#"),l=f.replace(/[@]/g," at "),c=l.replace(/[~˜^‸ˆ⋍•]+/g,"-"),s=c.replace(/(\s*[¦:–—|‹›·«»¯⋅⋆⋇⋖⋗]+\s*)|\s+(-{2,})\s*|\s*[-]+\s+/g," - "),p=s.replace(/\s*[\x25‱]/g," percent"),h=p.replace(/["″‴“”‘’′´`‵‶‷¨]|&(quot|#34|apos|#39);/g,"'"),o=h.replace(/[/\\:*?"<>|]|&lt;|&gt;/g,"_"),m=o.replace(/(\s*(&amp;)+\s*)+/g," and "),b=m.replace(/(?:\s*&(\w+?);+(\s)*)+/g,"_$1;$2"),g=b.replace(/(\s*[&]+\s*)/g," and "),$=g.replace(/\s+/g," ").trim();return $},m=async(e,n)=>{try{n=((n||"https://www.youtube.com/watch?v=")+"").trim(),e=((e||"")+"").trim();const i=d.document?.location?.href||"",t=e?`${/[^-\w]/.test(e)?"":n}${e}`:i,u="//m.youtube.",a=i.includes(u)?n.replace("//www.youtube.",u):n;e=f(t);const r=e&&`${a}${e}`,h=await c(r),m=await s(h),{title:b,channel:g,date:$,id:w}=h;if(!g)return"";const x=`${n}${w}&ab_channel=${g.replace(/\s/g,"")}`,v=p(m),y=o(l(b));return{title:y,channel:g,date:$,id:w,url:x,subs:v}}catch(n){return t.error(n),n.message}},b=e=>``+`${e.title} (${e.channel} ${e.date} ${e.id})`+` - transcript`+`\n\n`+`...\n\n`+`${e.url}\n\n`+``+e.subs?.trim();{t.subs||(t.subs=m,i||!0||alert("console.subs() added!")),!1||(async()=>{const e=i&&i.argv[2]||n&&n;let d=await m(e),t=b(d);h(d.title,t+"\n\n")})()}}();
// ES6 "injectable" script stored as console.subs = async function(url) = 3923 chars:
// Installs `console.subs` (only if it is not already set). When `globalThis.process`
// is absent (i.e. not under Node) an alert announces that the function was added.
// The automatic "fetch and display" run is switched off in this variant (`!0||...`).
//
// console.subs(videoIdOrUrl, baseUrl = "https://www.youtube.com/watch?v=")
//   Resolves to { title, channel, date, id, url, subs } on success,
//   to "" when no channel name could be extracted from the fetched page,
//   or to the error's message string when an exception is caught (it is also
//   passed to console.error).
//   Caption track preference: a name ending with "English (auto-generated)",
//   then one starting with "English (", then one starting with "English ",
//   otherwise the first track listed.
//
// NOTE: this must stay on ONE physical line. A line break directly after a
// `return` makes the function return undefined (automatic semicolon insertion),
// and the `/⋕/g` literal must keep its character, because an empty `//g` would
// start a line comment and swallow the rest of the script.
javascript:void function(){"use strict";var e=String.fromCharCode;const n=!1,d=globalThis,{process:i}=d,t=console,u=async(e,n,d="text")=>fetch(e,n).then(e=>e[d]()),a=e=>{try{e=(e||"")+"";const n=e.match(/"captionTracks":.*"isTranslatable"\:.*?}]/),d=JSON.parse(`{${(n||[""])[0]}}`).captionTracks||[],i=d.map(e=>{const n=e.name;return[l(n.simpleText||n.runs&&n.runs[0].text),e.baseUrl+"&fmt=json3"]});return i}catch(n){}},r=e=>{e=new Date(e);let n=e.getDate();return isNaN(n)?null:`${(n+"").padStart(2,"0")}${e.toLocaleString("default",{month:"short"})}${e.getFullYear()}`},f=e=>{e="string"!=typeof e&&e?new URL(e):{href:e||""};const n=e&&e.href.trim()||"",d=n.replace(/(https?:\/\/)?\/*(.+?)\/?$/,"https://$2"),i=d.replace(/(https:\/\/)([-\w]+)$/,"$1youtu.be/$2"),t=i.replace(/(https:\/\/(?:youtu\.be|(?:\w+\.)?youtube\.\w+)\/)([-\w]+)$/,"$1$2/?&v=$2");try{n&&(e=new URL(t))}catch(n){}const{pathname:u,searchParams:a}=e,r=a&&a.get("v")||u&&(u.match(/^\/?(shorts|video|watch|live|stream|audio)\/([^?&\/]+)/)||[])[2];return r},l=n=>((n||"")+"").replace(/\\(?:u([a-zA-Z\d]{4})|x([a-zA-Z\d]{2})|(.))/g,(n,d,i,t)=>d?e(parseInt(d,16)):i?e(parseInt(i,16)):`${t}`),c=async e=>{const n=e&&new URL(e),d=f(n);let i="";d&&(i=await u(n));const t=new RegExp(`,"videoDetails":{"videoId":"${d}","title":"(.+?)","lengthSeconds":.+?"publishDate":"(.+?)","ownerChannelName":"([^"]+)".+?"uploadDate":"(.+?)"`),c=i.match(t),[,s,,p,h]=c||[],m=/:/.test(h)?h:`${h} 1:00`,b={id:d,title:l(s),urls:a(i),channel:o(l(p)),date:r(m)};return b},s=async e=>{const{urls:n}=e,d=n&&n[(n.findIndex(e=>e[0].endsWith(`English (auto-generated)`))+1||n.findIndex(e=>e[0].startsWith(`English (`))+1||n.findIndex(e=>e[0].startsWith(`English `))+1||1)-1];let i,t="";return d&&(t=await u(d[1])),i=JSON.parse((t||null)+""),i},p=e=>{let n=0,d="";const i=/[\s↵\x00-\x20\x7f-\x9f]+/g,t=/(?:♪|\[(INAUDIBLE|Applause|Laughter|Music|Man|Woman)\]|\((?:distorted singing|voice echoing)\))\s*/g,u=/\[\s_+\s\]/g,{events:a=[]}=e||{};return a.forEach(e=>{const{segs:a=[]}=e;let r="";if(a.forEach(e=>{const{utf8:n=""}=e,d=n.replace(/\s+/g," ").trim(),a=d.replace(i," ").replace(u,"[_]").replace(t,"");""!==a&&(r+=" "+a)}),r=r.trim(),r.length){const e=n?" ":"",i=n+1<3?"":"\n\n";d+=e+r+i,r="",n=i?0:n+1}}),d},h=(e,n)=>{if(i)return t.info(n);const d=open(URL.createObjectURL(new Blob([n],{type:"text/plain;charset=utf-8"})),"_blank");d&&!d.closed&&(d.onload=()=>{e&&(d.document.title=e)})},o=e=>{e=null==e?"":e+"";const n=e.replace(/[\u200b\u00ad\ufffc]/g,""),d=n.replace(/[\xa0\ufeff]/g," "),i=d.replace(/[\u{1F600}-\u{1F64F}]/gu,""),t=i.replace(/\s*[¸,]/g,","),u=t.replace(/[…]/g,"..."),a=u.replace(/\s*[.\uf03a\u2024]/g,"."),r=a.replace(/¡/g,"!"),f=r.replace(/⋕/g,"#"),l=f.replace(/[@]/g," at "),c=l.replace(/[~˜^‸ˆ⋍•]+/g,"-"),s=c.replace(/(\s*[¦:–—|‹›·«»¯⋅⋆⋇⋖⋗]+\s*)|\s+(-{2,})\s*|\s*[-]+\s+/g," - "),p=s.replace(/\s*[\x25‱]/g," percent"),h=p.replace(/["″‴“”‘’′´`‵‶‷¨]|&(quot|#34|apos|#39);/g,"'"),o=h.replace(/[/\\:*?"<>|]|&lt;|&gt;/g,"_"),m=o.replace(/(\s*(&amp;)+\s*)+/g," and "),b=m.replace(/(?:\s*&(\w+?);+(\s)*)+/g,"_$1;$2"),g=b.replace(/(\s*[&]+\s*)/g," and "),$=g.replace(/\s+/g," ").trim();return $},m=async(e,n)=>{try{n=((n||"https://www.youtube.com/watch?v=")+"").trim(),e=((e||"")+"").trim();const i=d.document?.location?.href||"",t=e?`${/[^-\w]/.test(e)?"":n}${e}`:i,u="//m.youtube.",a=i.includes(u)?n.replace("//www.youtube.",u):n;e=f(t);const r=e&&`${a}${e}`,h=await c(r),m=await s(h),{title:b,channel:g,date:$,id:w}=h;if(!g)return"";const x=`${n}${w}&ab_channel=${g.replace(/\s/g,"")}`,v=p(m),y=o(l(b));return{title:y,channel:g,date:$,id:w,url:x,subs:v}}catch(n){return t.error(n),n.message}},b=e=>``+`${e.title} (${e.channel} ${e.date} ${e.id})`+` - transcript`+`\n\n`+`...\n\n`+`${e.url}\n\n`+``+e.subs?.trim();{t.subs||(t.subs=m,i||!1||alert("console.subs() added!")),!0||(async()=>{const e=i&&i.argv[2]||n&&n;let d=await m(e),t=b(d);h(d.title,t+"\n\n")})()}}();
// DISCLAIMER: **For educational and code technique / demonstrative purposes only, not responsible or liable for any use by anyone.**
@DarrenSem
Copy link
Author

DarrenSem commented Mar 26, 2024

// https://www.youtube.com/watch?v=0vGLXkQV3cg&ab_channel=FoxNews

tracks

0: {lang: 'English (auto-generated)', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…4CE1C66F807AA0&key=yt8&kind=asr&lang=en&fmt=json3', languageCode: 'en'}
1: {lang: 'English - CC1', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…4CE1C66F807AA0&key=yt8&lang=en&name=CC1&fmt=json3', languageCode: 'en'}
2: {lang: 'English - DTVCC1', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…1C66F807AA0&key=yt8&lang=en&name=DTVCC1&fmt=json3', languageCode: 'en'}
3: {lang: 'English (United States)', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…EF5C224CE1C66F807AA0&key=yt8&lang=en-US&fmt=json3', languageCode: 'en-US'}

length: 4

^ "English (United States)", then /en\-.+/ takes precedence over just /en/, only THEN use the first /en/ and then use [0].

// Earlier selection rule: given `tracks` (objects with `lang`, `url` and
// `languageCode`, like the four listed above), evaluate to the track to use.
(tracks => (
			// 1st choice: first track whose `lang` contains "English (auto-generated)" (case-insensitive).
			tracks.filter(track => track.lang.match(/English \(auto-generated\)/i))[0]
			// 2nd choice: first track whose `languageCode` starts with "en" (case-insensitive).
			|| tracks.filter(track => track.languageCode.match(/^en/i))[0]
			// Last resort: the first track (undefined when `tracks` is empty).
			|| tracks[0]
		));

now should become this...

// Revised selection rule: given `tracks` (objects with `lang`, `url` and
// `languageCode`), evaluate to the track to use, trying each rule in order.
(tracks => (
			// 1st choice: first track whose `lang` contains "English (auto-generated)" (case-insensitive).
			tracks.filter(track => track.lang.match(/English \(auto-generated\)/i))[0]
			// 2nd choice: first track whose `lang` contains "English (United States)".
			|| tracks.filter(track => track.lang.match(/English \(United States\)/i))[0]
			// 3rd choice: first track with a regional code, i.e. "en-" followed by at least one character.
			|| tracks.filter(track => track.languageCode.match(/^en\-.+/i))[0]
			// 4th choice: first track whose `languageCode` starts with "en".
			|| tracks.filter(track => track.languageCode.match(/^en/i))[0]
			// Last resort: the first track (undefined when `tracks` is empty).
			|| tracks[0]
		));

PS: make this use "find" [not findIndex] INSTEAD OF .filter (DUH!)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment