Skip to content

Instantly share code, notes, and snippets.

@dat-boris
Created September 7, 2024 16:17
Show Gist options
  • Save dat-boris/2d1eab171638434e5def02926d22cb08 to your computer and use it in GitHub Desktop.
Save dat-boris/2d1eab171638434e5def02926d22cb08 to your computer and use it in GitHub Desktop.
A script for downloading video from https://publicorderemergencycommission.ca
#!/usr/bin/env python
"""A script for downloading video from https://publicorderemergencycommission.ca
Oh my, the site is awful! It streams the mp4 in .ts format, at a speed much slower
than the stream rate. So this script downloads the stream and compresses it using
the ffmpeg h264 codec.
You will need to find the video link - you can do that by using the developer tool
when playing the video on page
Page link:
https://publicorderemergencycommission.ca/public-hearings/day-31-november-25/
Once you are in developer tool, look at the network activity - you should be
able to see it download following files:
https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2022-11-25/2022-11-25-english.mp4/media_w812087745_1.ts
https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2022-11-25/2022-11-25-english.mp4/media_w812087745_2.ts
.... until end
(zoom to the end you will see the "end_number" )
Then modify the below "MODIFYME" constants in the file:
1. download_one_day function variables
2. __main__ variables
"""
import os
import shutil
import subprocess
from multiprocessing import Pool
from urllib import request
from urllib.error import HTTPError
# Number of workers handed to multiprocessing.Pool in download_date. Despite
# the name, Pool starts worker *processes*, not threads.
# Maybe even up to a 100, seems like this is okay!
THREAD_POOL_SIZE = 100
def download_date(date: str, end_video: int):
    """Fetch segments ``0`` .. ``end_video`` (inclusive) with a process pool.

    A folder named ``date`` is created if it does not exist yet, then
    ``download_one_day`` is mapped over every segment number.

    NOTE: ``date`` is only used here to create the output folder. The URL
    and the file path of each segment are built from the values hard-coded
    inside ``download_one_day``, so the two must be kept in sync.

    Args:
        date: Folder name for the downloaded segments, e.g. "2023-02-17".
        end_video: Number of the last segment to request.
    """
    # Make sure the per-date output folder is there before workers start.
    os.makedirs(date, exist_ok=True)

    segment_numbers = range(end_video + 1)
    with Pool(processes=THREAD_POOL_SIZE) as workers:
        workers.map(download_one_day, segment_numbers)
def download_one_day(i: int) -> bool:
    """Download segment number ``i`` of the hard-coded stream to disk.

    The segment is saved as ``{date}/{date}-{i}.ts``. A segment that is
    already on disk is not downloaded again. The download goes to a
    temporary ``.part`` file that is renamed only after it completed, so an
    interrupted transfer never leaves a truncated ``.ts`` file that a later
    run would mistake for a finished one.

    Args:
        i: Segment number used in the ``media_{slug}_{i}.ts`` URL.

    Returns:
        True if the segment is on disk (already present or just downloaded),
        False if the server answered 404 for this segment.

    Raises:
        HTTPError: For any HTTP error status other than 404.
    """
    # MODIFYME - change the date and slug based on the link
    # XXX: We need to hardcode here because multiprocessing will only pickle
    # the function directly. Lazy way to do it.
    # example link:
    # https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2022-11-25/2022-11-25-english.mp4/media_w812087745_1.ts
    # date: str = "2022-11-25"
    # slug = "w812087745"
    # https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2023-02-17/2023-02-17-english.mp4/media_w1897640591_1.ts
    date = "2023-02-17"
    slug = "w1897640591"
    url = f"https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/{date}/{date}-english.mp4/media_{slug}_{i}.ts"
    print(url)

    file = f"{date}/{date}-{i}.ts"
    if os.path.exists(file):
        print(f"File {file} already exists, skipping")
        return True

    # Download next to the final path and rename once complete; the segment
    # number makes the temporary name unique per worker.
    partial = f"{file}.part"
    try:
        request.urlretrieve(url, partial)
        os.replace(partial, file)
    except HTTPError as e:
        if e.code == 404:
            print("Done - found 404!")
            return False
        raise
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)
    return True
def join_files(date: str, end_video: int):
    """Join the downloaded segments into one file and compress it with ffmpeg.

    Segments ``{date}/{date}-0.ts`` .. ``{date}/{date}-{end_video}.ts`` are
    concatenated in numeric order into ``{date}/all.ts``, which is then
    compressed into ``{date}/compressed.mp4`` by running (requires ffmpeg):

        ffmpeg -i all.ts -vcodec h264 -tune zerolatency -acodec copy compressed.mp4

    See https://www.ffmpeg.org/download.html on how to install ffmpeg.

    Args:
        date: Folder (and file name prefix) of the downloaded segments.
        end_video: Number of the last segment to include.

    Raises:
        RuntimeError: If any expected segment file is missing. This is
            checked before ``all.ts`` is created, so no partial output is
            left behind.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    segments = [f"{date}/{date}-{i}.ts" for i in range(end_video + 1)]

    # Validate up front instead of failing halfway through the concatenation.
    missing = [path for path in segments if not os.path.exists(path)]
    if missing:
        raise RuntimeError(f"File {missing[0]} not found, cannot join segments")

    joined = f"{date}/all.ts"
    with open(joined, "wb") as out:
        for path in segments:
            print(f"Joining {path}")
            with open(path, "rb") as src:
                # Stream in chunks rather than loading a whole segment.
                shutil.copyfileobj(src, out)

    # Argument list (no shell), and fail loudly if ffmpeg does not succeed.
    subprocess.run(
        [
            "ffmpeg",
            "-i", joined,
            "-vcodec", "h264",
            "-tune", "zerolatency",
            "-acodec", "copy",
            f"{date}/compressed.mp4",
        ],
        check=True,
    )
if __name__ == "__main__":
    # Earlier runs, kept as examples:
    # https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2022-11-25/2022-11-25-english.mp4/media_w812087745_2.ts
    #   download_date("2022-11-25", 3094)
    # Only the first 2 hours were needed:
    #   download_date("2022-11-25", 1200)
    #   join_files("2022-11-25", 1200)
    #
    # https://cdn1.isilive.ca/vod/_definst_/mp4:poec-cedu/2023-02-17/2023-02-17-english.mp4/media_w1897640591_1.ts
    # MODIFYME - change the date and end video number
    day = "2023-02-17"
    last_segment = 167

    # Download every segment first, then join and compress them.
    download_date(day, last_segment)
    join_files(day, last_segment)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment