Created
October 29, 2023 20:16
-
-
Save sudoalx/7796e621d03627252264d05c80aa3646 to your computer and use it in GitHub Desktop.
GoTo Meeting Transcripts bulk downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import requests | |
from tqdm import tqdm | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.action_chains import ActionChains | |
def download_video(link, destination_directory):
    """Download the video embedded in the page at *link*.

    The page is loaded in headless Chrome, the ``src`` of its ``<video>``
    element is read, and the video is streamed to
    ``<destination_directory>/<page title> - <date>.mp4`` while a tqdm
    progress bar is displayed.

    Args:
        link: URL of the page that contains the ``<video>`` element.
        destination_directory: Directory the ``.mp4`` file is written to.
            It is created if it does not exist yet.

    Raises:
        selenium.common.exceptions.TimeoutException: No ``<video>`` element
            appeared within 10 seconds.
        selenium.common.exceptions.NoSuchElementException: The date or
            title element was not found on the page.
        ValueError: The ``<video>`` element has no ``src`` attribute.
        requests.HTTPError: The video URL answered with an error status.
    """
    # Function-scope imports so this block does not depend on edits to the
    # file's top-level import list.
    import os
    import re

    # Set up Chrome in headless mode
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')  # Required when running in headless mode
    chrome_options.add_argument('--no-sandbox')  # Required when running in headless mode

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(link)

        # Wait for the <video> tag to be present in the page (up to 10 seconds)
        wait = WebDriverWait(driver, 10)
        video_element = wait.until(EC.presence_of_element_located((By.TAG_NAME, 'video')))

        # Find the date element and extract its text
        date_element = driver.find_element(By.XPATH, '//div[p[text()="Date"]]/p[contains(@class, "src-components-Info-MeetingStats-MeetingStats--text")]')
        date_text = date_element.text

        # <title> lives in <head> and is not rendered, so read textContent
        # rather than the (visible-text only) .text property.
        title_element = driver.find_element(By.TAG_NAME, 'title')
        title_text = title_element.get_attribute('textContent')

        # Give the page a few more seconds before reading the video source
        # (adjust the sleep time as needed)
        time.sleep(5)
        video_url = video_element.get_attribute("src")
    finally:
        # Always shut the browser down, even when a lookup above fails;
        # otherwise every failed link leaks a headless Chrome process.
        driver.quit()

    if not video_url:
        raise ValueError(f"No video source found on page: {link}")

    # The title and date come from the page, so they may contain characters
    # that are not valid in a file name (e.g. "/" in a date). Collapse
    # whitespace/newlines and replace whatever the current OS forbids.
    unsafe_chars = r'[\\/:*?"<>|\x00-\x1f]' if os.name == "nt" else r'[/\x00-\x1f]'
    raw_name = " ".join(f"{title_text} - {date_text}".split())
    safe_name = re.sub(unsafe_chars, "_", raw_name)

    os.makedirs(destination_directory, exist_ok=True)
    full_filename = os.path.join(destination_directory, f"{safe_name}.mp4")

    # Download the video using requests and display a progress bar.
    # The timeout is (connect, read-between-chunks) so a stalled server
    # cannot hang the whole batch forever.
    with requests.get(video_url, stream=True, timeout=(10, 60)) as response:
        # Fail loudly instead of saving an HTML error page as "video".mp4
        response.raise_for_status()
        # total=None gives tqdm an open-ended bar when the size is unknown
        total_size = int(response.headers.get('content-length', 0)) or None
        with open(full_filename, "wb") as video_file, \
                tqdm(total=total_size, unit='B', unit_scale=True, unit_divisor=1024) as pbar:
            for data in response.iter_content(chunk_size=64 * 1024):
                video_file.write(data)
                pbar.update(len(data))
if __name__ == "__main__":
    # Command-line entry point: read the list of links from a text file and
    # download each one into the destination directory.
    import argparse

    parser = argparse.ArgumentParser(description="Download videos from a list of links.")
    parser.add_argument("-f", "--links-file", required=True, help="Path to the text file containing comma-separated links.")
    parser.add_argument("-d", "--destination-directory", required=True, help="Directory where downloaded videos will be saved.")
    args = parser.parse_args()

    with open(args.links_file, "r") as file:
        raw_links = file.read()

    # Links are comma-separated, but tolerate one-link-per-line files too.
    # Strip the whitespace around every entry and drop empty ones so that
    # "a, b" or a trailing comma/newline never yields a blank or padded URL.
    candidates = (entry.strip() for entry in raw_links.replace("\n", ",").split(","))
    links = [entry for entry in candidates if entry]

    for link in links:
        download_video(link, args.destination_directory)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
GoToMeeting Transcripts Video Downloader
The GoToMeeting Transcripts Video Downloader is a Python program tailored for automating the downloading of video files from the GoToMeeting Transcripts website. It streamlines the process of fetching videos from this specific source.
Features
Headless Browsing: The script utilizes a headless Chrome browser to interact with the GoToMeeting Transcripts website, ensuring a seamless and unobtrusive experience.
Date Extraction: It extracts the date information from the GoToMeeting webpage, facilitating organization and categorization of downloaded videos.
Title Extraction: The program captures the page's `<title>` tag from the webpage, allowing precise identification of the downloaded video.
Flexible Parameters: Users can specify the list of video links to download from a text file and define the directory where downloaded GoToMeeting videos will be saved, making it highly configurable.
Download Progress: It displays a download progress bar using the `tqdm` library, so users can easily track the download process.
Custom File Naming: The script constructs two file names for each downloaded video - a full file name with the video title and date, and a shorter file name with title and date separated by underscores.
How to Use
Clone the repository or download the script.
Install the required libraries using `pip install selenium requests tqdm`.
Execute the script with the following command:
python script_name.py -f links_file.txt -d destination_directory
Example:
python3 script_name.py --links-file video_links.txt --destination-directory Downloads/Meeting
Replace `script_name.py` with the name of the script.
`links_file.txt` should be a text file with comma-separated video links.
`destination_directory` is the directory where downloaded GoToMeeting videos will be saved.
This specialized script simplifies the process of downloading videos from GoToMeeting Transcripts and can be a valuable tool for users who regularly access and work with content from this platform.
Note: Ensure you have Python and ChromeDriver installed before using the script.