Skip to content

Instantly share code, notes, and snippets.

@J-Broadway
Last active November 27, 2023 07:33
Show Gist options
  • Save J-Broadway/de61cc0f2be876a1036667b2bced3e6b to your computer and use it in GitHub Desktop.
Save J-Broadway/de61cc0f2be876a1036667b2bced3e6b to your computer and use it in GitHub Desktop.
YouTube videos page scraper (JS to paste into dev tools console)
///////////////////////////////////////////////////////////////////////////////////////////////////
// WAT DO
//-------------------------------------------------------------------------------------------------
// Scrapes each video's thumbnail, title (with embedded hyperlink), view count and estimated
// upload date from a YouTube videos page, then saves the result as an .html file
///////////////////////////////////////////////////////////////////////////////////////////////////
// USAGE
//-------------------------------------------------------------------------------------------------
// - Open up your browser's dev tools (CTRL + SHIFT + C [works for me])
// - Navigate to 'Console' tab
// - Paste this code and hit 'ENTER'
// - Save .html to your computer
///////////////////////////////////////////////////////////////////////////////////////////////////
// WORKING AS OF 11/27/2023
///////////////////////////////////////////////////////////////////////////////////////////////////
(function() {
/**
 * Estimate a calendar date from a relative description such as "3 weeks ago".
 *
 * The first "<number> <unit>" pair found anywhere in the text is used, so labels that carry a
 * prefix (e.g. "Streamed 2 years ago") or irregular spacing are understood as well. Supported
 * units: second, minute, hour, day, week, month, year (singular or plural, any letter case).
 *
 * @param {string} relativeDateStr - Relative date text, e.g. "5 days ago".
 * @param {Date} [now=new Date()] - Reference date to count back from; it is not modified.
 * @returns {string} The estimated date formatted with toLocaleDateString(), or 'Unknown date'
 *   when the text contains no recognisable amount/unit pair or the result is not a valid date.
 */
function estimateDate(relativeDateStr, now = new Date()) {
  const match = /(\d+)\s+(second|minute|hour|day|week|month|year)s?\b/i.exec(String(relativeDateStr));
  if (!match) {
    return 'Unknown date';
  }

  const amount = parseInt(match[1], 10);
  const unit = match[2].toLowerCase();
  // Work on a copy so the caller's reference date is never mutated.
  const estimated = new Date(now.getTime());
  const originalDay = estimated.getDate();

  switch (unit) {
    case 'year':
      estimated.setFullYear(estimated.getFullYear() - amount);
      // Feb 29 minus N years can roll over into March; step back to the end of February.
      if (estimated.getDate() !== originalDay) {
        estimated.setDate(0);
      }
      break;
    case 'month':
      estimated.setMonth(estimated.getMonth() - amount);
      // e.g. Mar 31 minus 1 month overflows to Mar 3; setDate(0) yields the last day of the
      // intended (shorter) month instead.
      if (estimated.getDate() !== originalDay) {
        estimated.setDate(0);
      }
      break;
    case 'week':
      estimated.setDate(estimated.getDate() - amount * 7);
      break;
    case 'day':
      estimated.setDate(estimated.getDate() - amount);
      break;
    case 'hour':
      estimated.setHours(estimated.getHours() - amount);
      break;
    case 'minute':
      estimated.setMinutes(estimated.getMinutes() - amount);
      break;
    case 'second':
      estimated.setSeconds(estimated.getSeconds() - amount);
      break;
    default:
      return 'Unknown date';
  }

  // An out-of-range amount (or an invalid reference date) would otherwise print "Invalid Date".
  if (Number.isNaN(estimated.getTime())) {
    return 'Unknown date';
  }
  return estimated.toLocaleDateString();
}
// Escape scraped text before placing it into HTML element content or a double-quoted attribute
// value. Titles and URLs come from the page, so characters such as <, & and " must not be
// emitted raw or they would break (or inject markup into) the generated file.
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

// Collect the HTML fragments; they are joined once at the end.
// The charset declaration matches the UTF-8 bytes the Blob is written with, so non-ASCII
// titles are not misread when the saved file is opened.
const htmlParts = [
  '<!DOCTYPE html><html><head><meta charset="utf-8"><title>YouTube Content</title></head><body>'
];

// Keys of videos already written, used to prevent duplicate entries. The video link is the key
// (falling back to the title when there is no link) so that two different videos sharing the
// same title are both kept.
const processedVideos = new Set();

// Get all elements with the 'ytd-rich-item-renderer' class
const elements = document.querySelectorAll('.ytd-rich-item-renderer');
elements.forEach(el => {
  const titleElement = el.querySelector('#video-title-link');
  // Elements without a title link are not video entries; skip them.
  if (!titleElement) return;

  const title = titleElement.textContent.trim();
  const videoLink = titleElement.href;

  // Skip processing if this video has already been processed
  const dedupeKey = videoLink || title;
  if (processedVideos.has(dedupeKey)) return;
  processedVideos.add(dedupeKey);

  // The first metadata item is read as the view count and the second as the relative date.
  const metadataItems = el.querySelectorAll('#metadata-line .inline-metadata-item');
  const viewCount = metadataItems.length > 0 ? metadataItems[0].textContent.trim() : 'No views';
  const dateText = metadataItems.length > 1 ? metadataItems[1].textContent.trim() : 'No date';
  const estimatedDate = dateText !== 'No date' ? estimateDate(dateText) : 'Unknown date';

  const thumbnailElement = el.querySelector('ytd-thumbnail img');
  const thumbnail = thumbnailElement ? thumbnailElement.src : '';

  htmlParts.push(`
<div>
<h3><a href="${escapeHtml(videoLink)}">${escapeHtml(title)}</a></h3>
<p>Views: ${escapeHtml(viewCount)}</p>
<p>Date: ${escapeHtml(estimatedDate)}</p>
<img src="${escapeHtml(thumbnail)}" alt="Thumbnail">
</div>
`);
});
htmlParts.push('</body></html>');

// Create a Blob from the content
const blob = new Blob([htmlParts.join('')], {type: 'text/html'});
const blobUrl = URL.createObjectURL(blob);

// Create a link element for downloading
const link = document.createElement('a');
link.href = blobUrl;
link.download = 'youtube_content.html';

// Append the link to the document, trigger the download, and then remove the link
document.body.appendChild(link);
link.click();
document.body.removeChild(link);

// Release the object URL so the Blob can be garbage-collected. Deferred to a later task so the
// download started by click() is not interrupted.
setTimeout(() => URL.revokeObjectURL(blobUrl), 0);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment