Skip to content

Instantly share code, notes, and snippets.

@NoxWings
Last active April 6, 2018 14:28
Show Gist options
  • Save NoxWings/42ea1b01cfb7f466d31037e12461ba6a to your computer and use it in GitHub Desktop.
Save NoxWings/42ea1b01cfb7f466d31037e12461ba6a to your computer and use it in GitHub Desktop.
const osmosis = require("osmosis");
const fs = require("fs");
/**
 * Collects the per-resolution video URLs found in a piece of script text.
 *
 * The text is scanned for `resolution == "<name>"` comparisons and for
 * `source.src = "<url>";` assignments. The n-th comparison is paired with the
 * n-th assignment; pairing stops as soon as either pattern has no further
 * occurrence, so surplus comparisons or assignments are ignored.
 *
 * @param {string} code - Script source text to scan.
 * @returns {Object<string, string>} Object mapping each resolution name to
 *   the URL paired with it (empty when nothing could be paired).
 */
function extractVideoLinks (code) {
    // Both patterns carry the `g` flag so that each exec() call resumes
    // where the previous one stopped.
    const resolutionPattern = /resolution *==+ *"(.*?)"/g;
    const sourcePattern = /source\.src *= *"(.*?)";/g;
    const linksByResolution = {};

    for (;;) {
        // Advance both scanners on every pass, regardless of whether the
        // first one found anything.
        const resolutionHit = resolutionPattern.exec(code);
        const sourceHit = sourcePattern.exec(code);

        if (!resolutionHit || !sourceHit) {
            break;
        }

        linksByResolution[resolutionHit[1]] = sourceHit[1];
    }

    return linksByResolution;
}
// Scrape the Destroy All Software screencast catalog and write the collected
// data to scraped_data.json.
// NOTE(review): the remarks on selector and chaining behaviour below assume the
// usual osmosis conventions — confirm against the osmosis documentation.
osmosis
.get("https://www.destroyallsoftware.com/screencasts/catalog")
// Use a desktop Chrome user agent string for the request configuration.
.config({ user_agent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36" })
.set({
// Everything scraped ends up under the "scrapedData" key, built by a nested
// osmosis chain over the ".season" elements.
"scrapedData": osmosis
.find(".season")
.set({
// presumably "a@name" selects the `name` attribute of the title link — verify
season: ".season_title a@name",
// Array wrapper: presumably collects one entry per matched episode — verify
episodes: [
osmosis
.find(".episodes .episode")
.set({
// presumably the 1st..4th <div> inside the episode's ".row" — verify
number: ".row div[1]",
title: ".row div[2]",
description: ".row div[3]",
duration: ".row div[4]"
})
// Follow the episode's link, then capture the script found under
// ".main_content" into the temporary "code" field.
.follow("a")
.set("code", ".main_content script:html")
.data((data) => {
// Replace the captured script text with the links extracted from it, so
// the script itself is not part of the episode data.
data.links = extractVideoLinks(data.code);
delete data.code;
})
]
})
})
.data((data) => {
// Write the data as JSON indented with 2 spaces; log the error on failure,
// otherwise log "SUCCESS".
fs.writeFile("scraped_data.json", JSON.stringify(data, null, 2), function (err) {
if (err) { return console.log(err); } // eslint-disable-line no-console
console.log("SUCCESS"); // eslint-disable-line no-console
});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment