Created
February 15, 2017 18:07
-
-
Save thekoushik/99b2c3fe887318ade638279bfb060058 to your computer and use it in GitHub Desktop.
"The Flash (CW)" Episode Scraper using NodeJS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
"dependencies": {
    "chalk": "^1.1.3",
    "cheerio": "^0.22.0",
    "moment": "^2.16.0",
    "superagent": "^2.3.0"
}
Usage:
    node theflash [session]
    The session (season number) argument is optional; the latest one is used if it is not specified.
*/
var request=require('superagent'); | |
var cheerio=require('cheerio'); | |
var chalk=require('chalk'); | |
var moment=require('moment'); | |
/**
 * Converts the optional command-line session argument into a number.
 *
 * A return value of 0 means "use the latest session". Missing input returns 0
 * silently; non-numeric or negative input also returns 0 but prints a warning.
 *
 * @param {string|undefined} arg - Raw argument, normally process.argv[2].
 * @returns {number} A non-negative integer session number, or 0 for latest.
 */
function parseSessionArg(arg){
    if(arg===undefined){
        return 0;
    }
    // Always parse as base 10 so input such as "0x10" is not read as hexadecimal.
    var parsed=parseInt(arg,10);
    // parseInt yields NaN for non-numeric input; NaN fails every range
    // comparison, so it must be rejected here explicitly.
    if(isNaN(parsed) || parsed<0){
        console.log("Wrong Session, taking latest session as input.");
        return 0;
    }
    return parsed;
}
var option=parseSessionArg(process.argv[2]);
/**
 * Extracts the text between the first pair of double quotes in a title cell.
 * The extraction assumes the text starts with an opening quote. If no closing
 * quote is found, the trimmed raw text is returned.
 *
 * @param {string} raw - Text content of the title cell.
 * @returns {string} The episode title without the surrounding quotes.
 */
function episodeTitle(raw){
    var closing=raw.indexOf('"',1);
    if(closing<0){
        return raw.trim();
    }
    return raw.substring(1,closing);
}

/**
 * Formats an air date for display, falling back to "TBA" when the moment
 * object could not be parsed (for example when the date cell is empty).
 *
 * @param {Object} dt - A moment instance.
 * @returns {string} The formatted date, or "TBA".
 */
function formatAirDate(dt){
    return dt.isValid() ? dt.format("dddd Do MMMM YYYY") : "TBA";
}

// Download the Wikipedia episode list and print the episodes of the selected
// session; `option` is the session number, with 0 meaning "latest".
process.stdout.write("Checking....(Please Wait)");
request
    .get('https://en.wikipedia.org/wiki/List_of_The_Flash_episodes')
    .timeout(30000)
    .on('error', function(){
        // NOTE(review): the .end callback below also clears the current line,
        // so this message may be overwritten right away - confirm.
        process.stdout.write("\r\x1b[K");
        process.stdout.write("Server is not responding, try again later.");
    })
    .end(function(err, res){
        // Erase the "Checking..." progress text.
        process.stdout.write("\r\x1b[K");
        // superagent reports 4xx/5xx responses through `err` while still
        // supplying `res`, so both must be checked before parsing the body.
        if(err || !res){
            console.log(chalk.red("Network Error"));
            return;
        }
        var $ = cheerio.load(res.text);
        var sessions=$(".wikiepisodetable");
        if(sessions.length===0){
            // Nothing to list; presumably the page layout changed.
            console.log(chalk.red("No episode tables found on the page."));
            return;
        }
        // Written as a positive range test so that NaN is also rejected.
        var inRange=option>=1 && option<=sessions.length;
        if(!inRange){
            // 0 is the documented "latest session" default, so warn only
            // when something else was requested.
            if(option!==0){
                console.log("Wrong Session, taking latest session as input.");
            }
            option=sessions.length;
        }
        var episodes=$(sessions[option-1]).find(".vevent");
        var latest=null;
        episodes.each(function(){//each episodes
            var info=$(this).find("td");
            var title=episodeTitle($(info[1]).text());
            var date=formatAirDate(moment($(info[4]).find(".dtstart").text()));
            // Rows whose sixth cell reads "TBA" are treated as not yet aired
            // (presumably this is the viewers column - verify against the page).
            if($(info[5]).find("span").text()!=="TBA"){
                console.log(chalk.cyan(title)+" -",date);
            }else if(!latest){
                // Remember only the first pending episode; it is highlighted below.
                latest={title:title,date:date};
            }
        });
        if(latest){
            console.log(chalk.green(latest.title)+" -",chalk.yellow(latest.date));
        }
    });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment