Skip to content

Instantly share code, notes, and snippets.

@codingisacopingstrategy
Created April 9, 2014 12:49
Show Gist options
  • Save codingisacopingstrategy/10266169 to your computer and use it in GitHub Desktop.
Save codingisacopingstrategy/10266169 to your computer and use it in GitHub Desktop.
Scrape a webpage and print the inner HTML of the element matching a CSS selector (defaults to `#content`)
/**
 * Usage: phantomjs scrape.js URL [selector]
 * selector defaults to `#content`
 *
 * Opens URL in a PhantomJS page and prints to stdout the innerHTML of the
 * first element matching `selector`.
 *
 * Exit status:
 *   0 - the element's HTML was printed
 *   1 - wrong number of arguments, the page could not be opened, or no
 *       HTML could be read for the selector
 */
var page = require('webpage').create(),
    system = require('system');

if (system.args.length < 2 || system.args.length > 3) {
    console.log('Usage: phantomjs scrape.js URL [selector]');
    console.log(' selector defaults to `#content`');
    phantom.exit(1);
} else {
    // Resolve the selector once so it can be both passed into the page
    // and quoted in the error message below.
    var selector = system.args[2] ? system.args[2] : '#content';

    page.open(system.args[1], function (status) {
        if (status !== 'success') {
            console.log('Unable to access network');
            phantom.exit(1);
            return;
        }

        // This function runs inside the loaded page; the selector is passed
        // in as an argument rather than captured from the outer scope.
        var html = page.evaluate(function (selector) {
            var element = document.querySelector(selector);
            // querySelector yields null when nothing matches; report that
            // as null instead of dereferencing it.
            return element ? element.innerHTML : null;
        }, selector);

        // Anything other than a string means no HTML was obtained.
        if (typeof html !== 'string') {
            // Diagnostics go to stderr so stdout only ever carries HTML.
            system.stderr.writeLine('No HTML found for selector: ' + selector);
            phantom.exit(1);
            return;
        }

        console.log(html);
        phantom.exit();
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment