Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ssddi456/4c62fcc9d7203df5681eefc3aad6929f to your computer and use it in GitHub Desktop.
Save ssddi456/4c62fcc9d7203df5681eefc3aad6929f to your computer and use it in GitHub Desktop.
For grabbing novels from xs.dmzj
var clawer_aggregate = require('./..');
var commander = require('commander');
var fs = require('fs');
var path = require('path');
// Command-line interface. Exactly one of the two flags is expected:
//   --noval <url>     URL of a novel page (the flag spelling is kept as-is,
//                     since it is the script's public interface)
//   --chapture <url>  URL of a single chapter page
// Declared with `var` so the script no longer leaks an implicit global
// (which would also throw a ReferenceError under strict mode).
// NOTE(review): the rest of the script reads `program.noval` and
// `program.chapture` directly, i.e. it relies on commander storing parsed
// options as properties of the returned object - confirm this holds for the
// installed commander version.
var program = commander
    .usage('')
    .option('--chapture <url>', 'page url')
    .option('--noval <url>', 'noval url')
    .parse(process.argv);
/**
 * Appends one text field of every grabbed document to `<title>.txt`, placed
 * in the directory of this script. The title of the first document names the
 * output file.
 *
 * @param {Array<Object>} docs - documents handed to the `exec` callback.
 * @param {string} field - name of the property that holds the text to write.
 */
function saveDocsAsText( docs, field ) {
    // Guard against an empty result: `docs[0].title` would otherwise throw.
    if( !docs || !docs.length ){
        console.log('nothing grabbed');
        return;
    }

    var filename = path.join(__dirname, docs[0].title + '.txt');
    docs.forEach(function( doc ) {
        // Appending (not overwriting) so every document ends up in one file.
        fs.appendFileSync( filename, doc[field] );
    });
    console.log('all done', filename);
}

if( !program.noval && !program.chapture ){
    // Neither mode was requested: show usage.
    program.help();
} else if( program.noval ){
    // Novel mode: read the title and the first download link of the novel
    // page, then grab what that link points to into the `doc` field.
    // NOTE(review): exact `grep`/`unwind` semantics come from
    // clawer_aggregate and are not visible here - confirm against the library.
    clawer_aggregate({
    })
        .grep( program.noval, {
            title : ['text', 'h1'],
            content: ['href', '.wrapper .left .download_boxbg a:first-of-type']
        })
        .unwind('content')
        .grep('$content', {
            doc : 'content'
        })
        .exec(function( e, res ) {
            if( e ){
                console.log( e.stack );
                return;
            }
            saveDocsAsText( res, 'doc' );
        });
} else if( program.chapture ){
    // Chapter mode: read the title and the chapter text from a single page.
    clawer_aggregate({})
        .grep(program.chapture, {
            title : ['text', 'h1'],
            content : ['text', '#novel_contents.novel_text']
        })
        // The callback must declare `res`; the original omitted it and then
        // failed with a ReferenceError on every successful run.
        .exec(function( e, res ) {
            if( e ){
                console.log( e.stack );
                return;
            }
            // This branch's grep spec only defines `title` and `content`, so
            // the chapter text is in `content` (the original wrote `doc.doc`).
            saveDocsAsText( res, 'content' );
        });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment