Skip to content

Instantly share code, notes, and snippets.

@Cshion
Created February 19, 2016 17:32
Show Gist options
  • Save Cshion/88bfb257f1b851862dff to your computer and use it in GitHub Desktop.
Save Cshion/88bfb257f1b851862dff to your computer and use it in GitHub Desktop.
Basic DRAE scraper; needs a refactor to handle redirections.
var request = require( "request" );
var cheerio = require( "cheerio" );
var async = require( "async" );
/**
 * Fetches the raw HTML of the DLE search page for a word.
 *
 * Issues a request to the search endpoint with the word in the `w` query
 * parameter and a fixed `m` parameter of 30.
 * NOTE(review): the meaning of `m : 30` is not visible here — presumably a
 * search-mode selector of the remote service; confirm before changing it.
 *
 * @param {string} word - Word to look up.
 * @param {function(?Error, string=)} callback - Node-style callback; receives
 *     the transport error or an Error for a non-2xx response, otherwise
 *     `null` and the response body.
 */
var getHtml = function( word , callback ) {
    var BASE_URL = "http://dle.rae.es/srv/search";
    request( {
        uri : BASE_URL ,
        qs : {
            w : word ,
            m : 30
        }
    } , function( err , response , html ) {
        if( err ) {
            return callback( err );
        }
        // `err` only covers transport failures; an HTTP error status still
        // arrives here with a body, which must not be parsed as a result page.
        if( response.statusCode < 200 || response.statusCode >= 300 ) {
            return callback( new Error(
                "Unexpected HTTP status " + response.statusCode + " from " + BASE_URL
            ) );
        }
        return callback( null , html );
    } );
};
/**
 * Extracts entry texts from a DLE result page.
 *
 * The synchronous extractor is wrapped with `async.asyncify` so it can be
 * used as a callback-style step (e.g. inside `async.waterfall`).
 *
 * For every `<p>` whose `class` attribute starts with "j", the text of each
 * child element is collected, each piece prefixed by a single space, and the
 * concatenation is pushed to the result list. Only element children are
 * visited, so text sitting directly inside the `<p>` is not included.
 *
 * @param {string} html - HTML document to parse.
 * @returns {string[]} One string per matching paragraph, in document order.
 */
var crawlHtml = async.asyncify( function( html ) {
    var $ = cheerio.load( html );
    var classPattern = /^j.*/;
    var definitions = [];

    $( "p" ).each( function() {
        var paragraph = $( this );

        // Skip paragraphs whose class does not begin with "j".
        if( !classPattern.test( paragraph.attr( "class" ) ) ) {
            return;
        }

        var pieces = [];
        paragraph.children().each( function() {
            pieces.push( " " + $( this ).text() );
        } );
        definitions.push( pieces.join( "" ) );
    } );

    return definitions;
} );
/**
 * Looks up a word: downloads its result page and extracts the entry texts.
 *
 * Runs three steps in sequence with `async.waterfall`: seed the word,
 * fetch the HTML (`getHtml`), then parse it (`crawlHtml`).
 *
 * @param {string} word - Word to look up.
 * @param {function(?Error, *=)} callback - Node-style callback; called with
 *     the error alone on failure, or with `null` and the parsed result.
 */
function getWordReference( word , callback ) {
    var steps = [
        async.constant( word ) ,
        getHtml ,
        crawlHtml
    ];

    async.waterfall( steps , function( err , result ) {
        return err ? callback( err ) : callback( null , result );
    } );
}
// Demo run: look up "cama" and print either the error or the extracted entries.
getWordReference( "cama" , function( err , result ) {
    console.log( err ? err : result );
} );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment