Created
April 27, 2017 19:44
-
-
Save btk/fe3cdb833f6d661900982207c624e7be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require("fs"); | |
var mysql = require("mysql"); | |
var request = require("request"); | |
var http = require("http"); | |
/**
 * Builds a short identifier that always starts with a letter.
 *
 * Layout: one random letter (a-z), then the current time in milliseconds
 * encoded in base 36, then at least one random alphanumeric character
 * (0-9, a-z); more random characters are appended until the id is at
 * least 10 characters long.
 *
 * NOTE: uniqueness is best-effort only. Ids generated within the same
 * millisecond differ solely in their random characters.
 *
 * @returns {string} the generated identifier
 */
function uniqueid() {
  // desired (minimum) length of the id
  var idStrLen = 10;
  // always start with a letter: 10..35 map to 'a'..'z' in base 36, so the
  // multiplier must be 26 (25 would make 'z' unreachable)
  var idStr = (Math.floor(Math.random() * 26) + 10).toString(36);
  // add a timestamp in milliseconds (base 36 again) as the base
  idStr += (new Date()).getTime().toString(36);
  // complete the id with random alphanumeric characters; 0..35 covers the
  // whole base-36 alphabet. do/while guarantees at least one random
  // character even when the timestamp alone already reaches the length.
  do {
    idStr += Math.floor(Math.random() * 36).toString(36);
  } while (idStr.length < idStrLen);
  return idStr;
}
/**
 * Returns the text that follows the first occurrence of `marker` in `text`,
 * up to (not including) the next double quote.
 *
 * @param {string} text   markup fragment to search
 * @param {string} marker opening text, e.g. 'title="'
 * @returns {string|null} the extracted value, or null when `marker` is absent
 */
function attributeValue(text, marker) {
  var parts = text.split(marker);
  if (parts.length < 2) {
    return null;
  }
  return parts[1].split('"')[0];
}

/**
 * Derives the SVG download URL from an icon's PNG URL by reusing the 7th and
 * 8th "/"-separated segments (the 8th without its file extension).
 *
 * @param {string} png PNG URL taken from the icon's <img src="...">
 * @returns {string|null} the SVG URL, or null when the PNG URL is too short
 */
function svgUrlFromPng(png) {
  var segments = png.split("/");
  if (segments.length < 8) {
    return null;
  }
  return "http://image.flaticon.com/icons/svg/" + segments[6] + "/" + segments[7].split(".")[0] + ".svg";
}

/**
 * Fetches one listing page and hands every icon found on it (at most 100)
 * to addSVG().
 *
 * The page body is cut at each 'class="icon"' occurrence; every fragment
 * after the first is treated as one icon entry. Entries that lack any of the
 * expected attributes are skipped with a log line instead of throwing, since
 * an exception inside the response callback would take down the process.
 *
 * @param {number|string} page page identifier appended to the base URL
 * @param {Object} connect database connection, passed through to addSVG()
 */
function crawl(page, connect) {
  var url = "http://url/" + page;
  var reeq = request({
    url: url,
    json: false
  }, function(error, response, body) {
    if (error) {
      // Reported by the 'error' listener registered below.
      return;
    }
    if (response.statusCode !== 200) {
      console.log("Skipping page " + page + ": HTTP " + response.statusCode);
      return;
    }
    var chunks = body.split('class="icon"');
    // chunks[0] is the markup before the first icon, so start at 1.
    for (var i = 1; i <= 100; i++) {
      var chunk = chunks[i];
      if (!chunk) {
        // Ran out of entries before reaching the 100-entry cap.
        console.log("#####################################");
        console.log("Current crawling page: " + page);
        console.log("Currently crawling batch size: "+ i);
        break;
      }
      var title = attributeValue(chunk, 'title="');
      var data_team = attributeValue(chunk, 'data-team="');
      var data_color = attributeValue(chunk, 'data-color="');
      var data_pack = attributeValue(chunk, 'data-pack="');
      var png = attributeValue(chunk, '<img src="');
      var svg = png === null ? null : svgUrlFromPng(png);
      if (title === null || data_team === null || data_color === null || data_pack === null || svg === null) {
        console.log("Skipping malformed entry " + i + " on page " + page);
        continue;
      }
      addSVG(connect, title, svg, data_pack, data_color, data_team);
    }
  });
  reeq.on('error', function(err) {
    console.log("Error catched: "+ err);
  });
}
// Settings for the MySQL server the script writes to.
var connectionSettings = {
  host: 'localhost',
  user: 'root',
  password: '',
  database: 'svgrepo'
};

// Shared connection handle. It is only created here; connect() is called
// further down, right before crawling starts.
var connection = mysql.createConnection(connectionSettings);
/**
 * Inserts one icon row into the `svg` table and, when the insert succeeds,
 * starts downloading the icon file under the freshly generated hash.
 *
 * @param {Object} connect database connection exposing query(sql, values, cb);
 *                         when omitted, the module-level `connection` is used
 * @param {string} title   icon title
 * @param {string} url     URL of the SVG file to download
 * @param {string} dp      value stored in the data_pack column
 * @param {string} dc      value stored in the data_color column
 * @param {string} dt      value stored in the data_team column
 */
var addSVG = function(connect, title, url, dp, dc, dt) {
  // Use the connection handed in by the caller instead of silently ignoring
  // it; fall back to the shared one for callers that pass nothing.
  var db = connect || connection;
  var uniq = uniqueid();
  var row = {
    svg_hash: uniq,
    title: title,
    data_pack: dp,
    data_color: dc,
    data_team: dt
  };
  db.query('INSERT INTO svg SET ?', row, function(err, res) {
    if (err) {
      console.log("Wooops, "+ err);
      return;
    }
    // Download only after the row exists, so files never lack a DB entry.
    asyncSave(url, uniq);
    console.log("Added: " + title);
  });
};
/**
 * Downloads the file at `svg` over HTTP and writes it to "svg/<hash>.svg".
 *
 * Failures (request error, non-200 response, write error) are logged and
 * otherwise ignored so that one bad download cannot stop the crawl. The
 * "Saved" line is printed once the write stream has finished, i.e. after
 * the data has actually been flushed.
 *
 * @param {string} svg  URL of the SVG file
 * @param {string} hash identifier used as the local file name
 */
function asyncSave(svg, hash) {
  var fileName = hash + ".svg";
  var download = http.get(svg, function(response) {
    if (response.statusCode !== 200) {
      console.log("Download failed for " + fileName + ": HTTP " + response.statusCode);
      // Discard the body so the socket is released.
      response.resume();
      return;
    }
    var file = fs.createWriteStream("svg/" + fileName);
    file.on('error', function(err) {
      console.log("Write failed for " + fileName + ": " + err);
    });
    file.on('finish', function() {
      console.log("Saved: " + fileName);
    });
    response.pipe(file);
  });
  // Without a listener an 'error' event on the request would be thrown.
  download.on('error', function(err) {
    console.log("Download failed for " + fileName + ": " + err);
  });
}
// Open the database connection, then start a crawl for every page number
// from 1 through 2590.
// NOTE(review): crawl() only starts its request and returns, so this loop
// issues all page requests without waiting for earlier ones to complete.
connection.connect();
for (var j = 1; j < 2591; j++) {
  crawl(j, connection);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment