Created
January 16, 2015 15:46
-
-
Save anonymous/c10005a58838d31b0763 to your computer and use it in GitHub Desktop.
Google autocomplete scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Debug helper: writes a single value to the console.
 *
 * Declared with `var` (like the other helpers in this file) instead of being
 * assigned to an undeclared name, which throws in strict mode / ES modules.
 *
 * @param {*} msg - Value to log.
 * @returns {void}
 */
var d = function(msg) {
    console.log(msg);
};
// Accumulator for the scraped lines. It is reassigned (not just mutated) by the
// observer callback later in this file, so it must stay a `var`, not a `const`.
var A = [];
/**
 * Returns a new array containing the distinct elements of `a`, keeping the
 * position of each element's first occurrence. The input is not modified.
 *
 * Membership is tracked in a Set, so the whole pass is linear instead of the
 * quadratic indexOf-per-element scan, and repeated NaN values are collapsed
 * too (indexOf can never find NaN).
 *
 * @param {Array} a - Array to de-duplicate.
 * @returns {Array} New array without duplicates.
 */
var arrayUnique = function(a) {
    var seen = new Set();
    return a.filter(function(item) {
        if (seen.has(item)) {
            return false;
        }
        seen.add(item);
        return true;
    });
};
/**
 * Splits text into lines on any of the common line terminators
 * (\r\n, \r or \n).
 *
 * @param {*} s - Text to split. Any falsy value (null, undefined, '', 0, ...)
 *     yields an empty array; other non-string values are converted with
 *     String() first instead of throwing on a missing `.split`.
 * @returns {string[]} The lines, without their terminators.
 */
var splitByNewLine = function(s) {
    if (!s) { return []; }
    // No `g` flag needed: split() always splits on every match.
    return String(s).split(/\r\n|\r|\n/);
};
/**
 * Logs the rendered text (`innerText`) of the element matching `#gsr`.
 * Does nothing when no such element exists, instead of throwing on `null`.
 *
 * @returns {void}
 */
var gac_scrapper = function () {
    var root = document.querySelector('#gsr');
    if (!root) { return; }
    console.log(root.innerText);
};
// NOTE: a `new MutationObserver(gac_scrapper)` used to be constructed here, but
// its result was discarded and .observe() was never called on it, so the
// callback could never fire. That no-op statement has been removed.
// Select the target node (presumably the <ul> holding the autocomplete
// suggestions; the selector depends on the page's current markup).
var target = document.querySelector('#gsr > div.gstl_0.sbdd_a > div:nth-child(2) > div.sbdd_b > div > ul');

// Create an observer instance. For every mutation record, the rendered text of
// the mutated node is lower-cased, split into lines and merged into the global
// list `A`, which is then de-duplicated and sorted; its size is logged via d().
var observer = new MutationObserver(function(mutations) {
    mutations.forEach(function(mutation) {
        var acList = mutation.target;
        if (!acList) { return; }
        // innerText can be undefined on non-HTML nodes; treat that as no text.
        var lines = splitByNewLine((acList.innerText || '').toLowerCase());
        lines.forEach(function(line) {
            // Skip blank lines so '' is never recorded as an entry.
            if (line) { A.push(line); }
        });
        // sort() sorts in place and returns the same array.
        A = arrayUnique(A).sort();
        d(A.length);
    });
});

// Configuration of the observer.
// NOTE(review): without `subtree: true` only the target itself is watched, so
// `characterData` has no effect on an element target -- confirm this is intended.
var config = { attributes: true, childList: true, characterData: true };

// Pass in the target node, as well as the observer options. observe() throws a
// TypeError when the target is null, so warn instead if the selector matched
// nothing (e.g. the list is not rendered yet or the markup has changed).
if (target) {
    observer.observe(target, config);
} else {
    console.warn('gac scrapper: autocomplete list not found; nothing is being observed');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment