Skip to content

Instantly share code, notes, and snippets.

@franzenzenhofer
Forked from anonymous/gac-scrapper.js
Last active August 29, 2015 14:13
Show Gist options
  • Save franzenzenhofer/460ceeb9e9c65ef5d873 to your computer and use it in GitHub Desktop.
Save franzenzenhofer/460ceeb9e9c65ef5d873 to your computer and use it in GitHub Desktop.
// Debug helper: prints a single value to the browser console.
//   msg - any value; passed unchanged to console.log.
// Declared with `var` so the assignment does not create an implicit global
// (which is a ReferenceError in strict mode / ES modules).
var d = function (msg) {
  console.log(msg);
};
// CSS selector of the element whose mutations are recorded as autocomplete
// suggestions (handed to document.querySelector).
var _css_path_to_autocomplete_ = '#gsr > div.gstl_0.sbdd_a > div:nth-child(2) > div.sbdd_b > div > ul';
// CSS selector of the related-search elements (handed to querySelectorAll).
// Previously tried selectors: '#brs > div > div:nth-child(1) > p:nth-child(1) > a', '#brs'.
var _css_path_to_recommendations_ = '._e4b';
// Flat list of every recorded suggestion / related-search string.
// Declared explicitly so it is not created as an implicit global.
var A = [];
// Builds a new array containing the first occurrence of every value in `a`,
// keeping the original order. Values are compared via indexOf (strict
// equality); the input array is left untouched.
var arrayUnique = function(a) {
  var firstSeen = [];
  a.forEach(function(value) {
    if (firstSeen.indexOf(value) === -1) {
      firstSeen.push(value);
    }
  });
  return firstSeen;
};
// Collected research records (one object per recorded demand/trigger pair).
// Filled by addResearchItem and replaced by the deduplicated list after each
// recording pass. Declared with `var` instead of as an implicit global.
var Research = [];
// Appends one record to the global Research list.
//   autocomplete_or_related - the suggested phrase (stored as "demand")
//   query                   - the search-box text that produced it ("trigger")
//   source                  - 'autocomplete', 'related' or 'autocomplete+related'
// Returns false (and stores nothing) when any argument is an empty string;
// otherwise returns undefined after pushing the record.
var addResearchItem = function (autocomplete_or_related, query, source)
{
  if (autocomplete_or_related === '' || query === '' || source === '') { return false; }

  // Numeric code per known source; anything else is recorded as 0.
  var sourceIds = { 'autocomplete': 1, 'related': 2, 'autocomplete+related': 3 };
  // hasOwnProperty keeps inherited keys such as 'constructor' from matching.
  var source_id = Object.prototype.hasOwnProperty.call(sourceIds, source) ? sourceIds[source] : 0;

  // Locals are declared with `var` so nothing leaks into the global scope.
  var obj = {
    "demand": autocomplete_or_related,
    "demand_length": autocomplete_or_related.length,
    "trigger": query,
    "trigger_length": query.length,
    "source": source,
    "source_id": source_id,
    "duplicate": false
  };
  Research.push(obj);
};
// Sorts research records in place and returns the same array.
// Order: shortest trigger first, then shortest demand, then demand text
// compared case-insensitively with localeCompare.
//   R - array of records carrying trigger_length, demand_length and demand.
var sortResearch = function (R)
{
  return R.sort(function (a, b)
  {
    // Comparator temporaries are local; they used to leak as implicit globals.
    var triggerDiff = a.trigger_length - b.trigger_length;
    if (triggerDiff !== 0) { return triggerDiff; }
    var demandDiff = a.demand_length - b.demand_length;
    if (demandDiff !== 0) { return demandDiff; }
    return a.demand.toLowerCase().localeCompare(b.demand.toLowerCase());
  });
};
// Deduplicates research records by their `demand` text.
// The list is first ordered with sortResearch (which sorts R in place), so
// the record kept for each demand is the earliest one in that order. Every
// later record with the same demand is flagged `duplicate = true`; if its
// source differs from the kept record's, the kept record is relabelled as
// 'autocomplete+related' (source_id 3).
//   R - array of research records.
// Returns a new array holding only the records whose `duplicate` is false.
var researchArrayUnique = function (R) {
  var sorted = sortResearch(R);
  for (var i = 0; i < sorted.length; i++)
  {
    // Loop temporaries are local; they used to leak as implicit globals.
    var kept = sorted[i];
    for (var j = i + 1; j < sorted.length; j++)
    {
      var later = sorted[j];
      if (kept.demand !== later.demand) { continue; }
      later.duplicate = true;
      if (kept.source !== later.source)
      {
        kept.source = "autocomplete+related";
        kept.source_id = 3;
      }
    }
  }
  return sorted.filter(function (record) {
    return record.duplicate === false;
  });
};
// Breaks a string into its lines, accepting \r\n, \r and \n as separators.
// Any falsy input (empty string, null, undefined) yields an empty array.
var splitByNewLine = function(s) {
  return s ? s.split(/\r\n|\r|\n/g) : [];
};
// Starts recording autocomplete suggestions.
// Attaches a MutationObserver to the element matched by
// _css_path_to_autocomplete_. On every batch of mutations the lower-cased
// innerText of each mutated target is split into lines; every line is pushed
// onto A and recorded through addResearchItem together with the lower-cased
// value of the '#gbqfq' input, tagged 'autocomplete'. Afterwards Research is
// deduplicated and A is deduplicated and sorted.
// Does nothing (beyond logging) when the target element is not present.
var init = function () {
  d('google autocomplete recorder active');

  // select the target node
  var target = document.querySelector(_css_path_to_autocomplete_);
  if (!target) {
    // observe() would throw on a missing node; bail out with a hint instead.
    d('autocomplete target not found, recorder not attached');
    return;
  }

  var observer = new MutationObserver(function (mutations) {
    d('autocomplete mutation');

    // Read the current query once per batch. A missing search box yields '',
    // which addResearchItem rejects, so nothing bogus is recorded.
    var searchBox = document.querySelector('#gbqfq');
    var query = searchBox ? searchBox.value.toLowerCase() : '';

    mutations.forEach(function (mutation) {
      var acList = mutation.target;
      var text = acList && acList.innerText;
      if (!text) { return; }
      splitByNewLine(text.toLowerCase()).forEach(function (line) {
        A.push(line);
        addResearchItem(line, query, 'autocomplete');
      });
    });

    // Deduplicate once per batch rather than once per mutation record.
    Research = researchArrayUnique(Research);
    A = arrayUnique(A).sort();
  });

  // pass in the target node, as well as the observer options
  observer.observe(target, { attributes: true, childList: true, characterData: true });
};
//TODO: put this into its own observer
//bonus collect related searches, too
// Starts recording related searches.
// Attaches a MutationObserver to document.body (including its subtree). On
// every batch of mutations all elements matching
// _css_path_to_recommendations_ are read; the lower-cased innerText of each
// is pushed onto A and recorded through addResearchItem together with the
// lower-cased value of the '#gbqfq' input, tagged 'related'. Afterwards
// Research is deduplicated and A is deduplicated and sorted.
var init_recommendations = function () {
  d('google recommendation recorder active');

  var recommendation_observer = new MutationObserver(function (mutations)
  {
    d('mutation found in recommendation');
    d(mutations);

    var rs_items = document.querySelectorAll(_css_path_to_recommendations_);
    // querySelectorAll always returns a list, so test its length, not its truthiness.
    if (rs_items.length === 0) { return; }

    // Read the current query once per batch. A missing search box yields '',
    // which addResearchItem rejects.
    var searchBox = document.querySelector('#gbqfq');
    var query = searchBox ? searchBox.value.toLowerCase() : '';

    for (var ii = 0; ii < rs_items.length; ++ii)
    {
      d(rs_items[ii]);
      d(rs_items[ii].innerText);
      var related_term = rs_items[ii].innerText.toLowerCase();
      d(ii + ': ' + related_term);
      A.push(related_term);
      addResearchItem(related_term, query, 'related');
    }
    d('related searches end');

    Research = researchArrayUnique(Research);
    // This callback fires on every body mutation; without deduplication A
    // would gain a copy of every related term each time.
    A = arrayUnique(A).sort();
  });

  recommendation_observer.observe(document.body, { attributes: true, childList: true, characterData: true, subtree: true });
};
// Exports the global Research list as CSV by opening a data: URI in a new
// window. The first line is the column header; each record becomes one row.
// Fields containing a comma, double quote or line break are wrapped in double
// quotes with embedded quotes doubled, so such demands no longer shift the
// columns. The payload is encoded with encodeURIComponent because encodeURI
// leaves characters such as '#' untouched, which would cut the URI short.
// NOTE(review): some browsers restrict opening data: URLs in a new top-level
// window - confirm this still works in the targeted browser.
var researchToCsv = function ()
{
  // Quote a single CSV field when it contains a delimiter, quote or newline.
  var csvField = function (value) {
    var text = String(value);
    if (/[",\r\n]/.test(text)) {
      return '"' + text.replace(/"/g, '""') + '"';
    }
    return text;
  };

  var first_line = "demand,demand_length,trigger,trigger_length,source,source_id,duplicate\n";
  var rows = Research.map(function (o) {
    var fields = [o.demand, o.demand_length, o.trigger, o.trigger_length, o.source, o.source_id, o.duplicate];
    return fields.map(csvField).join(',') + "\n";
  });

  var csvContent = first_line + rows.join('');
  var encodedUri = "data:text/csv;charset=utf-8," + encodeURIComponent(csvContent);
  window.open(encodedUri);
};
// Keyboard shortcut: cmd + i exports the collected research as CSV.
// cmd_pressed mirrors whether the Cmd/Meta key is currently held. It is
// recomputed from every key event (metaKey flag, or keyCode 91/93 for the
// Cmd key itself) instead of only being cleared by the Cmd keyup: if that
// keyup is never delivered (e.g. the window loses focus while Cmd is down),
// a remembered flag would stay true and a plain "i" would trigger the export.
var cmd_pressed = false;
window.addEventListener('keydown', function (e) {
  cmd_pressed = Boolean(e.metaKey) || e.keyCode === 91 || e.keyCode === 93;
  // keyCode 73 is the "i" key.
  if (cmd_pressed && e.keyCode === 73) { d('cmd + i'); researchToCsv(); }
});
window.addEventListener('keyup', function (e) {
  // After releasing Cmd itself metaKey is false; while another Cmd key is
  // still held it remains true.
  cmd_pressed = Boolean(e.metaKey);
});
// Bootstrap observer: watches the whole document until the autocomplete list
// and the related-search elements have each been seen once. The matching
// recorder (init / init_recommendations) is started the first time its
// selector matches; once both have been started this observer disconnects.
var look_for_autocomplete = true;
var look_for_recommendations = true;
var bodyobserver = new MutationObserver(function () {
  var autocompleteFound = look_for_autocomplete === true &&
    document.querySelector(_css_path_to_autocomplete_);
  if (autocompleteFound) {
    init();
    look_for_autocomplete = false;
  }

  var recommendationsFound = look_for_recommendations === true &&
    document.querySelector(_css_path_to_recommendations_);
  if (recommendationsFound) {
    init_recommendations();
    look_for_recommendations = false;
  }

  // Keep watching while at least one recorder is still waiting for its target.
  var stillSearching = look_for_autocomplete !== false || look_for_recommendations !== false;
  if (stillSearching) {
    return;
  }

  d('no more bodyobserver');
  bodyobserver.disconnect();
});
bodyobserver.observe(document.body, { attributes: true, childList: true, characterData: true, subtree: true });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment