Skip to content

Instantly share code, notes, and snippets.

@NickDeckerDevs
Created December 6, 2022 16:28
Show Gist options
  • Save NickDeckerDevs/63c44001b5432abad647c88fb04b9ca9 to your computer and use it in GitHub Desktop.
Save NickDeckerDevs/63c44001b5432abad647c88fb04b9ca9 to your computer and use it in GitHub Desktop.
google app script that scrapes a webpage (using column data) and then fills in content related to webpage
function onOpen() {
var ui = SpreadsheetApp.getUi();
ui.createMenu('Product Functions')
.addItem('Scrape Product SKUs', 'menuItem1')
.addSeparator()
.addSubMenu(ui.createMenu('Clean Up')
.addItem('Second item', 'menuItem2'))
.addToUi();
}
function menuItem1() {
fetchData();
}
function getElementById(element, idToFind) {
var descendants = element.getDescendants();
for(i in descendants) {
var elt = descendants[i].asElement();
if( elt !=null) {
var id = elt.getAttribute('id');
if( id !=null && id.getValue()== idToFind) return elt;
}
}
}
function getElementsByClassName(element, classToFind) {
var data = [];
var descendants = element.getDescendants();
descendants.push(element);
for(i in descendants) {
var elt = descendants[i].asElement();
if(elt != null) {
var classes = elt.getAttribute('class');
if(classes != null) {
classes = classes.getValue();
if(classes == classToFind) data.push(elt);
else {
classes = classes.split(' ');
for(j in classes) {
if(classes[j] == classToFind) {
data.push(elt);
break;
}
}
}
}
}
}
return data;
}
function getElementsByTagName(element, tagName) {
var data = [];
var descendants = element.getDescendants();
for(i in descendants) {
var elt = descendants[i].asElement();
if( elt !=null && elt.getName()== tagName) data.push(elt);
}
return data;
}
var ui = SpreadsheetApp.getUi();
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet();
var rangeData = sheet.getDataRange();
var lastColumn = rangeData.getLastColumn();
var lastRow = rangeData.getLastRow();
var activeCell = sheet.getActiveCell().getRow();
var selection = sheet.getSelection();
var searchRange = selection.getActiveRange(); //sheet.getRange(activeCell, lastRow);
function fetchData() {
//var selection = rangeData.getSelection();
var range = SpreadsheetApp.getActiveSheet().getActiveRange();
var numRows = range.getNumRows();
var numCols = range.getNumColumns();
for (var i = 1; i <= numRows; i++) {
for (var j = 1; j <= numCols; j++) {
var currentValue = range.getCell(i,j).getValue();
if(currentValue != null || currentValue != 'NA') {
var metaSKUFinder = /(?![\<meta content=\'])(.*)(?=\' itemprop=\'productID\')/g;
var html = UrlFetchApp.fetch(currentValue).getContentText();
var metaSKU = html.match(metaSKUFinder);
var withString = currentValue + "#"+metaSKU;
range.getCell(i,j).setValue(withString);
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment