Created
December 6, 2022 16:28
-
-
Save NickDeckerDevs/63c44001b5432abad647c88fb04b9ca9 to your computer and use it in GitHub Desktop.
google app script that scrapes a webpage (using column data) and then fills in content related to webpage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function onOpen() { | |
var ui = SpreadsheetApp.getUi(); | |
ui.createMenu('Product Functions') | |
.addItem('Scrape Product SKUs', 'menuItem1') | |
.addSeparator() | |
.addSubMenu(ui.createMenu('Clean Up') | |
.addItem('Second item', 'menuItem2')) | |
.addToUi(); | |
} | |
function menuItem1() { | |
fetchData(); | |
} | |
function getElementById(element, idToFind) { | |
var descendants = element.getDescendants(); | |
for(i in descendants) { | |
var elt = descendants[i].asElement(); | |
if( elt !=null) { | |
var id = elt.getAttribute('id'); | |
if( id !=null && id.getValue()== idToFind) return elt; | |
} | |
} | |
} | |
function getElementsByClassName(element, classToFind) { | |
var data = []; | |
var descendants = element.getDescendants(); | |
descendants.push(element); | |
for(i in descendants) { | |
var elt = descendants[i].asElement(); | |
if(elt != null) { | |
var classes = elt.getAttribute('class'); | |
if(classes != null) { | |
classes = classes.getValue(); | |
if(classes == classToFind) data.push(elt); | |
else { | |
classes = classes.split(' '); | |
for(j in classes) { | |
if(classes[j] == classToFind) { | |
data.push(elt); | |
break; | |
} | |
} | |
} | |
} | |
} | |
} | |
return data; | |
} | |
function getElementsByTagName(element, tagName) { | |
var data = []; | |
var descendants = element.getDescendants(); | |
for(i in descendants) { | |
var elt = descendants[i].asElement(); | |
if( elt !=null && elt.getName()== tagName) data.push(elt); | |
} | |
return data; | |
} | |
var ui = SpreadsheetApp.getUi(); | |
var ss = SpreadsheetApp.getActiveSpreadsheet(); | |
var sheet = ss.getActiveSheet(); | |
var rangeData = sheet.getDataRange(); | |
var lastColumn = rangeData.getLastColumn(); | |
var lastRow = rangeData.getLastRow(); | |
var activeCell = sheet.getActiveCell().getRow(); | |
var selection = sheet.getSelection(); | |
var searchRange = selection.getActiveRange(); //sheet.getRange(activeCell, lastRow); | |
function fetchData() { | |
//var selection = rangeData.getSelection(); | |
var range = SpreadsheetApp.getActiveSheet().getActiveRange(); | |
var numRows = range.getNumRows(); | |
var numCols = range.getNumColumns(); | |
for (var i = 1; i <= numRows; i++) { | |
for (var j = 1; j <= numCols; j++) { | |
var currentValue = range.getCell(i,j).getValue(); | |
if(currentValue != null || currentValue != 'NA') { | |
var metaSKUFinder = /(?![\<meta content=\'])(.*)(?=\' itemprop=\'productID\')/g; | |
var html = UrlFetchApp.fetch(currentValue).getContentText(); | |
var metaSKU = html.match(metaSKUFinder); | |
var withString = currentValue + "#"+metaSKU; | |
range.getCell(i,j).setValue(withString); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment