Created
August 27, 2014 16:07
-
-
Save maxpaynestory/a2fe0203dbcb34fd2a97 to your computer and use it in GitHub Desktop.
Scrape Amazon Seller Central orders using CasperJS and PhantomJS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// String helpers used when cleaning scraped text.

// trim: strips whitespace from both ends. Installed only when the runtime
// does not already provide it, so a native implementation is never replaced.
if (typeof String.prototype.trim !== 'function') {
    String.prototype.trim = function () {
        return this.replace(/^\s+|\s+$/g, '');
    };
}

// ltrim: strips leading whitespace only.
String.prototype.ltrim = function () {
    return this.replace(/^\s+/, '');
};

// rtrim: strips trailing whitespace only.
String.prototype.rtrim = function () {
    return this.replace(/\s+$/, '');
};

// fulltrim: removes whitespace at the start/end of the string and around
// newlines, then collapses every remaining whitespace run to a single space.
String.prototype.fulltrim = function () {
    return this.replace(/(?:(?:^|\n)\s+|\s+(?:$|\n))/g, '').replace(/\s+/g, ' ');
};
// Formats the date as MM/DD/YYYY using its UTC components.
// Month and day are left-padded to two digits.
Date.prototype.MMDDYYYY = function () {
    var twoDigits = function (value) {
        return value < 10 ? "0" + value : String(value);
    };
    var month = twoDigits(this.getUTCMonth() + 1); // getUTCMonth() is zero-based
    var day = twoDigits(this.getUTCDate());
    return month + "/" + day + "/" + this.getUTCFullYear();
};
// Shared Casper instance used by every step of this script.
var casper = require('casper').create({
    logLevel: 'error',
    verbose: true,
    pageSettings: {
        // Sent as the browser user agent for every request.
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
        loadPlugins: false,
        loadImages: false
    }
});
// Command-line options. Each variable is declared on its own: a chained
// `var a = b = c = ""` would only declare the first name and leak the rest
// as implicit globals.
var cli_username = "";
var cli_password = "";
var cli_filename = "";
var cli_filepath = "";
var cli_fromdate = "";
var cli_todate = "";

// All six options are mandatory; print usage and exit when any is absent.
if (!casper.cli.has("email") || !casper.cli.has("password") || !casper.cli.has("filename") || !casper.cli.has("filepath") || !casper.cli.has("fromdate") || !casper.cli.has("todate")) {
    casper.echo("");
    casper.echo("");
    casper.echo("Something is missing");
    casper.echo("How to use this script");
    casper.echo("Example");
    casper.echo("");
    casper.echo("casperjs.exe amazon_pull_orders.js --email=myemail@gmail.com --password=mypassword --filename=\"orders_of_july.csv\" --filepath=\"D:\\downloaded_csv\" --fromdate=23/11/2011 --todate=25/11/2011");
    casper.echo("");
    casper.echo("");
    casper.echo("fromdate and todate should be in DD/MM/YYYY format");
    casper.echo("");
    casper.echo("");
    casper.exit(1);
}

cli_username = casper.cli.get("email");
cli_password = casper.cli.get("password");
cli_filename = casper.cli.get("filename");
cli_filepath = casper.cli.get("filepath");
cli_fromdate = casper.cli.get("fromdate");
cli_todate = casper.cli.get("todate");
var utils = require('utils'); | |
var fs = require('fs'); | |
var x = require('casper').selectXPath; | |
// Screenshot file name; any stale copy is removed below before the run.
var screenshot = "tasveer.png";
var waittime = 8000; // in milliseconds

// Validate the target directory when one was supplied. At the top level of
// the script `this` is not the Casper instance, so the messages must go
// through `casper` directly.
if (cli_filepath.length > 0) {
    if (!fs.isDirectory(cli_filepath)) {
        casper.echo(cli_filepath + " doesn't exists");
        casper.exit(1);
    } else if (!fs.isWritable(cli_filepath)) {
        casper.echo(cli_filepath + " is not writable");
        casper.exit(1);
    }
}

// Full path of the CSV file that will receive the scraped orders.
var outputfilename = (cli_filepath.length > 0 ? cli_filepath + "/" : "") + cli_filename;
// Number of result pages in the order listing; updated once the listing loads.
var listingpages = 1;
// Order detail URLs collected from the listing pages.
var orderlinks = [];
// CSV header row; column order matches the rows written by PickOrderDetail.
var header = "Order ID,Order Date,Ship Address,Ship Country,Buyer Username,Sales Channel,Items Total,Delivery total,Refunds Total,Grand Total,Order Line,SKU,Quantity Ordered,Quantity Shipped,Price Each,Shipping,Refund,Status";

// Start from a clean slate: drop leftovers from a previous run.
if (fs.exists(screenshot)) {
    fs.remove(screenshot);
}
if (fs.exists(outputfilename)) {
    fs.remove(outputfilename);
}
var stream = fs.open(outputfilename, "w");
/**
 * Turns a scraped value into a double-quoted CSV field.
 *
 * Double quotes and the currency symbols $, £ and € are removed, newlines
 * become spaces, surrounding whitespace is stripped and inner whitespace runs
 * are collapsed to one space.
 *
 * @param {*} str Value to clean; null/undefined are treated as an empty string,
 *                other non-string values are converted with String().
 * @returns {string} The cleaned text wrapped in double quotes.
 */
function SanitizeString(str) {
    // Coerce first so a non-string value cannot make the string methods throw.
    var text = (str == null) ? '' : String(str);
    var returnstring = text.trim()
        .replace(/"/g, '')
        .replace(/\n/g, " ")
        .replace(/\$/g, "")
        .replace(/£/g, "")
        .replace(/€/g, "");
    // No newline is left at this point, so trimming again and collapsing
    // whitespace gives the same result as the fulltrim() helper without
    // depending on a patched String.prototype.
    returnstring = returnstring.replace(/^\s+|\s+$/g, '').replace(/\s+/g, ' ');
    return '"' + returnstring + '"';
}
/**
 * Queues a step that opens one order detail page, scrapes it and writes one
 * CSV row per order line to the output stream.
 *
 * Nothing is written when the link carries no `orderID=` parameter or when
 * the page could not be scraped.
 *
 * @param {Object} c      Casper instance used to queue the navigation step.
 * @param {string} link   URL of the order detail page.
 * @param {Object} stream Output stream; only `writeLine` is used.
 */
function PickOrderDetail(c, link, stream) {
    c.thenOpen(link, function () {
        var orderIdMatch = link.match(/orderID=\d+-\d+-\d+/g);
        if (!orderIdMatch) {
            return;
        }
        var invoice_number = orderIdMatch[0].split("=")[1];
        this.echo("Getting detail of order " + invoice_number);

        // Runs inside the remote page, where `$` is the page's jQuery.
        // Every variable is declared locally so nothing leaks into the page's
        // global scope.
        var thedata = this.evaluate(function () {
            var addressHeading = "<strong>Shipping Address</strong><br>";
            var jsondata = {};
            jsondata.orderdatetext = $("td.data-display-field:contains('Order Date'):eq(1)").next().text();
            jsondata.buyername = $("span#_myo_buyerEmail_progressIndicator").prev().text();
            // .html() yields no string when the cell is missing; fall back to ""
            // so the rest of the order is still captured.
            var shipCellHtml = $("td.data-display-field strong:contains('Shipping Address')").parent().html() || "";
            var ship_address_pieces = shipCellHtml.replace(addressHeading, "").split("<br>");
            jsondata.shipcountry = ship_address_pieces[ship_address_pieces.length - 2];
            jsondata.saleschannel = $("td.data-display-field:contains('Sales channel'):eq(1)").next().text();
            var addressHtml = $("strong:contains('Shipping Address')").parent().html() || "";
            jsondata.shippingaddress = addressHtml.replace(addressHeading, "").replace(/<br>/g, ", ");
            jsondata.itemstotal = $("td.data-display-field[align=right]:contains('Items total:')").parent().find("td:nth-child(2)").text();
            jsondata.deliverytotal = $("td.data-display-field[align=right]:contains('Delivery total:')").parent().find("td:nth-child(2)").text();
            jsondata.grandtotal = $("td.data-display-field[align=right]:contains('Grand Total')").parent().find("td:nth-child(2)").text();
            jsondata.refundtotal = $("td.data-display-field[align=right]:contains('Refund(s) total:')").parent().find("td:nth-child(2)").text().replace(/\(|\)/g, "");

            var order_items_text = [];
            $.each($("table.data-display:eq(3) tbody:eq(0)").children("tr"), function () {
                var row = $(this);
                // Only the striped rows are order lines.
                if (!row.hasClass("list-row-odd") && !row.hasClass("list-row-even")) {
                    return;
                }
                var tr_text = row.text().replace(/(\r\n|\n|\t)/g, "");
                var skuMatch = tr_text.match(/SKU: +\w+(?:-\w+)*/g);
                order_items_text.push({
                    // Text after "SKU:"; null when the row shows no SKU.
                    sku: skuMatch ? skuMatch[0].split(":")[1] : null,
                    quantity_ordered: row.children("td:eq(2)").text(),
                    quantity_shipped: row.children("td:eq(3)").text(),
                    price_each: row.children("td:eq(4)").text(),
                    ship_charged: row.children("td:eq(5)").find("table tbody tr:eq(1) td:eq(1)").text(),
                    order_status: row.children("td:eq(1)").text(),
                    refund: row.children("td:nth-child(6)").find("table tr:nth-child(3) td:nth-child(2)").text().replace(/\(|\)/g, "")
                });
            });
            jsondata.orderitems = order_items_text;
            return jsondata;
        });

        // evaluate() gives back nothing usable when the page-side code failed.
        if (!thedata) {
            this.echo("Unable to read details of order " + invoice_number);
            return;
        }

        var buyer_name = thedata.buyername || "";
        var shipping_address = thedata.shippingaddress || "";
        // The address block starts with the buyer's name; drop it. Skipped for
        // an empty name, which would otherwise strip the first ", " separator.
        if (buyer_name) {
            shipping_address = shipping_address.replace(buyer_name + ", ", "");
        }

        // Order-level columns, in the same order as the CSV header.
        var orderColumns = [
            SanitizeString(invoice_number),
            SanitizeString(thedata.orderdatetext),
            SanitizeString(shipping_address),
            SanitizeString(thedata.shipcountry),
            SanitizeString(buyer_name),
            SanitizeString(thedata.saleschannel),
            SanitizeString(thedata.itemstotal),
            SanitizeString(thedata.deliverytotal),
            SanitizeString(thedata.refundtotal),
            SanitizeString(thedata.grandtotal)
        ];

        var orderItems = thedata.orderitems || [];
        for (var dataitemindex = 0; dataitemindex < orderItems.length; dataitemindex++) {
            var item = orderItems[dataitemindex];
            var lineColumns = [
                dataitemindex + 1, // order line number, 1-based and unquoted
                SanitizeString(item.sku),
                SanitizeString(item.quantity_ordered),
                SanitizeString(item.quantity_shipped),
                SanitizeString(item.price_each),
                SanitizeString(item.ship_charged),
                SanitizeString(item.refund),
                SanitizeString(item.order_status)
            ];
            stream.writeLine(orderColumns.concat(lineColumns).join(","));
        }
    });
}
/**
 * Collects the order detail links shown on the current listing page and
 * appends their hrefs to the script-level `orderlinks` array.
 *
 * @param {Object} c Casper instance positioned on an order listing page.
 */
function PickOrdersFromListing(c) {
    var selector = 'tr.order-row td:nth-child(4) a';
    // getElementsInfo() raises an error when nothing matches, so a page
    // without order rows is treated as an empty result instead.
    var orderlinkhrefs = c.exists(selector) ? c.getElementsInfo(selector) : [];
    c.echo("Picked " + orderlinkhrefs.length + " orders");
    for (var cindex = 0; cindex < orderlinkhrefs.length; cindex++) {
        orderlinks.push(orderlinkhrefs[cindex].attributes.href);
    }
}
// Listener for the 'remote.message' event. It is deliberately a no-op;
// re-enable the echo below to print those messages while debugging.
casper.on('remote.message', function (message) {
    //this.echo(message);
});
// Step 1: open the Seller Central home page and submit the sign-in form.
casper.start('https://sellercentral.amazon.co.uk/gp/homepage.html', function () {
    if (!this.exists("input#username")) {
        this.echo("Unable to found amazon login page");
        this.exit(1);
        // exit() does not stop the current function; return so the form fill
        // below is not attempted on a page without the login form.
        return;
    }
    this.echo("Logging in to seller central");
    // The third argument (true) submits the form after filling it.
    this.fillSelectors('form[name=signinWidget]', {
        'input#username': cli_username,
        'input#password': cli_password
    }, true);
});
// Step 2: confirm the login worked by looking for the settings quick link.
casper.then(function () {
    var loggedIn = this.exists("li#sc-quicklink-settings");
    if (loggedIn) {
        return;
    }
    this.echo("Login failed. Invalid email or password");
    this.exit(1);
});
// Step 3: open the advanced order search, fill in the exact date range and
// start the search.
casper.thenOpen("https://sellercentral.amazon.co.uk/gp/orders-v2/search/ref=ag_myosearch_apsearch_myo", function () {
    // Values handed to the page-side function; the keys match its parameter names.
    var searchDates = {
        cli_fromdate: cli_fromdate,
        cli_todate: cli_todate
    };

    this.click('input#_myoSO_SearchOption_exactDates');
    this.click('input#_myoSO_ShowPendingCheckBox');

    // Runs inside the remote page: set the form fields directly.
    this.evaluate(function (cli_fromdate, cli_todate) {
        var beginField = document.querySelector('input#exactDateBegin');
        var endField = document.querySelector('input#exactDateEnd');
        var sortField = document.querySelector('select#sortBy');
        beginField.value = cli_fromdate;
        endField.value = cli_todate;
        sortField.value = "OrderDateDescending";
    }, searchDates);

    this.click('button#_myoSO_SearchButton');
    this.echo("Filled advanced search form and initiated search");
});
// Step 4: stop when the search found nothing, otherwise reload the result
// list with 100 items per page.
casper.then(function () {
    var currentlink = this.getCurrentUrl();
    // A dmCode parameter in the URL is treated as "no orders found".
    var order_not_found = currentlink.match(/dmCode=\w+/g);
    if (order_not_found) {
        this.click("li.sc-logout-quicklink a");
        this.echo("No orders found for this date range");
        this.exit(1);
        // exit() does not stop the current function; return so the listing
        // is not reopened after the exit was requested.
        return;
    }
    this.echo("Setting items per page to 100");
    var urlwith100items = currentlink + "&itemsPerPage=100";
    this.open(urlwith100items);
});
// Step 5: once the progress indicator is gone, read the number of listing
// pages and collect the order links of the first page.
casper.then(function () {
    this.waitWhileVisible("tr#_myoQL_progressIndicator", function then() {
        // Runs inside the remote page: text of the pager link that precedes
        // the last pager link.
        var pages = this.evaluate(function () {
            return $("a.myo_list_orders_link:last-child").prev()[0].text;
        });
        // Explicit radix, and only accept a positive number: a NaN page count
        // must never reach the pagination step, so listingpages keeps its
        // previous value otherwise.
        var pageCount = parseInt(pages, 10);
        if (pageCount > 0) {
            listingpages = pageCount;
        }
        PickOrdersFromListing(this);
    });
});
// Step 6: visit each remaining listing page through its "Next" link and
// collect the order links found there.
casper.then(function () {
    var remainingPages = listingpages - 1;
    this.repeat(remainingPages, function () {
        var pagerLink = this.getElementInfo("a.myo_list_orders_link:last-child");
        if (pagerLink.text !== "Next") {
            return;
        }
        this.thenOpen(pagerLink.attributes.href, function () {
            // Wait before scraping the newly opened listing page.
            this.wait(waittime, function () {
                PickOrdersFromListing(this);
            });
        });
    });
});
// Step 7: write the CSV header, then queue one detail-page step per order link.
casper.then(function () {
    var self = this;
    self.echo("Now going to download " + orderlinks.length + " orders");
    stream.writeLine(header);
    orderlinks.forEach(function (orderLink) {
        PickOrderDetail(self, orderLink, stream);
    });
});
// Final steps: log out, then flush and close the CSV stream.
casper.thenClick("li.sc-logout-quicklink a");
casper.then(function () {
    // Flush while the stream is still open, then close it.
    stream.flush();
    stream.close();
});
// Start executing the queued steps.
casper.run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment