Last active
December 21, 2021 18:08
-
-
Save kami4ka/9a7d38d4e045f742949288acc8867a69 to your computer and use it in GitHub Desktop.
Amazon batch scraper with error handling. The ScrapingAnt API is used to get the data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Amazon Batch Scraper - create a file with a list of keywords (one per line); all products found for those keywords are scraped into a single CSV file.
 *
 * Installation instructions:
 * npm install "@scrapingant/amazon-proxy-scraper"
 * npm install json2csv
 *
 */
const ProductsScraper = require("@scrapingant/amazon-proxy-scraper"); | |
const readline = require('readline'); | |
const fs = require('fs'); | |
const fsPromise = require('fs').promises; | |
const Json2csvParser = require('json2csv').Parser; | |
// Input file with one search keyword per line; the results file name is derived from it.
const KEYWORDS_FILENAME = '12_14_21.csv';
// Value passed to the scraper as its "number" option for each keyword.
const MAX_PRODUCTS_FOR_KEYWORD = 10;
// Prefer the SCRAPINGANT_API_KEY environment variable so the secret does not have to be
// committed to source; fall back to the in-file placeholder when it is unset or empty.
const SCRAPINGANT_API_KEY = process.env.SCRAPINGANT_API_KEY || '<YOUR SCRAPINGANT API KEY>';
// Script entry point: run the batch scrape, then report where the results were written.
// On failure, log the error and mark the process as failed so shells/CI can detect it
// (previously the script exited with status 0 even when the run was rejected).
scrapeFromCSV().then(
  () => console.log(`Results can be found at: results_${KEYWORDS_FILENAME}`),
  (error) => {
    console.error(error);
    process.exitCode = 1;
  },
);
/**
 * Scrapes products for every keyword listed in KEYWORDS_FILENAME (one keyword
 * per line) and hands all collected products to writeDataToCSV, targeting
 * `results_${KEYWORDS_FILENAME}`.
 *
 * Lines are trimmed and blank lines are skipped, so stray empty rows in the
 * keywords file do not trigger scrape requests with an empty keyword.
 * A failure while scraping one keyword is logged and does not stop the
 * remaining keywords from being processed.
 *
 * @returns {Promise<void>} Resolves once writeDataToCSV has completed.
 */
async function scrapeFromCSV() {
  const fileStream = fs.createReadStream(KEYWORDS_FILENAME);
  const rl = readline.createInterface({
    input: fileStream,
    // Always treat "\r\n" as a single line break, regardless of chunk timing.
    crlfDelay: Infinity,
  });

  const allProducts = [];
  for await (const line of rl) {
    // trim() removes surrounding whitespace and also a leading byte-order mark (U+FEFF).
    const keyword = line.trim();
    if (keyword === '') {
      continue;
    }

    console.log(`Scraping keyword: ${keyword}`);
    try {
      // Constructed inside the try so that a keyword rejected by the scraper's
      // constructor is logged like any other per-keyword failure.
      const scraper = new ProductsScraper({
        apiKey: SCRAPINGANT_API_KEY,
        keyword,
        number: MAX_PRODUCTS_FOR_KEYWORD,
      });
      const products = await scraper.startScraping();
      allProducts.push(...products);
    } catch (e) {
      // Best-effort batch: report the problem and continue with the next keyword.
      console.error(e);
    }
  }

  await writeDataToCSV(`results_${KEYWORDS_FILENAME}`, allProducts);
}
/**
 * Converts a list of product objects to CSV text and writes it to a file.
 *
 * Only the columns named in the fixed field list below are emitted, in that order.
 *
 * @param {string} filename - Path of the CSV file to write.
 * @param {Object[]} productsList - Product records to serialize.
 * @returns {Promise<void>} Promise returned by the file write.
 */
async function writeDataToCSV(filename, productsList) {
  const columns = [
    'title',
    'price',
    'savings',
    'rating',
    'reviews-count',
    'score',
    'url',
    'is-sponsored',
    'is-amazon-choice',
    'is-discounted',
    'before-discount',
    'amazon-id',
    'thumbnail',
    'high-res-image',
    'short-description',
    'full-description',
  ];
  const parserOptions = { fields: columns };
  const csvParser = new Json2csvParser(parserOptions);
  const csvText = csvParser.parse(productsList);
  return fsPromise.writeFile(filename, csvText);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment