Created
April 4, 2019 14:33
-
-
Save luisramalho/012c3ce2a32470e22d4fe68954a558ac to your computer and use it in GitHub Desktop.
Web Scraping with Node.js https://stackabuse.com/web-scraping-with-node-js/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const rp = require('request-promise'); | |
const otcsv = require('objects-to-csv'); | |
const cheerio = require('cheerio'); | |
// Site origin; prepended to the search path and to each listing's href.
const baseURL = 'https://www.yellowpages.com'; | |
// Search path and query string (search_terms=printing, geo_location_terms=New+York).
const searchURL = '/search?search_terms=printing&geo_location_terms=New+York'; | |
/**
 * Fetches the search results page at `baseURL + searchURL` and, for every
 * `a.business-name` link on it, fetches the linked detail page to collect
 * contact details.
 *
 * All detail pages are requested concurrently; if any single request fails,
 * the returned promise rejects with that error.
 *
 * @returns {Promise<Array<{emailAddress: string, link: string, name: string, phone: string}>>}
 *   One record per listing, in the order the links appear on the search page.
 */
const getCompanies = async () => {
  const html = await rp(baseURL + searchURL);
  // Use cheerio.load() rather than the cheerio(selector, html) shorthand:
  // load() is the documented entry point and works across cheerio releases.
  const $ = cheerio.load(html);

  const pending = $('a.business-name')
    .map(async (i, e) => {
      const link = baseURL + e.attribs.href;
      const innerHtml = await rp(link);
      const $detail = cheerio.load(innerHtml);

      // Read the raw attribute so the `mailto:` prefix can be stripped below.
      const emailHref = $detail('a.email-business').attr('href');

      // The anchor may have no child nodes, or its first child may be an
      // element rather than a text node; in both cases fall back to the
      // detail page's <h1> instead of throwing.
      const firstChild = e.children && e.children[0];
      const name = (firstChild && firstChild.data) || $detail('h1').text();

      return {
        emailAddress: emailHref ? emailHref.replace('mailto:', '') : '',
        link,
        name,
        phone: $detail('p.phone').text(),
      };
    })
    .get();

  // .get() yields a plain array of promises; wait for all of them.
  return Promise.all(pending);
};
// Entry point: scrape the listings, write them to ./output.csv, and report
// the outcome on the console. Any failure along the way is logged.
(async () => {
  try {
    const companies = await getCompanies();
    const csv = new otcsv(companies);
    await csv.toDisk('./output.csv');
    console.log('SUCCESSFULLY COMPLETED THE WEB SCRAPING SAMPLE');
  } catch (err) {
    console.error(err);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment