
@sueszli
Last active January 12, 2024 14:22
kijiji.com scraper
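A small Node.js (ESM) script that walks through paginated kijiji search results, collects every listing link along the way, and then opens them all in your default browser. Assuming the file is saved as scraper.mjs (filename hypothetical, not from the gist), run it with the search url as the only argument: node scraper.mjs '<kijiji search url>'.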
import axios from 'axios'
import * as cheerio from 'cheerio'
import { strict as assert } from 'node:assert' // console's assert only logs, it never throws
import open from 'open'
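// deps (assuming npm): npm install axios cheerio open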
const main = async () => {
    // expect exactly one argument: the kijiji search url to start from
    assert(process.argv.length === 3, 'illegal number of arguments')
    let url = process.argv[2]
    assert(url, 'missing url as argument')
    const links = []
    while (true) {
        const htmlStr = await axios.get(url).then((r) => r.data)
        const $ = cheerio.load(htmlStr)

        // collect listing links from the current page
        const newLinks = []
        $('a[data-testid="listing-link"]').each((_, a) => {
            const href = $(a).attr('href')
            if (href) {
                newLinks.push(href)
            }
        })
        console.log(`found ${newLinks.length} links`)
        links.push(...newLinks)
        // follow the pagination "next" button; stop when there is none
        const nextButton = $('li[data-testid="pagination-next-link"] a')
        if (!nextButton.length) {
            console.log('reached last page')
            break
        }
        const nextButtonHref = nextButton.attr('href')
        if (!nextButtonHref) {
            console.error('next button has no href')
            process.exit(1)
        }
        // hrefs are relative, so resolve them against the current page url
        url = new URL(nextButtonHref, url).href
    }
    console.log(`press enter to open the ${links.length} scraped links in your default browser`)
    await new Promise((resolve) => process.stdin.once('data', resolve))

    // open each scraped link in the default browser
    for (const l of links) {
        const lurl = new URL(l, url).href
        console.log(`opening: ${lurl}`)
        await open(lurl)
    }
    process.exit(0)
}

main()
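The pagination loop above fires requests back-to-back. If kijiji throttles you, a minimal sketch of a polite delay that could be awaited after each page fetch (the sleep helper and the 1000 ms interval are illustrative assumptions, not part of the original gist):

// minimal sketch: polite delay between page fetches (helper name and
// interval are illustrative, not part of the original script)
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

// inside the while loop, right after fetching a page:
//     await sleep(1000)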
// a second file in the gist (presumably .puppeteerrc.cjs): Puppeteer config
// that pins the browser download cache to the project directory.
// it is not used by the scraper above.
const { join } = require("path");

module.exports = {
    cacheDirectory: join(__dirname, ".cache", "puppeteer"),
};