Skip to content

Instantly share code, notes, and snippets.

@rogigs
Created March 4, 2024 22:36
Show Gist options
  • Save rogigs/ec0146dc2198005bfab64b42aea11ccb to your computer and use it in GitHub Desktop.
Save rogigs/ec0146dc2198005bfab64b42aea11ccb to your computer and use it in GitHub Desktop.
const puppeteer = require("puppeteer");

// Page URLs are blank in this copy of the script; fill them in before running.
const OLD_SITE_URL = "";
const NEW_SITE_URL = "";

// Containers that hold the category links on each site.
const OLD_SECTION_SELECTOR =
  "#content > div.item-page > div:nth-child(3) > div";
const NEW_SECTION_SELECTOR =
  "#content > div.elementor.elementor-14722 > div > div";

// Elements whose text is a supplier ("fornecedor") name on a category page.
const OLD_FORNECEDOR_SELECTOR = ".tagItemTitle";
const NEW_FORNECEDOR_SELECTOR = ".rtcl-listing-title";

/**
 * Waits until `selector` is present on the page, logging the outcome.
 *
 * A failure is logged and then rethrown: every later step reads from this
 * element, so continuing would only fail further on with a less useful error.
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - CSS selector to wait for.
 * @returns {Promise<void>}
 */
async function waitForSection(page, selector) {
  try {
    await page.waitForSelector(selector);
  } catch (error) {
    console.log("Error load...");
    throw error;
  }
  console.log("Finish load...");
}

/**
 * Visits every category page in turn and collects the supplier names on it.
 *
 * @param {object} page - Puppeteer page (navigated as a side effect).
 * @param {{href: string, content: string}[]} categories - Category links.
 * @param {string} titleSelector - CSS selector matching supplier titles.
 * @returns {Promise<{category: string, fornecedores: string[]}[]>}
 */
async function collectFornecedores(page, categories, titleSelector) {
  const collected = [];

  // Sequential on purpose: a single page object is reused for navigation.
  for (const { href, content } of categories) {
    await page.goto(href);

    const fornecedores = await page.evaluate(
      (selector) =>
        Array.from(document.querySelectorAll(selector), (element) =>
          element.textContent.trim()
        ),
      titleSelector
    );

    collected.push({ category: content, fornecedores });
  }

  return collected;
}

/**
 * For each old-site category, lists the suppliers that are absent from the
 * same-named category on the new site.
 *
 * The direction (old -> new) matches how `categoriesMissed` is computed below.
 * Each entry is either the string "Category not found" (no category with that
 * name on the new site) or an array of `{ category, fornecedor }` records.
 *
 * @param {{category: string, fornecedores: string[]}[]} oldSite
 * @param {{category: string, fornecedores: string[]}[]} newSite
 * @returns {(string | {category: string, fornecedor: string}[])[]}
 */
function findFornecedoresMissed(oldSite, newSite) {
  return oldSite.map(({ category, fornecedores }) => {
    const migrated = newSite.find(
      (candidate) => candidate.category === category
    );

    if (!migrated) {
      return "Category not found";
    }

    const migratedNames = new Set(migrated.fornecedores);

    // filter() only selects; map() builds the report records.
    return fornecedores
      .filter((fornecedor) => !migratedNames.has(fornecedor))
      .map((fornecedor) => ({ category, fornecedor }));
  });
}

/**
 * Scrapes categories and suppliers from the old and the new site, then logs
 * which categories and suppliers from the old site are missing on the new one.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({ headless: false });

  try {
    const page = await browser.newPage();

    // Old site
    await page.goto(OLD_SITE_URL);
    await waitForSection(page, OLD_SECTION_SELECTOR);

    const categories = await page.evaluate((selector) => {
      const section = document.querySelector(selector);

      return Array.from(section.getElementsByTagName("a"), (link) => ({
        href: link.href,
        content: link.textContent.trim(),
      }));
    }, OLD_SECTION_SELECTOR);

    const auxCategory = await collectFornecedores(
      page,
      categories,
      OLD_FORNECEDOR_SELECTOR
    );

    // New site
    await page.goto(NEW_SITE_URL);
    await waitForSection(page, NEW_SECTION_SELECTOR);

    const categoriesHomolog = await page.evaluate((selector) => {
      const section = document.querySelector(selector);
      const titles = section.querySelectorAll(".rtcl-category-title");

      // The link is read from the first child of each title element.
      return Array.from(titles, (title) => {
        const link = title.children[0];

        return {
          href: link.href,
          content: link.textContent.trim(),
        };
      });
    }, NEW_SECTION_SELECTOR);

    const auxCategoryHomolog = await collectFornecedores(
      page,
      categoriesHomolog,
      NEW_FORNECEDOR_SELECTOR
    );

    const homologNames = new Set(
      categoriesHomolog.map((homolog) => homolog.content)
    );
    const categoriesMissed = categories.filter(
      (category) => !homologNames.has(category.content)
    );

    const fornecedoresMissed = findFornecedoresMissed(
      auxCategory,
      auxCategoryHomolog
    );

    console.log("🚀 ~ categoriesMissed:", categoriesMissed);
    console.log(
      "🚀 ~ fornecedoresMissed ~ fornecedoresMissed:",
      fornecedoresMissed
    );
  } finally {
    // Always release the browser, even when scraping fails midway.
    await browser.close();
  }
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment