Skip to content

Instantly share code, notes, and snippets.

@paulgrammer
Last active November 21, 2023 09:18
Show Gist options
  • Save paulgrammer/741850da474e32d4ddd02656b17a1ae3 to your computer and use it in GitHub Desktop.
Save paulgrammer/741850da474e32d4ddd02656b17a1ae3 to your computer and use it in GitHub Desktop.
const cheerio = require("cheerio");
const axios = require("axios");
const fs = require("fs");
// Root directory under which every cache file written by this script lives.
const baseDir = "./bible";

// Book-index pages on jw.org, keyed by the language code used throughout the
// script ("en" and "lg" — presumably English and Luganda; confirm if adding more).
const BASE_URL = {
  en: "https://www.jw.org/en/library/bible/nwt/books",
  lg: "https://www.jw.org/lg/layibulale/bayibuli/nwt/ebitabo",
};

// `recursive: true` makes the call a no-op when the directory already exists,
// so no separate existence check (and no check-then-create race) is needed.
fs.mkdirSync(baseDir, { recursive: true });
/**
 * Returns the list of book names for each supported language.
 *
 * Each language is served from `${baseDir}/books.<language>.txt` when that
 * file exists and is not blank; otherwise the names are scraped from the
 * `<option>` elements of the `select[id=Book]` element on the language's
 * index page and then cached.
 *
 * A failed or empty fetch is logged and yields an empty list, but is never
 * written to disk, so a transient network error cannot poison the cache.
 *
 * @returns {Promise<{en: string[], lg: string[]}>} Book names per language.
 */
async function getBooks() {
  const cachePathFor = (language) => `${baseDir}/books.${language}.txt`;

  // Returns the cached names, or null when there is no usable cache file.
  const readCache = (language) => {
    const filePath = cachePathFor(language);
    if (!fs.existsSync(filePath)) {
      return null;
    }
    const content = fs.readFileSync(filePath).toString();
    // A blank file is what a previously failed run may have left behind.
    return content.trim().length > 0 ? content.split("\n") : null;
  };

  // Scrapes the book names for one language; resolves to [] on any failure.
  const fetchBooks = async (language) => {
    const url = BASE_URL[language];
    try {
      const response = await axios.get(url);
      console.log(response.request.res.responseUrl);
      const $ = cheerio.load(response.data);
      const books = [];
      $("select[id=Book]")
        .find("option")
        .each((_, node) => {
          books.push($(node).html());
        });
      return books;
    } catch (error) {
      console.error(`Failed to fetch book list from ${url}: ${error.message}`);
      return [];
    }
  };

  // Cache first, network second; only non-empty results are persisted.
  const loadBooks = async (language) => {
    const cached = readCache(language);
    if (cached) {
      return cached;
    }
    const books = await fetchBooks(language);
    if (books.length > 0) {
      fs.mkdirSync(baseDir, { recursive: true });
      fs.writeFileSync(cachePathFor(language), books.join("\n"));
    }
    return books;
  };

  const [en, lg] = await Promise.all([loadBooks("en"), loadBooks("lg")]);
  return { en, lg };
}
/**
 * Returns the chapter labels of one book.
 *
 * The labels are served from `${baseDir}/chapters/<book>` when that file
 * exists and is not blank. Otherwise they are scraped from the `<a>` elements
 * inside `.chapters` on the book's page and cached.
 *
 * A failed or empty fetch is logged and yields `[]`, but is never cached, so
 * the next run retries instead of reading back a blank file.
 *
 * @param {string} book - Book name as it appears in the book list; whitespace
 *   is replaced by "-" when building the request URL.
 * @param {string} [url=BASE_URL.en] - Index URL the book name is appended to.
 * @returns {Promise<string[]>} Chapter labels, or [] when they are unavailable.
 */
async function getChapters(book, url = BASE_URL.en) {
  const chaptersDir = `${baseDir}/chapters`;
  const cachePath = `${chaptersDir}/${book}`;

  // Recursive creation is idempotent and also covers a missing base directory.
  fs.mkdirSync(chaptersDir, { recursive: true });

  if (fs.existsSync(cachePath)) {
    const cached = fs.readFileSync(cachePath).toString();
    // A blank file is the residue of an earlier failed fetch; refetch instead.
    if (cached.trim().length > 0) {
      return cached.split("\n");
    }
  }

  const pageUrl = `${url}/${book}`.replace(/\s/g, "-");
  const chapters = [];
  try {
    const response = await axios.get(pageUrl);
    console.log(response.request.res.responseUrl);
    const $ = cheerio.load(response.data);
    $("div")
      .find(".chapters")
      .find("a")
      .each((_, node) => {
        chapters.push($(node).html());
      });
  } catch (error) {
    console.error(`Failed to fetch chapters from ${pageUrl}: ${error.message}`);
    return [];
  }

  if (chapters.length > 0) {
    fs.writeFileSync(cachePath, chapters.join("\n"));
  }
  return chapters;
}
/**
 * Fetches one chapter of one book in both languages and caches each as
 * `${baseDir}/scriptures/<English book name>/<chapter>.<language>.txt`.
 *
 * For each language the cached file is returned when it exists and is not
 * blank. Otherwise the text of `div[id=bibleText]` is downloaded, stripped of
 * "+" and "*" markers, its curly quotes replaced by straight ones, split into
 * non-empty lines, written to disk and returned.
 *
 * The target directory is created recursively: the intermediate `scriptures`
 * directory is not created anywhere else, so a non-recursive mkdir would fail
 * on a fresh checkout and nothing would ever be saved.
 *
 * @param {number} bookIndex - Position of the book in the lists from getBooks().
 * @param {number|string} chapter - Chapter identifier appended to the book URL.
 * @returns {Promise<[string[], string[]]>} Lines for "en" and "lg", in that
 *   order; a language that could not be loaded yields [].
 */
async function getScriptures(bookIndex, chapter) {
  const books = await getBooks();
  const englishName = books["en"][bookIndex];
  if (!englishName) {
    console.error(`No book at index ${bookIndex}; skipping chapter ${chapter}`);
    return [[], []];
  }
  const scriptureDir = `${baseDir}/scriptures/${englishName}`;

  const keepNonEmpty = (line) => line.length > 0;

  // Loads the chapter for one language: cache first, network second.
  const loadLanguage = async (language) => {
    const filePath = `${scriptureDir}/${chapter}.${language}.txt`;
    if (fs.existsSync(filePath)) {
      const cached = fs.readFileSync(filePath).toString().split("\n").filter(keepNonEmpty);
      // A blank file carries no text; fall through and try the network again.
      if (cached.length > 0) {
        return cached;
      }
    }

    const bookName = books[language][bookIndex];
    if (!bookName) {
      console.error(`No "${language}" book name at index ${bookIndex}`);
      return [];
    }

    const pageUrl = `${BASE_URL[language]}/${bookName}/${chapter}`.replace(/\s/g, "-");
    try {
      const response = await axios.get(pageUrl);
      console.log(response.request.res.responseUrl);
      const $ = cheerio.load(response.data);
      const scriptures = $("div[id=bibleText]")
        .text()
        .trim()
        .replace(/[+*]/g, "")
        .replace(/[“”]/g, '"')
        .replace(/’/g, "'")
        .split("\n")
        .filter(keepNonEmpty);

      // Never persist an empty result; it would be mistaken for a valid cache.
      if (scriptures.length > 0) {
        fs.mkdirSync(scriptureDir, { recursive: true });
        fs.writeFileSync(filePath, scriptures.join("\n"));
      }
      return scriptures;
    } catch (error) {
      console.error(`Failed to fetch ${pageUrl}: ${error.message}`);
      return [];
    }
  };

  return Promise.all([loadLanguage("en"), loadLanguage("lg")]);
}
// Script entry point: walks every English book name in order and downloads
// all of its chapters, one request batch at a time, then prints "done!".
// Plain awaited loops are used so that any rejection propagates to the single
// handler at the bottom instead of being lost in a floating promise.
(async () => {
  /**
   * Downloads every chapter of one book sequentially.
   *
   * Chapters are requested by their 1-based position in the chapter list,
   * not by the scraped label; the list is only used to know how many exist.
   *
   * @param {string} book - English book name, passed to getChapters().
   * @param {number} bookIndex - Position of the book in the book list.
   * @returns {Promise<void>}
   */
  async function fetchScriptures(book, bookIndex) {
    const chapters = await getChapters(book);
    for (let position = 0; position < chapters.length; position++) {
      // A blank entry ends the list, matching how the list was consumed before.
      if (!chapters[position]) {
        break;
      }
      await getScriptures(bookIndex, position + 1);
    }
  }

  const { en: list } = await getBooks();
  for (let bookIndex = 0; bookIndex < list.length; bookIndex++) {
    const book = list[bookIndex];
    // A blank name ends the run, as it did with the original shift()-based walk.
    if (!book) {
      break;
    }
    await fetchScriptures(book, bookIndex);
  }
  console.log("done!");
})().catch((error) => {
  console.error(`Scrape aborted: ${error.message}`);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment