Skip to content

Instantly share code, notes, and snippets.

@yshalsager
Created June 1, 2024 15:44
Show Gist options
  • Save yshalsager/1a45ea8513c66d5f73e3edcb9940ba43 to your computer and use it in GitHub Desktop.
Save yshalsager/1a45ea8513c66d5f73e3edcb9940ba43 to your computer and use it in GitHub Desktop.
// ==UserScript==
// @name alukah مخطوطات
// @namespace http://tampermonkey.net/
// @version 2024-06-01
// @description Extracts manuscript data and downloads a CSV file.
// @author yshalsager
// @match https://www.alukah.net/library/11452/
// @icon https://www.google.com/s2/favicons?sz=64&domain=alukah.net
// @grant none
// ==/UserScript==
(function() {
'use strict';
/**
 * Builds the CSV body (without a header row) for one listing page.
 *
 * Every `table.table tbody tr` row becomes one CSV line with five quoted
 * fields: title, link, author, date, reads. Embedded double quotes are
 * doubled so the quoting stays valid.
 *
 * @param {Document} doc - Live or parsed document that contains the listing table.
 * @returns {string} CSV lines joined by "\n"; an empty string when no row matches.
 */
function extractDataFromPage(doc) {
  // A missing cell or link yields "" instead of a TypeError, so one
  // irregular row (e.g. fewer than four cells) cannot abort the export.
  const cellText = (row, selector) => {
    const cell = row.querySelector(selector);
    return cell ? cell.textContent.trim() : '';
  };
  const quoteField = (value) => `"${String(value).replace(/"/g, '""')}"`;

  return Array.from(doc.querySelectorAll('table.table tbody tr'))
    .map((row) => {
      const titleLink = row.querySelector('td:nth-child(1) a');
      const fields = [
        titleLink ? titleLink.textContent.trim() : '',
        titleLink ? titleLink.href : '',
        cellText(row, 'td:nth-child(2)'),
        cellText(row, 'td:nth-child(3)'),
        cellText(row, 'td:nth-child(4)'),
      ];
      return fields.map(quoteField).join(',');
    })
    .join('\n');
}
/**
 * Reads the number of the last listing page from the pager of the current document.
 *
 * The text of the `.Pager a:last-child` link is parsed first. When that text
 * is not numeric, the number is taken from a `/Page/N/` segment in the
 * link's href instead.
 *
 * @returns {number} The last page number, or 1 when no pager link exists or
 *   no page number can be determined (i.e. only the current page is used).
 */
function getLastPageNumber() {
  const lastPageLink = document.querySelector('.Pager a:last-child');
  if (!lastPageLink) {
    // No pager link found: treat the listing as a single page.
    return 1;
  }

  const fromText = Number.parseInt(lastPageLink.textContent.trim(), 10);
  if (!Number.isNaN(fromText)) {
    return fromText;
  }

  // Non-numeric label: fall back to the page number embedded in the href.
  const href = lastPageLink.getAttribute('href') || '';
  const match = /\/Page\/(\d+)/i.exec(href);
  return match ? Number.parseInt(match[1], 10) : 1;
}
/**
 * Downloads one listing page and converts its table rows to CSV lines.
 *
 * @param {string} url - Address of the listing page to fetch.
 * @returns {Promise<string>} CSV lines produced by `extractDataFromPage` for that page.
 * @throws {Error} When the server answers with a non-2xx status, so an error
 *   page is never parsed as if it were a (row-less) listing.
 */
async function fetchAndExtract(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  const html = await response.text();
  const doc = new DOMParser().parseFromString(html, 'text/html');
  return extractDataFromPage(doc);
}
/**
 * Collects the rows of every listing page into a single CSV document.
 *
 * Page 1 is read from the current document; pages 2..last are fetched in
 * parallel from `<origin>/library/11452/Page/<n>/` and appended in page order.
 *
 * @returns {Promise<string>} Header line followed by the rows of all pages,
 *   separated by "\n". Pages that produced no rows are skipped so the
 *   output contains no blank lines.
 * @throws {Error} Whatever `fetchAndExtract` rejects with for any page.
 */
async function extractAllDataToCSV() {
  const header = 'Title,Link,Author,Date,Reads';
  const firstPageRows = extractDataFromPage(document);

  const lastPageNumber = getLastPageNumber();
  const baseUrl = window.location.origin;
  const pageUrls = [];
  for (let pageNumber = 2; pageNumber <= lastPageNumber; pageNumber++) {
    pageUrls.push(`${baseUrl}/library/11452/Page/${pageNumber}/`);
  }

  // Wrap the call so Array#map's index/array arguments are not forwarded.
  const otherPagesRows = await Promise.all(pageUrls.map((url) => fetchAndExtract(url)));

  return [header, firstPageRows, ...otherPagesRows]
    .filter((chunk) => chunk !== '')
    .join('\n');
}
/**
 * Offers the given CSV text to the user as a file download.
 *
 * A temporary hidden `<a download>` element pointing at a Blob URL is
 * appended to the page, clicked, and removed again.
 *
 * @param {string} csvData - Complete CSV document.
 * @param {string} filename - File name suggested to the browser.
 * @returns {void}
 */
function downloadCSV(csvData, filename) {
  // A UTF-8 byte-order mark lets spreadsheet applications detect the
  // encoding, which matters here because the data contains Arabic text.
  const BOM = '\uFEFF';
  const alreadyMarked = typeof csvData === 'string' && csvData.startsWith(BOM);
  const blob = new Blob(alreadyMarked ? [csvData] : [BOM, csvData], {
    type: 'text/csv;charset=utf-8;',
  });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the Blob URL, but only after the current task so the
    // download triggered by click() has been started first.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
/**
 * Inserts a "Download Manuscript Data (CSV)" button directly before the
 * listing table (`table.table`).
 *
 * Clicking the button gathers all pages via `extractAllDataToCSV` and hands
 * the result to `downloadCSV` as `manuscript_data.csv`. The button is
 * disabled while an export is running so it cannot be started twice.
 * Does nothing (apart from a console warning) when the table is not found.
 *
 * @returns {void}
 */
function addDownloadButton() {
  const tableContainer = document.querySelector('table.table');
  if (!tableContainer || !tableContainer.parentNode) {
    console.warn('Manuscript export: listing table not found; download button not added.');
    return;
  }

  const button = document.createElement('button');
  // Explicit type so the button never acts as a form submit control.
  button.type = 'button';
  button.textContent = 'Download Manuscript Data (CSV)';
  button.style.margin = '10px 0';
  button.style.display = 'block';

  button.addEventListener('click', async (event) => {
    event.preventDefault();
    if (button.disabled) {
      return;
    }
    button.disabled = true;
    try {
      const csvData = await extractAllDataToCSV();
      downloadCSV(csvData, 'manuscript_data.csv');
    } catch (err) {
      // Surface the failure instead of leaving an unhandled rejection.
      const reason = err && err.message ? err.message : String(err);
      console.error('Manuscript export failed:', err);
      window.alert(`Could not export manuscript data: ${reason}`);
    } finally {
      button.disabled = false;
    }
  });

  tableContainer.parentNode.insertBefore(button, tableContainer);
}
// Entry point: add the CSV download button to the page when the script runs.
addDownloadButton();
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment