Created
June 1, 2024 15:44
-
-
Save yshalsager/1a45ea8513c66d5f73e3edcb9940ba43 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name alukah مخطوطات | |
// @namespace http://tampermonkey.net/ | |
// @version 2024-06-01 | |
// @description Extracts manuscript data and downloads a CSV file
// @author yshalsager | |
// @match https://www.alukah.net/library/11452/ | |
// @icon https://www.google.com/s2/favicons?sz=64&domain=alukah.net | |
// @grant none | |
// ==/UserScript== | |
(function() { | |
'use strict'; | |
/**
 * Converts the rows of a listing page's table into CSV lines.
 *
 * Every `table.table tbody tr` row becomes one line with five quoted fields,
 * in this order: title text, title link `href`, then the trimmed text of the
 * 2nd, 3rd and 4th cells (exported as author, date and reads).
 *
 * @param {Document} doc - Document to read from (the live page or a parsed one).
 * @returns {string} CSV lines joined by "\n", without a header row and without
 *   a trailing newline; an empty string when no rows match.
 */
function extractDataFromPage(doc) {
    // Trimmed text of the cell at the given 1-based position, or '' when the
    // row has no such cell, so one short row cannot abort the whole export.
    const cellText = (row, position) => {
        const cell = row.querySelector(`td:nth-child(${position})`);
        return cell ? cell.textContent.trim() : '';
    };

    // CSV quoting: wrap the field in double quotes and double embedded quotes.
    const quote = (value) => `"${String(value).replace(/"/g, '""')}"`;

    return Array.from(doc.querySelectorAll('table.table tbody tr'))
        .map((row) => {
            const titleLink = row.querySelector('td:nth-child(1) a');
            const fields = [
                titleLink ? titleLink.textContent.trim() : '',
                titleLink ? titleLink.href : '',
                cellText(row, 2),
                cellText(row, 3),
                cellText(row, 4),
            ];
            return fields.map((field) => quote(field)).join(',');
        })
        .join('\n');
}
/**
 * Reads the number of the last listing page from the pager of the current
 * document.
 *
 * The text of the `.Pager a:last-child` link is used when it parses as an
 * integer. Otherwise the highest integer found among all `.Pager a` links is
 * used, and 1 is returned when the pager has no numbered link at all.
 *
 * @returns {number} Last page number, or 1 when it cannot be determined.
 */
function getLastPageNumber() {
    const toPageNumber = (link) => Number.parseInt(link.textContent.trim(), 10);

    const lastPageLink = document.querySelector('.Pager a:last-child');
    if (lastPageLink) {
        const lastPage = toPageNumber(lastPageLink);
        if (!Number.isNaN(lastPage)) {
            return lastPage;
        }
    }

    // The last link is missing or not numeric: returning NaN here would make
    // the caller's page loop silently skip every page after the first.
    const numberedPages = Array.from(document.querySelectorAll('.Pager a'))
        .map((link) => toPageNumber(link))
        .filter((pageNumber) => !Number.isNaN(pageNumber));

    return numberedPages.length > 0 ? Math.max(...numberedPages) : 1;
}
/**
 * Downloads one listing page and returns its table rows as CSV lines.
 *
 * @param {string} url - URL of the listing page to fetch.
 * @returns {Promise<string>} Result of `extractDataFromPage` for the parsed page.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function fetchAndExtract(url) {
    const response = await fetch(url);
    // Without this check an error page would be parsed as a listing with no
    // rows, and the export would silently lose data.
    if (!response.ok) {
        throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
    }
    const html = await response.text();
    const doc = new DOMParser().parseFromString(html, 'text/html');
    return extractDataFromPage(doc);
}
/**
 * Builds the complete CSV export: a header row, the rows of the current page,
 * then the rows of pages 2 through the last page reported by the pager.
 *
 * The remaining pages are fetched concurrently and appended in page order.
 * Pages that yield no rows are skipped so they do not leave blank lines in
 * the output.
 *
 * @returns {Promise<string>} CSV text starting with the header line.
 */
async function extractAllDataToCSV() {
    const header = 'Title,Link,Author,Date,Reads';
    const firstPageData = extractDataFromPage(document);

    const lastPageNumber = getLastPageNumber();
    const baseUrl = window.location.origin;
    const pageUrls = [];
    for (let pageNumber = 2; pageNumber <= lastPageNumber; pageNumber++) {
        pageUrls.push(`${baseUrl}/library/11452/Page/${pageNumber}/`);
    }

    // Promise.all keeps the results in the same order as pageUrls.
    const remainingPagesData = await Promise.all(
        pageUrls.map((pageUrl) => fetchAndExtract(pageUrl)),
    );

    const rows = [firstPageData, ...remainingPagesData]
        .filter((pageData) => pageData !== '')
        .join('\n');

    return `${header}\n${rows}`;
}
/**
 * Offers the given CSV text to the user as a file download.
 *
 * A temporary hidden `<a download>` element pointing at a Blob object URL is
 * added to the page, clicked, and removed again.
 *
 * @param {string} csvData - Full CSV text to save.
 * @param {string} filename - File name suggested to the browser.
 * @returns {void}
 */
function downloadCSV(csvData, filename) {
    const blob = new Blob([csvData], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    const link = document.createElement('a');
    link.setAttribute('href', url);
    link.setAttribute('download', filename);
    link.style.visibility = 'hidden';

    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the blob once the click has been dispatched; deferred so the
        // browser can start the download before the URL becomes invalid.
        setTimeout(() => URL.revokeObjectURL(url), 0);
    }
}
/**
 * Inserts a "Download Manuscript Data (CSV)" button directly before the
 * listing table (`table.table`).
 *
 * Clicking the button builds the CSV with `extractAllDataToCSV` and hands it
 * to `downloadCSV` as `manuscript_data.csv`. Nothing is inserted when the
 * table is not present on the page.
 *
 * @returns {void}
 */
function addDownloadButton() {
    const tableContainer = document.querySelector('table.table');
    if (!tableContainer || !tableContainer.parentNode) {
        console.warn('Manuscript table not found; download button was not added.');
        return;
    }

    const button = document.createElement('button');
    button.textContent = 'Download Manuscript Data (CSV)';
    button.style.margin = '10px 0';
    button.style.display = 'block';

    button.addEventListener('click', async (event) => {
        event.preventDefault();
        // Block repeated clicks while an export is running so the same pages
        // are not fetched several times in parallel.
        button.disabled = true;
        try {
            const csvData = await extractAllDataToCSV();
            downloadCSV(csvData, 'manuscript_data.csv');
        } catch (error) {
            // Report the failure instead of leaving an unhandled rejection.
            console.error('Failed to export manuscript data:', error);
        } finally {
            button.disabled = false;
        }
    });

    tableContainer.parentNode.insertBefore(button, tableContainer);
}
addDownloadButton(); | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment