Skip to content

Instantly share code, notes, and snippets.

@c3founder
Last active June 1, 2022 17:14
Show Gist options
  • Save c3founder/ae0bc8ae4ca8d069bcdb4c13a227e066 to your computer and use it in GitHub Desktop.
Save c3founder/ae0bc8ae4ca8d069bcdb4c13a227e066 to your computer and use it in GitHub Desktop.
Math and Multi Language OCR for Roamresearch
// ==UserScript==
// @name Math and Multi Language OCR for Roamresearch
// @author Connected Cognition Crumbs <c3founder@gmail.com>
// @require Roam42: Wait until Roam42 loads completely
// @version 0.4
// @description Ctrl+click => Math OCR using Mathpix (need to register on mathpix.com and activate the API)
// Shift+click => First Language OCR with Tesseract
// Alt+click => Second Language OCR with Tesseract
// Alt+a c => Replace the image with the OCR result
// @match https://*.roamresearch.com
// User-editable configuration read by the rest of this script.
// NOTE(review): assigned without a declaration keyword, so in non-strict code
// this creates a global variable named `params`.
params = {
// Language codes handed to Tesseract.recognize() as its language argument.
lang1 : "eng", //Shift + Click
lang2 : "ara", //Alt + Click
//Mathpix parameters
// Credentials for the Mathpix request (sent as HTTP headers with the OCR call).
appId : "YOUR_APP_ID",
appKey: "YOUR_APP_KEY",
//Edit options
// When true, the OCR result is prefixed with a " [*](url) " link back to the image.
saveRef2Img : false,
//Cleanup Shortcut
// Key sequence bound with Mousetrap.bind() to replace the image with the OCR text.
cleanKey : 'alt+a c'
};
//Waiting for Roam42 to load
// Returns true once everything this script needs from Roam42 is available:
// the script loader used below and the keyboard library used to press keys.
// `typeof x` yields the string 'undefined', so the comparison must be made
// against that string; comparing against the value `undefined` never matches.
function isRoam42Loaded() {
  const r42 = window.roam42;
  if (typeof r42 === 'undefined' || r42 === null) return false;
  if (!r42.loader || typeof r42.loader.addScriptToPage !== 'function') return false;
  // The keyboard library is accepted either as a global or as a roam42 member.
  return typeof roam42KeyboardLib !== 'undefined' ||
    typeof r42.roam42KeyboardLib !== 'undefined';
}
// Poll once per second; when Roam42 is ready, load Tesseract and stop polling.
var roam42Ready = setInterval(() => {
  if (!isRoam42Loaded()) return;
  roam42.loader.addScriptToPage('Tesseract', 'https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js');
  clearInterval(roam42Ready);
}, 1000);
// MutationObserver callback: finds inline images that have not been prepared
// yet, marks them with the 'img-ready4ocr' class and attaches a mousedown
// handler that runs OCR when the click is made with a modifier key.
//   mutationsList - supplied by the observer; not used.
// Handler behaviour:
//   - no modifier key: the click is left alone.
//   - Alt => params.lang2, Shift => params.lang1, Ctrl/Meta => Mathpix math OCR.
//     When several modifiers are held only one OCR request is made, with the
//     precedence Alt > Shift > Ctrl/Meta.
//   - on any failure the error is logged and a message is written to the block.
const scanForNewImages = (mutationsList = null) => {
  const freshImages = document.querySelectorAll('.rm-inline-img:not(.img-ready4ocr)');
  for (const im of freshImages) {
    im.classList.add('img-ready4ocr');
    im.addEventListener("mousedown", async function (e) {
      const wantsMath = e.ctrlKey || e.metaKey;
      if (!(wantsMath || e.shiftKey || e.altKey)) return;
      try {
        // Keep the modified click from reaching other handlers.
        e.preventDefault();
        e.stopPropagation();
        e.stopImmediatePropagation();
        await simulateImgEditClick(im);
        const txt = document.querySelector('textarea.rm-block-input');
        const url = await makeAChildBlock(txt);
        fillTheBlock("Granting wishes...");
        let parsedStr;
        if (e.altKey) {
          parsedStr = await parseLan(url, params.lang2);
        } else if (e.shiftKey) {
          parsedStr = await parseLan(url, params.lang1);
        } else { //Math OCR
          parsedStr = await parseMath(url);
        }
        const prefix = params.saveRef2Img ? " [*](" + url + ") " : "";
        fillTheBlock(prefix + parsedStr);
      } catch (err) {
        console.error("OCR failed:", err);
        try {
          fillTheBlock("OCR was unsuccessful. Only PNG and JPG files that are directly uploaded to Roam are supported.");
        } catch (fillErr) {
          // There may be no active block to write the message into.
          console.error("Could not report the OCR failure in the block:", fillErr);
        }
      }
    });
  }
};
// Runs Tesseract on the image at `url` with language `lan` and resolves to the
// recognized text with every newline replaced by a single space.
async function parseLan(url, lan) {
  const result = await Tesseract.recognize(url, lan);
  const recognized = result.data.text;
  return recognized.replace(/\n/g, " ");
}
// Clicks the edit button ('.bp3-button.bp3-icon-edit') inside the '.roam-block'
// that contains the image `im`, then waits 50 ms before resolving.
async function simulateImgEditClick(im) {
  const editButton = im
    .closest('.roam-block')
    .querySelector('.bp3-button.bp3-icon-edit');
  roam42.common.simulateMouseClick(editButton);
  await roam42.common.sleep(50);
}
// Writes `givenTxt` into the textarea of the block currently being edited
// ('textarea.rm-block-input') and announces the change with a bubbling
// 'input' event.
function fillTheBlock(givenTxt) {
  const activeArea = document.querySelector("textarea.rm-block-input");
  // The value is written through the prototype's own setter rather than by
  // plain assignment -- presumably so the page's framework notices the change.
  const { set: nativeValueSetter } = Object.getOwnPropertyDescriptor(
    window.HTMLTextAreaElement.prototype,
    'value'
  );
  nativeValueSetter.call(activeArea, givenTxt);
  activeArea.dispatchEvent(new Event('input', { bubbles: true }));
}
//OCR the given image using the Mathpix API
// Posts the image URL to the Mathpix v3/text endpoint and resolves to the
// returned text with its math delimiters rewritten for Roam:
//   "\( " and " \)"           become "$$"
//   "\n\[\n" and "\n\]\n"     become " $$ "
// Rejects with an Error when the response carries no `text` string (for
// example an error response), including the response's `error` field if set.
async function parseMath(url){
  //Send the request to Mathpix API
  const ocrReq = {
    "src": url,
    "formats": "text",
  };
  const response = await postData('https://api.mathpix.com/v3/text', ocrReq);
  if (!response || typeof response.text !== 'string') {
    const reason = response && response.error ? ": " + response.error : "";
    throw new Error("Mathpix returned no text" + reason);
  }
  //Make the math Roam-readable
  // In a replacement string "$$" stands for one literal "$", so "$$$$" inserts "$$".
  return response.text
    .replace(/(\\\( )|( \\\))/g, "$$$$")
    .replace(/(\n\\\[\n)|(\n\\\]\n?)/g, " $$$$ ");
}
// Creates a new block below the one being edited and returns the URL of the
// image referenced in that block's text.
//   txtArea - the textarea of the block being edited; its value is expected
//             to contain image markdown of the form ![alt](url).
// Steps: move the caret to the end of the text, press Enter, then press Tab
// (with 50 ms pauses) so the new block becomes a child.
// Resolves to the URL of the first image found in the text. Rejects with an
// Error when no textarea with a string value is supplied.
async function makeAChildBlock(txtArea = ''){
  if (!txtArea || typeof txtArea.value !== 'string') {
    throw new Error('No active block textarea to read the image from.');
  }
  const msg = txtArea.value;
  //Make a child block
  txtArea.selectionStart = msg.length;
  txtArea.selectionEnd = msg.length;
  await roam42KeyboardLib.pressEnter(50);
  await roam42.common.sleep(50);
  await roam42KeyboardLib.pressTab(50);
  await roam42.common.sleep(50);
  // Pull the URL out of the first ![alt](url); one level of balanced
  // parentheses inside the URL is tolerated. This also works when the block
  // holds other text around the image.
  const found = /!\[[^\]]*\]\(((?:[^()\s]|\([^()\s]*\))+)\)/.exec(msg);
  if (found) return found[1];
  // Fallback: previous behaviour, which assumes the text is exactly ![...](url).
  const rest = msg.replace(/\!\[.*\]/, ''); //The form is (url)
  return rest.substring(1, rest.length - 1);
}
// Sends `data` as a JSON POST request to `url` with the Mathpix credentials
// from `params` in the app_id / app_key headers, and resolves to the parsed
// JSON response body.
//   url  - endpoint to post to.
//   data - object serialised with JSON.stringify as the request body.
async function postData(url = '', data = {}) {
  // The configuration key is `appId`; the older spelling `appID` is still
  // honoured so existing configurations keep working.
  const appId = params.appId !== undefined ? params.appId : params.appID;
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      "content-type": "application/json",
      "app_id": appId,
      "app_key": params.appKey
    },
    body: JSON.stringify(data) // body must match the JSON content-type header
  });
  return response.json(); // parses the JSON response into a JavaScript object
}
// Re-run scanForNewImages whenever nodes are added or removed anywhere in the
// document, so images that appear later also get the OCR click handler.
// NOTE(review): observerImg is assigned without a declaration keyword, so in
// non-strict code it becomes a global.
observerImg = new MutationObserver(scanForNewImages);
observerImg.observe(document, { childList: true, subtree: true })
// Makes sure the Mousetrap script tag is present in <head> and, once the
// library has loaded, binds params.cleanKey to the cleanup action.
// Returns true when the shortcut was bound, false when Mousetrap is not
// available yet (the caller should try again later).
function installCleanupShortcut() {
  //Installing Mousetrap
  const mousetrapSrc = 'https://cdn.jsdelivr.net/npm/mousetrap@1.6.5/mousetrap.min.js';
  if (document.querySelectorAll("head > script[src='" + mousetrapSrc + "']").length === 0) {
    const tag = document.createElement('script');
    tag.src = mousetrapSrc;
    document.getElementsByTagName('head')[0].appendChild(tag);
  }
  // `typeof` is used because reading an undeclared identifier directly throws.
  if (typeof Mousetrap === 'undefined') return false;

  // Empties the active block, presses Backspace, then writes the saved text
  // into whichever block is active afterwards.
  const replaceImageWithText = async (recognizedTxt) => {
    fillTheBlock("");
    await roam42KeyboardLib.pressBackspace(70); //Delete Img
    await roam42.common.sleep(70);
    fillTheBlock(recognizedTxt);
  };

  // The handler itself is synchronous: Mousetrap only honours a literal
  // `false` return value, which an async function can never produce.
  Mousetrap.bind(params.cleanKey, function () {
    const activeTxt = document.querySelector('textarea.rm-block-input');
    if (!activeTxt) return undefined; // no block is being edited
    replaceImageWithText(activeTxt.value).catch((err) => {
      console.error("Replacing the image with the OCR text failed:", err);
    });
    return false;
  });
  return true;
}
// Poll once per second until the shortcut has been bound.
var mouseTrapReady = setInterval(() => {
  if (!installCleanupShortcut()) return;
  clearInterval(mouseTrapReady);
}, 1000);
@jrykner
Copy link

jrykner commented Mar 9, 2021

Works like magic in both English and Hebrew :)

@AyumuSuzuki31
Copy link

thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment