stephanieleary · September 10, 2024 14:52
diff --git a/isbn-regex.js b/isbn-regex.js
 /**
  * Extract ISBN-10 / ISBN-13 candidates from a string.
  *
  * Candidates are selected by shape only (optional 978/979 prefix, a leading
  * 0, 1 or 8, digits and dashes, trailing digit or X); check digits are not
  * verified.
  *
  * @param {string} html - Text or markup to scan.
  * @returns {string[]|undefined} Unique candidates with dashes removed, in
  *   order of first appearance. `undefined` when nothing ISBN-like is found.
  */
 function matchISBNs(html) {
   // Must be a regex literal: a string argument to match() is compiled with
   // new RegExp(string), which would treat the slashes and "gi" as literal
   // pattern text and apply no flags at all.
   const isbnPattern = /(?=[0-9X-]{10,20})(?:97[89])?-?[018][0-9-]{8,14}[0-9X]/gi;

   // flatten string and find ISBN-looking things
   const candidates = html.replaceAll('\n', ' ').match(isbnPattern);
   if (!candidates) {
     return undefined;
   }

   const normalized = candidates
     // remove dashes
     .map((value) => value.replaceAll('-', ''))
     // throw out anything that isn't 10 or 13 characters
     .filter((value) => value.length === 10 || value.length === 13);

   // de-dupe (Set keeps first-seen order)
   return [...new Set(normalized)];
 }

 // Start from the visible text of the page; attribute values are added below.
 let text = document.body.innerText;
 const html = document.body.innerHTML;
 // Alternative input (demo page): document.getElementById('isbns').value

 // look in URLs (match() yields null when the page has no href attributes)
 const hrefs = html.match(/href=(["'])(.*?)\1/gi);
 // also in microdata
 const itemids = html.match(/itemid=(["'])(.*?)\1/gi);

 // combine URLs, microdata, and visible content; default to empty arrays so
 // a failed match does not append the literal text "null"
 const attributeText = [...(hrefs ?? []), ...(itemids ?? [])].join(' ');
 text = `${text} ${attributeText}`;

 const isbns = matchISBNs(text);
 // result: array of ISBNs without dashes (undefined when none were found)

 // demo: https://codepen.io/stephanieleary/full/XWLPWww
 // bookmarklet: https://codepen.io/stephanieleary/full/bGPxGrW
	/**
	 * Extract ISBN-10 / ISBN-13 candidates from a string.
	 *
	 * Candidates are selected by shape only (optional 978/979 prefix, a leading
	 * 0, 1 or 8, digits and dashes, trailing digit or X); check digits are not
	 * verified.
	 *
	 * @param {string} html - Text or markup to scan.
	 * @returns {string[]|undefined} Unique candidates with dashes removed, in
	 *   order of first appearance. `undefined` when nothing ISBN-like is found.
	 */
	function matchISBNs(html) {
		// Must be a regex literal: a string argument to match() is compiled with
		// new RegExp(string), which would treat the slashes and "gi" as literal
		// pattern text and apply no flags at all.
		const isbnPattern = /(?=[0-9X-]{10,20})(?:97[89])?-?[018][0-9-]{8,14}[0-9X]/gi;

		// flatten string and find ISBN-looking things
		const candidates = html.replaceAll('\n', ' ').match(isbnPattern);
		if (!candidates) {
			return undefined;
		}

		const normalized = candidates
			// remove dashes
			.map((value) => value.replaceAll('-', ''))
			// throw out anything that isn't 10 or 13 characters
			.filter((value) => value.length === 10 || value.length === 13);

		// de-dupe (Set keeps first-seen order)
		return [...new Set(normalized)];
	}

	// Start from the visible text of the page; attribute values are added below.
	let text = document.body.innerText;
	const html = document.body.innerHTML;
	// Alternative input (demo page): document.getElementById('isbns').value

	// look in URLs (match() yields null when the page has no href attributes)
	const hrefs = html.match(/href=(["'])(.*?)\1/gi);
	// also in microdata
	const itemids = html.match(/itemid=(["'])(.*?)\1/gi);

	// combine URLs, microdata, and visible content; default to empty arrays so
	// a failed match does not append the literal text "null"
	const attributeText = [...(hrefs ?? []), ...(itemids ?? [])].join(' ');
	text = `${text} ${attributeText}`;

	const isbns = matchISBNs(text);
	// result: array of ISBNs without dashes (undefined when none were found)

	// demo: https://codepen.io/stephanieleary/full/XWLPWww
	// bookmarklet: https://codepen.io/stephanieleary/full/bGPxGrW