Last active
January 20, 2017 02:38
-
-
Save dotproto/87f64736815add079a6b602d99a85227 to your computer and use it in GitHub Desktop.
Examining raw Unicode values and their normalized forms. TL;DR: compare Unicode strings using `.normalize()` and `.localeCompare()`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// References | |
// | |
// - https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type | |
// - http://unicode.org/reports/tr15/#Norm_Forms | |
// - http://unicode.org/faq/normalization.html#7 (What is the difference is between W3C normalization and Unicode normalization?) | |
// - https://developer.mozilla.org/en-US/docs/Web/API/DOMParser | |
// | |
// Resources | |
// | |
// - http://stackoverflow.com/questions/8936984/uint8array-to-string-in-javascript | |
// - https://encoding.spec.whatwg.org/ | |
{
  // Demo: how canonically equivalent Unicode strings compare in JavaScript.
  //
  // Three strings are compared pairwise, first as plain JS string literals and
  // then after a round trip through HTML attribute parsing (element.dataset):
  //   - precomposed "á"  (single code point U+00E1)
  //   - decomposed  "á"  ("a" followed by combining acute accent U+0301)
  //   - plain       "a"
  // Each console.log line prints `<expression>, <result>`; the trailing comment
  // on the line is the expected result.

  // Normalizes to NFC, the default form used by String.prototype.normalize().
  const norm = s => s.normalize()

  // True when the two strings collate as equal, i.e. localeCompare() returns 0.
  const localeCompare = (x, y) => x.localeCompare(y) === 0

  // ===========================================================================

  const a = '\u00E1'  // precomposed
  const b = 'a\u0301' // decomposed
  const c = 'a'       // unaccented control

  console.info('JS Strings: .normalize() comparison')
  console.log(`a === b, ${a === b}`) // false
  console.log(`norm(a) === b, ${norm(a) === b}`) // false
  console.log(`a === norm(b), ${a === norm(b)}`) // true
  console.log(`norm(a) === norm(b), ${norm(a) === norm(b)}`) // true
  console.log(`norm(a) === norm(c), ${norm(a) === norm(c)}`) // false
  console.log(`norm(b) === norm(c), ${norm(b) === norm(c)}`) // false

  console.info('JS Strings: .localeCompare()')
  console.log(`localeCompare(a, b), ${localeCompare(a, b)}`) // true
  console.log(`localeCompare(b, a), ${localeCompare(b, a)}`) // true
  console.log(`localeCompare(a, c), ${localeCompare(a, c)}`) // false
  console.log(`localeCompare(b, c), ${localeCompare(b, c)}`) // false

  // ===========================================================================

  // The DOM half needs a browser-like environment; skip it elsewhere instead of
  // throwing a ReferenceError after the string half has already been printed.
  if (typeof document === 'undefined') {
    console.info('DOM Strings: skipped (no `document` in this environment)')
  } else {
    const frag = document.createElement('div')
    // Escapes are used instead of literal glyphs so the precomposed and
    // decomposed forms stay distinct even if an editor or copy/paste normalizes
    // the file. The JS escapes are resolved before the HTML parser sees them.
    frag.innerHTML = '<div data-a="\u00E1" data-b="a\u0301" data-c="a">hi</div>'
    const data = frag.firstChild.dataset

    const A = data.a // precomposed
    const B = data.b // decomposed
    const C = data.c // unaccented control

    console.info('DOM Strings: .normalize() comparison')
    console.log(`A === B, ${A === B}`) // false
    console.log(`norm(A) === B, ${norm(A) === B}`) // false
    console.log(`A === norm(B), ${A === norm(B)}`) // true
    console.log(`norm(A) === norm(B), ${norm(A) === norm(B)}`) // true
    console.log(`norm(A) === norm(C), ${norm(A) === norm(C)}`) // false
    console.log(`norm(B) === norm(C), ${norm(B) === norm(C)}`) // false

    console.info('DOM Strings: .localeCompare()')
    console.log(`localeCompare(A, B), ${localeCompare(A, B)}`) // true
    console.log(`localeCompare(B, A), ${localeCompare(B, A)}`) // true
    console.log(`localeCompare(A, C), ${localeCompare(A, C)}`) // false
    console.log(`localeCompare(B, C), ${localeCompare(B, C)}`) // false
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment