Created
November 30, 2015 07:39
-
-
Save hugowetterberg/6b6275feb028bdc26bb6 to your computer and use it in GitHub Desktop.
A test of utf8-encoding in javascript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/beatgammit/base64-js | |
var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' | |
;(function (exports) { | |
'use strict' | |
var Arr = (typeof Uint8Array !== 'undefined') | |
? Uint8Array | |
: Array | |
var PLUS = '+'.charCodeAt(0) | |
var SLASH = '/'.charCodeAt(0) | |
var NUMBER = '0'.charCodeAt(0) | |
var LOWER = 'a'.charCodeAt(0) | |
var UPPER = 'A'.charCodeAt(0) | |
var PLUS_URL_SAFE = '-'.charCodeAt(0) | |
var SLASH_URL_SAFE = '_'.charCodeAt(0) | |
function decode (elt) { | |
var code = elt.charCodeAt(0) | |
if (code === PLUS || code === PLUS_URL_SAFE) return 62 // '+' | |
if (code === SLASH || code === SLASH_URL_SAFE) return 63 // '/' | |
if (code < NUMBER) return -1 // no match | |
if (code < NUMBER + 10) return code - NUMBER + 26 + 26 | |
if (code < UPPER + 26) return code - UPPER | |
if (code < LOWER + 26) return code - LOWER + 26 | |
} | |
function b64ToByteArray (b64) { | |
var i, j, l, tmp, placeHolders, arr | |
if (b64.length % 4 > 0) { | |
throw new Error('Invalid string. Length must be a multiple of 4') | |
} | |
// the number of equal signs (place holders) | |
// if there are two placeholders, than the two characters before it | |
// represent one byte | |
// if there is only one, then the three characters before it represent 2 bytes | |
// this is just a cheap hack to not do indexOf twice | |
var len = b64.length | |
placeHolders = b64.charAt(len - 2) === '=' ? 2 : b64.charAt(len - 1) === '=' ? 1 : 0 | |
// base64 is 4/3 + up to two characters of the original data | |
arr = new Arr(b64.length * 3 / 4 - placeHolders) | |
// if there are placeholders, only get up to the last complete 4 chars | |
l = placeHolders > 0 ? b64.length - 4 : b64.length | |
var L = 0 | |
function push (v) { | |
arr[L++] = v | |
} | |
for (i = 0, j = 0; i < l; i += 4, j += 3) { | |
tmp = (decode(b64.charAt(i)) << 18) | (decode(b64.charAt(i + 1)) << 12) | (decode(b64.charAt(i + 2)) << 6) | decode(b64.charAt(i + 3)) | |
push((tmp & 0xFF0000) >> 16) | |
push((tmp & 0xFF00) >> 8) | |
push(tmp & 0xFF) | |
} | |
if (placeHolders === 2) { | |
tmp = (decode(b64.charAt(i)) << 2) | (decode(b64.charAt(i + 1)) >> 4) | |
push(tmp & 0xFF) | |
} else if (placeHolders === 1) { | |
tmp = (decode(b64.charAt(i)) << 10) | (decode(b64.charAt(i + 1)) << 4) | (decode(b64.charAt(i + 2)) >> 2) | |
push((tmp >> 8) & 0xFF) | |
push(tmp & 0xFF) | |
} | |
return arr | |
} | |
function uint8ToBase64 (uint8) { | |
var i | |
var extraBytes = uint8.length % 3 // if we have 1 byte left, pad 2 bytes | |
var output = '' | |
var temp, length | |
function encode (num) { | |
return lookup.charAt(num) | |
} | |
function tripletToBase64 (num) { | |
return encode(num >> 18 & 0x3F) + encode(num >> 12 & 0x3F) + encode(num >> 6 & 0x3F) + encode(num & 0x3F) | |
} | |
// go through the array every three bytes, we'll deal with trailing stuff later | |
for (i = 0, length = uint8.length - extraBytes; i < length; i += 3) { | |
temp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2]) | |
output += tripletToBase64(temp) | |
} | |
// pad the end with zeros, but make sure to not forget the extra bytes | |
switch (extraBytes) { | |
case 1: | |
temp = uint8[uint8.length - 1] | |
output += encode(temp >> 2) | |
output += encode((temp << 4) & 0x3F) | |
output += '==' | |
break | |
case 2: | |
temp = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1]) | |
output += encode(temp >> 10) | |
output += encode((temp >> 4) & 0x3F) | |
output += encode((temp << 2) & 0x3F) | |
output += '=' | |
break | |
default: | |
break | |
} | |
return output | |
} | |
exports.toByteArray = b64ToByteArray | |
exports.fromByteArray = uint8ToBase64 | |
}(typeof exports === 'undefined' ? (this.base64js = {}) : exports)) | |
// Copied from http://xahlee.info/js/js_unicode_code_point.html
// 2013-07-16 from https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/charCodeAt
/**
 * Returns the Unicode code point of the character that starts at index
 * `idx` of `str`, combining a surrogate pair into a single code point.
 *
 * @param {string} str String to read from.
 * @param {number=} idx Index of the UTF-16 code unit to inspect; a falsy
 *     value is treated as 0.
 * @return {number|boolean} The code point, or `false` when the unit at
 *     `idx` is a low surrogate, so that loops stepping one code unit at a
 *     time can skip the second half of a pair.
 * @throws {Error} If the unit at `idx` is a high surrogate that is not
 *     followed by a low surrogate.
 */
function fixedCharCodeAt (str, idx) {
  // ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
  // ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
  idx = idx || 0;
  var code = str.charCodeAt(idx);
  var hi, low;
  if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
    hi = code;
    low = str.charCodeAt(idx + 1);
    // charCodeAt yields NaN past the end of the string; NaN fails both
    // comparisons, so this one test covers "nothing follows" as well as
    // "what follows is not a low surrogate".
    if (!(0xDC00 <= low && low <= 0xDFFF)) {
      throw new Error('High surrogate not followed by low surrogate in fixedCharCodeAt()');
    }
    return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
  }
  if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
    // We return false to allow loops to skip this iteration since should have already handled high surrogate above in the previous iteration
    return false;
  }
  return code;
}
// Google closure, string to byte modified to handle code points up to U+7FFFFFFF (full utf8)
// https://github.com/google/closure-library/blob/28d9db61f5dc639c010be74e4d61682121d2dbd7/closure/goog/crypt/crypt.js#L110
/**
 * Encodes a JS string as an array of UTF-8 byte values.
 * @param {string} str 16-bit unicode string.
 * @return {!Array<number>} UTF-8 byte array.
 */
var stringToUtf8ByteArray = function(str) {
  // TODO(user): Use native implementations if/when available
  var bytes = [];
  for (var pos = 0, end = str.length; pos < end; pos += 1) {
    var codePoint = fixedCharCodeAt(str, pos);
    if (codePoint === false) {
      // Low surrogate: fixedCharCodeAt reports these as `false`, and the
      // pair was already emitted when its high surrogate was visited.
      continue;
    }

    // Single-byte (ASCII) form: the value is its own encoding.
    if (codePoint < 0x80) {
      bytes.push(codePoint);
      continue;
    }

    // Pick the sequence length: `trailing` continuation bytes follow a
    // lead byte whose high bits are given by `marker`.
    var trailing;
    var marker;
    if (codePoint < 0x800) {
      trailing = 1;
      marker = 0xC0;
    } else if (codePoint < 0x10000) {
      trailing = 2;
      marker = 0xE0;
    } else if (codePoint < 0x200000) {
      trailing = 3;
      marker = 0xF0;
    } else if (codePoint < 0x4000000) {
      trailing = 4;
      marker = 0xF8;
    } else if (codePoint < 0x80000000) {
      trailing = 5;
      marker = 0xFC;
    } else {
      // Values that fit none of the ranges produce no output.
      continue;
    }

    // Lead byte holds the bits above the continuation payload.
    bytes.push((codePoint >> (6 * trailing)) | marker);
    // Continuation bytes carry six bits each, most significant group first.
    for (var shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
      bytes.push(((codePoint >> shift) & 0x3F) | 0x80);
    }
  }
  return bytes;
};
// Self-check: encode a string containing non-ASCII and astral characters
// with the pure-JS UTF-8 + base64 code above, then compare the result with
// Node's native Buffer encoding of the same string.

// Using the js-implementation
var utfyPass = "Huöut væ 💥💖 Iñtërnâtiônàlizætiøn";
var data = stringToUtf8ByteArray(utfyPass);
var base = exports.fromByteArray(data);
// Using native buffer
// Buffer.from replaces the deprecated `new Buffer(...)` constructor.
var buf = Buffer.from(utfyPass, 'utf8');
console.log(base);
if (buf.toString('base64') === base) {
  console.log('Yay, we passed comparison with native base64-encoded utf8 buffer!')
  // Round-trip the JS-produced base64 back through the native decoder.
  console.log(Buffer.from(base, 'base64').toString('utf8'));
} else {
  console.log("Noo! Fail!")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment