hugowetterberg · November 30, 2015 07:39
diff --git a/utf8buffer.js b/utf8buffer.js
 // https://github.com/beatgammit/base64-js
 var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

 ;(function (exports) {
 	'use strict'

 	var Arr = (typeof Uint8Array !== 'undefined')
 		? Uint8Array
 		: Array

 	var PLUS = '+'.charCodeAt(0)
 	var SLASH = '/'.charCodeAt(0)
 	var NUMBER = '0'.charCodeAt(0)
 	var LOWER = 'a'.charCodeAt(0)
 	var UPPER = 'A'.charCodeAt(0)
 	var PLUS_URL_SAFE = '-'.charCodeAt(0)
 	var SLASH_URL_SAFE = '_'.charCodeAt(0)

 	function decode (elt) {
 		var code = elt.charCodeAt(0)
 		if (code === PLUS || code === PLUS_URL_SAFE) return 62 // '+'
 		if (code === SLASH || code === SLASH_URL_SAFE) return 63 // '/'
 		if (code < NUMBER) return -1 // no match
 		if (code < NUMBER + 10) return code - NUMBER + 26 + 26
 		if (code < UPPER + 26) return code - UPPER
 		if (code < LOWER + 26) return code - LOWER + 26
 	}

 	function b64ToByteArray (b64) {
 		var i, j, l, tmp, placeHolders, arr

 		if (b64.length % 4 > 0) {
 			throw new Error('Invalid string. Length must be a multiple of 4')
 		}

 		// the number of equal signs (place holders)
 		// if there are two placeholders, than the two characters before it
 		// represent one byte
 		// if there is only one, then the three characters before it represent 2 bytes
 		// this is just a cheap hack to not do indexOf twice
 		var len = b64.length
 		placeHolders = b64.charAt(len - 2) === '=' ? 2 : b64.charAt(len - 1) === '=' ? 1 : 0

 		// base64 is 4/3 + up to two characters of the original data
 		arr = new Arr(b64.length * 3 / 4 - placeHolders)

 		// if there are placeholders, only get up to the last complete 4 chars
 		l = placeHolders > 0 ? b64.length - 4 : b64.length

 		var L = 0

 		function push (v) {
 			arr[L++] = v
 		}

 		for (i = 0, j = 0; i < l; i += 4, j += 3) {
 			tmp = (decode(b64.charAt(i)) << 18) | (decode(b64.charAt(i + 1)) << 12) | (decode(b64.charAt(i + 2)) << 6) | decode(b64.charAt(i + 3))
 			push((tmp & 0xFF0000) >> 16)
 			push((tmp & 0xFF00) >> 8)
 			push(tmp & 0xFF)
 		}

 		if (placeHolders === 2) {
 			tmp = (decode(b64.charAt(i)) << 2) | (decode(b64.charAt(i + 1)) >> 4)
 			push(tmp & 0xFF)
 		} else if (placeHolders === 1) {
 			tmp = (decode(b64.charAt(i)) << 10) | (decode(b64.charAt(i + 1)) << 4) | (decode(b64.charAt(i + 2)) >> 2)
 			push((tmp >> 8) & 0xFF)
 			push(tmp & 0xFF)
 		}

 		return arr
 	}

 	function uint8ToBase64 (uint8) {
 		var i
 		var extraBytes = uint8.length % 3 // if we have 1 byte left, pad 2 bytes
 		var output = ''
 		var temp, length

 		function encode (num) {
 			return lookup.charAt(num)
 		}

 		function tripletToBase64 (num) {
 			return encode(num >> 18 & 0x3F) + encode(num >> 12 & 0x3F) + encode(num >> 6 & 0x3F) + encode(num & 0x3F)
 		}

 		// go through the array every three bytes, we'll deal with trailing stuff later
 		for (i = 0, length = uint8.length - extraBytes; i < length; i += 3) {
 			temp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2])
 			output += tripletToBase64(temp)
 		}

 		// pad the end with zeros, but make sure to not forget the extra bytes
 		switch (extraBytes) {
 			case 1:
 				temp = uint8[uint8.length - 1]
 				output += encode(temp >> 2)
 				output += encode((temp << 4) & 0x3F)
 				output += '=='
 				break
 			case 2:
 				temp = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1])
 				output += encode(temp >> 10)
 				output += encode((temp >> 4) & 0x3F)
 				output += encode((temp << 2) & 0x3F)
 				output += '='
 				break
 			default:
 				break
 		}

 		return output
 	}

 	exports.toByteArray = b64ToByteArray
 	exports.fromByteArray = uint8ToBase64
 }(typeof exports === 'undefined' ? (this.base64js = {}) : exports))

 // Copied from http://xahlee.info/js/js_unicode_code_point.html
 // returns a char's Unicode codepoint, of the char at index idx of string str
 // 2013-07-16 from https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/charCodeAt
 /**
  * Returns the Unicode code point that starts at index idx of str.
  *
  * @param {string} str the string to read from
  * @param {number} [idx] UTF-16 code unit index, defaults to 0
  * @return {number|boolean} the code point; `false` when idx lands on a low
  *     surrogate, so loops can skip that index; NaN when idx is out of range
  * @throws {string} when a high surrogate is not followed by a low surrogate
  *     (the thrown value is a plain string, kept as-is for existing callers)
  */
 function fixedCharCodeAt (str, idx) {
 	// ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
 	// ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
 	idx = idx || 0;
 	var code = str.charCodeAt(idx);
 	var hi, low;
 	if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
 		hi = code;
 		low = str.charCodeAt(idx+1);
 		// The follower must really be a low surrogate. NaN (end of string)
 		// fails this range test too, so that case is still covered.
 		if (!(0xDC00 <= low && low <= 0xDFFF)) {
 			throw 'High surrogate not followed by low surrogate in fixedCharCodeAt()';
 		}
 		return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
 	}
 	if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
 		// We return false to allow loops to skip this iteration since the high
 		// surrogate should already have been handled in the previous iteration
 		return false;
 	}
 	return code;
 }

 // Google closure, string to byte modified to handle code points up to U+7FFFFFFF (full utf8)
 // https://github.com/google/closure-library/blob/28d9db61f5dc639c010be74e4d61682121d2dbd7/closure/goog/crypt/crypt.js#L110
 /**
  * Encodes a JS (UTF-16) string as a list of UTF-8 byte values.
  * Code points are read with fixedCharCodeAt(); indexes for which it returns
  * `false` (the low half of a surrogate pair) contribute no bytes.
  * @param {string} str 16-bit unicode string.
  * @return {!Array<number>} UTF-8 byte array.
  */
 var stringToUtf8ByteArray = function(str) {
 	// TODO(user): Use native implementations if/when available
 	var bytes = [];

 	// Appends `count` continuation bytes (10xxxxxx) of cp, highest bits first.
 	function pushTail (cp, count) {
 		for (var shift = 6 * (count - 1); shift >= 0; shift -= 6) {
 			bytes.push(((cp >> shift) & 63) | 128);
 		}
 	}

 	for (var pos = 0; pos < str.length; pos++) {
 		var cp = fixedCharCodeAt(str, pos);
 		if (cp === false) continue;

 		// The lead byte carries the length marker plus the top bits; the
 		// remaining bits follow in 6-bit groups.
 		if (cp < 0x80) {
 			bytes.push(cp);
 		} else if (cp < 0x800) {
 			bytes.push((cp >> 6) | 0xC0);
 			pushTail(cp, 1);
 		} else if (cp < 0x10000) {
 			bytes.push((cp >> 12) | 0xE0);
 			pushTail(cp, 2);
 		} else if (cp < 0x200000) {
 			bytes.push((cp >> 18) | 0xF0);
 			pushTail(cp, 3);
 		} else if (cp < 0x4000000) {
 			bytes.push((cp >> 24) | 0xF8);
 			pushTail(cp, 4);
 		} else if (cp < 0x80000000) {
 			bytes.push((cp >> 30) | 0xFC);
 			pushTail(cp, 5);
 		}
 	}
 	return bytes;
 };


 // Self-check: encode a UTF-8 heavy string with the JS implementation above
 // and compare the result with Node's native base64 encoding of the same text.
 var utfyPass = "Huöut væ 💥💖 Iñtërnâtiônàlizætiøn";
 var data = stringToUtf8ByteArray(utfyPass);
 var base = exports.fromByteArray(data);

 // Using native buffers. Buffer.from() replaces the deprecated `new Buffer()`.
 var buf = Buffer.from(utfyPass, 'utf8');

 console.log(base);
 if (buf.toString('base64') === base) {
 	console.log('Yay, we passed comparison with native base64-encoded utf8 buffer!')
 	// decode our own output natively to show that it round-trips
 	console.log(Buffer.from(base, 'base64').toString('utf8'));
 } else {
 	console.log("Noo! Fail!")
 }
	// https://github.com/beatgammit/base64-js
	var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

	;(function (exports) {
	'use strict'

	var Arr = (typeof Uint8Array !== 'undefined')
	? Uint8Array
	: Array

	var PLUS = '+'.charCodeAt(0)
	var SLASH = '/'.charCodeAt(0)
	var NUMBER = '0'.charCodeAt(0)
	var LOWER = 'a'.charCodeAt(0)
	var UPPER = 'A'.charCodeAt(0)
	var PLUS_URL_SAFE = '-'.charCodeAt(0)
	var SLASH_URL_SAFE = '_'.charCodeAt(0)

	function decode (elt) {
	var code = elt.charCodeAt(0)
	if (code === PLUS \|\| code === PLUS_URL_SAFE) return 62 // '+'
	if (code === SLASH \|\| code === SLASH_URL_SAFE) return 63 // '/'
	if (code < NUMBER) return -1 // no match
	if (code < NUMBER + 10) return code - NUMBER + 26 + 26
	if (code < UPPER + 26) return code - UPPER
	if (code < LOWER + 26) return code - LOWER + 26
	}

	function b64ToByteArray (b64) {
	var i, j, l, tmp, placeHolders, arr

	if (b64.length % 4 > 0) {
	throw new Error('Invalid string. Length must be a multiple of 4')
	}

	// the number of equal signs (place holders)
	// if there are two placeholders, than the two characters before it
	// represent one byte
	// if there is only one, then the three characters before it represent 2 bytes
	// this is just a cheap hack to not do indexOf twice
	var len = b64.length
	placeHolders = b64.charAt(len - 2) === '=' ? 2 : b64.charAt(len - 1) === '=' ? 1 : 0

	// base64 is 4/3 + up to two characters of the original data
	arr = new Arr(b64.length * 3 / 4 - placeHolders)

	// if there are placeholders, only get up to the last complete 4 chars
	l = placeHolders > 0 ? b64.length - 4 : b64.length

	var L = 0

	function push (v) {
	arr[L++] = v
	}

	for (i = 0, j = 0; i < l; i += 4, j += 3) {
	tmp = (decode(b64.charAt(i)) << 18) \| (decode(b64.charAt(i + 1)) << 12) \| (decode(b64.charAt(i + 2)) << 6) \| decode(b64.charAt(i + 3))
	push((tmp & 0xFF0000) >> 16)
	push((tmp & 0xFF00) >> 8)
	push(tmp & 0xFF)
	}

	if (placeHolders === 2) {
	tmp = (decode(b64.charAt(i)) << 2) \| (decode(b64.charAt(i + 1)) >> 4)
	push(tmp & 0xFF)
	} else if (placeHolders === 1) {
	tmp = (decode(b64.charAt(i)) << 10) \| (decode(b64.charAt(i + 1)) << 4) \| (decode(b64.charAt(i + 2)) >> 2)
	push((tmp >> 8) & 0xFF)
	push(tmp & 0xFF)
	}

	return arr
	}

	function uint8ToBase64 (uint8) {
	var i
	var extraBytes = uint8.length % 3 // if we have 1 byte left, pad 2 bytes
	var output = ''
	var temp, length

	function encode (num) {
	return lookup.charAt(num)
	}

	function tripletToBase64 (num) {
	return encode(num >> 18 & 0x3F) + encode(num >> 12 & 0x3F) + encode(num >> 6 & 0x3F) + encode(num & 0x3F)
	}

	// go through the array every three bytes, we'll deal with trailing stuff later
	for (i = 0, length = uint8.length - extraBytes; i < length; i += 3) {
	temp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2])
	output += tripletToBase64(temp)
	}

	// pad the end with zeros, but make sure to not forget the extra bytes
	switch (extraBytes) {
	case 1:
	temp = uint8[uint8.length - 1]
	output += encode(temp >> 2)
	output += encode((temp << 4) & 0x3F)
	output += '=='
	break
	case 2:
	temp = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1])
	output += encode(temp >> 10)
	output += encode((temp >> 4) & 0x3F)
	output += encode((temp << 2) & 0x3F)
	output += '='
	break
	default:
	break
	}

	return output
	}

	exports.toByteArray = b64ToByteArray
	exports.fromByteArray = uint8ToBase64
	}(typeof exports === 'undefined' ? (this.base64js = {}) : exports))

	// Copied from http://xahlee.info/js/js_unicode_code_point.html
	// returns a char's Unicode codepoint, of the char at index idx of string str
	// 2013-07-16 from https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/charCodeAt
	/**
	 * Returns the Unicode code point that starts at index idx of str.
	 *
	 * @param {string} str the string to read from
	 * @param {number} [idx] UTF-16 code unit index, defaults to 0
	 * @return {number|boolean} the code point; `false` when idx lands on a low
	 *     surrogate, so loops can skip that index; NaN when idx is out of range
	 * @throws {string} when a high surrogate is not followed by a low surrogate
	 *     (the thrown value is a plain string, kept as-is for existing callers)
	 */
	function fixedCharCodeAt (str, idx) {
		// ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
		// ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
		idx = idx || 0;
		var code = str.charCodeAt(idx);
		var hi, low;
		if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
			hi = code;
			low = str.charCodeAt(idx+1);
			// The follower must really be a low surrogate. NaN (end of string)
			// fails this range test too, so that case is still covered.
			if (!(0xDC00 <= low && low <= 0xDFFF)) {
				throw 'High surrogate not followed by low surrogate in fixedCharCodeAt()';
			}
			return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
		}
		if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
			// We return false to allow loops to skip this iteration since the high
			// surrogate should already have been handled in the previous iteration
			return false;
		}
		return code;
	}

	// Google closure, string to byte modified to handle code points up to U+7FFFFFFF (full utf8)
	// https://github.com/google/closure-library/blob/28d9db61f5dc639c010be74e4d61682121d2dbd7/closure/goog/crypt/crypt.js#L110
	/**
	 * Encodes a JS (UTF-16) string as a list of UTF-8 byte values.
	 * Code points are read with fixedCharCodeAt(); indexes for which it returns
	 * `false` (the low half of a surrogate pair) contribute no bytes.
	 * @param {string} str 16-bit unicode string.
	 * @return {!Array<number>} UTF-8 byte array.
	 */
	var stringToUtf8ByteArray = function(str) {
		// TODO(user): Use native implementations if/when available
		var bytes = [];

		// Appends `count` continuation bytes (10xxxxxx) of cp, highest bits first.
		function pushTail (cp, count) {
			for (var shift = 6 * (count - 1); shift >= 0; shift -= 6) {
				bytes.push(((cp >> shift) & 63) | 128);
			}
		}

		for (var pos = 0; pos < str.length; pos++) {
			var cp = fixedCharCodeAt(str, pos);
			if (cp === false) continue;

			// The lead byte carries the length marker plus the top bits; the
			// remaining bits follow in 6-bit groups.
			if (cp < 0x80) {
				bytes.push(cp);
			} else if (cp < 0x800) {
				bytes.push((cp >> 6) | 0xC0);
				pushTail(cp, 1);
			} else if (cp < 0x10000) {
				bytes.push((cp >> 12) | 0xE0);
				pushTail(cp, 2);
			} else if (cp < 0x200000) {
				bytes.push((cp >> 18) | 0xF0);
				pushTail(cp, 3);
			} else if (cp < 0x4000000) {
				bytes.push((cp >> 24) | 0xF8);
				pushTail(cp, 4);
			} else if (cp < 0x80000000) {
				bytes.push((cp >> 30) | 0xFC);
				pushTail(cp, 5);
			}
		}
		return bytes;
	};


	// Self-check: encode a UTF-8 heavy string with the JS implementation above
	// and compare the result with Node's native base64 encoding of the same text.
	var utfyPass = "Huöut væ 💥💖 Iñtërnâtiônàlizætiøn";
	var data = stringToUtf8ByteArray(utfyPass);
	var base = exports.fromByteArray(data);

	// Using native buffers. Buffer.from() replaces the deprecated `new Buffer()`.
	var buf = Buffer.from(utfyPass, 'utf8');

	console.log(base);
	if (buf.toString('base64') === base) {
		console.log('Yay, we passed comparison with native base64-encoded utf8 buffer!')
		// decode our own output natively to show that it round-trips
		console.log(Buffer.from(base, 'base64').toString('utf8'));
	} else {
		console.log("Noo! Fail!")
	}