zonesan · February 11, 2020 07:33
diff --git a/sentencize.html b/sentencize.html
 <!DOCTYPE html>
 <html>
 <script>
 /**
  * Report whether a value consists solely of 7-bit ASCII characters.
  *
  * The argument is coerced to a string by the regex test, and the empty
  * string counts as ASCII.
  *
  * @param {string} str - Text to inspect.
  * @returns {boolean} True when no UTF-16 code unit lies outside \x00-\x7F.
  */
 function is_ascii(str) {
   const non_ascii = /[^\x00-\x7F]/;
   return !non_ascii.test(str);
 }

 /**
  * Report whether a character is one of the hard-coded sentence separators
  * (space plus a fixed set of ASCII and full-width punctuation marks).
  *
  * @param {string} c - Candidate character; compared with strict equality.
  * @returns {boolean} True when `c` equals one of the listed separators.
  */
 function is_seperator(c) {
   const separators = [" ", "，", "。", ",", ";", ",", "?", ".", "？", "；", "《", "》"];
   return separators.some(function (mark) {
     return mark === c;
   });
 }

 /**
  * Pick one element of an array at a random index drawn from Math.random().
  *
  * @param {Array} arr - Array to pick from.
  * @returns {*} The chosen element; `undefined` when `arr` is empty.
  */
 function random_choice(arr) {
   const size = arr.length;
   const index = Math.floor(size * Math.random());
   return arr[index];
 }

 /**
  * Shuffle an array in place.
  *
  * Walks from the end of the array towards the front; at each step a random
  * index among the not-yet-fixed positions is drawn with Math.random() and
  * its element is swapped into the last not-yet-fixed slot.
  *
  * @param {Array} array - Array to shuffle; it is mutated.
  * @returns {Array} The same array instance that was passed in.
  */
 function random_shuffle(array) {
   for (let remaining = array.length; remaining !== 0; remaining -= 1) {
     // Draw from the `remaining` leading slots, then fix the last of them.
     const pick = Math.floor(Math.random() * remaining);
     const last = remaining - 1;
     [array[last], array[pick]] = [array[pick], array[last]];
   }

   return array;
 }

 /**
  * Split text into tokens: each maximal run of characters accepted by
  * is_ascii() becomes one token, and every other character is a token of
  * its own.
  *
  * @param {string} src_txt - Text to split; iterated with for...of.
  * @returns {string[]} Tokens in source order; empty for empty input.
  */
 function tokenize(src_txt) {
   const tokens = [];
   let ascii_run = "";

   // Emit the pending ASCII run, if any, and start a new one.
   const flush_run = function () {
     if (ascii_run !== "") {
       tokens.push(ascii_run);
       ascii_run = "";
     }
   };

   for (const ch of src_txt) {
     if (!is_ascii(ch)) {
       flush_run();
       tokens.push(ch);
       continue;
     }
     ascii_run += ch;
   }
   flush_run();

   return tokens;
 }

 /**
  * Shuffle a token list locally: the list is cut into consecutive chunks
  * whose width is chosen by random_choice() from [2, 3] (the final chunk may
  * be shorter), each chunk is shuffled with random_shuffle(), and the chunks
  * are concatenated back in their original order.
  *
  * @param {Array} token_list - Tokens to reorder; not mutated.
  * @returns {Array} A new array holding the same tokens, chunk-shuffled.
  */
 function reorder(token_list) {
   const window_sizes = [2, 3];
   const shuffled = [];
   let start = 0;

   while (start < token_list.length) {
     const width = random_choice(window_sizes);
     const stop = Math.min(start + width, token_list.length);

     // slice() copies, so shuffling the chunk leaves token_list untouched.
     const chunk = token_list.slice(start, stop);
     random_shuffle(chunk);
     shuffled.push(...chunk);
     start = stop;
   }

   return shuffled;
 }

 /**
  * Scramble text while keeping separators in place.
  *
  * The input is scanned character by character. Characters that are
  * separators stay at their position; the text between two separators is
  * tokenized with tokenize(), locally shuffled with reorder(), and written
  * back in between.
  *
  * @param {string} src_txt - Text to scramble.
  * @returns {string} The scrambled text.
  */
 function sentencize(src_txt) {
   const reordered_txt = [];
   let sentence = "";

   for (const c of src_txt) {
     // Besides the explicit separators, any character that converts to a
     // number acts as a boundary: that covers the digits 0-9 and also
     // whitespace characters, because a whitespace-only string converts to 0.
     if (is_seperator(c) || !Number.isNaN(+c)) {
       reordered_txt.push(...reorder(tokenize(sentence)), c);
       sentence = "";
     } else {
       sentence += c;
     }
   }

   // Flush the trailing segment that was not closed by a separator.
   if (sentence !== "") {
     reordered_txt.push(...reorder(tokenize(sentence)));
   }

   return reordered_txt.join("");
 }
 </script>
 <body>

 <h1>研表究明，汉字的序顺并不定一能影阅响读</h1>
 <textarea id="src" name="message" rows="10" cols="30">
 研究表明，汉字的顺序并不一定能影响阅读。
 </textarea>
 <!-- Read the textarea's live .value: its innerHTML only holds the initial,
      HTML-escaped content and does not follow what the user types. The result
      is written with textContent so it is shown as text, never parsed as HTML. -->
 <button type="button"
 onclick="document.getElementById('demo').textContent = sentencize(document.getElementById('src').value)">
 变！</button>

 <p id="demo"></p>
 </body>
 </html>
	<!DOCTYPE html>
	<html>
	<script>
	/**
	 * Report whether a value consists solely of 7-bit ASCII characters.
	 *
	 * The argument is coerced to a string by the regex test, and the empty
	 * string counts as ASCII.
	 *
	 * @param {string} str - Text to inspect.
	 * @returns {boolean} True when no UTF-16 code unit lies outside \x00-\x7F.
	 */
	function is_ascii(str) {
	  const non_ascii = /[^\x00-\x7F]/;
	  return !non_ascii.test(str);
	}

	/**
	 * Report whether a character is one of the hard-coded sentence separators
	 * (space plus a fixed set of ASCII and full-width punctuation marks).
	 *
	 * @param {string} c - Candidate character; compared with strict equality.
	 * @returns {boolean} True when `c` equals one of the listed separators.
	 */
	function is_seperator(c) {
	  const separators = [" ", "，", "。", ",", ";", ",", "?", ".", "？", "；", "《", "》"];
	  return separators.some(function (mark) {
	    return mark === c;
	  });
	}

	/**
	 * Pick one element of an array at a random index drawn from Math.random().
	 *
	 * @param {Array} arr - Array to pick from.
	 * @returns {*} The chosen element; `undefined` when `arr` is empty.
	 */
	function random_choice(arr) {
	  const size = arr.length;
	  const index = Math.floor(size * Math.random());
	  return arr[index];
	}

	/**
	 * Shuffle an array in place.
	 *
	 * Walks from the end of the array towards the front; at each step a random
	 * index among the not-yet-fixed positions is drawn with Math.random() and
	 * its element is swapped into the last not-yet-fixed slot.
	 *
	 * @param {Array} array - Array to shuffle; it is mutated.
	 * @returns {Array} The same array instance that was passed in.
	 */
	function random_shuffle(array) {
	  for (let remaining = array.length; remaining !== 0; remaining -= 1) {
	    // Draw from the `remaining` leading slots, then fix the last of them.
	    const pick = Math.floor(Math.random() * remaining);
	    const last = remaining - 1;
	    [array[last], array[pick]] = [array[pick], array[last]];
	  }

	  return array;
	}

	/**
	 * Split text into tokens: each maximal run of characters accepted by
	 * is_ascii() becomes one token, and every other character is a token of
	 * its own.
	 *
	 * @param {string} src_txt - Text to split; iterated with for...of.
	 * @returns {string[]} Tokens in source order; empty for empty input.
	 */
	function tokenize(src_txt) {
	  const tokens = [];
	  let ascii_run = "";

	  // Emit the pending ASCII run, if any, and start a new one.
	  const flush_run = function () {
	    if (ascii_run !== "") {
	      tokens.push(ascii_run);
	      ascii_run = "";
	    }
	  };

	  for (const ch of src_txt) {
	    if (!is_ascii(ch)) {
	      flush_run();
	      tokens.push(ch);
	      continue;
	    }
	    ascii_run += ch;
	  }
	  flush_run();

	  return tokens;
	}

	/**
	 * Shuffle a token list locally: the list is cut into consecutive chunks
	 * whose width is chosen by random_choice() from [2, 3] (the final chunk may
	 * be shorter), each chunk is shuffled with random_shuffle(), and the chunks
	 * are concatenated back in their original order.
	 *
	 * @param {Array} token_list - Tokens to reorder; not mutated.
	 * @returns {Array} A new array holding the same tokens, chunk-shuffled.
	 */
	function reorder(token_list) {
	  const window_sizes = [2, 3];
	  const shuffled = [];
	  let start = 0;

	  while (start < token_list.length) {
	    const width = random_choice(window_sizes);
	    const stop = Math.min(start + width, token_list.length);

	    // slice() copies, so shuffling the chunk leaves token_list untouched.
	    const chunk = token_list.slice(start, stop);
	    random_shuffle(chunk);
	    shuffled.push(...chunk);
	    start = stop;
	  }

	  return shuffled;
	}

	/**
	 * Scramble text while keeping separators in place.
	 *
	 * The input is scanned character by character. Characters that are
	 * separators stay at their position; the text between two separators is
	 * tokenized with tokenize(), locally shuffled with reorder(), and written
	 * back in between.
	 *
	 * @param {string} src_txt - Text to scramble.
	 * @returns {string} The scrambled text.
	 */
	function sentencize(src_txt) {
	  const reordered_txt = [];
	  let sentence = "";

	  for (const c of src_txt) {
	    // Besides the explicit separators, any character that converts to a
	    // number acts as a boundary: that covers the digits 0-9 and also
	    // whitespace characters, because a whitespace-only string converts to 0.
	    if (is_seperator(c) || !Number.isNaN(+c)) {
	      reordered_txt.push(...reorder(tokenize(sentence)), c);
	      sentence = "";
	    } else {
	      sentence += c;
	    }
	  }

	  // Flush the trailing segment that was not closed by a separator.
	  if (sentence !== "") {
	    reordered_txt.push(...reorder(tokenize(sentence)));
	  }

	  return reordered_txt.join("");
	}
	</script>
	<body>

	<h1>研表究明，汉字的序顺并不定一能影阅响读</h1>
	<textarea id="src" name="message" rows="10" cols="30">
	研究表明，汉字的顺序并不一定能影响阅读。
	</textarea>
	<!-- Read the textarea's live .value: its innerHTML only holds the initial,
	     HTML-escaped content and does not follow what the user types. The result
	     is written with textContent so it is shown as text, never parsed as HTML. -->
	<button type="button"
	onclick="document.getElementById('demo').textContent = sentencize(document.getElementById('src').value)">
	变！</button>

	<p id="demo"></p>
	</body>
	</html>