Skip to content

Instantly share code, notes, and snippets.

@lambdalisue
Last active August 27, 2024 10:10
Show Gist options
  • Save lambdalisue/22fe05493d4a3a6c34651959e62ebb2f to your computer and use it in GitHub Desktop.
Save lambdalisue/22fe05493d4a3a6c34651959e62ebb2f to your computer and use it in GitHub Desktop.
English word length frequency
import { exists } from "jsr:@std/fs";
import { map, reduce } from "jsr:@core/iterutil";
// Location of the word list (one lowercase English word per line).
const sourceUrl =
  "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
// Local cache of the word list so it is downloaded only once.
const filename = "words_alpha.txt";
if (!(await exists(filename))) {
  const resp = await fetch(sourceUrl);
  // Refuse to cache an error page (or an empty response) as the word list;
  // otherwise a single failed download would poison every later run.
  if (!resp.ok || resp.body === null) {
    throw new Error(
      `Failed to download ${sourceUrl}: ${resp.status} ${resp.statusText}`,
    );
  }
  await Deno.writeFile(filename, resp.body);
}
const content = await Deno.readTextFile(filename);
// Length of every word. Lines are trimmed BEFORE the emptiness check so that
// whitespace-only lines (e.g. a lone "\r" from CRLF endings) are dropped
// instead of being counted as zero-length words.
const samples = content
  .split("\n")
  .map((line) => line.trim())
  .filter((word) => word.length > 0)
  .map((word) => word.length);
/**
 * Adds up all numbers in `value`.
 *
 * @param value Numbers to add.
 * @returns The total, or `0` when `value` is empty.
 */
function sum(value: number[]): number {
  // Seed the fold with 0: `reduce` without an initial value throws a
  // TypeError on an empty array.
  return value.reduce((a, v) => a + v, 0);
}
/**
 * Arithmetic mean of `value`: the total computed by `sum` divided by the
 * number of elements.
 *
 * @param value Numbers to average.
 * @returns The total of `value` divided by its length.
 */
function mean(value: number[]): number {
  const total = sum(value);
  const count = value.length;
  return total / count;
}
/**
 * Most frequently occurring number in `value`.
 *
 * When several numbers share the highest count, the one that appears first
 * in `value` wins.
 *
 * @param value Numbers to inspect; the array is not modified.
 * @returns The most frequent number, or `NaN` when `value` is empty.
 */
function mode(value: number[]): number {
  // Count occurrences directly instead of materialising one array per
  // distinct value only to read its length.
  const counts = new Map<number, number>();
  for (const v of value) {
    counts.set(v, (counts.get(v) ?? 0) + 1);
  }
  let best = NaN;
  let bestCount = 0;
  // A Map iterates in insertion order, so the strict `>` keeps the
  // first-seen value on ties.
  for (const [n, count] of counts) {
    if (count > bestCount) {
      best = n;
      bestCount = count;
    }
  }
  return best;
}
/**
 * Median of `value`.
 *
 * The input may be in any order: a numerically sorted copy is used, so the
 * caller's array is left untouched. For an even number of elements the
 * median is the average of the two middle elements.
 *
 * @param value Numbers to inspect; the array is not modified.
 * @returns The median, or `NaN` when `value` is empty.
 */
function median(value: number[]): number {
  // The middle element is only the median of ORDERED data. The default
  // `sort()` compares as strings, hence the explicit numeric comparator.
  const sorted = [...value].sort((a, b) => a - b);
  const size = sorted.length;
  if (size === 0) {
    return NaN;
  }
  const mid = Math.floor(size / 2);
  return size % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
}
/**
 * Quartiles of `value`, computed as the medians of the lower half, the whole
 * data set, and the upper half. For an odd number of elements the middle
 * element is excluded from both halves.
 *
 * The input may be in any order: a numerically sorted copy is used, so the
 * caller's array is left untouched.
 *
 * @param value Numbers to inspect; the array is not modified.
 * @returns `[q1, q2, q3]`; an entry is `NaN` when its (sub)range is empty.
 */
function quantile(value: number[]): [q1: number, q2: number, q3: number] {
  // Median of an already ordered array (NaN when empty). Kept local so the
  // data is sorted once here rather than once per quartile.
  const medianOfSorted = (sorted: number[]): number => {
    const size = sorted.length;
    if (size === 0) {
      return NaN;
    }
    const mid = Math.floor(size / 2);
    return size % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
  };

  // Halving by position only yields quartiles on ORDERED data.
  const sorted = [...value].sort((a, b) => a - b);
  const half = Math.floor(sorted.length / 2);
  const head = sorted.slice(0, half);
  // Taking the last `half` elements skips the middle element when the
  // length is odd and splits exactly in two when it is even.
  const tail = sorted.slice(sorted.length - half);
  return [medianOfSorted(head), medianOfSorted(sorted), medianOfSorted(tail)];
}
// Summary statistics of the word-length samples.
const sampleCount = samples.length;
console.log(`Frequency: ${sampleCount}`);
const average = mean(samples);
console.log(`Mean: ${average}`);
const mostCommon = mode(samples);
console.log(`Mode: ${mostCommon}`);
const middle = median(samples);
console.log(`Median: ${middle}`);
const quartiles = quantile(samples);
console.log(`Quantile: ${quartiles}`);

// Histogram: one [word length, number of words] pair per distinct length.
const counter = Array.from(
  Map.groupBy(samples, (length) => length),
  ([length, group]) => [length, group.length] as const,
);
console.log("Data:");
// Print the rows in ascending word-length order.
const rows = [...counter].sort((lhs, rhs) => lhs[0] - rhs[0]);
for (const [length, count] of rows) {
  console.log(`${String(length).padStart(2)}, ${count}`);
}
@lambdalisue
Copy link
Author

lambdalisue commented Aug 27, 2024

Frequency: 370104
Mean:      9.442467522642284
Mode:      9
Median:    11
Quantile:  8,11,11.5
Data:
 1, 26
 2, 427
 3, 2130
 4, 7186
 5, 15920
 6, 29874
 7, 41998
 8, 51627
 9, 53402
10, 45872
11, 37539
12, 29124
13, 20944
14, 14149
15, 8846
16, 5182
17, 2967
18, 1471
19, 760
20, 359
21, 168
22, 74
23, 31
24, 12
25, 8
27, 3
28, 2
29, 2
31, 1

@lambdalisue
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment