anuragsoni · March 20, 2021 03:37
diff --git a/count_words.ml b/count_words.ml
 module Trie = struct
  type t = { mutable count : int; children : t option Array.t }

  let[@inline] empty () = { count = 0; children = Array.make 128 None }

  let add buf pos len t =
    let rec loop idx t =
      if idx > pos + len - 1 then t.count <- t.count + 1
      else
        let c = Char.lowercase_ascii (Bytes.get buf idx) in
        let t' =
          match t.children.(Char.code c) with
          | None ->
              let t'' = empty () in
              t.children.(Char.code c) <- Some t'';
              t''
          | Some c -> c
        in
        loop (idx + 1) t'
    in
    loop pos t

  let to_assoc t =
    let rec loop acc curr t =
      let acc =
        match (curr, t.count) with
        | [], _ -> acc
        | _, 0 -> acc
        | curr, c -> (List.rev curr, c) :: acc
      in
      let r = ref acc in
      Array.iteri
        (fun idx t' ->
          match t' with
          | None -> ()
          | Some t' -> r := loop !r (Char.chr idx :: curr) t')
        t.children;
      !r
    in
    loop [] [] t
 end

 let[@inline] is_whitespace c = c = ' ' || c = '\n'

 let rec skip_whitespace countwords pad ~lim i =
  if i >= lim then (read_more_and_white [@tailcall]) countwords pad i
  else if is_whitespace @@ Bytes.unsafe_get pad i then
    (skip_whitespace [@tailcall]) countwords pad ~lim (i + 1)
  else (read_next_word [@tailcall]) countwords pad ~lim i i

 and read_next_word countwords pad ~lim i j =
  if j >= lim then (read_more_and_next [@tailcall]) countwords pad i j
  else if is_whitespace @@ Bytes.unsafe_get pad j then (
    Trie.add pad i (j - i) countwords;
    (skip_whitespace [@tailcall]) countwords pad ~lim (j + 1))
  else (read_next_word [@tailcall]) countwords pad ~lim i (j + 1)

 and read_more_and_next countwords pad i j =
  let l = j - i in
  Bytes.blit pad i pad 0 l;
  let read = input stdin pad l (Bytes.length pad - l) in
  let lim = l + read in
  if lim = 0 then () else (read_next_word [@tailcall]) countwords pad ~lim 0 l

 and read_more_and_white countwords pad i =
  let lim = input stdin pad 0 (Bytes.length pad) in
  if lim = 0 then () else (skip_whitespace [@tailcall]) countwords pad ~lim 0

 let[@inline] read_start countwords pad =
  (read_more_and_next [@specialize]) countwords pad 0 0

 let () =
  let t = Trie.empty () in
  let pad = Bytes.make (64 * 1024) ' ' in
  read_start t pad;
  let assoc = Array.of_list (Trie.to_assoc t) in
  Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
  Array.iter
    (fun (word, count) ->
      List.iter print_char word;
      print_char ' ';
      print_int count;
      print_newline ())
    assoc
diff --git a/count_words_2.ml b/count_words_2.ml
 (* Simpler IO doesn't make the timing worse *)
 module Trie = struct
  type t = { mutable count : int; children : t option Array.t }

  let[@inline] empty () = { count = 0; children = Array.make 128 None }

  let add s pos len t =
    let rec loop idx t =
      if idx > pos + len - 1 then t.count <- t.count + 1
      else
        let c = Char.lowercase_ascii (String.get s idx) in
        let t' =
          match t.children.(Char.code c) with
          | None ->
              let t'' = empty () in
              t.children.(Char.code c) <- Some t'';
              t''
          | Some c -> c
        in
        loop (idx + 1) t'
    in
    loop pos t

  let to_assoc t =
    let rec loop acc curr t =
      let acc =
        match (curr, t.count) with
        | [], _ -> acc
        | _, 0 -> acc
        | curr, c -> (List.rev curr, c) :: acc
      in
      let r = ref acc in
      Array.iteri
        (fun idx t' ->
          match t' with
          | None -> ()
          | Some t' -> r := loop !r (Char.chr idx :: curr) t')
        t.children;
      !r
    in
    loop [] [] t
 end

 let read_input t =
  try
    let rec loop () =
      let line = input_line stdin in
      let start = ref 0 in
      let len = ref 0 in
      for i = 0 to String.length line - 1 do
        if line.[i] <> ' ' then incr len
        else (
          Trie.add line !start !len t;
          start := i + 1;
          len := 0)
      done;
      Trie.add line !start !len t;
      loop ()
    in
    loop ()
  with End_of_file -> ()

 let () =
  let t = Trie.empty () in
  read_input t;
  let assoc = Array.of_list (Trie.to_assoc t) in
  Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
    Array.iter
    (fun (word, count) ->
      List.iter print_char word;
      print_char ' ';
      print_int count;
      print_newline ())
    assoc
diff --git a/mutable_countwords.ml b/mutable_countwords.ml
 module Buf = struct
  type t = { buf : Bytes.t; mutable pos : int; mutable len : int }

  let create size = { buf = Bytes.create size; pos = 0; len = 0 }

  let compress t =
    if t.len <= 0 || t.pos = t.len then (
      t.pos <- 0;
      t.len <- 0)
    else if t.pos > 0 then (
      (* Shifting the unconsumed bytes to the start of the buffer. We will always
         have enough room to shift these bytes so it should be fine to use unsafe_blit here. *)
      BytesLabels.unsafe_blit ~src:t.buf ~src_pos:t.pos ~dst:t.buf ~dst_pos:0
        ~len:t.len;
      t.pos <- 0)

  let read_into t fd =
    compress t;
    let count =
      UnixLabels.read fd ~buf:t.buf ~pos:(t.pos + t.len) ~len:(Bytes.length t.buf - t.len)
    in
    t.len <- t.len + count;
    for i = t.pos to t.len - 1 do
      Bytes.unsafe_set t.buf i (Char.lowercase_ascii (Bytes.unsafe_get t.buf i))
    done;
    count

  let[@inline] is_delim c = c = '\n' || c = ' '

  let iter_words f t =
    let prev = ref t.pos in
    for i = t.pos to t.len - 1 do
      (* We perform bounds check so its okay to call unsafe_get here *)
      if is_delim (Bytes.unsafe_get t.buf i) then (
        f t.buf !prev (i - !prev);
        prev := i + 1)
    done;
    t.len <- t.len - !prev;
    t.pos <- !prev
 end

 module BytesHash = struct
  include Bytes

  let hash (k : bytes) = Hashtbl.hash k
 end

 module BytesHashtbl = Hashtbl.Make (BytesHash)

 let process_word countwords buf pos len =
  if len > 0 then
    let word = Bytes.sub buf pos len in
    try incr (BytesHashtbl.find countwords word)
    with Not_found -> BytesHashtbl.add countwords word (ref 1)

 let () =
  let countwords = BytesHashtbl.create 33_000 in
  let buf = Buf.create (64 * 1024) in
  while Buf.read_into buf Unix.stdin > 0 do
    Buf.iter_words (fun buf pos len -> process_word countwords buf pos len) buf
  done;
  let arr = Array.make (BytesHashtbl.length countwords) (Bytes.empty, 0) in
  let idx = ref 0 in
  BytesHashtbl.iter
    (fun word count ->
      arr.(!idx) <- (word, !count);
      incr idx)
    countwords;
  Array.sort (fun (_, x) (_, y) -> Int.compare y x) arr;
  Array.iter
    (fun (w, c) ->
      print_bytes w;
      print_char ' ';
      print_int c;
      print_newline ())
    arr
	module Trie = struct
	type t = { mutable count : int; children : t option Array.t }

	let[@inline] empty () = { count = 0; children = Array.make 128 None }

	let add buf pos len t =
	let rec loop idx t =
	if idx > pos + len - 1 then t.count <- t.count + 1
	else
	let c = Char.lowercase_ascii (Bytes.get buf idx) in
	let t' =
	match t.children.(Char.code c) with
	\| None ->
	let t'' = empty () in
	t.children.(Char.code c) <- Some t'';
	t''
	\| Some c -> c
	in
	loop (idx + 1) t'
	in
	loop pos t

	let to_assoc t =
	let rec loop acc curr t =
	let acc =
	match (curr, t.count) with
	\| [], _ -> acc
	\| _, 0 -> acc
	\| curr, c -> (List.rev curr, c) :: acc
	in
	let r = ref acc in
	Array.iteri
	(fun idx t' ->
	match t' with
	\| None -> ()
	\| Some t' -> r := loop !r (Char.chr idx :: curr) t')
	t.children;
	!r
	in
	loop [] [] t
	end

	let[@inline] is_whitespace c = c = ' ' \|\| c = '\n'

	let rec skip_whitespace countwords pad ~lim i =
	if i >= lim then (read_more_and_white [@tailcall]) countwords pad i
	else if is_whitespace @@ Bytes.unsafe_get pad i then
	(skip_whitespace [@tailcall]) countwords pad ~lim (i + 1)
	else (read_next_word [@tailcall]) countwords pad ~lim i i

	and read_next_word countwords pad ~lim i j =
	if j >= lim then (read_more_and_next [@tailcall]) countwords pad i j
	else if is_whitespace @@ Bytes.unsafe_get pad j then (
	Trie.add pad i (j - i) countwords;
	(skip_whitespace [@tailcall]) countwords pad ~lim (j + 1))
	else (read_next_word [@tailcall]) countwords pad ~lim i (j + 1)

	and read_more_and_next countwords pad i j =
	let l = j - i in
	Bytes.blit pad i pad 0 l;
	let read = input stdin pad l (Bytes.length pad - l) in
	let lim = l + read in
	if lim = 0 then () else (read_next_word [@tailcall]) countwords pad ~lim 0 l

	and read_more_and_white countwords pad i =
	let lim = input stdin pad 0 (Bytes.length pad) in
	if lim = 0 then () else (skip_whitespace [@tailcall]) countwords pad ~lim 0

	let[@inline] read_start countwords pad =
	(read_more_and_next [@specialize]) countwords pad 0 0

	let () =
	let t = Trie.empty () in
	let pad = Bytes.make (64 * 1024) ' ' in
	read_start t pad;
	let assoc = Array.of_list (Trie.to_assoc t) in
	Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
	Array.iter
	(fun (word, count) ->
	List.iter print_char word;
	print_char ' ';
	print_int count;
	print_newline ())
	assoc
	(* Simpler IO doesn't make the timing worse *)
	module Trie = struct
	type t = { mutable count : int; children : t option Array.t }

	let[@inline] empty () = { count = 0; children = Array.make 128 None }

	let add s pos len t =
	let rec loop idx t =
	if idx > pos + len - 1 then t.count <- t.count + 1
	else
	let c = Char.lowercase_ascii (String.get s idx) in
	let t' =
	match t.children.(Char.code c) with
	\| None ->
	let t'' = empty () in
	t.children.(Char.code c) <- Some t'';
	t''
	\| Some c -> c
	in
	loop (idx + 1) t'
	in
	loop pos t

	let to_assoc t =
	let rec loop acc curr t =
	let acc =
	match (curr, t.count) with
	\| [], _ -> acc
	\| _, 0 -> acc
	\| curr, c -> (List.rev curr, c) :: acc
	in
	let r = ref acc in
	Array.iteri
	(fun idx t' ->
	match t' with
	\| None -> ()
	\| Some t' -> r := loop !r (Char.chr idx :: curr) t')
	t.children;
	!r
	in
	loop [] [] t
	end

	let read_input t =
	try
	let rec loop () =
	let line = input_line stdin in
	let start = ref 0 in
	let len = ref 0 in
	for i = 0 to String.length line - 1 do
	if line.[i] <> ' ' then incr len
	else (
	Trie.add line !start !len t;
	start := i + 1;
	len := 0)
	done;
	Trie.add line !start !len t;
	loop ()
	in
	loop ()
	with End_of_file -> ()

	let () =
	let t = Trie.empty () in
	read_input t;
	let assoc = Array.of_list (Trie.to_assoc t) in
	Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
	Array.iter
	(fun (word, count) ->
	List.iter print_char word;
	print_char ' ';
	print_int count;
	print_newline ())
	assoc
	module Buf = struct
	type t = { buf : Bytes.t; mutable pos : int; mutable len : int }

	let create size = { buf = Bytes.create size; pos = 0; len = 0 }

	let compress t =
	if t.len <= 0 \|\| t.pos = t.len then (
	t.pos <- 0;
	t.len <- 0)
	else if t.pos > 0 then (
	(* Shifting the unconsumed bytes to the start of the buffer. We will always
	have enough room to shift these bytes so it should be fine to use unsafe_blit here. *)
	BytesLabels.unsafe_blit ~src:t.buf ~src_pos:t.pos ~dst:t.buf ~dst_pos:0
	~len:t.len;
	t.pos <- 0)

	let read_into t fd =
	compress t;
	let count =
	UnixLabels.read fd ~buf:t.buf ~pos:(t.pos + t.len) ~len:(Bytes.length t.buf - t.len)
	in
	t.len <- t.len + count;
	for i = t.pos to t.len - 1 do
	Bytes.unsafe_set t.buf i (Char.lowercase_ascii (Bytes.unsafe_get t.buf i))
	done;
	count

	let[@inline] is_delim c = c = '\n' \|\| c = ' '

	let iter_words f t =
	let prev = ref t.pos in
	for i = t.pos to t.len - 1 do
	(* We perform bounds check so its okay to call unsafe_get here *)
	if is_delim (Bytes.unsafe_get t.buf i) then (
	f t.buf !prev (i - !prev);
	prev := i + 1)
	done;
	t.len <- t.len - !prev;
	t.pos <- !prev
	end

	module BytesHash = struct
	include Bytes

	let hash (k : bytes) = Hashtbl.hash k
	end

	module BytesHashtbl = Hashtbl.Make (BytesHash)

	let process_word countwords buf pos len =
	if len > 0 then
	let word = Bytes.sub buf pos len in
	try incr (BytesHashtbl.find countwords word)
	with Not_found -> BytesHashtbl.add countwords word (ref 1)

	let () =
	let countwords = BytesHashtbl.create 33_000 in
	let buf = Buf.create (64 * 1024) in
	while Buf.read_into buf Unix.stdin > 0 do
	Buf.iter_words (fun buf pos len -> process_word countwords buf pos len) buf
	done;
	let arr = Array.make (BytesHashtbl.length countwords) (Bytes.empty, 0) in
	let idx = ref 0 in
	BytesHashtbl.iter
	(fun word count ->
	arr.(!idx) <- (word, !count);
	incr idx)
	countwords;
	Array.sort (fun (_, x) (_, y) -> Int.compare y x) arr;
	Array.iter
	(fun (w, c) ->
	print_bytes w;
	print_char ' ';
	print_int c;
	print_newline ())
	arr