Created
August 7, 2023 17:30
-
-
Save Guest0x0/f50f6cb7a39aab60ea71ba7d40515306 to your computer and use it in GitHub Desktop.
Hash Array Mapped Trie implementation & benchmark in OCaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* HAMT v.s. AVL benchmark. | |
test for their usage as string map. | |
usage: | |
> ocamlopt HAMT.ml -o <executable-name> | |
> <executable-name> (avl|hamt) (random|ordered) <key-length> | |
time data of the form: | |
<tree-size> <add-time> <find-time> | |
will be displayed in stdout, | |
some progress information indicating the program is progressively running | |
will be displayed in stderr. *) | |
let ctpop = | |
let low_of_2bits = 0x55555555 in | |
let low_of_4bits = 0x33333333 in | |
let low_of_8bits = 0x0f0f0f0f in | |
fun n -> | |
(* let c2bits = (n land low_of_2bits ) + (n lsr 1) land low_of_2bits in *) | |
let c2bits = n - (n lsr 1) land low_of_2bits in | |
let c4bits = (c2bits land low_of_4bits ) + (c2bits lsr 2) land low_of_4bits in | |
let c8bits = (c4bits land low_of_8bits ) + (c4bits lsr 4) land low_of_8bits in | |
(* let c16bits = (c8bits land low_of_16bits) + (c8bits lsr 8) land low_of_16bits in *) | |
let c16bits = c8bits + (c8bits lsr 8) in | |
(c16bits + (c16bits lsr 16)) land 0x3f | |
let get_seg offset len value = | |
let mask = ((1 lsl len) - 1) in | |
(value lsr (32 - len - offset)) land mask | |
type bitmap = int | |
let bitmap_empty = 0 | |
let bitmap_mem idx bitmap = | |
(bitmap land (1 lsl idx)) <> 0 | |
let bitmap_get idx bitmap = | |
let below_mask = (1 lsl idx) - 1 in | |
ctpop (bitmap land below_mask) | |
let bitmap_set idx bitmap = | |
bitmap lor (1 lsl idx) | |
let bitmap_remove idx bitmap = | |
bitmap lxor (1 lsl idx) | |
type 'a sparse_array = | |
{ bitmap : bitmap | |
; data : 'a Array.t } | |
let sa_mem idx sa = | |
bitmap_mem idx sa.bitmap | |
let sa_find_opt idx sa = | |
match bitmap_mem idx sa.bitmap with | |
| false -> | |
None | |
| true -> | |
Some(sa.data.(bitmap_get idx sa.bitmap)) | |
let sa_add idx v sa = | |
let pos = bitmap_get idx sa.bitmap in | |
let data' = Array.init (Array.length sa.data + 1) @@ fun i -> | |
if i < pos | |
then sa.data.(i) | |
else if i = pos | |
then v | |
else sa.data.(i - 1) | |
in | |
{ bitmap = bitmap_set idx sa.bitmap | |
; data = data' } | |
let sa_update idx f sa = | |
let pos = bitmap_get idx sa.bitmap in | |
let data' = Array.copy sa.data in | |
data'.(pos) <- f sa.data.(pos); | |
{ sa with data = data' } | |
let seg_len = 5 | |
type ('k, 'v) t = | |
| Empty | |
| Leaf of 'k * 'v | |
| Collision of ('k * 'v) list | |
| Node of ('k, 'v) t sparse_array | |
let empty = Empty | |
let find_opt k t = | |
let h = Hashtbl.hash k in | |
let rec loop offset t = | |
match t with | |
| Leaf(k', v) when k' = k -> | |
Some v | |
| Empty | |
| Leaf(_, _) -> | |
None | |
| Collision buckets -> | |
List.assoc_opt k buckets | |
| Node sa -> | |
let idx = get_seg offset seg_len h in | |
Option.bind (sa_find_opt idx sa) (loop (offset + seg_len)) | |
in | |
loop 0 t | |
let rec insert_assoc k v = function | |
| [] -> | |
[k, v] | |
| (k', _) :: tl when k' = k -> | |
(k, v) :: tl | |
| (k', v') :: tl -> | |
(k', v') :: insert_assoc k v tl | |
let add k v t = | |
let h = Hashtbl.hash k in | |
let rec loop offset t = | |
match t with | |
| Empty when offset + seg_len >= 32 -> | |
Leaf(k, v) | |
| Empty -> | |
let idx = get_seg offset seg_len h in | |
Node { | |
bitmap = bitmap_set idx bitmap_empty; | |
data = [| loop (offset + seg_len) Empty |] | |
} | |
| Leaf(k', _) when k' = k -> | |
Leaf(k, v) | |
| Leaf(k', v') -> | |
Collision [k, v; k', v'] | |
| Collision buckets -> | |
Collision(insert_assoc k v buckets) | |
| Node sa -> | |
let idx = get_seg offset seg_len h in | |
if sa_mem idx sa | |
then | |
Node(sa_update idx (loop (offset + seg_len)) sa) | |
else | |
Node(sa_add idx (loop (offset + seg_len) Empty) sa) | |
in | |
loop 0 t | |
type ('v, 'map) string_map = { | |
empty : 'map; | |
find : string -> 'map -> 'v option; | |
add : string -> 'v -> 'map -> 'map; | |
} | |
type test_config = { | |
key_length : int; | |
tree_size : int; | |
repeat_count : int; | |
} | |
let random_key cfg = | |
let len = Random.int (cfg.key_length / 2) + cfg.key_length in | |
String.init len (fun _ -> Char.chr (Random.int 26 + Char.code 'a')) | |
let ordered_key cfg = | |
let key = Bytes.init cfg.key_length (fun _ -> 'a') in | |
let curr_index = ref 0 in | |
let rec next_key () = | |
assert (!curr_index < Bytes.length key); | |
let c = Bytes.get key !curr_index in | |
if c = 'z' | |
then (incr curr_index; next_key ()) | |
else Bytes.set key !curr_index (Char.chr (Char.code c + 1)) | |
in | |
fun () -> | |
next_key (); | |
Bytes.to_string key | |
let random_test cfg map = | |
let init_data = Array.init cfg.tree_size (fun i -> (random_key cfg, i)) in | |
let tree = Array.fold_left (fun tree (k, v) -> map.add k v tree) map.empty init_data in | |
let insert_data = Array.init cfg.repeat_count (fun i -> (random_key cfg, i)) in | |
let _ = Gc.full_major () in | |
let t0 = Sys.time () in | |
let _ = Array.fold_left (fun tree (k, v) -> map.add k v tree) tree insert_data in | |
let t1 = Sys.time () in | |
let find_data = | |
Array.init cfg.repeat_count (fun _ -> init_data.(Random.int cfg.tree_size)) | |
in | |
let _ = Gc.full_major () in | |
let t2 = Sys.time () in | |
let _ = Array.iter (fun (k, v) -> assert (map.find k tree = Some v)) find_data in | |
let t3 = Sys.time () in | |
(t1 -. t0, t3 -. t2) | |
let ordered_test cfg map = | |
let next_key = ordered_key cfg in | |
let init_data = Array.init cfg.tree_size (fun i -> (next_key (), i)) in | |
let tree = Array.fold_left (fun tree (k, v) -> map.add k v tree) map.empty init_data in | |
let insert_data = Array.init cfg.repeat_count (fun i -> (next_key (), i)) in | |
let _ = Gc.full_major () in | |
let t0 = Sys.time () in | |
let _ = Array.fold_left (fun tree (k, v) -> map.add k v tree) tree insert_data in | |
let t1 = Sys.time () in | |
let find_data = | |
Array.init cfg.repeat_count (fun _ -> init_data.(Random.int cfg.tree_size)) | |
in | |
let _ = Gc.full_major () in | |
let t2 = Sys.time () in | |
let _ = Array.iter (fun (k, v) -> assert (map.find k tree = Some v)) find_data in | |
let t3 = Sys.time () in | |
(t1 -. t0, t3 -. t2) | |
module String_Map = Map.Make(String) | |
let avl_map = { | |
empty = String_Map.empty; | |
find = String_Map.find_opt; | |
add = String_Map.add; | |
} | |
let hamt_map = { | |
empty; | |
find = find_opt; | |
add; | |
} | |
let _ = | |
let map = Sys.argv.(1) in | |
let () = Random.self_init () in | |
let test cfg = | |
match Sys.argv.(2) with | |
| "random" -> random_test cfg | |
| "ordered" -> ordered_test cfg | |
| _ -> exit 1 | |
in | |
let key_length = int_of_string Sys.argv.(3) in | |
let rec loop index tree_size = | |
if index > 10 | |
then () | |
else | |
let config = { key_length; tree_size; repeat_count = 10000 } in | |
Printf.eprintf "testing size=%d...\n" tree_size; | |
flush_all (); | |
let (t_insert, t_find) = | |
match map with | |
| "avl" -> test config avl_map | |
| "hamt" -> test config hamt_map | |
| _ -> exit 1 | |
in | |
Printf.printf "%d %f %f\n" tree_size t_insert t_find; | |
loop (index + 1) (tree_size * 2) | |
in | |
loop 1 5000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment