Last active
April 21, 2022 07:30
-
-
Save Twister915/365887ffff7519f3d8c428bfa046e2d7 to your computer and use it in GitHub Desktop.
topk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::marker::PhantomData; | |
pub enum TopK<I, E, K, F, const N: usize> { | |
// this TopK has just been created and no data has been computed | |
// this is the initial state | |
Prepared { | |
itr: I, | |
f: F, | |
_k: PhantomData<K>, | |
}, | |
// results have been demanded by a call to .next(), so we have computed the top K items | |
Ready { | |
items: std::array::IntoIter<Option<E>, N>, | |
size: usize, | |
} | |
} | |
impl<I, E, K, F, const N: usize> TopK<I, E, K, F, N> | |
where | |
I: Iterator<Item=E>, | |
K: PartialOrd<K>, | |
F: Fn(&E) -> K, | |
[Option<K>; N]: Default, | |
[Option<E>; N]: Default, | |
{ | |
pub fn new(itr: I, f: F) -> Self { | |
Self::Prepared { | |
itr, | |
f, | |
_k: PhantomData, | |
} | |
} | |
fn items_iter(&mut self) -> &mut std::array::IntoIter<Option<E>, N> { | |
loop { | |
use TopK::*; | |
match self { | |
Ready { items, .. } => { | |
return items; | |
}, | |
Prepared { itr, f, .. } => { | |
*self = Self::compute_ready_state(itr, f); | |
} | |
} | |
} | |
} | |
// using an iterator and a scoring function, this produces a Self::Ready | |
fn compute_ready_state(itr: &mut I, f: &mut F) -> Self { | |
// size (updated below). Should be N or less, depending on if source iterator emits <N items | |
let mut size = 0; | |
// these two arrays are coordinated such that if scores[x].is_some() then items[x].is_some() | |
// scores[x] is f(&items[x]) | |
let mut scores: [Option<K>; N] = Default::default(); | |
let mut items: [Option<E>; N] = Default::default(); | |
// exhaust the iterator (look at every item) | |
while let Some(item) = itr.next() { | |
// compute score | |
let score = f(&item); | |
// find if the score is larger than anything in our array currently | |
for i in 0..N { | |
// we should insert if we are larger OR if the slot is available | |
if if let Some(other) = &scores[i] { | |
other < &score | |
} else { | |
true | |
} { | |
// insert score and item | |
array_insert(&mut scores, Some(score), i); | |
array_insert(&mut items, Some(item), i); | |
// ensure size is correct | |
if size < N { | |
size += 1; | |
} | |
// this break combined with the structure of this loop ensures that | |
// the arrays are always sorted from greatest -> least score value | |
break; | |
} | |
} | |
} | |
// convert items into an iterator, which is what the Self::Ready wants | |
let items = items.into_iter(); | |
Self::Ready { items, size } | |
} | |
} | |
impl<I, E, K, F, const N: usize> Iterator for TopK<I, E, K, F, N> | |
where | |
I: Iterator<Item=E>, | |
K: PartialOrd<K>, | |
F: Fn(&E) -> K, | |
[Option<K>; N]: Default, | |
[Option<E>; N]: Default, | |
{ | |
type Item = E; | |
fn next(&mut self) -> Option<Self::Item> { | |
self.items_iter().next().flatten() | |
} | |
fn size_hint(&self) -> (usize, Option<usize>) { | |
match self { | |
TopK::Ready { size, .. } => (0, Some(*size)), | |
TopK::Prepared { .. } => (0, Some(N)), | |
} | |
} | |
} | |
pub trait TopKExt: Iterator + Sized { | |
fn top_k<K, F, const N: usize>(self, score_f: F) -> TopK<Self, Self::Item, K, F, N> | |
where | |
F: Fn(&Self::Item) -> K, | |
K: PartialOrd<K>, | |
[Option<K>; N]: Default, | |
[Option<Self::Item>; N]: Default, | |
{ | |
TopK::new(self, score_f) | |
} | |
} | |
impl<I> TopKExt for I where I: Iterator + Sized {} | |
#[inline] | |
fn array_insert<E, const N: usize>(elems: &mut[E; N], mut tmp: E, idx: usize) { | |
for i in idx..N { | |
std::mem::swap(&mut tmp, &mut elems[i]); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment