Last active
June 13, 2018 23:09
-
-
Save anowell/9936f3b0f4ba0a8ad2c651bf9baf38ed to your computer and use it in GitHub Desktop.
bench iterators and loops of Zip-Map-Sum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(Benchmark disclaimers apply. Don't trust the numbers without double checking my code.) | |
Scala (functional): 697,099 ns/iter | |
Scala (for loop): 20,467 ns/iter | |
Rust (functional): 4,984 ns/iter | |
Rust (for loop): 4,983 ns/iter // Note: this required adding assertions to prevent bounds checking | |
C (for loop): 6,172 ns/iter // Note: this used -O2 optimization |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
uint zms(uint *a, uint *b, uint aLen, uint bLen) { | |
uint sum = 0; | |
uint minLen = (aLen > bLen) ? aLen : bLen; | |
for(int i=0; i < minLen; i++) { | |
sum += (a[i] * b[i]); | |
} | |
return sum; | |
} | |
unsigned long get_time() { | |
timespec ts; | |
clock_gettime(CLOCK_REALTIME, &ts); | |
return ts.tv_sec * 1e9 + ts.tv_nsec; | |
} | |
int main() { | |
uint *these, *those; | |
unsigned long start, stop; | |
these = (uint*) malloc (10000 * sizeof(uint)); | |
those = (uint*) malloc (10000 * sizeof(uint)); | |
for(int i=0; i<10000; i++) { | |
these[i] = i%997; | |
those[i] = i%97; | |
} | |
// Storing sum to keep loop from being optimized away | |
uint sum = 0; | |
start = get_time(); | |
for(int i=0; i<1000; i++) { | |
sum = zms(these, those, 10000, 10000); | |
} | |
stop = get_time(); | |
printf("sum=%d\n", sum); | |
printf("%d ns/iter", (stop-start)/1000); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
extern crate test; | |
pub fn zms(a: &[u32], b: &[u32]) -> u32 { | |
a.iter() | |
.zip(b) | |
.map(|(i,j)| i*j) | |
.sum() | |
} | |
pub fn zms_for_loop(a: &[u32], b: &[u32]) -> u32 { | |
let mut sum = 0; | |
let min_len = ::std::cmp::min(a.len(), b.len()); | |
// assert less than length to force LLVM to optimize out the bounds check | |
// while still avoiding the unsafe block to use .get_unchecked(i) | |
// otherwise the bounds checks ends up being ~40% of the tight loop | |
assert!(min_len - 1 < a.len()); | |
assert!(min_len - 1 < b.len()); | |
for i in 0..min_len { | |
sum += a[i] * b[i]; | |
} | |
sum | |
} | |
#[cfg(test)] | |
mod tests { | |
use test::Bencher; | |
use super::*; | |
fn prep() -> (Vec<u32>, Vec<u32>) { | |
let mut these = vec![]; | |
let mut those = vec![]; | |
let count = 10000; | |
for i in 0..count { | |
these.push(i%997); | |
those.push(i%97); | |
} | |
(these, those) | |
} | |
#[bench] | |
fn bench_zms(b: &mut Bencher) { | |
let (these, those) = prep(); | |
b.iter(|| zms(&these, &those) ); | |
} | |
#[bench] | |
fn bench_zms_for_loop(b: &mut Bencher) { | |
let (these, those) = prep(); | |
b.iter(|| zms_for_loop(&these, &those)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var these: Array[Int] = Array(); | |
var those: Array[Int] = Array(); | |
// This is not efficient, but who cares, we aren't benching this step | |
for(i <- 0 until 10000) { these = these :+ i%997; those = those :+ i%97 } | |
def zms(a: Array[Int], b: Array[Int]) = a.zip(b).map{ case (i,j) => i*j }.sum | |
def zmsLoop(a: Array[Int], b: Array[Int]) = { | |
var sum=0 | |
val min_len = math.min(a.length, b.length) | |
for(i <- 0 until min_len) { | |
sum += (a(i) * b(i)) | |
} | |
sum | |
} | |
// Run zms 1000 times | |
val start = System.nanoTime; for(i <- 0 until 1000) { zms(these, those) }; val stop = System.nanoTime; val zmsDur = (stop-start)/1000 | |
// Run zmsLoop 1000 times | |
val start = System.nanoTime; for(i <- 0 until 1000) { zmsLoop(these, those) }; val stop = System.nanoTime; val zmsLoopDur = (stop-start)/1000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment