Skip to content

Instantly share code, notes, and snippets.

@rjhall
Created February 11, 2014 01:53
Show Gist options
  • Save rjhall/8927913 to your computer and use it in GitHub Desktop.
Save rjhall/8927913 to your computer and use it in GitHub Desktop.
import org.apache.commons.math3.linear._
import collection.mutable.PriorityQueue
val dim = 100
val K = 100
val rand = new scala.util.Random()
val data_ary = (0 until 5000).map{i => (i, (0 until dim).map{j => rand.nextGaussian}.toArray)}
val data_vec = data_ary.map{t => (t._1, MatrixUtils.createRealVector(t._2))}
def dot_ary(u : Array[Double], v : Array[Double]) : Double = {
var i = 0
var res = 0.0
while(i < dim){
res += u(i) * v(i)
i += 1
}
res
}
def dot_vec(u : RealVector, v : RealVector) : Double = u.dotProduct(v)
def max_dot_dumb[V](data : Iterable[(Int, V)], dotfn : (V, V) => Double) : Iterable[(Int, Iterable[(Int, Double)])] = {
data.map{t => (t._1, data.map{s => (s._1, dotfn(t._2, s._2))}.toList.sortBy{-_._2}.take(K))}
}
def max_dot_pq[V](data : Iterable[(Int, V)], dotfn : (V, V) => Double) : Iterable[(Int, Iterable[(Int, Double)])] = {
data.map{t =>
val q = new PriorityQueue[(Int, Double)]()(Ordering.by[(Int, Double), Double](-_._2))
var worst = 0.0
var size = 0
data.foreach{s =>
val dot = dotfn(t._2, s._2)
if(size < K || dot > worst) {
size += 1
q.enqueue((s._1, dot))
if(size > K) {
q.dequeue
size -= 1
}
worst = q.head._2
}
}
(t._1, q.toList.sortBy{-_._2})
}
}
def time[V](fn: (Iterable[(Int, V)], (V, V) => Double) => Iterable[(Int, Iterable[(Int, Double)])], a : Iterable[(Int, V)], b : (V, V) => Double) : Double = {
val startTime = System.nanoTime
val res = fn(a, b)
(System.nanoTime - startTime) / 1000000.0
}
val trials = 5
val names = "dumb_vec,dumb_ary,pq_vec,pq_ary".split(",")
val res = names.map{s => (s, new Array[Double](trials))}.toMap
(0 until trials).foreach{t =>
names.foreach{m =>
val timems = m match{
case "dumb_vec" => time[RealVector](max_dot_dumb, data_vec, dot_vec)
case "dumb_ary" => time[Array[Double]](max_dot_dumb, data_ary, dot_ary)
case "pq_vec" => time[RealVector](max_dot_pq, data_vec, dot_vec)
case "pq_ary" => time[Array[Double]](max_dot_pq, data_ary, dot_ary)
}
res(m)(t) = timems
}
}
res.foreach{kv =>
val mean = kv._2.sum / trials
val sd = math.sqrt(kv._2.map{v => (v - mean) * (v - mean)}.sum / trials)
println(kv._1 + ": " + mean + " (" + sd +")")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment