object Rerank {
def score(l : Array[Int], c : List[(Int, Int, Double)]) : Double = {
// Rank changes.
var s ={t => math.abs(t._1 - t._2)}.sum.toDouble
// Constraints.
val r = l.zipWithIndex.sortBy{_._1}.map{_._2}.toArray
c.foreach{t =>
if(r(t._1) > r(t._2)) {
s += t._3
import com.etsy.conjecture.scalding.SVD
import com.twitter.scalding._
import com.twitter.scalding.mathematics.Matrix._
class SVDTest(args : Args) extends Job(args) {
val s = IterableSource[(Long, List[Double])]((0 until 100).map{i => (i.toLong, (0 until 100).map{j => if(i == j) 1.0 else 0.0}.toList)}.toList, ('row, 'values))
.flatMapToMatrix('row, 'values){x : (Long, List[Double]) =>
import com.etsy.scalding.conjecture.NNMF
import com.twitter.scalding.{Args, Job, Tsv, SequenceFile}
import org.apache.commons.math3.linear.RealVector
class NNMFTest(args : Args) extends Job(args) {
val iter = args.getOrElse("iter", "0").toInt
val iters = args.getOrElse("iters", "20").toInt
import org.apache.commons.math3.linear._
import collection.mutable.PriorityQueue
val dim = 100
val K = 100
val rand = new scala.util.Random()
val data_ary = (0 until 5000).map{i => (i, (0 until dim).map{j => rand.nextGaussian}.toArray)}
val data_vec ={t => (t._1, MatrixUtils.createRealVector(t._2))}
def dot_ary(u : Array[Double], v : Array[Double]) : Double = {
scala> val f = 1337.666f
f: Float = 1337.666
scala> java.lang.Integer.toHexString(java.lang.Float.floatToIntBits(f))
res1: java.lang.String = 44a73550
php> unpack('f', pack('i', hexdec('44a73550')));
→ array(
1 => 1337.66601562
rjhall / lr.R
Last active January 3, 2016 09:09
Linear regression with parameters constrained to lie on the unit ball
# Fake data: a 5 x 4 design matrix X with standard-normal entries.
X = matrix(data = rnorm(20), nr = 5, nc = 4);
# Response: X times a coefficient vector drawn uniformly from (-1, 1),
# plus Gaussian noise with standard deviation 0.1.
y = X %*% (runif(4) * 2 - 1) + rnorm(5) * 0.1
# The usual (unconstrained) linear regression, solved via the normal equations.
b = solve(t(X) %*% X) %*% t(X) %*% y
# Residual sum of squares of the unconstrained fit.
sum((y - X %*% b)^2)
# the unit ball constrained linear regression.
rjhall / gist:7492876
Created November 15, 2013 22:38
static class TObjectDoubleHashMapNoDouble<K> extends TObjectDoubleHashMap<K> {
public TObjectDoubleHashMapNoDouble(int initialSize, float loadFactor) {
super(initialSize, loadFactor);
public double put(K key, double value) {
int index = insertKey(key);
double previous = 0.0;
boolean isNewMapping = true;
if (index < 0) {
rjhall / foo.scala
Created November 13, 2013 20:12
Stuff to trade CPU for better memory use vs. cascading.kryo. Basically, it uses some trick streams instead of a buffer in RAM like I did elsewhere (the link to that version is missing here). If you give Kryo the unadulterated streams, it will read beyond the object boundaries, and Hadoop will die.
class ModelSerialization extends com.twitter.scalding.serialization.KryoHadoop {
override def getSerializer(c : Class[Object]) : Serializer[Object] = {
new ModelSerializer(populatedKryo)
override def getDeserializer(c : Class[Object]) : Deserializer[Object] = {
new ModelDeserializer(populatedKryo, c)
scala> val v = (0 until 15000).map{i => (i.toLong, 0.0)}.toArray
scala> scala.util.Sorting.quickSort(v)(Ordering.by[(Long, Double), Double](-_._2))
