Skip to content

Instantly share code, notes, and snippets.

<?=$n*$argv[1]-=$argv[3]/6*(1-$n*$n=$argv[2])?>
object Rerank {
def score(l : Array[Int], c : List[(Int, Int, Double)]) : Double = {
// Rank changes.
var s = l.zipWithIndex.map{t => math.abs(t._1 - t._2)}.sum.toDouble
// Constraints.
val r = l.zipWithIndex.sortBy{_._1}.map{_._2}.toArray
c.foreach{t =>
if(r(t._1) > r(t._2)) {
s += t._3
package com.etsy.scalding.jobs
import com.etsy.conjecture.scalding.SVD
import com.twitter.scalding._
import com.twitter.scalding.mathematics.Matrix._
class SVDTest(args : Args) extends Job(args) {
val s = IterableSource[(Long, List[Double])]((0 until 100).map{i => (i.toLong, (0 until 100).map{j => if(i == j) 1.0 else 0.0}.toList)}.toList, ('row, 'values))
.flatMapToMatrix('row, 'values){x : (Long, List[Double]) =>
package com.etsy.scalding.jobs.conjecture
import com.etsy.scalding.conjecture.NNMF
import com.twitter.scalding.{Args, Job, Tsv, SequenceFile}
import org.apache.commons.math3.linear.RealVector
class NNMFTest(args : Args) extends Job(args) {
val iter = args.getOrElse("iter", "0").toInt
val iters = args.getOrElse("iters", "20").toInt
import org.apache.commons.math3.linear._
import collection.mutable.PriorityQueue
// Number of components in every generated vector.
val dim = 100
// NOTE(review): K is not referenced in the visible lines — presumably a top-K size
// (a PriorityQueue is imported above); confirm against the rest of the script.
val K = 100
// Unseeded generator, so the generated data differs from run to run.
val rand = new scala.util.Random()
// 5000 (id, values) pairs; each values array holds `dim` draws from rand.nextGaussian.
val data_ary = (0 until 5000).map{i => (i, (0 until dim).map{j => rand.nextGaussian}.toArray)}
// The same rows as (id, vector) pairs, with each array wrapped by MatrixUtils.createRealVector.
val data_vec = data_ary.map{t => (t._1, MatrixUtils.createRealVector(t._2))}
def dot_ary(u : Array[Double], v : Array[Double]) : Double = {
scala> val f = 1337.666f
f: Float = 1337.666
scala> java.lang.Integer.toHexString(java.lang.Float.floatToIntBits(f))
res1: java.lang.String = 44a73550
php> unpack('f', pack('i', hexdec('44a73550')));
→ array(
1 => 1337.66601562
@rjhall
rjhall / lr.R
Last active January 3, 2016 09:09
linear regression with parameters constrained to lie on the unit ball
# Fake data: X is a 5x4 matrix of standard-normal draws; y is X times a
# coefficient vector drawn uniformly from [-1, 1], plus Gaussian noise scaled by 0.1.
X = matrix(data = rnorm(20), nr = 5, nc = 4);
y = X %*% (runif(4) * 2 - 1) + rnorm(5) * 0.1
# The usual (unconstrained) linear regression, solved through the normal
# equations: b = (X'X)^-1 X'y.
b = solve(t(X) %*% X) %*% t(X) %*% y
# Residual sum of squares of the unconstrained fit.
sum((y - X %*% b)^2)
# Squared Euclidean norm of the fitted coefficients.
sum(b*b)
# the unit ball constrained linear regression.
@rjhall
rjhall / gist:7492876
Created November 15, 2013 22:38
the_fugly.java
static class TObjectDoubleHashMapNoDouble<K> extends TObjectDoubleHashMap<K> {
/**
 * Creates a map by passing both arguments straight to the
 * TObjectDoubleHashMap constructor; no other initialisation is performed here.
 *
 * @param initialSize forwarded unchanged to the superclass constructor
 * @param loadFactor  forwarded unchanged to the superclass constructor
 */
public TObjectDoubleHashMapNoDouble(int initialSize, float loadFactor) {
super(initialSize, loadFactor);
}
public double put(K key, double value) {
int index = insertKey(key);
double previous = 0.0;
boolean isNewMapping = true;
if (index < 0) {
@rjhall
rjhall / foo.scala
Created November 13, 2013 20:12
Stuff to trade CPU for better memory use vs. cascading.kryo. Basically, it uses some trick streams instead of an in-RAM buffer like the one I used here: https://github.com/Cascading/cascading.kryo/blob/develop/src/jvm/cascading/kryo/KryoSerializer.java#L31 If you give Kryo the unadulterated streams, it will read beyond the object boundaries and Hadoop will die.
/**
 * A KryoHadoop serialization whose serializer and deserializer are the
 * ModelSerializer / ModelDeserializer pair, both constructed from `populatedKryo`.
 */
class ModelSerialization extends com.twitter.scalding.serialization.KryoHadoop {

  /**
   * @param c the class to serialize; not consulted, every class gets the same kind of serializer
   * @return a new ModelSerializer built from `populatedKryo`
   */
  override def getSerializer(c : Class[Object]) : Serializer[Object] =
    new ModelSerializer(populatedKryo)

  /**
   * @param c the class to deserialize, handed on to the ModelDeserializer
   * @return a new ModelDeserializer built from `populatedKryo` and `c`
   */
  override def getDeserializer(c : Class[Object]) : Deserializer[Object] =
    new ModelDeserializer(populatedKryo, c)
}
scala> val v = (0 until 15000).map{i => (i.toLong, 0.0)}.toArray
v: Array[(Long, Double)] = Array((0,0.0), (1,0.0), (2,0.0), (3,0.0), (4,0.0), (5,0.0), (6,0.0), (7,0.0), (8,0.0), (9,0.0), (10,0.0), (11,0.0), (12,0.0), (13,0.0), (14,0.0), (15,0.0), (16,0.0), (17,0.0), (18,0.0), (19,0.0), (20,0.0), (21,0.0), (22,0.0), (23,0.0), (24,0.0), (25,0.0), (26,0.0), (27,0.0), (28,0.0), (29,0.0), (30,0.0), (31,0.0), (32,0.0), (33,0.0), (34,0.0), (35,0.0), (36,0.0), (37,0.0), (38,0.0), (39,0.0), (40,0.0), (41,0.0), (42,0.0), (43,0.0), (44,0.0), (45,0.0), (46,0.0), (47,0.0), (48,0.0), (49,0.0), (50,0.0), (51,0.0), (52,0.0), (53,0.0), (54,0.0), (55,0.0), (56,0.0), (57,0.0), (58,0.0), (59,0.0), (60,0.0), (61,0.0), (62,0.0), (63,0.0), (64,0.0), (65,0.0), (66,0.0), (67,0.0), (68,0.0), (69,0.0), (70,0.0), (71,0.0), (72,0.0), (73,0.0), (74,0.0), (75,0.0), (76,0.0), (77,...
scala> scala.util.Sorting.quickSort(v)(Ordering.by[(Long, Double), Double](-_._2))
java.lang.StackOverflowError
at scala.math.Ordering$$anon$7.lt(Ordering.s