// zhangce · October 7, 2015 15:46
// diff --git a/gistfile1.txt b/gistfile1.txt
 // Copyright 2014 Hazy Research (http://i.stanford.edu/hazy)
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.


 #ifndef _GLM_SPARSE_SGD_H
 #define _GLM_SPARSE_SGD_H

 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <fcntl.h>
 #include <unistd.h>

 #include <cassert>
 #include <cerrno>
 #include <cmath>
 #include <cstring>
 #include <iostream>
 #include <stdexcept>
 #include <string>

 #include "dimmwitted.h"

 // Build-time configuration for the sparse GLM example.
 //
 // NNUMA and NTHREAD are the values handed to set_n_numa_node() and
 // set_n_thread_per_node() on both engines created in test_glm_sparse_sgd().
 #define NNUMA 1
 #define NTHREAD 1

 // Alternative IMIN/IMAX/ITYPE settings, kept commented out. Exactly one
 // group should be active at a time.
 /*
 #define IMIN 1.0
 #define IMAX 1.0
 #define ITYPE float
 */

 /*
 #define IMIN -32768
 #define IMAX 32767
 #define ITYPE signed short
 */


 // Active setting. ITYPE is the element type of the quantized feature array
 // used for the gradient pass. Feature values clamped to [-1, 1] are
 // multiplied by IMAX before being stored as ITYPE. IMIN is not referenced
 // anywhere else in the visible code.
 #define IMIN -128
 #define IMAX 127
 #define ITYPE signed char


 // Element type of the model weight array.
 #define MTYPE float

 // Factor the gradient function multiplies a stored ITYPE value by, undoing
 // the IMAX scaling applied at load time.
 const float DIVIDEDBY = 1.0/IMAX;

 // One fixed-size record of the binary input file. test_glm_sparse_sgd()
 // reinterprets the mmap'ed file contents as an array of these, so the field
 // order and types must match the file layout exactly.
 struct InputTuple{
  // Example id; used as the index into the per-example row-start array.
  int eid;
  // Column id of the entry. A negative value selects the special entry that is
  // later moved to the last column (presumably the label -- confirm against
  // the data writer).
  int iid;
  // Entry value; non-negative-iid entries are clamped to [-1, 1] on load.
  double value;
 };


 // Weight vector for the sparse GLM example: a heap array of n MTYPE values.
 //
 // The object owns p. Copy construction duplicates the array, so every copy
 // releases only its own storage. Copy assignment is unavailable because p is
 // a const pointer member.
 class GLMModelExample_Sparse{
 public:
   MTYPE * const p;  // weights, length n
   int n;            // number of weights in p

   // Allocates _n weights; their values are left uninitialized.
   // Initializers are listed in declaration order (p, then n).
   GLMModelExample_Sparse(int _n):
     p(new MTYPE[_n]), n(_n){}

   // Deep copy: allocates a fresh array and copies other's n weights into it.
   GLMModelExample_Sparse( const GLMModelExample_Sparse& other ) :
     p(new MTYPE[other.n]), n(other.n){
     for(int i=0;i<n;i++){
       p[i] = other.p[i];
     }
   }

   // Releases the weight array, which was previously leaked for every instance.
   ~GLMModelExample_Sparse(){
     delete [] p;
   }

 };

 // Model-averaging callback passed to register_model_avg(). Reaching it is
 // treated as a programming error: it unconditionally trips assert(false).
 // When NDEBUG is defined the assert compiles away and the call does nothing.
 void f_lr_modelavg(GLMModelExample_Sparse** const p_models, int nreplicas, int ireplica){
   // None of the arguments are consulted.
   (void) p_models;
   (void) nreplicas;
   (void) ireplica;

   assert(false);
 }


 double f_lr_loss_sparse(const SparseVector<float>* const ex, GLMModelExample_Sparse* const p_model){
  MTYPE * model = p_model->p;
  float label = ex->p[0];
  float dot = 0.0;
  for(int i=1;i<ex->n;i++){
    //std::cout << ex->p[i] << "    " << ex->idxs[i] << std::endl;
    dot += ex->p[i] * model[ex->idxs[i]];
  }
  //std::cout << label << "    " << dot << std::endl;
  //std::cout << "-------" << std::endl;
  return  - label * dot + log(exp(dot) + 1.0);
 }

 double f_lr_grad_sparse(const SparseVector<ITYPE>* const ex, GLMModelExample_Sparse* const p_model){
  MTYPE * model = p_model->p;
  float label = ex->p[0];

  float dot = 0.0;
  for(int i=1;i<ex->n;i++){
    dot += (DIVIDEDBY * ex->p[i]) * model[ex->idxs[i]];
  }

  const float d = exp(-dot);
  const float Z = 0.0001 * (-label + 1.0/(1.0+d));

  for(int i=1;i<ex->n;i++){
    model[ex->idxs[i]] -= (DIVIDEDBY * ex->p[i]) * Z;
  }

  return 1.0;
 }

 template<ModelReplType MODELREPL, DataReplType DATAREPL>
 float test_glm_sparse_sgd(){

  //int fdin = open("data/RCV.binary.train.dat", O_RDONLY);
    
  int fdin = open("data/reuters.bin", O_RDONLY);
 
  struct stat statbuf;
  fstat (fdin,&statbuf);
  int * tmp = (int*) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0);
  InputTuple * tuples = (InputTuple*) &tmp[0];
  //int ntuple = tmp[0];

  int ntuple = statbuf.st_size/sizeof(InputTuple);

  assert(ntuple == statbuf.st_size/sizeof(InputTuple));

  std::cout << "NNZ  = " << ntuple << std::endl;
  
  int nexp = tuples[ntuple-1].eid + 1;

  std::cout << "NEXP = " << nexp << std::endl;

  //for(int i=0;i<ntuple;i++){
  //  std::cout << tuples[i].eid << "   " << tuples[i].iid << "   " << tuples[i].value << std::endl;
  //}
  
  float * examples = new float[ntuple];
  
  ITYPE * examples_round = new ITYPE[ntuple]; // examples after rounding

  int * cols = new int[ntuple];
  int * rows = new int[nexp];

  int oeid = -1;
  int nfeat = 0;
  for(int ituple=0;ituple<ntuple;ituple++){
    const InputTuple & tuple = tuples[ituple];
    examples[ituple] = tuple.value;

    if(tuple.iid >= 0){
      if(examples[ituple] < -1) examples[ituple] = -1;
      if(examples[ituple] > 1) examples[ituple] = 1;
      assert(examples[ituple]>=-1 && examples[ituple] <= 1);
      examples_round[ituple] = IMAX * examples[ituple];  // rounding
    }else{
      examples_round[ituple] = tuple.value;
    }
    
    cols[ituple] = tuple.iid;
    if(tuple.eid != oeid){
      rows[tuple.eid] = ituple;
      oeid = tuple.eid;
    }
    if(tuple.iid > nfeat){
      nfeat = tuple.iid;
    }
  }
  nfeat += 2;

  std::cout << "NFEAT = " << nfeat << std::endl;


  for(int ituple=0;ituple<ntuple;ituple++){
    if(cols[ituple] < 0){
      cols[ituple] = nfeat - 1;
      examples[ituple] = (examples[ituple] + 1) / 2; // {-1, 1} => {0, 1}
      examples_round[ituple] = examples[ituple];
    }
  }

  GLMModelExample_Sparse model(nfeat);
  for(int i=0;i<model.n;i++){
    model.p[i] = 0.0;
  }

  SparseDimmWitted<ITYPE, GLMModelExample_Sparse, MODELREPL, DATAREPL, DW_ACCESS_ROW> 
    dw(examples_round, rows, cols, nexp, nfeat, ntuple, &model);
  dw.set_n_numa_node(NNUMA);
  dw.set_n_thread_per_node(NTHREAD);

  SparseDimmWitted<float, GLMModelExample_Sparse, MODELREPL, DATAREPL, DW_ACCESS_ROW> 
    dw_loss(examples, rows, cols, nexp, nfeat, ntuple, &model);
  dw_loss.set_n_numa_node(NNUMA);
  dw_loss.set_n_thread_per_node(NTHREAD);

  unsigned int f_handle_grad = dw.register_row(f_lr_grad_sparse);
  unsigned int f_handle_loss = dw_loss.register_row(f_lr_loss_sparse);
  dw.register_model_avg(f_handle_grad, f_lr_modelavg);
  dw_loss.register_model_avg(f_handle_loss, f_lr_modelavg);

  std::cout << sizeof(ITYPE) * ntuple << std::endl;

  float sum = 0.0;
  for(int i_epoch=0;i_epoch<100;i_epoch++){
    float loss = dw_loss.exec(f_handle_loss)/nexp;
    //float loss = 0.0;
    sum = 0.0;
    for(int i=0;i<nfeat;i++){
      sum += model.p[i];
    }
    std::cout.precision(8);
    std::cout << sum << "    loss=" << loss << std::endl;
    dw.exec(f_handle_grad);
  }

  return 0;
 }

 // Entry point: runs the sparse SGD test with per-machine model replication and
 // sharded data, prints the value it returns, and exits with status 0.
 int main(int argc, char** argv){
   // Command-line arguments are not used.
   (void) argc;
   (void) argv;

   const float model_sum =
     test_glm_sparse_sgd<DW_MODELREPL_PERMACHINE, DW_DATAREPL_SHARDING>();

   std::cout << "SUM OF MODEL (Should be ~1.3-1.4): ";
   std::cout << model_sum;
   std::cout << std::endl;

   return 0;
 }

 #endif
	// Copyright 2014 Hazy Research (http://i.stanford.edu/hazy)
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.


	#ifndef _GLM_SPARSE_SGD_H
	#define _GLM_SPARSE_SGD_H

	#include <sys/types.h>
	#include <sys/stat.h>
	#include <sys/mman.h>
	#include <fcntl.h>

	#include "dimmwitted.h"

	// Build-time configuration for the sparse GLM example.
	//
	// NNUMA and NTHREAD are the values handed to set_n_numa_node() and
	// set_n_thread_per_node() on both engines created in test_glm_sparse_sgd().
	#define NNUMA 1
	#define NTHREAD 1

	// Alternative IMIN/IMAX/ITYPE settings, kept commented out. Exactly one
	// group should be active at a time.
	/*
	#define IMIN 1.0
	#define IMAX 1.0
	#define ITYPE float
	*/

	/*
	#define IMIN -32768
	#define IMAX 32767
	#define ITYPE signed short
	*/


	// Active setting. ITYPE is the element type of the quantized feature array
	// used for the gradient pass. Feature values clamped to [-1, 1] are
	// multiplied by IMAX before being stored as ITYPE. IMIN is not referenced
	// anywhere else in the visible code.
	#define IMIN -128
	#define IMAX 127
	#define ITYPE signed char


	// Element type of the model weight array.
	#define MTYPE float

	// Factor the gradient function multiplies a stored ITYPE value by, undoing
	// the IMAX scaling applied at load time.
	const float DIVIDEDBY = 1.0/IMAX;

	// One fixed-size record of the binary input file. test_glm_sparse_sgd()
	// reinterprets the mmap'ed file contents as an array of these, so the field
	// order and types must match the file layout exactly.
	struct InputTuple{
	// Example id; used as the index into the per-example row-start array.
	int eid;
	// Column id of the entry. A negative value selects the special entry that is
	// later moved to the last column (presumably the label -- confirm against
	// the data writer).
	int iid;
	// Entry value; non-negative-iid entries are clamped to [-1, 1] on load.
	double value;
	};


	// Weight vector for the sparse GLM example: a heap array of n MTYPE values.
	//
	// The object owns p. Copy construction duplicates the array, so every copy
	// releases only its own storage. Copy assignment is unavailable because p is
	// a const pointer member.
	class GLMModelExample_Sparse{
	public:
	  MTYPE * const p;  // weights, length n
	  int n;            // number of weights in p

	  // Allocates _n weights; their values are left uninitialized.
	  // Initializers are listed in declaration order (p, then n).
	  GLMModelExample_Sparse(int _n):
	    p(new MTYPE[_n]), n(_n){}

	  // Deep copy: allocates a fresh array and copies other's n weights into it.
	  GLMModelExample_Sparse( const GLMModelExample_Sparse& other ) :
	    p(new MTYPE[other.n]), n(other.n){
	    for(int i=0;i<n;i++){
	      p[i] = other.p[i];
	    }
	  }

	  // Releases the weight array, which was previously leaked for every instance.
	  ~GLMModelExample_Sparse(){
	    delete [] p;
	  }

	};

	// Model-averaging callback passed to register_model_avg(). Reaching it is
	// treated as a programming error: it unconditionally trips assert(false).
	// When NDEBUG is defined the assert compiles away and the call does nothing.
	void f_lr_modelavg(GLMModelExample_Sparse** const p_models, int nreplicas, int ireplica){
	  // None of the arguments are consulted.
	  (void) p_models;
	  (void) nreplicas;
	  (void) ireplica;

	  assert(false);
	}


	double f_lr_loss_sparse(const SparseVector<float>* const ex, GLMModelExample_Sparse* const p_model){
	MTYPE * model = p_model->p;
	float label = ex->p[0];
	float dot = 0.0;
	for(int i=1;i<ex->n;i++){
	//std::cout << ex->p[i] << " " << ex->idxs[i] << std::endl;
	dot += ex->p[i] * model[ex->idxs[i]];
	}
	//std::cout << label << " " << dot << std::endl;
	//std::cout << "-------" << std::endl;
	return - label * dot + log(exp(dot) + 1.0);
	}

	double f_lr_grad_sparse(const SparseVector<ITYPE>* const ex, GLMModelExample_Sparse* const p_model){
	MTYPE * model = p_model->p;
	float label = ex->p[0];

	float dot = 0.0;
	for(int i=1;i<ex->n;i++){
	dot += (DIVIDEDBY * ex->p[i]) * model[ex->idxs[i]];
	}

	const float d = exp(-dot);
	const float Z = 0.0001 * (-label + 1.0/(1.0+d));

	for(int i=1;i<ex->n;i++){
	model[ex->idxs[i]] -= (DIVIDEDBY * ex->p[i]) * Z;
	}

	return 1.0;
	}

	template<ModelReplType MODELREPL, DataReplType DATAREPL>
	float test_glm_sparse_sgd(){

	//int fdin = open("data/RCV.binary.train.dat", O_RDONLY);

	int fdin = open("data/reuters.bin", O_RDONLY);

	struct stat statbuf;
	fstat (fdin,&statbuf);
	int * tmp = (int*) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0);
	InputTuple * tuples = (InputTuple*) &tmp[0];
	//int ntuple = tmp[0];

	int ntuple = statbuf.st_size/sizeof(InputTuple);

	assert(ntuple == statbuf.st_size/sizeof(InputTuple));

	std::cout << "NNZ = " << ntuple << std::endl;

	int nexp = tuples[ntuple-1].eid + 1;

	std::cout << "NEXP = " << nexp << std::endl;

	//for(int i=0;i<ntuple;i++){
	// std::cout << tuples[i].eid << " " << tuples[i].iid << " " << tuples[i].value << std::endl;
	//}

	float * examples = new float[ntuple];

	ITYPE * examples_round = new ITYPE[ntuple]; // examples after rounding

	int * cols = new int[ntuple];
	int * rows = new int[nexp];

	int oeid = -1;
	int nfeat = 0;
	for(int ituple=0;ituple<ntuple;ituple++){
	const InputTuple & tuple = tuples[ituple];
	examples[ituple] = tuple.value;

	if(tuple.iid >= 0){
	if(examples[ituple] < -1) examples[ituple] = -1;
	if(examples[ituple] > 1) examples[ituple] = 1;
	assert(examples[ituple]>=-1 && examples[ituple] <= 1);
	examples_round[ituple] = IMAX * examples[ituple]; // rounding
	}else{
	examples_round[ituple] = tuple.value;
	}

	cols[ituple] = tuple.iid;
	if(tuple.eid != oeid){
	rows[tuple.eid] = ituple;
	oeid = tuple.eid;
	}
	if(tuple.iid > nfeat){
	nfeat = tuple.iid;
	}
	}
	nfeat += 2;

	std::cout << "NFEAT = " << nfeat << std::endl;


	for(int ituple=0;ituple<ntuple;ituple++){
	if(cols[ituple] < 0){
	cols[ituple] = nfeat - 1;
	examples[ituple] = (examples[ituple] + 1) / 2; // {-1, 1} => {0, 1}
	examples_round[ituple] = examples[ituple];
	}
	}

	GLMModelExample_Sparse model(nfeat);
	for(int i=0;i<model.n;i++){
	model.p[i] = 0.0;
	}

	SparseDimmWitted<ITYPE, GLMModelExample_Sparse, MODELREPL, DATAREPL, DW_ACCESS_ROW>
	dw(examples_round, rows, cols, nexp, nfeat, ntuple, &model);
	dw.set_n_numa_node(NNUMA);
	dw.set_n_thread_per_node(NTHREAD);

	SparseDimmWitted<float, GLMModelExample_Sparse, MODELREPL, DATAREPL, DW_ACCESS_ROW>
	dw_loss(examples, rows, cols, nexp, nfeat, ntuple, &model);
	dw_loss.set_n_numa_node(NNUMA);
	dw_loss.set_n_thread_per_node(NTHREAD);

	unsigned int f_handle_grad = dw.register_row(f_lr_grad_sparse);
	unsigned int f_handle_loss = dw_loss.register_row(f_lr_loss_sparse);
	dw.register_model_avg(f_handle_grad, f_lr_modelavg);
	dw_loss.register_model_avg(f_handle_loss, f_lr_modelavg);

	std::cout << sizeof(ITYPE) * ntuple << std::endl;

	float sum = 0.0;
	for(int i_epoch=0;i_epoch<100;i_epoch++){
	float loss = dw_loss.exec(f_handle_loss)/nexp;
	//float loss = 0.0;
	sum = 0.0;
	for(int i=0;i<nfeat;i++){
	sum += model.p[i];
	}
	std::cout.precision(8);
	std::cout << sum << " loss=" << loss << std::endl;
	dw.exec(f_handle_grad);
	}

	return 0;
	}

	// Entry point: runs the sparse SGD test with per-machine model replication and
	// sharded data, prints the value it returns, and exits with status 0.
	int main(int argc, char** argv){
	  // Command-line arguments are not used.
	  (void) argc;
	  (void) argv;

	  const float model_sum =
	    test_glm_sparse_sgd<DW_MODELREPL_PERMACHINE, DW_DATAREPL_SHARDING>();

	  std::cout << "SUM OF MODEL (Should be ~1.3-1.4): ";
	  std::cout << model_sum;
	  std::cout << std::endl;

	  return 0;
	}

	#endif