Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created June 8, 2011 06:44
Show Gist options
  • Save lindenb/1013908 to your computer and use it in GitHub Desktop.
Save lindenb/1013908 to your computer and use it in GitHub Desktop.
Adds a UCSC 'bin' column
/**
* Author: Pierre Lindenbaum PhD (original source is from Jim Kent: http://genomewiki.ucsc.edu/index.php/Bin_indexing_system)
* Motivation: Adds a UCSC 'bin' column see http://biostar.stackexchange.com/questions/8943/get-rs-number-based-on-position
* Compilation: gcc bin.c
* Execute: echo -e "chr1\t10326\t10327\trs112750067" | ./a.out
* History: updated so it prints all the bins overlapping the range, at every depth
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Print the id of the given bin to stdout, then recursively print the ids of
 * all descendant bins whose genomic span overlaps the query range. Ids are
 * written depth-first as a comma-separated list with no trailing newline.
 *
 * The query is interpreted as a 0-based half-open range
 * [chromStart, chromEnd). An empty range (chromEnd <= chromStart) is treated
 * as the single base at chromStart.
 *
 * chromStart     start of the query range
 * chromEnd       end of the query range (exclusive)
 * binId          id of the current bin; always printed
 * level          depth of the current bin; children exist only while level < 4
 * binRowStart    id of the first bin at the current level
 * rowIndex       index of the current bin within its level
 * binRowCount    number of bins at the current level
 * genomicPos     first position covered by the current bin
 * genomicLength  number of positions covered by the current bin
 * first          1 for the first id of the list (no leading comma), else 0
 */
static void binsInRange(
    int chromStart,
    int chromEnd,
    int binId,
    int level,
    int binRowStart,
    int rowIndex,
    int binRowCount,
    int genomicPos,
    int genomicLength,
    int first
    )
{
    /* Last base covered by the query (inclusive). */
    const int lastBase = (chromEnd > chromStart) ? chromEnd - 1 : chromStart;

    if (first != 1) {
        fputc(',', stdout);
    }
    printf("%d", binId);

    if (level < 4) {
        int i;
        /* Every bin is split into 8 equally sized children. */
        const int childLength = genomicLength / 8;
        const int childBinRowCount = binRowCount * 8;
        /* Ids of the next level start right after the ids of this level. */
        const int childRowBinStart = binRowStart + binRowCount;
        const int firstChildIndex = rowIndex * 8;
        const int firstChildBin = childRowBinStart + firstChildIndex;

        for (i = 0; i < 8; ++i) {
            const int childStart = genomicPos + i * childLength;
            const int childEnd = childStart + childLength; /* exclusive */

            /*
             * The child covers [childStart, childEnd). A query starting
             * exactly at childEnd, or whose last base lies before
             * childStart, does not touch it.
             */
            if (chromStart >= childEnd || lastBase < childStart) {
                continue;
            }
            binsInRange(
                chromStart,
                chromEnd,
                firstChildBin + i,
                level + 1,
                childRowBinStart,
                firstChildIndex + i,
                childBinRowCount,
                childStart,
                childLength,
                0
                );
        }
    }
}
/**
 * Print to stdout the comma-separated list of bin ids produced by
 * binsInRange() for the range chromStart..chromEnd, starting the descent
 * at the single top-level bin (id 0), which spans 2^29 positions from 0.
 */
static void bins(int chromStart,int chromEnd)
{
    const int rootSpan = 1 << 29; /* 536870912 */

    binsInRange(
        chromStart,
        chromEnd,
        0,        /* binId */
        0,        /* level */
        0,        /* binRowStart */
        0,        /* rowIndex */
        1,        /* binRowCount */
        0,        /* genomicPos */
        rootSpan, /* genomicLength */
        1         /* first: no leading comma */
        );
}
/**
 * Read one line of arbitrary length from `in`.
 *
 * Returns a heap-allocated, NUL-terminated string holding the line without
 * its terminating '\n'; the caller must free() it. An empty line yields an
 * empty string. Returns NULL when end of input is reached before any
 * character was read. A final line lacking '\n' is still returned.
 * Prints "Out of memory" to stderr and exits if allocation fails.
 */
static char* readline(FILE* in)
{
    char *buf = NULL;
    size_t cap = 0;
    size_t len = 0;
    int c;

    while ((c = fgetc(in)) != EOF) {
        /* Keep room for this character plus the terminator. */
        if (len + 2 > cap) {
            /* Grow geometrically instead of reallocating for every byte. */
            const size_t newCap = (cap == 0) ? 128 : cap * 2;
            char *tmp = (newCap > cap) ? realloc(buf, newCap) : NULL;

            if (tmp == NULL) {
                free(buf);
                fputs("Out of memory\n", stderr);
                exit(EXIT_FAILURE);
            }
            buf = tmp;
            cap = newCap;
        }
        if (c == '\n') {
            break;
        }
        buf[len++] = (char)c;
    }

    /* buf is still NULL only when EOF was hit before reading anything. */
    if (buf != NULL) {
        buf[len] = '\0';
    }
    return buf;
}
/**
 * Read tab-delimited lines from stdin and echo them to stdout with an extra
 * column inserted after the third one. The new column is the comma-separated
 * bin list printed by bins() for the range given by column 2 (start) and
 * column 3 (end), both parsed with atoi().
 *
 * Lines with fewer than three columns are echoed without a bin column.
 * Empty columns are preserved, so consecutive tabs do not shift later
 * columns to the left.
 */
int main(int argc,char** argv)
{
    char *line;

    (void)argc;
    (void)argv;

    while ((line = readline(stdin)) != NULL) {
        int chromStart = 0;
        int chromEnd = 0;
        int col = 0;
        char *field = line;

        /*
         * Split on every single tab. strtok() would merge runs of tabs and
         * silently drop empty columns.
         */
        while (field != NULL) {
            char *tab = strchr(field, '\t');

            if (tab != NULL) {
                *tab = '\0';
            }
            switch (col) {
            case 0:
                fputs(field, stdout);
                break;
            case 1:
                chromStart = atoi(field);
                printf("\t%s", field);
                break;
            case 2:
                chromEnd = atoi(field);
                printf("\t%s\t", field);
                bins(chromStart, chromEnd);
                break;
            default:
                printf("\t%s", field);
                break;
            }
            field = (tab != NULL) ? tab + 1 : NULL;
            ++col;
        }
        fputs("\n", stdout);
        free(line);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment