Skip to content

Instantly share code, notes, and snippets.

@parashardhapola
Created April 24, 2016 20:03
Show Gist options
  • Save parashardhapola/efcce728601f335de50ae91411e165dc to your computer and use it in GitHub Desktop.
Save parashardhapola/efcce728601f335de50ae91411e165dc to your computer and use it in GitHub Desktop.
import sys
from warnings import warn
import os
# Module metadata: author, contact e-mail and a short description of what
# the script does.
__author__ = 'Parashar Dhapola'
__email__ = ''
__desc__ = (
    "\n"
    "This script converts whole gene coordinates into TSS coordinates.\n"
    "Ideally this script should work on any table downloaded form UCSC\n"
    "table browser but has been only testedin \"SGD track\".\n"
)
def optParse():
    """Open the input and output BED files named on the command line.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.

    Returns:
        A ``(fhi, fho)`` tuple of open file handles (input opened for
        reading, output opened for writing) on success. ``False`` after
        printing a message when an argument is missing or when either file
        cannot be opened.
    """
    try:
        input_bed = sys.argv[1]
        out_bed = sys.argv[2]
    except IndexError:
        print("USAGE:\npython %s <input_bed_file> <output_bed_file>" % __file__)
        return False
    try:
        fhi = open(input_bed)
    except IOError:
        print("Input bed file doesn't exist")
        return False
    try:
        fho = open(out_bed, 'w')
    except (IOError, OSError):
        # Only I/O failures are handled here; a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit.  The already-open input
        # handle is released so it does not leak on this failure path.
        fhi.close()
        print("Couldn't create output file. Check permission")
        return False
    return fhi, fho
def genes_to_tss(lines):
    """Convert whole-gene BED lines into single-base TSS BED records.

    Each data line is split on tabs and must have at least six columns;
    columns 1-4 and 6 are used as chromosome, start, end, gene name and
    strand (the fifth column is ignored).  For a "+" strand gene the TSS is
    the start coordinate; for a "-" strand gene it is ``end - 1`` because the
    end coordinate is excluded in BED format.

    Blank lines and header lines (starting with "#", or whose first word is
    "track" or "browser") are skipped.  Only the first occurrence of a gene
    name is kept; later ones trigger a warning and are ignored.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A list of tab-joined strings ``chrom, tss, tss + 1, name`` in input
        order, without trailing newlines.

    Raises:
        ValueError: If a data line has fewer than six columns or a strand
            other than "+" or "-".  The message carries the 1-based line
            number.
    """
    records = []
    seen_names = set()
    for line_no, raw in enumerate(lines, 1):
        # Strip "\r" as well so CRLF files do not leave it glued to the
        # strand column.
        line = raw.rstrip('\r\n')
        words = line.split(None, 1)
        if not words or line.startswith('#') or words[0] in ('track', 'browser'):
            continue
        cols = line.split('\t')
        if len(cols) < 6:
            raise ValueError(
                "Expected at least 6 tab-separated columns in line %d. Exiting!"
                % line_no)
        name = cols[3]
        if name in seen_names:
            warn("Ignoring duplicate gene name %s" % name)
            continue
        strand = cols[5]
        if strand == "+":
            tss = int(cols[1])
        elif strand == "-":
            tss = int(cols[2]) - 1  # Because end coordinate is excluded in BED format
        else:
            raise ValueError(
                "Unrecognized symbol for strand found in line %d. Exiting!"
                % line_no)
        seen_names.add(name)
        records.append("\t".join(map(str, [cols[0], tss, tss + 1, name])))
    return records


if __name__ == "__main__":
    opt_ret = optParse()
    if opt_ret is False:
        # Non-zero status so shells and pipelines can detect the failure.
        sys.exit(1)
    fhi, fho = opt_ret
    try:
        out_bed_lines = genes_to_tss(fhi)
        # One record per line, each newline-terminated, so line-oriented
        # tools such as ``sort`` see a well-formed text file.
        fho.writelines(record + "\n" for record in out_bed_lines)
    finally:
        # Close both handles even when parsing raises.
        fhi.close()
        fho.close()
    # splitext only strips the final extension, so paths such as "./out.bed"
    # or "../data/genes.v2.bed" keep their directory and base name.
    sorted_bed = os.path.splitext(sys.argv[2])[0] + "_sorted.bed"
    print("Output written to file %s" % sys.argv[2])
    print("Please run following command on bash shell to sort or use BedTools (use Excel if on Windows :( ))")
    print("sort -k1,1 -k2n,2 %s > %s" % (sys.argv[2], sorted_bed))
    print("Job completed!!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment