Skip to content

Instantly share code, notes, and snippets.

@cwidmer
Created December 5, 2013 20:49
Show Gist options
  • Save cwidmer/7813619 to your computer and use it in GitHub Desktop.
Save cwidmer/7813619 to your computer and use it in GitHub Desktop.
Visualize a list of sequences (of same length, assuming DNA encoding) as a color coded matrix
import pylab
import numpy
def _encode_seqs(seqs):
    """
    convert DNA sequences into a (num_seqs, len_seqs) matrix of base codes

    codes are A=0, T=1, G=2, C=3; lower-case bases are accepted. Positions
    that a sequence does not cover (it is shorter than the first one) are
    left as NaN.

    raises IndexError if seqs is empty or a sequence is longer than the
    first one, KeyError for any symbol other than A, T, G, C
    """
    base_map = {"A": 0, "T": 1, "G": 2, "C": 3}

    num_seqs = len(seqs)
    # the matrix width is taken from the first sequence
    len_seqs = len(seqs[0])

    # start from NaN rather than 0, so that a position missing from a
    # shorter sequence is not silently encoded as "A" (code 0)
    color_matrix = numpy.full((num_seqs, len_seqs), numpy.nan)

    for i, seq in enumerate(seqs):
        for j, nucl in enumerate(seq):
            # normalise case so "a" and "A" get the same code
            color_matrix[i, j] = base_map[nucl.upper()]

    return color_matrix


def show_seqs(seqs):
    """
    plot color coded training sequences

    each row of the image is one sequence, each column one sequence
    position, and the color of a cell encodes the base at that position
    (A=0, T=1, G=2, C=3).

    seqs: non-empty list of DNA strings, expected to be of equal length

    raises IndexError if seqs is empty or a sequence is longer than the
    first one, KeyError for any symbol other than A, T, G, C
    """
    # encode first, so that invalid input fails before a figure is created
    color_matrix = _encode_seqs(seqs)

    pylab.figure()
    # interpolation="nearest": the codes are categorical, so neighbouring
    # cells must not be blended into intermediate colors.
    # vmin/vmax: pin the color scale to the full code range, so a base keeps
    # the same color no matter which bases happen to occur in seqs.
    pylab.imshow(color_matrix, interpolation="nearest", vmin=0, vmax=3)
    pylab.title("seqs distribution")
    pylab.xlabel("seq pos")
    pylab.ylabel("seq")
    pylab.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment