Created
September 28, 2011 12:18
-
-
Save x0xMaximus/1247796 to your computer and use it in GitHub Desktop.
Python Scripting for Williams Bio319
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# cc attribution-sharealike // max [@] maxnanis.com
import re | |
from bio import fastad, fastac, rc | |
# Standard code table as a Python dictionary: maps each of the 64
# three-letter DNA codons (uppercase) to a one-letter amino-acid symbol.
# "*" marks a stop codon.  Used as the default lookup table by translate().
stdcode = {
    "TTT": "F", "TTC": "F",
    "TTA": "L", "TTG": "L",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
    "TAT": "Y", "TAC": "Y",
    "TAA": "*", "TAG": "*",  # stop codons
    "TGT": "C", "TGC": "C",
    "TGA": "*",  # stop codon
    "TGG": "W",
    "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAT": "H", "CAC": "H",
    "CAA": "Q", "CAG": "Q",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "ATT": "I", "ATC": "I", "ATA": "I",
    "ATG": "M",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAT": "N", "AAC": "N",
    "AAA": "K", "AAG": "K",
    "AGT": "S", "AGC": "S",
    "AGA": "R", "AGG": "R",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAT": "D", "GAC": "D",
    "GAA": "E", "GAG": "E",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}
def translate(nt_list, frame=0, code=stdcode):
    """Translate nucleotide strings into spaced amino-acid strings.

    Args:
        nt_list: iterable of nucleotide strings to translate.
        frame: index of the first base of the first codon; the output
            string is also prefixed with this many spaces.
        code: mapping from a three-letter codon to its amino-acid symbol.

    Returns:
        A list with one string per entry of ``nt_list``.  Every complete
        codon contributes its symbol followed by one space; a codon that
        is missing from ``code`` (or maps to an empty value) contributes
        a bare "*".  A trailing partial codon (1-2 bases) is ignored.
    """
    results = []
    # loop through the nt strings and build the amino-acid line for each
    for nt in nt_list:
        # indent by the frame offset
        pieces = [' ' * frame]
        pos = frame
        codon = nt[pos:pos + 3]
        # only complete codons are translated; with an offset the last
        # slice may come up 1-2 bases short and ends the loop
        while len(codon) == 3:
            # .get() instead of code[codon]: a codon absent from the table
            # must fall through to "*" below rather than raise KeyError
            symbol = code.get(codon)
            if symbol:
                # handled by the mapping: include the amino-acid code
                pieces.append(symbol + " ")
            else:
                # not handled: include an *
                pieces.append("*")
            # march along, 3 nt at a time
            pos += 3
            codon = nt[pos:pos + 3]
        results.append("".join(pieces))
    return results
def render(nt, frame):
    """Print each sequence in ``nt`` followed by its translation.

    Args:
        nt: indexable collection of nucleotide sequences.
        frame: reading frame.  Values >= 3 select the reverse strand: the
            sequences are passed through ``rc`` and 3 is subtracted to get
            the offset handed to ``translate``.
            NOTE(review): ``rc`` is imported from the project-local ``bio``
            module; presumably it returns reverse complements - confirm.
    """
    on_reverse = frame >= 3
    if on_reverse:
        nt = rc(nt)
        frame -= 3
    reversed_label = "Yup" if on_reverse else "Nah"
    print("\n Reading frame {}, Reversed: ".format(frame) + reversed_label + "\n\n")
    # one nucleotide line, then its amino-acid line directly below it
    for sequence, protein in zip(nt, translate(nt, frame)):
        print(sequence)
        print(protein)
if __name__ == "__main__":
    # Command-line entry point: read sequences from stdin and print their
    # translations.  With no argument every frame 0-5 is rendered; with one
    # argument only that frame is rendered.
    # argv is the list of words in the command; argv[0] is the script name.
    from sys import stdin, argv

    if len(argv) == 1:
        # no arguments were passed in: do all possible frames/reverses
        frames = range(6)
    else:
        # a frame argument was passed in; validate it before touching stdin
        # so a bad value fails fast with a readable message instead of a
        # traceback or silently mislabelled output
        try:
            requested = int(argv[1])
        except ValueError:
            raise SystemExit(
                "frame must be an integer from 0 to 5, got {!r}".format(argv[1])
            )
        if not 0 <= requested <= 5:
            raise SystemExit(
                "frame must be an integer from 0 to 5, got {}".format(requested)
            )
        frames = [requested]

    # NOTE(review): fastad comes from the project-local `bio` module;
    # presumably it parses FASTA-formatted lines into sequences - confirm.
    nt = fastad(stdin.readlines())
    for frame in frames:
        render(nt, frame)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment