Skip to content

Instantly share code, notes, and snippets.

@yosemitebandit
Last active April 22, 2016 20:57
Show Gist options
  • Save yosemitebandit/12f7bb6542315f10f4467392eb6f603f to your computer and use it in GitHub Desktop.
Save yosemitebandit/12f7bb6542315f10f4467392eb6f603f to your computer and use it in GitHub Desktop.
plotting gene expression data
"""Plotting gene expression data from a csv"""
import os
import matplotlib
import matplotlib.pyplot as plt
import pylab
# Load data.
path = 'murine-placenta-gene-1-2.csv'
#path = 'human-placenta-gene-1-2.csv'
with open(path) as data_file:
data = data_file.read()
# Strip header.
line_data, header_data = [], []
for index, line in enumerate(data.split('\r')):
if index <= 2:
header_data.append(line)
if index > 2:
line_data.append(line)
# Get the embryonic / trimester data (e8.5, e10.5, etc).
embryonic_data = header_data[1].split(',')
embryonic_numeric_data = []
if 'murine' in path:
for d in embryonic_data:
if not d:
continue
if d == 'P0':
number = 20
else:
number = float(d.split('e')[1])
embryonic_numeric_data.append(number)
else:
for d in embryonic_data:
if not d:
continue
if d == 'Pregnancy stage':
continue
if d == 'First trimester':
number = 1
elif d == 'Third trimester':
number = 3
embryonic_numeric_data.append(number)
# Group the probe data by gene -- keyed by gene, contents are an array of rows.
gene_data = {}
for line in line_data:
tokens = line.split(',')
if not tokens or not tokens[0]:
continue
gene = tokens[1]
if gene not in gene_data:
gene_data[gene] = []
gene_data[gene].append(tokens)
# Find the greatest y value.
max_expression = 0
for gene in gene_data.keys():
for tokens in gene_data[gene]:
data = [float(v) for v in tokens[2:]]
if max(data) > max_expression:
max_expression = max(data)
# Make a plot for each gene.
if 'murine' in path:
out_dir = 'mouse-plots'
else:
out_dir = 'human-plots'
for gene in gene_data.keys():
figure = plt.figure()
for tokens in gene_data[gene]:
probe = tokens[0]
data = tokens[2:]
plt.plot(embryonic_numeric_data, data, 'o', label=probe)
plt.title(gene)
axes = plt.gca()
if 'murine' in path:
axes.tick_params(axis='x', pad=15)
plt.xticks(list(set(embryonic_numeric_data)))
axes.set_xlim([8, 21])
plt.xlabel('gestation day')
plt.ylabel('genechip expression level')
else:
axes.set_xlim([0., 4])
plt.xlabel('trimester')
plt.ylabel('beadchip expression level')
box = axes.get_position()
axes.set_position([box.x0, box.y0, box.width * 0.75, box.height])
axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
axes.set_ylim([0., 1.1 * max_expression])
matplotlib.rcParams.update({'font.size': 8})
figure_path = os.path.join(out_dir, '%s.png' % gene)
pylab.savefig(figure_path)
plt.close(figure)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment