ls /data
10.fq.genes.results 20.fq.isoforms.results 31.fq.genes.results
10.fq.isoforms.results 21.fq.genes.results 31.fq.isoforms.results
11.fq.genes.results 21.fq.isoforms.results 32.fq.genes.results
11.fq.isoforms.results 22.fq.genes.results 32.fq.isoforms.results
12.fq.genes.results 22.fq.isoforms.results 33.fq.genes.results
12.fq.isoforms.results 23.fq.genes.results 33.fq.isoforms.results
13.fq.genes.results 23.fq.isoforms.results 3.fq.genes.results
13.fq.isoforms.results 24.fq.genes.results 3.fq.isoforms.results
14.fq.genes.results 24.fq.isoforms.results 4.fq.genes.results
14.fq.isoforms.results 25.fq.genes.results 4.fq.isoforms.results
15.fq.genes.results 25.fq.isoforms.results 5.fq.genes.results
15.fq.isoforms.results 26.fq.genes.results 5.fq.isoforms.results
16.fq.genes.results 26.fq.isoforms.results 6.fq.genes.results
16.fq.isoforms.results 27.fq.genes.results 6.fq.isoforms.results
17.fq.genes.results 27.fq.isoforms.results 7.fq.genes.results
17.fq.isoforms.results 28.fq.genes.results 7.fq.isoforms.results
18.fq.genes.results 28.fq.isoforms.results 8.fq.genes.results
18.fq.isoforms.results 29.fq.genes.results 8.fq.isoforms.results
19.fq.genes.results 29.fq.isoforms.results 9.fq.genes.results
19.fq.isoforms.results 2.fq.genes.results 9.fq.isoforms.results
1.fq.genes.results 2.fq.isoforms.results lost+found/
1.fq.isoforms.results 30.fq.genes.results nematostella.fa.gz
20.fq.genes.results 30.fq.isoforms.results
import csv
def load_results(filename):
r = csv.DictReader(open(filename, 'rb'), delimiter='\t')
results = {}
for row in r:
gene = row['gene_id']
results[gene] = row
return results
sample1 = load_results('/data/1.fq.genes.results') # these are replicates
sample2 = load_results('/data/2.fq.genes.results')
sample1v2 = []
for k in sample1:
s1 = float(sample1[k]['FPKM'])
s2 = float(sample2[k]['FPKM'])
if s1 == 0 or s2 == 0:
continue
else:
sample1v2.append((s1, s2))
sample1v2 = numpy.array(sample1v2)
ax = plot(sample1v2[:,0], sample1v2[:,1], 'bo', alpha=0.1)
ax = axes()
ax.set_yscale('log')
ax.set_xscale('log')
r = numpy.corrcoef(sample1v2[:,0], sample1v2[:,1])[0,1]
r**2
0.9999227579572888
ratio = []
for k in sample1:
s1 = float(sample1[k]['FPKM'])
s2 = float(sample2[k]['FPKM'])
if s1 == 0 or s2 == 0:
continue
else:
ratio.append(math.log(s1 / s2, 2))
hist(ratio, bins=100, range=(-2,2))
(array([ 6, 4, 5, 6, 25, 5, 4, 2, 5, 129, 27, 12, 10, 8, 23, 13, 67, 12, 23, 12, 22, 26, 19, 28, 360, 41, 39, 39, 64, 69, 58, 81, 77, 74, 222, 103, 115, 144, 115, 233, 205, 237, 290, 285, 328, 437, 510, 545, 702, 1744, 746, 644, 587, 515, 401, 359, 305, 290, 212, 248, 146, 130, 123, 105, 243, 92, 88, 118, 59, 83, 59, 42, 47, 30, 402, 51, 30, 28, 26, 21, 32, 8, 67, 6, 15, 17, 14, 7, 41, 134, 5, 6, 8, 6, 10, 8, 7, 3, 2, 63]), array([-2. , -1.96, -1.92, -1.88, -1.84, -1.8 , -1.76, -1.72, -1.68, -1.64, -1.6 , -1.56, -1.52, -1.48, -1.44, -1.4 , -1.36, -1.32, -1.28, -1.24, -1.2 , -1.16, -1.12, -1.08, -1.04, -1. , -0.96, -0.92, -0.88, -0.84, -0.8 , -0.76, -0.72, -0.68, -0.64, -0.6 , -0.56, -0.52, -0.48, -0.44, -0.4 , -0.36, -0.32, -0.28, -0.24, -0.2 , -0.16, -0.12, -0.08, -0.04, 0. , 0.04, 0.08, 0.12, 0.16, 0.2 , 0.24, 0.28, 0.32, 0.36, 0.4 , 0.44, 0.48, 0.52, 0.56, 0.6 , 0.64, 0.68, 0.72, 0.76, 0.8 , 0.84, 0.88, 0.92, 0.96, 1. , 1.04, 1.08, 1.12, 1.16, 1.2 , 1.24, 1.28, 1.32, 1.36, 1.4 , 1.44, 1.48, 1.52, 1.56, 1.6 , 1.64, 1.68, 1.72, 1.76, 1.8 , 1.84, 1.88, 1.92, 1.96, 2. ]), <a list of 100 Patch objects>)
x = [1,2,3,4]
y = [3,5,7,10] # 10, not 9, so the fit isn't perfect
fit = polyfit(x,y,1)
print fit
fit_fn = poly1d(fit) # fit_fn is now a function which takes in x and returns an estimate for y
plot(x,y, 'yo', x, fit_fn(x), '--k')
xlim(0, 5)
ylim(0, 12)
[ 2.3 0.5]
(0, 12)
log_1v2 = numpy.log(sample1v2)
plot(log_1v2[:,0], log_1v2[:,1], '.', alpha=0.1)
fit = polyfit(log_1v2[:,0], log_1v2[:,1], 1)
fit_fn = poly1d(fit)
plot(log_1v2[:,0], fit_fn(log_1v2[:,0]), 'r-')
[<matplotlib.lines.Line2D at 0xa52df90>]
sample1 = load_results('/data/1.fq.genes.results')
sample7 = load_results('/data/7.fq.genes.results') # this is a different time point!
sample1v7 = []
for k in sample1:
s1 = float(sample1[k]['FPKM'])
s2 = float(sample7[k]['FPKM'])
if s1 == 0 or s2 == 0:
continue
else:
sample1v7.append((s1, s2))
sample1v7 = numpy.array(sample1v7)
ax = plot(sample1v7[:,0], sample1v7[:,1], 'go', alpha=0.1)
ax = axes()
ax.set_yscale('log')
ax.set_xscale('log')
xlabel("sample 1 expression")
ylabel("sample 2 expression")
<matplotlib.text.Text at 0x16d19f10>
ratio2 = []
for k in sample1:
s1 = float(sample1[k]['FPKM'])
s2 = float(sample7[k]['FPKM'])
if s1 == 0 or s2 == 0:
continue
else:
ratio2.append(math.log(s1 / s2, 2))
hist(ratio2, bins=100)
(array([ 2, 0, 1, 0, 1, 4, 0, 6, 3, 2, 4, 6, 5, 11, 13, 22, 25, 64, 49, 75, 51, 141, 105, 134, 282, 224, 294, 321, 608, 431, 545, 618, 614, 648, 720, 1350, 682, 697, 707, 653, 526, 464, 654, 306, 260, 238, 128, 249, 104, 91, 128, 48, 70, 36, 59, 36, 20, 25, 20, 11, 7, 6, 8, 5, 0, 3, 0, 2, 0, 0, 0, 2, 2, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), array([-4.93073734, -4.79119215, -4.65164695, -4.51210176, -4.37255657, -4.23301138, -4.09346618, -3.95392099, -3.8143758 , -3.67483061, -3.53528542, -3.39574022, -3.25619503, -3.11664984, -2.97710465, -2.83755945, -2.69801426, -2.55846907, -2.41892388, -2.27937868, -2.13983349, -2.0002883 , -1.86074311, -1.72119792, -1.58165272, -1.44210753, -1.30256234, -1.16301715, -1.02347195, -0.88392676, -0.74438157, -0.60483638, -0.46529119, -0.32574599, -0.1862008 , -0.04665561, 0.09288958, 0.23243478, 0.37197997, 0.51152516, 0.65107035, 0.79061554, 0.93016074, 1.06970593, 1.20925112, 1.34879631, 1.48834151, 1.6278867 , 1.76743189, 1.90697708, 2.04652227, 2.18606747, 2.32561266, 2.46515785, 2.60470304, 2.74424824, 2.88379343, 3.02333862, 3.16288381, 3.302429 , 3.4419742 , 3.58151939, 3.72106458, 3.86060977, 4.00015497, 4.13970016, 4.27924535, 4.41879054, 4.55833574, 4.69788093, 4.83742612, 4.97697131, 5.1165165 , 5.2560617 , 5.39560689, 5.53515208, 5.67469727, 5.81424247, 5.95378766, 6.09333285, 6.23287804, 6.37242323, 6.51196843, 6.65151362, 6.79105881, 6.930604 , 7.0701492 , 7.20969439, 7.34923958, 7.48878477, 7.62832996, 7.76787516, 7.90742035, 8.04696554, 8.18651073, 8.32605593, 8.46560112, 8.60514631, 8.7446915 , 8.88423669, 9.02378189]), <a list of 100 Patch objects>)