In [1]:

#from collections import defaultdict

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

from Bio.PopGen.GenePop import Controller as gpc
%matplotlib inline

In [2]:

my_pops = [l.rstrip() for l in open('hapmap10_auto_noofs_2.pops')]
num_pops = len(my_pops)

In [3]:

ctrl = gpc.GenePopController()

In [4]:

(multi_fis, multi_fst, multi_fit), f_iter = ctrl.calc_fst_all('hapmap10_auto_noofs_2.gp')

In [5]:

print(multi_fis, multi_fst, multi_fit)

(0.0004, 0.1078, 0.1082)

In [6]:

fst_vals = []
fis_vals = []
fit_vals = []
for f_case in f_iter:
    name, fis, fst, fit, qinter, qintra = f_case
    fst_vals.append(fst)
    fis_vals.append(fis)
    fit_vals.append(fit)

In [7]:

sns.set_style("whitegrid")
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(1, 1, 1)
ax.hist(fst_vals, 50, color='r')
ax.set_title('FST, FIS and FIS distributions')
ax.set_xlabel('FST')
ax = fig.add_subplot(2, 2, 2)
sns.violinplot([fis_vals, fit_vals], ax=ax, vert=False)
ax.set_yticklabels(['FIS', 'FIT'])
ax.set_xlim(-.1, 0.4)

Out[7]:

(-0.1, 0.4)

In [8]:

fpair_iter, avg = ctrl.calc_fst_pair('hapmap10_auto_noofs_2.gp')

In [9]:

min_pair = min(avg.values())
max_pair = max(avg.values())
arr = np.ones((num_pops - 1, num_pops - 1, 3), dtype=float)

In [10]:

sns.set_style("white")
fig = plt.figure(figsize=(16,9))
ax = fig.add_subplot(111)
for row in range(num_pops - 1):
    for col in range(row + 1, num_pops):
        val = avg[(col, row)]
        norm_val = (val - min_pair) / (max_pair - min_pair)
        ax.text(col - 1, row, '%.3f' % val, ha='center')
        if norm_val == 0.0:
            arr[row, col - 1, 0] = 1
            arr[row, col - 1, 1] = 1
            arr[row, col - 1, 2] = 0
        elif norm_val == 1.0:
            arr[row, col - 1, 0] = 1
            arr[row, col - 1, 1] = 0
            arr[row, col - 1, 2] = 1
        else:
            arr[row, col - 1, 0] = 1 - norm_val
            arr[row, col - 1, 1] = 1
            arr[row, col - 1, 2] = 1
ax.imshow(arr, interpolation='none')
ax.set_title('Multilocus Pairwise FST')
ax.set_xticks(range(num_pops - 1))
ax.set_xticklabels(my_pops[1:])
ax.set_yticks(range(num_pops - 1))
ax.set_yticklabels(my_pops[:-1])

Out[10]:

[<matplotlib.text.Text at 0x7f43bbff81d0>,
 <matplotlib.text.Text at 0x7f43bc33d1d0>,
 <matplotlib.text.Text at 0x7f43bc24ec10>,
 <matplotlib.text.Text at 0x7f43bc255150>,
 <matplotlib.text.Text at 0x7f43bc255850>,
 <matplotlib.text.Text at 0x7f43bc23af10>,
 <matplotlib.text.Text at 0x7f43bc255cd0>,
 <matplotlib.text.Text at 0x7f43bc25e450>,
 <matplotlib.text.Text at 0x7f43bc25eb90>,
 <matplotlib.text.Text at 0x7f43bc268310>]

In [11]:

pop_ceu = my_pops.index('CEU')
pop_yri = my_pops.index('YRI')
start_pos = 136261886  # b36
end_pos = 136350481
all_fsts = []
inside_fsts = []
for locus_pfst in fpair_iter:
    name = locus_pfst[0]
    pfst = locus_pfst[1]
    pos = int(name.split('/')[-1])  # dependent
    my_fst = pfst[(pop_yri, pop_ceu)]
    if my_fst == '-':  # Can be this
        continue
    all_fsts.append(my_fst)
    if pos >= start_pos and pos <= end_pos:
        inside_fsts.append(my_fst)

In [12]:

print(inside_fsts)
print('%.2f/%.2f/%.2f' % (np.median(all_fsts), np.mean(all_fsts), np.percentile(all_fsts, 90)))

[0.1346, 0.6703, 0.7317, 0.1432, 0.2485]
0.08/0.13/0.33

In [13]:

100 * (end_pos - start_pos) / 242000000.0

Out[13]:

0.03660950413223141

In [ ]: