from __future__ import division, print_function
import numpy as np
np.random.seed(42)
import scipy.stats
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
# import anhima
# # dev imports
sys.path.insert(0, '..')
%reload_ext autoreload
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.af
%aimport anhima.f2
# simulate genotypes
n_variants = 10000
n_samples = 100
ploidy = 2
af_dist = scipy.stats.beta(a=.1, b=6)
p_missing = 0
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy)
# simulate three sub-populations with relatedness
pop1_genotypes = anhima.sim.simulate_relatedness(genotypes[:, :n_samples//3], relatedness=.5, n_iter=100)
pop23_genotypes = anhima.sim.simulate_relatedness(genotypes[:, n_samples//3:], relatedness=.5, n_iter=100)
pop2_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, n_samples//3:], relatedness=.5, n_iter=20)
pop3_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, :n_samples//3], relatedness=.5, n_iter=20)
pop1_ac = anhima.af.allele_count(pop1_genotypes, allele=1)
pop2_ac = anhima.af.allele_count(pop2_genotypes, allele=1)
pop3_ac = anhima.af.allele_count(pop3_genotypes, allele=1)
subpops_ac = np.column_stack([pop1_ac, pop2_ac, pop3_ac])
# raw counts of shared doubletons
counts = anhima.f2.count_shared_doubletons(subpops_ac)
counts
array([[53, 14, 13], [14, 59, 79], [13, 79, 44]])
subpop_labels = ['pop1', 'pop2', 'pop3']
anhima.f2.plot_shared_doubletons(counts,
subpop_labels=subpop_labels);
anhima.f2.plot_shared_doubletons(counts,
subpop_labels=subpop_labels,
flip=True);
anhima.f2.plot_shared_doubletons(counts,
subpop_labels=subpop_labels,
relative=True);
n_samples = [pop1_genotypes.shape[1], pop2_genotypes.shape[1], pop3_genotypes.shape[1]]
n_samples
[33, 34, 33]
# normalise doubleton counts by the number of distinct pairs of haplotypes in each comparison
counts_normed = anhima.f2.normalise_doubleton_counts(counts, n_samples=n_samples, ploidy=2)
counts_normed
array([[ 0.02470862, 0.00311943, 0.00298439], [ 0.00311943, 0.02589991, 0.0176025 ], [ 0.00298439, 0.0176025 , 0.02051282]])
anhima.f2.plot_shared_doubletons(counts_normed,
subpop_labels=subpop_labels);
anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels);
anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels, orientation='horizontal');
anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5);
anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5, relative=True);
anhima.f2.plot_f2_fig(counts,
subpop_labels=subpop_labels,
figsize_factor=1.5,
relative=True,
normed=True,
n_samples=n_samples,
ploidy=2);