from __future__ import division, print_function import numpy as np np.random.seed(42) import scipy.stats import random random.seed(1) import matplotlib.pyplot as plt %matplotlib inline import sys # import anhima # # dev imports sys.path.insert(0, '..') %reload_ext autoreload %autoreload 1 %aimport anhima.sim %aimport anhima.gt %aimport anhima.af %aimport anhima.f2 # simulate genotypes n_variants = 10000 n_samples = 100 ploidy = 2 af_dist = scipy.stats.beta(a=.1, b=6) p_missing = 0 genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy) # simulate three sub-populations with relatedness pop1_genotypes = anhima.sim.simulate_relatedness(genotypes[:, :n_samples//3], relatedness=.5, n_iter=100) pop23_genotypes = anhima.sim.simulate_relatedness(genotypes[:, n_samples//3:], relatedness=.5, n_iter=100) pop2_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, n_samples//3:], relatedness=.5, n_iter=20) pop3_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, :n_samples//3], relatedness=.5, n_iter=20) pop1_ac = anhima.af.allele_count(pop1_genotypes, allele=1) pop2_ac = anhima.af.allele_count(pop2_genotypes, allele=1) pop3_ac = anhima.af.allele_count(pop3_genotypes, allele=1) subpops_ac = np.column_stack([pop1_ac, pop2_ac, pop3_ac]) # raw counts of shared doubletons counts = anhima.f2.count_shared_doubletons(subpops_ac) counts subpop_labels = ['pop1', 'pop2', 'pop3'] anhima.f2.plot_shared_doubletons(counts, subpop_labels=subpop_labels); anhima.f2.plot_shared_doubletons(counts, subpop_labels=subpop_labels, flip=True); anhima.f2.plot_shared_doubletons(counts, subpop_labels=subpop_labels, relative=True); n_samples = [pop1_genotypes.shape[1], pop2_genotypes.shape[1], pop3_genotypes.shape[1]] n_samples # normalise doubleton counts by the number of distinct pairs of haplotypes in each comparison counts_normed = anhima.f2.normalise_doubleton_counts(counts, n_samples=n_samples, ploidy=2) counts_normed anhima.f2.plot_shared_doubletons(counts_normed, subpop_labels=subpop_labels); anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels); anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels, orientation='horizontal'); anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5); anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5, relative=True); anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5, relative=True, normed=True, n_samples=n_samples, ploidy=2);