from __future__ import division, print_function import numpy as np np.random.seed(1) import scipy.stats import random random.seed(1) import matplotlib.pyplot as plt %matplotlib inline import sys # import anhima # # dev imports sys.path.insert(0, '..') %reload_ext autoreload %autoreload 1 %aimport anhima.sim %aimport anhima.gt %aimport anhima.af %aimport anhima.sf n_samples = 100 ploidy = 2 m = n_samples * ploidy # simulated allele counts for population 1 - constant size pop1_derived_af = scipy.stats.reciprocal.rvs(a=.001, b=1, size=1000000) pop1_derived_ac = np.round(pop1_derived_af * m).astype('i4') pop1_biallelic_ac = np.column_stack([m - pop1_derived_ac, pop1_derived_ac]) # simulated allele counts for population 2 - deficit of rare variants (population bottleneck) pop2_derived_af = scipy.stats.beta.rvs(a=.2, b=1.1, size=1000000) pop2_derived_ac = np.round(pop2_derived_af * m).astype('i4') pop2_biallelic_ac = np.column_stack([m - pop2_derived_ac, pop2_derived_ac]) # simulated allele counts for population 3 - excess of rare variants pop3_derived_af = np.append(pop1_derived_af, scipy.stats.beta.rvs(a=.1, b=6, size=1000000)) pop3_derived_ac = np.round(pop3_derived_af * m).astype('i4') pop3_biallelic_ac = np.column_stack([m - pop3_derived_ac, pop3_derived_ac]) pop1_sfs = anhima.sf.site_frequency_spectrum(pop1_derived_ac) pop2_sfs = anhima.sf.site_frequency_spectrum(pop2_derived_ac) pop3_sfs = anhima.sf.site_frequency_spectrum(pop3_derived_ac) fig, ax = plt.subplots() anhima.sf.plot_site_frequency_spectrum(pop1_sfs, ax=ax, label='pop1', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop2_sfs, ax=ax, label='pop2', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop3_sfs, ax=ax, label='pop3', plot_kwargs=dict(lw=2)) ax.set_xlabel('derived allele count') ax.legend(); pop1_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop1_derived_ac) pop2_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop2_derived_ac) pop3_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop3_derived_ac) fig, ax = plt.subplots() anhima.sf.plot_site_frequency_spectrum(pop1_sfs_scaled, ax=ax, label='pop1', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop2_sfs_scaled, ax=ax, label='pop2', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop3_sfs_scaled, ax=ax, label='pop3', plot_kwargs=dict(lw=2)) ax.set_ylim(bottom=0) ax.set_ylabel('scaled site frequency') ax.set_xlabel('derived allele count') ax.legend(bbox_to_anchor=(1, 1), loc='upper left'); pop1_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop1_biallelic_ac) pop2_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop2_biallelic_ac) pop3_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop3_biallelic_ac) fig, ax = plt.subplots() anhima.sf.plot_site_frequency_spectrum(pop1_sfs_folded, ax=ax, label='pop1', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop2_sfs_folded, ax=ax, label='pop2', plot_kwargs=dict(lw=2)) anhima.sf.plot_site_frequency_spectrum(pop3_sfs_folded, ax=ax, label='pop3', plot_kwargs=dict(lw=2)) ax.set_xlabel('minor allele count') ax.legend(); pop1_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop1_biallelic_ac) pop2_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop2_biallelic_ac) pop3_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop3_biallelic_ac) fig, ax = plt.subplots() anhima.sf.plot_site_frequency_spectrum(pop1_sfs_folded_scaled, ax=ax, label='pop1', plot_kwargs=dict(lw=2), m=m) anhima.sf.plot_site_frequency_spectrum(pop2_sfs_folded_scaled, ax=ax, label='pop2', plot_kwargs=dict(lw=2), m=m) anhima.sf.plot_site_frequency_spectrum(pop3_sfs_folded_scaled, ax=ax, label='pop3', plot_kwargs=dict(lw=2), m=m) ax.set_ylim(bottom=0) ax.set_ylabel('scaled site frequency') ax.set_xlabel('minor allele frequency') ax.legend(bbox_to_anchor=(1, 1), loc='upper left');