from __future__ import division, print_function
import numpy as np
np.random.seed(1)
import scipy.stats
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import anhima
# # dev imports
# sys.path.insert(0, '../src')
# %reload_ext autoreload
# %autoreload 1
# %aimport anhima.sim
# %aimport anhima.gt
# %aimport anhima.af
# simulate genotypes
n_variants = 1000
n_samples = 1000
ploidy = 2
af_dist = scipy.stats.beta(a=.4, b=.6)
p_missing = .1
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy)
anhima.af.count_variant(genotypes)
962
anhima.af.count_non_variant(genotypes)
38
n_seg = anhima.af.count_segregating(genotypes)
n_seg
958
n_non_seg = anhima.af.count_non_segregating(genotypes)
n_non_seg
42
assert n_seg + n_non_seg == n_variants
n_non_seg_ref = anhima.af.count_non_segregating(genotypes, allele=0)
n_non_seg_ref
38
n_non_seg_alt = anhima.af.count_non_segregating(genotypes, allele=1)
n_non_seg_alt
4
assert n_non_seg == n_non_seg_ref + n_non_seg_alt
anhima.af.count_singletons(genotypes)
12
anhima.af.count_doubletons(genotypes)
9
an, ac, af = anhima.af.allele_frequencies(genotypes)
# plot actual distribution of alt allele frequencies
plt.hist(af[:, 1], bins=20);