import numpy as np np.random.seed(42) import scipy.stats import itertools import random random.seed(42) import matplotlib.pyplot as plt %matplotlib inline import sys # import anhima # dev imports sys.path.insert(0, '..') %reload_ext autoreload %autoreload 1 %aimport anhima.sim %aimport anhima.gt %aimport anhima.mds %aimport anhima.dist # simulate genotypes with no relatedness n_variants = 1000 n_samples = 100 labels = [''.join(s) for s in itertools.product('ABCDEFGHIJ', repeat=2)] ploidy = 2 af_dist = scipy.stats.beta(a=.4, b=.6) p_missing = 0 genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy) gn = anhima.gt.as_n_alt(genotypes) # simulate three sub-populations with relatedness genotypes_subpop1 = anhima.sim.simulate_relatedness(genotypes[:, :n_samples//2], relatedness=.5, n_iter=1000) genotypes_subpop2 = anhima.sim.simulate_relatedness(genotypes[:, n_samples//2:], relatedness=.5, n_iter=1000) genotypes_subpop2a = anhima.sim.simulate_relatedness(genotypes_subpop2[:, n_samples//4:], relatedness=.5, n_iter=500) genotypes_subpop2b = anhima.sim.simulate_relatedness(genotypes_subpop2[:, :n_samples//4], relatedness=.5, n_iter=500) genotypes_related = np.concatenate([genotypes_subpop1, genotypes_subpop2a, genotypes_subpop2b], axis=1) gnr = anhima.gt.as_n_alt(genotypes_related) sample_colors = (['red']*50) + (['blue']*25) + (['green']*25) # compute pairwise distance _, s = anhima.dist.pairwise_distance(gn) _, sr = anhima.dist.pairwise_distance(gnr) # fit MDS t = anhima.mds.smacof(s, random_state=42) tr = anhima.mds.smacof(sr, random_state=42) anhima.mds.plot_coords(t, labels=labels, colors=sample_colors, sizes=40); anhima.mds.plot_coords(tr, colors=sample_colors, sizes=40); tc = anhima.mds.classical(s) tcr = anhima.mds.classical(sr) anhima.mds.plot_coords(tc, labels=labels, colors=sample_colors, sizes=40); anhima.mds.plot_coords(tcr, colors=sample_colors, sizes=40);