import numpy as np np.random.seed(42) import random random.seed(42) import matplotlib.pyplot as plt %matplotlib inline import sys # import anhima # dev imports sys.path.insert(0, '..') %load_ext autoreload %autoreload 1 %aimport anhima.sim %aimport anhima.ld %aimport anhima.loc n_variants = 1000 n_samples = 100 correlation = .97 g = anhima.sim.simulate_genotypes_with_ld(n_variants, n_samples, correlation) g pos = np.random.randint(low=0, high=n_variants*100, size=n_variants) pos.sort() r_squared = anhima.ld.pairwise_genotype_ld(g) r_squared anhima.ld.plot_pairwise_ld(r_squared); # zoom in to first 100 variants anhima.ld.plot_pairwise_ld(r_squared[:100, :100]); cor, sep, dist = anhima.ld.pairwise_ld_decay(r_squared, pos) cor, sep, dist plt.hist(cor[sep == 1]); plt.hist(cor[(dist > 0) & (dist < 200)]); np.sqrt(np.mean(cor[sep == 1])) anhima.ld.plot_ld_decay_by_separation(cor, sep); anhima.ld.plot_ld_decay_by_distance(cor, dist, bins=np.arange(0, 10000, 200)); anhima.ld.plot_windowed_ld(g, pos, window_size=10000); anhima.ld.plot_windowed_ld(g, pos, window_size=1000); included = anhima.ld.ld_prune_pairwise(g) np.nonzero(included)[0] g_pruned = np.compress(included, g, axis=0) g_pruned r_squared_pruned = anhima.ld.pairwise_genotype_ld(g_pruned) anhima.ld.plot_pairwise_ld(r_squared_pruned); pos_pruned = pos[included] cor_pruned, sep_pruned, dist_pruned = anhima.ld.pairwise_ld_decay(r_squared_pruned, pos_pruned) anhima.ld.plot_ld_decay_by_separation(cor_pruned, sep_pruned); anhima.ld.plot_ld_decay_by_distance(cor_pruned, dist_pruned, bins=np.arange(0, 10000, 400)); n_variants_big = 100000 g_big = anhima.sim.simulate_genotypes_with_ld(n_variants_big, n_samples, correlation=.97) pos_big = np.random.randint(low=0, high=n_variants_big*100, size=n_variants_big) pos_big.sort() included_big = anhima.ld.ld_prune_pairwise(g_big) np.count_nonzero(included_big) g_big_pruned = np.compress(included_big, g_big, axis=0) r_squared_big_pruned = anhima.ld.pairwise_genotype_ld(g_big_pruned) anhima.ld.plot_pairwise_ld(r_squared_big_pruned[:100, :100]); cor_big, sep_big, dist_big = anhima.ld.windowed_ld_decay(g_big, pos_big, window_size=1000, step=10) anhima.ld.plot_ld_decay_by_separation(cor_big, sep_big, max_separation=100); anhima.ld.plot_ld_decay_by_distance(cor_big, dist_big, bins=np.arange(0, 10000, 200)); anhima.ld.plot_windowed_ld(g_big, pos_big, window_size=100000); anhima.ld.plot_windowed_ld(g_big, pos_big, window_size=10000);