from __future__ import print_function, division

import random
import sys

import numpy as np

# Consistency checks for anhima's block-wise helpers: each block-wise result
# (anhima.util.block_apply / anhima.util.block_take2d) is compared against
# the same operation applied to the whole genotype array in one go.

# Dev setup: put the parent directory first on the import path so that the
# in-tree checkout of anhima is picked up ahead of any installed copy.
sys.path.insert(0, '..')

# The autoreload magics only exist inside IPython/Jupyter. Issue them through
# the shell object so this file remains valid Python syntax and can also be
# executed by a plain interpreter.
try:
    _shell = get_ipython()  # noqa: F821 -- builtin injected by IPython only
except NameError:
    _shell = None

if _shell is not None:
    _shell.run_line_magic('reload_ext', 'autoreload')
    _shell.run_line_magic('autoreload', '1')
    _shell.run_line_magic('aimport', 'anhima.util')

# Explicit import so the name `anhima` is bound in every environment rather
# than only as a side effect of %aimport.
# NOTE(review): anhima.gt and anhima.af are used below without their own
# import statements; this assumes the package's __init__ exposes them, as the
# original code did -- confirm.
import anhima.util  # noqa: E402

n_variants = 100000
n_samples = 1000
# Number of rows handed to each block-wise call below.
block_size = 1000

# Random genotype calls with values in {0, 1}; the axes are
# (variants, samples, 2), stored as 1-byte signed integers.
genotypes = np.random.randint(
    0, 2, size=(n_variants, n_samples, 2)
).astype('i1')

# Report the array shape and its size in megabytes (a bare expression would
# only be displayed as the last line of a notebook cell).
print(genotypes.shape, genotypes.nbytes / 1e6)

# as_n_alt: block-wise result must equal the whole-array result.
gn = anhima.gt.as_n_alt(genotypes)
gn2 = anhima.util.block_apply(
    anhima.gt.as_n_alt, genotypes, block_size=block_size
)
assert np.array_equal(gn, gn2)

# allele_counts: block-wise result must equal the whole-array result.
ac = anhima.af.allele_counts(genotypes)
ac2 = anhima.util.block_apply(
    anhima.af.allele_counts, genotypes, block_size=block_size
)
assert np.array_equal(ac, ac2)

# pack_diploid: block-wise result must equal the whole-array result.
packed = anhima.gt.pack_diploid(genotypes)
packed2 = anhima.util.block_apply(
    anhima.gt.pack_diploid, genotypes, block_size=block_size
)
assert np.array_equal(packed, packed2)

# block_take2d: pick a sorted random 10% of the rows and of the columns and
# compare against two chained numpy takes on the full array.
row_indices = sorted(random.sample(range(n_variants), n_variants // 10))
col_indices = sorted(random.sample(range(n_samples), n_samples // 10))
g = genotypes.take(row_indices, axis=0).take(col_indices, axis=1)
g2 = anhima.util.block_take2d(
    genotypes, row_indices, col_indices, block_size=block_size
)
assert np.array_equal(g, g2)