from __future__ import print_function
import numpy as np
np.random.seed(1)
import scipy.stats
import sys
import cProfile
%reload_ext memory_profiler
# dev imports
sys.path.insert(0, '../src')
%reload_ext autoreload
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.ped
# simulate random biallelic genotypes in a cross with 2 parents and 10 progeny
n_samples = 12
n_variants = 10**6
ploidy = 2
af_dist = scipy.stats.beta(a=.9, b=.1)
p_missing = .1
# progeny totally unrelated to parents, should give plenty of mendel errors
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples,
af_dist=af_dist,
p_missing=p_missing,
ploidy=ploidy)
parents = genotypes[:, :2, :]
progeny = genotypes[:, 2:, :]
me = anhima.ped.diploid_mendelian_error(parents, progeny)
me
array([[0, 1, 0, ..., 0, 0, 0], [0, 1, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [1, 1, 1, ..., 0, 1, 1], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
%timeit anhima.ped.diploid_mendelian_error(parents, progeny)
1 loops, best of 3: 447 ms per loop
%memit anhima.ped.diploid_mendelian_error(parents, progeny)
peak memory: 193.02 MiB, increment: 31.57 MiB
cProfile.run('anhima.ped.diploid_mendelian_error(parents, progeny)', sort='time')
146 function calls in 0.449 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 5 0.247 0.049 0.247 0.049 necompiler.py:662(evaluate) 2 0.115 0.058 0.216 0.108 gt.py:704(as_012) 2 0.041 0.021 0.041 0.021 {method 'reduce' of 'numpy.ufunc' objects} 2 0.034 0.017 0.034 0.017 gt.py:300(is_het) 1 0.010 0.010 0.010 0.010 {method 'astype' of 'numpy.ndarray' objects} 2 0.001 0.001 0.001 0.001 {method 'fill' of 'numpy.ndarray' objects} 1 0.001 0.001 0.449 0.449 ped.py:123(diploid_mendelian_error) 5 0.000 0.000 0.000 0.000 necompiler.py:462(getContext) 21 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array} 2 0.000 0.000 0.036 0.018 gt.py:486(is_hom_alt) 2 0.000 0.000 0.029 0.015 gt.py:395(is_hom_ref) 21 0.000 0.000 0.000 0.000 numeric.py:392(asarray) 5 0.000 0.000 0.000 0.000 {sorted} 2 0.000 0.000 0.041 0.021 fromnumeric.py:2048(amax) 11 0.000 0.000 0.000 0.000 necompiler.py:611(getType) 1 0.000 0.000 0.449 0.449 <string>:1(<module>) 2 0.000 0.000 0.041 0.021 _methods.py:15(_amax) 2 0.000 0.000 0.000 0.000 {numpy.core.multiarray.empty} 5 0.000 0.000 0.000 0.000 {isinstance} 10 0.000 0.000 0.000 0.000 {sys._getframe} 5 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects} 5 0.000 0.000 0.000 0.000 {zip} 5 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects} 5 0.000 0.000 0.000 0.000 {method 'copy' of 'dict' objects} 10 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects} 11 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects} 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
me2 = anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)
me2
array([[0, 1, 0, ..., 0, 0, 0], [0, 1, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [1, 1, 1, ..., 0, 1, 1], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
np.array_equal(me, me2)
True
%timeit anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)
1 loops, best of 3: 1.62 s per loop
%memit anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)
peak memory: 419.62 MiB, increment: 120.59 MiB
cProfile.run('anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)', sort='time')
97 function calls in 1.596 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 12 0.982 0.082 0.982 0.082 {method 'reduce' of 'numpy.ufunc' objects} 1 0.489 0.489 1.596 1.596 ped.py:251(diploid_mendelian_error_multiallelic) 2 0.092 0.046 0.402 0.201 gt.py:763(as_allele_counts) 2 0.022 0.011 0.022 0.011 {method 'astype' of 'numpy.ndarray' objects} 1 0.008 0.008 0.008 0.008 necompiler.py:662(evaluate) 2 0.003 0.001 0.003 0.001 {numpy.core.multiarray.zeros} 11 0.000 0.000 0.000 0.000 {numpy.core.multiarray.array} 5 0.000 0.000 0.440 0.088 fromnumeric.py:1621(sum) 6 0.000 0.000 0.590 0.098 _methods.py:23(_sum) 1 0.000 0.000 0.008 0.008 gt.py:117(is_missing) 1 0.000 0.000 0.000 0.000 necompiler.py:462(getContext) 6 0.000 0.000 0.000 0.000 {isinstance} 2 0.000 0.000 0.323 0.161 fromnumeric.py:1842(all) 2 0.000 0.000 0.031 0.015 fromnumeric.py:1762(any) 4 0.000 0.000 0.000 0.000 numeric.py:462(asanyarray) 7 0.000 0.000 0.000 0.000 numeric.py:392(asarray) 1 0.000 0.000 1.596 1.596 <string>:1(<module>) 2 0.000 0.000 0.030 0.015 _methods.py:31(_any) 2 0.000 0.000 0.040 0.020 _methods.py:15(_amax) 2 0.000 0.000 0.040 0.020 fromnumeric.py:2048(amax) 2 0.000 0.000 0.323 0.161 _methods.py:35(_all) 2 0.000 0.000 0.323 0.161 {method 'all' of 'numpy.ndarray' objects} 2 0.000 0.000 0.031 0.015 {method 'any' of 'numpy.ndarray' objects} 1 0.000 0.000 0.149 0.149 {method 'sum' of 'numpy.ndarray' objects} 1 0.000 0.000 0.000 0.000 {sorted} 1 0.000 0.000 0.000 0.000 {max} 2 0.000 0.000 0.000 0.000 necompiler.py:611(getType) 1 0.000 0.000 0.000 0.000 {range} 1 0.000 0.000 0.000 0.000 {zip} 2 0.000 0.000 0.000 0.000 {sys._getframe} 1 0.000 0.000 0.000 0.000 {method 'get' of 'dict' objects} 1 0.000 0.000 0.000 0.000 {method 'items' of 'dict' objects} 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 2 0.000 0.000 0.000 0.000 {len} 1 0.000 0.000 0.000 0.000 {method 'copy' of 'dict' objects} 2 0.000 0.000 0.000 0.000 {method 'pop' of 'dict' objects} 2 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}