cd d:\workspace\mamba
d:\workspace\mamba
import simulation
reload(simulation);
simulation.mu*=100
simulation.mu
0.3
p,g = simulation.run(debug=True)
Starting simulation with 10 ticks Drift Selection Mutation Update Done
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-12-2caebf423e16> in <module>() ----> 1 p,g = simulation.run(debug=True) d:\workspace\mamba\simulation.py in run(ticks, tick_interval, debug) 35 selection(population, fitness) 36 if debug: print "Mutation" ---> 37 population, genomes = mutation(population, genomes, mutation_rates, num_loci, target_genome) 38 if debug: print "Update" 39 fitness = create_fitness(genomes, target_genome, s) d:\workspace\mamba\model.pyc in mutation_implicit_genomes(population, genomes, mutation_rates, num_loci, target_genome) 101 new_counts[key] += 1 102 else: --> 103 new_genome = genomes[strain,:].copy() 104 new_genome[locus] = new_allele 105 index = find_row(genomes, new_genome) KeyboardInterrupt:
Tick 0 Drift Selection Mutation
import model
reload(model);
p=model.create_mutation_free_population()
p[0]-=100
p[1]+=100
f=model.create_fitness()
print p[-1]
model.selection(p,f)
print p[-1]
model.selection(p,f)
print p[-1]
0 0 0
np.get_include()
'C:\\Python27\\lib\\site-packages\\numpy\\core\\include'
import time
time.clock()
24.026697858339496
t=array(zeros(1000), dtype=np.int)
g=zeros((100,1000), dtype=np.int)
for i in range(100):
g[i, randint(0,999)] = 1
g[1,5] = 1
g.sum(axis=1)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
def ham1(g,t):
a = zeros(100, dtype=np.int)
for i in range(a.shape[0]):
a[i] = (t!=g[i,:]).sum()
return a
g.shape[0]
100L
ham1(g,t)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
%load_ext cythonmagic
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
def ham2(np.ndarray[int, ndim=2] g, np.ndarray[int, ndim=1] t):
cdef np.ndarray[int, ndim=1] a
a = np.zeros(g.shape[0], dtype=np.int)
cdef Py_ssize_t i
for i in range(g.shape[0]):
a[i] = (t!=g[i,:]).sum()
return a
def hamming_fitness(genome, target_genome):
return (genome != target_genome).sum()
def ham3(genomes, t):
return np.apply_along_axis(hamming_fitness, 1, g, t)
%timeit -n 10000 ham1(g,t)
%timeit -n 10000 ham2(g,t)
%timeit -n 10000 ham3(g,t)
1000 loops, best of 3: 2.58 ms per loop 1000 loops, best of 3: 2.31 ms per loop 1000 loops, best of 3: 5.11 ms per loop
a=rand(1000)
a[1:990]=0
a.nonzero()
(array([ 0, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], dtype=int64),)
%timeit 2**a
%timeit 2.**a
%timeit np.power(2,a)
100000 loops, best of 3: 18.4 us per loop 100000 loops, best of 3: 17 us per loop 100000 loops, best of 3: 18.2 us per loop
env=np.random.binomial(1,0.2,10)
env
array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
%timeit -n 100000 int(1.3)
%timeit -n 100000 math.floor(1.3)
%timeit -n 100000 np.floor(1.3)
100000 loops, best of 3: 217 ns per loop 100000 loops, best of 3: 145 ns per loop 100000 loops, best of 3: 1.64 us per loop
a = np.random.rand(1000)
%timeit -n 1000 [int(x) for x in a]
%timeit -n 1000 [math.floor(x) for x in a]
%timeit -n 1000 [np.floor(x) for x in a]
%timeit -n 1000 np.floor(a)
1000 loops, best of 3: 298 us per loop 1000 loops, best of 3: 316 us per loop 1000 loops, best of 3: 2.67 ms per loop 1000 loops, best of 3: 12.7 us per loop
type(np.arange(10.0)[0])
numpy.float64
type(xrange(10)[0])
int
import cython_load
import model_c
nums=model_c.genomes_to_nums(g)
type(nums[0])
numpy.float64
import random
import numpy as np
def choose_no_rep_python(n,k):
return random.sample(xrange(n), k)
def choose_no_rep_numpy(n, k):
return np.random.permutation(n)[:k]
def choose_no_rep_numpy_take1(n, k):
return np.random.permutation(n).take(range(k))
def choose_no_rep_numpy_take2(n, k):
return np.random.permutation(n).take(arange(k))
def choose_no_rep_numpy_take3(n, k):
return np.random.permutation(n).take(xrange(k))
%timeit -n 10000 choose_no_rep_python(1000, 4)
%timeit -n 10000 choose_no_rep_numpy(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take1(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take2(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take3(1000, 4)
10000 loops, best of 3: 9.11 us per loop 10000 loops, best of 3: 408 us per loop 10000 loops, best of 3: 424 us per loop 10000 loops, best of 3: 413 us per loop 10000 loops, best of 3: 425 us per loop
ge=g[2,:].copy()
sum(ge)
1
i= choose_no_rep_python(1000,4)
ge[i] = (ge[i] + 1) % 2
sum(ge)
13
def genome_to_num1(genome):
i = array(range(genome.shape[0]))
return (2. ** i * genome).sum()
def genome_to_num2(genome):
i = np.arange(genome.shape[0])
return (2. ** i * genome).sum()
def genome_to_num3(genome):
i = array(xrange(genome.shape[0]))
return (2. ** i * genome).sum()
%timeit -n 1000 genome_to_num1(ge)
%timeit -n 1000 genome_to_num2(ge)
%timeit -n 1000 genome_to_num3(ge)
1000 loops, best of 3: 409 us per loop 1000 loops, best of 3: 172 us per loop 1000 loops, best of 3: 399 us per loop
import model
reload(model);
import cython_load
import model_c
f=model.create_fitness_by_mutational_load()
p=model.create_mutation_free_population()
m=model.create_muation_rates()
model.drift(p)
model.selection(p,f)
model_c.mutation_by_mutation_load(p,m);
p[:5]
array([982566623, 17306161, 126779, 436, 1])
import math
import scipy.stats as sps
import simulation
reload(simulation);
p=simulation.run(10**3,0)
Starting simulation with 1000 ticks Simulation finished, 1000 ticks, time elapsed 0.163826916401 seconds
lam = model.mu/model.s
poi=sps.poisson(lam)
print [exp(-lam) * lam**k / math.factorial(k) for k in range(6)]
print [poi.pmf(k) for k in range(6)]
print [p[k]/float(p.sum()) for k in range(6)]
[0.74081822068171788, 0.22224546620451535, 0.033336819930677303, 0.0033336819930677298, 0.00025002614948007977, 1.5001568968804782e-05] [0.74081822068171788, 0.22224546620451532, 0.033336819930677296, 0.0033336819930677277, 0.00025002614948007934, 1.5001568968804773e-05] [0.74045606799999997, 0.222601676, 0.033354796999999999, 0.003323749, 0.00024877800000000001, 1.4267000000000001e-05]
100 generations are enough to reach an MSB with 10^9 population size and no explicint genomes (just counting mutations). Typical running time: 0.15 seconds
print sum([ i*x for i,x in enumerate(p) if x>0])/float(p.sum())
print lam
print sum([ ((1-model.s)**i)*x for i,x in enumerate(p) if x>0])/float(p.sum())
print e**(-model.mu)
0.300352987 0.3 0.997000962813 0.997004495503
import model
reload(model);
tg = model.create_target_genome(5)
tg
array([0, 0, 0, 0, 0])
g=tg.copy()
g.resize( (1,g.shape[0]) )
print tg.shape, g.shape
(5L,) (1L, 5L)
%timeit -n 100000 np.float(5)
%timeit -n 100000 float(5)
100000 loops, best of 3: 324 ns per loop 100000 loops, best of 3: 280 ns per loop
import simulation
reload(simulation)
import params
reload(params);
import model
reload(model);
p=simulation.run(0)
Starting simulation with 0 ticks Tick 0 Simulation finished, 0 ticks, time elapsed 0.00117361184493 seconds
muts=p*model.create_muation_rates(0.00003, 2)
muts
array([ 29.70537, 0.29463])
muts = np.random.poisson(muts)
muts[0]=5
muts[1]=2
muts.cumsum()
array([5, 7])
loci = np.random.randint(0,5,muts.sum())
loci
array([2, 3, 3, 3, 0, 1, 4])
loci_split = np.split(loci, muts.cumsum())[:-1]
type(loci_split)
list
new_allele = (tg[loci]+1)%2
new_allele
array([0, 1, 1, 1, 1, 0, 1])
g[0,:]
array([0, 0, 0, 0, 0])
d={}
d[(0,0)]=(1,array([1,0,0,0,0]))
d
{(0, 0): (1, array([1, 0, 0, 0, 0]))}
for (strain, locus),(count,genome) in d.items():
print strain, locus, count, genome
0 0 1 [1 0 0 0 0]
d1,d2={},{}
v = [randint(0,100) for _ in range(100)]
for x in v:
d1[x]=str(x)
d2[x]=x
v1= d1.values()
v2= d2.values()
for i in range(len(v1)):
print int(v1[i])==v2[i]
True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True
?np.append
import model
reload(model);
import cython_load
import model_c
import params
reload(params);
import simulation
reload(simulation);
p,g=simulation.run(100,10)
Starting simulation with 100 ticks Tick 0 Tick 10 Tick 20 Tick 30 Tick 40 Tick 50 Tick 60 Tick 70 Tick 80 Tick 90 Tick 100 Simulation finished, 100 ticks, time elapsed 25.0971423397 seconds
p.shape[0]
985L
p;
plot(log(p));
ml = g.sum(1)
hist(ml);
p1=p[0];p2=p.sum()-p1;p1=p1/float(p.sum());p2=p2/float(p.sum())
print p1, p2
0.996954 0.003046
import scipy.stats as scs
poi = scs.poisson(params.mu/params.s)
[poi.pmf(k) for k in range(5)]
[0.74081822068171788, 0.22224546620451532, 0.033336819930677296, 0.0033336819930677277, 0.00025002614948007934]
(ml==2).any()
False
nums=model_c.genomes_to_nums(g)
len(np.unique(nums))
960
len(nums)
985
ge=g[22,:]
len(ge)
1000
ge in g
True
g2=g[:,1]
len(g2)
985
g2 in g
False
np.select?
g
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]])
ge=g[444,:]
ge.sum()
1
def find_row_index(genomes, genome):
'''looks for genome in the rows of genome, returns the index if found, -1 otherwise'''
for i, row in enumerate(genomes):
if (row==genome).all():
return i
return -1
def find_row_index2(genomes, genome):
'''looks for genome in the rows of genome, returns the index if found, -1 otherwise'''
for i in xrange(genomes.shape[0]):
if (genomes[i]==genome).all():
return i
return -1
%load_ext cythonmagic
%%cython
import numpy as np
cimport numpy as np
cimport cython
from cpython cimport bool
@cython.boundscheck(False)
@cython.wraparound(False)
def find_row_index3(np.ndarray[int, ndim=2] genomes, np.ndarray[int, ndim=1] genome):
cdef np.ndarray[int, ndim=1] row
cdef Py_ssize_t i, j
cdef bool s
for i, row in enumerate(genomes):
s = True
for j in range(genome.shape[0]):
if genome[j]!=row[j]:
s = False
break
if s:
return i
return -1
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
def find_row_index4(np.ndarray[int, ndim=2] genomes, np.ndarray[int, ndim=1] genome):
cdef np.ndarray[int, ndim=1] row
cdef Py_ssize_t i, j
cdef int s
for i, row in enumerate(genomes):
s = 0
for j in range(genome.shape[0]):
if genome[j]==row[j]:
s = s + 1
else:
break
if s == genome.shape[0]:
return i
return -1
%timeit -n 1000 find_row_index(g,ge)
%timeit -n 1000 find_row_index2(g,ge)
%timeit -n 1000 find_row_index3(g,ge)
1000 loops, best of 3: 4.59 ms per loop 1000 loops, best of 3: 4.43 ms per loop 1000 loops, best of 3: 813 us per loop
%timeit -n 5000 find_row_index3(g,ge)
%timeit -n 5000 find_row_index4(g,ge)
5000 loops, best of 3: 803 us per loop 5000 loops, best of 3: 795 us per loop
cd d:\workspace\mamba
d:\workspace\mamba
import model
reload(model);
import cython_load
import model_c
import params
reload(params);
import simulation
reload(simulation);
p,g = simulation.run(1000,10)
Starting simulation with 1000 ticks Tick 0 Tick 10 Tick 20 Tick 30 Tick 40 Tick 50 Tick 60 Tick 70 Tick 80 Tick 90 Tick 100 Tick 110 Tick 120 Tick 130 Tick 140 Tick 150 Tick 160 Tick 170 Tick 180 Tick 190 Tick 200 Tick 210 Tick 220 Tick 230 Tick 240 Tick 250 Tick 260 Tick 270 Tick 280 Tick 290 Tick 300 Tick 310 Tick 320 Tick 330 Tick 340 Tick 350 Tick 360 Tick 370 Tick 380 Tick 390 Tick 400 Tick 410 Tick 420 Tick 430 Tick 440 Tick 450 Tick 460 Tick 470 Tick 480 Tick 490 Tick 500 Tick 510 Tick 520 Tick 530 Tick 540 Tick 550 Tick 560 Tick 570 Tick 580 Tick 590 Tick 600 Tick 610 Tick 620 Tick 630 Tick 640 Tick 650 Tick 660 Tick 670 Tick 680 Tick 690 Tick 700 Tick 710 Tick 720 Tick 730 Tick 740 Tick 750 Tick 760 Tick 770 Tick 780 Tick 790 Tick 800 Tick 810 Tick 820 Tick 830 Tick 840 Tick 850 Tick 860 Tick 870 Tick 880 Tick 890 Tick 900 Tick 910 Tick 920 Tick 930 Tick 940 Tick 950 Tick 960 Tick 970 Tick 980 Tick 990 Tick 1000 Simulation finished, 1000 ticks, time elapsed 3718.33720633 seconds
lm = g.sum(axis=1)
lm.shape
(951L,)
simulation.mu=0.1
print simulation.mu
simulation.pop_size=10**6
print simulation.pop_size
0.1 1000000
cd d:\workspace\mamba
d:\workspace\mamba
%run simulation.py --ticks -1
2012-12-09 16:27:51,273 - mamba - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log 2012-12-09 16:27:51,273 - mamba - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log 2012-12-09 16:27:51,273 - simulation - INFO - Simulation ID: 2012-Dec-09_16-27-51-269000 2012-12-09 16:27:51,273 - simulation - INFO - Simulation ID: 2012-Dec-09_16-27-51-269000 2012-12-09 16:27:51,275 - simulation - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log 2012-12-09 16:27:51,275 - simulation - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log 2012-12-09 16:27:51,276 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': -1, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'} 2012-12-09 16:27:51,276 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': -1, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'} 2012-12-09 16:27:51,276 - simulation - INFO - Parameters saved to file params\test_2012-Dec-09_16-27-51-269000.py 2012-12-09 16:27:51,276 - simulation - INFO - Parameters saved to file params\test_2012-Dec-09_16-27-51-269000.py 2012-12-09 16:27:51,278 - simulation - INFO - Saving output to output\test_2012-Dec-09_16-27-51-269000.csv 2012-12-09 16:27:51,278 - simulation - INFO - Saving output to output\test_2012-Dec-09_16-27-51-269000.csv 2012-12-09 16:27:51,279 - simulation - INFO - Starting simulation with -1 ticks 2012-12-09 16:27:51,279 - simulation - INFO - Starting simulation with -1 ticks 2012-12-09 16:27:51,279 - simulation - INFO - Simulation finished, 0 ticks, time elapsed 0.001 seconds 2012-12-09 16:27:51,279 - simulation - INFO - Simulation finished, 0 ticks, time elapsed 0.001 seconds 2012-12-09 16:27:51,292 - simulation - INFO - Serialized population to serialization\test_2012-Dec-09_16-27-51-269000.ser 2012-12-09 16:27:51,292 - simulation - INFO - Serialized population to serialization\test_2012-Dec-09_16-27-51-269000.ser
ls serialization
Volume in drive D is Purple24 Volume Serial Number is 12B2-607F Directory of d:\workspace\mamba\serialization 12/09/2012 04:27 PM <DIR> . 12/09/2012 04:27 PM <DIR> .. 12/09/2012 04:24 PM 4,260,575 test_2012-Dec-09_16-24-34-037000.ser 12/09/2012 04:24 PM 4,276,591 test_2012-Dec-09_16-24-45-008000.ser 12/09/2012 04:27 PM 32,350 test_2012-Dec-09_16-27-51-269000.ser 3 File(s) 8,569,516 bytes 2 Dir(s) 115,987,808,256 bytes free
fname = 'serialization/test_2012-Dec-09_16-24-34-037000.ser'
p = deserialize(fname)
2012-12-09 16:28:31,578 - simulation - INFO - Deserialized population from serialization/test_2012-Dec-09_16-24-34-037000.ser 2012-12-09 16:28:31,578 - simulation - INFO - Deserialized population from serialization/test_2012-Dec-09_16-24-34-037000.ser
len(p)
3
p,g,tg=p
ml=g.sum(1)
p[ml>0].sum()/float(p.sum())
0.0030200000000000001
Looks OK
mutation_rates = array([0.003,0.003])
recombination_rates = array([0.0006,0.0006])
total_rates = mutation_rates + recombination_rates
population = array([10**7, 10**5])
events = np.random.poisson(population * total_rates, size=population.shape)
events
array([35702, 359])
events.cumsum()
array([35702, 36061])
events.sum()==events.cumsum()[-1]
True
prob_mu = mutation_rates/(mutation_rates+recombination_rates)
prob_mu
array([ 0.83333333, 0.83333333])
loci = np.random.randint(0, 1000, events.cumsum()[-1])
loci[:10]
array([356, 997, 241, 911, 757, 85, 31, 863, 873, 278])
loci_split = np.split(loci, events.cumsum())[:-1]
len(loci_split[1])
359
mutations=np.random.binomial(events, prob_mu, size=population.shape)
mutations
array([29706, 308])
events
array([35702, 359])
split2=[np.split(x,mutations[i:i+1]) for i,x in enumerate(loci_split)]
[[len(x) for x in y ] for y in split2]
[[29769, 5933], [306, 53]]
mutations, events-mutations
(array([29769, 306]), array([5933, 53]))
len(split2[1][0])
306
split2[0][0]
array([356, 997, 241, ..., 756, 116, 939])
type(split2[0][0])
numpy.ndarray
np.random.binomial([0,10,11], [0.3,0.3,0.4])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-114-1236cfd012b4> in <module>() ----> 1 np.random.binomial([0,10,11], [0.3,0.3,0.4]) C:\Python27\lib\site-packages\numpy\random\mtrand.pyd in mtrand.RandomState.binomial (numpy\random\mtrand\mtrand.c:14633)() ValueError: n <= 0
np.int(array([0,10,11])*array([0.3,0.3,0.4]))
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-120-98b108d60cee> in <module>() ----> 1 np.int(array([0,10,11])*array([0.3,0.3,0.4])) TypeError: only length-1 arrays can be converted to Python scalars
a=np.round(array([0,10,11])*array([0.3,0.3,0.4]))
a
array([ 0., 3., 4.])
b=array(a,dtype=np.int)
b
array([ 0, 0, 0, 1074266112, 0, 1074790400])
array([np.int(x) for x in a])
array([0, 3, 4])
a.dtype
dtype('float64')
np.in64(a)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-138-603e7308b6b6> in <module>() ----> 1 np.in64(a) AttributeError: 'module' object has no attribute 'in64'
import pandas as pd
df=pd.DataFrame({'a':1,'b':3},index=[1])
df.to_csv('tmp.csv')
?df.to_csv
pd.version.version
'0.9.0'
Time to update pandas...
pd.version.version
'0.10.0'
import gzip
f=gzip.open('tmp.csv.gz','wb')
df.to_csv(f)
f.close()
pd.DataFrame.__module__
'pandas.core.frame'
%run simulation.py --ticks=5
2013-01-04 10:42:24,601 - mamba - INFO - Logging to log\test_2013-Jan-04_10-42-24-594000.log 2013-01-04 10:42:24,602 - simulation - INFO - Simulation ID: 2013-Jan-04_10-42-24-594000 2013-01-04 10:42:24,605 - simulation - INFO - Logging to log\test_2013-Jan-04_10-42-24-594000.log 2013-01-04 10:42:24,605 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': 5, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'} 2013-01-04 10:42:24,607 - simulation - INFO - Parameters saved to file params\test_2013-Jan-04_10-42-24-594000.py 2013-01-04 10:42:24,608 - simulation - INFO - Saving output to output\test_2013-Jan-04_10-42-24-594000.csv.gz 2013-01-04 10:42:24,608 - simulation - INFO - Starting simulation with 5 ticks 2013-01-04 10:42:25,714 - simulation - INFO - Simulation finished, 5 ticks, time elapsed 1.106 seconds 2013-01-04 10:42:26,369 - simulation - INFO - Serialized population to serialization\test_2013-Jan-04_10-42-24-594000.ser.gz
g.shape
(514L, 1000L)
mods = np.array([0,1,0,1])
print tg.shape, mods.shape
(1000L,) (4L,)
gs = np.concatenate( (tg,mods)).copy()
gs.resize((1,gs.shape[0]))
gs
array([[0, 0, 0, ..., 1, 0, 1]])
gs[0][-4:]
array([0, 1, 0, 1])
gs[,-4:]
File "<ipython-input-55-39f6b9c0b380>", line 1 gs[,-4:] ^ SyntaxError: invalid syntax
gs.shape
(1L, 1004L)
gs[:,-4:]
array([[0, 1, 0, 1]])
gs.nonzero()
(array([0, 0], dtype=int64), array([1001, 1003], dtype=int64))
gs
array([[0, 0, 0, ..., 1, 0, 1]])
gs.nonzero()
(array([0, 0], dtype=int64), array([1001, 1003], dtype=int64))
gs
array([[0, 0, 0, ..., 1, 0, 1]])
non_zero = gs[0,].nonzero()[0]
non_zero=non_zero[non_zero<num_loci]
(2.**non_zero).sum()
0.0
(gs[:,:-4]==gs[:,:num_loci]).all()
True
gs[:,:num_loci].shape, tg.reshape(1, num_loci).shape
((1L, 1000L), (1L, 1000L))
genomes
matrix¶gs2=concatenate((gs,)*100)
gs2.shape
(100L, 1004L)
%timeit gs2.take(range(num_loci),axis=1)
%timeit gs2.take(np.arange(num_loci),axis=1)
%timeit gs2[:, :num_loci]
1000 loops, best of 3: 598 us per loop 1000 loops, best of 3: 322 us per loop 1000000 loops, best of 3: 1.89 us per loop
slicing is the best!
Not functional yet, just added 4 columns to the end of the genomes
matrix and want to make sure that nothing broke.
target_genome = create_target_genome(num_loci)
genomes = np.concatenate((target_genome, np.array([0, 1.0, 0, 1.0])))
genomes.resize( (1, genomes.shape[0]) )
genomes[:, :num_loci].shape, target_genome.reshape(1, num_loci).shape
((1L, 1000L), (1L, 1000L))
from scipy.spatial.distance import cdist, hamming
load = cdist(genomes[:, :num_loci], target_genome.reshape(1, num_loci), 'hamming') * num_loci
((1 - s) ** load).reshape(genomes.shape[0])
array([ 1.])
cd d:\workspace\mamba
d:\workspace\mamba
cd d:\workspace\mamba
d:\workspace\mamba
%run simulation.py
2013-01-04 11:49:43,770 - mamba - INFO - Logging to log\test_2013-Jan-04_11-49-43-767000.log 2013-01-04 11:49:43,772 - simulation - INFO - Simulation ID: 2013-Jan-04_11-49-43-767000 2013-01-04 11:49:43,773 - simulation - INFO - Logging to log\test_2013-Jan-04_11-49-43-767000.log 2013-01-04 11:49:43,773 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': 10, 'tick_interval': 10, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'} 2013-01-04 11:49:43,776 - simulation - INFO - Parameters saved to file params\test_2013-Jan-04_11-49-43-767000.py 2013-01-04 11:49:43,778 - simulation - INFO - Saving output to output\test_2013-Jan-04_11-49-43-767000.csv.gz 2013-01-04 11:49:43,779 - simulation - INFO - Starting simulation with 10 ticks 2013-01-04 11:49:43,859 - simulation - DEBUG - Tick 0 2013-01-04 11:49:46,204 - simulation - DEBUG - Tick 10 2013-01-04 11:49:46,207 - simulation - INFO - Simulation finished, 10 ticks, time elapsed 2.428 seconds 2013-01-04 11:49:47,684 - simulation - INFO - Serialized population to serialization\test_2013-Jan-04_11-49-43-767000.ser.gz
genomes[:, num_loci].shape==p.shape
True
w = create_fitness(g,tg,0.01,num_loci)
pi = g[:, num_loci]
tau = g[:, num_loci+1]
pi = (1-s)**pi
tau[:10]=2
w[:15]
array([ 1. , 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99])
pi[:15]
array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
tau[:15]
array([ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1.])
w<=pi
array([ True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True], dtype=bool)
mus = create_muation_rates(mu, g.shape[0])
mus[w<=pi] *= tau[w<=pi]
np.unique(mus)
array([ 0.003, 0.006, 0.012])
The bug is that (population-events>0).all()!=False
.
a=array([1,2,3,4,5,6])
b=array([0,0,2,5,5,3])
a-b
array([ 1, 2, 1, -1, 0, 3])
array([min(a[i],b[i]) for i in range(a.shape[0])])
array([0, 0, 2, 4, 5, 3])
M=array((a,b))
M
array([[1, 2, 3, 4, 5, 6], [0, 0, 2, 5, 5, 3]])
M.min(axis=0)
array([0, 0, 2, 4, 5, 3])
array((a,b)).min(axis=0)
array([0, 0, 2, 4, 5, 3])
The fix is to set events
to the minimum of events
and population
. This means that there is a limit of one functional mutation per individual per generation, and that if the Poisson distribution resulted in numerous mutations they are taken together to be a single mutation.
%run simulation.py --ticks=3 --console
p2 = p.copy()
(p2==p).all()
True
p2 *= 0.5
p2.dtype
dtype('int32')
p[:5],p2[:5]
(array([98743, 2, 4, 18, 6]), array([49371, 1, 2, 9, 3]))
p -= p2
p[:5],p2[:5]
(array([49372, 1, 2, 9, 3]), array([49371, 1, 2, 9, 3]))
g2=g.copy()
g2[:,1000:]=array([0,0,0,0])
g2[:,1000:]
array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ..., [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
a=array([0,0,0])
type(a), type(array(a))
(numpy.ndarray, numpy.ndarray)
np.concatenate((g,g2),axis=0).shape
(932L, 1004L)
np.concatenate((p,p2),axis=1).shape
(932L,)
import model
g = np.random.binomial(1,0.01,1000)
g[:100]
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
g2=np.concatenate((g,np.array([1,10,0,1])))
num = model.genome_to_num(g2,1000)
modifiers = g2[1000:]
tuple(modifiers)
(1, 10, 0, 1)
(num,)+tuple(modifiers)
(3.1185004836933799e+290, 1, 10, 0, 1)
geno=np.array([g2,g2])
print geno.shape
(2L, 1004L)
num=model.genome_to_num(g[:1000],1000)
print '%f' % num
s = np.binary_repr(num,1000)
print s
print len(s),1000-len(s)
311850048369337986039074908356580191286631485273518940205657389135257021249410708085175330220849964020600859463538984619999944452075825746690034490295331418766744034060298332147660495641766575175738981940279809888430447181726856422784384978737478515029032130478060163688790584926200496390144.000000 0000000000000000000000000000000000100000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 1000 0
r=''.join([str(x) for x in g[:1000]])
print r
print len(r)
0000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100000100000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000010000000000000000000000000000000000 1000
print sum([1 for x in s if x == '1' ])
print sum([1 for x in r if x == '1' ])
2 8
tuple(g.nonzero()[0])
(71, 335, 729, 733, 739, 801, 929, 965)
num
3.1185004836933799e+290
2**4
16
2**5
32
2**2+2**3+2**4
28
%timeit tuple(g2[:1000].nonzero()[0])+tuple(g2[1000:])
100000 loops, best of 3: 17 us per loop
%%timeit
nz=g2.nonzero()[0]
nz=nz[nz<1000]
tuple(nz)+tuple(g2[1000:])
10000 loops, best of 3: 24.2 us per loop
%timeit (model.genome_to_num(g2,1000),)+tuple(g2[1000:])
10000 loops, best of 3: 37.2 us per loop
reload(model);
model.genome_to_key(g2,1000)
(71, 335, 729, 733, 739, 801, 929, 965)
model.genome_to_key_w_modifiers(g2,1000)
((71, 335, 729, 733, 739, 801, 929, 965), (1, 10, 0, 1))
model.genomes_to_keys(geno,1000)
array([[ 71, 335, 729, 733, 739, 801, 929, 965], [ 71, 335, 729, 733, 739, 801, 929, 965]], dtype=int64)
geno[:,1000:]
array([[ 1, 10, 0, 1], [ 1, 10, 0, 1]])
def genomes_to_keys_w_mod1(genomes,num_loci):
return np.array([model.genome_to_key_w_modifiers(g, num_loci) for g in genomes])
def genomes_to_keys_w_mod2(genomes,num_loci):
keys = np.array([np.concatenate((g[:num_loci].nonzero()[0],g[num_loci:])) for g in genomes])
return keys
def genomes_to_keys_w_mod3(genomes,num_loci):
keys = np.array([g[:num_loci].nonzero()[0] for g in genomes])
mods = np.array([g[num_loci:] for g in genomes])
return np.concatenate((keys,mods),axis=1)
%timeit genomes_to_keys_w_mod1(geno,1000)
%timeit genomes_to_keys_w_mod2(geno,1000)
%timeit genomes_to_keys_w_mod3(geno,1000)
10000 loops, best of 3: 97 us per loop 10000 loops, best of 3: 51.3 us per loop 10000 loops, best of 3: 70.9 us per loop
reload(model);
keys = model.genomes_to_keys_w_modifiers(geno,1000)
keys
array([[ 71, 335, 729, 733, 739, 801, 929, 965, 1, 10, 0, 1], [ 71, 335, 729, 733, 739, 801, 929, 965, 1, 10, 0, 1]], dtype=int64)
model.find_row_keys(keys,model.genome_to_key_w_modifiers(g2,1000))
0
k=keys[0]
k
array([ 71, 335, 729, 733, 739, 801, 929, 965, 1, 10, 0, 1], dtype=int64)
str(k)
'[ 71 335 729 733 739 801 929 965 1 10 0 1]'
k
array([ 71, 335, 729, 733, 739, 801, 929, 965, 1, 10, 0, 1], dtype=int64)
%timeit (k==k).all()
100000 loops, best of 3: 4.45 us per loop
%timeit np.array_equal(k,k)
100000 loops, best of 3: 11.8 us per loop
%timeit np.array_equiv(k,k)
100000 loops, best of 3: 10.8 us per loop
reload(model);
type(model.genome_to_num(g,1000))
non_zero [ 71 335 729 733 739 801 929 965]
numpy.ndarray
type(model.genomes_to_nums(geno,1000))
non_zero [ 71 335 729 733 739 801 929 965] non_zero [ 71 335 729 733 739 801 929 965] nums: [[ 71 335 729 733 739 801 929 965] [ 71 335 729 733 739 801 929 965]]
numpy.ndarray
model.find_row_nums(model.genomes_to_nums(geno,1000),model.genome_to_num(g,1000))
non_zero [ 71 335 729 733 739 801 929 965] non_zero [ 71 335 729 733 739 801 929 965] nums: [[ 71 335 729 733 739 801 929 965] [ 71 335 729 733 739 801 929 965]] non_zero [ 71 335 729 733 739 801 929 965]
0
%run simulation --ticks=3
2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000 2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000 2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000 2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log 2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log 2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log 2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0} 2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0} 2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0} 2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json 2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json 2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json 2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz 2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz 2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz 2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks 2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks 2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks 2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0 2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0 2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0 2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds 2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds 2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds 2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz 2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz 2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz 2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz 2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz 2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz
nums=model.genomes_to_nums(geno,1000)
nums
array([[ 71, 335, 729, 733, 739, 801, 929, 965], [ 71, 335, 729, 733, 739, 801, 929, 965]], dtype=int64)
pd.Series([ str(n) for n in nums])
0 [ 71 335 729 733 739 801 929 965] 1 [ 71 335 729 733 739 801 929 965]
pd.Series([i for i in range(len(nums))])
0 0 1 1
df = pd.DataFrame(data = { "genome":pd.Series([ str(n) for n in nums]), "tick":pd.Series([i for i in range(len(nums))])})
df
genome | tick | |
---|---|---|
0 | [ 71 335 729 733 739 801 929 965] | 0 |
1 | [ 71 335 729 733 739 801 929 965] | 1 |
df.to_csv("tmp.csv",header=True,index_label="index")
The problem is with (v1==v2).all()
, if one of the arrays is empty, it gives True
.
So I need to use array_equal
:
(np.array([189])==np.array([])).all()
True
np.array_equal(np.array([189]),np.array([]))
False
np.array_equal.__module__
'numpy.core.numeric'
But I checked array_euqal
code and it checks all the elements, instead of aborting when it finds an element which is not equal. So I'm looking at my own implementation:
g = model.create_target_genome(1000)
def arr_eq(a1,a2):
try:
a1, a2 = asarray(a1), asarray(a2)
except:
return False
if a1.shape != a2.shape:
return False
for i in range(a1.shape[0]):
if a1[i] != a2[i]:
return False
return True
%load_ext cythonmagic
%%cython
import numpy as np
cimport numpy as np
cimport cython
@cython.boundscheck(False)
def arr_eq_cython(np.ndarray[int, ndim=1] a1, np.ndarray[int, ndim=1] a2):
try:
a1, a2 = np.asarray(a1), np.asarray(a2)
except:
return False
if a1.shape!=a2.shape:
return False
cdef Py_ssize_t i
for i in range(a1.shape[0]):
if a1[i] != a2[i]:
return False
return True
np.array_equal(g,g),arr_eq(g,g),arr_eq_cython(g,g)
(True, True, True)
g1 = g.copy()
g1[randint(0,1000)] = 1
np.array_equal(g1,g),arr_eq(g1,g),arr_eq_cython(g1,g)
(False, False, False)
%timeit np.array_equal(g,g)
%timeit arr_eq(g,g)
%timeit arr_eq_cython(g,g)
100000 loops, best of 3: 15 us per loop 1000 loops, best of 3: 494 us per loop 100000 loops, best of 3: 8.59 us per loop
g1 = g.copy()
r = randint(0,1000)
print r
g1[r] = 1
%timeit np.array_equal(g1,g)
%timeit arr_eq(g1,g)
%timeit arr_eq_cython(g1,g)
457 100000 loops, best of 3: 14.1 us per loop 1000 loops, best of 3: 248 us per loop 100000 loops, best of 3: 6.68 us per loop
g.dtype
dtype('int32')