In [2]:

cd d:\workspace\mamba

d:\workspace\mamba

In [10]:

import simulation
reload(simulation);

In [11]:

simulation.mu*=100
simulation.mu

Out[11]:

0.3

In [12]:

p,g = simulation.run(debug=True)

Starting simulation with  10 ticks
Drift
Selection
Mutation
Update
Done

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-12-2caebf423e16> in <module>()
----> 1 p,g = simulation.run(debug=True)

d:\workspace\mamba\simulation.py in run(ticks, tick_interval, debug)
     35                 selection(population, fitness)
     36                 if debug: print "Mutation"
---> 37                 population, genomes = mutation(population, genomes, mutation_rates, num_loci, target_genome)
     38                 if debug: print "Update"
     39                 fitness = create_fitness(genomes, target_genome, s)

d:\workspace\mamba\model.pyc in mutation_implicit_genomes(population, genomes, mutation_rates, num_loci, target_genome)
    101                                 new_counts[key] += 1
    102                         else:
--> 103                                 new_genome = genomes[strain,:].copy()
    104                                 new_genome[locus] = new_allele
    105                                 index = find_row(genomes, new_genome)

KeyboardInterrupt:

Tick 0
Drift
Selection
Mutation

In [77]:

import model
reload(model);

In [87]:

p=model.create_mutation_free_population()
p[0]-=100
p[1]+=100
f=model.create_fitness()

In [89]:

print p[-1]
model.selection(p,f)
print p[-1]
model.selection(p,f)
print p[-1]

0
0
0

In [95]:

np.get_include()

Out[95]:

'C:\\Python27\\lib\\site-packages\\numpy\\core\\include'

In [1]:

import time

In [7]:

time.clock()

Out[7]:

24.026697858339496

In [60]:

t=array(zeros(1000), dtype=np.int)
g=zeros((100,1000), dtype=np.int)
for i in range(100):
    g[i, randint(0,999)] = 1
g[1,5] = 1

In [67]:

g.sum(axis=1)

Out[67]:

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1])

In [74]:

def ham1(g,t):
    """Return the Hamming distance of every row of ``g`` from ``t``.

    Plain-Python baseline for the timing comparison: one NumPy comparison
    per row inside a Python loop.

    g -- 2-D array, one genome per row.
    t -- 1-D target genome, compared element-wise with each row of ``g``.

    Returns a 1-D integer array holding one mismatch count per row of ``g``.
    """
    # Size the result from the input rather than a hard-coded 100 rows, so the
    # function works for any number of genomes. ``int`` is what the ``np.int``
    # alias pointed to, and it keeps working on NumPy releases without the alias.
    a = np.zeros(g.shape[0], dtype=int)
    for i in range(a.shape[0]):
        a[i] = (t!=g[i,:]).sum()
    return a

In [76]:

g.shape[0]

Out[76]:

100L

In [75]:

ham1(g,t)

Out[75]:

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1])

In [129]:

%load_ext cythonmagic

In [78]:

%%cython
import numpy as np
cimport numpy as np
cimport cython

@cython.boundscheck(False)
def ham2(np.ndarray[int, ndim=2] g, np.ndarray[int, ndim=1] t):
    """Cython variant of ham1: for each row of g, count the entries that differ from t.

    Returns a 1-D array with one mismatch count per row of g.
    """
    # Typed result buffer, one slot per row of g.
    # NOTE(review): the buffer element type `int` has to match the dtype that
    # np.int produces on the build platform -- confirm before running elsewhere.
    cdef np.ndarray[int, ndim=1] a 
    a = np.zeros(g.shape[0], dtype=np.int)
    cdef Py_ssize_t i
    for i in range(g.shape[0]):
        # The row slice, the comparison and the sum are still NumPy operations;
        # only the loop counter and the store into a[i] are typed.
        a[i] = (t!=g[i,:]).sum()
    return a

In [180]:

def hamming_fitness(genome, target_genome):
    """Return the number of positions at which ``genome`` differs from ``target_genome``."""
    return (genome != target_genome).sum()

def ham3(genomes, t):
    """Return the Hamming distance of every row of ``genomes`` from ``t``.

    Variant for the timing comparison that delegates the row loop to
    ``np.apply_along_axis`` with ``hamming_fitness`` as the per-row function.

    genomes -- 2-D array, one genome per row.
    t       -- 1-D target genome, passed through to ``hamming_fitness``.

    Returns a 1-D array holding one mismatch count per row of ``genomes``.
    """
    # Operate on the ``genomes`` argument. The notebook-global ``g`` was used
    # here before, so the parameter was silently ignored.
    return np.apply_along_axis(hamming_fitness, 1, genomes, t)

In [179]:

%timeit -n 10000 ham1(g,t)
%timeit -n 10000 ham2(g,t)
%timeit -n 10000 ham3(g,t)

1000 loops, best of 3: 2.58 ms per loop
1000 loops, best of 3: 2.31 ms per loop
1000 loops, best of 3: 5.11 ms per loop

In [20]:

a=rand(1000)
a[1:990]=0
a.nonzero()

Out[20]:

(array([  0, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], dtype=int64),)

In [23]:

%timeit 2**a
%timeit 2.**a
%timeit np.power(2,a)

100000 loops, best of 3: 18.4 us per loop
100000 loops, best of 3: 17 us per loop
100000 loops, best of 3: 18.2 us per loop

In [29]:

env=np.random.binomial(1,0.2,10)
env

Out[29]:

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [42]:

%timeit -n 100000 int(1.3)
%timeit -n 100000 math.floor(1.3)
%timeit -n 100000 np.floor(1.3) 

100000 loops, best of 3: 217 ns per loop
100000 loops, best of 3: 145 ns per loop
100000 loops, best of 3: 1.64 us per loop

In [43]:

a = np.random.rand(1000)
%timeit -n 1000 [int(x) for x in a]
%timeit -n 1000 [math.floor(x) for x in a]
%timeit -n 1000 [np.floor(x) for x in a]
%timeit -n 1000 np.floor(a)

1000 loops, best of 3: 298 us per loop
1000 loops, best of 3: 316 us per loop
1000 loops, best of 3: 2.67 ms per loop
1000 loops, best of 3: 12.7 us per loop

In [48]:

type(np.arange(10.0)[0])

Out[48]:

numpy.float64

In [47]:

type(xrange(10)[0])

Out[47]:

int

In [4]:

import cython_load
import model_c

In [6]:

nums=model_c.genomes_to_nums(g)

In [8]:

type(nums[0])

Out[8]:

numpy.float64

In [57]:

import random
import numpy as np
def choose_no_rep_python(n,k):
    """Pick k distinct integers from 0..n-1 with the standard library's random.sample."""
    return random.sample(xrange(n), k)    
def choose_no_rep_numpy(n, k):
    """Pick k distinct integers from 0..n-1: permute all n values, then slice off the first k."""
    # The whole range is permuted even though only k values are kept.
    return np.random.permutation(n)[:k]
def choose_no_rep_numpy_take1(n, k):
    """Pick k distinct integers from 0..n-1: permute all n values, then take() the first k.

    Variant that passes the indices to take() as range(k).
    """
    return np.random.permutation(n).take(range(k))
def choose_no_rep_numpy_take2(n, k):
    """Pick k distinct integers from 0..n-1: permute all n values, then take() the first k.

    Variant that passes the indices to take() as a NumPy array (np.arange(k)).
    """
    # Use the explicitly imported ``np`` namespace like the sibling variants,
    # instead of relying on a bare ``arange`` leaked in by a pylab star-import.
    return np.random.permutation(n).take(np.arange(k))
def choose_no_rep_numpy_take3(n, k):
    """Pick k distinct integers from 0..n-1: permute all n values, then take() the first k.

    Variant that passes the indices to take() as xrange(k).
    """
    return np.random.permutation(n).take(xrange(k))

In [58]:

%timeit -n 10000 choose_no_rep_python(1000, 4)
%timeit -n 10000 choose_no_rep_numpy(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take1(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take2(1000, 4)
%timeit -n 10000 choose_no_rep_numpy_take3(1000, 4)

10000 loops, best of 3: 9.11 us per loop
10000 loops, best of 3: 408 us per loop
10000 loops, best of 3: 424 us per loop
10000 loops, best of 3: 413 us per loop
10000 loops, best of 3: 425 us per loop

In [112]:

ge=g[2,:].copy()
sum(ge)

Out[112]:

In [115]:

i= choose_no_rep_python(1000,4)
ge[i] = (ge[i] + 1) % 2
sum(ge)

Out[115]:

In [132]:

def genome_to_num1(genome):
	"""Encode a genome as one number: the sum over positions i of 2.**i * genome[i].

	Variant that builds the position vector from a Python list, array(range(...)).
	"""
	# NOTE(review): the result is floating point, so long genomes with set
	# entries far apart may not be represented exactly -- confirm this is acceptable.
	i = array(range(genome.shape[0]))
	return (2. ** i * genome).sum()
def genome_to_num2(genome):
	"""Encode a genome as one number: the sum over positions i of 2.**i * genome[i].

	Variant that builds the position vector directly with np.arange.
	"""
	# NOTE(review): the result is floating point, so long genomes with set
	# entries far apart may not be represented exactly -- confirm this is acceptable.
	i = np.arange(genome.shape[0])
	return (2. ** i * genome).sum()
def genome_to_num3(genome):
	"""Encode a genome as one number: the sum over positions i of 2.**i * genome[i].

	Variant that builds the position vector from array(xrange(...)).
	"""
	# NOTE(review): the result is floating point, so long genomes with set
	# entries far apart may not be represented exactly -- confirm this is acceptable.
	i = array(xrange(genome.shape[0]))
	return (2. ** i * genome).sum()

In [127]:

%timeit -n 1000 genome_to_num1(ge)
%timeit -n 1000 genome_to_num2(ge)
%timeit -n 1000 genome_to_num3(ge)

1000 loops, best of 3: 409 us per loop
1000 loops, best of 3: 172 us per loop
1000 loops, best of 3: 399 us per loop

In [6]:

import model
reload(model);
import cython_load
import model_c

In [5]:

f=model.create_fitness_by_mutational_load()
p=model.create_mutation_free_population()
m=model.create_muation_rates()

In [28]:

model.drift(p)

In [30]:

model.selection(p,f)

In [32]:

model_c.mutation_by_mutation_load(p,m);

In [33]:

p[:5]

Out[33]:

array([982566623,  17306161,    126779,       436,         1])

In [14]:

import math
import scipy.stats as sps
import simulation
reload(simulation);

In [3]:

p=simulation.run(10**3,0)

Starting simulation with  1000 ticks
Simulation finished, 1000 ticks, time elapsed 0.163826916401 seconds

In [15]:

lam = model.mu/model.s
poi=sps.poisson(lam)

In [16]:

print [exp(-lam) * lam**k / math.factorial(k) for k in range(6)]
print [poi.pmf(k) for k in range(6)]
print [p[k]/float(p.sum()) for k in range(6)]

[0.74081822068171788, 0.22224546620451535, 0.033336819930677303, 0.0033336819930677298, 0.00025002614948007977, 1.5001568968804782e-05]
[0.74081822068171788, 0.22224546620451532, 0.033336819930677296, 0.0033336819930677277, 0.00025002614948007934, 1.5001568968804773e-05]
[0.74045606799999997, 0.222601676, 0.033354796999999999, 0.003323749, 0.00024877800000000001, 1.4267000000000001e-05]

100 generations are enough to reach mutation-selection balance (MSB) with a population size of 10^9 and no explicit genomes (just counting mutations). Typical running time: 0.15 seconds.

In [26]:

print sum([ i*x for i,x in enumerate(p) if x>0])/float(p.sum())
print lam
print sum([ ((1-model.s)**i)*x for i,x in enumerate(p) if x>0])/float(p.sum())
print e**(-model.mu)

0.300352987
0.3
0.997000962813
0.997004495503

In [34]:

import model
reload(model);

In [119]:

tg = model.create_target_genome(5)
tg

Out[119]:

array([0, 0, 0, 0, 0])

In [120]:

g=tg.copy()
g.resize( (1,g.shape[0]) )
print tg.shape, g.shape

(5L,) (1L, 5L)

In [20]:

%timeit -n 100000 np.float(5)
%timeit -n 100000 float(5)

100000 loops, best of 3: 324 ns per loop
100000 loops, best of 3: 280 ns per loop

In [18]:

import simulation
reload(simulation)
import params
reload(params);
import model
reload(model);

In [27]:

p=simulation.run(0)

Starting simulation with  0 ticks
Tick 0
Simulation finished, 0 ticks, time elapsed 0.00117361184493 seconds

In [75]:

muts=p*model.create_muation_rates(0.00003, 2)
muts

Out[75]:

array([ 29.70537,   0.29463])

In [80]:

muts = np.random.poisson(muts)
muts[0]=5
muts[1]=2
muts.cumsum()

Out[80]:

array([5, 7])

In [134]:

loci = np.random.randint(0,5,muts.sum())
loci

Out[134]:

array([2, 3, 3, 3, 0, 1, 4])

In [135]:

loci_split = np.split(loci, muts.cumsum())[:-1]
type(loci_split)

Out[135]:

list

In [112]:

new_allele = (tg[loci]+1)%2
new_allele

Out[112]:

array([0, 1, 1, 1, 1, 0, 1])

In [121]:

g[0,:]

Out[121]:

array([0, 0, 0, 0, 0])

In [122]:

d={}
d[(0,0)]=(1,array([1,0,0,0,0]))
d

Out[122]:

{(0, 0): (1, array([1, 0, 0, 0, 0]))}

In [126]:

for (strain, locus),(count,genome) in d.items():
    print strain, locus, count, genome

0 0 1 [1 0 0 0 0]

In [131]:

d1,d2={},{}
v = [randint(0,100) for _ in range(100)]
for x in v:
    d1[x]=str(x)
    d2[x]=x
v1= d1.values()
v2= d2.values()
for i in range(len(v1)):
    print int(v1[i])==v2[i]

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True

In [136]:

?np.append

In [97]:

import model
reload(model);
import cython_load
import model_c
import params
reload(params);
import simulation
reload(simulation);

In [98]:

p,g=simulation.run(100,10)

Starting simulation with  100 ticks
Tick 0
Tick 10
Tick 20
Tick 30
Tick 40
Tick 50
Tick 60
Tick 70
Tick 80
Tick 90
Tick 100
Simulation finished, 100 ticks, time elapsed 25.0971423397 seconds

In [99]:

p.shape[0]

Out[99]:

985L

In [101]:

p;

In [102]:

plot(log(p));

In [105]:

ml = g.sum(1)
hist(ml);

In [106]:

p1=p[0];p2=p.sum()-p1;p1=p1/float(p.sum());p2=p2/float(p.sum())
print p1, p2

0.996954 0.003046

In [107]:

import scipy.stats as scs
poi = scs.poisson(params.mu/params.s)
[poi.pmf(k) for k in range(5)]

Out[107]:

[0.74081822068171788,
 0.22224546620451532,
 0.033336819930677296,
 0.0033336819930677277,
 0.00025002614948007934]

In [109]:

(ml==2).any()

Out[109]:

False

In [119]:

nums=model_c.genomes_to_nums(g)

In [122]:

len(np.unique(nums))

Out[122]:

In [123]:

len(nums)

Out[123]:

In [124]:

ge=g[22,:]
len(ge)

Out[124]:

In [125]:

ge in g

Out[125]:

True

In [127]:

g2=g[:,1]
len(g2)

Out[127]:

In [128]:

g2 in g

Out[128]:

False

In [132]:

np.select?

In [133]:

Out[133]:

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [141]:

ge=g[444,:]
ge.sum()

Out[141]:

In [202]:

def find_row_index(genomes, genome):
	'''Look for genome among the rows of genomes.

	Returns the index of the first row whose entries all equal genome, or -1
	if no row matches. Variant that iterates over the rows with enumerate().
	'''
	for i, row in enumerate(genomes):
		if (row==genome).all():
			return i
	return -1

In [203]:

def find_row_index2(genomes, genome):
	'''Look for genome among the rows of genomes.

	Returns the index of the first row whose entries all equal genome, or -1
	if no row matches. Variant that indexes the rows by position via xrange().
	'''
	for i in xrange(genomes.shape[0]):
		if (genomes[i]==genome).all():
			return i
	return -1

In [204]:

%load_ext cythonmagic

In [232]:

%%cython
import numpy as np
cimport numpy as np
cimport cython
from cpython cimport bool

@cython.boundscheck(False)
@cython.wraparound(False)
def find_row_index3(np.ndarray[int, ndim=2] genomes, np.ndarray[int, ndim=1] genome):
    """Return the index of the first row of genomes equal to genome, or -1 if there is none.

    Cython variant: each row is compared element by element, and the comparison
    stops at the first mismatch, tracked with a boolean flag.
    """
    cdef np.ndarray[int, ndim=1] row 
    cdef Py_ssize_t i, j
    cdef bool s
    # Rows are still obtained by iterating over genomes with enumerate();
    # each one is then bound to the typed 1-D buffer `row`.
    for i, row in enumerate(genomes):
        # Assume the row matches until an element differs.
        s = True
        for j in range(genome.shape[0]):
            if genome[j]!=row[j]:
                s = False
                break
        if s:
            return i
    return -1

In [227]:

%%cython
import numpy as np
cimport numpy as np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
def find_row_index4(np.ndarray[int, ndim=2] genomes, np.ndarray[int, ndim=1] genome):
    """Return the index of the first row of genomes equal to genome, or -1 if there is none.

    Cython variant: counts the matching leading elements of each row and
    reports a match when the count reaches the length of genome.
    """
    cdef np.ndarray[int, ndim=1] row 
    cdef Py_ssize_t i, j
    cdef int s
    # Rows are still obtained by iterating over genomes with enumerate();
    # each one is then bound to the typed 1-D buffer `row`.
    for i, row in enumerate(genomes):
        # s counts how many leading elements of this row equal genome.
        s = 0
        for j in range(genome.shape[0]):
            if genome[j]==row[j]:
                s = s + 1
            else:
                break
        # Every element matched only if the count equals the genome length.
        if s == genome.shape[0]:
            return i
    return -1

In [212]:

%timeit -n 1000 find_row_index(g,ge)
%timeit -n 1000 find_row_index2(g,ge)
%timeit -n 1000 find_row_index3(g,ge)

1000 loops, best of 3: 4.59 ms per loop
1000 loops, best of 3: 4.43 ms per loop
1000 loops, best of 3: 813 us per loop

In [235]:

%timeit -n 5000 find_row_index3(g,ge)
%timeit -n 5000 find_row_index4(g,ge)

5000 loops, best of 3: 803 us per loop
5000 loops, best of 3: 795 us per loop

In [1]:

cd d:\workspace\mamba

d:\workspace\mamba

In [2]:

import model
reload(model);
import cython_load
import model_c
import params
reload(params);
import simulation
reload(simulation);

In [5]:

p,g = simulation.run(1000,10)

Starting simulation with  1000 ticks
Tick 0
Tick 10
Tick 20
Tick 30
Tick 40
Tick 50
Tick 60
Tick 70
Tick 80
Tick 90
Tick 100
Tick 110
Tick 120
Tick 130
Tick 140
Tick 150
Tick 160
Tick 170
Tick 180
Tick 190
Tick 200
Tick 210
Tick 220
Tick 230
Tick 240
Tick 250
Tick 260
Tick 270
Tick 280
Tick 290
Tick 300
Tick 310
Tick 320
Tick 330
Tick 340
Tick 350
Tick 360
Tick 370
Tick 380
Tick 390
Tick 400
Tick 410
Tick 420
Tick 430
Tick 440
Tick 450
Tick 460
Tick 470
Tick 480
Tick 490
Tick 500
Tick 510
Tick 520
Tick 530
Tick 540
Tick 550
Tick 560
Tick 570
Tick 580
Tick 590
Tick 600
Tick 610
Tick 620
Tick 630
Tick 640
Tick 650
Tick 660
Tick 670
Tick 680
Tick 690
Tick 700
Tick 710
Tick 720
Tick 730
Tick 740
Tick 750
Tick 760
Tick 770
Tick 780
Tick 790
Tick 800
Tick 810
Tick 820
Tick 830
Tick 840
Tick 850
Tick 860
Tick 870
Tick 880
Tick 890
Tick 900
Tick 910
Tick 920
Tick 930
Tick 940
Tick 950
Tick 960
Tick 970
Tick 980
Tick 990
Tick 1000
Simulation finished, 1000 ticks, time elapsed 3718.33720633 seconds

In [6]:

lm = g.sum(axis=1)
lm.shape

Out[6]:

(951L,)

In [25]:

simulation.mu=0.1
print simulation.mu
simulation.pop_size=10**6
print simulation.pop_size

0.1
1000000

9/12/12 (9 December 2012)¶

testing serialization¶

In [1]:

cd d:\workspace\mamba

d:\workspace\mamba

In [6]:

%run simulation.py --ticks -1

2012-12-09 16:27:51,273 - mamba - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log
2012-12-09 16:27:51,273 - mamba - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log
2012-12-09 16:27:51,273 - simulation - INFO - Simulation ID: 2012-Dec-09_16-27-51-269000
2012-12-09 16:27:51,273 - simulation - INFO - Simulation ID: 2012-Dec-09_16-27-51-269000
2012-12-09 16:27:51,275 - simulation - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log
2012-12-09 16:27:51,275 - simulation - INFO - Logging to log\test_2012-Dec-09_16-27-51-269000.log
2012-12-09 16:27:51,276 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': -1, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'}
2012-12-09 16:27:51,276 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': -1, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'}
2012-12-09 16:27:51,276 - simulation - INFO - Parameters saved to file params\test_2012-Dec-09_16-27-51-269000.py
2012-12-09 16:27:51,276 - simulation - INFO - Parameters saved to file params\test_2012-Dec-09_16-27-51-269000.py
2012-12-09 16:27:51,278 - simulation - INFO - Saving output to output\test_2012-Dec-09_16-27-51-269000.csv
2012-12-09 16:27:51,278 - simulation - INFO - Saving output to output\test_2012-Dec-09_16-27-51-269000.csv
2012-12-09 16:27:51,279 - simulation - INFO - Starting simulation with -1 ticks
2012-12-09 16:27:51,279 - simulation - INFO - Starting simulation with -1 ticks
2012-12-09 16:27:51,279 - simulation - INFO - Simulation finished, 0 ticks, time elapsed 0.001 seconds
2012-12-09 16:27:51,279 - simulation - INFO - Simulation finished, 0 ticks, time elapsed 0.001 seconds
2012-12-09 16:27:51,292 - simulation - INFO - Serialized population to serialization\test_2012-Dec-09_16-27-51-269000.ser
2012-12-09 16:27:51,292 - simulation - INFO - Serialized population to serialization\test_2012-Dec-09_16-27-51-269000.ser

In [7]:

ls serialization

 Volume in drive D is Purple24
 Volume Serial Number is 12B2-607F

 Directory of d:\workspace\mamba\serialization

12/09/2012  04:27 PM    <DIR>          .
12/09/2012  04:27 PM    <DIR>          ..
12/09/2012  04:24 PM         4,260,575 test_2012-Dec-09_16-24-34-037000.ser
12/09/2012  04:24 PM         4,276,591 test_2012-Dec-09_16-24-45-008000.ser
12/09/2012  04:27 PM            32,350 test_2012-Dec-09_16-27-51-269000.ser
               3 File(s)      8,569,516 bytes
               2 Dir(s)  115,987,808,256 bytes free

In [8]:

fname = 'serialization/test_2012-Dec-09_16-24-34-037000.ser'

In [10]:

p = deserialize(fname)

2012-12-09 16:28:31,578 - simulation - INFO - Deserialized population from serialization/test_2012-Dec-09_16-24-34-037000.ser
2012-12-09 16:28:31,578 - simulation - INFO - Deserialized population from serialization/test_2012-Dec-09_16-24-34-037000.ser

In [11]:

len(p)

Out[11]:

In [12]:

p,g,tg=p

In [18]:

ml=g.sum(1)

In [20]:

p[ml>0].sum()/float(p.sum())

Out[20]:

0.0030200000000000001

Looks OK

Implementing recombination¶

In [43]:

mutation_rates = array([0.003,0.003])
recombination_rates = array([0.0006,0.0006])
total_rates = mutation_rates + recombination_rates
population = array([10**7, 10**5])

In [44]:

events = np.random.poisson(population * total_rates, size=population.shape)	

In [45]:

events

Out[45]:

array([35702,   359])

In [46]:

events.cumsum()

Out[46]:

array([35702, 36061])

In [47]:

events.sum()==events.cumsum()[-1]

Out[47]:

True

In [49]:

prob_mu = mutation_rates/(mutation_rates+recombination_rates)
prob_mu

Out[49]:

array([ 0.83333333,  0.83333333])

In [52]:

loci = np.random.randint(0, 1000, events.cumsum()[-1])
loci[:10]

Out[52]:

array([356, 997, 241, 911, 757,  85,  31, 863, 873, 278])

In [57]:

loci_split = np.split(loci, events.cumsum())[:-1]
len(loci_split[1])

Out[57]:

In [113]:

mutations=np.random.binomial(events, prob_mu, size=population.shape)
mutations

Out[113]:

array([29706,   308])

In [83]:

events

Out[83]:

array([35702,   359])

In [97]:

split2=[np.split(x,mutations[i:i+1]) for i,x in enumerate(loci_split)]

In [103]:

[[len(x) for x in y ] for y in split2]

Out[103]:

[[29769, 5933], [306, 53]]

In [104]:

mutations, events-mutations

Out[104]:

(array([29769,   306]), array([5933,   53]))

In [108]:

len(split2[1][0])

Out[108]:

In [109]:

split2[0][0]

Out[109]:

array([356, 997, 241, ..., 756, 116, 939])

In [111]:

type(split2[0][0])

Out[111]:

numpy.ndarray

In [114]:

np.random.binomial([0,10,11], [0.3,0.3,0.4])

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-114-1236cfd012b4> in <module>()
----> 1 np.random.binomial([0,10,11], [0.3,0.3,0.4])

C:\Python27\lib\site-packages\numpy\random\mtrand.pyd in mtrand.RandomState.binomial (numpy\random\mtrand\mtrand.c:14633)()

ValueError: n <= 0

In [120]:

np.int(array([0,10,11])*array([0.3,0.3,0.4]))

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-120-98b108d60cee> in <module>()
----> 1 np.int(array([0,10,11])*array([0.3,0.3,0.4]))

TypeError: only length-1 arrays can be converted to Python scalars

In [134]:

a=np.round(array([0,10,11])*array([0.3,0.3,0.4]))
a

Out[134]:

array([ 0.,  3.,  4.])

In [132]:

b=array(a,dtype=np.int)
b

Out[132]:

array([         0,          0,          0, 1074266112,          0,
       1074790400])

In [135]:

array([np.int(x) for x in a])

Out[135]:

array([0, 3, 4])

In [137]:

a.dtype

Out[137]:

dtype('float64')

In [138]:

np.in64(a)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-138-603e7308b6b6> in <module>()
----> 1 np.in64(a)

AttributeError: 'module' object has no attribute 'in64'

4/1/2013 (4 January 2013)¶

Pandas and Gzip¶

In [3]:

import pandas as pd

In [7]:

df=pd.DataFrame({'a':1,'b':3},index=[1])

In [8]:

df.to_csv('tmp.csv')

In [8]:

?df.to_csv

In [10]:

pd.version.version

Out[10]:

'0.9.0'

Time to update pandas...

In [4]:

pd.version.version

Out[4]:

'0.10.0'

In [10]:

import gzip

In [11]:

f=gzip.open('tmp.csv.gz','wb')

In [12]:

df.to_csv(f)

In [13]:

f.close()

In [16]:

pd.DataFrame.__module__

Out[16]:

'pandas.core.frame'

Modifiers¶

In [17]:

%run simulation.py --ticks=5

2013-01-04 10:42:24,601 - mamba - INFO - Logging to log\test_2013-Jan-04_10-42-24-594000.log
2013-01-04 10:42:24,602 - simulation - INFO - Simulation ID: 2013-Jan-04_10-42-24-594000
2013-01-04 10:42:24,605 - simulation - INFO - Logging to log\test_2013-Jan-04_10-42-24-594000.log
2013-01-04 10:42:24,605 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': 5, 'tick_interval': 0, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'}
2013-01-04 10:42:24,607 - simulation - INFO - Parameters saved to file params\test_2013-Jan-04_10-42-24-594000.py
2013-01-04 10:42:24,608 - simulation - INFO - Saving output to output\test_2013-Jan-04_10-42-24-594000.csv.gz
2013-01-04 10:42:24,608 - simulation - INFO - Starting simulation with 5 ticks
2013-01-04 10:42:25,714 - simulation - INFO - Simulation finished, 5 ticks, time elapsed 1.106 seconds
2013-01-04 10:42:26,369 - simulation - INFO - Serialized population to serialization\test_2013-Jan-04_10-42-24-594000.ser.gz

In [21]:

g.shape

Out[21]:

(514L, 1000L)

In [39]:

mods = np.array([0,1,0,1])

In [41]:

print tg.shape, mods.shape

(1000L,) (4L,)

In [48]:

gs = np.concatenate( (tg,mods)).copy()

In [49]:

gs.resize((1,gs.shape[0]))

In [51]:

gs

Out[51]:

array([[0, 0, 0, ..., 1, 0, 1]])

In [54]:

gs[0][-4:]

Out[54]:

array([0, 1, 0, 1])

In [55]:

gs[,-4:]

  File "<ipython-input-55-39f6b9c0b380>", line 1
    gs[,-4:]
       ^
SyntaxError: invalid syntax

In [56]:

gs.shape

Out[56]:

(1L, 1004L)

In [60]:

gs[:,-4:]

Out[60]:

array([[0, 1, 0, 1]])

In [61]:

gs.nonzero()

Out[61]:

(array([0, 0], dtype=int64), array([1001, 1003], dtype=int64))

In [62]:

gs

Out[62]:

array([[0, 0, 0, ..., 1, 0, 1]])

In [65]:

gs.nonzero()

Out[65]:

(array([0, 0], dtype=int64), array([1001, 1003], dtype=int64))

In [67]:

gs

Out[67]:

array([[0, 0, 0, ..., 1, 0, 1]])

In [74]:

non_zero = gs[0,].nonzero()[0]

In [82]:

non_zero=non_zero[non_zero<num_loci]

In [83]:

(2.**non_zero).sum()

Out[83]:

0.0

In [87]:

(gs[:,:-4]==gs[:,:num_loci]).all()

Out[87]:

True

In [97]:

gs[:,:num_loci].shape, tg.reshape(1, num_loci).shape

Out[97]:

((1L, 1000L), (1L, 1000L))

Time methods of taking part of the `genomes` matrix¶

In [115]:

gs2=concatenate((gs,)*100)

In [116]:

gs2.shape

Out[116]:

(100L, 1004L)

In [117]:

%timeit gs2.take(range(num_loci),axis=1)
%timeit gs2.take(np.arange(num_loci),axis=1)
%timeit gs2[:, :num_loci]

1000 loops, best of 3: 598 us per loop
1000 loops, best of 3: 322 us per loop
1000000 loops, best of 3: 1.89 us per loop

slicing is the best!

Testing the modifiers change¶

Not functional yet, just added 4 columns to the end of the genomes matrix and want to make sure that nothing broke.

In [121]:

target_genome = create_target_genome(num_loci)
genomes = np.concatenate((target_genome, np.array([0, 1.0, 0, 1.0])))
genomes.resize( (1, genomes.shape[0]) )

In [124]:

genomes[:, :num_loci].shape, target_genome.reshape(1, num_loci).shape

Out[124]:

((1L, 1000L), (1L, 1000L))

In [125]:

from scipy.spatial.distance import cdist, hamming

In [128]:

load = cdist(genomes[:, :num_loci], target_genome.reshape(1, num_loci), 'hamming') * num_loci

In [129]:

((1 - s) ** load).reshape(genomes.shape[0])

Out[129]:

array([ 1.])

In [1]:

cd d:\workspace\mamba

d:\workspace\mamba

Creating mutation rates with modifiers¶

In [1]:

cd d:\workspace\mamba

d:\workspace\mamba

In [2]:

%run simulation.py

2013-01-04 11:49:43,770 - mamba - INFO - Logging to log\test_2013-Jan-04_11-49-43-767000.log
2013-01-04 11:49:43,772 - simulation - INFO - Simulation ID: 2013-Jan-04_11-49-43-767000
2013-01-04 11:49:43,773 - simulation - INFO - Logging to log\test_2013-Jan-04_11-49-43-767000.log
2013-01-04 11:49:43,773 - simulation - INFO - Parametes from file and command line: {'params_dir': 'params', 'log_ext': '.log', 'ticks': 10, 'tick_interval': 10, 'output_ext': '.csv', 'console': True, 'output_dir': 'output', 'mu': 0.003, 'debug': True, 's': 0.01, 'r': 6e-05, 'num_loci': 1000, 'log_dir': 'log', 'params': 'params.py', 'ser_ext': '.ser', 'params_ext': '.py', 'stats_interval': 1, 'pop_size': 100000, 'job_name': 'test', 'ser_dir': 'serialization'}
2013-01-04 11:49:43,776 - simulation - INFO - Parameters saved to file params\test_2013-Jan-04_11-49-43-767000.py
2013-01-04 11:49:43,778 - simulation - INFO - Saving output to output\test_2013-Jan-04_11-49-43-767000.csv.gz
2013-01-04 11:49:43,779 - simulation - INFO - Starting simulation with 10 ticks
2013-01-04 11:49:43,859 - simulation - DEBUG - Tick 0
2013-01-04 11:49:46,204 - simulation - DEBUG - Tick 10
2013-01-04 11:49:46,207 - simulation - INFO - Simulation finished, 10 ticks, time elapsed 2.428 seconds
2013-01-04 11:49:47,684 - simulation - INFO - Serialized population to serialization\test_2013-Jan-04_11-49-43-767000.ser.gz

In [6]:

genomes[:, num_loci].shape==p.shape

Out[6]:

True

In [7]:

w = create_fitness(g,tg,0.01,num_loci)

In [33]:

pi = g[:, num_loci]
tau = g[:, num_loci+1]

In [34]:

pi = (1-s)**pi

In [37]:

tau[:10]=2

In [40]:

w[:15]

Out[40]:

array([ 1.  ,  0.99,  0.99,  0.99,  0.99,  0.99,  0.99,  0.99,  0.99,
        0.99,  0.99,  0.99,  0.99,  0.99,  0.99])

In [41]:

pi[:15]

Out[41]:

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.])

In [42]:

tau[:15]

Out[42]:

array([ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  1.,  1.,  1.,
        1.,  1.])

In [43]:

w<=pi

Out[43]:

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True], dtype=bool)

In [45]:

mus = create_muation_rates(mu, g.shape[0])

In [49]:

mus[w<=pi] *= tau[w<=pi] 

In [51]:

np.unique(mus)

Out[51]:

array([ 0.003,  0.006,  0.012])

Assertion bug¶

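The bug is that `(population-events>0).all()` can be `False`: for some strains the number of events is not smaller than the population count, as with the arrays `a` (population) and `b` (events) below.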

In [59]:

a=array([1,2,3,4,5,6])
b=array([0,0,2,5,5,3])
a-b

Out[59]:

array([ 1,  2,  1, -1,  0,  3])

In [61]:

array([min(a[i],b[i]) for i in range(a.shape[0])])

Out[61]:

array([0, 0, 2, 4, 5, 3])

In [62]:

M=array((a,b))
M

Out[62]:

array([[1, 2, 3, 4, 5, 6],
       [0, 0, 2, 5, 5, 3]])

In [63]:

M.min(axis=0)

Out[63]:

array([0, 0, 2, 4, 5, 3])

In [64]:

array((a,b)).min(axis=0)

Out[64]:

array([0, 0, 2, 4, 5, 3])

The fix is to set `events` to the element-wise minimum of `events` and `population`. This means that there is a limit of one functional mutation per individual per generation, and that if the Poisson distribution results in several mutations for one individual, they are taken together as a single mutation.

5/1/13¶

Invasion¶

In [2]:

%run simulation.py --ticks=3 --console

In [7]:

p2 = p.copy()

In [8]:

(p2==p).all()

Out[8]:

True

In [9]:

# Halve every count in place. Integer floor division keeps the int32 dtype and,
# for non-negative counts, truncates exactly like the old `p2 *= 0.5` did;
# in-place multiplication of an integer array by a float is rejected by
# current NumPy (same-kind casting rule).
p2 //= 2

In [10]:

p2.dtype

Out[10]:

dtype('int32')

In [14]:

p[:5],p2[:5]

Out[14]:

(array([98743,     2,     4,    18,     6]),
 array([49371,     1,     2,     9,     3]))

In [16]:

p -= p2

In [17]:

p[:5],p2[:5]

Out[17]:

(array([49372,     1,     2,     9,     3]),
 array([49371,     1,     2,     9,     3]))

In [27]:

g2=g.copy()

In [30]:

g2[:,1000:]=array([0,0,0,0])

In [32]:

g2[:,1000:]

Out[32]:

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       ..., 
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [34]:

a=array([0,0,0])

In [37]:

type(a), type(array(a))

Out[37]:

(numpy.ndarray, numpy.ndarray)

In [44]:

np.concatenate((g,g2),axis=0).shape

Out[44]:

(932L, 1004L)

In [46]:

# p and p2 are 1-D, so 0 is the only valid axis (axis=1 is out of bounds
# for 1-D inputs in current NumPy); the resulting shape is unchanged.
np.concatenate((p,p2),axis=0).shape

Out[46]:

(932L,)

17/1/2013¶

Implementing `genome_to_num` with modifiers¶

In [3]:

import model

In [5]:

g = np.random.binomial(1,0.01,1000)
g[:100]

Out[5]:

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [10]:

g2=np.concatenate((g,np.array([1,10,0,1])))

In [16]:

num = model.genome_to_num(g2,1000)

In [20]:

modifiers = g2[1000:]

In [21]:

tuple(modifiers)

Out[21]:

(1, 10, 0, 1)

In [22]:

(num,)+tuple(modifiers)

Out[22]:

(3.1185004836933799e+290, 1, 10, 0, 1)

In [25]:

geno=np.array([g2,g2])
print geno.shape

(2L, 1004L)

In [72]:

num=model.genome_to_num(g[:1000],1000)
print '%f' % num
s = np.binary_repr(num,1000)
print s
print len(s),1000-len(s)

311850048369337986039074908356580191286631485273518940205657389135257021249410708085175330220849964020600859463538984619999944452075825746690034490295331418766744034060298332147660495641766575175738981940279809888430447181726856422784384978737478515029032130478060163688790584926200496390144.000000
0000000000000000000000000000000000100000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
1000 0

In [73]:

r=''.join([str(x) for x in g[:1000]])
print r
print len(r)

0000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100000100000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000010000000000000000000000000000000000
1000

In [76]:

print sum([1 for x in s if x == '1' ])
print sum([1 for x in r if x == '1' ])

2
8

In [85]:

tuple(g.nonzero()[0])

Out[85]:

(71, 335, 729, 733, 739, 801, 929, 965)

In [83]:

num

Out[83]:

3.1185004836933799e+290

In [87]:

2**4

Out[87]:

In [88]:

2**5

Out[88]:

In [90]:

2**2+2**3+2**4

Out[90]:

In [100]:

%timeit tuple(g2[:1000].nonzero()[0])+tuple(g2[1000:])

100000 loops, best of 3: 17 us per loop

In [101]:

%%timeit 
nz=g2.nonzero()[0]
nz=nz[nz<1000]
tuple(nz)+tuple(g2[1000:])

10000 loops, best of 3: 24.2 us per loop

In [102]:

%timeit (model.genome_to_num(g2,1000),)+tuple(g2[1000:])

10000 loops, best of 3: 37.2 us per loop

In [116]:

reload(model);

In [117]:

model.genome_to_key(g2,1000)

Out[117]:

(71, 335, 729, 733, 739, 801, 929, 965)

In [118]:

model.genome_to_key_w_modifiers(g2,1000)

Out[118]:

((71, 335, 729, 733, 739, 801, 929, 965), (1, 10, 0, 1))

In [119]:

model.genomes_to_keys(geno,1000)

Out[119]:

array([[ 71, 335, 729, 733, 739, 801, 929, 965],
       [ 71, 335, 729, 733, 739, 801, 929, 965]], dtype=int64)

In [120]:

geno[:,1000:]

Out[120]:

array([[ 1, 10,  0,  1],
       [ 1, 10,  0,  1]])

In [142]:

def genomes_to_keys_w_mod1(genomes,num_loci):
    """Candidate 1: key every genome with `model.genome_to_key_w_modifiers`.

    Each row of `genomes` is handed, together with `num_loci`, to
    `model.genome_to_key_w_modifiers`; the per-row results are collected
    in order and wrapped in a numpy array.
    """
    per_genome_keys = []
    for genome in genomes:
        per_genome_keys.append(model.genome_to_key_w_modifiers(genome, num_loci))
    return np.array(per_genome_keys)

In [143]:

def genomes_to_keys_w_mod2(genomes,num_loci):
    """Candidate 2: build each key row with a single concatenate per genome.

    For every row of `genomes` the key is the indices of the non-zero entries
    among the first `num_loci` columns, followed by the values of the
    remaining (modifier) columns. The rows are stacked into a numpy array.
    """
    key_rows = []
    for genome in genomes:
        locus_indices = genome[:num_loci].nonzero()[0]
        modifier_values = genome[num_loci:]
        key_rows.append(np.concatenate((locus_indices, modifier_values)))
    return np.array(key_rows)

In [144]:

def genomes_to_keys_w_mod3(genomes,num_loci):
    """Candidate 3: build the locus part and the modifier part separately.

    The indices of the non-zero entries among the first `num_loci` columns
    are gathered into one array, the remaining (modifier) columns into a
    second array, and the two are joined column-wise.
    """
    locus_rows = []
    modifier_rows = []
    for genome in genomes:
        locus_rows.append(genome[:num_loci].nonzero()[0])
        modifier_rows.append(genome[num_loci:])
    locus_part = np.array(locus_rows)
    modifier_part = np.array(modifier_rows)
    return np.concatenate((locus_part, modifier_part), axis=1)

In [145]:

%timeit genomes_to_keys_w_mod1(geno,1000)
%timeit genomes_to_keys_w_mod2(geno,1000)
%timeit genomes_to_keys_w_mod3(geno,1000)

10000 loops, best of 3: 97 us per loop
10000 loops, best of 3: 51.3 us per loop
10000 loops, best of 3: 70.9 us per loop

In [155]:

reload(model);

In [156]:

keys = model.genomes_to_keys_w_modifiers(geno,1000)
keys

Out[156]:

array([[ 71, 335, 729, 733, 739, 801, 929, 965,   1,  10,   0,   1],
       [ 71, 335, 729, 733, 739, 801, 929, 965,   1,  10,   0,   1]], dtype=int64)

In [158]:

model.find_row_keys(keys,model.genome_to_key_w_modifiers(g2,1000))

Out[158]:

In [160]:

k=keys[0]

In [162]:

Out[162]:

array([ 71, 335, 729, 733, 739, 801, 929, 965,   1,  10,   0,   1], dtype=int64)

In [164]:

str(k)

Out[164]:

'[ 71 335 729 733 739 801 929 965   1  10   0   1]'

In [166]:

Out[166]:

array([ 71, 335, 729, 733, 739, 801, 929, 965,   1,  10,   0,   1], dtype=int64)

In [168]:

%timeit (k==k).all()

100000 loops, best of 3: 4.45 us per loop

In [172]:

%timeit np.array_equal(k,k)

100000 loops, best of 3: 11.8 us per loop

In [173]:

%timeit np.array_equiv(k,k)

100000 loops, best of 3: 10.8 us per loop

In [184]:

reload(model);

In [180]:

type(model.genome_to_num(g,1000))

non_zero [ 71 335 729 733 739 801 929 965]

Out[180]:

numpy.ndarray

In [182]:

type(model.genomes_to_nums(geno,1000))

non_zero [ 71 335 729 733 739 801 929 965]
non_zero [ 71 335 729 733 739 801 929 965]
nums: [[ 71 335 729 733 739 801 929 965]
 [ 71 335 729 733 739 801 929 965]]

Out[182]:

numpy.ndarray

In [183]:

model.find_row_nums(model.genomes_to_nums(geno,1000),model.genome_to_num(g,1000))

non_zero [ 71 335 729 733 739 801 929 965]
non_zero [ 71 335 729 733 739 801 929 965]
nums: [[ 71 335 729 733 739 801 929 965]
 [ 71 335 729 733 739 801 929 965]]
non_zero [ 71 335 729 733 739 801 929 965]

Out[183]:

In [194]:

%run simulation --ticks=3

2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000
2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000
2013-01-17 14:39:38,071 - simulation - INFO - Simulation ID: 2013-Jan-17_14-39-38-063000
2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log
2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log
2013-01-17 14:39:38,072 - simulation - INFO - Logging to log\test\test_2013-Jan-17_14-39-38-063000.log
2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0}
2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0}
2013-01-17 14:39:38,073 - simulation - INFO - Parametes from file and command line: {u'tau': 1, u'params_dir': u'params', 'params_file': 'params.json', 'datetime': '2013-Jan-17_14-39-38-063000', u'envch_rate': 0.01, u'pop_size': 100000, 'console': True, u'log_ext': u'.log', 'envch_start': False, u'ticks': 3, u'tick_interval': 10, u'log_dir': u'log', u'rb': False, u'stats_interval': 1, u'pi': 0, u'job_name': u'test', u'output_dir': u'output', u'phi': 0, u'ser_ext': u'.ser', 'debug': True, u'ser_dir': u'serialization', u'rho': 1, u'output_ext': u'.csv', u'mu': 0.003, u's': 0.01, u'r': 6e-05, u'num_loci': 1000, u'params_ext': u'.json', u'envch_str': 0}
2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json
2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json
2013-01-17 14:39:38,075 - simulation - INFO - Parameters saved to file params\test\test_2013-Jan-17_14-39-38-063000.json
2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz
2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz
2013-01-17 14:39:38,078 - simulation - INFO - Saving temporary output to output\test\tmp\test_2013-Jan-17_14-39-38-063000.csv.gz
2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks
2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks
2013-01-17 14:39:38,082 - simulation - INFO - Starting simulation with 3 ticks
2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0
2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0
2013-01-17 14:39:38,108 - simulation - DEBUG - Tick 0
2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds
2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds
2013-01-17 14:39:38,157 - simulation - INFO - Simulation finished, 3 ticks, time elapsed 0.080 seconds
2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz
2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz
2013-01-17 14:39:38,168 - simulation - INFO - Serialized population to serialization\test\test_2013-Jan-17_14-39-38-063000.ser.gz
2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz
2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz
2013-01-17 14:39:38,173 - simulation - INFO - Saved output to output\test\test_2013-Jan-17_14-39-38-063000.csv.gz

In [191]:

nums=model.genomes_to_nums(geno,1000)
nums

Out[191]:

array([[ 71, 335, 729, 733, 739, 801, 929, 965],
       [ 71, 335, 729, 733, 739, 801, 929, 965]], dtype=int64)

In [193]:

pd.Series([ str(n) for n in nums])

Out[193]:

0    [ 71 335 729 733 739 801 929 965]
1    [ 71 335 729 733 739 801 929 965]

In [195]:

pd.Series([i for i in range(len(nums))])

Out[195]:

0    0
1    1

In [196]:

df = pd.DataFrame(data = { "genome":pd.Series([ str(n) for n in nums]), "tick":pd.Series([i for i in range(len(nums))])})

In [197]:

df

Out[197]:

	genome	tick
0	[ 71 335 729 733 739 801 929 965]	0
1	[ 71 335 729 733 739 801 929 965]	1

In [200]:

df.to_csv("tmp.csv",header=True,index_label="index")

The problem is with `(v1==v2).all()`: if one of the arrays is empty, it gives `True`. So I need to use `array_equal`:

In [205]:

(np.array([189])==np.array([])).all()

Out[205]:

True

In [206]:

np.array_equal(np.array([189]),np.array([]))

Out[206]:

False

In [209]:

np.array_equal.__module__

Out[209]:

'numpy.core.numeric'

But I checked the `array_equal` code, and it checks all the elements instead of aborting as soon as it finds an element which is not equal. So I'm looking at my own implementation:

In [236]:

g = model.create_target_genome(1000)

In [220]:

def arr_eq(a1,a2):
    """Return True if `a1` and `a2` have the same shape and equal elements.

    Pure-Python alternative to ``np.array_equal`` that stops at the first
    mismatching element instead of comparing every element.

    a1, a2 -- array-likes; inputs that ``np.asarray`` cannot convert make
              the result False.

    Arrays of any dimensionality (including 0-d) are supported: elements are
    compared in flattened order. Two empty arrays of the same shape are equal.
    """
    try:
        a1, a2 = np.asarray(a1), np.asarray(a2)
    except Exception:
        # Unconvertible inputs count as "not equal". Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit propagate.
        return False
    if a1.shape != a2.shape:
        return False
    # Compare over flattened arrays: indexing a multi-dimensional array with a
    # single index yields a sub-array whose truth value is ambiguous, and a
    # 0-d array has no shape[0] at all.
    flat1, flat2 = a1.ravel(), a2.ravel()
    for i in range(flat1.shape[0]):
        if flat1[i] != flat2[i]:
            return False
    return True

In [225]:

%load_ext cythonmagic

In [244]:

%%cython
import numpy as np
cimport numpy as np
cimport cython

# Cython version of arr_eq: element-wise equality test for two 1-D int arrays
# that returns False at the first mismatching element.
# Bounds checking is switched off for the indexing in the loop below.
@cython.boundscheck(False)
def arr_eq_cython(np.ndarray[int, ndim=1] a1, np.ndarray[int, ndim=1] a2):
    # Both arguments are declared as 1-D buffers of C int, so callers must pass
    # arrays of a matching dtype.
    # NOTE(review): the arguments are already typed ndarrays here, so this
    # asarray conversion (and the bare except around it) looks redundant -- confirm.
    try:
        a1, a2 = np.asarray(a1), np.asarray(a2)
    except:
        return False
    # NOTE(review): with `cimport numpy`, `.shape` on a typed ndarray may be a
    # C pointer (npy_intp*); if so this compares addresses rather than
    # dimensions and would report any two distinct arrays as unequal. Verify,
    # e.g. by comparing a1.shape[0] with a2.shape[0] instead.
    if a1.shape!=a2.shape:
        return False
    # Typed loop index so the loop can run without Python integer objects.
    cdef Py_ssize_t i
    for i in range(a1.shape[0]):
        # Stop at the first difference instead of scanning the whole array.
        if a1[i] != a2[i]:
            return False
    return True

In [245]:

np.array_equal(g,g),arr_eq(g,g),arr_eq_cython(g,g)

Out[245]:

(True, True, True)

In [246]:

g1 = g.copy()
g1[randint(0,1000)] = 1
np.array_equal(g1,g),arr_eq(g1,g),arr_eq_cython(g1,g)

Out[246]:

(False, False, False)

In [247]:

%timeit np.array_equal(g,g)
%timeit arr_eq(g,g)
%timeit arr_eq_cython(g,g)

100000 loops, best of 3: 15 us per loop
1000 loops, best of 3: 494 us per loop
100000 loops, best of 3: 8.59 us per loop

In [253]:

g1 = g.copy()
r = randint(0,1000)
print r
g1[r] = 1
%timeit np.array_equal(g1,g)
%timeit arr_eq(g1,g)
%timeit arr_eq_cython(g1,g)

457
100000 loops, best of 3: 14.1 us per loop
1000 loops, best of 3: 248 us per loop
100000 loops, best of 3: 6.68 us per loop

In [255]:

g.dtype

Out[255]:

dtype('int32')

In [ ]:

9/12/12¶

testing serialization¶

Implementing recombination¶

4/1/2013¶

Pandas and Gzip¶

Modifiers¶

Time methods of taking part of the genomes matrix¶

Testing the modifiers change¶

Creating mutation rates with modifiers¶

Assertion bug¶

5/1/13¶

Invasion¶

17/1/2013¶

Implementing genome_to_num with modifiers¶

Time methods of taking part of the `genomes` matrix¶

Implementing `genome_to_num` with modifiers¶