#!/usr/bin/env python # coding: utf-8 # Supporting material for # # The probability of improvement in Fisher's geometric model: a probabilistic approach # ## by [Yoav Ram](http://www.yoavram.com) and [Lilach Hadany](https://sites.google.com/site/hadanylab/people/lilach-hadany) # # an article published in [_Theoretical Population Biology_](http://www.journals.elsevier.com/theoretical-population-biology/). # # This [IPython notebook](http://ipython.org/) is provided for reproduction of Figure 2 of the article. It can be viewed by copying its URL to [nbviewer](http://nbviewer.ipython.org) and it can be run by opening it in [binder](http://mybinder.org/repo/yoavram/FGMProb). The notebook also contains functions for deterministic and stochastic calculation of different quantities in FGM. # ## Setting up # # We start by loading required modules and extensions: # In[1]: # This will probably produce a warning. That's OK. get_ipython().run_line_magic('pylab', 'inline') # In[2]: import scipy.stats as st import scipy.special as sp from scipy.integrate import quad as integrate rcParams['figure.figsize'] = 8,6 # In[5]: # http://nbviewer.ipython.org/github/rossant/ipycache/blob/master/examples/example.ipynb get_ipython().system('pip install ipycache') # In[2]: get_ipython().run_line_magic('load_ext', 'ipycache') # Define model parameters: # In[3]: d = 1.0 r = 0.1 n = 10 A = zeros(n) A[0] = d/2 assert allclose(norm(A), d/2) reps = 1000 # `draw` is used to draw a random mutation in Fisher's geometric model, i.e., a random vector of magnitude `r`. # In[4]: def draw(d,r,n): Z = normal(0,1,n) X = r * Z / norm(Z) assert allclose(r, norm(X)) return X # In[5]: X = draw(d,r,n) print X[0]/r < -r/d print norm(X + A) < norm(A) # In[6]: Xs = array([draw(d,r,n) for _ in range(reps)]) assert [X[0]/r < -r/d for X in Xs] == [norm(X + A) < norm(A) for X in Xs] # ### Probability of improvement # # We with the basic result, first presented by Fisher, of the probability of improvement in FGM. # # We estimate this probability by drawing many mutations and calculating the fraction of mutations that are beneficial. # In[7]: def simulate_pben(d,r,n): A = zeros(n) A[0] = d/2 assert allclose(norm(A), d/2) Xs = array([draw(d,r,n) for _ in range(reps)]) ps = [(norm(X + A) < norm(A)) for X in Xs] mean_p = mean(ps) std_p = std(ps, ddof=1)/sqrt(float(reps)) return mean_p, std_p simulate_pben(d,r,n) # This is $p_n^*$ from our article, first shown in (Kimura 1983) (see main text for references): # In[8]: def kimura_pben(d,r,n): # Kimura return 0.5 * sp.betainc((n-1)/2., 0.5, 1-r**2/d**2) kimura_pben(d,r,n) # This is Fisher's original result from (Fisher 1930): # In[9]: def fisher_pben(d,r,n): return 1-st.norm.cdf(sqrt(n)*r/d) fisher_pben(d,r,n) # This is $p^{\#}_{n}$, from (Hartl & Taubes 1996; Rice 1990): # In[10]: def hartl_pben(d,r,n): # Hartl & Taubes 1996, eq. 1 numer = integrate(lambda x: sin(x)**(n-2), 0, arccos(r/d))[0] denom = integrate(lambda x: sin(x)**(n-2), 0, pi)[0] return numer/denom hartl_pben(d,r,n) # We now do the calculations and plot a figure, which is not used in the main text: # In[11]: get_ipython().run_cell_magic('cache', 'pben_emp.pkl nrange pben_emp', 'nrange = range(2,102,5)\npben_emp = array([simulate_pben(d,r,n) for n in nrange])\n') # In[12]: pben_kimura = [kimura_pben(d,r,n) for n in nrange] pben_fisher = [fisher_pben(d,r,n) for n in nrange] pben_hartl = [hartl_pben(d,r,n) for n in nrange] n = 10 # In[13]: errorbar(nrange, pben_emp[:,0], yerr=pben_emp[:,1], fmt='.k', capsize=10, label='Empricial') plot(nrange, pben_kimura, '-k', lw=2, label='Theoretical') plot(nrange, pben_fisher, '--r', label='Fisher') plot(nrange, pben_hartl, '-b', label='Hartl') xlim(-5,105) xticks([1]+range(20,101,20)) yticks(arange(0.15,0.5,0.1)) xlabel("$n$", fontsize=16) ylabel(r"$P(|A+X|<|A|)$", fontsize=16) title("Probability of a beneficial mutation") legend(fontsize=16); # ### Expected improvement towards optimum # # Next, we evaluate the expected improvement towards the optimum in FGM. Functions denoted by `TPB` refer to equations derived in the main text. # In[14]: def simulate_X1(d,r,n): A = zeros(n) A[0] = d/2 assert allclose(norm(A), d/2) p = kimura_pben(d,r,n) Xs = array([draw(d,r,n) for _ in range(int(reps/p))]) X1 = [abs(X[0]) for X in Xs if (norm(A + X) < norm(A))] mean_X1 = mean(X1) std_X1 = std(X1, ddof=1)/sqrt(len(X1)) return mean_X1, std_X1 simulate_X1(d,r,n) # In[15]: def TPB_X1(d,r,n): r2d2 = (r**2) / (d**2) n_min_1_by_2 = 0.5 * (n-1) numer = 2 * r * (1 - r2d2)**n_min_1_by_2 denom = (n - 1) * sp.betainc(n_min_1_by_2, 0.5, 1 - r2d2) * sp.beta(n_min_1_by_2, 0.5) return numer/denom TPB_X1(d,r,n) # In[16]: get_ipython().run_cell_magic('cache', 'X1_emp.pkl X1_emp', 'X1_emp = array([simulate_X1(d,r,n) for n in nrange])\n') # In[17]: X1_TPB = array([TPB_X1(d,r,n) for n in nrange]) n = 10 # In[18]: errorbar(nrange, X1_emp[:,0], yerr=X1_emp[:,1], fmt='.k', capsize=10, label='empricial') plot(nrange, X1_TPB, '--k', lw=2, label='Theoretical') xlim(-5,105) xticks([1]+range(20,101,20)) xlabel("$n$", fontsize=16) ylabel(r"$E[|X_1| | |A+X|<|A|]$", fontsize=16) title("Expected effect towards optimum in beneficial mutations") legend(fontsize=16); # ### Expected $\Delta d$ # # The following quantity does not appear at all in the meain text, but rather in (Hartl and Taubes 1996). Here $\Delta$ is the change in phenotypic distance to the optimal phenotype. # In[19]: d,r,n = 1., 0.1, 10 def simulate_deltad(d,r,n): A = zeros(n) A[0] = d/2 assert allclose(norm(A), d/2) p = kimura_pben(d,r,n) Xs = array([draw(d,r,n) for _ in range(int(reps/p))]) deltad = [abs(norm(A+X) - norm(A)) for X in Xs if (norm(A + X) < norm(A))] mean_deltad = mean(deltad) std_deltad = std(deltad, ddof=1)/sqrt(len(deltad)) return mean_deltad, std_deltad simulate_deltad(d,r,n) # In[20]: def hartl_deltad(d,r,n): # Hartl & Taubes 1996, eq. 3 delta = lambda theta: d/2 - sqrt(0.25 * d**2 + r**2 - r * d * cos(theta)) numer = integrate(lambda x: delta(x) * sin(x)**(n-2), 0, arccos(r/d))[0] denom = integrate(lambda x: sin(x)**(n-2), 0, arccos(r/d))[0] return numer/denom hartl_deltad(d,r,n) # In[21]: get_ipython().run_cell_magic('cache', 'deltad_emp.pkl deltad_emp', 'deltad_emp = array([simulate_deltad(d,r,n) for n in nrange])\n') # In[22]: deltad_hartl = array([hartl_deltad(d,r,n) for n in nrange]) n = 10 # In[23]: errorbar(nrange, deltad_emp[:,0], yerr=deltad_emp[:,1], fmt='.k', capsize=10, label='empricial') plot(nrange, deltad_hartl, '-b', label='Hartl') xlim(-5,105) #ylim(0.1, 0.5) xticks([1]+range(20,101,20)) #yticks(arange(0.15,0.5,0.1)) xlabel("$n$", fontsize=16) ylabel(r"$E[|A+X|-|A| | |A+X|<|A|]$", fontsize=16) title("Expected change in distance in beneficial mutations") legend(fontsize=16); # ### Expected _s_ # # Here we analyze the expected change in fitness in beneficial mutations. Fitness is defined by: # $$\omega(A) = 1 - ||A||^2$$. # In[24]: def fitness(A): return 1 - norm(A)**2 assert fitness([d/2,0,0]) == 1 - d**2/4 assert fitness(zeros(n)) == 1 # In[25]: d,r,n = 1., 0.1, 10 def simulate_s(d,r,n): A = zeros(n) A[0] = d/2 assert allclose(norm(A), d/2) fitness_A = fitness(A) p = kimura_pben(d,r,n) Xs = array([draw(d,r,n) for _ in range(int(reps/p))]) s = array([fitness(X + A)/fitness_A - 1 for X in Xs]) s = s[s > 0] mean_s = mean(s) std_s = std(s, ddof=1)/sqrt(len(s)) return mean_s, std_s simulate_s(d,r,n) # This is equation (10) from the main text: # In[26]: def TPB_s(d,r,n): r2d2 = (r**2) / (d**2) n1by2 = 0.5 * (n-1) s = 2 * d * r * (1 - r2d2)**n1by2 s /= (n - 1) * (1 - 0.25 * d**2) * sp.betainc(n1by2, 0.5, 1 - r2d2) * sp.beta(n1by2, 0.5) s -= r**2 / (1 - 0.25 * d**2) return s TPB_s(d,r,n) # The same value, calculated from $\Delta$ (Hartl & Taubes 1996): # In[27]: def hartl_s(d,r,n): deltad = hartl_deltad(d,r,n) s = fitness(d/2 - deltad)/fitness(d/2) - 1 return s hartl_s(d,r,n) # We plot the results, once for $n$, number of traits, and once for $r$, the mutation size ($d$ is fixed at 1). # In[28]: get_ipython().run_cell_magic('cache', 's_emp.pkl reps s_emp', 'reps = 500\ns_emp = array([simulate_s(d,r,n) for n in nrange])\n') # In[29]: s_TPB= array([TPB_s(d,r,n) for n in nrange]) s_hartl = array([hartl_s(d,r,n) for n in nrange]) n = 10 # In[30]: errorbar(nrange, s_emp[:,0], yerr=s_emp[:,1], fmt='ok', capsize=10, label='Empricial') plot(nrange, s_TPB, '--k', lw=2, label='Theoretical') plot(nrange, s_hartl, '-b', label='Hartl') xlim(-5,105) ylim(0, 0.09) xticks([1]+range(20,101,20)) yticks(arange(0.02,0.1,0.02)) xlabel("$n$", fontsize=16) ylabel(r"$E[s | s>0]$", fontsize=16) title("Expected selection coefficient in beneficial mutations") legend(fontsize=16); # In[31]: n = 10 rrange = linspace(0.001,0.5,25) s_TPB_r = array([TPB_s(d,r,n) for r in rrange]) # In[32]: get_ipython().run_cell_magic('cache', 's_emp_r.pkl reps s_emp_r', 'reps = 500\ns_emp_r = array([simulate_s(d,r,n) for r in rrange])\n') # In[33]: errorbar(rrange, s_emp_r[:,0], yerr=s_emp_r[:,1], fmt='ok', capsize=10, label='Empricial') plot(rrange, s_TPB_r, '--k', lw=2, label='Theoretical') xlim(-0.01,0.51) ylim(0, 0.09) #xticks([1]+range(20,101,20)) yticks(arange(0.02,0.1,0.02)) xlabel("$r$", fontsize=16) ylabel(r"$E[s | s>0]$", fontsize=16) title("Expected selection coefficient in beneficial mutations") legend(loc=2,fontsize=16); # ### Fixation probability # # The main result of this notebook, the probability that a mutation (a) is beneficial, and (b) avoids extinction. # In[34]: n = 10 d = 1. r = 0.1 # This is a Wright-Fisher simulation. A single mutation appears in the population, and if it is beneficial, the evolutionary process proceeds until the mutation either reaches majority (`f > 0.5`) or goes to extinction (`f < 1/N`). $N$ is the variance effective population size. The simulation includes both selection: # $$ # f' = f / \bar{\omega} \\\\ # \bar{\omega} = f + (1-f)(1-s), # $$ # and drift: # $$ # n \sim Bin(N,f) \\\\ # f'' = n/N. # $$ # In[35]: def simulation(d,r,n,N = 1e6): N = float(N) A = zeros(n) A[0] = d/2 X = draw(d,r,n) s = fitness(A + X)/fitness(A) - 1 if s < 0: return False f = 1/N while True: assert 0 <= f <= 1 if f < 1/N: return False elif f > 0.5: return True f = f f /= 1 - s * (1 - f) f = binomial(N,f)/N simulation(d,r,n) # This function estimates the fixation probability by running many simulations: # In[36]: def simulate_pfix(d,r,n,reps=1000): fixes = array([simulation(d,r,n) for _ in range(reps)]) return mean(fixes), std(fixes) / sqrt(reps) simulate_pfix(d,r,n) # This is equation (6) from the main text: # In[37]: def TPB_pfix(d,r,n): s = TPB_s(d,r,n) p = kimura_pben(d,r,n) return 2*s*p TPB_pfix(d,r,n) # In[38]: rrange = linspace(0.001,0.5,25) pfix_TPB = array([TPB_pfix(d,r,n) for r in rrange]) # In[43]: get_ipython().run_cell_magic('cache', 'pfix_emp.pkl pfix_emp reps', 'reps = 100000\npfix_emp = array([simulate_pfix(d,r,n,reps=reps) for r in rrange])\n') # In[44]: n = 10 r = 0.1 # Here we plot Figure 2, the fixation probability as a function of $r/d$: # In[46]: fig, ax = subplots(1, 1, figsize=(10,8)) ax.errorbar(rrange, pfix_emp[:,0], yerr=pfix_emp[:,1], fmt='.k', capsize=5, label='Empricial') ax.plot(rrange, pfix_TPB, '-k', lw=2, label='Theoretical') ax.set_xlabel("Relative mutation size - $r/d$", fontsize=24) ax.set_ylabel(r"Fixation probability - $p_{fix}$", fontsize=24) ax.set_xlim(-0.01, 0.51) ax.set_xticks(arange(0, 0.51, 0.1)) ax.set_ylim(-0.001, 0.0311) ax.set_yticks(arange(0, 0.031, 0.01)) ax.set_title("Fixation probability of beneficial mutations") fig.tight_layout(); # In[47]: ax.set_title('') ax.legend().set_visible(False) fig.savefig("fig2.png") fig.savefig("fig2.pdf", dpi=300, papertype='a4') # ## Fin # # This IPython notebook was written by [Yoav Ram](http://www.yoavram.com) as supplementry material for a TPB article. # # The content is licensed under a CC-BY-SA 3.0 lincese.