#!/usr/bin/env python
# coding: utf-8

# Supporting material for
# # The probability of improvement in Fisher's geometric model: a probabilistic approach
# ## by [Yoav Ram](http://www.yoavram.com) and [Lilach Hadany](https://sites.google.com/site/hadanylab/people/lilach-hadany)
# 
# an article published in [_Theoretical Population Biology_](http://www.journals.elsevier.com/theoretical-population-biology/).
# 
# This [IPython notebook](http://ipython.org/) is provided for reproduction of Figure 2 of the article. It can be viewed by copying its URL to [nbviewer](http://nbviewer.ipython.org) and it can be run by opening it in [binder](http://mybinder.org/repo/yoavram/FGMProb). The notebook also contains functions for deterministic and stochastic calculation of different quantities in FGM.

# ## Setting up
# 
# We start by loading required modules and extensions:

# In[1]:


# This will probably produce a warning. That's OK.
get_ipython().run_line_magic('pylab', 'inline')


# In[2]:


import scipy.stats as st
import scipy.special as sp
from scipy.integrate import quad as integrate
rcParams['figure.figsize'] = 8,6


# In[5]:


# http://nbviewer.ipython.org/github/rossant/ipycache/blob/master/examples/example.ipynb
get_ipython().system('pip install ipycache')


# In[2]:


get_ipython().run_line_magic('load_ext', 'ipycache')


# Define model parameters:

# In[3]:


d = 1.0
r = 0.1
n = 10
A = zeros(n)
A[0] = d/2
assert allclose(norm(A), d/2)
reps = 1000


# `draw` is used to draw a random mutation in Fisher's geometric model, i.e., a random vector of magnitude `r`.

# In[4]:


def draw(d,r,n):
    Z = normal(0,1,n)
    X = r * Z / norm(Z)
    assert allclose(r, norm(X))
    return X


# In[5]:


X = draw(d,r,n)
print X[0]/r < -r/d
print norm(X + A) < norm(A)


# In[6]:


Xs = array([draw(d,r,n) for _ in range(reps)])
assert [X[0]/r < -r/d for X in Xs] == [norm(X + A) < norm(A) for X in Xs]


# ### Probability of improvement
# 
# We with the basic result, first presented by Fisher, of the probability of improvement in FGM.
# 
# We estimate this probability by drawing many mutations and calculating the fraction of mutations that are beneficial.

# In[7]:


def simulate_pben(d,r,n):
    A = zeros(n)
    A[0] = d/2
    assert allclose(norm(A), d/2)
    Xs = array([draw(d,r,n) for _ in range(reps)])
    ps = [(norm(X + A) < norm(A)) for X in Xs]
    mean_p = mean(ps)
    std_p = std(ps, ddof=1)/sqrt(float(reps))
    return mean_p, std_p
simulate_pben(d,r,n)


# This is $p_n^*$ from our article, first shown in (Kimura 1983) (see main text for references):

# In[8]:


def kimura_pben(d,r,n): # Kimura
    return 0.5 * sp.betainc((n-1)/2., 0.5, 1-r**2/d**2)
kimura_pben(d,r,n)


# This is Fisher's original result from (Fisher 1930):

# In[9]:


def fisher_pben(d,r,n):
    return 1-st.norm.cdf(sqrt(n)*r/d)
fisher_pben(d,r,n)


# This is $p^{\#}_{n}$, from (Hartl & Taubes 1996; Rice 1990):

# In[10]:


def hartl_pben(d,r,n): # Hartl & Taubes 1996, eq. 1
    numer = integrate(lambda x: sin(x)**(n-2), 0, arccos(r/d))[0]
    denom = integrate(lambda x: sin(x)**(n-2), 0, pi)[0]
    return numer/denom
hartl_pben(d,r,n)


# We now do the calculations and plot a figure, which is not used in the main text:

# In[11]:


get_ipython().run_cell_magic('cache', 'pben_emp.pkl nrange pben_emp', 'nrange = range(2,102,5)\npben_emp = array([simulate_pben(d,r,n) for n in nrange])\n')


# In[12]:


pben_kimura = [kimura_pben(d,r,n) for n in nrange]
pben_fisher = [fisher_pben(d,r,n) for n in nrange]
pben_hartl = [hartl_pben(d,r,n) for n in nrange]
n = 10


# In[13]:


errorbar(nrange, pben_emp[:,0], yerr=pben_emp[:,1], fmt='.k', capsize=10, label='Empricial')
plot(nrange, pben_kimura, '-k', lw=2, label='Theoretical')
plot(nrange, pben_fisher, '--r', label='Fisher')
plot(nrange, pben_hartl, '-b', label='Hartl')
xlim(-5,105)
xticks([1]+range(20,101,20))
yticks(arange(0.15,0.5,0.1))
xlabel("$n$", fontsize=16)
ylabel(r"$P(|A+X|<|A|)$", fontsize=16)
title("Probability of a beneficial mutation")
legend(fontsize=16);


# ### Expected improvement towards optimum
# 
# Next, we evaluate the expected improvement towards the optimum in FGM. Functions denoted by `TPB` refer to equations derived in the main text.

# In[14]:


def simulate_X1(d,r,n):
    A = zeros(n)
    A[0] = d/2
    assert allclose(norm(A), d/2)
    p = kimura_pben(d,r,n)
    Xs = array([draw(d,r,n) for _ in range(int(reps/p))])
    X1 = [abs(X[0]) for X in Xs if (norm(A + X) < norm(A))]
    mean_X1 = mean(X1)
    std_X1 = std(X1, ddof=1)/sqrt(len(X1))
    return mean_X1, std_X1
simulate_X1(d,r,n)


# In[15]:


def TPB_X1(d,r,n):
    r2d2 = (r**2) / (d**2)
    n_min_1_by_2 = 0.5 * (n-1)
    numer = 2 * r * (1 - r2d2)**n_min_1_by_2
    denom = (n - 1) * sp.betainc(n_min_1_by_2, 0.5, 1 - r2d2) * sp.beta(n_min_1_by_2, 0.5)
    return numer/denom
TPB_X1(d,r,n)


# In[16]:


get_ipython().run_cell_magic('cache', 'X1_emp.pkl X1_emp', 'X1_emp = array([simulate_X1(d,r,n) for n in nrange])\n')


# In[17]:


X1_TPB = array([TPB_X1(d,r,n) for n in nrange])
n = 10


# In[18]:


errorbar(nrange, X1_emp[:,0], yerr=X1_emp[:,1], fmt='.k', capsize=10, label='empricial')
plot(nrange, X1_TPB, '--k', lw=2, label='Theoretical')
xlim(-5,105)
xticks([1]+range(20,101,20))
xlabel("$n$", fontsize=16)
ylabel(r"$E[|X_1| | |A+X|<|A|]$", fontsize=16)
title("Expected effect towards optimum in beneficial mutations")
legend(fontsize=16);


# ### Expected $\Delta d$
# 
# The following quantity does not appear at all in the meain text, but rather in (Hartl and Taubes 1996). Here $\Delta$ is the change in phenotypic distance to the optimal phenotype.

# In[19]:


d,r,n = 1., 0.1, 10
def simulate_deltad(d,r,n):
    A = zeros(n)
    A[0] = d/2
    assert allclose(norm(A), d/2)
    p = kimura_pben(d,r,n)
    Xs = array([draw(d,r,n) for _ in range(int(reps/p))])
    deltad = [abs(norm(A+X) - norm(A)) for X in Xs if (norm(A + X) < norm(A))]
    mean_deltad = mean(deltad)
    std_deltad = std(deltad, ddof=1)/sqrt(len(deltad))
    return mean_deltad, std_deltad
simulate_deltad(d,r,n)


# In[20]:


def hartl_deltad(d,r,n): # Hartl & Taubes 1996, eq. 3
    delta = lambda theta: d/2 - sqrt(0.25 * d**2 + r**2 - r * d * cos(theta))
    numer = integrate(lambda x: delta(x) * sin(x)**(n-2), 0, arccos(r/d))[0]
    denom = integrate(lambda x: sin(x)**(n-2), 0, arccos(r/d))[0]
    return numer/denom
hartl_deltad(d,r,n)


# In[21]:


get_ipython().run_cell_magic('cache', 'deltad_emp.pkl deltad_emp', 'deltad_emp = array([simulate_deltad(d,r,n) for n in nrange])\n')


# In[22]:


deltad_hartl = array([hartl_deltad(d,r,n) for n in nrange])
n = 10


# In[23]:


errorbar(nrange, deltad_emp[:,0], yerr=deltad_emp[:,1], fmt='.k', capsize=10, label='empricial')
plot(nrange, deltad_hartl, '-b', label='Hartl')
xlim(-5,105)
#ylim(0.1, 0.5)
xticks([1]+range(20,101,20))
#yticks(arange(0.15,0.5,0.1))
xlabel("$n$", fontsize=16)
ylabel(r"$E[|A+X|-|A| | |A+X|<|A|]$", fontsize=16)
title("Expected change in distance in beneficial mutations")
legend(fontsize=16);


# ### Expected _s_
# 
# Here we analyze the expected change in fitness in beneficial mutations. Fitness is defined by:
# $$\omega(A) = 1 - ||A||^2$$.

# In[24]:


def fitness(A):
    return 1 - norm(A)**2
assert fitness([d/2,0,0]) == 1 - d**2/4
assert fitness(zeros(n)) == 1


# In[25]:


d,r,n = 1., 0.1, 10
def simulate_s(d,r,n):
    A = zeros(n)
    A[0] = d/2
    assert allclose(norm(A), d/2)
    fitness_A = fitness(A)
    p = kimura_pben(d,r,n)
    Xs = array([draw(d,r,n) for _ in range(int(reps/p))])
    s = array([fitness(X + A)/fitness_A - 1 for X in Xs])
    s = s[s > 0]
    mean_s = mean(s)
    std_s = std(s, ddof=1)/sqrt(len(s))
    return mean_s, std_s
simulate_s(d,r,n)


# This is equation (10) from the main text:

# In[26]:


def TPB_s(d,r,n):
    r2d2 = (r**2) / (d**2)    
    n1by2 = 0.5 * (n-1)
    s = 2 * d * r * (1 - r2d2)**n1by2
    s /= (n - 1) * (1 - 0.25 * d**2) * sp.betainc(n1by2, 0.5, 1 - r2d2) * sp.beta(n1by2, 0.5)
    s -= r**2 / (1 - 0.25 * d**2)    
    return s
TPB_s(d,r,n)


# The same value, calculated from $\Delta$ (Hartl & Taubes 1996):

# In[27]:


def hartl_s(d,r,n):
    deltad = hartl_deltad(d,r,n)
    s = fitness(d/2 - deltad)/fitness(d/2) - 1
    return s
hartl_s(d,r,n)


# We plot the results, once for $n$, number of traits, and once for $r$, the mutation size ($d$ is fixed at 1).

# In[28]:


get_ipython().run_cell_magic('cache', 's_emp.pkl reps s_emp', 'reps = 500\ns_emp = array([simulate_s(d,r,n) for n in nrange])\n')


# In[29]:


s_TPB= array([TPB_s(d,r,n) for n in nrange])
s_hartl = array([hartl_s(d,r,n) for n in nrange])
n = 10


# In[30]:


errorbar(nrange, s_emp[:,0], yerr=s_emp[:,1], fmt='ok', capsize=10, label='Empricial')
plot(nrange, s_TPB, '--k', lw=2, label='Theoretical')
plot(nrange, s_hartl, '-b', label='Hartl')
xlim(-5,105)
ylim(0, 0.09)
xticks([1]+range(20,101,20))
yticks(arange(0.02,0.1,0.02))
xlabel("$n$", fontsize=16)
ylabel(r"$E[s | s>0]$", fontsize=16)
title("Expected selection coefficient in beneficial mutations")
legend(fontsize=16);


# In[31]:


n = 10
rrange = linspace(0.001,0.5,25)
s_TPB_r = array([TPB_s(d,r,n) for r in rrange])


# In[32]:


get_ipython().run_cell_magic('cache', 's_emp_r.pkl reps s_emp_r', 'reps = 500\ns_emp_r = array([simulate_s(d,r,n) for r in rrange])\n')


# In[33]:


errorbar(rrange, s_emp_r[:,0], yerr=s_emp_r[:,1], fmt='ok', capsize=10, label='Empricial')
plot(rrange, s_TPB_r, '--k', lw=2, label='Theoretical')
xlim(-0.01,0.51)
ylim(0, 0.09)
#xticks([1]+range(20,101,20))
yticks(arange(0.02,0.1,0.02))
xlabel("$r$", fontsize=16)
ylabel(r"$E[s | s>0]$", fontsize=16)
title("Expected selection coefficient in beneficial mutations")
legend(loc=2,fontsize=16);


# ### Fixation probability
# 
# The main result of this notebook, the probability that a mutation (a) is beneficial, and (b) avoids extinction.

# In[34]:


n = 10
d = 1.
r = 0.1


# This is a Wright-Fisher simulation. A single mutation appears in the population, and if it is beneficial, the evolutionary process proceeds until the mutation either reaches majority (`f > 0.5`) or goes to extinction (`f < 1/N`). $N$ is the variance effective population size. The simulation includes both selection:
# $$
# f' = f / \bar{\omega} \\\\
# \bar{\omega} = f + (1-f)(1-s),
# $$
# and drift:
# $$
# n \sim Bin(N,f) \\\\
# f'' = n/N.
# $$

# In[35]:


def simulation(d,r,n,N = 1e6):
    N = float(N)
    A = zeros(n)
    A[0] = d/2
    X = draw(d,r,n)
    s = fitness(A + X)/fitness(A) - 1
    if s < 0:
        return False    
    f = 1/N
    while True:
        assert 0 <= f <= 1
        if f < 1/N: return False
        elif f > 0.5: return True        
        f = f 
        f /= 1 - s * (1 - f)
        f = binomial(N,f)/N
simulation(d,r,n)


# This function estimates the fixation probability by running many simulations:

# In[36]:


def simulate_pfix(d,r,n,reps=1000):
    fixes = array([simulation(d,r,n) for _ in range(reps)])
    return mean(fixes), std(fixes) / sqrt(reps)
simulate_pfix(d,r,n)


# This is equation (6) from the main text:

# In[37]:


def TPB_pfix(d,r,n):
    s = TPB_s(d,r,n)
    p = kimura_pben(d,r,n)
    return 2*s*p
TPB_pfix(d,r,n)


# In[38]:


rrange = linspace(0.001,0.5,25)
pfix_TPB = array([TPB_pfix(d,r,n) for r in rrange])


# In[43]:


get_ipython().run_cell_magic('cache', 'pfix_emp.pkl pfix_emp reps', 'reps = 100000\npfix_emp = array([simulate_pfix(d,r,n,reps=reps) for r in rrange])\n')


# In[44]:


n = 10
r = 0.1


# Here we plot Figure 2, the fixation probability as a function of $r/d$:

# In[46]:


fig, ax = subplots(1, 1, figsize=(10,8))
ax.errorbar(rrange, pfix_emp[:,0], yerr=pfix_emp[:,1], fmt='.k', capsize=5, label='Empricial')
ax.plot(rrange, pfix_TPB, '-k', lw=2, label='Theoretical')
ax.set_xlabel("Relative mutation size - $r/d$", fontsize=24)
ax.set_ylabel(r"Fixation probability - $p_{fix}$", fontsize=24)
ax.set_xlim(-0.01, 0.51)
ax.set_xticks(arange(0, 0.51, 0.1))
ax.set_ylim(-0.001, 0.0311)
ax.set_yticks(arange(0, 0.031, 0.01))
ax.set_title("Fixation probability of beneficial mutations")

fig.tight_layout();


# In[47]:


ax.set_title('')
ax.legend().set_visible(False)
fig.savefig("fig2.png")
fig.savefig("fig2.pdf", dpi=300, papertype='a4')


# ## Fin
# 
# This IPython notebook was written by [Yoav Ram](http://www.yoavram.com) as supplementry material for a TPB article. 
# 
# The content is licensed under a CC-BY-SA 3.0 lincese.