#!/usr/bin/env python
# coding: utf-8

# # Reproduction of arXiv:1506.04175
# 
# In this notebook, we'll go through [Burke et al. (2015)](http://arxiv.org/abs/1506.04175) and reproduce their findings.

# In[1]:


from __future__ import division, print_function

get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', 'InlineBackend.figure_format = "retina"')

from matplotlib import rcParams
rcParams["savefig.dpi"] = 100
rcParams["font.size"] = 20


# In[17]:


import emcee
import numpy as np
import pandas as pd
from itertools import izip
from scipy.stats import gamma
import matplotlib.pyplot as pl
from scipy.optimize import minimize


# In[3]:


# These helper functions let us download and cache
# the catalogs from the Exoplanet Archive.
from exopop2.data import get_kois, get_stlr


# ## Datasets
# 
# To start, we'll download the stellar target catalog and the relevant KOI list.

# In[4]:


stlr = get_stlr()

# Select G and K dwarfs.
m = (4200 <= stlr.teff) & (stlr.teff <= 6100)
m &= stlr.radius <= 1.15

# Remove targets with missing mass estimates.
m &= np.isfinite(stlr.mass)

# Only include stars with sufficient data coverage.
m &= stlr.dataspan > 365.25*2.
m &= stlr.dutycycle > 0.6
m &= stlr.rrmscdpp07p5 < 1000 

stlr = stlr[m]

print("Selected {0} targets after cuts".format(len(stlr)))


# In[5]:


pl.plot(stlr.teff, stlr.logg, ".k", ms=3, alpha=0.5)
pl.xlim(6100, 4200)
pl.ylim(pl.gca().get_ylim()[::-1])  # because astronomy...
pl.ylabel("$\log g$");
pl.xlabel("$T_\mathrm{eff}$");


# In[6]:


period_rng = (50, 300)
rp_rng = (0.75, 2.5)


# In[7]:


kois = get_kois()

# Join on the stellar list.
kois = pd.merge(stlr[["kepid"]], kois, on="kepid")

# Only select the KOIs in the relevant part of parameter space.
m = (kois.koi_disposition == "CANDIDATE") | (kois.koi_disposition == "CONFIRMED")
m &= (period_rng[0] <= kois.koi_period) & (kois.koi_period <= period_rng[1])
m &= (rp_rng[0] <= kois.koi_prad) & (kois.koi_prad <= rp_rng[1])

kois = kois[m]

print("Selected {0} KOIs after cuts".format(len(kois)))


# In[8]:


pl.plot(kois.koi_period, kois.koi_prad, "og", mec="none", ms=4)
pl.xlim(period_rng)
pl.ylim(rp_rng)
pl.xlabel("period [days]")
pl.ylabel("$R_p \, [R_\oplus]$");


# In[9]:


def get_duration(period, aor, e):
    return 4 * period * np.sqrt(1 - e**2) / aor

def get_a(period, mstar, Go4pi=2945.4625385377644/(4*np.pi*np.pi)):
    return (Go4pi*period*period*mstar) ** (1./3)

def get_delta(k, c=1.0874, s=1.0187):
    delta_max = k*k * (c + s*k)
    return 0.84 * delta_max

cdpp_cols = [k for k in stlr.keys() if k.startswith("rrmscdpp")]
cdpp_vals = np.array([k[-4:].replace("p", ".") for k in cdpp_cols], dtype=float)
def get_snr(star, tau, k):
    cdpp = np.array(star[cdpp_cols], dtype=float)
    sigma = np.interp(tau, cdpp_vals, cdpp)
    return get_delta(k) * 1e6 / sigma

def get_mes(star, period, rp, tau, re=0.009171):
    k = rp * re / star.radius
    snr = get_snr(star, tau, k)
    ntrn = star.dataspan * star.dutycycle / period 
    return snr * np.sqrt(ntrn)

pgam = gamma(4.65, loc=0., scale=0.98)
mesthres_cols = [k for k in stlr.keys() if k.startswith("mesthres")]
mesthres_vals = np.array([k[-4:].replace("p", ".") for k in mesthres_cols],
                         dtype=float)
def get_pdet(star, aor, period, rp, e):
    tau = get_duration(period, aor, e)
    mes = get_mes(star, period, rp, tau)
    mest = np.interp(tau, mesthres_vals,
                     np.array(star[mesthres_cols],
                              dtype=float))
    x = mes - 4.1 - (mest - 7.1)
    return pgam.cdf(x)

def get_pwin(star, period):
    M = star.dataspan / period
    f = star.dutycycle
    omf = 1.0 - f
    pw = 1 - omf**M - M*f*omf**(M-1) - 0.5*M*(M-1)*f*f*omf**(M-2)
    msk = (pw >= 0.0) * (period < star.dataspan)
    return pw * msk

def get_pgeom(aor, e):
    return 1. / (aor * (1 - e*e)) * (aor > 1.0) + (aor < 1.0)

def get_completeness(star, period, rp, e, with_geom=True):
    aor = get_a(period, star.mass) / star.radius
    pdet = get_pdet(star, aor, period, rp, e)
    pwin = get_pwin(star, period)
    if not with_geom:
        return pdet * pwin
    pgeom = get_pgeom(aor, e)
    return pdet * pwin * pgeom


# In[11]:


star = stlr[stlr.kepid == 10593626].iloc[0]

period = np.linspace(10, 700, 500)
rp = np.linspace(0.75, 2.5, 421)
X, Y = np.meshgrid(period, rp, indexing="ij")
Z = get_completeness(star, X, Y, 0.0, with_geom=False)

c = pl.contour(X, Y, Z, colors="k")
pl.clabel(c, fontsize=12, inline=1, fmt="%.2f") 
pl.xlabel("period [days]")
pl.ylabel("$R_p \, [R_\oplus]$");


# In[12]:


# Compute the mean completeness in the range.
period = np.linspace(period_rng[0], period_rng[1], 50)
rp = np.linspace(rp_rng[0], rp_rng[1], 57)
period_grid, rp_grid = np.meshgrid(period, rp, indexing="ij")
comp = np.zeros_like(period_grid)

for _, star in stlr.iterrows():
    comp += get_completeness(star, period_grid, rp_grid, 0.0, with_geom=True)

comp /= len(stlr)


# In[13]:


pl.plot(kois.koi_period, kois.koi_prad, "ok", mec="none", ms=4)

pl.pcolor(period_grid, rp_grid, comp, cmap="BuGn")
c = pl.contour(period_grid, rp_grid, comp, [0.0003, 0.001, 0.003, 0.01],
               colors="k", alpha=0.8)
pl.clabel(c, fontsize=12, inline=1, fmt="%.4f") 
pl.xlabel("period [days]")
pl.ylabel("$R_p \, [R_\oplus]$");


# In[14]:


def population_model(theta, period, rp):
    lnf0, beta, alpha = theta
    v = np.exp(lnf0) * np.ones_like(period)
    for x, rng, n in izip((period, rp), (period_rng, rp_rng),
                          (beta, alpha)):
        n1 = n + 1
        v *= x**n*n1 / (rng[1]**n1-rng[0]**n1)
    return v

koi_periods = np.array(kois.koi_period)
koi_rps = np.array(kois.koi_prad)
vol = np.diff(period_grid, axis=0)[:, :-1] * np.diff(rp_grid, axis=1)[:-1, :]
def lnlike(theta):
    pop = population_model(theta, period_grid, rp_grid)
    pop *= comp
    pop = 0.5 * (pop[:-1, :-1] + pop[1:, 1:])
    norm = np.sum(pop * vol)
    ll = np.sum(np.log(population_model(theta, koi_periods, koi_rps))) - norm
    return ll if np.isfinite(ll) else -np.inf

def lnprob(theta):
    return lnlike(theta)

def nll(theta):
    ll = lnlike(theta)
    return -ll if np.isfinite(ll) else 1e15


# In[15]:


theta_0 = np.array([np.log(0.75*len(stlr)), -0.53218, -0.5])
r = minimize(nll, theta_0, method="L-BFGS-B",
             bounds=[(None, None), (None, None), (None, None)])


# In[18]:


ndim, nwalkers = len(r.x), 16
pos = [r.x + 1e-5 * np.random.randn(ndim) for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)


# In[37]:


pos, _, _ = sampler.run_mcmc(pos, 200)
sampler.reset()
pos, _, _ = sampler.run_mcmc(pos, 2000)


# In[78]:


pl.plot(sampler.chain[:, :, 2].T, "k", alpha=0.5);


# In[79]:


samps = sampler.chain[:, ::5, :]
samps = samps.reshape((-1, samps.shape[-1]))
pop = np.empty((len(samps), period_grid.shape[0], period_grid.shape[1]))
gamma_earth = np.empty((len(samps)))
for i, p in enumerate(samps):
    pop[i] = population_model(p, period_grid, rp_grid)
    gamma_earth[i] = population_model(p, 365.25, 1.0)


# In[43]:


pop_comp = pop * comp[None, :, :]


# In[65]:


dx = 0.25
x = np.arange(rp_rng[0], rp_rng[1] + dx, dx)

pop_rp = 0.5 * (pop_comp[:, 1:] + pop_comp[:, :-1])
pop_rp = np.sum(pop_rp * np.diff(period)[None, :, None], axis=1)
a, b, c, d, e = np.percentile(pop_rp * dx, [2.5, 16, 50, 84, 97.5], axis=0)

pl.fill_between(rp, a, e, color="k", alpha=0.1, edgecolor="none")
pl.fill_between(rp, b, d, color="k", alpha=0.3, edgecolor="none")
pl.plot(rp, c, "k", lw=2)

n, _ = np.histogram(koi_rps, x)
pl.errorbar(0.5*(x[:-1]+x[1:]), n, yerr=np.sqrt(n), fmt=".k", capsize=0)
pl.xlim(rp_rng[0], rp_rng[1])
pl.xlabel("$R_p\,[R_\oplus]$")
pl.ylabel("number of planets");


# In[74]:


dx = 31.25
x = np.arange(period_rng[0], period_rng[1] + dx, dx)

pop_per = 0.5 * (pop_comp[:, :, 1:] + pop_comp[:, :, :-1])
pop_per = np.sum(pop_per * np.diff(rp)[None, None, :], axis=2)

a, b, c, d, e = np.percentile(pop_per * dx, [2.5, 16, 50, 84, 97.5], axis=0)

pl.fill_between(period, a, e, color="k", alpha=0.1, edgecolor="none")
pl.fill_between(period, b, d, color="k", alpha=0.2, edgecolor="none")
pl.plot(period, c, "k", lw=1)

n, _ = np.histogram(koi_periods, x)
pl.errorbar(0.5*(x[:-1]+x[1:]), n, yerr=np.sqrt(n), fmt=".k", ms=8, capsize=0)
pl.xlim(period_rng[0], period_rng[1])
pl.ylim(0, 80)
pl.xlabel("$P\,[\mathrm{days}]$")
pl.ylabel("number of planets");


# In[84]:


pl.hist(np.log10(gamma_earth / len(stlr) * 365.25), 50, histtype="step", color="k");


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]: