## import libraries
import dadi
import numpy as np
import itertools as itt
import pandas as pd
import sys, os

## prettier printing of arrays
pd.set_option('precision',4)
pd.set_option('chop_threshold', 0.001)

## plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## a function to select most common element in a list
def most_common(L):
    return max(itt.groupby(sorted(L)),
               key=lambda(x, v):(len(list(v)),
               -L.index(x)))[0]

## a function to get alleles from ambiguous bases
def unstruct(amb):
    " returns bases from ambiguity code"
    D = {"R":["G","A"],
         "K":["G","T"],
         "S":["G","C"],
         "Y":["T","C"],
         "W":["T","A"],
         "M":["C","A"]}
    if amb in D:
        return D.get(amb)
    else:
        return [amb,amb]

## parse the loci
locifile = open("analysis_pyrad/outfiles/virentes_c85d6m4p5.loci")
loci = locifile.read().strip().split("|")

## define focal populations and the outgroups
cuba        = ['CUCA4','CUVN10','CUSV6']
centralA    = ['CRL0030','CRL0001','HNDA09','BZBB1','MXSA3017']
virginiana  = ['FLSF33','FLBA140','SCCU3','LALC2'] 
geminata    = ["FLCK18","FLSF54","FLWO6","FLAB109"]
minima      = ["FLSF47","FLMO62","FLSA185","FLCK216"]
outgroup    = ["DO","DU","EN","AR","CH","NI","HE","BJSB3","BJVL19","BJSL25"]

## florida is a composite of three 'species'
florida = virginiana+geminata+minima

## minimum samples required to use the locus
proj = [10,6,6]

## only examine loci w/ at least 1 outgroup sample
## and at least two inds from each focal populations
Floci = []   ## filtered locus list
for loc in loci:
    names = [i.strip().split(" ")[0] for i in loc.strip().split("\n")[:-1]]  
    if len(set([">"+i for i in outgroup]).intersection(names)) >= 1:
        if len(set([">"+i for i in florida]).intersection(names)) >= proj[0]/2:
            if len(set([">"+i for i in centralA]).intersection(names)) >= proj[1]/2:
                if len(set([">"+i for i in cuba]).intersection(names)) >= proj[2]/2:
                    Floci.append(loc)

## outfile location            
outfile = open("oaks.dadi.snps", 'w')
    
## print header
print >>outfile, "\t".join(["REF","OUT",
                            "Allele1","fl","ca","cu",
                            "Allele2","fl","ca","cu",
                            "locus","site"])
## recording values
uncalled = 0
calledSNP = 0
uncalledSNP = 0

## iterate over loci
for locN, loc in enumerate(Floci):
    ## to filter a single SNP per locus
    singleSNP = 0

    ## separate names, loci, and SNP identifier line
    names = [i.strip().split(" ")[0] for i in loc.strip().split("\n")[:-1]]  
    dat = [i.strip().split(" ")[-1] for i in loc.strip().split("\n")[:-1]]
    snps = loc.strip().split("\n")[-1][14:]
    
    ## select ingroups v. outgroups
    ihits = [names.index(">"+i) for i in cuba+centralA+florida \
             if ">"+i in names]
    ohits = [names.index(">"+i) for i in outgroup \
             if ">"+i in names]
    
    ## select a single variable SNP from each locus
    ## but also record how many are skipped for calculating
    ## effective sequence length later
    for site,char in enumerate(snps):
        if char in ["-","*"]:
            ## get common (reference) alleles
            i2 = ["-"+dat[k][site]+"-" for k in ihits if dat[k][site] not in ["N","-"]]
            o2 = ["-"+dat[k][site]+"-" for k in ohits if dat[k][site] not in ["N","-"]]
            ## filter out uninformative sites
            b1 = any([i[1] not in ['N','-'] for i in i2])
            b2 = any([i[1] not in ['N','-'] for i in o2])
            if not (b1 and b2):
                uncalled += 1
            else:
                ## if site is variable in the ingroups
                if len(set(i2)) > 1:
                    ## get the segregating alleles
                    alleles = list(itt.chain(*[unstruct(i[1]) for i in i2]))
                    allele1 = most_common(alleles)
                    allele2 = most_common([i for i in alleles if i!=allele1])
                    outg = most_common(list(itt.chain(*[unstruct(i[1]) for i in o2])))
                    ## florida
                    fl = [names.index(">"+z) for z in florida if ">"+z in names]
                    fldat = list(itt.chain(*[unstruct(dat[z][site]) for z in fl]))
                    fl1,fl2 = fldat.count(allele1), fldat.count(allele2)
                    ## central America
                    ca = [names.index(">"+z) for z in centralA if ">"+z in names]
                    cadat = list(itt.chain(*[unstruct(dat[z][site]) for z in ca]))
                    ca1,ca2 = cadat.count(allele1), cadat.count(allele2)
                    ## cuba
                    cu = [names.index(">"+z) for z in cuba if ">"+z in names]
                    cudat = list(itt.chain(*[unstruct(dat[z][site]) for z in cu]))
                    cu1,cu2 = cudat.count(allele1), cudat.count(allele2)
                    
                    if not singleSNP:
                        calledSNP += 1
                        print >>outfile, "\t".join(map(str,["-"+allele1+"-",
                                                        "-"+outg+"-",
                                                        allele1, fl1, ca1, cu1,
                                                        allele2, fl2, ca2, cu2,
                                                        str(locN), str(site)])) 
                        singleSNP = 1
                    else: 
                        uncalledSNP += 1

outfile.close()

Nloci = len(Floci)
print "loci\t",Nloci
print "bp\t",Nloci*90
L = Nloci*90-uncalled
print "called bp\t",L
print 'called SNP\t', calledSNP
print 'uncalledSNP\t', uncalledSNP
propcalled = calledSNP/float(calledSNP+uncalledSNP)
print 'prop\t', propcalled

! head -20 oaks.dadi.snps

## parse the snps file
dd = dadi.Misc.make_data_dict('oaks.dadi.snps')

## extract the fs from this dict with data not projected 
## down to fewer than the total samples
fs = dadi.Spectrum.from_data_dict(dd,
                                  pop_ids=["fl","ca","cu"],
                                  projections=proj, 
                                  polarized=True)

## how many segregating sites after projection?
print fs.S()

## scenario ((florida,cuba),centralA);

def IM_split1(params, ns, pts):
    ## parse params
    N1,N2,N3,T2,T1,m13,m31,m23,m32 = params
    
    ## create a search grid
    xx = dadi.Numerics.default_grid(pts)
    
    ## create ancestral pop
    phi = dadi.PhiManip.phi_1D(xx)
    
    ## split ancestral pop into two species
    phi = dadi.PhiManip.phi_1D_to_2D(xx,phi)
    
    ## allow drift to occur along each of these branches
    phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.)
    
    ## split pop1 into pops 1 and 3
    phi = dadi.PhiManip.phi_2D_to_3D_split_1(xx, phi)

    ## allow drift and migration to occur along these branches
    phi = dadi.Integration.three_pops(phi, xx, T1,
                                       nu1=N1, nu2=N2, nu3=N3,  
                                       m12=0.0, m13=m13, 
                                       m21=0.0, m23=m23,
                                       m31=m31, m32=m32)
    ## simulate the fs
    fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx), 
                                pop_ids=['fl', 'ca', 'cu'])
    return fs

## scenario ((centralA,cuba),florida);

def IM_split2(params, ns, pts):
    ## parse params
    N1,N2,N3,T2,T1,m13,m31,m23,m32 = params
    
    ## create a search grid
    xx = dadi.Numerics.default_grid(pts)
    
    ## create ancestral pop
    phi = dadi.PhiManip.phi_1D(xx)
    
    ## split ancestral pop into two species
    phi = dadi.PhiManip.phi_1D_to_2D(xx,phi)
    
    ## allow drift to occur along each of these branches
    phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.)
    
    ## split pop2 into pops 2 and 3
    phi = dadi.PhiManip.phi_2D_to_3D_split_2(xx, phi)

    ## allow drift and migration to occur along these branches
    phi = dadi.Integration.three_pops(phi, xx, T1,
                                      nu1=N1, nu2=N2, nu3=N3,  
                                      m12=0.0, m13=m13, 
                                      m21=0.0, m23=m23,
                                      m31=m31, m32=m32)
    ## simulate the fs
    fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx), 
                                pop_ids=['fl', 'ca', 'cu'])
    return fs

## scenario hybrid speciation forms cuba.

def admix(params, ns, pts):
    ## parse params
    N1,N2,N3,T2,T1,f = params
    
    ## create a search grid
    xx = dadi.Numerics.default_grid(pts)
    
    ## make ancestral pop that splits into two
    phi = dadi.PhiManip.phi_1D(xx)
    phi = dadi.PhiManip.phi_1D_to_2D(xx,phi)
    
    ## allow drift to occur along each of these branches
    phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.)
    
    ## create pop 3 from a mixture of 1 and 2
    phi = dadi.PhiManip.phi_2D_to_3D_admix(phi, f, xx, xx, xx)

    ## allow drift and migration to occur along these branches
    ## cuba population shrinks in size after divergence
    phi = dadi.Integration.three_pops(phi, xx, T1,
                                      nu1=N1, nu2=N2, nu3=N3,  
                                      m12=0.0, m13=0.0, 
                                      m21=0.0, m23=0.0,
                                      m31=0.0, m32=0.0)
    ## simulate the fs
    fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx),
                                pop_ids=['fl', 'ca', 'cu'])
    return fs

## sample sizes
ns = fs.sample_sizes

## points used for exrapolation
pts_l = [12, 20, 32]

## starting values for params
N1 = N2 = N3 = 2.0
T2 = 2.0
T1 = 2.0
m13 = m31 = m23 = m32 = 0.1
f  = 0.4999

## create starting parameter sets
params_IM    = np.array([N1,N2,N3,T2,T1,m13,m31,m23,m32])
params_admix = np.array([N1,N2,N3,T2,T1,f])

## search limits 
upper_IM    = [10.0,10.0,10.0, 10.0,10.0, 25.0, 25.0, 25.0, 25.0]
lower_IM    = [1e-3,1e-3,1e-3, 1e-3,1e-3, 1e-5, 1e-5, 1e-5, 1e-5]

upper_admix = [10.0,10.0,10.0, 10.0,10.0, 0.9999]
lower_admix = [1e-3,1e-3,1e-3, 1e-3,1e-3, 0.0001]

%%bash
## example of running code
## python analysis_dadi/dadi_m1X.py

#### Example code for optimization in script dadi_m1X.py
## model = IM_split1
## params= params_IM
## uppers= upper_IM
## lowers= lower_IM
## maxiters=10

#### Create extrapolating function 
## Func = dadi.Numerics.make_extrap_log_func(model)

#### Perturb start values
## p0 = dadi.Misc.perturb_params(params, fold=2., 
##                              upper_bound=uppers,
##                              lower_bound=lowers)

#### optimize from starting position
## args = [p0, fs, Func, pts_l, lowers, uppers, 0, maxiters]
## popt = dadi.Inference.optimize_log_lbfgsb(*args)

#### return the likelihood of the data under these parameters 
## mod = Func(popt, ns, pts_l)
## ll_opt = dadi.Inference.ll_multinom(mod, fs)

## optimized at projection (10,6,6); extrap (12,20,32); S=1626
ml1 = [2.5013156716397473, 0.68871161421841698, 0.076807020757753017,
       0.70851628972828207, 0.14901468477012897, 0.14349173980106683,
       12.58678666702065, 1.2625236526206685, 21.036230092460865, -543.08153591196356]

ml2 = [2.412336866389539, 0.66060145092919675, 0.23061219403870617,
       0.71073109877391338, 0.084218967415615298, 0.138041546493126,
       6.036453130870238, 4.09467065466802, 0.0031179372963987696, -541.90248015938994]

ml3 = [2.2562645699939852, 0.71705301735090954, 0.14307134998023263,
       0.57976328038133673, 0.025790751880146609, 0.38345451331222341,
       -555.34376821105138]

indexnames = ["N1","N2","N3","T2","T1","m13","m31","m23","m32","LL"]
ML1 = pd.Series(ml1, index = indexnames)
ML2 = pd.Series(ml2, index = indexnames)
ML3 = pd.Series(ml3, index=["N1","N2","N3","T2","T1","f","LL"])

data = pd.DataFrame([ML1,ML2,ML3],index=["ML1","ML2","ML3"]).T
print data

## optimized grid
pts_l = [12,20,32]

# The optimal value of theta given the model.
Func = dadi.Numerics.make_extrap_log_func(IM_split1)
model1_AFS = Func(ML1.tolist()[:-1],ns,pts_l)
theta1 = dadi.Inference.optimal_sfs_scaling(model1_AFS, fs)

# The optimal value of theta given the model.
Func = dadi.Numerics.make_extrap_log_func(IM_split2)
model2_AFS = Func(ML2.tolist()[:-1],ns,pts_l)
theta2 = dadi.Inference.optimal_sfs_scaling(model2_AFS, fs)

# The optimal value of theta given the model.
Func = dadi.Numerics.make_extrap_log_func(admix)
model3_AFS = Func(ML3.tolist()[:-1],ns,pts_l)
theta3 = dadi.Inference.optimal_sfs_scaling(model3_AFS, fs)

###### Fixed params
mu = 2.5e-9         ## from Populus article by xxyy
gentime = 30.0   ## (years) from Gugger & Cavender-Bares

models = [ML1,ML2,ML3]
thetas = [theta1,theta2,theta3]
for model,theta in zip(models,thetas):
    print "\n-----model %s-------" % str(thetas.index(theta)+1)
    ## Population sizes
    Nref = theta/(4*mu*L)
    print "theta =\t",theta, "("+str(theta/Nloci)+")"
    print "Nref =\t",round(Nref)
    print "N1 =\t",round(Nref*model["N1"])
    print "N2 =\t",round(Nref*model["N2"])
    print "N3 =\t",round(Nref*model["N3"])
    ## Divergence times
    T1  = model["T1"]
    T12 = model["T1"] + model["T2"]
    print 'T1  =\t',round( ( T1*2*Nref * gentime)/1e6, 3), 'Mya'
    print 'T12 =\t',round( (T12*2*Nref * gentime)/1e6, 3), 'Mya'
    ## Migration
    if 'f' in model.index:
        print 'f   =\t', model['f']
        print '1-f =\t', 1-(model["f"])
    else:
        print 'm13 =\t', (model["m13"])/(2*Nref) * 1e3
        print 'm31 =\t', (model["m31"])/(2*Nref) * 1e3
        print 'm23 =\t', (model["m23"])/(2*Nref) * 1e3
        print 'm32 =\t', (model["m32"])/(2*Nref) * 1e3

plt.rcParams['figure.figsize'] = (6,6)
dadi.Plotting.plot_3d_comp_multinom(model1_AFS, fs, vmin=0.1, resid_range=3,
                                    pop_ids =('fl', 'ca', 'cu'))

plt.rcParams['figure.figsize'] = (6,6)
dadi.Plotting.plot_3d_comp_multinom(model2_AFS, fs, vmin=0.1, resid_range=3,
                                    pop_ids =('fl', 'ca', 'cu'))

plt.rcParams['figure.figsize'] = (6,6)
dadi.Plotting.plot_3d_comp_multinom(model3_AFS, fs, vmin=0.1, resid_range=3,
                                    pop_ids =('fl', 'ca','cu'))

#############################################################
### IM_split 1 
def simSFS_model1(theta, N1,N2,N3,mig,T1, T12):
    R1,R2,R3 = np.random.random_integers(0,9999999,3)
    command = """ ms 22 7794 \
                     -t %(theta)f \
                     -I 3 10 6 6 \
                     -n 1 %(N1)f \
                     -n 2 %(N2)f \
                     -n 3 %(N3)f \
                     -en %(T1)f 1 %(N1)f \
                     -ma %(mig)s \
                     -ej %(T1)f 3 1 \
                     -ej %(T12)f 2 1 \
                     -en %(T12)f 1 1 \
                     -seeds %(R1)f %(R2)f %(R3)f """
    sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3,
                'T1':T1, 'mig':" ".join(mig),
                'R1':R1, 'R2':R2, 'R3':R3}
    mscommand = command % sub_dict
    fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False)
    return fs

###############################################################
## IM_split 2
def simSFS_model2(theta, N1,N2,N3,mig,T1, T12):
    R1,R2,R3 = np.random.random_integers(0,9999999,3)
    command = """ ms 22 7794 \
                     -t %(theta)f \
                     -I 3 10 6 6 \
                     -n 1 %(N1)f \
                     -n 2 %(N2)f \
                     -n 3 %(N3)f \
                     -en %(T1)f 2 %(N2)f \
                     -ma %(mig)s \
                     -ej %(T1)f 3 2 \
                     -ej %(T12)f 2 1 \
                     -en %(T12)f 1 1 \
                     -seeds %(R1)f %(R2)f %(R3)f """
    sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3,
                'T1':T1, 'mig':" ".join(mig),
                'R1':R1, 'R2':R2, 'R3':R3}
    mscommand = command % sub_dict
    fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False)
    return fs

#################################################################
## Admix
def simSFS_model3(theta, N1,N2,N3,T1, T12,f):
    R1,R2,R3 = np.random.random_integers(0,9999999,3)
    command = """ ms 22 7794 \
                     -t %(theta)f \
                     -I 3 10 6 6 \
                     -n 1 %(N1)f \
                     -n 2 %(N2)f \
                     -n 3 %(N3)f \
                     -en %(T1)f 2 %(N2)f \
                     -en %(T1)f 1 %(N1)f \
                     -es %(T1)f 3 %(f)f \
                     -ej %(T1)f 4 2 \
                     -ej %(T1)f 3 1 \
                     -ej %(T12)f 2 1 \
                     -en %(T12)f 1 1 \
                     -seeds %(R1)f %(R2)f %(R3)f """
    sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3,
                'T1':T1, 'f':f,
                'R1':R1, 'R2':R2, 'R3':R3}
    mscommand = command % sub_dict
    fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False)
    return fs

%%bash
## commented out b/c it takes a long time to run

## for i in $(seq 1 200);
##   do python analysis_dadi/dadi_m1boot_fit3.py
## done

## for i in $(seq 1 200);
##   do python analysis_dadi/dadi_m2boot_fit3.py
## done

## for i in $(seq 1 100);
##   do python analysis_dadi/dadi_m3boot_fit3.py
## done

###### Fixed params
mu = 2.5e-9         ## from Populus genome article 
gentime = 30.0     ## (years) from Gugger & Cavender-Bares

def makedict(bootfile, theta):    
    ## parse file
    DD = {}
    infile = open(bootfile).readlines()
    lines = [i.strip().split("\t") for i in infile]
    bootorder = [int(i[0].replace("boot","")) for i in lines]
    elements = ["N1","N2","N3","T2","T1"]
    for el,li in zip(elements, range(1,6)):
        DD[el] = [float(i[li]) for i in lines]
    if len(lines[0]) > 8:
        elements = ["m13","m31","m23","m32","LL"]
        for el,li in zip(elements, range(6,11)):
            DD[el] = [float(i[li]) for i in lines]
    else:
        DD['f'] = [float(i[6]) for i in lines]
        DD['LL'] = [float(i[7]) for i in lines]
    DD['sLL'] = [DD["LL"][i-1] for i in bootorder[:60]]
    return DD

DD = makedict("analysis_dadi/dadi_m1_f1_boots.txt", theta1)

#############################
Nref = theta1/(4*mu*L)
#############################

print "Model 1"
for param in ["N1","N2","N3"]:
    m = np.mean([(i*Nref) for i in DD[param]])
    s = np.std([(i*Nref) for i in DD[param]])
    o = (ML1[param]*Nref)
    unbiased = o-(m-o)
    print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s

#############################
m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
o = (ML1["T1"]*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s
m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
o = ((ML1["T1"]+ML1["T2"])*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s

    
#############################
for param in ["m13","m31","m23","m32"]:
    m = np.mean([i/(2*Nref) for i in DD[param]])
    s = np.std([i/(2*Nref) for i in DD[param]])
    o = (ML1[param]/(2*Nref))
    unbiased = o-(m-o)
    print param, max(0,(unbiased-1.96*s)*1e3), o*1e3, (unbiased+1.96*s)*1e3
    
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML1["LL"]
unbiased = o-(m-o)
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("analysis_dadi/dadi_m2_f2_boots.txt", theta2)

#############################
Nref = theta2/(4*mu*L)
#############################
print "Model 2"
for param in ["N1","N2","N3"]:
    m = np.mean([(i*Nref) for i in DD[param]])
    s = np.std([(i*Nref) for i in DD[param]])
    o = (ML2[param]*Nref)
    unbiased = o-(m-o)
    print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s

#############################
m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
o = (ML2["T1"]*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s
m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
o = ((ML2["T1"]+ML2["T2"])*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s

    
#############################
for param in ["m13","m31","m23","m32"]:
    m = np.mean([i/(2*Nref) for i in DD[param]])
    s = np.std([i/(2*Nref) for i in DD[param]])
    o = (ML2[param]/(2*Nref))
    unbiased = o-(m-o)
    print param, max(0,(unbiased-1.96*s)*1e3), o*1e3, (unbiased+1.96*s)*1e3
    
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML2["LL"]
unbiased = o-(m-o)
#print "LL", unbiased-1.96*s, o, unbiased+1.96*s
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("analysis_dadi/dadi_m3_f3_boots.txt", theta2)

#############################
Nref = theta3/(4*mu*L)
#############################
print "Model 3"
for param in ["N1","N2","N3"]:
    m = np.mean([(i*Nref) for i in DD[param]])
    s = np.std([(i*Nref) for i in DD[param]])
    o = (ML3[param]*Nref)
    unbiased = o-(m-o)
    print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s

#############################
m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]])
o = (ML3["T1"]*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s
m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])])
o = ((ML3["T1"]+ML3["T2"])*Nref*2*gentime)/1e6
unbiased = o-(m-o)
print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s

    
#############################
m = np.mean(DD["f"])
s = np.std(DD["f"])
o = (ML3['f'])
unbiased = o-(m-o)
print 'f', unbiased-1.96*s, o, unbiased+1.96*s
    
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML3["LL"]
unbiased = o-(m-o)
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m1_f3_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML1["LL"]
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m1_f2_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML1["LL"]
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m2_f1_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML2["LL"]
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m2_f3_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML2["LL"]
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m3_f1_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML3["LL"]
print "LL", m-1.96*s, o, m+1.96*s

DD = makedict("dadi_m3_f2_boots.txt", theta2)
m = np.mean(DD["LL"])
s = np.std(DD["LL"])
o = ML3["LL"]
print "LL", m-1.96*s, o, m+1.96*s

delta32obs = -2.*(ML3["LL"] - ML2["LL"])
delta31obs = -2.*(ML3["LL"] - ML1["LL"])

print 'LL model3:', ML3["LL"]
print 'LL model2:', ML2["LL"]
print 'LL model1:', ML1["LL"]
print 'delta32 observed:', delta32obs
print 'delta31 observed:', delta31obs

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL31 = pd.read_table("dadi_m3_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL32 = pd.read_table("dadi_m3_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','f','LL']
LL33 = pd.read_table("dadi_m3_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

delta32sim3 = -2.*(LL33-LL32)
delta31sim3 = -2.*(LL33-LL31)

plt.rcParams['figure.figsize'] = (6,4)

sns.kdeplot(delta32sim3, shade=True, label="$\delta_{32}$")
sns.kdeplot(delta31sim3, shade=True, label="$\delta_{31}$")
sns.despine()

plt.vlines(delta32obs, 0, 0.05, lw=3, color='#262626', 
           label='$\delta_{32obs}$', linestyles='dashed')
plt.vlines(delta31obs, 0, 0.05, lw=3, color='#F65655', 
           label='$\delta_{31obs}$', linestyles='dashed')
plt.title("model 3 sims")
plt.legend(loc=2)

delta32star = np.percentile(delta32sim3, 99)
delta31star = np.percentile(delta31sim3, 99)
print 'delta32star', delta32star
print 'delta31star', delta31star
print 'deltaobs', delta32obs

print delta32obs > delta32star
print delta31obs > delta31star

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL21 = pd.read_table("dadi_m2_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL22 = pd.read_table("dadi_m2_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','f','LL']
LL23 = pd.read_table("dadi_m2_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

delta32sim2 = -2.*(LL23-LL22)

plt.rcParams['figure.figsize'] = (6,4)
plt.gcf().subplots_adjust(bottom=0.20, left=0.15)

sns.set_style("ticks")
sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2})
sns.kdeplot(delta32sim2, shade=True, color="#898989",label='$\delta_{sim2}$')
sns.kdeplot(delta32sim3, shade=True, color="#262626", label='$\delta_{sim3}$')
sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density')
sns.despine()

#plt.vlines(delta32star, 0, 0.061, lw=3, color="#262626", 
#           linestyles='dashed', label="$\delta^{*}$")
plt.vlines(delta32obs, 0, 0.061, lw=3, color='#F65655',
           linestyles='dashed', label='$\delta_{obs}$')
plt.legend(loc=1)
plt.title("(B) models 2 vs. 3")
plt.savefig("delta32.png", dpi=300)
plt.savefig("delta32.svg")

1.-sum(delta32sim2 < delta32star)/float(len(delta32sim2))

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL11 = pd.read_table("dadi_m1_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL']
LL12 = pd.read_table("dadi_m1_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

W = ['title','N1','N2','N3','T2','T1','f','LL']
LL13 = pd.read_table("dadi_m1_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"]

delta31sim1 = -2.*(LL13-LL11)

plt.rcParams['figure.figsize'] = (6,4)
plt.gcf().subplots_adjust(bottom=0.20, left=0.15)

sns.set_style("ticks")
sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2})
sns.kdeplot(delta31sim1, shade=True, color="#898989",label='$\delta_{sim1}$')
sns.kdeplot(delta31sim3, shade=True, color="#262626", label='$\delta_{sim3}$')
sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density')
sns.despine()

plt.vlines(delta31obs, 0, 0.085, lw=3, color='#F65655',
           linestyles='dashed', label='$\delta_{obs}$')
plt.legend(loc=1)
plt.title("(A) models 1 vs. 3")
plt.savefig("delta31.png", dpi=300)
plt.savefig("delta31.svg")

1.-sum(delta31sim1 < delta31star)/float(len(delta31sim1))

delta12obs = -2.*(ML1["LL"] - ML2["LL"])
delta13obs = -2.*(ML1["LL"] - ML3["LL"])

print 'LL model3:', ML3["LL"]
print 'LL model2:', ML2["LL"]
print 'LL model1:', ML1["LL"]
print 'delta12 observed:', delta12obs
print 'delta13 observed:', delta13obs

delta12sim1 = -2.*(LL11-LL12)

print 'obs=', delta12obs
delta12star = np.percentile(delta12sim1, 95)
print '\ndelta*95=',delta12star
print delta12obs > delta12star
delta12star = np.percentile(delta12sim1, 94)
print '\ndelta*94=',delta12star
print delta12obs > delta12star
delta12star = np.percentile(delta12sim1, 93)
print '\ndelta*93=',delta12star
print delta12obs > delta12star

delta12sim2 = -2.*(LL21-LL22)

plt.rcParams['figure.figsize'] = (6,4)
plt.gcf().subplots_adjust(bottom=0.20, left=0.15)

sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2})
sns.kdeplot(delta12sim1, shade=True, color="#262626", label='$\delta_{sim1}$')
sns.kdeplot(delta12sim2, shade=True, color="#898989", label='$\delta_{sim2}$')

sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density')
sns.despine()

plt.vlines(delta12obs, 0, 0.055, lw=3, color='#F65655',
           linestyles='dashed', label='$\delta_{obs}$')

plt.legend(loc=2)
plt.title("(C) models 1 vs. 2")
plt.savefig("delta12.png", dpi=300)
plt.savefig("delta12.svg")

sum(delta12sim2 > delta12star)/float(len(delta12sim2))