## import libraries import dadi import numpy as np import itertools as itt import pandas as pd import sys, os ## prettier printing of arrays pd.set_option('precision',4) pd.set_option('chop_threshold', 0.001) ## plotting libraries import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline ## a function to select most common element in a list def most_common(L): return max(itt.groupby(sorted(L)), key=lambda(x, v):(len(list(v)), -L.index(x)))[0] ## a function to get alleles from ambiguous bases def unstruct(amb): " returns bases from ambiguity code" D = {"R":["G","A"], "K":["G","T"], "S":["G","C"], "Y":["T","C"], "W":["T","A"], "M":["C","A"]} if amb in D: return D.get(amb) else: return [amb,amb] ## parse the loci locifile = open("analysis_pyrad/outfiles/virentes_c85d6m4p5.loci") loci = locifile.read().strip().split("|") ## define focal populations and the outgroups cuba = ['CUCA4','CUVN10','CUSV6'] centralA = ['CRL0030','CRL0001','HNDA09','BZBB1','MXSA3017'] virginiana = ['FLSF33','FLBA140','SCCU3','LALC2'] geminata = ["FLCK18","FLSF54","FLWO6","FLAB109"] minima = ["FLSF47","FLMO62","FLSA185","FLCK216"] outgroup = ["DO","DU","EN","AR","CH","NI","HE","BJSB3","BJVL19","BJSL25"] ## florida is a composite of three 'species' florida = virginiana+geminata+minima ## minimum samples required to use the locus proj = [10,6,6] ## only examine loci w/ at least 1 outgroup sample ## and at least two inds from each focal populations Floci = [] ## filtered locus list for loc in loci: names = [i.strip().split(" ")[0] for i in loc.strip().split("\n")[:-1]] if len(set([">"+i for i in outgroup]).intersection(names)) >= 1: if len(set([">"+i for i in florida]).intersection(names)) >= proj[0]/2: if len(set([">"+i for i in centralA]).intersection(names)) >= proj[1]/2: if len(set([">"+i for i in cuba]).intersection(names)) >= proj[2]/2: Floci.append(loc) ## outfile location outfile = open("oaks.dadi.snps", 'w') ## print header print >>outfile, "\t".join(["REF","OUT", "Allele1","fl","ca","cu", "Allele2","fl","ca","cu", "locus","site"]) ## recording values uncalled = 0 calledSNP = 0 uncalledSNP = 0 ## iterate over loci for locN, loc in enumerate(Floci): ## to filter a single SNP per locus singleSNP = 0 ## separate names, loci, and SNP identifier line names = [i.strip().split(" ")[0] for i in loc.strip().split("\n")[:-1]] dat = [i.strip().split(" ")[-1] for i in loc.strip().split("\n")[:-1]] snps = loc.strip().split("\n")[-1][14:] ## select ingroups v. outgroups ihits = [names.index(">"+i) for i in cuba+centralA+florida \ if ">"+i in names] ohits = [names.index(">"+i) for i in outgroup \ if ">"+i in names] ## select a single variable SNP from each locus ## but also record how many are skipped for calculating ## effective sequence length later for site,char in enumerate(snps): if char in ["-","*"]: ## get common (reference) alleles i2 = ["-"+dat[k][site]+"-" for k in ihits if dat[k][site] not in ["N","-"]] o2 = ["-"+dat[k][site]+"-" for k in ohits if dat[k][site] not in ["N","-"]] ## filter out uninformative sites b1 = any([i[1] not in ['N','-'] for i in i2]) b2 = any([i[1] not in ['N','-'] for i in o2]) if not (b1 and b2): uncalled += 1 else: ## if site is variable in the ingroups if len(set(i2)) > 1: ## get the segregating alleles alleles = list(itt.chain(*[unstruct(i[1]) for i in i2])) allele1 = most_common(alleles) allele2 = most_common([i for i in alleles if i!=allele1]) outg = most_common(list(itt.chain(*[unstruct(i[1]) for i in o2]))) ## florida fl = [names.index(">"+z) for z in florida if ">"+z in names] fldat = list(itt.chain(*[unstruct(dat[z][site]) for z in fl])) fl1,fl2 = fldat.count(allele1), fldat.count(allele2) ## central America ca = [names.index(">"+z) for z in centralA if ">"+z in names] cadat = list(itt.chain(*[unstruct(dat[z][site]) for z in ca])) ca1,ca2 = cadat.count(allele1), cadat.count(allele2) ## cuba cu = [names.index(">"+z) for z in cuba if ">"+z in names] cudat = list(itt.chain(*[unstruct(dat[z][site]) for z in cu])) cu1,cu2 = cudat.count(allele1), cudat.count(allele2) if not singleSNP: calledSNP += 1 print >>outfile, "\t".join(map(str,["-"+allele1+"-", "-"+outg+"-", allele1, fl1, ca1, cu1, allele2, fl2, ca2, cu2, str(locN), str(site)])) singleSNP = 1 else: uncalledSNP += 1 outfile.close() Nloci = len(Floci) print "loci\t",Nloci print "bp\t",Nloci*90 L = Nloci*90-uncalled print "called bp\t",L print 'called SNP\t', calledSNP print 'uncalledSNP\t', uncalledSNP propcalled = calledSNP/float(calledSNP+uncalledSNP) print 'prop\t', propcalled ! head -20 oaks.dadi.snps ## parse the snps file dd = dadi.Misc.make_data_dict('oaks.dadi.snps') ## extract the fs from this dict with data not projected ## down to fewer than the total samples fs = dadi.Spectrum.from_data_dict(dd, pop_ids=["fl","ca","cu"], projections=proj, polarized=True) ## how many segregating sites after projection? print fs.S() ## scenario ((florida,cuba),centralA); def IM_split1(params, ns, pts): ## parse params N1,N2,N3,T2,T1,m13,m31,m23,m32 = params ## create a search grid xx = dadi.Numerics.default_grid(pts) ## create ancestral pop phi = dadi.PhiManip.phi_1D(xx) ## split ancestral pop into two species phi = dadi.PhiManip.phi_1D_to_2D(xx,phi) ## allow drift to occur along each of these branches phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.) ## split pop1 into pops 1 and 3 phi = dadi.PhiManip.phi_2D_to_3D_split_1(xx, phi) ## allow drift and migration to occur along these branches phi = dadi.Integration.three_pops(phi, xx, T1, nu1=N1, nu2=N2, nu3=N3, m12=0.0, m13=m13, m21=0.0, m23=m23, m31=m31, m32=m32) ## simulate the fs fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx), pop_ids=['fl', 'ca', 'cu']) return fs ## scenario ((centralA,cuba),florida); def IM_split2(params, ns, pts): ## parse params N1,N2,N3,T2,T1,m13,m31,m23,m32 = params ## create a search grid xx = dadi.Numerics.default_grid(pts) ## create ancestral pop phi = dadi.PhiManip.phi_1D(xx) ## split ancestral pop into two species phi = dadi.PhiManip.phi_1D_to_2D(xx,phi) ## allow drift to occur along each of these branches phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.) ## split pop2 into pops 2 and 3 phi = dadi.PhiManip.phi_2D_to_3D_split_2(xx, phi) ## allow drift and migration to occur along these branches phi = dadi.Integration.three_pops(phi, xx, T1, nu1=N1, nu2=N2, nu3=N3, m12=0.0, m13=m13, m21=0.0, m23=m23, m31=m31, m32=m32) ## simulate the fs fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx), pop_ids=['fl', 'ca', 'cu']) return fs ## scenario hybrid speciation forms cuba. def admix(params, ns, pts): ## parse params N1,N2,N3,T2,T1,f = params ## create a search grid xx = dadi.Numerics.default_grid(pts) ## make ancestral pop that splits into two phi = dadi.PhiManip.phi_1D(xx) phi = dadi.PhiManip.phi_1D_to_2D(xx,phi) ## allow drift to occur along each of these branches phi = dadi.Integration.two_pops(phi, xx, T2, nu1=N1, nu2=N2, m12=0., m21=0.) ## create pop 3 from a mixture of 1 and 2 phi = dadi.PhiManip.phi_2D_to_3D_admix(phi, f, xx, xx, xx) ## allow drift and migration to occur along these branches ## cuba population shrinks in size after divergence phi = dadi.Integration.three_pops(phi, xx, T1, nu1=N1, nu2=N2, nu3=N3, m12=0.0, m13=0.0, m21=0.0, m23=0.0, m31=0.0, m32=0.0) ## simulate the fs fs = dadi.Spectrum.from_phi(phi,ns,(xx,xx,xx), pop_ids=['fl', 'ca', 'cu']) return fs ## sample sizes ns = fs.sample_sizes ## points used for exrapolation pts_l = [12, 20, 32] ## starting values for params N1 = N2 = N3 = 2.0 T2 = 2.0 T1 = 2.0 m13 = m31 = m23 = m32 = 0.1 f = 0.4999 ## create starting parameter sets params_IM = np.array([N1,N2,N3,T2,T1,m13,m31,m23,m32]) params_admix = np.array([N1,N2,N3,T2,T1,f]) ## search limits upper_IM = [10.0,10.0,10.0, 10.0,10.0, 25.0, 25.0, 25.0, 25.0] lower_IM = [1e-3,1e-3,1e-3, 1e-3,1e-3, 1e-5, 1e-5, 1e-5, 1e-5] upper_admix = [10.0,10.0,10.0, 10.0,10.0, 0.9999] lower_admix = [1e-3,1e-3,1e-3, 1e-3,1e-3, 0.0001] %%bash ## example of running code ## python analysis_dadi/dadi_m1X.py #### Example code for optimization in script dadi_m1X.py ## model = IM_split1 ## params= params_IM ## uppers= upper_IM ## lowers= lower_IM ## maxiters=10 #### Create extrapolating function ## Func = dadi.Numerics.make_extrap_log_func(model) #### Perturb start values ## p0 = dadi.Misc.perturb_params(params, fold=2., ## upper_bound=uppers, ## lower_bound=lowers) #### optimize from starting position ## args = [p0, fs, Func, pts_l, lowers, uppers, 0, maxiters] ## popt = dadi.Inference.optimize_log_lbfgsb(*args) #### return the likelihood of the data under these parameters ## mod = Func(popt, ns, pts_l) ## ll_opt = dadi.Inference.ll_multinom(mod, fs) ## optimized at projection (10,6,6); extrap (12,20,32); S=1626 ml1 = [2.5013156716397473, 0.68871161421841698, 0.076807020757753017, 0.70851628972828207, 0.14901468477012897, 0.14349173980106683, 12.58678666702065, 1.2625236526206685, 21.036230092460865, -543.08153591196356] ml2 = [2.412336866389539, 0.66060145092919675, 0.23061219403870617, 0.71073109877391338, 0.084218967415615298, 0.138041546493126, 6.036453130870238, 4.09467065466802, 0.0031179372963987696, -541.90248015938994] ml3 = [2.2562645699939852, 0.71705301735090954, 0.14307134998023263, 0.57976328038133673, 0.025790751880146609, 0.38345451331222341, -555.34376821105138] indexnames = ["N1","N2","N3","T2","T1","m13","m31","m23","m32","LL"] ML1 = pd.Series(ml1, index = indexnames) ML2 = pd.Series(ml2, index = indexnames) ML3 = pd.Series(ml3, index=["N1","N2","N3","T2","T1","f","LL"]) data = pd.DataFrame([ML1,ML2,ML3],index=["ML1","ML2","ML3"]).T print data ## optimized grid pts_l = [12,20,32] # The optimal value of theta given the model. Func = dadi.Numerics.make_extrap_log_func(IM_split1) model1_AFS = Func(ML1.tolist()[:-1],ns,pts_l) theta1 = dadi.Inference.optimal_sfs_scaling(model1_AFS, fs) # The optimal value of theta given the model. Func = dadi.Numerics.make_extrap_log_func(IM_split2) model2_AFS = Func(ML2.tolist()[:-1],ns,pts_l) theta2 = dadi.Inference.optimal_sfs_scaling(model2_AFS, fs) # The optimal value of theta given the model. Func = dadi.Numerics.make_extrap_log_func(admix) model3_AFS = Func(ML3.tolist()[:-1],ns,pts_l) theta3 = dadi.Inference.optimal_sfs_scaling(model3_AFS, fs) ###### Fixed params mu = 2.5e-9 ## from Populus article by xxyy gentime = 30.0 ## (years) from Gugger & Cavender-Bares models = [ML1,ML2,ML3] thetas = [theta1,theta2,theta3] for model,theta in zip(models,thetas): print "\n-----model %s-------" % str(thetas.index(theta)+1) ## Population sizes Nref = theta/(4*mu*L) print "theta =\t",theta, "("+str(theta/Nloci)+")" print "Nref =\t",round(Nref) print "N1 =\t",round(Nref*model["N1"]) print "N2 =\t",round(Nref*model["N2"]) print "N3 =\t",round(Nref*model["N3"]) ## Divergence times T1 = model["T1"] T12 = model["T1"] + model["T2"] print 'T1 =\t',round( ( T1*2*Nref * gentime)/1e6, 3), 'Mya' print 'T12 =\t',round( (T12*2*Nref * gentime)/1e6, 3), 'Mya' ## Migration if 'f' in model.index: print 'f =\t', model['f'] print '1-f =\t', 1-(model["f"]) else: print 'm13 =\t', (model["m13"])/(2*Nref) * 1e3 print 'm31 =\t', (model["m31"])/(2*Nref) * 1e3 print 'm23 =\t', (model["m23"])/(2*Nref) * 1e3 print 'm32 =\t', (model["m32"])/(2*Nref) * 1e3 plt.rcParams['figure.figsize'] = (6,6) dadi.Plotting.plot_3d_comp_multinom(model1_AFS, fs, vmin=0.1, resid_range=3, pop_ids =('fl', 'ca', 'cu')) plt.rcParams['figure.figsize'] = (6,6) dadi.Plotting.plot_3d_comp_multinom(model2_AFS, fs, vmin=0.1, resid_range=3, pop_ids =('fl', 'ca', 'cu')) plt.rcParams['figure.figsize'] = (6,6) dadi.Plotting.plot_3d_comp_multinom(model3_AFS, fs, vmin=0.1, resid_range=3, pop_ids =('fl', 'ca','cu')) ############################################################# ### IM_split 1 def simSFS_model1(theta, N1,N2,N3,mig,T1, T12): R1,R2,R3 = np.random.random_integers(0,9999999,3) command = """ ms 22 7794 \ -t %(theta)f \ -I 3 10 6 6 \ -n 1 %(N1)f \ -n 2 %(N2)f \ -n 3 %(N3)f \ -en %(T1)f 1 %(N1)f \ -ma %(mig)s \ -ej %(T1)f 3 1 \ -ej %(T12)f 2 1 \ -en %(T12)f 1 1 \ -seeds %(R1)f %(R2)f %(R3)f """ sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3, 'T1':T1, 'mig':" ".join(mig), 'R1':R1, 'R2':R2, 'R3':R3} mscommand = command % sub_dict fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False) return fs ############################################################### ## IM_split 2 def simSFS_model2(theta, N1,N2,N3,mig,T1, T12): R1,R2,R3 = np.random.random_integers(0,9999999,3) command = """ ms 22 7794 \ -t %(theta)f \ -I 3 10 6 6 \ -n 1 %(N1)f \ -n 2 %(N2)f \ -n 3 %(N3)f \ -en %(T1)f 2 %(N2)f \ -ma %(mig)s \ -ej %(T1)f 3 2 \ -ej %(T12)f 2 1 \ -en %(T12)f 1 1 \ -seeds %(R1)f %(R2)f %(R3)f """ sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3, 'T1':T1, 'mig':" ".join(mig), 'R1':R1, 'R2':R2, 'R3':R3} mscommand = command % sub_dict fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False) return fs ################################################################# ## Admix def simSFS_model3(theta, N1,N2,N3,T1, T12,f): R1,R2,R3 = np.random.random_integers(0,9999999,3) command = """ ms 22 7794 \ -t %(theta)f \ -I 3 10 6 6 \ -n 1 %(N1)f \ -n 2 %(N2)f \ -n 3 %(N3)f \ -en %(T1)f 2 %(N2)f \ -en %(T1)f 1 %(N1)f \ -es %(T1)f 3 %(f)f \ -ej %(T1)f 4 2 \ -ej %(T1)f 3 1 \ -ej %(T12)f 2 1 \ -en %(T12)f 1 1 \ -seeds %(R1)f %(R2)f %(R3)f """ sub_dict = {'theta':theta, 'N1':N1, 'N2':N2, 'N3':N3, 'T1':T1, 'f':f, 'R1':R1, 'R2':R2, 'R3':R3} mscommand = command % sub_dict fs = dadi.Spectrum.from_ms_file(os.popen(mscommand), average=False) return fs %%bash ## commented out b/c it takes a long time to run ## for i in $(seq 1 200); ## do python analysis_dadi/dadi_m1boot_fit3.py ## done ## for i in $(seq 1 200); ## do python analysis_dadi/dadi_m2boot_fit3.py ## done ## for i in $(seq 1 100); ## do python analysis_dadi/dadi_m3boot_fit3.py ## done ###### Fixed params mu = 2.5e-9 ## from Populus genome article gentime = 30.0 ## (years) from Gugger & Cavender-Bares def makedict(bootfile, theta): ## parse file DD = {} infile = open(bootfile).readlines() lines = [i.strip().split("\t") for i in infile] bootorder = [int(i[0].replace("boot","")) for i in lines] elements = ["N1","N2","N3","T2","T1"] for el,li in zip(elements, range(1,6)): DD[el] = [float(i[li]) for i in lines] if len(lines[0]) > 8: elements = ["m13","m31","m23","m32","LL"] for el,li in zip(elements, range(6,11)): DD[el] = [float(i[li]) for i in lines] else: DD['f'] = [float(i[6]) for i in lines] DD['LL'] = [float(i[7]) for i in lines] DD['sLL'] = [DD["LL"][i-1] for i in bootorder[:60]] return DD DD = makedict("analysis_dadi/dadi_m1_f1_boots.txt", theta1) ############################# Nref = theta1/(4*mu*L) ############################# print "Model 1" for param in ["N1","N2","N3"]: m = np.mean([(i*Nref) for i in DD[param]]) s = np.std([(i*Nref) for i in DD[param]]) o = (ML1[param]*Nref) unbiased = o-(m-o) print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) o = (ML1["T1"]*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) o = ((ML1["T1"]+ML1["T2"])*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# for param in ["m13","m31","m23","m32"]: m = np.mean([i/(2*Nref) for i in DD[param]]) s = np.std([i/(2*Nref) for i in DD[param]]) o = (ML1[param]/(2*Nref)) unbiased = o-(m-o) print param, max(0,(unbiased-1.96*s)*1e3), o*1e3, (unbiased+1.96*s)*1e3 m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML1["LL"] unbiased = o-(m-o) print "LL", m-1.96*s, o, m+1.96*s DD = makedict("analysis_dadi/dadi_m2_f2_boots.txt", theta2) ############################# Nref = theta2/(4*mu*L) ############################# print "Model 2" for param in ["N1","N2","N3"]: m = np.mean([(i*Nref) for i in DD[param]]) s = np.std([(i*Nref) for i in DD[param]]) o = (ML2[param]*Nref) unbiased = o-(m-o) print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) o = (ML2["T1"]*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) o = ((ML2["T1"]+ML2["T2"])*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# for param in ["m13","m31","m23","m32"]: m = np.mean([i/(2*Nref) for i in DD[param]]) s = np.std([i/(2*Nref) for i in DD[param]]) o = (ML2[param]/(2*Nref)) unbiased = o-(m-o) print param, max(0,(unbiased-1.96*s)*1e3), o*1e3, (unbiased+1.96*s)*1e3 m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML2["LL"] unbiased = o-(m-o) #print "LL", unbiased-1.96*s, o, unbiased+1.96*s print "LL", m-1.96*s, o, m+1.96*s DD = makedict("analysis_dadi/dadi_m3_f3_boots.txt", theta2) ############################# Nref = theta3/(4*mu*L) ############################# print "Model 3" for param in ["N1","N2","N3"]: m = np.mean([(i*Nref) for i in DD[param]]) s = np.std([(i*Nref) for i in DD[param]]) o = (ML3[param]*Nref) unbiased = o-(m-o) print param, max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# m = np.mean([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) s = np.std([(i*Nref*2*gentime)/1e6 for i in DD["T1"]]) o = (ML3["T1"]*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T1", max(0,unbiased-1.96*s), o, unbiased+1.96*s m = np.mean([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) s = np.std([(i*Nref*2*gentime)/1e6 for i in (DD["T1"]+DD["T2"])]) o = ((ML3["T1"]+ML3["T2"])*Nref*2*gentime)/1e6 unbiased = o-(m-o) print "T12", max(0,unbiased-1.96*s), o, unbiased+1.96*s ############################# m = np.mean(DD["f"]) s = np.std(DD["f"]) o = (ML3['f']) unbiased = o-(m-o) print 'f', unbiased-1.96*s, o, unbiased+1.96*s m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML3["LL"] unbiased = o-(m-o) print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m1_f3_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML1["LL"] print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m1_f2_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML1["LL"] print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m2_f1_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML2["LL"] print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m2_f3_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML2["LL"] print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m3_f1_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML3["LL"] print "LL", m-1.96*s, o, m+1.96*s DD = makedict("dadi_m3_f2_boots.txt", theta2) m = np.mean(DD["LL"]) s = np.std(DD["LL"]) o = ML3["LL"] print "LL", m-1.96*s, o, m+1.96*s delta32obs = -2.*(ML3["LL"] - ML2["LL"]) delta31obs = -2.*(ML3["LL"] - ML1["LL"]) print 'LL model3:', ML3["LL"] print 'LL model2:', ML2["LL"] print 'LL model1:', ML1["LL"] print 'delta32 observed:', delta32obs print 'delta31 observed:', delta31obs W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL31 = pd.read_table("dadi_m3_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL32 = pd.read_table("dadi_m3_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','f','LL'] LL33 = pd.read_table("dadi_m3_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] delta32sim3 = -2.*(LL33-LL32) delta31sim3 = -2.*(LL33-LL31) plt.rcParams['figure.figsize'] = (6,4) sns.kdeplot(delta32sim3, shade=True, label="$\delta_{32}$") sns.kdeplot(delta31sim3, shade=True, label="$\delta_{31}$") sns.despine() plt.vlines(delta32obs, 0, 0.05, lw=3, color='#262626', label='$\delta_{32obs}$', linestyles='dashed') plt.vlines(delta31obs, 0, 0.05, lw=3, color='#F65655', label='$\delta_{31obs}$', linestyles='dashed') plt.title("model 3 sims") plt.legend(loc=2) delta32star = np.percentile(delta32sim3, 99) delta31star = np.percentile(delta31sim3, 99) print 'delta32star', delta32star print 'delta31star', delta31star print 'deltaobs', delta32obs print delta32obs > delta32star print delta31obs > delta31star W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL21 = pd.read_table("dadi_m2_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL22 = pd.read_table("dadi_m2_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','f','LL'] LL23 = pd.read_table("dadi_m2_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] delta32sim2 = -2.*(LL23-LL22) plt.rcParams['figure.figsize'] = (6,4) plt.gcf().subplots_adjust(bottom=0.20, left=0.15) sns.set_style("ticks") sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2}) sns.kdeplot(delta32sim2, shade=True, color="#898989",label='$\delta_{sim2}$') sns.kdeplot(delta32sim3, shade=True, color="#262626", label='$\delta_{sim3}$') sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density') sns.despine() #plt.vlines(delta32star, 0, 0.061, lw=3, color="#262626", # linestyles='dashed', label="$\delta^{*}$") plt.vlines(delta32obs, 0, 0.061, lw=3, color='#F65655', linestyles='dashed', label='$\delta_{obs}$') plt.legend(loc=1) plt.title("(B) models 2 vs. 3") plt.savefig("delta32.png", dpi=300) plt.savefig("delta32.svg") 1.-sum(delta32sim2 < delta32star)/float(len(delta32sim2)) W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL11 = pd.read_table("dadi_m1_f1_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','m13','m31','m23','m32','LL'] LL12 = pd.read_table("dadi_m1_f2_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] W = ['title','N1','N2','N3','T2','T1','f','LL'] LL13 = pd.read_table("dadi_m1_f3_boots.txt", sep="\t", names=W, index_col=0).sort()["LL"] delta31sim1 = -2.*(LL13-LL11) plt.rcParams['figure.figsize'] = (6,4) plt.gcf().subplots_adjust(bottom=0.20, left=0.15) sns.set_style("ticks") sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2}) sns.kdeplot(delta31sim1, shade=True, color="#898989",label='$\delta_{sim1}$') sns.kdeplot(delta31sim3, shade=True, color="#262626", label='$\delta_{sim3}$') sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density') sns.despine() plt.vlines(delta31obs, 0, 0.085, lw=3, color='#F65655', linestyles='dashed', label='$\delta_{obs}$') plt.legend(loc=1) plt.title("(A) models 1 vs. 3") plt.savefig("delta31.png", dpi=300) plt.savefig("delta31.svg") 1.-sum(delta31sim1 < delta31star)/float(len(delta31sim1)) delta12obs = -2.*(ML1["LL"] - ML2["LL"]) delta13obs = -2.*(ML1["LL"] - ML3["LL"]) print 'LL model3:', ML3["LL"] print 'LL model2:', ML2["LL"] print 'LL model1:', ML1["LL"] print 'delta12 observed:', delta12obs print 'delta13 observed:', delta13obs delta12sim1 = -2.*(LL11-LL12) print 'obs=', delta12obs delta12star = np.percentile(delta12sim1, 95) print '\ndelta*95=',delta12star print delta12obs > delta12star delta12star = np.percentile(delta12sim1, 94) print '\ndelta*94=',delta12star print delta12obs > delta12star delta12star = np.percentile(delta12sim1, 93) print '\ndelta*93=',delta12star print delta12obs > delta12star delta12sim2 = -2.*(LL21-LL22) plt.rcParams['figure.figsize'] = (6,4) plt.gcf().subplots_adjust(bottom=0.20, left=0.15) sns.set_context("paper", font_scale=1.8, rc={"lines.linewidth": 2}) sns.kdeplot(delta12sim1, shade=True, color="#262626", label='$\delta_{sim1}$') sns.kdeplot(delta12sim2, shade=True, color="#898989", label='$\delta_{sim2}$') sns.axlabel('Log Likelihood ratio ($\delta$)', 'Density') sns.despine() plt.vlines(delta12obs, 0, 0.055, lw=3, color='#F65655', linestyles='dashed', label='$\delta_{obs}$') plt.legend(loc=2) plt.title("(C) models 1 vs. 2") plt.savefig("delta12.png", dpi=300) plt.savefig("delta12.svg") sum(delta12sim2 > delta12star)/float(len(delta12sim2))