## RevBayes script to infer concatenated tree for Etheostoma lepidum ## Last modified 2/27/2019 by Dan MacGuigan #################### # Read in the data # #################### rRNA18s <- readDiscreteCharacterData("18s_BLAST-genbankCalanoida_sub_realign.nex") rRNA28s <- readDiscreteCharacterData("28s_BLAST-genbankCalanoida_sub_realign.nex") # get some basic info about the dataset n_taxa <- rRNA18s.ntaxa() n_branches <- 2 * n_taxa - 2 taxa <- rRNA18s.taxa() # set counters to 0 mvi = 0 mni = 0 n_data_subsets = 2 ####################### # Substitution Models # ####################### # rRNA18s # SYM + Gamma er_prior <- v(1,1,1,1,1,1) rRNA18s_er ~ dnDirichlet(er_prior) moves[++mvi] = mvBetaSimplex(rRNA18s_er, weight=3) moves[++mvi] = mvDirichletSimplex(rRNA18s_er, weight=1) rRNA18s_pi <- simplex(rep(1,4)) # specify constant node for base frequencies rRNA18s_Q := fnGTR(rRNA18s_er,rRNA18s_pi) # rRNA28s # SYM + Gamma er_prior <- v(1,1,1,1,1,1) rRNA28s_er ~ dnDirichlet(er_prior) moves[++mvi] = mvBetaSimplex(rRNA28s_er, weight=3) moves[++mvi] = mvDirichletSimplex(rRNA28s_er, weight=1) rRNA28s_pi <- simplex(rep(1,4)) # specify constant node for base frequencies rRNA28s_Q := fnGTR(rRNA28s_er,rRNA28s_pi) ############################# # Among site rate variation # ############################# # rRNA18s # among site rate variation, +Gamma4 alpha_prior_mean <- ln(2.0) alpha_prior_sd <- 0.587405 rRNA18s_alpha ~ dnLognormal( alpha_prior_mean, alpha_prior_sd ) moves[++mvi] = mvScale(rRNA18s_alpha, lambda=1.0, weight=2.0) rRNA18s_sr := fnDiscretizeGamma( rRNA18s_alpha, rRNA18s_alpha, 4, false ) # rRNA28s # among site rate variation, +Gamma4 alpha_prior_mean <- ln(2.0) alpha_prior_sd <- 0.587405 rRNA28s_alpha ~ dnLognormal( alpha_prior_mean, alpha_prior_sd ) moves[++mvi] = mvScale(rRNA28s_alpha, lambda=1.0, weight=2.0) rRNA28s_sr := fnDiscretizeGamma( rRNA28s_alpha, rRNA28s_alpha, 4, false ) ############################## # Partition rate multipliers # ############################## # specify a rate multiplier for each partition part_rate_mult ~ dnDirichlet( rep(1.0, n_data_subsets) ) moves[++mvi] = mvBetaSimplex(part_rate_mult, alpha=1.0, tune=true, weight=n_data_subsets) moves[++mvi] = mvDirichletSimplex(part_rate_mult, alpha=1.0, tune=true, weight=2.0) # note that we use here a vector multiplication, # i.e., multiplying each element of part_rate_mult by n_data_subsets part_rate := part_rate_mult * n_data_subsets ############################################ # Uncorrelated lognormal rates clock model # ############################################ ucln_mean ~ dnExponential(1000.0) ucln_sigma ~ dnExponential(10.0) ucln_var := ucln_sigma * ucln_sigma ucln_mu := ln(ucln_mean) - (ucln_var * 0.5) moves[mvi++] = mvScale(ucln_mean, lambda=1.0, tune=true, weight=4.0) moves[mvi++] = mvScale(ucln_sigma, lambda=0.5, tune=true, weight=4.0) for(i in 1:n_branches){ branch_rates[i] ~ dnLnorm(ucln_mu, ucln_sigma) moves[mvi++] = mvScale(branch_rates[i], lambda=1, tune=true, weight=2.0) } moves[mvi++] = mvVectorScale(branch_rates,lambda=1.0,tune=true,weight=2.0) moves[mvi++] = mvVectorSingleElementScale(branch_rates,lambda=30.0,tune=true,weight=1.0) mean_rt := mean(branch_rates) ########################## # Birth-Death Tree Model # ########################## diversification ~ dnExponential(10.0) diversification.setValue(0.01) moves[mvi++] = mvScale(diversification, lambda=1.0, tune=true, weight=3.0) turnover ~ dnBeta(2.0, 2.0) turnover.setValue(0.5) moves[mvi++] = mvSlide(turnover,delta=1.0,tune=true,weight=3.0) denom := abs(1.0 - turnover) birth_rate := diversification / denom death_rate := (turnover * diversification) / denom # sampling probability (we have sampled only a tiny fraction of all copepod species) rho <- 0.01 # prior on the root node # based on analysis by Eyun 2017 BMC Evol. Biol. root_time ~ dnNormal(mean=266.3, sd=20.3, min=0.0, max=Inf) root_time.setValue(270) timetree ~ dnBDP(lambda=birth_rate, mu=death_rate, rho=rho, rootAge=root_time, samplingStrategy="uniform", condition="nTaxa", taxa=taxa) print("time tree set") #c1 = clade("Acanthodiaptomus_pacificus", "Arctodiaptomus_salinus", "Calanipeda_aquaedulcis", "Copidodiaptomus_numidicus", "Diaptomus_castor", "Diaptomus_cyaneus", "Diaptomus_kenitraensis", "Diaptomus_mirus", "Eudiaptomus_vulgaris", "Heliodiaptomus_kikuchii", "Neodiaptomus_schmackeri", "Pseudodiaptomus_inopinus", "Pseudodiaptomus_marinus", "Pseudodiaptomus_nihonkaiensis", "Sinodiaptomus_sarsi") #c2 = clade("Candacia_simplex", "Centropages_abdominalis", "Centropages_furcatus", "Centropages_violaceus", "Epischura_baikalensis", "Epischura_chankensis", "Epischura_fluviatilis", "Epischura_lacustris", "Epischura_nevadensis", "Epischura_nordenskioldi", "Heterocope_septentrionalis", "Limnocalanus_macrurus", "Pontellina_plumata", "Sinocalanus_tenellus", "Sulcanus_conflictus", "Temora_discaudata", "Temora_longicornis", "Temora_turbinata", "Tortanus_gracilis") #c3 = clade("Subeucalanus_pileatus_1", "Subeucalanus_pileatus_2", "Acrocalanus_longicornis", "Aetideus_armatus", "Arctodiaptomus_wierzejskii", "Bathycalanus_princeps", "Calanoides_carinatus", "Calanus_finmarchicus", "Calanus_helgolandicus", "Calanus_hyperboreus", "Calanus_pacificus", "Calanus_propinquus", "Calanus_sinicus", "Calocalanus_curtus", "Calocalanus_minutus", "Calocalanus_plumulosus", "Calocalanus_styliremis", "Clausocalanus_arcuicornis", "Cosmocalanus_darwinii", "Ctenocalanus_vanus", "Delibus_nudus", "Diaixis_hibernica", "Euchaeta_indica", "Euchaeta_rimana", "Foxtonia_barbatula", "Mecynocera_clausi", "Microcalanus_pygmaeus", "Nannocalanus_minor", "Neocalanus_cristatus", "Neocalanus_flemingeri", "Neocalanus_gracilis", "Neocalanus_plumchrus", "Neocalanus_robustior", "Paracalanus_aculeatus", "Paracalanus_denudatus", "Paracalanus_indicus", "Paracalanus_parvus", "Paracalanus_tropicus", "Paraeuchaeta_glacialis", "Paraeuchaeta_norvegica", "Pareucalanus_attenuatus", "Phaenna_spinifera", "Rhincalanus_cornutus", "Rhincalanus_nasutus", "Rhincalanus_rostrifrons", "Scolecithrix_bradyi", "Scolecithrix_danae", "Spinocalanus_abyssalis", "Spinocalanus_angusticeps", "Spinocalanus_elongatus", "Spinocalanus_horridus", "Spinocalanus_spinosus", "Subeucalanus_crassus", "Subeucalanus_subcrassus", "Subeucalanus_subtenuis", "Temorites_brevis", "Temoropia_mayumbaensis", "Tharybis_groenlandica", "Undeuchaeta_major") #constraints = v(c1, c2, c3) #print("constraints set") #timetree ~ dnConstrainedTopology(tree_dist, constraints=constraints) #print("time tree set") # read in starting tree, from previous IQTree analysis of 18s + 28s T <- readTrees("initTree_subCal.tre") # set starting tree as the IQTree topology timetree.setValue(T[1]) print("starting tree set") # node age proposals moves[mvi++] = mvNodeTimeSlideUniform(timetree, weight=30.0) moves[mvi++] = mvSlide(root_time, delta=2.0, tune=true, weight=10.0) moves[mvi++] = mvScale(root_time, lambda=2.0, tune=true, weight=10.0) moves[mvi++] = mvTreeScale(tree=timetree, rootAge=root_time, delta=1.0, tune=true, weight=3.0) # topology change proposals moves[mvi++] = mvNNI(timetree, weight=8.0) moves[mvi++] = mvNarrow(timetree, weight=8.0) moves[mvi++] = mvFNPR(timetree, weight=8.0) ################### # PhyloCTMC Model # ################### rRNA18s_phyloSeq ~ dnPhyloCTMC(tree=timetree, Q=rRNA18s_Q, branchRates=part_rate[1]*branch_rates, siteRates=rRNA18s_sr, type="DNA") rRNA18s_phyloSeq.clamp(rRNA18s) rRNA28s_phyloSeq ~ dnPhyloCTMC(tree=timetree, Q=rRNA28s_Q, branchRates=part_rate[2]*branch_rates, siteRates=rRNA28s_sr, type="DNA") rRNA28s_phyloSeq.clamp(rRNA28s) ############ # Analysis # ############ mymodel = model(timetree) print("model set") # add monitors monitors[++mni] = mnModel(filename="rRNA18s-rRNA28s_timeTree.log",printgen=100) monitors[++mni] = mnFile(timetree, filename="rRNA18s-rRNA28s_timeTree.trees", printgen=100) monitors[++mni] = mnScreen(root_time, printgen=100) print("monitors set") # run the analysis mymcmc = mcmc(mymodel, moves, monitors, nruns=2) #mymcmc.burnin(10000,100) mymcmc.run(100000) # summarize output treetrace = readTreeTrace("rRNA18s-rRNA28s_timeTree.trees", treetype="clock") map_tree = mapTree(treetrace,"rRNA18s-rRNA28s_timeTree.map.tre") consensusTree(treetrace, "rRNA18s-rRNA28s_timeTree.con.tre") mcc_tree = mccTree(treetrace,"rRNA18s-rRNA28s_timeTree.mcc.tre") # quit RevBayes q()