#Created by Kurt Galbreath (kurt.galbreath@wwu.edu); copyright 2009 #this script creates garli.conf files and a batch file to run a garli batch analysis; it is useful for ML parametric bootstrapping in conjunction with simulated matrices from Mesquite #nreps = the number of data matrices to analyze #matrixname = the base name of the data matrices (minus the 1 through N suffix); if you used MESQUITE to create the matrices this is the same as the matrixname that you provided there #noconstrain = tells the script to produce a batch file and garli.config files for analyses without a topological constraint; change to FALSE to not produce the unconstrained analysis #constrain = tells the script to produce a batch file and garli.config files for analyses that include a topological constraint; change to TRUE and give the constraint file name in constraintfile #constraintfile = name of a file with a topological constraint for Garli #if you indicate a constraint, the script will first create a .conf file for the unconstrained analysis and a record in the batch file, then the .conf file for the constrained analysis and a record in the batch file #partmodel = this tells the script whether or not you require config files that include partitioned substitution models; TRUE indicates that there is a partition to include #numparts = this tells the script how many model partitions to include; note that if you are doing site specific rates for codon positions, you only need one model partition, but the codon positions need to be defined as sets in the nexus file and linkmodels and subsetspecificrates must be set to 1 #execname = currently there are different executables for GARLI; you can tell the script which you are using here #the remaining arguments are identical to the settings in the garli.conf file #for the model settings (datatype, ratematrix, statefrequencies, ratehetmodel, numratecats, and invariantsites) you must input a vector of length equal to the number of model partitions, with the elements of the vector corresponding to model0, model1, model2, etc. All model settings require this even if the setting is the same for all partitions. Remember that vectors in R are in the following format: c(24, 'sample', '45 data') garliBatchMaker<-function(nreps, matrixname, noconstrain=T, constrain=F, constraintfile=NA, partmodel=F, numparts=1, execname='Garli0.96.exe', streefname='random', attachmentspertaxon=100, randseed=(-1), availablememory=512, logevery=10, saveevery=100, refinestart=1, outputeachbettertopology=0, outputcurrentbesttopology=0, enforcetermconditions=1, genthreshfortopoterm=10000, scorethreshforterm=0.001, significanttopochange=0.01, outputphyliptree=0, outputmostlyuselessfiles=0, writecheckpoints=0, restart=0, outgroup=1, searchreps=2, collapsebranches=1, linkmodels=1, subsetspecificrates=1, datatype='nucleotide', ratematrix='6rate', statefrequencies='estimate', ratehetmodel='gamma', numratecats=1, invariantsites='estimate', nindivs=4, holdover=1, selectionintensity=0.5, holdoverpenalty=0, stopgen=5000000, stoptime=5000000, startoptprec=0.5, minoptprec=0.01, numberofprecreductions=10, treerejectionthreshold=50.0, topoweight=0.01, modweight=0.002, brlenweight=0.002, randnniweight=0.1, randsprweight=0.3, limsprweight=0.6, intervallength=100, intervalstostore=5, limsprrange=6, meanbrlenmuts=5, gammashapebrlen=1000, gammashapemodel=1000, uniqueswapbias=0.1, distanceswapbias=1.0, bootstrapreps=0, resampleproportion=1.0, inferinternalstateprobs=0){ garli.conf<- NA for (i in 1:nreps){ if (noconstrain==T){ counter<- 25 garli.conf[1]<- '[general]' garli.conf[2]<- paste('datafname = ', matrixname, (i-1), '.nex', sep='') garli.conf[3]<- paste('constraintfile = none') garli.conf[4]<- paste('streefname =', streefname) garli.conf[5]<- paste('attachmentspertaxon =', attachmentspertaxon) garli.conf[6]<- paste('ofprefix = ', matrixname, (i-1), '.garli', sep='') garli.conf[7]<- paste('randseed =', randseed) garli.conf[8]<- paste('availablememory =', availablememory) garli.conf[9]<- paste('logevery =', logevery) garli.conf[10]<- paste('saveevery =', saveevery) garli.conf[11]<- paste('refinestart =', refinestart) garli.conf[12]<- paste('outputeachbettertopology =', outputeachbettertopology) garli.conf[13]<- paste('outputcurrentbesttopology =', outputcurrentbesttopology) garli.conf[14]<- paste('enforcetermconditions =', enforcetermconditions) garli.conf[15]<- paste('genthreshfortopoterm =', genthreshfortopoterm) garli.conf[16]<- paste('scorethreshforterm =', scorethreshforterm) garli.conf[17]<- paste('significanttopochange =', significanttopochange) garli.conf[18]<- paste('outputphyliptree =', outputphyliptree) garli.conf[19]<- paste('outputmostlyuselessfiles =', outputmostlyuselessfiles) garli.conf[20]<- paste('writecheckpoints =', writecheckpoints) garli.conf[21]<- paste('restart =', restart) garli.conf[22]<- paste('outgroup =', outgroup) garli.conf[23]<- paste('searchreps =', searchreps) garli.conf[24]<- paste('collapsebranches =', collapsebranches) if (partmodel==T){ garli.conf[counter]<- paste('linkmodels =', linkmodels) garli.conf[counter+1]<- paste('subsetspecificrates =', subsetspecificrates) counter<- (counter+2) } for (m in 1:numparts){ garli.conf[(counter)] <- paste('[model', (m-1), ']', sep='') garli.conf[(counter+1)] <- paste('datatype =', datatype[m]) garli.conf[(counter+2)] <- paste('ratematrix =', ratematrix[m]) garli.conf[(counter+3)] <- paste('statefrequencies =', statefrequencies[m]) garli.conf[(counter+4)] <- paste('ratehetmodel =', ratehetmodel[m]) garli.conf[(counter+5)] <- paste('numratecats =', numratecats[m]) garli.conf[(counter+6)] <- paste('invariantsites =', invariantsites[m]) counter<-(counter+7) } garli.conf[counter]<- '[master]' garli.conf[counter+1]<- paste('nindivs =', nindivs) garli.conf[counter+2]<- paste('holdover =', holdover) garli.conf[counter+3]<- paste('selectionintensity =', selectionintensity) garli.conf[counter+4]<- paste('holdoverpenalty =', holdoverpenalty) garli.conf[counter+5]<- paste('stopgen =', stopgen) garli.conf[counter+6]<- paste('stoptime =', stoptime) garli.conf[counter+7]<- paste('startoptprec =', startoptprec) garli.conf[counter+8]<- paste('minoptprec =', minoptprec) garli.conf[counter+9]<- paste('numberofprecreductions =', numberofprecreductions) garli.conf[counter+10]<- paste('treerejectionthreshold =', treerejectionthreshold) garli.conf[counter+11]<- paste('topoweight =', topoweight) garli.conf[counter+12]<- paste('modweight =', modweight) garli.conf[counter+13]<- paste('brlenweight =', brlenweight) garli.conf[counter+14]<- paste('randnniweight =', randnniweight) garli.conf[counter+15]<- paste('randsprweight =', randsprweight) garli.conf[counter+16]<- paste('limsprweight =', limsprweight) garli.conf[counter+17]<- paste('intervallength =', intervallength) garli.conf[counter+18]<- paste('intervalstostore =', intervalstostore) garli.conf[counter+19]<- paste('limsprrange =', limsprrange) garli.conf[counter+20]<- paste('meanbrlenmuts =', meanbrlenmuts) garli.conf[counter+21]<- paste('gammashapebrlen =', gammashapebrlen) garli.conf[counter+22]<- paste('gammashapemodel =', gammashapemodel) garli.conf[counter+23]<- paste('uniqueswapbias =', uniqueswapbias) garli.conf[counter+24]<- paste('distanceswapbias =', distanceswapbias) garli.conf[counter+25]<- paste('bootstrapreps =', bootstrapreps) garli.conf[counter+26]<- paste('resampleproportion =', resampleproportion) garli.conf[counter+27]<- paste('inferinternalstateprobs =', inferinternalstateprobs) write(garli.conf, file=paste(matrixname, i-1, '.conf', sep=''), sep='\n') } if (constrain==T){ counter<- 25 garli.conf[1]<- '[general]' garli.conf[2]<- paste('datafname = ', matrixname, (i-1), '.nex', sep='') garli.conf[3]<- paste('constraintfile = ', constraintfile, sep='') garli.conf[4]<- paste('streefname =', streefname) garli.conf[5]<- paste('attachmentspertaxon =', attachmentspertaxon) garli.conf[6]<- paste('ofprefix = ', matrixname, (i-1), '.const.garli', sep='') garli.conf[7]<- paste('randseed =', randseed) garli.conf[8]<- paste('availablememory =', availablememory) garli.conf[9]<- paste('logevery =', logevery) garli.conf[10]<- paste('saveevery =', saveevery) garli.conf[11]<- paste('refinestart =', refinestart) garli.conf[12]<- paste('outputeachbettertopology =', outputeachbettertopology) garli.conf[13]<- paste('outputcurrentbesttopology =', outputcurrentbesttopology) garli.conf[14]<- paste('enforcetermconditions =', enforcetermconditions) garli.conf[15]<- paste('genthreshfortopoterm =', genthreshfortopoterm) garli.conf[16]<- paste('scorethreshforterm =', scorethreshforterm) garli.conf[17]<- paste('significanttopochange =', significanttopochange) garli.conf[18]<- paste('outputphyliptree =', outputphyliptree) garli.conf[19]<- paste('outputmostlyuselessfiles =', outputmostlyuselessfiles) garli.conf[20]<- paste('writecheckpoints =', writecheckpoints) garli.conf[21]<- paste('restart =', restart) garli.conf[22]<- paste('outgroup =', outgroup) garli.conf[23]<- paste('searchreps =', searchreps) garli.conf[24]<- paste('collapsebranches =', collapsebranches) if (partmodel==T){ garli.conf[counter]<- paste('linkmodels =', linkmodels) garli.conf[counter+1]<- paste('subsetspecificrates =', subsetspecificrates) counter<- (counter+2) } for (n in 1:numparts){ garli.conf[(counter)] <- paste('[model', (n-1), ']', sep='') garli.conf[(counter+1)] <- paste('datatype =', datatype[n]) garli.conf[(counter+2)] <- paste('ratematrix =', ratematrix[n]) garli.conf[(counter+3)] <- paste('statefrequencies =', statefrequencies[n]) garli.conf[(counter+4)] <- paste('ratehetmodel =', ratehetmodel[n]) garli.conf[(counter+5)] <- paste('numratecats =', numratecats[n]) garli.conf[(counter+6)] <- paste('invariantsites =', invariantsites[n]) counter<-(counter+7) } garli.conf[counter]<- '[master]' garli.conf[counter+1]<- paste('nindivs =', nindivs) garli.conf[counter+2]<- paste('holdover =', holdover) garli.conf[counter+3]<- paste('selectionintensity =', selectionintensity) garli.conf[counter+4]<- paste('holdoverpenalty =', holdoverpenalty) garli.conf[counter+5]<- paste('stopgen =', stopgen) garli.conf[counter+6]<- paste('stoptime =', stoptime) garli.conf[counter+7]<- paste('startoptprec =', startoptprec) garli.conf[counter+8]<- paste('minoptprec =', minoptprec) garli.conf[counter+9]<- paste('numberofprecreductions =', numberofprecreductions) garli.conf[counter+10]<- paste('treerejectionthreshold =', treerejectionthreshold) garli.conf[counter+11]<- paste('topoweight =', topoweight) garli.conf[counter+12]<- paste('modweight =', modweight) garli.conf[counter+13]<- paste('brlenweight =', brlenweight) garli.conf[counter+14]<- paste('randnniweight =', randnniweight) garli.conf[counter+15]<- paste('randsprweight =', randsprweight) garli.conf[counter+16]<- paste('limsprweight =', limsprweight) garli.conf[counter+17]<- paste('intervallength =', intervallength) garli.conf[counter+18]<- paste('intervalstostore =', intervalstostore) garli.conf[counter+19]<- paste('limsprrange =', limsprrange) garli.conf[counter+20]<- paste('meanbrlenmuts =', meanbrlenmuts) garli.conf[counter+21]<- paste('gammashapebrlen =', gammashapebrlen) garli.conf[counter+22]<- paste('gammashapemodel =', gammashapemodel) garli.conf[counter+23]<- paste('uniqueswapbias =', uniqueswapbias) garli.conf[counter+24]<- paste('distanceswapbias =', distanceswapbias) garli.conf[counter+25]<- paste('bootstrapreps =', bootstrapreps) garli.conf[counter+26]<- paste('resampleproportion =', resampleproportion) garli.conf[counter+27]<- paste('inferinternalstateprobs =', inferinternalstateprobs) write(garli.conf, file=paste(matrixname, i-1, '_const.conf', sep=''), sep='\n') } if (noconstrain==T){ batlist<- paste(execname, ' -b ', matrixname, i-1, '.conf', sep='') write(batlist, file=paste(matrixname, 'garlirun.bat', sep=''), append=T) } if (constrain==T){ batlist<- paste(execname, ' -b ', matrixname, i-1, '_const.conf', sep='') write(batlist, file=paste(matrixname, 'garlirun.bat', sep=''), append=T) } } }