################################################################################ # Parmfile for Migrate 3.5.2-2152 [do not remove these first TWO lines] # generated automatically on # Mon Apr 22 14:35:18 2013 # # please report problems to Peter Beerli # email: beerli@fsu.edu # http://popgen.csit.fsu.edu/migrate.html ################################################################################ # ################################################################################ # General options ################################################################################ # # Interactive or batch job usage # Syntax: menu= < YES | NO > # For batch runs it needs to be set to NO menu=YES # # Specification of length of names of individuals # Syntax: nmlength= nmlength=10 # # ################################################################################ # Data options ################################################################################ # # Several different main datatypes are possible: # INFINITE ALLELE: usable for electrophoretic markers, # other markers with unknown mutation model # STEPWISE MUTATION: usable for microsatellite data or # other markers with stepwise change # from one allele to another # [singlestep versus multistep model, see micro-submodel option] # FINITE SITES MUTATION: standard DNA/RNA sequence mutation # model, usable for DNA or RNA contiguous # sequences or variable sites only (SNP) # GENEALOGY SUMMARY: reanalyzing an old migrate run # #------------------------------------------------------------------------------- # INFINITE ALLELE # Syntax: datatype=ALLELICDATA # include-unknown=< YES | NO > with YES unknown alleles # are included into analysis, NO is the default # #------------------------------------------------------------------------------- # # STEPWISE MUTATION # Syntax: datatype= # 1 means singlestep mutation model (this is the default and the standard) # 2 is the Multistep model (see Watkins 2007 TPB, section 4.2) it needs # two 
parameters: tune specifies how close the model is to a singlestep model # so tune=0 --> singlestep, tune=1 --> infinite allele model; # the second parameter defines the probability that the repeat number # is increasing, this value cannot be larger than 0.666, I suggest 0.5. # Example: micro-submodel=2:{0.5,0.5} # micro-threshold= Default is 10 [MICRO only, NEEDS TO BE EVEN!], # smaller values speed up analysis, but might also # crash, large values slow down analysis considerably. # Change this value only when you suspect that your # data has huge gaps in repeat length. # include-unknown= with YES unknown alleles # are included into analysis, NO is the default # #------------------------------------------------------------------------------- # # FINITE SITES MUTATION # Syntax: datatype= # calculate the prior base frequencies from the data, # or specify the frequencies # ttratio= Default is 2.0, # ratio between transitions and transversions. # seq-error= Default is 0.0, typical values for ABI 3700 # sequencers after base calling are around 0.001 (1/650) # categories= The categories are integers or letters # specified in file called CATFILE, this assumes that all # sites belong to known categories, this can be used to # weight third positions etc. # rates= the rates are specified arbitrarily or # then are from a Gamma distribution with alpha=x, currently # the alpha value gets lost and is not recorded in the parmfile # prob-rates= These rates can be arbitrary or # generated with gamma-deviated rates and then are derived # using Laguerre's quadrature, this should get better # results than equal probability methods. # autocorrelation= Default is NO # autocorrelation makes only sense with rates, # VALUE should be >1.0 # weights= The weights are specified # in file called WEIGHTFILE, this assumes that all sites # belong to known weights, this can be used to weight # portions of the sequence etc. # interleaved= Use either an interleaved or # non-interleaved format. 
Default is NO, # interleaved=YES is discouraged # fast-likelihood= Default is YES, use NO when you # have many hundred individuals and get strange errors # during a run, NO is scaling the conditional likelihood # so that very small values are >0.00000 # inheritance-scalars={values for each locus} # these values are multiplied with Theta, for example having # two autosomal and a locus on X- and one on Y-chromosome we would give # inheritance-scalars={1 1 0.75 0.25} # [if all loci have the same scalar, just use {1}, even for many loci] # population-relabel={assignment for each location in the infile} # example is population-relabel={1 2 2} # random-subset=number<:seed> # allows subsetting the dataset randomly, if number > sample in population # all samples are taken, if number is smaller then the pop sample is shuffled # and the first number samples are taken. # the random number seed guarantees that the # same subset is chosen in different runs # usertree= # Default is RANDOM, NO delivers a UPGMA tree using the data # with TREE and DISTANCE the user needs to # give a usertreefile or a pairwise distance file, with RANDOM # a random tree will be the starting tree # #------------------------------------------------------------------------------- # # datatype=BrownianMicrosatelliteData include-unknown=NO inheritance-scalars={1.00000000000000000000} population-relabel={1, 2} random-subset=5:13 usertree=AUTOMATIC # ################################################################################ # Input options ################################################################################ # # input file location # Syntax infile=FILEPATH infile=infile # # Random number seed specification # Syntax random-seed=< AUTO | OWN:seedfile | OWN:value > # AUTO uses computer system clock to generate seed # OWN:seedfile uses file seedfile with random number seed # OWN:value uses number value for seed random-seed=OWN:310705631 # # Specify the title of the run, will be overridden by title in datafile # Syntax: 
title=title text [up to 80 characters] title=AUTO # # ################################################################################ # Output options ################################################################################ # # Progress report to the window where the program was started # Syntax: progress=< NO | YES | VERBOSE > # NO nothing is printed to the console # YES some messages about progress are reported [default] # VERBOSE more messages are reported to console progress=YES # #------------------------------------------------------------------------------- # # Recording messages to screen into logfile # Syntax logfile=< NONE | logfilename > # NONE no recording of progress # logfilename path to logfile logfile=NO # #------------------------------------------------------------------------------- # # Print the data as read into the program # Syntax print-data= print-data=NO # #------------------------------------------------------------------------------- # # Print output to file [default is outfile] # Syntax outfile=outfilename outfile=outfile-bayes # #------------------------------------------------------------------------------- # # Print output to a PDF file [default is outfile.pdf] # Syntax pdf-outfile=outfilename.pdf pdf-outfile=outfile-bayes.pdf # #------------------------------------------------------------------------------- # # Report M (=migration rate/mutation rate) instead of 4Nm or 2 Nm or Nm # Syntax use-M= Default is YES, the name 4Nm is ambiguous # for non-diploid data use-M=YES # #------------------------------------------------------------------------------- # # Plotting parameters: migration versus population size, such that Theta1 x immigration_.1 # this shows the sum of all immigrations into a population # Syntax plot=< NO | YES:< BOTH | OUTFILE >:< LOG | STD >: # {x-start, x-end, y-start, y-end}:< N | M >:interval > # NO do not show a plot # YES show plot with following specifications # BOTH print raw coordinates into MATHFILE and plot to OUTFILE # OUTFILE plot only to OUTFILE # LOG scaling of both axes # STD non-log scaling 
# {...} plot range of both parameters # N use xNm to plot immigration, x=<1,2,3,4> # depending on the inheritance characteristic of the data # M plot migration rate/mutation rate as immigration axis # interval the plot range is broken up into interval intervals plot=NO # # Print plot data into a file # Syntax: mathfile=mathfile the values are printed in a Mathematica-readable way mathfile=mathfile # #------------------------------------------------------------------------------- # # Profile likelihood for each estimated parameter # Syntax profile=< NONE | < ALL | TABLES | SUMMARY >: # < PRECISE | QUICK | FAST | DISCRETE > > # NONE do not calculate profile likelihoods # ALL print individual profile tables and summary [default] # TABLES show only tables and no summary # SUMMARY show only summary # PRECISE evaluate profile likelihood at percentiles [Default] # QUICK assumes that there is no interaction of parameters # FAST same as QUICK except in last calculation cycle assumes interaction # DISCRETE uses fixed multipliers: 0.02,0.1,0.2,0.5,1,2,5,10,50 profile=ALL:PRECISE # #------------------------------------------------------------------------------- # # Print tree into treefile # Syntax print-tree=< NONE | < ALL | BEST | LASTCHAIN:INCREMENT >:treefile > # NONE no tree printed [Default, and only choice using parallel] # ALL print all visited genealogies [careful this will be huge] # BEST print only the best tree visited # LASTCHAIN print all trees in last chain # with increment INCREMENT print-tree=NONE # #------------------------------------------------------------------------------- # # write intermediate minimal statistics into a file for later use # Syntax write-summary= # Default is NO, with YES the user needs to # give a file to record the summary statistics write-summary=NO # #------------------------------------------------------------------------------- # # Likelihood ratio test # Syntax l-ratio=< NO | YES:values_to_test > # Values_to_test are compared to the values generated in the run # values_to_test={ab..bbab..ba ... 
a} # the {} is a square matrix with values for the population sizes # on the diagonal and migration rates off-diagonal # the values a for the diagonal can be any of these: # number constant, the value is for example 0.002 # * free to vary, the default is * for every parameter # m mean of theta, this can be a subgroup of all thetas # for example the theta 1-3 are averaged and thetas 4,5 are estimated # the values b for the migration rates can be any of these: # number constant, the value is for example 45.0 or 0.0 # * free to vary, the default is * for every parameter # m mean of M_ij, this can be a subgroup of migration rates # for example the M_1-3i are averaged and M_4,5i are estimated # M means of 4Nm (diploid), 2Nm (haploid), Nm (mtDNA, Y-chromosome) # s symmetric migration rates M # S symmetric migrants 4Nm # an example for 5 populations could look like this: # l-ratio=YES:{*s00s s*s00 0s*s0 00s*s s00s*} # this describes a circular stepping stone model with 5 symmetric rates # and independent sizes, a very basic stepping stone with 2 parameters would # look like this l-ratio=YES:{mm00m mmm00 0mmm0 00mmm m00mm} # [The L-RATIO statement can be repeated] # Default: l-ratio=NO # #------------------------------------------------------------------------------- # # AIC model selection [do not use yet, will come in Summer 2004] # Syntax aic-modeltest=< NO | YES:< FAST | EXHAUSTIVE > > # FAST [do not use yet] # EXHAUSTIVE [do not use yet] aic-modeltest=NO # #------------------------------------------------------------------------------- # # Print a histogram of the time of migration events for each M(i->j) # Syntax mig-histogram=< NO | < ALL | MIGRATIONEVENTSONLY >:binsize:mighistfile > # NO do not record any events # ALL record migration and coalescence events # MIGRATIONEVENTSONLY record only migration events # binsize has to be in mutation units, with an average Theta=0.01 try 0.001 # Print a histogram of the parameters through time (skyline plot) # Syntax skyline=< NO | YES:binsize:skylinefile > # NO do not calculate parameter estimates through 
time # YES calculate parameters through time # binsize has to be in mutation units, with an average Theta=0.01 try 0.001 # If the interval is too fine the output will be very noisy mig-histogram=NO skyline=NO #needs mig-histogram=ALL:... # # ################################################################################ # Parameter start settings ################################################################################ # # Syntax: theta= # migration= # FST starting parameters are derived from # an FST-like calculation (Beerli & Felsenstein 1999) # OWN starting values are supplied by user # {value} if only one value is supplied then all populations # have the same starting value # {value1, value2, ..., valuen} each population has its # own starting value, if the number of values is # insufficient, then the last value is the template # for the remaining populations # NRANDOM starting parameter is drawn randomly from a Normal distribution # {mean std} with mean and standard deviation # URANDOM starting parameter is drawn randomly from a Uniform distribution # {min max} with minimum and maximum values theta=URANDOM: {0.000000 10.000000} migration=FST # #------------------------------------------------------------------------------- # Mutation rate modifiers # # Syntax: mutation= # NOGAMMA all loci have same mutation rate # CONSTANT all loci have same mutation rate # ESTIMATE BAYESIAN estimate: mutation rate is drawn from prior # GAMMA:alpha ML estimate: mutation rate has Gamma distribution with alpha # OWN mutation rate is different for every locus, but fixed # :loci: rate1, ... number of loci, rate of locus 1, locus 2 etc. # DATA mutation rate modifier is deduced from loci in the data # using Watterson's Theta and then scaling all rates Theta_locus/mean(Theta_loci) mutation=CONSTANT # # Treatment of invariant sequence loci # Syntax: analyze-loci=< A | F | V > # A = analyze all loci (Default!) 
# F = analyze all variable loci and ONE invariant and extrapolate # V = analyze only variable loci #analyze-loci=A # #------------------------------------------------------------------------------- # FST model # fst-type=THETA # #------------------------------------------------------------------------------- # Custom migration model # # Syntax: custom-migration={ab..bbab..ba ... a} # the {} is a square matrix with values for the population sizes # on the diagonal and migration rates off-diagonal # the values a for the diagonal can be any of these: # c constant, the value needs to be defined in the theta option # * free to vary, the default is * for every parameter # m mean of theta, this can be a subgroup of all thetas # for example the theta 1-3 are averaged and thetas 4,5 are estimated # the values b for the migration rates can be any of these: # c constant, the value needs to be defined in the migration option # * free to vary, the default is * for every parameter # m mean of M_ij, this can be a subgroup of migration rates # for example the M_1-3i are averaged and M_4,5i are estimated # M means of 4Nm (diploid), 2Nm (haploid), Nm (mtDNA, Y-chromosome) # s symmetric migration rates M # S symmetric migrants 4Nm # an example for 5 populations could look like this: # custom-migration={*s00s s*s00 0s*s0 00s*s s00s*} # this describes a circular stepping stone model with 5 symmetric rates # and independent sizes, a very basic stepping stone with 2 parameters would # look like this custom-migration={mm00m mmm00 0mmm0 00mmm m00mm} custom-migration={*0**} # # Influence of geography on migration rate # a distance matrix between populations changes the migration rate matrix so that # (genetic?) migration rates = inferred migration rate / distance ~ a dispersion coefficient # the geofile contains the number of populations, names for populations (10 characters), they # need to be in order of the dataset. 
And the distances between the populations, they do not # need to be symmetric # Syntax: geo=< NO | YES > # NO distances among populations are considered to be 1 [all equal] # YES distances are read from a file geo=NO # # ################################################################################ # Search strategies ################################################################################ # # MCMC Strategy method # Syntax: bayes-update=< NO | YES> # NO maximum likelihood method # YES Bayesian method # Some of the options are only available in one or the other mode # BAYESIAN OPTIONS # bayes-updatefreq=VALUE # VALUE is a ratio between 0 and 1 # ratio of how many times the genealogy is updated compared to the parameters # If the value is 0.4 in a 2 population scenario and with 1000000 steps # The tree will be evaluated 400000 times, Theta_1, Theta_2, M_21, and M_12 # will be each evaluated 150000 times. # bayes-posteriorbins=VALUE VALUE # VALUE is the number of bins in the posterior distribution histogram for Theta or M # bayes-posteriormaxtype=< ALL | P99 | MAXP99 | P100 > # ALL plots the WHOLE prior-parameter range # P99 plots from the minimum prior range value to # the 99% percentile value of EACH parameter # MAXP99 sets all axes from minimum to the maximal # 99% percentile value of ALL parameters # P100 plots from the minimum prior range value to # the 100% percentile value of EACH parameter # bayes-file= # FILENAME is the name of the file that will contain # the results for the posterior distribution # bayes-allfile=<:INTERVAL:FILENAME|NO> # FILENAME is the name of the file that will contain # all parameters of the posterior distribution [HUGE] # INTERVAL is the interval at which all parameters are written to file # # bayes-proposals= THETA < SLICE | METROPOLIS > # bayes-proposals= MIG < SLICE | METROPOLIS > # SLICE uses the slice sampler to propose new parameter values # METROPOLIS uses the Metropolis-Hastings sampler # (this is done for each parameter group: THETA or 
MIGration) # # bayes-priors= THETA # NOTE(review): the rest of the bayes-priors description and the Bayesian/chain settings that normally follow it are missing from this copy of the file # # Heating: # Syntax: heating=< NO | < YES | ADAPTIVE >:SKIP:TEMPERATURES > # NO No heating # YES heating using TEMPERATURES # ADAPTIVE adaptive heating using start TEMPERATURES [fails sometimes!!] # SKIP skip that many comparisons, this lengthens the run by SKIP # TEMPERATURES { 1.0, temp1, temp2, temp3 .. tempn} # Example: heating=YES:1:{1.0, 1.2, 3.0,1000000.0} # Heating: swapping chains # Syntax: heated-swap=< YES | NO > # YES swapping of chains enabled [DEFAULT] # NO swapping of chains disabled # Example: heated-swap=YES heating=YES:1:{1.000000,1.500000,3.000000,1000000.000000} heated-swap=YES # # Lengthening chain schemes # Syntax: moving-steps=< NO | YES:VALUE> # VALUE frequency is between 0..1 moving-steps=NO # # Syntax: long-chain-epsilon=VALUE # VALUE is between 0..INFINITY # the VALUE is the likelihood ratio between the old and the new chain # the VALUE depends on the number of parameters: with 1 parameter, values of 0.5 are great # but with many parameters and bad data, values >20 are more reasonable long-chain-epsilon=INFINITY # # Convergence statistic [Gelman and Rubin] # Syntax: gelman-convergence=< YES:Pairs|Summary | NO > # NO do not use Gelman's convergence criterion # YES use Gelman's convergence criterion between chain i, and i-1 # PAIRS reports all replicate pairs # SUM reports only mean and maxima gelman-convergence=No # # Syntax: replicate=< NO | YES:< VALUE | LastChains > > # NO no replication of run # YES replicate run # VALUE number between 2 and many, complete replicates # LastChains replications over last chains replicate=YES:2 # # Migration rates are attracted to zero (fatal attraction) # Resistance is the lowest migration value for all but the last chain # Syntax resistance=VALUE # VALUE is the lowest migration rate value allowed during all but the last chain # typical values are 0.01 or _lower_ for data with sequences and 0.0001 or _lower_ for other data resistance=0.000001 # #------------------------------------------------------------------------------- # end