Package Bio :: Package Phylo :: Package Applications :: Module _Phyml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.Applications._Phyml

  1  # Copyright 2011 by Eric Talevich.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its license. 
  3  # Please see the LICENSE file that should have been included as part of this 
  4  # package. 
  5  """Command-line wrapper for the tree inference program PhyML.""" 
  6   
  7  from Bio._py3k import basestring 
  8   
  9  from Bio.Application import _Option, _Switch, AbstractCommandline 
 10   
 11   
# Substitution model names accepted verbatim by the '-m' option.
_NUCLEOTIDE_MODELS = ('HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR')
_AMINO_ACID_MODELS = ('LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut',
                      'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt',
                      'HIVw', 'HIVb')
_NAMED_MODELS = _NUCLEOTIDE_MODELS + _AMINO_ACID_MODELS


def _is_count(value):
    """Return True if value is an int or a string made only of digits.

    Non-string, non-int values (e.g. floats or None) are rejected instead of
    raising AttributeError from a missing ``isdigit`` method.
    """
    if isinstance(value, int):
        return True
    return isinstance(value, basestring) and value.isdigit()


def _is_custom_model_code(value):
    """Return True if value is a string of exactly six decimal digits.

    This is the form the '-m' help text describes for custom models, e.g.
    '000000' or '012345'. It has to be a string: an int cannot keep the
    leading zeros.
    """
    return (isinstance(value, basestring) and
            len(value) == 6 and
            all(char in '0123456789' for char in value))


def _is_model(value):
    """Return True if value is acceptable for the '-m' option.

    Accepted values are a named nucleotide or amino-acid model, a six-digit
    custom model string, or (kept for backward compatibility) an int.
    """
    return (value in _NAMED_MODELS or
            isinstance(value, int) or
            _is_custom_model_code(value))


class PhymlCommandline(AbstractCommandline):
    """Command-line wrapper for the tree inference program PhyML.

    Homepage: http://www.atgc-montpellier.fr/phyml

    Citations:

    Guindon S, Gascuel O.
    A simple, fast, and accurate algorithm to estimate large phylogenies by maximum
    likelihood.
    Systematic Biology, 2003 Oct;52(5):696-704.
    PubMed PMID: 14530136.

    Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O.
    New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing
    the Performance of PhyML 3.0.
    Systematic Biology, 2010 59(3):307-21.

    """

    def __init__(self, cmd='phyml', **kwargs):
        """Declare the PhyML options and initialise the wrapper.

        Arguments:
         - cmd - name or path of the PhyML executable (default 'phyml').
         - kwargs - forwarded unchanged to AbstractCommandline.__init__;
           presumably initial values keyed by the last name in each
           parameter's name list (e.g. input, datatype, model) - confirm
           against AbstractCommandline.

        Every _Option here is declared with equate=False.
        """
        self.parameters = [
            _Option(['-i', '--input', 'input'],
                    "PHYLIP format input nucleotide or amino-acid sequence filename.",
                    filename=True,
                    is_required=True,
                    equate=False,
                    ),

            _Option(['-d', '--datatype', 'datatype'],
                    "Datatype 'nt' for nucleotide (default) or 'aa' for amino-acids.",
                    checker_function=lambda x: x in ('nt', 'aa'),
                    equate=False,
                    ),

            _Switch(['-q', '--sequential', 'sequential'],
                    "Changes interleaved format (default) to sequential format."
                    ),

            _Option(['-n', '--multiple', 'multiple'],
                    "Number of data sets to analyse (integer).",
                    checker_function=_is_count,
                    equate=False,
                    ),

            _Switch(['-p', '--pars', 'pars'],
                    """Use a minimum parsimony starting tree.

                    This option is taken into account when the '-u' option is absent
                    and when tree topology modifications are to be done.
                    """
                    ),

            _Option(['-b', '--bootstrap', 'bootstrap'],
                    """Number of bootstrap replicates, if value is > 0.

                    Otherwise:

                    0: neither approximate likelihood ratio test nor bootstrap
                       values are computed.
                    -1: approximate likelihood ratio test returning aLRT statistics.
                    -2: approximate likelihood ratio test returning Chi2-based
                        parametric branch supports.
                    -4: SH-like branch supports alone.
                    """,
                    equate=False,
                    ),

            _Option(['-m', '--model', 'model'],
                    """Substitution model name.

                    Nucleotide-based models:

                    HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom

                    For the custom option, a string of six digits identifies the
                    model. For instance, 000000 corresponds to F81 (or JC69,
                    provided the distribution of nucleotide frequencies is uniform).
                    012345 corresponds to GTR. This option can be used for encoding
                    any model that is a nested within GTR.

                    Amino-acid based models:

                    LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV |
                    CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
                    """,
                    # Named models, six-digit custom codes, or a plain int.
                    checker_function=_is_model,
                    equate=False,
                    ),

            _Option(['-f', 'frequencies'],
                    """Character frequencies.

                    -f e, m, or "fA fC fG fT"

                    e : Empirical frequencies, determined as follows :

                        - Nucleotide sequences: (Empirical) the equilibrium base
                          frequencies are estimated by counting the occurrence
                          of the different bases in the alignment.
                        - Amino-acid sequences: (Empirical) the equilibrium
                          amino-acid frequencies are estimated by counting the
                          occurrence of the different amino-acids in the alignment.

                    m : ML/model-based frequencies, determined as follows :

                        - Nucleotide sequences: (ML) the equilibrium base
                          frequencies are estimated using maximum likelihood
                        - Amino-acid sequences: (Model) the equilibrium amino-acid
                          frequencies are estimated using the frequencies defined by
                          the substitution model.

                    "fA fC fG fT" : only valid for nucleotide-based models.
                        fA, fC, fG and fT are floating-point numbers that correspond
                        to the frequencies of A, C, G and T, respectively.
                    """,
                    filename=True,  # ensure ".25 .25 .25 .25" stays quoted
                    equate=False,
                    ),

            _Option(['-t', '--ts/tv', 'ts_tv_ratio'],
                    """Transition/transversion ratio. (DNA sequences only.)

                    Can be a fixed positive value (ex:4.0) or e to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

            _Option(['-v', '--pinv', 'prop_invar'],
                    """Proportion of invariable sites.

                    Can be a fixed value in the range [0,1], or 'e' to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

            _Option(['-c', '--nclasses', 'nclasses'],
                    """Number of relative substitution rate categories.

                    Default 1. Must be a positive integer.
                    """,
                    equate=False,
                    ),

            _Option(['-a', '--alpha', 'alpha'],
                    """Distribution of the gamma distribution shape parameter.

                    Can be a fixed positive value, or 'e' to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

            _Option(['-s', '--search', 'search'],
                    """Tree topology search operation option.

                    Can be one of:

                    NNI : default, fast
                    SPR : a bit slower than NNI
                    BEST : best of NNI and SPR search
                    """,
                    checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'),
                    equate=False,
                    ),

            # alt name: user_tree_file
            _Option(['-u', '--inputtree', 'input_tree'],
                    "Starting tree filename. The tree must be in Newick format.",
                    filename=True,
                    equate=False,
                    ),

            _Option(['-o', 'optimize'],
                    """Specific parameter optimisation.

                    tlr : tree topology (t), branch length (l) and
                          rate parameters (r) are optimised.
                    tl  : tree topology and branch length are optimised.
                    lr  : branch length and rate parameters are optimised.
                    l   : branch length are optimised.
                    r   : rate parameters are optimised.
                    n   : no parameter is optimised.
                    """,
                    equate=False,
                    ),

            _Switch(['--rand_start', 'rand_start'],
                    """Sets the initial tree to random.

                    Only valid if SPR searches are to be performed.
                    """,
                    ),

            _Option(['--n_rand_starts', 'n_rand_starts'],
                    """Number of initial random trees to be used.

                    Only valid if SPR searches are to be performed.
                    """,
                    equate=False,
                    ),

            _Option(['--r_seed', 'r_seed'],
                    """Seed used to initiate the random number generator.

                    Must be an integer.
                    """,
                    equate=False,
                    ),

            _Switch(['--print_site_lnl', 'print_site_lnl'],
                    "Print the likelihood for each site in file *_phyml_lk.txt."
                    ),

            _Switch(['--print_trace', 'print_trace'],
                    """Print each phylogeny explored during the tree search process
                    in file *_phyml_trace.txt."""
                    ),

            _Option(['--run_id', 'run_id'],
                    """Append the given string at the end of each PhyML output file.

                    This option may be useful when running simulations involving
                    PhyML.
                    """,
                    checker_function=lambda x: isinstance(x, basestring),
                    equate=False,
                    ),

            # XXX should this always be set to True?
            _Switch(['--quiet', 'quiet'],
                    "No interactive questions (for running in batch mode)."
                    ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)