Package Bio :: Package Phylo :: Package Applications :: Module _Phyml
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.Applications._Phyml

  1  # Copyright 2011 by Eric Talevich.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its license. 
  3  # Please see the LICENSE file that should have been included as part of this 
  4  # package. 
  5  """Command-line wrapper for the tree inference program PhyML.""" 
  6   
  7  from Bio._py3k import basestring 
  8   
  9  from Bio.Application import _Option, _Switch, AbstractCommandline 
 10   
 11   
12 -class PhymlCommandline(AbstractCommandline):
13 """Command-line wrapper for the tree inference program PhyML. 14 15 Homepage: http://www.atgc-montpellier.fr/phyml 16 17 Citations: 18 19 Guindon S, Gascuel O. 20 A simple, fast, and accurate algorithm to estimate large phylogenies by maximum 21 likelihood. 22 Systematic Biology, 2003 Oct;52(5):696-704. 23 PubMed PMID: 14530136. 24 25 Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O. 26 New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing 27 the Performance of PhyML 3.0. 28 Systematic Biology, 2010 59(3):307-21. 29 30 """ 31
32 - def __init__(self, cmd='phyml', **kwargs):
33 self.parameters = [ 34 _Option(['-i', '--input', 'input'], 35 """Name of the nucleotide or amino-acid sequence file in PHYLIP 36 format.""", 37 filename=True, 38 is_required=True, 39 equate=False, 40 ), 41 42 _Option(['-d', '--datatype', 'datatype'], 43 """Data type is 'nt' for nucleotide (default) and 'aa' for 44 amino-acid sequences.""", 45 checker_function=lambda x: x in ('nt', 'aa'), 46 equate=False, 47 ), 48 49 _Switch(['-q', '--sequential', 'sequential'], 50 "Changes interleaved format (default) to sequential format." 51 ), 52 53 _Option(['-n', '--multiple', 'multiple'], 54 "Number of data sets to analyse (integer).", 55 checker_function=(lambda x: 56 isinstance(x, int) or x.isdigit()), 57 equate=False, 58 ), 59 60 _Switch(['-p', '--pars', 'pars'], 61 """Use a minimum parsimony starting tree. 62 63 This option is taken into account when the '-u' option is absent 64 and when tree topology modifications are to be done. 65 """ 66 ), 67 68 _Option(['-b', '--bootstrap', 'bootstrap'], 69 """Number of bootstrap replicates, if value is > 0. 70 71 Otherwise: 72 73 0: neither approximate likelihood ratio test nor bootstrap 74 values are computed. 75 -1: approximate likelihood ratio test returning aLRT statistics. 76 -2: approximate likelihood ratio test returning Chi2-based 77 parametric branch supports. 78 -4: SH-like branch supports alone. 79 """, 80 equate=False, 81 ), 82 83 _Option(['-m', '--model', 'model'], 84 """Substitution model name. 85 86 Nucleotide-based models: 87 88 HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom 89 90 For the custom option, a string of six digits identifies the 91 model. For instance, 000000 corresponds to F81 (or JC69, 92 provided the distribution of nucleotide frequencies is uniform). 93 012345 corresponds to GTR. This option can be used for encoding 94 any model that is a nested within GTR. 95 96 Amino-acid based models: 97 98 LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV | 99 CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom 100 """, 101 checker_function=(lambda x: x in ( 102 # Nucleotide models: 103 'HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR', 104 # Amino acid models: 105 'LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut', 106 'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt', 107 'HIVw', 'HIVb') or 108 isinstance(x, int)), 109 equate=False, 110 ), 111 112 _Option(['-f', 'frequencies'], 113 """Character frequencies. 114 115 -f e, m, or "fA fC fG fT" 116 117 e : Empirical frequencies, determined as follows : 118 119 - Nucleotide sequences: (Empirical) the equilibrium base 120 frequencies are estimated by counting the occurrence of the 121 different bases in the alignment. 122 - Amino-acid sequences: (Empirical) the equilibrium 123 amino-acid frequencies are estimated by counting the 124 occurrence of the different amino-acids in the alignment. 125 126 m : ML/model-based frequencies, determined as follows : 127 128 - Nucleotide sequences: (ML) the equilibrium base 129 frequencies are estimated using maximum likelihood 130 - Amino-acid sequences: (Model) the equilibrium amino-acid 131 frequencies are estimated using the frequencies defined by 132 the substitution model. 133 134 "fA fC fG fT" : only valid for nucleotide-based models. 135 fA, fC, fG and fT are floating-point numbers that correspond 136 to the frequencies of A, C, G and T, respectively. 137 """, 138 filename=True, # ensure ".25 .25 .25 .25" stays quoted 139 equate=False, 140 ), 141 142 _Option(['-t', '--ts/tv', 'ts_tv_ratio'], 143 """Transition/transversion ratio. (DNA sequences only.) 144 145 Can be a fixed positive value (ex:4.0) or e to get the 146 maximum-likelihood estimate. 147 """, 148 equate=False, 149 ), 150 151 _Option(['-v', '--pinv', 'prop_invar'], 152 """Proportion of invariable sites. 153 154 Can be a fixed value in the range [0,1], or 'e' to get the 155 maximum-likelihood estimate. 156 """, 157 equate=False, 158 ), 159 160 _Option(['-c', '--nclasses', 'nclasses'], 161 """Number of relative substitution rate categories. 162 163 Default 1. Must be a positive integer. 164 """, 165 equate=False, 166 ), 167 168 _Option(['-a', '--alpha', 'alpha'], 169 """Distribution of the gamma distribution shape parameter. 170 171 Can be a fixed positive value, or 'e' to get the 172 maximum-likelihood estimate. 173 """, 174 equate=False, 175 ), 176 177 _Option(['-s', '--search', 'search'], 178 """Tree topology search operation option. 179 180 Can be one of: 181 182 NNI : default, fast 183 SPR : a bit slower than NNI 184 BEST : best of NNI and SPR search 185 """, 186 checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'), 187 equate=False, 188 ), 189 190 # alt name: user_tree_file 191 _Option(['-u', '--inputtree', 'input_tree'], 192 "Starting tree filename. The tree must be in Newick format.", 193 filename=True, 194 equate=False, 195 ), 196 197 _Option(['-o', 'optimize'], 198 """Specific parameter optimisation. 199 200 tlr : tree topology (t), branch length (l) and 201 rate parameters (r) are optimised. 202 tl : tree topology and branch length are optimised. 203 lr : branch length and rate parameters are optimised. 204 l : branch length are optimised. 205 r : rate parameters are optimised. 206 n : no parameter is optimised. 207 """, 208 equate=False, 209 ), 210 211 _Switch(['--rand_start', 'rand_start'], 212 """Sets the initial tree to random. 213 214 Only valid if SPR searches are to be performed. 215 """, 216 ), 217 218 _Option(['--n_rand_starts', 'n_rand_starts'], 219 """Number of initial random trees to be used. 220 221 Only valid if SPR searches are to be performed. 222 """, 223 equate=False, 224 ), 225 226 _Option(['--r_seed', 'r_seed'], 227 """Seed used to initiate the random number generator. 228 229 Must be an integer. 230 """, 231 equate=False, 232 ), 233 234 _Switch(['--print_site_lnl', 'print_site_lnl'], 235 "Print the likelihood for each site in file *_phyml_lk.txt." 236 ), 237 238 _Switch(['--print_trace', 'print_trace'], 239 """Print each phylogeny explored during the tree search process 240 in file *_phyml_trace.txt.""" 241 ), 242 243 _Option(['--run_id', 'run_id'], 244 """Append the given string at the end of each PhyML output file. 245 246 This option may be useful when running simulations involving 247 PhyML. 248 """, 249 checker_function=lambda x: isinstance(x, basestring), 250 equate=False, 251 ), 252 253 # XXX should this always be set to True? 254 _Switch(['--quiet', 'quiet'], 255 "No interactive questions (for running in batch mode)." 256 ), 257 ] 258 AbstractCommandline.__init__(self, cmd, **kwargs)
259