Package Bio :: Package Phylo :: Package Applications :: Module _Fasttree
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.Applications._Fasttree

  1  # Copyright 2013 by Nate Sutton.  Based on code in _Phyml.py by Eric Talevich.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its license. 
  3  # Please see the LICENSE file that should have been included as part of this 
  4  # package. 
  5  """Command-line wrapper for tree inference program Fasttree.""" 
  6  from __future__ import print_function 
  7   
  8  __docformat__ = "restructuredtext en" 
  9   
 10  from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline 
 11   
 12   
13 -def _is_int(x):
14 """Test whether the argument can be serialized as an integer.""" 15 return isinstance(x, int) or str(x).isdigit()
16 17
18 -def _is_numeric(x):
19 """Test whether the argument can be serialized as a number.""" 20 try: 21 float(str(x)) 22 return True 23 except ValueError: 24 return False
25 26
class FastTreeCommandline(AbstractCommandline):
    r"""Command-line wrapper for FastTree.

    Homepage: http://www.microbesonline.org/fasttree/

    Citations:

    Price, M.N., Dehal, P.S., and Arkin, A.P. (2010) FastTree 2 -- Approximately
    Maximum-Likelihood Trees for Large Alignments. PLoS ONE, 5(3):e9490.
    doi:10.1371/journal.pone.0009490.

    Example usage:

    >>> import _Fasttree
    >>> fasttree_exe = r"C:\FasttreeWin32\fasttree.exe"
    >>> cmd = _Fasttree.FastTreeCommandline(fasttree_exe, input=r'C:\Input\ExampleAlignment.fsa', out=r'C:\Output\ExampleTree.tree')
    >>> print(cmd)
    >>> out, err = cmd()
    >>> print(out)
    >>> print(err)

    Usage advice:
    the only parameters needed are (fasttree_exe, input='<InputFile>', out='<OutputFile>')

    parameters that use values are added this way: (fasttree_exe, parameter=value, input='<InputFile>', out='<OutputFile>')
    parameters that don't use values are added this way: (fasttree_exe, parameter=True, input='<InputFile>', out='<OutputFile>')

    from the command line use 'fasttree.exe -help' or 'fasttree.exe -expert' for more explanation of usage options
    """

    # NOTE: the class docstring is a raw string on purpose; the Windows paths
    # in the example contain backslashes that would otherwise be read as
    # (invalid) escape sequences.

    def __init__(self, cmd='fasttree', **kwargs):
        """Declare the FastTree options and initialize the base class.

        cmd is the executable name or path (default 'fasttree'); it and any
        keyword arguments are forwarded to AbstractCommandline.__init__ after
        self.parameters has been populated.

        Each entry below gives the FastTree flag, the Python keyword name it
        is exposed under, and a description.  Integer-valued options are
        validated with _is_int, real-valued ones with _is_numeric.
        """
        # NOTE(review): equate=False presumably makes the wrapper emit
        # "-flag value" rather than "-flag=value" -- confirm in Bio.Application.
        self.parameters = [
            _Switch(['-nt', 'nt'],
                """By default FastTree expects protein alignments, use -nt for nucleotides""",
                ),
            _Option(['-n', 'n'],
                """-n -- read N multiple alignments in.

                    This only works with phylip interleaved format. For example, you can
                    use it with the output from phylip's seqboot. If you use -n, FastTree
                    will write 1 tree per line to standard output.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-quote', 'quote'],
                """-quote -- add quotes to sequence names in output.

                    Quote sequence names in the output and allow spaces, commas,
                    parentheses, and colons in them but not ' characters (fasta files only).""",
                ),
            _Option(['-pseudo', 'pseudo'],
                """-pseudo [weight] -- Pseudocounts are used with sequence distance estimation.

                    Use pseudocounts to estimate distances between sequences with little or no
                    overlap. (Off by default.) Recommended if analyzing the alignment has
                    sequences with little or no overlap.
                    If the weight is not specified, it is 1.0 """,
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-boot', 'boot'],
                """Specify the number of resamples for support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-nosupport', 'nosupport'],
                """Turn off support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.""",
                ),
            _Option(['-intree', 'intree'],
                """-intree newickfile -- read the starting tree in from newickfile.

                    Any branch lengths in the starting trees are ignored.
                    -intree with -n will read a separate starting tree for each alignment.""",
                filename=True,
                equate=False,
                ),
            _Option(['-intree1', 'intree1'],
                """-intree1 newickfile -- read the same starting tree for each alignment.""",
                filename=True,
                equate=False,
                ),
            _Switch(['-quiet', 'quiet'],
                """-quiet -- do not write to standard error during normal operation

                    (no progress indicator, no options summary, no likelihood values, etc.)""",
                ),
            _Switch(['-nopr', 'nopr'],
                """-nopr -- do not write the progress indicator to stderr.""",
                ),
            _Option(['-nni', 'nni'],
                """Set the rounds of minimum-evolution nearest-neighbor interchanges

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Option(['-spr', 'spr'],
                """Set the rounds of subtree-prune-regraft moves

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-noml', 'noml'],
                """Deactivate min-evo NNIs and SPRs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -noml to turn off both min-evo NNIs and SPRs (useful if refining
                    an approximately maximum-likelihood tree with further NNIs) """,
                ),
            _Switch(['-mllen', 'mllen'],
                """Optimize branch lengths on a fixed topology.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology.""",
                ),
            _Switch(['-nome', 'nome'],
                """Changes support values calculation to a minimum-evolution bootstrap method.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1.""",
                ),
            _Option(['-mlnni', 'mlnni'],
                """Set the number of rounds of maximum-likelihood NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlnni to set the number of rounds of maximum-likelihood NNIs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Option(['-mlacc', 'mlacc'],
                """Option for optimization of branches at each NNI.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,
                    and to optimize all 5 branches in 2 or 3 rounds.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-slownni', 'slownni'],
                """Turn off heuristics to avoid constant subtrees with NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -slownni to turn off heuristics to avoid constant subtrees (affects both
                    ML and ME NNIs).""",
                ),
            _Switch(['-wag', 'wag'],
                """Maximum likelihood model options: Whelan-And-Goldman 2001 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)""",
                ),
            _Switch(['-gtr', 'gtr'],
                """Maximum likelihood model options: Use generalized time-reversible instead of (default) Jukes-Cantor (nt only)""",
                ),
            _Option(['-cat', 'cat'],
                """Maximum likelihood model options: Specify the number of rate categories of sites (default 20).""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-nocat', 'nocat'],
                """Maximum likelihood model options: No CAT model (just 1 category)""",
                ),
            _Switch(['-gamma', 'gamma'],
                """Report the likelihood under the discrete gamma model.

                    Maximum likelihood model options:
                    -gamma -- after the final round of optimizing branch lengths with the CAT model,
                    report the likelihood under the discrete gamma model with the same
                    number of categories. FastTree uses the same branch lengths but
                    optimizes the gamma shape parameter and the scale of the lengths.
                    The final tree will have rescaled lengths. Used with -log, this
                    also generates per-site likelihoods for use with CONSEL, see
                    GammaLogToPaup.pl and documentation on the FastTree web site.""",
                ),
            _Switch(['-slow', 'slow'],
                """Use an exhaustive search.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)
                    -slow takes half an hour instead of 8 seconds for 1,250 proteins""",
                ),
            _Switch(['-fastest', 'fastest'],
                """Search the visible set (the top hit for each node) only.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -fastest -- search the visible set (the top hit for each node) only
                    Unlike the original fast neighbor-joining, -fastest updates visible(C)
                    after joining A and B if join(AB,C) is better than join(C,visible(C))
                    -fastest also updates out-distances in a very lazy way,
                    -fastest sets -2nd on as well, use -fastest -no2nd to avoid this""",
                ),
            # '-2nd' is not a valid Python identifier, hence the keyword name 'second'.
            _Switch(['-2nd', 'second'],
                """Turn 2nd-level top hits heuristic on.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)""",
                ),
            _Switch(['-no2nd', 'no2nd'],
                """Turn 2nd-level top hits heuristic off.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)""",
                ),
            _Option(['-seed', 'seed'],
                """Use -seed to initialize the random number generator.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-top', 'top'],
                """Top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other""",
                ),
            _Switch(['-notop', 'notop'],
                """Turn off top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other""",
                ),
            _Option(['-topm', 'topm'],
                """Change the top hits calculation method

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)
                    FastTree estimates the top m hits of a leaf from the
                    top 2*m hits of a 'close' neighbor, where close is
                    defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),
                    and updates the top-hits as joins proceed""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-close', 'close'],
                """Modify the close heuristic for the top-hit list

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -close 0.75 -- modify the close heuristic, lower is more conservative""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-refresh', 'refresh'],
                """Parameter for conditions that joined nodes are compared to other nodes

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -refresh 0.8 -- compare a joined node to all other nodes if its
                    top-hit list is less than 80% of the desired length,
                    or if the age of the top-hit list is log2(m) or greater""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-matrix', 'matrix'],
                """Specify a matrix for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                filename=True,
                equate=False,
                ),
            _Switch(['-nomatrix', 'nomatrix'],
                """Specify that no matrix should be used for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                ),
            _Switch(['-nj', 'nj'],
                """Join options: regular (unweighted) neighbor-joining (default)""",
                ),
            _Switch(['-bionj', 'bionj'],
                """Join options: weighted joins as in BIONJ. FastTree will also weight joins during NNIs""",
                ),
            # The two GTR options take several space-separated values, so no
            # checker function is attached to them.
            _Option(['-gtrrates', 'gtrrates'],
                """-gtrrates ac ag at cg ct gt""",
                equate=False,
                ),
            _Option(['-gtrfreq', 'gtrfreq'],
                """-gtrfreq A C G T""",
                equate=False,
                ),
            _Option(['-constraints', 'constraints'],
                """Specifies an alignment file for use with constrained topology searching

                    Constrained topology search options:
                    -constraints alignmentfile -- an alignment with values of 0, 1, and -
                    Not all sequences need be present. A column of 0s and 1s defines a
                    constrained split. Some constraints may be violated
                    (see 'violating constraints:' in standard error).""",
                filename=True,
                equate=False,
                ),
            _Option(['-constraintWeight', 'constraintWeight'],
                """Weight strength of contraints in topology searching

                    Constrained topology search options:
                    -constraintWeight -- how strongly to weight the constraints. A value of 1
                    means a penalty of 1 in tree length for violating a constraint
                    Default: 100.0""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-log', 'log'],
                """Create log files of data such as intermediate trees and per-site rates

                    -log logfile -- save intermediate trees so you can extract
                    the trees and restart long-running jobs if they crash
                    -log also reports the per-site rates (1 means slowest category).""",
                filename=True,
                equate=False,
                ),
            _Option(['-makematrix', 'makematrix'],
                """-makematrix [alignment]""",
                filename=True,
                equate=False,
                ),
            _Switch(['-rawdist', 'rawdist'],
                """Use -rawdist to turn the log-correction off or to use %different instead of Jukes-Cantor in AA or NT distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                ),
            _Option(['-sprlength', 'sprlength'],
                """Use -sprlength set the maximum length of a SPR move (default 10) in topology refinement

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-help', 'help'],
                """Show the help"""
                ),
            _Switch(['-expert', 'expert'],
                """Show the expert level help"""
                ),
            _Option(['-out', 'out'],
                """Enter <output file>

                    The path to a Newick Tree output file needs to be specified.""",
                filename=True,
                equate=False,
                ),
            # The alignment file is the only entry marked is_required; it is
            # declared last in the parameter list.
            _Argument(['input'],
                """Enter <input file>

                    An input file of sequence alignments in fasta or phylip format is needed. By default FastTree expects protein
                    alignments, use -nt for nucleotides""",
                filename=True,
                is_required=True,
                ),
            ]

        # self.parameters must be set before the base initializer runs, since
        # it receives the user's keyword arguments.
        AbstractCommandline.__init__(self, cmd, **kwargs)
505