Package Bio :: Package Align :: Package Applications :: Module _Prank
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Prank

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program PRANK. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  __docformat__ = "restructuredtext en"  # Don't just use plain text in epydoc API pages! 
 11   
 12  from Bio.Application import _Option, _Switch, AbstractCommandline 
 13   
 14   
class PrankCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program PRANK.

    http://www.ebi.ac.uk/goldman-srv/prank/prank/

    Example:
    --------

    To align a FASTA file (unaligned.fasta) with the output in aligned
    FASTA format with the output filename starting with "aligned" (you
    can't pick the filename explicitly), no tree output and no XML output,
    use:

    >>> from Bio.Align.Applications import PrankCommandline
    >>> prank_cline = PrankCommandline(d="unaligned.fasta",
    ...                                o="aligned", # prefix only!
    ...                                f=8, # FASTA output
    ...                                notree=True, noxml=True)
    >>> print(prank_cline)
    prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree

    You would typically run the command line with prank_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citations:
    ----------

    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
    multiple alignment of sequences with insertions. Proceedings of
    the National Academy of Sciences, 102: 10557--10562.

    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
    prevents errors in sequence alignment and evolutionary analysis.
    Science, 320: 1632.

    Last checked against version: 081202
    """

    def __init__(self, cmd="prank", **kwargs):
        """Declare the supported PRANK options and initialise the wrapper.

        The full list of options and switches is stored in
        ``self.parameters`` and then ``cmd`` and ``kwargs`` are handed on
        unchanged to ``AbstractCommandline.__init__``.

        :param cmd: first argument passed to the base class initialiser;
            defaults to ``"prank"``.
        :param kwargs: option values keyed by the Python-side option name
            (the last entry of each name list below, e.g. ``d``, ``o``,
            ``f``, ``notree``), as in the class docstring example.
        """
        # Integer codes accepted by the -f checker (1..17 inclusive).
        OUTPUT_FORMAT_VALUES = list(range(1, 18))
        self.parameters = [
            # ################# input/output parameters: ##################
            # -d=sequence_file
            _Option(["-d", "d"],
                    "Input filename",
                    filename=True,
                    is_required=True),
            # -t=tree_file [default: no tree, generate approximate NJ tree]
            _Option(["-t", "t"], "Input guide tree filename",
                    filename=True),
            # -tree="tree_string" [tree in newick format; in double quotes]
            _Option(["-tree", "tree"],
                    "Input guide tree as Newick string"),
            # -m=model_file [default: HKY2/WAG]
            _Option(["-m", "m"],
                    "User-defined alignment model filename. Default: "
                    "HKY2/WAG"),
            # -o=output_file [default: 'output']
            _Option(["-o", "o"],
                    "Output filenames prefix. Default: 'output'\n "
                    "Will write: output.?.fas (depending on requested "
                    "format), output.?.xml and output.?.dnd",
                    filename=True),
            # -f=output_format [default: 8]
            _Option(["-f", "f"],
                    "Output alignment format. Default: 8 FASTA\n"
                    "Option are:\n"
                    "1. IG/Stanford 8. Pearson/Fasta\n"
                    "2. GenBank/GB 11. Phylip3.2\n"
                    "3. NBRF 12. Phylip\n"
                    "4. EMBL 14. PIR/CODATA\n"
                    "6. DNAStrider 15. MSF\n"
                    "7. Fitch 17. PAUP/NEXUS",
                    checker_function=lambda x: x in OUTPUT_FORMAT_VALUES),
            _Switch(["-noxml", "noxml"],
                    "Do not output XML files "
                    "(PRANK versions earlier than v.120626)"),
            _Switch(["-notree", "notree"],
                    "Do not output dnd tree files "
                    "(PRANK versions earlier than v.120626)"),
            _Switch(["-showxml", "showxml"],
                    "Output XML files (PRANK v.120626 and later)"),
            _Switch(["-showtree", "showtree"],
                    "Output dnd tree files (PRANK v.120626 and later)"),
            _Switch(["-shortnames", "shortnames"],
                    "Truncate names at first space"),
            _Switch(["-quiet", "quiet"],
                    "Reduce verbosity"),
            # ###################### model parameters: ######################
            # +F [force insertions to be always skipped]
            # -F [equivalent]
            _Switch(["-F", "+F", "F"],
                    "Force insertions to be always skipped: same as +F"),
            # -dots [show insertion gaps as dots]
            _Switch(["-dots", "dots"],
                    "Show insertion gaps as dots"),
            # -gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025]
            _Option(["-gaprate", "gaprate"],
                    "Gap opening rate. Default: dna 0.025 prot 0.0025",
                    checker_function=lambda x: isinstance(x, float)),
            # -gapext=# [gap extension probability; default: dna 0.5 / prot 0.5]
            _Option(["-gapext", "gapext"],
                    "Gap extension probability. Default: dna 0.5 "
                    "/ prot 0.5",
                    checker_function=lambda x: isinstance(x, float)),
            # -dnafreqs=#,#,#,# [ACGT; default: empirical]
            # The value is a comma-separated text string.  A bytes-only
            # check rejects every ordinary ``str`` on Python 3, so accept
            # both; on Python 2 ``str`` is ``bytes`` and nothing changes.
            _Option(["-dnafreqs", "dnafreqs"],
                    "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                    "surrounded string value. Default: empirical",
                    checker_function=lambda x: isinstance(x, (str, bytes))),
            # -kappa=# [ts/tv rate ratio; default:2]
            # NOTE(review): only int values pass this check - confirm
            # whether PRANK also accepts fractional ratios.
            _Option(["-kappa", "kappa"],
                    "Transition/transversion ratio. Default: 2",
                    checker_function=lambda x: isinstance(x, int)),
            # -rho=# [pur/pyr rate ratio; default:1]
            # NOTE(review): int-only, same question as for -kappa.
            _Option(["-rho", "rho"],
                    "Purine/pyrimidine ratio. Default: 1",
                    checker_function=lambda x: isinstance(x, int)),
            # -codon [for DNA: use empirical codon model]
            # Assuming this is an input file as in -m
            _Option(["-codon", "codon"],
                    "Codon model filename. Default: empirical codon model"),
            # -termgap [penalise terminal gaps normally]
            _Switch(["-termgap", "termgap"],
                    "Penalise terminal gaps normally"),
            # ############### other parameters: ################################
            # -nopost [do not compute posterior support; default: compute]
            _Switch(["-nopost", "nopost"],
                    "Do not compute posterior support. Default: compute"),
            # -pwdist=# [expected pairwise distance for computing guidetree;
            # default: dna 0.25 / prot 0.5]
            _Option(["-pwdist", "pwdist"],
                    "Expected pairwise distance for computing guidetree. "
                    "Default: dna 0.25 / prot 0.5",
                    checker_function=lambda x: isinstance(x, float)),
            _Switch(["-once", "once"],
                    "Run only once. Default: twice if no guidetree given"),
            _Switch(["-twice", "twice"],
                    "Always run twice"),
            _Switch(["-skipins", "skipins"],
                    "Skip insertions in posterior support"),
            _Switch(["-uselogs", "uselogs"],
                    "Slower but should work for a greater number of sequences"),
            _Switch(["-writeanc", "writeanc"],
                    "Output ancestral sequences"),
            _Switch(["-printnodes", "printnodes"],
                    "Output each node; mostly for debugging"),
            # -matresize=# [matrix resizing multiplier]
            # Doesn't specify type but Float and Int work
            _Option(["-matresize", "matresize"],
                    "Matrix resizing multiplier",
                    checker_function=lambda x: isinstance(x, (float, int))),
            # -matinitsize=# [matrix initial size multiplier]
            # Doesn't specify type but Float and Int work
            _Option(["-matinitsize", "matinitsize"],
                    "Matrix initial size multiplier",
                    checker_function=lambda x: isinstance(x, (float, int))),
            _Switch(["-longseq", "longseq"],
                    "Save space in pairwise alignments"),
            _Switch(["-pwgenomic", "pwgenomic"],
                    "Do pairwise alignment, no guidetree"),
            # -pwgenomicdist=# [distance for pairwise alignment; default: 0.3]
            _Option(["-pwgenomicdist", "pwgenomicdist"],
                    "Distance for pairwise alignment. Default: 0.3",
                    checker_function=lambda x: isinstance(x, float)),
            # -scalebranches=# [scale branch lengths; default: dna 1 / prot 2]
            _Option(["-scalebranches", "scalebranches"],
                    "Scale branch lengths. Default: dna 1 / prot 2",
                    checker_function=lambda x: isinstance(x, int)),
            # -fixedbranches=# [use fixed branch lengths]
            # Assume looking for a float
            _Option(["-fixedbranches", "fixedbranches"],
                    "Use fixed branch lengths of input value",
                    checker_function=lambda x: isinstance(x, float)),
            # -maxbranches=# [set maximum branch length]
            # Assume looking for a float
            _Option(["-maxbranches", "maxbranches"],
                    "Use maximum branch lengths of input value",
                    checker_function=lambda x: isinstance(x, float)),
            # -realbranches [disable branch length truncation]
            _Switch(["-realbranches", "realbranches"],
                    "Disable branch length truncation"),
            _Switch(["-translate", "translate"],
                    "Translate to protein"),
            _Switch(["-mttranslate", "mttranslate"],
                    "Translate to protein using mt table"),
            # ##################### other: ####################
            _Switch(["-convert", "convert"],
                    "Convert input alignment to new format. Do "
                    "not perform alignment"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
208 209
def _test():
    """Run this module's doctests, printing a start and end notice (PRIVATE)."""
    from doctest import testmod

    print("Running modules doctests...")
    testmod()
    print("Done")


# Script entry point: run the doctests only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    _test()