1
2
3
4
5 """Command line wrapper for the multiple alignment program PRANK.
6 """
7
8 __docformat__ = "epytext en"
9
10 from Bio.Application import _Option, _Switch, AbstractCommandline
11
12
class PrankCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program PRANK.

    http://www.ebi.ac.uk/goldman-srv/prank/prank/

    Example:

    To align a FASTA file (unaligned.fasta) with the output in aligned
    FASTA format with the output filename starting with "aligned" (you
    can't pick the filename explicitly), no tree output and no XML output,
    use:

    >>> from Bio.Align.Applications import PrankCommandline
    >>> prank_cline = PrankCommandline(d="unaligned.fasta",
    ...                                o="aligned", #prefix only!
    ...                                f=8, #FASTA output
    ...                                notree=True, noxml=True)
    >>> print(prank_cline)
    prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree

    You would typically run the command line with prank_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citations:

    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
    multiple alignment of sequences with insertions. Proceedings of
    the National Academy of Sciences, 102: 10557--10562.

    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
    prevents errors in sequence alignment and evolutionary analysis.
    Science, 320: 1632.

    Last checked against version: 081202
    """

    def __init__(self, cmd="prank", **kwargs):
        """Declare the supported PRANK parameters and initialise the wrapper.

        @param cmd: name (or path) of the PRANK executable; defaults to
            "prank".  Passed unchanged to AbstractCommandline.__init__.
        @param kwargs: initial parameter values, e.g. d="unaligned.fasta",
            f=8, notree=True as in the class example.  They are forwarded
            unchanged to AbstractCommandline.__init__.
        """
        # Values accepted for -f: the integers 1 to 17 inclusive.
        OUTPUT_FORMAT_VALUES = list(range(1, 18))
        self.parameters = [
            # ---- Input/output parameters ----
            _Option(["-d", "d"],
                    "Input filename",
                    filename=True,
                    is_required=True),
            _Option(["-t", "t"], "Input guide tree filename",
                    filename=True),
            _Option(["-tree", "tree"],
                    "Input guide tree as Newick string"),
            _Option(["-m", "m"],
                    "User-defined alignment model filename. Default: "
                    "HKY2/WAG"),
            _Option(["-o", "o"],
                    "Output filenames prefix. Default: 'output'\n "
                    "Will write: output.?.fas (depending on requested "
                    "format), output.?.xml and output.?.dnd",
                    filename=True),
            _Option(["-f", "f"],
                    "Output alignment format. Default: 8 FASTA\n"
                    "Option are:\n"
                    "1. IG/Stanford 8. Pearson/Fasta\n"
                    "2. GenBank/GB 11. Phylip3.2\n"
                    "3. NBRF 12. Phylip\n"
                    "4. EMBL 14. PIR/CODATA\n"
                    "6. DNAStrider 15. MSF\n"
                    "7. Fitch 17. PAUP/NEXUS",
                    checker_function=lambda x: x in OUTPUT_FORMAT_VALUES),
            _Switch(["-noxml", "noxml"],
                    "Do not output XML files"),
            _Switch(["-notree", "notree"],
                    "Do not output dnd tree files"),
            _Switch(["-shortnames", "shortnames"],
                    "Truncate names at first space"),
            _Switch(["-quiet", "quiet"],
                    "Reduce verbosity"),

            # ---- Model parameters ----
            _Switch(["-F", "+F", "F"],
                    "Force insertions to be always skipped: same as +F"),
            _Switch(["-dots", "dots"],
                    "Show insertion gaps as dots"),
            _Option(["-gaprate", "gaprate"],
                    "Gap opening rate. Default: dna 0.025 prot 0.0025",
                    checker_function=lambda x: isinstance(x, float)),
            _Option(["-gapext", "gapext"],
                    "Gap extension probability. Default: dna 0.5 "
                    "/ prot 0.5",
                    checker_function=lambda x: isinstance(x, float)),
            # Accept text as well as byte strings: checking for bytes alone
            # rejects an ordinary str such as '25,25,25,25' on Python 3.
            _Option(["-dnafreqs", "dnafreqs"],
                    "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                    "surrounded string value. Default: empirical",
                    checker_function=lambda x: isinstance(x, (str, bytes))),
            _Option(["-kappa", "kappa"],
                    "Transition/transversion ratio. Default: 2",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-rho", "rho"],
                    "Purine/pyrimidine ratio. Default: 1",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-codon", "codon"],
                    "Codon model filename. Default: empirical codon model"),
            _Switch(["-termgap", "termgap"],
                    "Penalise terminal gaps normally"),

            # ---- Other parameters ----
            _Switch(["-nopost", "nopost"],
                    "Do not compute posterior support. Default: compute"),
            _Option(["-pwdist", "pwdist"],
                    "Expected pairwise distance for computing guidetree. "
                    "Default: dna 0.25 / prot 0.5",
                    checker_function=lambda x: isinstance(x, float)),
            _Switch(["-once", "once"],
                    "Run only once. Default: twice if no guidetree given"),
            _Switch(["-twice", "twice"],
                    "Always run twice"),
            _Switch(["-skipins", "skipins"],
                    "Skip insertions in posterior support"),
            _Switch(["-uselogs", "uselogs"],
                    "Slower but should work for a greater number of sequences"),
            _Switch(["-writeanc", "writeanc"],
                    "Output ancestral sequences"),
            _Switch(["-printnodes", "printnodes"],
                    "Output each node; mostly for debugging"),
            _Option(["-matresize", "matresize"],
                    "Matrix resizing multiplier",
                    checker_function=lambda x: isinstance(x, (float, int))),
            _Option(["-matinitsize", "matinitsize"],
                    "Matrix initial size multiplier",
                    checker_function=lambda x: isinstance(x, (float, int))),
            _Switch(["-longseq", "longseq"],
                    "Save space in pairwise alignments"),
            _Switch(["-pwgenomic", "pwgenomic"],
                    "Do pairwise alignment, no guidetree"),
            _Option(["-pwgenomicdist", "pwgenomicdist"],
                    "Distance for pairwise alignment. Default: 0.3",
                    checker_function=lambda x: isinstance(x, float)),
            _Option(["-scalebranches", "scalebranches"],
                    "Scale branch lengths. Default: dna 1 / prot 2",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-fixedbranches", "fixedbranches"],
                    "Use fixed branch lengths of input value",
                    checker_function=lambda x: isinstance(x, float)),
            _Option(["-maxbranches", "maxbranches"],
                    "Use maximum branch lengths of input value",
                    checker_function=lambda x: isinstance(x, float)),
            _Switch(["-realbranches", "realbranches"],
                    "Disable branch length truncation"),
            _Switch(["-translate", "translate"],
                    "Translate to protein"),
            _Switch(["-mttranslate", "mttranslate"],
                    "Translate to protein using mt table"),
            _Switch(["-convert", "convert"],
                    "Convert input alignment to new format. Do "
                    "not perform alignment"),
        ]
        # self.parameters is assigned before the base initialiser runs, as in
        # the original code; cmd and the keyword arguments are handed on as-is.
        AbstractCommandline.__init__(self, cmd, **kwargs)
198
199
201 """Run the module's doctests (PRIVATE)."""
202 print "Running modules doctests..."
203 import doctest
204 doctest.testmod()
205 print "Done"
206
if __name__ == "__main__":
    # Executed as a script rather than imported: run the doctest self-test.
    _test()
209