1
2
3
4
5 """Command-line wrapper for the tree inference program PhyML."""
6 __docformat__ = "restructuredtext en"
7
8 from Bio.Application import _Option, _Switch, AbstractCommandline
9
10
12 """Command-line wrapper for the tree inference program PhyML.
13
14 Homepage: http://www.atgc-montpellier.fr/phyml
15
16 Citations:
17
18 Guindon S, Gascuel O.
19 A simple, fast, and accurate algorithm to estimate large phylogenies by maximum
20 likelihood.
21 Systematic Biology, 2003 Oct;52(5):696-704.
22 PubMed PMID: 14530136.
23
24 Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O.
25 New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing
26 the Performance of PhyML 3.0.
27 Systematic Biology, 2010 59(3):307-21.
28
29 """
30
def __init__(self, cmd='phyml', **kwargs):
    """Declare the PhyML command-line options and initialise the wrapper.

    Populates ``self.parameters`` with the ``_Option``/``_Switch``
    descriptors listed below, then delegates to
    ``AbstractCommandline.__init__``.

    :Parameters:
        cmd : passed unchanged to ``AbstractCommandline.__init__``;
            defaults to ``'phyml'``.
        kwargs : forwarded unchanged to ``AbstractCommandline.__init__``.
    """
    # ``basestring`` only exists on Python 2.  Fall back to ``str`` so the
    # string checkers below do not raise NameError on Python 3.
    try:
        string_types = basestring  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = str

    known_models = (
        # Nucleotide-based models:
        'HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR',
        # Amino-acid based models:
        'LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut',
        'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt',
        'HIVw', 'HIVb',
    )

    def is_valid_model(value):
        """Return True for a named model, an int, or a custom model code.

        The option's help text describes custom models as "a string of
        six digits" (e.g. 000000 or 012345).  An int cannot keep leading
        zeros, so six-digit strings are accepted as well; ints are still
        accepted for backward compatibility.
        """
        if value in known_models or isinstance(value, int):
            return True
        return (isinstance(value, string_types)
                and len(value) == 6
                and all(char in '0123456789' for char in value))

    self.parameters = [
        _Option(['-i', '--input', 'input'],
            """Name of the nucleotide or amino-acid sequence file in PHYLIP
            format.""",
            filename=True,
            is_required=True,
            equate=False,
            ),

        _Option(['-d', '--datatype', 'datatype'],
            """Data type is 'nt' for nucleotide (default) and 'aa' for
            amino-acid sequences.""",
            checker_function=lambda x: x in ('nt', 'aa'),
            equate=False,
            ),

        _Switch(['-q', '--sequential', 'sequential'],
            "Changes interleaved format (default) to sequential format."
            ),

        _Option(['-n', '--multiple', 'multiple'],
            "Number of data sets to analyse (integer).",
            checker_function=(lambda x:
                isinstance(x, int) or x.isdigit()),
            equate=False,
            ),

        _Switch(['-p', '--pars', 'pars'],
            """Use a minimum parsimony starting tree.

            This option is taken into account when the '-u' option is absent
            and when tree topology modifications are to be done.
            """
            ),

        _Option(['-b', '--bootstrap', 'bootstrap'],
            """Number of bootstrap replicates, if value is > 0.

            Otherwise:

            0: neither approximate likelihood ratio test nor bootstrap
            values are computed.
            -1: approximate likelihood ratio test returning aLRT statistics.
            -2: approximate likelihood ratio test returning Chi2-based
            parametric branch supports.
            -4: SH-like branch supports alone.
            """,
            equate=False,
            ),

        _Option(['-m', '--model', 'model'],
            """Substitution model name.

            Nucleotide-based models:

            HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom

            For the custom option, a string of six digits identifies the
            model. For instance, 000000 corresponds to F81 (or JC69,
            provided the distribution of nucleotide frequencies is uniform).
            012345 corresponds to GTR. This option can be used for encoding
            any model that is a nested within GTR.

            Amino-acid based models:

            LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV |
            CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
            """,
            checker_function=is_valid_model,
            equate=False,
            ),

        _Option(['-f', 'frequencies'],
            """Character frequencies.

            -f e, m, or "fA fC fG fT"

            e : Empirical frequencies, determined as follows :

            - Nucleotide sequences: (Empirical) the equilibrium base
            frequencies are estimated by counting the occurence of the
            different bases in the alignment.
            - Amino-acid sequences: (Empirical) the equilibrium
            amino-acid frequencies are estimated by counting the
            occurence of the different amino-acids in the alignment.

            m : ML/model-based frequencies, determined as follows :

            - Nucleotide sequences: (ML) the equilibrium base
            frequencies are estimated using maximum likelihood
            - Amino-acid sequences: (Model) the equilibrium amino-acid
            frequencies are estimated using the frequencies defined by
            the substitution model.

            "fA fC fG fT" : only valid for nucleotide-based models.
            fA, fC, fG and fT are floating-point numbers that correspond
            to the frequencies of A, C, G and T, respectively.
            """,
            # NOTE(review): the value is not a path; filename=True is
            # presumably here so a space-separated "fA fC fG fT" value
            # stays quoted on the command line -- confirm against _Option.
            filename=True,
            equate=False,
            ),

        _Option(['-t', '--ts/tv', 'ts_tv_ratio'],
            """Transition/transversion ratio. (DNA sequences only.)

            Can be a fixed positive value (ex:4.0) or e to get the
            maximum-likelihood estimate.
            """,
            equate=False,
            ),

        _Option(['-v', '--pinv', 'prop_invar'],
            """Proportion of invariable sites.

            Can be a fixed value in the range [0,1], or 'e' to get the
            maximum-likelihood estimate.
            """,
            equate=False,
            ),

        _Option(['-c', '--nclasses', 'nclasses'],
            """Number of relative substitution rate categories.

            Default 1. Must be a positive integer.
            """,
            equate=False,
            ),

        _Option(['-a', '--alpha', 'alpha'],
            """Distribution of the gamma distribution shape parameter.

            Can be a fixed positive value, or 'e' to get the
            maximum-likelihood estimate.
            """,
            equate=False,
            ),

        _Option(['-s', '--search', 'search'],
            """Tree topology search operation option.

            Can be one of:

            NNI : default, fast
            SPR : a bit slower than NNI
            BEST : best of NNI and SPR search
            """,
            checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'),
            equate=False,
            ),

        _Option(['-u', '--inputtree', 'input_tree'],
            "Starting tree filename. The tree must be in Newick format.",
            filename=True,
            equate=False,
            ),

        _Option(['-o', 'optimize'],
            """Specific parameter optimisation.

            tlr : tree topology (t), branch length (l) and
            rate parameters (r) are optimised.
            tl : tree topology and branch length are optimised.
            lr : branch length and rate parameters are optimised.
            l : branch length are optimised.
            r : rate parameters are optimised.
            n : no parameter is optimised.
            """,
            equate=False,
            ),

        _Switch(['--rand_start', 'rand_start'],
            """Sets the initial tree to random.

            Only valid if SPR searches are to be performed.
            """,
            ),

        _Option(['--n_rand_starts', 'n_rand_starts'],
            """Number of initial random trees to be used.

            Only valid if SPR searches are to be performed.
            """,
            equate=False,
            ),

        _Option(['--r_seed', 'r_seed'],
            """Seed used to initiate the random number generator.

            Must be an integer.
            """,
            equate=False,
            ),

        _Switch(['--print_site_lnl', 'print_site_lnl'],
            "Print the likelihood for each site in file *_phyml_lk.txt."
            ),

        _Switch(['--print_trace', 'print_trace'],
            """Print each phylogeny explored during the tree search process
            in file *_phyml_trace.txt."""
            ),

        _Option(['--run_id', 'run_id'],
            """Append the given string at the end of each PhyML output file.

            This option may be useful when running simulations involving
            PhyML.
            """,
            checker_function=lambda x: isinstance(x, string_types),
            equate=False,
            ),

        _Switch(['--quiet', 'quiet'],
            "No interactive questions (for running in batch mode)."
            ),
        ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
258