1
2
3
4
5 """Command line wrapper for the multiple alignment program Clustal W.
6 """
7
8 __docformat__ = "epytext en"
9
10 import os
11 from Bio.Application import _Option, _Switch, AbstractCommandline
12
13
15 """Command line wrapper for clustalw (version one or two).
16
17 http://www.clustal.org/
18
19 Example:
20
21 >>> from Bio.Align.Applications import ClustalwCommandline
22 >>> in_file = "unaligned.fasta"
23 >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
24 >>> print clustalw_cline
25 clustalw2 -infile=unaligned.fasta
26
27 You would typically run the command line with clustalw_cline() or via
28 the Python subprocess module, as described in the Biopython tutorial.
29
30 Citation:
31
32 Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
33 McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
34 Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
35 Bioinformatics, 23, 2947-2948.
36
37 Last checked against versions: 1.83 and 2.1
38 """
39
def __init__(self, cmd="clustalw", **kwargs):
    """Declare the ClustalW parameters and initialise the command line.

    Builds C{self.parameters}, the table of every ClustalW option and
    switch this wrapper knows about, then hands over to
    C{AbstractCommandline.__init__}.

    Each entry lists four spellings of the same parameter (for example
    "-infile", "-INFILE", "INFILE", "infile").  Judging by the class
    example, the lower case spelling is the Python keyword and the first
    spelling is what is written on the command line -- see
    Bio.Application for the authoritative rules.

    @param cmd: Name or path of the executable to run, "clustalw" by
                default (pass "clustalw2" for version two).
    @param kwargs: Initial parameter values keyed by option name, e.g.
                   C{infile="unaligned.fasta"}.  Forwarded unchanged to
                   C{AbstractCommandline.__init__}.
    """
    def _is_int(x):
        # Whole-number arguments (counts, seeds, lengths).
        return isinstance(x, int)

    def _is_number(x):
        # Numeric arguments that may be given as an int or a float.
        return isinstance(x, (int, float))

    self.parameters = [
        # ---- Data (sequences) ----
        _Option(["-infile", "-INFILE", "INFILE", "infile"],
                "Input sequences.",
                filename=True),
        _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                "Profiles (old alignment).",
                filename=True),
        _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                "Profiles (old alignment).",
                filename=True),

        # ---- Verbs (do things) ----
        _Switch(["-options", "-OPTIONS", "OPTIONS", "options"],
                "List the command line parameters"),
        _Switch(["-help", "-HELP", "HELP", "help"],
                "Outline the command line params."),
        _Switch(["-check", "-CHECK", "CHECK", "check"],
                "Outline the command line params."),
        _Switch(["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                "Output full help content."),
        _Switch(["-align", "-ALIGN", "ALIGN", "align"],
                "Do full multiple alignment."),
        _Switch(["-tree", "-TREE", "TREE", "tree"],
                "Calculate NJ tree."),
        _Switch(["-pim", "-PIM", "PIM", "pim"],
                "Output percent identity matrix (while calculating the tree)."),
        _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                checker_function=_is_int),
        _Switch(["-convert", "-CONVERT", "CONVERT", "convert"],
                "Output the input sequences in a different file format."),

        # ---- General settings ----
        _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                "Use FAST algorithm for the alignment guide tree"),
        _Option(["-type", "-TYPE", "TYPE", "type"],
                "PROTEIN or DNA sequences",
                checker_function=lambda x: x in ["PROTEIN", "DNA",
                                                 "protein", "dna"]),
        _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                "Protein alignment with negative values in matrix"),
        _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                "Output sequence alignment file name",
                filename=True),
        _Option(["-output", "-OUTPUT", "OUTPUT", "output"],
                "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                checker_function=lambda x: x in ["CLUSTAL", "GCG", "GDE",
                                                 "PHYLIP", "PIR", "NEXUS",
                                                 "FASTA",
                                                 "clustal", "gcg", "gde",
                                                 "phylip", "pir", "nexus",
                                                 "fasta"]),
        _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                "Output taxon order: INPUT or ALIGNED",
                checker_function=lambda x: x in ["INPUT", "input",
                                                 "ALIGNED", "aligned"]),
        _Option(["-case", "-CASE", "CASE", "case"],
                "LOWER or UPPER (for GDE output only)",
                checker_function=lambda x: x in ["UPPER", "upper",
                                                 "LOWER", "lower"]),
        _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                "OFF or ON (for Clustal output only)",
                checker_function=lambda x: x in ["ON", "on",
                                                 "OFF", "off"]),
        _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                "OFF or ON (NEW- for all output formats)",
                checker_function=lambda x: x in ["ON", "on",
                                                 "OFF", "off"]),
        _Option(["-range", "-RANGE", "RANGE", "range"],
                "Sequence range to write starting m to m+n. "
                "Input as string eg. '24,200'"),
        _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                "Maximum allowed input sequence length",
                checker_function=_is_int),
        _Switch(["-quiet", "-QUIET", "QUIET", "quiet"],
                "Reduce console output to minimum"),
        _Option(["-stats", "-STATS", "STATS", "stats"],
                "Log some alignment statistics to file",
                filename=True),

        # ---- Fast pairwise alignments ----
        _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                "Word size",
                checker_function=_is_number),
        _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                "Number of best diags.",
                checker_function=_is_number),
        _Option(["-window", "-WINDOW", "WINDOW", "window"],
                "Window around best diags.",
                checker_function=_is_number),
        _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                "Gap penalty",
                checker_function=_is_number),
        _Option(["-score", "-SCORE", "SCORE", "score"],
                "Either: PERCENT or ABSOLUTE",
                checker_function=lambda x: x in ["percent", "PERCENT",
                                                 "absolute", "ABSOLUTE"]),

        # ---- Slow pairwise alignments ----
        _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=lambda x: x in ["BLOSUM", "PAM",
                                                 "GONNET", "ID",
                                                 "blosum", "pam",
                                                 "gonnet", "id"] or
                                           os.path.exists(x),
                filename=True),
        _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: x in ["IUB", "CLUSTALW",
                                                 "iub", "clustalw"] or
                                           os.path.exists(x),
                filename=True),
        _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                "Gap opening penalty",
                checker_function=_is_number),
        _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                "Gap extension penalty",
                checker_function=_is_number),

        # ---- Multiple alignments ----
        _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                "Output file name for newly created guide tree",
                filename=True),
        # The guide tree is an input file, so its checker must actually
        # call os.path.exists on the value.  (A lambda that merely
        # returns the os.path.exists function object is always truthy
        # and would accept anything.)
        _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
                "File name of guide tree",
                checker_function=os.path.exists,
                filename=True),
        _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=lambda x: x in ["BLOSUM", "PAM",
                                                 "GONNET", "ID",
                                                 "blosum", "pam",
                                                 "gonnet", "id"] or
                                           os.path.exists(x),
                filename=True),
        _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: x in ["IUB", "CLUSTALW",
                                                 "iub", "clustalw"] or
                                           os.path.exists(x),
                filename=True),
        _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                "Gap opening penalty",
                checker_function=_is_number),
        _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                "Gap extension penalty",
                checker_function=_is_number),
        _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                "No end gap separation pen."),
        _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                "Gap separation pen. range",
                checker_function=_is_number),
        _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                "Residue-specific gaps off"),
        _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
                "Hydrophilic gaps off"),
        _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                "List hydrophilic res."),
        _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                "% ident. for delay",
                checker_function=_is_number),
        _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                "Transitions weighting",
                checker_function=_is_number),
        _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                "NONE or TREE or ALIGNMENT",
                checker_function=lambda x: x in ["NONE", "TREE",
                                                 "ALIGNMENT",
                                                 "none", "tree",
                                                 "alignment"]),
        _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"],
                "maximum number of iterations to perform",
                checker_function=_is_int),
        _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                "Disable sequence weighting"),

        # ---- Profile alignments ----
        _Switch(["-profile", "-PROFILE", "PROFILE", "profile"],
                "Merge two alignments by profile alignment"),
        _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                "Output file name for new guide tree of profile1",
                filename=True),
        _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                "Output file for new guide tree of profile2",
                filename=True),
        # Input guide trees: checked for existence, as for -usetree.
        _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                "File name of guide tree for profile1",
                checker_function=os.path.exists,
                filename=True),
        _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                "File name of guide tree for profile2",
                checker_function=os.path.exists,
                filename=True),

        # ---- Sequence to profile alignments ----
        _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                "Sequentially add profile2 sequences to profile1 alignment"),

        # ---- Structure alignments ----
        _Switch(["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                "Do not use secondary structure-gap penalty mask for profile 1"),
        _Switch(["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                "Do not use secondary structure-gap penalty mask for profile 2"),
        _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                checker_function=lambda x: x in ["STRUCTURE", "MASK",
                                                 "BOTH", "NONE",
                                                 "structure", "mask",
                                                 "both", "none"]),
        _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                "Gap penalty for helix core residues",
                checker_function=_is_number),
        _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                "gap penalty for strand core residues",
                checker_function=_is_number),
        _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                "Gap penalty for loop regions",
                checker_function=_is_number),
        _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                "Gap penalty for structure termini",
                checker_function=_is_number),
        _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                "Number of residues inside helix to be treated as terminal",
                checker_function=_is_int),
        _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                "Number of residues outside helix to be treated as terminal",
                checker_function=_is_int),
        _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                "Number of residues inside strand to be treated as terminal",
                checker_function=_is_int),
        _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                "Number of residues outside strand to be treated as terminal",
                checker_function=_is_int),

        # ---- Trees ----
        _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                "nj OR phylip OR dist OR nexus",
                checker_function=lambda x: x in ["NJ", "PHYLIP",
                                                 "DIST", "NEXUS",
                                                 "nj", "phylip",
                                                 "dist", "nexus"]),
        _Option(["-seed", "-SEED", "SEED", "seed"],
                "Seed number for bootstraps.",
                checker_function=_is_int),
        _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"],
                "Use Kimura's correction."),
        _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                "Ignore positions with gaps."),
        _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                "Node OR branch position of bootstrap values in tree display",
                checker_function=lambda x: x in ["NODE", "BRANCH",
                                                 "node", "branch"]),
        _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                "NJ or UPGMA",
                checker_function=lambda x: x in ["NJ", "UPGMA",
                                                 "nj", "upgma"]),
    ]
    # The parameter table must exist before the base class initialiser
    # runs, since it is given the keyword values supplied by the caller.
    AbstractCommandline.__init__(self, cmd, **kwargs)
327
328
330 """Run the module's doctests (PRIVATE)."""
331 print "Running ClustalW doctests..."
332 import doctest
333 doctest.testmod()
334 print "Done"
335
336 if __name__ == "__main__":
337 _test()
338