1
2
3
4
5
6
7
8
9
10
11 """Command line wrapper for the multiple alignment program Clustal Omega.
12 """
13
14 from Bio.Application import _Option, _Switch, AbstractCommandline
15
16
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program Clustal Omega.

    http://www.clustal.org/omega

    Every command line option is declared once in ``__init__`` as an
    ``_Option`` (takes a value) or ``_Switch`` (boolean flag).  The last
    name in each declaration is the Python keyword/attribute name used to
    set it, e.g. ``infile=...`` for ``-i``.

    Example:

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file,
    ...                                              outfile=out_file,
    ...                                              verbose=True,
    ...                                              auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v

    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75

    Last checked against versions: 1.1.0
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the supported options and initialise the wrapper.

        Arguments:
         - cmd - name or path of the Clustal Omega executable
           (default ``"clustalo"``).
         - kwargs - initial option values, keyed by the Python name of each
           option (the last entry of its name list below); passed through
           unchanged to ``AbstractCommandline.__init__``.
        """
        # The same set of format names is accepted for both --infmt and
        # --outfmt, so it is defined once and shared by both checkers.
        alignment_formats = ("a2m", "fa", "fasta",
                             "clu", "clustal",
                             "msf",
                             "phy", "phylip",
                             "selex",
                             "st", "stockholm",
                             "vie", "vienna")

        # The order of this list is the order in which the options are
        # emitted on the generated command line (see the class doctest).
        self.parameters = [
            # Sequence input
            _Option(["-i", "--in", "--infile", "infile"],
                    "Multiple sequence input file",
                    filename=True,
                    equate=False),
            _Option(["--hmm-in", "HMM input", "hmm_input"],
                    "HMM input files",
                    filename=True,
                    equate=False),
            _Switch(["--dealign", "dealign"],
                    "Dealign input sequences"),
            _Option(["--profile1", "--p1", "profile1"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["--profile2", "--p2", "profile2"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["-t", "--seqtype", "seqtype"],
                    "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                    equate=False,
                    checker_function=lambda x: x in ["protein", "rna", "dna",
                                                     "Protein", "RNA", "DNA",
                                                     "PROTEIN"]),
            _Option(["--infmt", "infmt"],
                    """Forced sequence input file format (default: auto)

                    Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
                    """,
                    equate=False,
                    checker_function=lambda x: x in alignment_formats),

            # Clustering (distance matrix / guide tree)
            _Option(["--distmat-in", "distmat_in"],
                    "Pairwise distance matrix input file (skips distance computation).",
                    filename=True,
                    equate=False),
            _Option(["--distmat-out", "distmat_out"],
                    "Pairwise distance matrix output file.",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-in", "guidetree_in"],
                    "Guide tree input file (skips distance computation and guide-tree clustering step).",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-out", "guidetree_out"],
                    "Guide tree output file.",
                    filename=True,
                    equate=False),
            _Switch(["--full", "distmat_full"],
                    "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)"),
            _Switch(["--full-iter", "distmat_full_iter"],
                    "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)"),

            # Alignment output
            _Option(["-o", "--out", "--outfile", "outfile"],
                    "Multiple sequence alignment output file (default: stdout).",
                    filename=True,
                    equate=False),
            _Option(["--outfmt", "outfmt"],
                    "MSA output file format:"
                    " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                    " (default: fasta).",
                    equate=False,
                    checker_function=lambda x: x in alignment_formats),

            # Iteration
            _Option(["--iterations", "--iter", "iterations"],
                    "Number of (combined guide-tree/HMM) iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                    "Maximum number of guidetree iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--max-hmm-iterations", "max_hmm_iterations"],
                    "Maximum number of HMM iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),

            # Limits (will exit early, if exceeded)
            _Option(["--maxnumseq", "maxnumseq"],
                    "Maximum allowed number of sequences",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--maxseqlen", "maxseqlen"],
                    "Maximum allowed sequence length",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),

            # Miscellaneous
            _Switch(["--auto", "auto"],
                    "Set options automatically (might overwrite some of your options)"),
            _Option(["--threads", "threads"],
                    "Number of processors to use",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-l", "--log", "log"],
                    "Log all non-essential output to this file.",
                    filename=True,
                    equate=False),
            _Switch(["-h", "--help", "help"],
                    "Outline the command line params."),
            _Switch(["-v", "--verbose", "verbose"],
                    "Verbose output"),
            _Switch(["--version", "version"],
                    "Print version information and exit"),
            _Switch(["--long-version", "long_version"],
                    "Print long version information and exit"),
            _Switch(["--force", "force"],
                    "Force file overwriting."),
        ]
        # The base class reads self.parameters, so it must be set before
        # delegating to AbstractCommandline.__init__.
        AbstractCommandline.__init__(self, cmd, **kwargs)
176
177
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner, runs ``doctest.testmod()`` (which, called
    without arguments, examines the ``__main__`` module) and prints
    ``Done`` afterwards.  Returns None.
    """
    # Single-argument, parenthesised print works as both the Python 2
    # print statement and the Python 3 print function.
    print("Running ClustalOmega doctests...")
    import doctest
    doctest.testmod()
    print("Done")


# Run the doctests only when executed as a script, not when imported.
if __name__ == "__main__":
    _test()
187