1
2
3
4
5 """Command line wrapper for the multiple alignment program DIALIGN2-2.
6 """
7
8 __docformat__ = "epytext en"
9
10 from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
11
12
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Example:

    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN2-2 parameters and hand over to the base class.

        @param cmd: Name (or path) of the executable; stored as
                    C{self.program_name} and passed on to the base class.
        @param kwargs: Initial parameter values keyed by the second name of
                       each entry below (e.g. C{fa=True}, C{fn="aligned"},
                       C{input="unaligned.fasta"}); forwarded unchanged to
                       C{AbstractCommandline.__init__}.
        """
        self.program_name = cmd
        # Each entry pairs the literal command line flag with the keyword
        # name used from Python.  As the class example shows, a switch such
        # as fa=True is rendered as the bare flag ("-fa"), an option declared
        # with equate=False is rendered as flag and value separated by a
        # space ("-fn aligned"), and the input file is a bare argument.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text refers to a threshold "t", which
            # suggests this flag takes a value, yet it is declared as a
            # value-less switch.  Left as is to keep the accepted keyword
            # values unchanged -- confirm against the DIALIGN user guide.
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run. This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # NOTE(review): described as a "minimum similarity value" and
            # shown as "-smin 8" in the -lgs help above, which suggests this
            # flag takes a value, yet it is declared as a value-less switch.
            # Left as is to keep the accepted keyword values unchanged --
            # confirm against the DIALIGN user guide.
            _Switch(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity."),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    # Accepts only values equal to one of 0, 1, ..., 9.
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            # The only mandatory parameter: the sequence file to align.
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        # self.parameters must be populated before the base initialiser runs,
        # since the keyword arguments refer to the names declared above.
        AbstractCommandline.__init__(self, cmd, **kwargs)
181
182
def _test():
    """Run the module's doctests (PRIVATE)."""
    # A parenthesised single-argument print is valid both as a Python 2
    # print statement and as a Python 3 function call, and emits the same
    # text in either case.
    print("Running modules doctests...")
    import doctest
    doctest.testmod()
    print("Done")


if __name__ == "__main__":
    _test()
192