Package Bio :: Package Align :: Package Applications :: Module _Dialign
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Dialign

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program DIALIGN2-2. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  __docformat__ = "restructuredtext en"  # Don't just use plain text in epydoc API pages! 
 11   
 12  from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline 
 13   
 14   
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Example:
    --------

    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:
    ---------

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN2-2 parameters and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable; stored as
           ``self.program_name`` and forwarded to the base class.
         - kwargs - initial parameter values, keyed by the second name
           given for each parameter below (e.g. ``fn="aligned"``);
           forwarded unchanged to ``AbstractCommandline.__init__``.

        Parameters declared with ``_Switch`` are flags, those declared
        with ``_Option`` carry a value (``equate=False`` is passed so the
        value is a separate word, as in the class doctest output
        ``-fn aligned``), and the single ``_Argument`` is the required
        input file name.
        """
        self.program_name = cmd
        # The list order is kept exactly as in the original definition.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text mentions a threshold "t", which
            # suggests this flag may expect a value as well; left as a
            # switch because the value type is not established here --
            # confirm against the DIALIGN documentation.
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run. This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # "-smin" carries a value (the "-lgs" help above lists
            # "-smin 8"), so it is declared as an _Option like "-lmax" and
            # "-thr"; as a plain switch the supplied value was dropped
            # from the generated command line.
            _Option(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
185 186
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start message, runs ``doctest.testmod()`` with its default
    arguments, then prints a completion message.
    """
    import doctest

    print("Running modules doctests...")
    doctest.testmod()
    print("Done")

# Run the doctests only when this file is executed as a script.
if __name__ == "__main__":
    _test()