Package Bio :: Package Align :: Package Applications :: Module _ClustalOmega
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Align.Applications._ClustalOmega

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2011 by Andreas Wilm. All rights reserved. 
  3  # Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. 
  4  # 
  5  # Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py 
  6  # as template. 
  7  # 
  8  # This code is part of the Biopython distribution and governed by its 
  9  # license.  Please see the LICENSE file that should have been included 
 10  # as part of this package. 
 11  """Command line wrapper for the multiple alignment program Clustal Omega. 
 12  """ 
 13   
 14  from Bio.Application import _Option, _Switch, AbstractCommandline 
 15   
 16   
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for clustal omega

    http://www.clustal.org/omega

    Example:

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v


    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75

    Last checked against versions: 1.1.0
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the clustalo parameters and initialise the base class.

        Arguments:
         - cmd - name (or path) of the executable; defaults to "clustalo".
         - kwargs - forwarded unchanged to AbstractCommandline.__init__.
           The class example passes option values this way (infile=...,
           outfile=..., verbose=True, auto=True).
        """
        # --infmt and --outfmt accept exactly the same format names, so the
        # list is written once and shared by both checkers.
        alignment_formats = ("a2m", "fa", "fasta",
                             "clu", "clustal",
                             "msf",
                             "phy", "phylip",
                             "selex",
                             "st", "stockholm",
                             "vie", "vienna")

        def is_known_format(value):
            """Return True if value is one of the accepted format names."""
            return value in alignment_formats

        def is_int(value):
            """Return True if value is an int (checker for numeric options)."""
            return isinstance(value, int)

        # order parameters in the same order as clustalo --help
        self.parameters = [
            # Sequence Input
            _Option(["-i", "--in", "--infile", "infile"],
                    "Multiple sequence input file",
                    filename=True,
                    equate=False),
            # NOTE(review): "HMM input" is not a usable flag or identifier;
            # it is kept only so the accepted names stay unchanged.
            _Option(["--hmm-in", "HMM input", "hmm_input"],
                    "HMM input files",
                    filename=True,
                    equate=False),
            _Switch(["--dealign", "dealign"],
                    "Dealign input sequences"),
            _Option(["--profile1", "--p1", "profile1"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["--profile2", "--p2", "profile2"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["-t", "--seqtype", "seqtype"],
                    "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                    equate=False,
                    checker_function=lambda x: x in ["protein", "rna", "dna",
                                                     "Protein", "RNA", "DNA",
                                                     "PROTEIN"]),
            _Option(["--infmt", "infmt"],
                    """Forced sequence input file format (default: auto)

                    Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
                    """,
                    equate=False,
                    checker_function=is_known_format),

            # Clustering
            _Option(["--distmat-in", "distmat_in"],
                    "Pairwise distance matrix input file (skips distance computation).",
                    filename=True,
                    equate=False),
            _Option(["--distmat-out", "distmat_out"],
                    "Pairwise distance matrix output file.",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-in", "guidetree_in"],
                    "Guide tree input file (skips distance computation and guide-tree clustering step).",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-out", "guidetree_out"],
                    "Guide tree output file.",
                    filename=True,
                    equate=False),
            _Switch(["--full", "distmat_full"],
                    "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)"),
            _Switch(["--full-iter", "distmat_full_iter"],
                    "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)"),

            # Alignment Output
            _Option(["-o", "--out", "--outfile", "outfile"],
                    "Multiple sequence alignment output file (default: stdout).",
                    filename=True,
                    equate=False),
            _Option(["--outfmt", "outfmt"],
                    "MSA output file format:"
                    " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                    " (default: fasta).",
                    equate=False,
                    checker_function=is_known_format),

            # Iteration
            _Option(["--iterations", "--iter", "iterations"],
                    "Number of (combined guide-tree/HMM) iterations",
                    equate=False,
                    checker_function=is_int),
            _Option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                    "Maximum number of guidetree iterations",
                    equate=False,
                    checker_function=is_int),
            _Option(["--max-hmm-iterations", "max_hmm_iterations"],
                    "Maximum number of HMM iterations",
                    equate=False,
                    checker_function=is_int),

            # Limits (will exit early, if exceeded):
            _Option(["--maxnumseq", "maxnumseq"],
                    "Maximum allowed number of sequences",
                    equate=False,
                    checker_function=is_int),
            _Option(["--maxseqlen", "maxseqlen"],
                    "Maximum allowed sequence length",
                    equate=False,
                    checker_function=is_int),

            # Miscellaneous:
            _Switch(["--auto", "auto"],
                    "Set options automatically (might overwrite some of your options)"),
            _Option(["--threads", "threads"],
                    "Number of processors to use",
                    equate=False,
                    checker_function=is_int),
            _Option(["-l", "--log", "log"],
                    "Log all non-essential output to this file.",
                    filename=True,
                    equate=False),
            _Switch(["-h", "--help", "help"],
                    "Outline the command line params."),
            _Switch(["-v", "--verbose", "verbose"],
                    "Verbose output"),
            _Switch(["--version", "version"],
                    "Print version information and exit"),
            _Switch(["--long-version", "long_version"],
                    "Print long version information and exit"),
            _Switch(["--force", "force"],
                    "Force file overwriting."),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
176 177
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner before and after calling doctest.testmod().
    Returns None.
    """
    # Single-argument print(...) produces the same output under Python 2
    # (parenthesised expression) and Python 3 (function call).
    print("Running ClustalOmega doctests...")
    import doctest
    doctest.testmod()
    print("Done")
# Executed as a script (not imported): run the module self-test.
if __name__ == "__main__":
    _test()