Package Bio :: Package Align :: Package Applications :: Module _ClustalOmega
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._ClustalOmega

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2011 by Andreas Wilm. All rights reserved. 
  3  # Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. 
  4  # 
  5  # Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py 
  6  # as template. 
  7  # 
  8  # This code is part of the Biopython distribution and governed by its 
  9  # license.  Please see the LICENSE file that should have been included 
 10  # as part of this package. 
 11  """Command line wrapper for the multiple alignment program Clustal Omega. 
 12  """ 
 13   
 14  from __future__ import print_function 
 15   
 16  __docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages! 
 17   
 18  from Bio.Application import _Option, _Switch, AbstractCommandline 
 19   
 20   
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for clustal omega

    http://www.clustal.org/omega

    Example:

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v


    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75

    Last checked against versions: 1.2.0
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the supported clustalo options and initialise the wrapper.

        Arguments:
         - cmd - name or path of the executable (default "clustalo").
         - kwargs - initial option values, keyed by the last name listed
           for each option below (e.g. infile, outfile, seqtype, threads);
           they are passed unchanged to AbstractCommandline.__init__.

        The seqtype option is validated case-insensitively, so "dna",
        "DNA" and "Dna" are all accepted.
        """
        # File formats accepted by both --infmt and --outfmt (long names and
        # the abbreviations shown in the option descriptions).
        seq_formats = ("a2m", "fa", "fasta",
                       "clu", "clustal",
                       "msf",
                       "phy", "phylip",
                       "selex",
                       "st", "stockholm",
                       "vie", "vienna")
        seq_types = ("protein", "rna", "dna")

        def is_int(value):
            """Return True if value is an integer."""
            return isinstance(value, int)

        def is_seq_format(value):
            """Return True if value names a supported alignment format."""
            return value in seq_formats

        def is_seq_type(value):
            """Return True if value is protein/rna/dna in any letter case."""
            try:
                return value.lower() in seq_types
            except AttributeError:
                # Not a string-like object, so it cannot be a sequence type.
                return False

        # order parameters in the same order as clustalo --help
        self.parameters = \
            [
                # Sequence Input
                _Option(["-i", "--in", "--infile", "infile"],
                        "Multiple sequence input file",
                        filename=True,
                        equate=False),
                # NOTE(review): "HMM input" looks like a stray description
                # rather than a command line alias; kept so the accepted
                # names are unchanged.
                _Option(["--hmm-in", "HMM input", "hmm_input"],
                        "HMM input files",
                        filename=True,
                        equate=False),
                _Switch(["--dealign", "dealign"],
                        "Dealign input sequences"),
                _Option(["--profile1", "--p1", "profile1"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                        filename=True,
                        equate=False),
                _Option(["--profile2", "--p2", "profile2"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                        filename=True,
                        equate=False),
                _Option(["-t", "--seqtype", "seqtype"],
                        "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                        equate=False,
                        checker_function=is_seq_type),
                _Switch(["--is-profile", "isprofile"],
                        "disable check if profile, force profile (default no)"),
                _Option(["--infmt", "infmt"],
                        """Forced sequence input file format (default: auto)

                        Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
                        """,
                        equate=False,
                        checker_function=is_seq_format),

                # Clustering
                _Option(["--distmat-in", "distmat_in"],
                        "Pairwise distance matrix input file (skips distance computation).",
                        filename=True,
                        equate=False),
                _Option(["--distmat-out", "distmat_out"],
                        "Pairwise distance matrix output file.",
                        filename=True,
                        equate=False),
                _Option(["--guidetree-in", "guidetree_in"],
                        "Guide tree input file (skips distance computation and guide-tree clustering step).",
                        filename=True,
                        equate=False),
                _Option(["--guidetree-out", "guidetree_out"],
                        "Guide tree output file.",
                        filename=True,
                        equate=False),
                _Switch(["--full", "distmat_full"],
                        "Use full distance matrix for guide-tree calculation (slow; mBed is default)"),
                _Switch(["--full-iter", "distmat_full_iter"],
                        "Use full distance matrix for guide-tree calculation during iteration (mBed is default)"),
                _Option(["--cluster-size", "clustersize"],
                        "soft maximum of sequences in sub-clusters",
                        checker_function=is_int),
                _Option(["--clustering-out", "clusteringout"],
                        "Clustering output file",
                        filename=True),
                _Switch(["--use-kimura", "usekimura"],
                        "use Kimura distance correction for aligned sequences (default no)"),
                _Switch(["--percent-id", "percentid"],
                        "convert distances into percent identities (default no)"),

                # Alignment Output
                _Option(["-o", "--out", "--outfile", "outfile"],
                        "Multiple sequence alignment output file (default: stdout).",
                        filename=True,
                        equate=False),
                _Option(["--outfmt", "outfmt"],
                        "MSA output file format:"
                        " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                        " (default: fasta).",
                        equate=False,
                        checker_function=is_seq_format),
                _Switch(["--residuenumber", "--resno", "residuenumber"],
                        "in Clustal format print residue numbers (default no)"),
                _Option(["--wrap", "wrap"],
                        "number of residues before line-wrap in output",
                        checker_function=is_int),
                _Option(["--output-order", "outputorder"],
                        "MSA output order like in input/guide-tree",
                        checker_function=lambda x: x in ["input-order", "tree-order"]),

                # Iteration
                _Option(["--iterations", "--iter", "iterations"],
                        "Number of (combined guide-tree/HMM) iterations",
                        equate=False,
                        checker_function=is_int),
                _Option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                        "Maximum number of guidetree iterations",
                        equate=False,
                        checker_function=is_int),
                _Option(["--max-hmm-iterations", "max_hmm_iterations"],
                        "Maximum number of HMM iterations",
                        equate=False,
                        checker_function=is_int),

                # Limits (will exit early, if exceeded):
                _Option(["--maxnumseq", "maxnumseq"],
                        "Maximum allowed number of sequences",
                        equate=False,
                        checker_function=is_int),
                _Option(["--maxseqlen", "maxseqlen"],
                        "Maximum allowed sequence length",
                        equate=False,
                        checker_function=is_int),

                # Miscellaneous:
                _Switch(["--auto", "auto"],
                        "Set options automatically (might overwrite some of your options)"),
                _Option(["--threads", "threads"],
                        "Number of processors to use",
                        equate=False,
                        checker_function=is_int),
                _Option(["-l", "--log", "log"],
                        "Log all non-essential output to this file.",
                        filename=True,
                        equate=False),
                _Switch(["-h", "--help", "help"],
                        "Print help and exit."),
                _Switch(["-v", "--verbose", "verbose"],
                        "Verbose output"),
                _Switch(["--version", "version"],
                        "Print version information and exit"),
                _Switch(["--long-version", "long_version"],
                        "Print long version information and exit"),
                _Switch(["--force", "force"],
                        "Force file overwriting."),

            ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
200 201
def _test():
    """Execute this module's doctests, announcing start and end (PRIVATE)."""
    from doctest import testmod

    print("Running ClustalOmega doctests...")
    testmod()
    print("Done")


if __name__ == "__main__":
    # Executing this file directly (rather than importing it) runs the
    # module's doctests.
    _test()