Package Bio :: Package Motif :: Package Applications :: Module _XXmotif
[hide private]
[frames | no frames]

Source Code for Module Bio.Motif.Applications._XXmotif

  1  # -*- coding: utf-8 -*- 
  2  # Copyright 2012 by Christian Brueffer.  All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """Command line wrapper for the motif finding program XXmotif.""" 
  8   
  9  import os 
 10  from Bio.Application import AbstractCommandline, _Option, _Switch, _Argument 
 11   
 12   
class XXmotifCommandline(AbstractCommandline):
    """Command line wrapper for XXmotif.

    http://xxmotif.genzentrum.lmu.de/

    Example:

    >>> from Bio.Motif.Applications import XXmotifCommandline
    >>> out_dir = "results"
    >>> in_file = "sequences.fasta"
    >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
    >>> print(xxmotif_cline)
    XXmotif results sequences.fasta --revcomp

    You would typically run the command line with xxmotif_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citations:

    Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
    weight matriX-based motif discovery in nucleotide sequences,
    Nucleic Acids Res. 40: W104-W109 (2012).

    Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
    based regulatory motif discovery using positional weight matrices
    (to be published)

    Last checked against version: 1.3
    """

    def __init__(self, cmd="XXmotif", **kwargs):
        """Declare the XXmotif parameters and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the XXmotif executable, default "XXmotif".
         - kwargs - parameter values keyed by the names declared below
           (e.g. outdir, seqfile, revcomp); they are forwarded unchanged
           to AbstractCommandline.__init__.
        """
        # order of parameters is the same as in XXmotif --help
        _valid_alphabet = set("ACGTNX")

        def _is_int(value):
            """Accept only integer values."""
            return isinstance(value, int)

        def _has_valid_symbol(motif):
            """Accept a motif containing at least one symbol of the alphabet."""
            # NOTE(review): this only requires *one* valid character, so
            # e.g. "ZZA" passes.  Kept as-is because tightening it to all()
            # would reject IUPAC codes outside "ACGTNX" that currently pass.
            return any((c in _valid_alphabet) for c in motif)

        self.parameters = [
            _Argument(["outdir", "OUTDIR"],
                      "output directory for all results",
                      filename=True,
                      is_required=True,
                      # XXmotif currently does not accept spaces in the
                      # outdir name
                      checker_function=lambda x: " " not in x),
            _Argument(["seqfile", "SEQFILE"],
                      "file name with sequences from positive set in FASTA format",
                      filename=True,
                      is_required=True,
                      # XXmotif currently only accepts a pure filename
                      checker_function=lambda x: os.path.split(x)[0] == ""),

            # Options
            _Option(["--negSet", "negSet", "negset", "NEGSET"],
                    "sequence set which has to be used as a reference set",
                    filename=True,
                    equate=False),
            _Switch(["--zoops", "zoops", "ZOOPS"],
                    "use zero-or-one occurrence per sequence model (DEFAULT)"),
            _Switch(["--mops", "mops", "MOPS"],
                    "use multiple occurrence per sequence model"),
            _Switch(["--oops", "oops", "OOPS"],
                    "use one occurrence per sequence model"),
            _Switch(["--revcomp", "revcomp", "REVCOMP"],
                    "search in reverse complement of sequences as well (DEFAULT: NO)"),
            _Option(["--background-model-order", "background-model-order",
                     "BACKGROUND-MODEL-ORDER"],
                    "order of background distribution (DEFAULT: 2, 8(--negset) )",
                    checker_function=_is_int,
                    equate=False),
            _Option(["--pseudo", "pseudo", "PSEUDO"],
                    "percentage of pseudocounts used (DEFAULT: 10)",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-g", "--gaps", "gaps", "GAPS"],
                    "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                    # The valid range is 0..3 inclusive.  (The expression
                    # [0-3] would be the one-element list [-3].)
                    checker_function=lambda x: x in (0, 1, 2, 3),
                    equate=False),
            _Option(["--type", "type", "TYPE"],
                    "defines what kind of start seeds are used (DEFAULT: ALL) "
                    "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                    checker_function=lambda x: x in ["ALL", "all",
                                                     "FIVEMERS", "fivemers",
                                                     "PALINDROME", "palindrome",
                                                     "TANDEM", "tandem",
                                                     "NOPALINDROME", "nopalindrome",
                                                     "NOTANDEM", "notandem"],
                    equate=False),
            _Option(["--merge-motif-threshold", "merge-motif-threshold",
                     "MERGE-MOTIF-THRESHOLD"],
                    "defines the similarity threshold for merging motifs (DEFAULT: HIGH) "
                    "possible modes: LOW, MEDIUM, HIGH",
                    checker_function=lambda x: x in ["LOW", "low",
                                                     "MEDIUM", "medium",
                                                     "HIGH", "high"],
                    equate=False),
            _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization",
                     "NO-PWM-LENGTH-OPTIMIZATION"],
                    "do not optimize length during iterations (runtime advantages)"),
            _Option(["--max-match-positions", "max-match-positions",
                     "MAX-MATCH-POSITIONS"],
                    "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
                    checker_function=_is_int,
                    equate=False),
            _Switch(["--batch", "batch", "BATCH"],
                    "suppress progress bars (reduce output size for batch jobs)"),
            _Option(["--maxPosSetSize", "maxPosSetSize", "maxpossetsize",
                     "MAXPOSSETSIZE"],
                    "maximum number of sequences from the positive set used [DEFAULT: all]",
                    checker_function=_is_int,
                    equate=False),
            # --help ("print this help page") is deliberately not wrapped:
            # it does not make sense in biopython
            _Option(["--trackedMotif", "trackedMotif", "trackedmotif",
                     "TRACKEDMOTIF"],
                    "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                    checker_function=_has_valid_symbol,
                    equate=False),

            # Using conservation information
            _Option(["--format", "format", "FORMAT"],
                    "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                    checker_function=lambda x: x in ["FASTA", "fasta",
                                                     "MFASTA", "mfasta"],
                    equate=False),
            _Option(["--maxMultipleSequences", "maxMultipleSequences",
                     "maxmultiplesequences", "MAXMULTIPLESEQUENCES"],
                    "maximum number of sequences used in an alignment [DEFAULT: all]",
                    checker_function=_is_int,
                    equate=False),

            # Using localization information
            _Switch(["--localization", "localization", "LOCALIZATION"],
                    "use localization information to calculate combined P-values "
                    "(sequences should have all the same length)"),
            _Option(["--downstream", "downstream", "DOWNSTREAM"],
                    "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                    checker_function=_is_int,
                    equate=False),

            # Start with self defined motif
            _Option(["-m", "--startMotif", "startMotif", "startmotif",
                     "STARTMOTIF"],
                    "Start motif (IUPAC characters)",
                    checker_function=_has_valid_symbol,
                    equate=False),
            _Option(["-p", "--profileFile", "profileFile", "profilefile",
                     "PROFILEFILE"],
                    "profile file",
                    filename=True,
                    equate=False),
            _Option(["--startRegion", "startRegion", "startregion",
                     "STARTREGION"],
                    "expected start position for motif occurrences relative to anchor point (--localization)",
                    checker_function=_is_int,
                    equate=False),
            _Option(["--endRegion", "endRegion", "endregion", "ENDREGION"],
                    "expected end position for motif occurrences relative to anchor point (--localization)",
                    checker_function=_is_int,
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
163 164
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner before and after calling doctest.testmod().
    """
    import doctest

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function, so this form keeps
    # the module importable on both.
    print("Running XXmotif doctests...")
    doctest.testmod()
    print("Done")
if __name__ == "__main__":
    # Executed as a script rather than imported: run the doctests.
    _test()