1
2
3
4
5
6
7 """Command line wrapper for the motif finding program XXmotif."""
8
9 import os
10 from Bio.Application import AbstractCommandline, _Option, _Switch, _Argument
11
12
14 """Command line wrapper for XXmotif.
15
16 http://xxmotif.genzentrum.lmu.de/
17
18 Example:
19
20 >>> from Bio.Motif.Applications import XXmotifCommandline
21 >>> out_dir = "results"
22 >>> in_file = "sequences.fasta"
23 >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
24 >>> print xxmotif_cline
25 XXmotif results sequences.fasta --revcomp
26
27 You would typically run the command line with xxmotif_cline() or via
28 the Python subprocess module, as described in the Biopython tutorial.
29
30 Citations:
31
32 Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
33 weight matriX-based motif discovery in nucleotide sequences,
34 Nucleic Acids Res. 40: W104-W109 (2012).
35
36 Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
37 based regulatory motif discovery using positional weight matrices
38 (to be published)
39
40 Last checked against version: 1.3
41 """
42
def __init__(self, cmd="XXmotif", **kwargs):
    """Declare the XXmotif parameters and initialise the base class.

    Builds ``self.parameters`` (two positional arguments followed by the
    options and switches) and then delegates to
    ``AbstractCommandline.__init__``.

    Arguments:
     - cmd - name of the executable; passed unchanged to
       ``AbstractCommandline.__init__`` (default ``"XXmotif"``).
     - kwargs - passed unchanged to ``AbstractCommandline.__init__``;
       presumably initial values for the parameters declared below,
       keyed by parameter name (confirm against Bio.Application).

    """
    _valid_alphabet = set("ACGTNX")

    def _is_int(value):
        # Shared checker for every option that takes an integer value.
        return isinstance(value, int)

    def _is_gap_count(value):
        # Accept the integers 0..3 inclusive, as stated in the help text.
        # Written as an explicit range test: the list literal ``[0-3]``
        # evaluates to ``[-3]`` and would reject every documented value.
        return isinstance(value, int) and 0 <= value <= 3

    def _has_motif_letter(value):
        # True as soon as ONE character belongs to _valid_alphabet.
        # NOTE(review): if every character is meant to be validated this
        # should be all(); confirm against the motif syntax XXmotif accepts
        # before tightening, since IUPAC has letters outside "ACGTNX".
        return any((c in _valid_alphabet) for c in value)

    self.parameters = [
        # Positional arguments.
        _Argument(["outdir", "OUTDIR"],
                  "output directory for all results",
                  filename=True,
                  is_required=True,
                  # Reject directory names that contain a space.
                  checker_function=lambda x: " " not in x),
        _Argument(["seqfile", "SEQFILE"],
                  "file name with sequences from positive set in FASTA format",
                  filename=True,
                  is_required=True,
                  # Only a bare file name is accepted (no directory part).
                  checker_function=lambda x: os.path.split(x)[0] == ""),

        # Options and switches.
        _Option(["--negSet", "negSet", "negset", "NEGSET"],
                "sequence set which has to be used as a reference set",
                filename=True,
                equate=False),
        _Switch(["--zoops", "zoops", "ZOOPS"],
                "use zero-or-one occurrence per sequence model (DEFAULT)"),
        _Switch(["--mops", "mops", "MOPS"],
                "use multiple occurrence per sequence model"),
        _Switch(["--oops", "oops", "OOPS"],
                "use one occurrence per sequence model"),
        _Switch(["--revcomp", "revcomp", "REVCOMP"],
                "search in reverse complement of sequences as well (DEFAULT: NO)"),
        _Option(["--background-model-order", "background-model-order", "BACKGROUND-MODEL-ORDER"],
                "order of background distribution (DEFAULT: 2, 8(--negset) )",
                checker_function=_is_int,
                equate=False),
        _Option(["--pseudo", "pseudo", "PSEUDO"],
                "percentage of pseudocounts used (DEFAULT: 10)",
                checker_function=_is_int,
                equate=False),
        _Option(["-g", "--gaps", "gaps", "GAPS"],
                "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                checker_function=_is_gap_count,
                equate=False),
        _Option(["--type", "type", "TYPE"],
                "defines what kind of start seeds are used (DEFAULT: ALL)"
                "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                checker_function=lambda x: x in ["ALL", "all",
                                                 "FIVEMERS", "fivemers",
                                                 "PALINDROME", "palindrome",
                                                 "TANDEM", "tandem",
                                                 "NOPALINDROME", "nopalindrome",
                                                 "NOTANDEM", "notandem"],
                equate=False),
        _Option(["--merge-motif-threshold", "merge-motif-threshold", "MERGE-MOTIF-THRESHOLD"],
                "defines the similarity threshold for merging motifs (DEFAULT: HIGH)"
                "possible modes: LOW, MEDIUM, HIGH",
                checker_function=lambda x: x in ["LOW", "low",
                                                 "MEDIUM", "medium",
                                                 "HIGH", "high"],
                equate=False),
        _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization", "NO-PWM-LENGTH-OPTIMIZATION"],
                "do not optimize length during iterations (runtime advantages)"),
        _Option(["--max-match-positions", "max-match-positions", "MAX-MATCH-POSITIONS"],
                "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
                checker_function=_is_int,
                equate=False),
        _Switch(["--batch", "batch", "BATCH"],
                "suppress progress bars (reduce output size for batch jobs)"),
        _Option(["--maxPosSetSize", "maxPosSetSize", "maxpossetsize", "MAXPOSSETSIZE"],
                "maximum number of sequences from the positive set used [DEFAULT: all]",
                checker_function=_is_int,
                equate=False),

        _Option(["--trackedMotif", "trackedMotif", "trackedmotif", "TRACKEDMOTIF"],
                "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                checker_function=_has_motif_letter,
                equate=False),

        # Input format handling.
        _Option(["--format", "format", "FORMAT"],
                "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                checker_function=lambda x: x in ["FASTA", "fasta",
                                                 "MFASTA", "mfasta"],
                equate=False),
        _Option(["--maxMultipleSequences", "maxMultipleSequences", "maxmultiplesequences", "MAXMULTIPLESEQUENCES"],
                "maximum number of sequences used in an alignment [DEFAULT: all]",
                checker_function=_is_int,
                equate=False),

        # Localization of motif occurrences.
        _Switch(["--localization", "localization", "LOCALIZATION"],
                "use localization information to calculate combined P-values"
                "(sequences should have all the same length)"),
        _Option(["--downstream", "downstream", "DOWNSTREAM"],
                "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                checker_function=_is_int,
                equate=False),

        # Start motif / profile and expected region.
        _Option(["-m", "--startMotif", "startMotif", "startmotif", "STARTMOTIF"],
                "Start motif (IUPAC characters)",
                checker_function=_has_motif_letter,
                equate=False),
        _Option(["-p", "--profileFile", "profileFile", "profilefile", "PROFILEFILE"],
                "profile file",
                filename=True,
                equate=False),
        _Option(["--startRegion", "startRegion", "startregion", "STARTREGION"],
                "expected start position for motif occurrences relative to anchor point (--localization)",
                checker_function=_is_int,
                equate=False),
        _Option(["--endRegion", "endRegion", "endregion", "ENDREGION"],
                "expected end position for motif occurrences relative to anchor point (--localization)",
                checker_function=_is_int,
                equate=False),
    ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
163
164
166 """Run the module's doctests (PRIVATE)."""
167 print "Running XXmotif doctests..."
168 import doctest
169 doctest.testmod()
170 print "Done"
171
172
# Call _test() only when this file is executed directly as a script;
# importing the module does not trigger it.
if __name__ == "__main__":
    _test()
175