1
2
3
4
5
6
7 """Definitions for interacting with BLAST related applications.
8
9 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
10
11 - FastacmdCommandline
12 - BlastallCommandline
13 - BlastpgpCommandline
14 - RpsBlastCommandline
15
16 Wrappers for the new NCBI BLAST+ tools (written in C++):
17
18 - NcbiblastpCommandline - Protein-Protein BLAST
19 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
20 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
21 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
22 - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
23 - NcbipsiblastCommandline - Position-Specific Iterated BLAST
24 - NcbirpsblastCommandline - Reverse Position Specific BLAST
25 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
26
27 For further details, see:
28
29 Camacho et al. BLAST+: architecture and applications
30 BMC Bioinformatics 2009, 10:421
31 doi:10.1186/1471-2105-10-421
32 """
33 from Bio.Application import _Option, AbstractCommandline, _Switch
34
36 """Create a commandline for the fastacmd program from NCBI (OBSOLETE).
37
38 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the fastacmd options and initialise the command line.

    cmd    - name (or path) of the executable, "fastacmd" by default.
    kwargs - passed through unchanged to AbstractCommandline.__init__.
    """
    # The trailing False matches every other classic NCBI option declared in
    # this module, whose documented output separates switch and value with a
    # space (e.g. "blastall -d nr -i m_cold.fasta").
    # NOTE(review): the sixth _Option argument is presumably the "equate"
    # flag (a true value would render "-d=value") - confirm against
    # Bio.Application before merging.
    self.parameters = [
        _Option(["-d", "database"], ["input"], None, 1,
                "The database to retrieve from.", False),
        _Option(["-s", "search_string"], ["input"], None, 1,
                "The id to search for.", False),
    ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
48
49
51 """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).
52
53 This is provided for subclassing, it deals with shared options
54 common to all the BLAST tools (blastall, rpsblast, blastpgp).
55 """
57 assert cmd is not None
58 extra_parameters = [\
59 _Switch(["--help", "help"], ["input"],
60 "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
61 _Option(["-d", "database"], ["input"], None, 1,
62 "The database to BLAST against.", False),
63 _Option(["-i", "infile"], ["input", "file"], None, 1,
64 "The sequence to search with.", False),
65 _Option(["-e", "expectation"], ["input"], None, 0,
66 "Expectation value cutoff.", False),
67 _Option(["-m", "align_view"], ["input"], None, 0,
68 "Alignment view. Integer 0-11. Use 7 for XML output.",
69 False),
70 _Option(["-o", "align_outfile", "outfile"], ["output", "file"], None, 0,
71 "Output file for alignment.", False),
72 _Option(["-y", "xdrop_extension"], ["input"], None, 0,
73 "Dropoff for blast extensions.", False),
74 _Option(["-F", "filter"], ["input"], None, 0,
75 "Filter query sequence with SEG? T/F", False),
76 _Option(["-X", "xdrop"], ["input"], None, 0,
77 "Dropoff value (bits) for gapped alignments."),
78 _Option(["-I", "show_gi"], ["input"], None, 0,
79 "Show GI's in deflines? T/F", False),
80 _Option(["-J", "believe_query"], ["input"], None, 0,
81 "Believe the query defline? T/F", False),
82 _Option(["-Z", "xdrop_final"], ["input"], None, 0,
83 "X dropoff for final gapped alignment.", False),
84 _Option(["-z", "db_length"], ["input"], None, 0,
85 "Effective database length.", False),
86 _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
87 "seqalign file to output.", False),
88 _Option(["-v", "descriptions"], ["input"], None, 0,
89 "Number of one-line descriptions.", False),
90 _Option(["-b", "alignments"], ["input"], None, 0,
91 "Number of alignments.", False),
92 _Option(["-Y", "search_length"], ["input"], None, 0,
93 "Effective length of search space (use zero for the " + \
94 "real size).", False),
95 _Option(["-T", "html"], ["input"], None, 0,
96 "Produce HTML output? T/F", False),
97 _Option(["-U", "case_filter"], ["input"], None, 0,
98 "Use lower case filtering of FASTA sequence? T/F", False),
99
100 _Option(["-a", "nprocessors"], ["input"], None, 0,
101 "Number of processors to use.", False),
102 _Option(["-g", "gapped"], ["input"], None, 0,
103 "Whether to do a gapped alignment. T/F", False),
104 ]
105 try:
106
107
108 self.parameters = extra_parameters + self.parameters
109 except AttributeError:
110
111 self.parameters = extra_parameters
112 AbstractCommandline.__init__(self, cmd, **kwargs)
113
119
120
122 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
123
124 This is provided for subclassing, it deals with shared options
125 common to all the blastall and blastpgp tools (but not rpsblast).
126 """
def __init__(self, cmd=None, **kwargs):
    """Register the options shared by blastall and blastpgp, then initialise.

    cmd    - name of the executable; asserted not to be None.
    kwargs - passed through unchanged to _BlastCommandLine.__init__.

    Any option list already stored in self.parameters (by a subclass) is
    kept and placed after the options declared here.
    """
    assert cmd is not None
    shared_options = [
        _Option(["-G", "gap_open"], ["input"], None, 0,
                "Gap open penalty",
                False),
        _Option(["-E", "gap_extend"], ["input"], None, 0,
                "Gap extension penalty",
                False),
        _Option(["-A", "window_size"], ["input"], None, 0,
                "Multiple hits window size",
                False),
        _Option(["-f", "hit_extend"], ["input"], None, 0,
                "Threshold for extending hits.",
                False),
        _Option(["-K", "keep_hits"], ["input"], None, 0,
                " Number of best hits from a region to keep.",
                False),
        _Option(["-W", "wordsize"], ["input"], None, 0,
                "Word size",
                False),
        _Option(["-P", "passes"], ["input"], None, 0,
                "Hits/passes. Integer 0-2. 0 for multiple hit, "
                "1 for single hit (does not apply to blastn)",
                False),
    ]
    # self.parameters only exists if a subclass assigned it before calling
    # this initialiser; fall back to an empty list otherwise.
    subclass_options = getattr(self, "parameters", [])
    self.parameters = shared_options + subclass_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
154
155
157 """Create a commandline for the blastall program from NCBI (OBSOLETE).
158
159 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
160 are replacing blastall with separate tools blastn, blastp, blastx, tblastn
161 and tblastx.
162
163 Like blastall, this wrapper is now obsolete, and will be deprecated and
164 removed in a future release of Biopython.
165
166 >>> from Bio.Blast.Applications import BlastallCommandline
167 >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
168 ... database="nr", expectation=0.001)
169 >>> cline
170 BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
171 >>> print cline
172 blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
173
174 You would typically run the command line with the Python subprocess module,
175 as described in the Biopython tutorial.
176 """
177
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall specific options and initialise the command line.

    cmd    - name (or path) of the executable, "blastall" by default.
    kwargs - passed through unchanged to _BlastAllOrPgpCommandLine.__init__
             (the class doctest passes option values this way, e.g.
             program="blastx").
    """
    blastall_options = [
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use (e.g. blastp, blastn).",
                False),
        _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                "Penalty for a nucleotide mismatch (blastn only).",
                False),
        _Option(["-r", "nuc_match"], ["input"], None, 0,
                "Reward for a nucleotide match (blastn only).",
                False),
        _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                "Query Genetic code to use.",
                False),
        _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                "DB Genetic code (for tblast[nx] only).",
                False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix to use",
                False),
        _Option(["-S", "strands"], ["input"], None, 0,
                "Query strands to search against database (for blast[nx], "
                "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                False),
        _Option(["-l", "restrict_gi"], ["input"], None, 0,
                "Restrict search of database to list of GI's.",
                False),
        _Option(["-R", "checkpoint"], ["input", "file"], None, 0,
                "PSI-TBLASTN checkpoint input file.",
                False),
        _Option(["-n", "megablast"], ["input"], None, 0,
                "MegaBlast search T/F.",
                False),
        _Option(["-L", "region_length", "range_restriction"], ["input"],
                None, 0,
                "Location on query sequence (string format start,end).\n"
                "\n"
                "In older versions of BLAST, -L set the length of region\n"
                "used to judge hits (see -K parameter).",
                False),
        # NOTE(review): "frame_shit_penalty" looks like a typo for
        # "frame_shift_penalty"; left untouched here because it is the
        # keyword callers use.
        _Option(["-w", "frame_shit_penalty"], ["input"], None, 0,
                "Frame shift penalty (OOF algorithm for blastx).",
                False),
        _Option(["-t", "largest_intron"], ["input"], None, 0,
                "Length of the largest intron allowed in a translated "
                "nucleotide sequence when linking multiple distinct "
                "alignments. (0 invokes default behavior; a negative value "
                "disables linking.)",
                False),
        _Option(["-B", "num_concatenated_queries"], ["input"], None, 0,
                "Number of concatenated queries, for blastn and tblastn.",
                False),
        _Option(["-V", "oldengine"], ["input"], None, 0,
                "Force use of the legacy BLAST engine.",
                False),
        _Option(["-C", "composition_based"], ["input"], None, 0,
                "Use composition-based statistics for tblastn:\n"
                "D or d: default (equivalent to F)\n"
                "0 or F or f: no composition-based statistics\n"
                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                "2: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, conditioned on sequence properties\n"
                "3: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, unconditionally\n"
                "For programs other than tblastn, must either be absent or be\n"
                "D, F or 0.",
                False),
        _Option(["-s", "smith_waterman"], ["input"], None, 0,
                "Compute locally optimal Smith-Waterman alignments (This "
                "option is only available for gapped tblastn.) T/F",
                False),
    ]
    self.parameters = blastall_options
    # The parent initialiser puts its shared options in front of these.
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
240
241
243 """Create a commandline for the blastpgp program from NCBI (OBSOLETE).
244
245 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
246 are replacing blastpgp with a renamed tool psiblast. This module provides
247 NcbipsiblastCommandline as a wrapper for the new tool psiblast.
248
249 Like blastpgp (and blastall), this wrapper is now obsolete, and will be
250 deprecated and removed in a future release of Biopython.
251
252 >>> from Bio.Blast.Applications import BlastpgpCommandline
253 >>> cline = BlastpgpCommandline(help=True)
254 >>> cline
255 BlastpgpCommandline(cmd='blastpgp', help=True)
256 >>> print cline
257 blastpgp --help
258
259 You would typically run the command line with the Python subprocess module,
260 as described in the Biopython tutorial.
261 """
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp specific options and initialise the command line.

    cmd    - name (or path) of the executable, "blastpgp" by default.
    kwargs - passed through unchanged to _BlastAllOrPgpCommandLine.__init__
             (the class doctest uses help=True).
    """
    blastpgp_options = [
        _Option(["-C", "checkpoint_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST checkpointing.",
                False),
        _Option(["-R", "restart_infile"], ["input", "file"], None, 0,
                "Input file for PSI-BLAST restart.",
                False),
        _Option(["-k", "hit_infile"], ["input", "file"], None, 0,
                "Hit file for PHI-BLAST.",
                False),
        _Option(["-Q", "matrix_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST matrix in ASCII.",
                False),
        _Option(["-B", "align_infile"], ["input", "file"], None, 0,
                "Input alignment file for PSI-BLAST restart.",
                False),
        _Option(["-S", "required_start"], ["input"], None, 0,
                "Start of required region in query.",
                False),
        _Option(["-H", "required_end"], ["input"], None, 0,
                "End of required region in query.",
                False),
        _Option(["-j", "npasses"], ["input"], None, 0,
                "Number of passes",
                False),
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.",
                False),
        _Option(["-c", "pseudocounts"], ["input"], None, 0,
                "Pseudocounts constants for multiple passes.",
                False),
        _Option(["-h", "model_threshold"], ["input"], None, 0,
                "E-value threshold to include in multipass model.",
                False),
        _Option(["-L", "region_length"], ["input"], None, 0,
                "Cost to decline alignment (disabled when zero).",
                False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix (string, default BLOSUM62).",
                False),
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use (e.g blastpgp, patseedp or seedp).",
                False),
    ]
    self.parameters = blastpgp_options
    # The parent initialiser puts its shared options in front of these.
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
295
296
298 """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).
299
300 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
301 are replacing the old rpsblast with a new version of the same name plus a
302 second tool rpstblastn, both taking different command line arguments. This
303 module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
304 wrappers for the new tools.
305
306 Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
307 be deprecated and removed in a future release of Biopython.
308
309 >>> from Bio.Blast.Applications import RpsBlastCommandline
310 >>> cline = RpsBlastCommandline(help=True)
311 >>> cline
312 RpsBlastCommandline(cmd='rpsblast', help=True)
313 >>> print cline
314 rpsblast --help
315
316 You would typically run the command line with the Python subprocess module,
317 as described in the Biopython tutorial.
318 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the classic rpsblast options and initialise the command line.

    cmd    - name (or path) of the executable, "rpsblast" by default.
    kwargs - passed through unchanged to _BlastCommandLine.__init__
             (the class doctest uses help=True).
    """
    rpsblast_options = [
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.",
                False),
        _Option(["-P", "multihit"], ["input"], None, 0,
                "0 for multiple hit, 1 for single hit",
                False),
        _Option(["-l", "logfile"], ["output", "file"], None, 0,
                "Logfile name.",
                False),
        _Option(["-p", "protein"], ["input"], None, 0,
                "Query sequence is protein. T/F",
                False),
        _Option(["-L", "range_restriction"], ["input"], None, 0,
                "Location on query sequence (string format start,end).",
                False),
    ]
    self.parameters = rpsblast_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
337
338
340 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
341
342 This is provided for subclassing, it deals with shared options
343 common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
344 """
def __init__(self, cmd=None, **kwargs):
    """Register the options common to all BLAST+ tools, then initialise.

    cmd    - name of the executable; asserted not to be None.
    kwargs - passed through unchanged to AbstractCommandline.__init__.

    Any option list already stored in self.parameters (by a subclass) is
    kept and placed after the options declared here.
    """
    assert cmd is not None
    common_options = [
        # Core:
        _Switch(["-h", "h"], ["input"],
                "Print USAGE and DESCRIPTION; ignore other arguments."),
        _Switch(["-help", "help"], ["input"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
        _Switch(["-version", "version"], ["input"],
                "Print version number; ignore other arguments."),
        # Input query options:
        _Option(["-query", "query"], ["input", "file"], None, 0,
                "The sequence to search with.", False),
        _Option(["-query_loc", "query_loc"], ["input"], None, 0,
                "Location on the query sequence (Format: start-stop)", False),
        # General search options:
        _Option(["-db", "db"], ["input"], None, 0,
                "The database to BLAST against.", False),
        _Option(["-out", "out"], ["output", "file"], None, 0,
                "Output file for alignment.", False),
        _Option(["-evalue", "evalue"], ["input"], None, 0,
                "Expectation value cutoff.", False),
        _Option(["-word_size", "word_size"], ["input"], None, 0,
                "Word size for wordfinder algorithm.\n"
                "\n"
                "Integer. Minimum 2.", False),
        # Formatting options:
        _Option(["-outfmt", "outfmt"], ["input"], None, 0,
                "Alignment view. Integer 0-10. Use 5 for XML output (differs from classic BLAST which used 7 for XML).",
                False),
        _Switch(["-show_gis", "show_gis"], ["input"],
                "Show NCBI GIs in deflines?"),
        _Option(["-num_descriptions", "num_descriptions"], ["input"], None, 0,
                "Number of database sequences to show one-line descriptions for.\n"
                "\n"
                "Integer argument (at least zero). Default is 500.\n"
                "See also num_alignments.", False),
        # Fixed help text: it used to say "show num_alignments for" and
        # pointed back at itself instead of at num_descriptions.
        _Option(["-num_alignments", "num_alignments"], ["input"], None, 0,
                "Number of database sequences to show alignments for.\n"
                "\n"
                "Integer argument (at least zero). Default is 200.\n"
                "See also num_descriptions.", False),
        _Switch(["-html", "html"], ["input"],
                "Produce HTML output? See also the outfmt option."),
        # Query filtering options:
        _Switch(["-lcase_masking", "lcase_masking"], ["input"],
                "Use lower case filtering in query and subject sequence(s)?"),
        # Restrict search or results:
        _Option(["-gilist", "gilist"], ["input", "file"], None, 0,
                "Restrict search of database to list of GI's.\n"
                "\n"
                "Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc",
                False),
        _Option(["-negative_gilist", "negative_gilist"], ["input", "file"], None, 0,
                "Restrict search of database to everything except the listed GIs.\n"
                "\n"
                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
                False),
        _Option(["-seqidlist", "seqidlist"], ["input", "file"], None, 0,
                "Restrict search of database to list of SeqID's.\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, remote, subject, subject_loc",
                False),
        _Option(["-entrez_query", "entrez_query"], ["input"], None, 0,
                "Restrict search with the given Entrez query (requires remote).", False),
        _Option(["-max_target_seqs", "max_target_seqs"], ["input"], None, 0,
                "Maximum number of aligned sequences to keep.\n"
                "\n"
                "Integer argument (at least one).", False),
        # Statistical options:
        _Option(["-dbsize", "dbsize"], ["input"], None, 0,
                "Effective length of the database (integer)", False),
        _Option(["-searchsp", "searchsp"], ["input"], None, 0,
                "Effective length of the search space (integer)", False),
        # Extension options:
        _Option(["-xdrop_ungap", "xdrop_ungap"], ["input"], None, 0,
                "X-dropoff value (in bits) for ungapped extensions. Float.",
                False),
        _Option(["-xdrop_gap", "xdrop_gap"], ["input"], None, 0,
                "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                False),
        _Option(["-xdrop_gap_final", "xdrop_gap_final"], ["input"], None, 0,
                "X-dropoff value (in bits) for final gapped alignment. Float.",
                False),
        _Option(["-window_size", "window_size"], ["input"], None, 0,
                "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                False),
        # Search strategy options:
        _Option(["-import_search_strategy", "import_search_strategy"],
                ["input", "file"], None, 0,
                "Search strategy to use.\n"
                "\n"
                "Incompatible with: export_search_strategy", False),
        _Option(["-export_search_strategy", "export_search_strategy"],
                ["output", "file"], None, 0,
                "File name to record the search strategy used.\n"
                "\n"
                "Incompatible with: import_search_strategy", False),
        # Miscellaneous options:
        _Switch(["-parse_deflines", "parse_deflines"], ["input"],
                "Should the query and subject defline(s) be parsed?"),
        _Option(["-num_threads", "num_threads"], ["input"], None, 0,
                "Number of threads to use in the BLAST search.\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote", False),
        _Switch(["-remote", "remote"], ["input"],
                "Execute search remotely?\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."),
    ]
    # self.parameters only exists if a subclass assigned it before calling
    # this initialiser; fall back to an empty list otherwise.
    subclass_options = getattr(self, "parameters", [])
    self.parameters = common_options + subclass_options
    AbstractCommandline.__init__(self, cmd, **kwargs)
469
471 incompatibles = {"remote":["gilist", "negative_gilist", "num_threads"],
472 "import_search_strategy" : ["export_search_strategy"],
473 "gilist":["negative_gilist"],
474 "seqidlist":["gilist", "negative_gilist", "remote"]}
475 self._validate_incompatibilities(incompatibles)
476 if self.entrez_query and not self.remote :
477 raise ValueError("Option entrez_query requires remote option.")
478 AbstractCommandline._validate(self)
479
481 for a in incompatibles:
482 if self._get_parameter(a):
483 for b in incompatibles[a]:
484 if self._get_parameter(b):
485 raise ValueError("Options %s and %s are incompatible." \
486 % (a,b))
487
489 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
490
491 This is provided for subclassing, it deals with shared options
492 common to all the BLAST tools supporting two-sequence BLAST
493 (blastn, psiblast, etc) but not rpsblast or rpstblastn.
494 """
def __init__(self, cmd=None, **kwargs):
    """Register the options of the two-sequence capable BLAST+ tools.

    cmd    - name of the executable; asserted not to be None.
    kwargs - passed through unchanged to _NcbiblastCommandline.__init__.

    Any option list already stored in self.parameters (by a subclass) is
    kept and placed after the options declared here.
    """
    assert cmd is not None
    two_seq_options = [
        # General search options:
        _Option(["-gapopen", "gapopen"], ["input"], None, 0,
                "Cost to open a gap (integer).",
                False),
        _Option(["-gapextend", "gapextend"], ["input"], None, 0,
                "Cost to extend a gap (integer).",
                False),
        # BLAST-2-Sequences options:
        _Option(["-subject", "subject"], ["input", "file"], None, 0,
                "Subject sequence(s) to search.\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist.\n"
                "See also subject_loc.",
                False),
        _Option(["-subject_loc", "subject_loc"], ["input"], None, 0,
                "Location on the subject sequence (Format: start-stop)\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist, remote.\n"
                "See also subject.",
                False),
        # Restrict search or results:
        _Option(["-culling_limit", "culling_limit"], ["input"], None, 0,
                "Hit culling limit (integer).\n"
                "\n"
                "If the query range of a hit is enveloped by that of at least this many\n"
                "higher-scoring hits, delete the hit.\n"
                "\n"
                "Incompatible with: best_hit_overhang, best_hit_score_edge.",
                False),
        _Option(["-best_hit_overhang", "best_hit_overhang"], ["input"], None, 0,
                "Best Hit algorithm overhang value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.",
                False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"], ["input"], None, 0,
                "Best Hit algorithm score edge value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.",
                False),
    ]
    # self.parameters only exists if a subclass assigned it before calling
    # this initialiser; fall back to an empty list otherwise.
    subclass_options = getattr(self, "parameters", [])
    self.parameters = two_seq_options + subclass_options
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
542
543
545 incompatibles = {"subject_loc":["db", "gilist", "negative_gilist", "seqidlist", "remote"],
546 "culling_limit":["best_hit_overhang","best_hit_score_edge"],
547 "subject":["db", "gilist", "negative_gilist", "seqidlist"]}
548 self._validate_incompatibilities(incompatibles)
549 _NcbiblastCommandline._validate(self)
550
552 """Create a commandline for the NCBI BLAST+ program blastp (for proteins).
553
554 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
555 replaced the old blastall tool with separate tools for each of the searches.
556 This wrapper therefore replaces BlastallCommandline with option -p blastp.
557
558 >>> from Bio.Blast.Applications import NcbiblastpCommandline
559 >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
560 ... evalue=0.001, remote=True, ungapped=True)
561 >>> cline
562 NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
563 >>> print cline
564 blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
565
566 You would typically run the command line with the Python subprocess module,
567 as described in the Biopython tutorial.
568 """
def __init__(self, cmd="blastp", **kwargs):
    """Declare the blastp specific options and initialise the command line.

    cmd    - name (or path) of the executable, "blastp" by default.
    kwargs - passed through unchanged to _Ncbiblast2SeqCommandline.__init__
             (the class doctest passes option values this way, e.g.
             query="rosemary.pro", ungapped=True).
    """
    # Values documented below for -comp_based_stats.  Membership is tested
    # against a tuple because "x in <str>" is a substring test: the previous
    # check against "0Ft2TtDd" accepted "" and runs such as "0F", and left
    # out the documented value "f".  str() lets the integers 0 and 2 through
    # as well (they used to raise TypeError), in line with template_length
    # accepting both ints and strings elsewhere in this module.
    comp_based_stats_codes = ("0", "F", "f", "2", "T", "t", "D", "d")
    self.parameters = [
        # General search options:
        _Option(["-task", "task"], ["input"],
                lambda value: value in ["blastp", "blastp-short"], 0,
                "Task to execute (string, blastp (default) or blastp-short).", False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
        _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                lambda value: str(value) in comp_based_stats_codes, 0,
                "Use composition-based statistics (string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score adjustment as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.", False),
        # Query filtering options:
        # (help text fixed: the quoted default was missing its closing quote)
        _Option(["-seg", "seg"], ["input"], None, 0,
                'Filter query sequence with SEG (string).\n'
                '\n'
                'Format: "yes", "window locut hicut", or "no" to disable.\n'
                'Default is "12 2.2 2.5".', False),
        # Restrict search or results:
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        # Extension options:
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
        # Miscellaneous options:
        _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                "Compute locally optimal Smith-Waterman alignments?"),
    ]
    # The parent initialiser puts its shared options in front of these.
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
609
614
615
617 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).
618
619 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
620 replaced the old blastall tool with separate tools for each of the searches.
621 This wrapper therefore replaces BlastallCommandline with option -p blastn.
622
623 For example, to run a search against the "nt" nucleotide database using the
624 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
625 cut off of 0.001, saving the output to a file in XML format:
626
627 >>> from Bio.Blast.Applications import NcbiblastnCommandline
628 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
629 ... evalue=0.001, out="m_cold.xml", outfmt=5)
630 >>> cline
631 NcbiblastnCommandline(cmd='blastn', query='m_cold.fasta', db='nt', out='m_cold.xml', evalue=0.001, outfmt=5, strand='plus')
632 >>> print cline
633 blastn -query m_cold.fasta -db nt -out m_cold.xml -evalue 0.001 -outfmt 5 -strand plus
634
635 You would typically run the command line with the Python subprocess module,
636 as described in the Biopython tutorial.
637 """
638 - def __init__(self, cmd="blastn", **kwargs):
639 self.parameters = [ \
640
641 _Option(["-strand", "strand"], ["input"],
642 lambda value : value in ["both", "minus", "plus"],0,
643 """Query strand(s) to search against database/subject.
644
645 Values allowed are "both" (default), "minus", "plus".""", False),
646
647 _Option(["-task", "task"], ["input"],
648 lambda value : value in ['blastn', 'blastn-short', 'dc-megablast',
649 'megablast', 'vecscreen'], 0,
650 """Task to execute (string, default 'megablast')
651
652 Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
653 (the default), or 'vecscreen'.""", False),
654 _Option(["-penalty", "penalty"], ["input"], None, 0,
655 "Penalty for a nucleotide mismatch (integer, at most zero).", False),
656 _Option(["-reward", "reward"], ["input"], None, 0,
657 "Reward for a nucleotide match (integer, at least zero).", False),
658
659
660
661 _Option(["-index_name", "index_name"], ["input"], None, 0,
662 "MegaBLAST database index name.", False),
663
664 _Option(["-dust", "dust"], ["input"], None, 0,
665 """Filter query sequence with DUST (string).
666
667 Format: 'yes', 'level window linker', or 'no' to disable.
668 Default = '20 64 1'.
669 """, False),
670 _Option(["-filtering_db", "filtering_db"], ["input"], None, 0,
671 "BLAST database containing filtering elements (i.e. repeats).", False),
672 _Option(["-window_masker_taxid", "window_masker_taxid"], ["input"], None, 0,
673 "Enable WindowMasker filtering using a Taxonomic ID (integer).", False),
674 _Option(["-window_masker_db", "window_masker_db"], ["input"], None, 0,
675 "Enable WindowMasker filtering using this repeats database (string).", False),
676
677 _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
678 """Filtering algorithm for soft masking (integer).
679
680 Filtering algorithm ID to apply to the BLAST database as soft masking.
681
682 Incompatible with: subject, subject_loc""", False),
683 _Option(["-perc_identity", "perc_identity"], ["input"], None, 0,
684 "Percent identity (real, 0 to 100 inclusive).", False),
685
686 _Option(["-template_type", "template_type"], ["input"],
687 lambda value : value in ['coding', 'coding_and_optimal','optimal'], 0,
688 """Discontiguous MegaBLAST template type (string).
689
690 Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
691 Requires: template_length.""", False),
692 _Option(["-template_length", "template_length"], ["input"],
693 lambda value : value in [16,18,21,'16','18','21'], 0,
694 """Discontiguous MegaBLAST template length (integer).
695
696 Allowed values: 16, 18, 21
697
698 Requires: template_type.""", False),
699
700 _Switch(["-no_greedy", "no_greedy"], ["input"],
701 "Use non-greedy dynamic programming extension"),
702 _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], ["input"], None, 0,
703 "Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages (integer).", False),
704 _Switch(["-ungapped", "ungapped"], ["input"],
705 "Perform ungapped alignment only?"),
706 _Option(["-off_diagonal_range", "off_diagonal_range"], ["input"], None, 0,
707 """Number of off-diagonals to search for the 2nd hit (integer).
708
709 Expects a positive integer, or 0 (default) to turn off.
710
711 Added in BLAST 2.2.23+
712 """, False),
713 ]
714 _Ncbiblast2SeqCommandline.