Package Bio :: Package Blast :: Module Applications
[hide private]
[frames] | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2010 by Phillip Garland. 
   4  # All rights reserved. 
   5  # This code is part of the Biopython distribution and governed by its 
   6  # license.  Please see the LICENSE file that should have been included 
   7  # as part of this package. 
   8  """Definitions for interacting with BLAST related applications. 
   9   
  10  Obsolete wrappers for the old/classic NCBI BLAST tools (written in C): 
  11   
  12  - FastacmdCommandline 
  13  - BlastallCommandline 
  14  - BlastpgpCommandline 
  15  - RpsBlastCommandline 
  16   
  17  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  18   
  19  - NcbiblastpCommandline - Protein-Protein BLAST 
  20  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  21  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  22  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
  23  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  24  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  25  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  26  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  27  - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats 
  28   
  29  For further details, see: 
  30   
  31  Camacho et al. BLAST+: architecture and applications 
  32  BMC Bioinformatics 2009, 10:421 
  33  doi:10.1186/1471-2105-10-421 
  34  """ 
  35  from __future__ import print_function 
  36   
  37  from Bio import BiopythonDeprecationWarning 
  38   
  39  from Bio.Application import _Option, AbstractCommandline, _Switch 
  40   
  41   
class FastacmdCommandline(AbstractCommandline):
    """Command line wrapper for NCBI's legacy fastacmd program (OBSOLETE).

    Both the database ("-d") and the search string ("-s") are declared
    as required options.
    """

    def __init__(self, cmd="fastacmd", **kwargs):
        """Declare the two supported options, then defer to the base class."""
        database = _Option(["-d", "database"],
                           "The database to retrieve from.",
                           is_required=True,
                           equate=False)
        search_string = _Option(["-s", "search_string"],
                                "The id to search for.",
                                is_required=True,
                                equate=False)
        self.parameters = [database, search_string]
        AbstractCommandline.__init__(self, cmd, **kwargs)
58 59
class _BlastCommandLine(AbstractCommandline):
    """Shared base class for the classic NCBI BLAST wrappers (PRIVATE).

    Meant for subclassing only: it contributes the options that the
    classic tools (blastall, rpsblast, blastpgp) have in common.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared options to any the subclass already declared."""
        assert cmd is not None

        def option(names, description, **settings):
            # Every value-taking option in this list passes equate=False.
            return _Option(names, description, equate=False, **settings)

        shared = [
            _Switch(["--help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
            option(["-d", "database"],
                   "The database to BLAST against.",
                   is_required=True),
            option(["-i", "infile"],
                   "The sequence to search with.",
                   filename=True,
                   is_required=True),
            option(["-e", "expectation"],
                   "Expectation value cutoff."),
            option(["-m", "align_view"],
                   "Alignment view. Integer 0-11. Use 7 for XML output."),
            option(["-o", "align_outfile", "outfile"],
                   "Output file for alignment.",
                   filename=True),
            option(["-y", "xdrop_extension"],
                   "Dropoff for blast extensions."),
            option(["-F", "filter"],
                   "Filter query sequence with SEG? T/F"),
            option(["-X", "xdrop"],
                   "Dropoff value (bits) for gapped alignments."),
            option(["-I", "show_gi"],
                   "Show GI's in deflines? T/F"),
            option(["-J", "believe_query"],
                   "Believe the query defline? T/F"),
            option(["-Z", "xdrop_final"],
                   "X dropoff for final gapped alignment."),
            option(["-z", "db_length"],
                   "Effective database length."),
            option(["-O", "seqalign_file"],
                   "seqalign file to output.",
                   filename=True),
            option(["-v", "descriptions"],
                   "Number of one-line descriptions."),
            option(["-b", "alignments"],
                   "Number of alignments."),
            option(["-Y", "search_length"],
                   "Effective length of search space (use zero for the "
                   "real size)."),
            option(["-T", "html"],
                   "Produce HTML output? T/F"),
            option(["-U", "case_filter"],
                   "Use lower case filtering of FASTA sequence? T/F"),
            option(["-a", "nprocessors"],
                   "Number of processors to use."),
            option(["-g", "gapped"],
                   "Whether to do a gapped alignment. T/F"),
        ]
        # The shared options go first, just in case the subclass declared
        # arguments which must come last.  If the subclass never set
        # self.parameters the shared options are used on their own.
        self.parameters = shared + getattr(self, "parameters", [])
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the base class validation unless help was requested."""
        # With help set we do not want to check the normally mandatory
        # arguments like the database.
        if not self.help:
            AbstractCommandline._validate(self)
152 153
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared base class for the blastall and blastpgp wrappers (PRIVATE).

    Meant for subclassing only: it contributes the options common to
    blastall and blastpgp (but not rpsblast).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared options to any the subclass already declared."""
        assert cmd is not None

        def option(names, description):
            # Every option in this list passes equate=False.
            return _Option(names, description, equate=False)

        shared = [
            option(["-G", "gap_open"], "Gap open penalty"),
            option(["-E", "gap_extend"], "Gap extension penalty"),
            option(["-A", "window_size"], "Multiple hits window size"),
            option(["-f", "hit_extend"], "Threshold for extending hits."),
            option(["-K", "keep_hits"],
                   " Number of best hits from a region to keep."),
            option(["-W", "wordsize"], "Word size"),
            option(["-P", "passes"],
                   "Hits/passes. Integer 0-2. 0 for multiple hit, "
                   "1 for single hit (does not apply to blastn)"),
        ]
        # The shared options go first, just in case the subclass declared
        # arguments which must come last.  If the subclass never set
        # self.parameters the shared options are used on their own.
        self.parameters = shared + getattr(self, "parameters", [])
        _BlastCommandLine.__init__(self, cmd, **kwargs)
194 195
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Command line wrapper for NCBI's classic blastall program (OBSOLETE).

    BLAST+ (BLAST rewritten in C++ instead of C) replaces blastall with the
    separate tools blastn, blastp, blastx, tblastn and tblastx.

    Like blastall itself this wrapper is obsolete: creating an instance
    issues a BiopythonDeprecationWarning, and the wrapper will be removed
    in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print(cline)
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    #TODO - This could use more checking for valid parameters to the program.

    def __init__(self, cmd="blastall",**kwargs):
        """Warn about the deprecation, then declare the blastall options."""
        import warnings
        notice = ("Like blastall, this wrapper is now deprecated and will be "
                  "removed in a future release of Biopython.")
        warnings.warn(notice, BiopythonDeprecationWarning)

        def option(names, description, **settings):
            # Every option in this list passes equate=False.
            return _Option(names, description, equate=False, **settings)

        # Sorted in the same order as the output from blastall --help
        # which should make it easier to keep them up to date in future.
        # Note that some arguments are defined in the base classes.
        self.parameters = [
            option(["-p", "program"],
                   "The blast program to use (e.g. blastp, blastn).",
                   is_required=True),
            option(["-q", "nuc_mismatch"],
                   "Penalty for a nucleotide mismatch (blastn only)."),
            option(["-r", "nuc_match"],
                   "Reward for a nucleotide match (blastn only)."),
            option(["-Q", "query_genetic_code"],
                   "Query Genetic code to use."),
            option(["-D", "db_genetic_code"],
                   "DB Genetic code (for tblast[nx] only)."),
            option(["-M", "matrix"],
                   "Matrix to use"),
            option(["-S", "strands"],
                   "Query strands to search against database (for blast[nx], "
                   "and tblastx). 3 is both, 1 is top, 2 is bottom."),
            option(["-l", "restrict_gi"],
                   "Restrict search of database to list of GI's."),
            option(["-R", "checkpoint"],
                   "PSI-TBLASTN checkpoint input file.",
                   filename=True),
            option(["-n", "megablast"],
                   "MegaBlast search T/F."),
            # The old name "region_length" is for consistency with our
            # old blastall function wrapper:
            option(["-L", "region_length", "range_restriction"],
                   """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter)."""),
            option(["-w", "frame_shift_penalty"],
                   "Frame shift penalty (OOF algorithm for blastx)."),
            option(["-t", "largest_intron"],
                   "Length of the largest intron allowed in a translated "
                   "nucleotide sequence when linking multiple distinct "
                   "alignments. (0 invokes default behavior; a negative value "
                   "disables linking.)"),
            option(["-B", "num_concatenated_queries"],
                   "Number of concatenated queries, for blastn and tblastn."),
            option(["-V", "oldengine"],
                   "Force use of the legacy BLAST engine."),
            option(["-C", "composition_based"],
                   """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                    21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                    21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0."""),
            option(["-s", "smith_waterman"],
                   "Compute locally optimal Smith-Waterman alignments (This "
                   "option is only available for gapped tblastn.) T/F"),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
299 300
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Command line wrapper for NCBI's classic blastpgp program (OBSOLETE).

    BLAST+ (BLAST rewritten in C++ instead of C) replaces blastpgp with a
    renamed tool, psiblast.  This module provides NcbipsiblastCommandline
    as a wrapper for that new tool.

    Like blastpgp (and blastall) this wrapper is obsolete: creating an
    instance issues a BiopythonDeprecationWarning, and the wrapper will be
    removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print(cline)
    blastpgp --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastpgp",**kwargs):
        """Warn about the deprecation, then declare the blastpgp options."""
        import warnings
        notice = ("Like blastpgp (and blastall), this wrapper is now "
                  "deprecated and will be removed in a future release of "
                  "Biopython.")
        warnings.warn(notice, BiopythonDeprecationWarning)

        def option(names, description, **settings):
            # Every option in this list passes equate=False.
            return _Option(names, description, equate=False, **settings)

        self.parameters = [
            option(["-C", "checkpoint_outfile"],
                   "Output file for PSI-BLAST checkpointing.",
                   filename=True),
            option(["-R", "restart_infile"],
                   "Input file for PSI-BLAST restart.",
                   filename=True),
            option(["-k", "hit_infile"],
                   "Hit file for PHI-BLAST.",
                   filename=True),
            option(["-Q", "matrix_outfile"],
                   "Output file for PSI-BLAST matrix in ASCII.",
                   filename=True),
            option(["-B", "align_infile"],
                   "Input alignment file for PSI-BLAST restart.",
                   filename=True),
            option(["-S", "required_start"],
                   "Start of required region in query."),
            option(["-H", "required_end"],
                   "End of required region in query."),
            option(["-j", "npasses"],
                   "Number of passes"),
            option(["-N", "nbits_gapping"],
                   "Number of bits to trigger gapping."),
            option(["-c", "pseudocounts"],
                   "Pseudocounts constants for multiple passes."),
            option(["-h", "model_threshold"],
                   "E-value threshold to include in multipass model."),
            # Does the old name "region_length" for -L make sense?
            option(["-L", "region_length"],
                   "Cost to decline alignment (disabled when zero)."),
            option(["-M", "matrix"],
                   "Matrix (string, default BLOSUM62)."),
            option(["-p", "program"],
                   "The blast program to use (e.g blastpgp, patseedp or seedp).",
                   is_required=True),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
376 377
class RpsBlastCommandline(_BlastCommandLine):
    """Command line wrapper for NCBI's classic rpsblast program (OBSOLETE).

    BLAST+ (BLAST rewritten in C++ instead of C) replaces the old rpsblast
    with a new version of the same name plus a second tool, rpstblastn; both
    take different command line arguments.  This module provides
    NcbirpsblastCommandline and NcbirpstblastnCommandline as wrappers for
    the new tools.

    Like the old rpsblast (and blastall) this wrapper is obsolete: creating
    an instance issues a BiopythonDeprecationWarning, and the wrapper will
    be removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print(cline)
    rpsblast --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast",**kwargs):
        """Warn about the deprecation, then declare the rpsblast options."""
        import warnings
        notice = ("Like the old rpsblast (and blastall), this wrapper is now "
                  "deprecated and will be removed in a future release of "
                  "Biopython.")
        warnings.warn(notice, BiopythonDeprecationWarning)

        def option(names, description, **settings):
            # Every option in this list passes equate=False.
            return _Option(names, description, equate=False, **settings)

        self.parameters = [
            # Note -N is also in blastpgp, but not blastall
            option(["-N", "nbits_gapping"],
                   "Number of bits to trigger gapping."),
            # Note blastall and blastpgp wrappers have -P with name "passes".
            # If this is the same thing, we should be consistent!
            option(["-P", "multihit"],
                   "0 for multiple hit, 1 for single hit"),
            option(["-l", "logfile"],
                   "Logfile name.",
                   filename=True),
            option(["-p", "protein"],
                   "Query sequence is protein. T/F"),
            option(["-L", "range_restriction"],
                   "Location on query sequence (string format start,end)."),
        ]
        _BlastCommandLine.__init__(self, cmd, **kwargs)
425 426 ############################################################################## 427 # Legacy BLAST wrappers above, (new) BLAST+ wrappers below 428 ############################################################################## 429 430
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
    AND blast_formatter).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared core/output options to the subclass options.

        cmd is the executable name and must not be None; kwargs are handed
        on unchanged to AbstractCommandline.__init__.
        """
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION; ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number; ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            #Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view. Integer 0-11. Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
            #TODO - Document and test the column options
            _Switch(["-show_gis", "show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions", "num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            # The description used to read "show num_alignments for" and
            # cross-referenced itself; it now points at num_descriptions.
            _Option(["-num_alignments", "num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 200.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
        ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error? The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        incompatibles maps an option name to a list of option names which
        must not be used together with it.  Raises ValueError for the first
        pair found where both options have a true value.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a, b))
497 498
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared search options to the subclass options.

        cmd is the executable name and must not be None; kwargs are handed
        on unchanged to the parent class.
        """
        assert cmd is not None
        extra_parameters = [
            #Input query options:
            _Option(["-query", "query"],
                    "The sequence to search with.",
                    filename=True,
                    equate=False),  # Should this be required?
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            #General search options:
            _Option(["-db", "db"],
                    "The database to BLAST against.",
                    equate=False),
            _Option(["-evalue", "evalue"],
                    "Expectation value cutoff.",
                    equate=False),
            _Option(["-word_size", "word_size"],
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""",
                    equate=False),
            #BLAST-2-Sequences options:
            # - see subclass
            #Formatting options:
            # - see baseclass
            #Query filtering options
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            #_Switch(["-soft_masking", "soft_masking"],
            #        "Apply filtering locations as soft masks?"),
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            #Restrict search or results
            _Option(["-gilist", "gilist"],
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-negative_gilist", "negative_gilist"],
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-seqidlist", "seqidlist"],
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query (requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    """Maximum number of aligned sequences to keep.

                    Integer argument (at least one).""",
                    equate=False),
            #Statistical options
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer)",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer)",
                    equate=False),
            _Option(["-max_hsps_per_subject", "max_hsps_per_subject"],
                    "Override maximum number of HSPs per subject to save for ungapped searches (integer)",
                    equate=False),
            #Extension options
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                    equate=False),
            # Search strategy options
            _Option(["-import_search_strategy", "import_search_strategy"],
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""",
                    filename=True,
                    equate=False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""",
                    filename=True,
                    equate=False),
            #Miscellaneous options
            _Option(["-num_threads", "num_threads"],
                    """Number of threads to use in the BLAST search.

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Switch(["-remote", "remote"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
        ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error? The subclass should have set this up!
            self.parameters = extra_parameters
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject incompatible option combinations, then validate as usual.

        Raises ValueError if two mutually exclusive options are both set,
        or if entrez_query is used without remote.
        """
        incompatibles = {"remote":["gilist", "negative_gilist", "num_threads"],
                         "import_search_strategy" : ["export_search_strategy"],
                         "gilist":["negative_gilist"],
                         "seqidlist":["gilist", "negative_gilist", "remote"]}
        self._validate_incompatibilities(incompatibles)
        if self.entrez_query and not self.remote:
            raise ValueError("Option entrez_query requires remote option.")
        # Delegate to the direct parent (as the other BLAST+ subclasses do)
        # rather than jumping straight to AbstractCommandline, so that any
        # validation added to the parent is not bypassed.
        _NcbibaseblastCommandline._validate(self)
631 632
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Shared base class for the two-sequence BLAST+ wrappers (PRIVATE).

    Meant for subclassing only: it contributes the options common to the
    BLAST+ tools which support two-sequence BLAST (blastn, psiblast, etc)
    but not rpsblast or rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared options to any the subclass already declared."""
        assert cmd is not None

        def option(names, description, **settings):
            # Every option in this list passes equate=False.
            return _Option(names, description, equate=False, **settings)

        shared = [
            # General search options:
            option(["-gapopen", "gapopen"],
                   "Cost to open a gap (integer)."),
            option(["-gapextend", "gapextend"],
                   "Cost to extend a gap (integer)."),
            # BLAST-2-Sequences options:
            option(["-subject", "subject"],
                   """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                   filename=True),
            option(["-subject_loc", "subject_loc"],
                   """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.

                    See also subject."""),
            # Restrict search or results:
            option(["-culling_limit", "culling_limit"],
                   """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """),
            option(["-best_hit_overhang", "best_hit_overhang"],
                   """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit."""),
            option(["-best_hit_score_edge", "best_hit_score_edge"],
                   """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit."""),
        ]
        # The shared options go first, just in case the subclass declared
        # arguments which must come last.  If the subclass never set
        # self.parameters the shared options are used on their own.
        self.parameters = shared + getattr(self, "parameters", [])
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the two-sequence specific clashes, then the parent's rules."""
        clashes = {
            "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"],
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
            "subject": ["db", "gilist", "negative_gilist", "seqidlist"],
        }
        self._validate_incompatibilities(clashes)
        _NcbiblastCommandline._validate(self)
706 707
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Shared base class for the main BLAST+ search wrappers (PRIVATE).

    Meant for subclassing only: it contributes the options common to the
    main BLAST tools blastp, blastn, blastx, tblastx, tblastn but not
    psiblast, rpsblast or rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the database masking options to the subclass options."""
        assert cmd is not None
        # Restrict search or results:
        soft_mask = _Option(["-db_soft_mask", "db_soft_mask"],
                            """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                            equate=False)
        hard_mask = _Option(["-db_hard_mask", "db_hard_mask"],
                            """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                            equate=False)
        masking = [soft_mask, hard_mask]
        # The masking options go first, just in case the subclass declared
        # arguments which must come last.  If the subclass never set
        # self.parameters the masking options are used on their own.
        self.parameters = masking + getattr(self, "parameters", [])
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the masking specific clashes, then the parent's rules."""
        clashes = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
        }
        self._validate_incompatibilities(clashes)
        _Ncbiblast2SeqCommandline._validate(self)
748 749
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print(cline)
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastp", **kwargs):
        """Declare the blastp specific options, then defer to the parent.

        cmd is the executable name; kwargs are handed on unchanged to the
        parent class.
        """
        self.parameters = [
            #General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value : value in ["blastp",
                                                              "blastp-short"],
                    equate=False),
            # equate=False was missing here, unlike every other value
            # option in this file.
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            # The checker compares against the documented values one by
            # one.  A substring test against "0Ft2TtDd" rejected "f" and
            # accepted "" and fragments such as "Ft".
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value : value in ("0", "F", "f",
                                                              "2", "T", "t",
                                                              "D", "d"),
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
807 808
809 -class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
810 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). 811 812 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 813 replaced the old blastall tool with separate tools for each of the searches. 814 This wrapper therefore replaces BlastallCommandline with option -p blastn. 815 816 For example, to run a search against the "nt" nucleotide database using the 817 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value 818 cut off of 0.001, saving the output to a file in XML format: 819 820 >>> from Bio.Blast.Applications import NcbiblastnCommandline 821 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", 822 ... evalue=0.001, out="m_cold.xml", outfmt=5) 823 >>> cline 824 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus') 825 >>> print(cline) 826 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus 827 828 You would typically run the command line with cline() or via the Python 829 subprocess module, as described in the Biopython tutorial. 830 """
def __init__(self, cmd="blastn", **kwargs):
    """Declare the blastn-specific options, then initialise the parent.

    cmd is the executable name (default "blastn"); any keyword arguments
    are handed on unchanged to _NcbiblastMain2SeqCommandline.__init__.
    """
    def _one_of(*allowed):
        # Checker factory: the returned callable accepts only listed values.
        return lambda value: value in allowed

    # Input query options:
    query_opts = [
        _Option(["-strand", "strand"],
                """Query strand(s) to search against database/subject.

                Values allowed are "both" (default), "minus", "plus".""",
                checker_function=_one_of("both", "minus", "plus"),
                equate=False),
    ]
    # General search options:
    search_opts = [
        _Option(["-task", "task"],
                """Task to execute (string, default 'megablast')

                Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
                (the default), or 'vecscreen'.""",
                checker_function=_one_of('blastn', 'blastn-short',
                                         'dc-megablast', 'megablast',
                                         'vecscreen'),
                equate=False),
        _Option(["-penalty", "penalty"],
                "Penalty for a nucleotide mismatch (integer, at most zero).",
                equate=False),
        _Option(["-reward", "reward"],
                "Reward for a nucleotide match (integer, at least zero).",
                equate=False),
        # TODO - Does this need an argument or is it a switch?
        # _Option(["-use_index", "use_index"],
        #         "Use MegaBLAST database index (boolean).",
        #         equate=False),
        _Option(["-index_name", "index_name"],
                "MegaBLAST database index name.",
                equate=False),
    ]
    # Query filtering options:
    filter_opts = [
        _Option(["-dust", "dust"],
                """Filter query sequence with DUST (string).

                Format: 'yes', 'level window linker', or 'no' to disable.
                Default = '20 64 1'.
                """,
                equate=False),
        _Option(["-filtering_db", "filtering_db"],
                "BLAST database containing filtering elements (i.e. repeats).",
                equate=False),
        _Option(["-window_masker_taxid", "window_masker_taxid"],
                "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                equate=False),
        _Option(["-window_masker_db", "window_masker_db"],
                "Enable WindowMasker filtering using this repeats database (string).",
                equate=False),
    ]
    # Restrict search or results:
    restrict_opts = [
        _Option(["-perc_identity", "perc_identity"],
                "Percent identity (real, 0 to 100 inclusive).",
                equate=False),
    ]
    # Discontiguous MegaBLAST options (the two are cross-checked in _validate):
    template_opts = [
        _Option(["-template_type", "template_type"],
                """Discontiguous MegaBLAST template type (string).

                Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
                Requires: template_length.""",
                checker_function=_one_of('coding', 'coding_and_optimal',
                                         'optimal'),
                equate=False),
        _Option(["-template_length", "template_length"],
                """Discontiguous MegaBLAST template length (integer).

                Allowed values: 16, 18, 21

                Requires: template_type.""",
                # Both the integer and the string spelling are accepted.
                checker_function=_one_of(16, 18, 21, '16', '18', '21'),
                equate=False),
    ]
    # Extension options:
    extension_opts = [
        _Switch(["-no_greedy", "no_greedy"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                "Minimum raw gapped score to keep an alignment in the "
                "preliminary gapped and traceback stages (integer).",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        _Option(["-off_diagonal_range", "off_diagonal_range"],
                """Number of off-diagonals to search for the 2nd hit (integer).

                Expects a positive integer, or 0 (default) to turn off.

                Added in BLAST 2.2.23+
                """,
                equate=False),
    ]
    # The parent initialiser expects self.parameters to be set beforehand.
    self.parameters = (query_opts + search_opts + filter_opts
                       + restrict_opts + template_opts + extension_opts)
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
923
def _validate(self):
    """Check that the discontiguous MegaBLAST template options are paired.

    Raises ValueError when exactly one of template_type and
    template_length is set; otherwise continues with the validation
    implemented by _NcbiblastMain2SeqCommandline.
    """
    # Truthiness of the two options must agree: both set or both unset.
    if bool(self.template_type) != bool(self.template_length):
        # The message must name both options, not template_type twice.
        raise ValueError("Options template_type and template_length "
                         "require each other.")
    _NcbiblastMain2SeqCommandline._validate(self)
929 930
class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Command line wrapper for blastx from NCBI BLAST+ (nucleotide query, protein database).

    BLAST+ is the C++ rewrite of BLAST, in which the NCBI split the old
    blastall tool into one tool per kind of search. Use this class in place
    of BlastallCommandline with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print(cline)
    blastx -query m_cold.fasta -db nr -evalue 0.001

    The command line is normally run by calling cline() or through the
    Python subprocess module; see the Biopython tutorial.
    """

    def __init__(self, cmd="blastx", **kwargs):
        """Declare the blastx-specific options, then initialise the parent.

        cmd is the executable name (default "blastx"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        def _one_of(*allowed):
            # Checker factory: accept only the listed values.
            return lambda value: value in allowed

        # Input query options:
        query_opts = [
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=_one_of("both", "minus", "plus"),
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
        ]
        # General search options:
        search_opts = [
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics for blastp, blastx, or tblastn:

                    D or d: default (equivalent to 2 )
                    0 or F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2 or T or t : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally

                    For programs other than tblastn, must either be absent or be D, F or 0
                    Default = `2'
                    """,
                    equate=False),
        ]
        # Query filtering options:
        filter_opts = [
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]
        # Extension options:
        extension_opts = [
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        self.parameters = (query_opts + search_opts + filter_opts
                           + extension_opts)
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1010 1011
class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print(cline)
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="tblastn", **kwargs):
        """Declare the tblastn-specific options, then initialise the parent.

        cmd is the executable name (default "tblastn"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        def _str_one_of(*allowed):
            # Compare the string form against whole values, so integers such
            # as 2 are accepted while "" and fragments such as "Ft" are not
            # (a substring test against one long string allowed those).
            return lambda value: str(value) in allowed

        self.parameters = [
            # General search options:
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                    2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""",
                    # Every value listed in the help text, including "f".
                    checker_function=_str_one_of("0", "F", "f", "1", "2",
                                                 "T", "t", "D", "d", "3"),
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            # Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            # PSI-TBLASTN options:
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""",
                    filename=True,
                    equate=False),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1090 1091
class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print(cline)
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="tblastx", **kwargs):
        """Declare the tblastx-specific options, then initialise the parent.

        cmd is the executable name (default "tblastx"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        self.parameters = [
            # Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
            # General search options:
            # db_gencode applies to the database/subject side; its help text
            # must not repeat the query_gencode wording.
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1153 1154
class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print(cline)
    psiblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="psiblast", **kwargs):
        """Declare the psiblast-specific options, then initialise the parent.

        cmd is the executable name (default "psiblast"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        def _str_one_of(*allowed):
            # Compare the string form against whole values, so integers such
            # as 2 are accepted while "" and fragments such as "Ft" are not
            # (a substring test against one long string allowed those).
            return lambda value: str(value) in allowed

        self.parameters = [
            # General search options:
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment
                    as in Bioinformatics 21:902-911, 2005, conditioned on
                    sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    # Every value listed in the help text, including "f".
                    checker_function=_str_one_of("0", "F", "f", "2",
                                                 "T", "t", "D", "d"),
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Extension options:
            _Option(["-gap_trigger", "gap_trigger"],
                    "Number of bits to trigger gapping (float, default 22)",
                    equate=False),
            # Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            # PSI-BLAST options:
            _Option(["-num_iterations", "num_iterations"],
                    """Number of iterations to perform, integer

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Option(["-out_pssm", "out_pssm"],
                    "File name to store checkpoint file",
                    filename=True,
                    equate=False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    "File name to store ASCII version of PSSM",
                    filename=True,
                    equate=False),
            _Option(["-in_msa", "in_msa"],
                    """File name of multiple sequence alignment to restart
                    PSI-BLAST

                    Incompatible with: in_pssm, query""",
                    filename=True,
                    equate=False),
            _Option(["-msa_master_idx", "msa_master_idx"],
                    """Index of sequence to use as master in MSA.

                    Index (1-based) of sequence to use as the master in the
                    multiple sequence alignment. If not specified, the first
                    sequence is used.""",
                    equate=False),
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: in_msa, query, phi_pattern""",
                    filename=True,
                    equate=False),
            # PSSM engine options:
            _Option(["-pseudocount", "pseudocount"],
                    """Pseudo-count value used when constructing PSSM

                    Integer. Default is zero.""",
                    equate=False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    """E-value inclusion threshold for pairwise alignments

                    Float. Default is 0.002.""",
                    equate=False),
            _Switch(["-ignore_msa_master", "ignore_msa_master"],
                    """Ignore the master sequence when creating PSSM

                    * Requires: in_msa
                    * Incompatible with: msa_master_idx, in_pssm, query,
                    query_loc, phi_pattern
                    """),
            # PHI-BLAST options:
            _Option(["-phi_pattern", "phi_pattern"],
                    """File name containing pattern to search

                    Incompatible with: in_pssm""",
                    filename=True,
                    equate=False),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check mutually exclusive options, then run the parent validation.

        The mapping below mirrors the "Incompatible with" notes in the option
        help texts; it is handed to self._validate_incompatibilities before
        _Ncbiblast2SeqCommandline._validate is called.
        """
        incompatibles = {"num_iterations": ["remote"],
                         "in_msa": ["in_pssm", "query"],
                         "in_pssm": ["in_msa", "query", "phi_pattern"],
                         "ignore_msa_master": ["msa_master_idx", "in_pssm",
                                               "query", "query_loc",
                                               "phi_pattern"],
                         }
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
1279 1280
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Wrapper for the NCBI BLAST+ program rpsblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name. This
    wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print(cline)
    rpsblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-specific options, then initialise the parent.

        cmd is the executable name (default "rpsblast"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        def _str_one_of(*allowed):
            # Compare the string form against whole values, so integers such
            # as 0 or 1 are accepted while "" and fragments such as "0F" are
            # not (a substring test against one long string allowed those).
            return lambda value: str(value) in allowed

        self.parameters = [
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Restrict search or results:
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            # General search options:
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics.

                    D or d: default (equivalent to 0 )
                    0 or F or f: Simplified Composition-based statistics as in
                    Bioinformatics 15:1000-1011, 1999
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001

                    Default = `0'
                    """,
                    checker_function=_str_one_of("D", "d", "0", "F", "f",
                                                 "1", "T", "t"),
                    equate=False),
            # Misc options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check mutually exclusive options, then run the parent validation.

        culling_limit is declared incompatible with the two best-hit
        options; the mapping is handed to self._validate_incompatibilities
        before _NcbiblastCommandline._validate is called.
        """
        incompatibles = {"culling_limit": ["best_hit_overhang",
                                           "best_hit_score_edge"]}
        self._validate_incompatibilities(incompatibles)
        _NcbiblastCommandline._validate(self)
1354 1355
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpstblastn from NCBI BLAST+.

    In BLAST+ (the C++ rewrite of BLAST) the NCBI replaced the old rpsblast
    tool with a similar tool of the same name, and added rpstblastn as a
    separate tool for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print(cline)
    rpstblastn -help

    The command line is normally run by calling cline() or through the
    Python subprocess module; see the Biopython tutorial.
    """

    def __init__(self, cmd="rpstblastn", **kwargs):
        """Declare the rpstblastn-specific options, then initialise the parent.

        cmd is the executable name (default "rpstblastn"); keyword arguments
        are passed on unchanged to the parent initialiser.
        """
        def _is_strand(value):
            # Only these three strand names are accepted.
            return value in ("both", "minus", "plus")

        # Input query options:
        query_opts = [
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=_is_strand,
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
        ]
        # Query filtering options:
        filter_opts = [
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]
        # Extension options:
        extension_opts = [
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        self.parameters = query_opts + filter_opts + extension_opts
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1402 1403
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Command line wrapper for blast_formatter from NCBI BLAST+.

    From BLAST 2.2.24+ (the BLAST suite rewritten in C++ instead of C) all
    the search tools gained an ASN.1 output format option, and
    blast_formatter was extended to accept that output as its input.

    blast_formatter converts the ASN.1 output into the other output
    formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print(cline)
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    The command line is normally run by calling cline() or through the
    Python subprocess module; see the Biopython tutorial.

    This wrapper targets blast_formatter as shipped with BLAST 2.2.24+ or
    later, the release in which the NCBI first announced the tool. An
    earlier version existed in BLAST 2.2.23+ (and possibly older releases),
    but it lacked the -archive option (-rid was mandatory instead) and is
    not supported by this wrapper.
    """

    def __init__(self, cmd="blast_formatter", **kwargs):
        """Declare the blast_formatter options, then initialise the parent.

        cmd is the executable name (default "blast_formatter"); keyword
        arguments are passed on unchanged to the parent initialiser.
        """
        def _at_least_one(value):
            # max_target_seqs has to be one or more.
            return value >= 1

        # Input options
        input_opts = [
            _Option(["-rid", "rid"],
                    "BLAST Request ID (RID), not compatible with archive arg",
                    equate=False),
            _Option(["-archive", "archive"],
                    "Archive file of results, not compatible with rid arg.",
                    filename=True,
                    equate=False),
        ]
        # Restrict search or results
        restrict_opts = [
            _Option(["-max_target_seqs", "max_target_seqs"],
                    "Maximum number of aligned sequences to keep",
                    checker_function=_at_least_one,
                    equate=False),
        ]
        # The parent initialiser expects self.parameters to be set beforehand.
        self.parameters = input_opts + restrict_opts
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check rid against archive, then run the parent validation.

        The rid/archive pairing is handed to
        self._validate_incompatibilities before
        _NcbibaseblastCommandline._validate is called.
        """
        self._validate_incompatibilities({"rid": ["archive"]})
        _NcbibaseblastCommandline._validate(self)
1453 1454
def _test():
    """Execute the doctests found in Bio.Blast.Applications, verbosely."""
    # Imported locally so doctest is only loaded when the tests are run.
    from doctest import testmod
    testmod(verbose=1)
if __name__ == "__main__":
    # Executed as a script rather than imported: run the doctests.
    _test()