Package Bio :: Package Blast :: Module Applications
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2010 by Phillip Garland. 
   4  # All rights reserved. 
   5  # This code is part of the Biopython distribution and governed by its 
   6  # license.  Please see the LICENSE file that should have been included 
   7  # as part of this package. 
   8  """Definitions for interacting with BLAST related applications. 
   9   
  10  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  11   
  12  - NcbiblastpCommandline - Protein-Protein BLAST 
  13  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  14  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  15  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
  16  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  17  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  18  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  19  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  20  - NcbideltablastCommandline - Protein-Protein domain enhanced lookup time accelerated blast 
  21  - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats 
  22   
  23  For further details, see: 
  24   
  25  Camacho et al. BLAST+: architecture and applications 
  26  BMC Bioinformatics 2009, 10:421 
  27  doi:10.1186/1471-2105-10-421 
  28  """ 
  29  from __future__ import print_function 
  30   
  31  from Bio.Application import _Option, AbstractCommandline, _Switch 
  32   
  33   
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
    AND blast_formatter).
    """

    def __init__(self, cmd=None, **kwargs):
        """Register the shared options, then initialise the command line.

        Arguments:
         - cmd - Name or path of the executable. The subclass must supply
           it; None is rejected by an assertion.
         - kwargs - Option values, passed unchanged to
           AbstractCommandline.__init__.
        """
        assert cmd is not None
        extra_parameters = [
            # Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION;  ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number;  ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            # Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view.  Integer 0-11.  Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
            # TODO - Document and test the column options
            _Switch(["-show_gis", "show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions", "num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            # The help text below used to be a garbled copy of the
            # num_descriptions text (it referred to itself and gave the
            # wrong default).
            _Option(["-num_alignments", "num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 250.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            # Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
            ]
        try:
            # Insert extra parameters - at the start just in case there
            # are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            # Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        Argument incompatibles maps an option name to the list of option
        names it must not be combined with.

        Raises ValueError for the first pair found where
        self._get_parameter() gives a true value for both names.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a, b))
100 101
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Shared search options for the NCBI BLAST+ search wrappers (PRIVATE).

    Intended purely as a base class.  It adds the query, database,
    filtering, statistics and extension options that the BLAST+ search
    tools have in common on top of the options defined by
    _NcbibaseblastCommandline.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared search options and delegate to the base class."""
        assert cmd is not None
        search_options = [
            # Input query options:
            _Option(["-query", "query"],
                    "The sequence to search with.",
                    filename=True,
                    equate=False),  # Should this be required?
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            # General search options:
            _Option(["-db", "db"],
                    "The database to BLAST against.",
                    equate=False),
            _Option(["-evalue", "evalue"],
                    "Expectation value cutoff.",
                    equate=False),
            _Option(["-word_size", "word_size"],
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""",
                    equate=False),
            # BLAST-2-Sequences options: defined by the subclass.
            # Formatting options: defined by the base class.
            # Query filtering options
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            # Restrict search or results
            _Option(["-gilist", "gilist"],
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-negative_gilist", "negative_gilist"],
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-seqidlist", "seqidlist"],
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query (requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    "Maximum number of aligned sequences to keep (integer, at least one).",
                    equate=False),
            # Statistical options
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer).",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer).",
                    equate=False),
            _Option(["-max_hsps_per_subject", "max_hsps_per_subject"],
                    "Override maximum number of HSPs per subject to save for ungapped searches (integer).",
                    equate=False),
            _Option(["-max_hsps", "max_hsps"],
                    "Set maximum number of HSPs per subject sequence to save (default 0 means no limit).",
                    equate=False),
            _Switch(["-sum_statistics", "sum_statistics"],
                    "Use sum statistics."),
            # Extension options
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions (float).",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped extensions (float).",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment (float).",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit algorithm (integer).",
                    equate=False),
            # Search strategy options
            _Option(["-import_search_strategy", "import_search_strategy"],
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""",
                    filename=True,
                    equate=False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""",
                    filename=True,
                    equate=False),
            # Miscellaneous options
            _Option(["-num_threads", "num_threads"],
                    """Number of threads to use in the BLAST search (integer, at least one).

                    Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Switch(["-remote", "remote"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
            ]
        # The shared options go first, in case the subclass has arguments
        # which must come last.  A subclass which never set self.parameters
        # simply gets the shared options on their own.
        try:
            subclass_options = self.parameters
        except AttributeError:
            subclass_options = []
        self.parameters = search_options + subclass_options
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject conflicting options, then run the generic validation."""
        conflicts = {
            "remote": ["gilist", "negative_gilist", "num_threads"],
            "import_search_strategy": ["export_search_strategy"],
            "gilist": ["negative_gilist"],
            "seqidlist": ["gilist", "negative_gilist", "remote"],
        }
        self._validate_incompatibilities(conflicts)
        # An Entrez query is only accepted together with the remote switch.
        if self.entrez_query:
            if not self.remote:
                raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
237 238
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Options for BLAST+ tools supporting two-sequence BLAST (PRIVATE).

    Intended purely as a base class.  It adds the gap cost, subject
    sequence and hit culling options shared by the tools which can
    compare a query against a subject sequence (blastn, psiblast, etc)
    but which rpsblast and rpstblastn do not offer.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the two-sequence options and delegate to the parent."""
        assert cmd is not None
        two_seq_options = [
            # General search options:
            _Option(["-gapopen", "gapopen"],
                    "Cost to open a gap (integer).",
                    equate=False),
            _Option(["-gapextend", "gapextend"],
                    "Cost to extend a gap (integer).",
                    equate=False),
            # BLAST-2-Sequences options:
            _Option(["-subject", "subject"],
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                    filename=True,
                    equate=False),
            _Option(["-subject_loc", "subject_loc"],
                    """Location on the subject sequence (Format: start-stop).

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.

                    See also subject.""",
                    equate=False),
            # Restrict search or results:
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (float, recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (float, recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            ]
        # These options go first, in case the subclass has arguments which
        # must come last; a subclass without its own list gets just these.
        try:
            subclass_options = self.parameters
        except AttributeError:
            subclass_options = []
        self.parameters = two_seq_options + subclass_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject conflicting subject/culling options, then defer to the parent."""
        conflicts = {
            "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"],
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
            "subject": ["db", "gilist", "negative_gilist", "seqidlist"],
        }
        self._validate_incompatibilities(conflicts)
        _NcbiblastCommandline._validate(self)
312 313
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Database masking options for the main BLAST+ tools (PRIVATE).

    Intended purely as a base class.  It adds the db_soft_mask and
    db_hard_mask options shared by blastp, blastn, blastx, tblastx and
    tblastn, which psiblast, rpsblast and rpstblastn do not offer.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the masking options and delegate to the parent."""
        assert cmd is not None
        masking_options = [
            # Restrict search or results:
            _Option(["-db_soft_mask", "db_soft_mask"],
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                    equate=False),
            _Option(["-db_hard_mask", "db_hard_mask"],
                    """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                    equate=False),
            ]
        # These options go first, in case the subclass has arguments which
        # must come last; a subclass without its own list gets just these.
        try:
            subclass_options = self.parameters
        except AttributeError:
            subclass_options = []
        self.parameters = masking_options + subclass_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject conflicting masking options, then defer to the parent."""
        conflicts = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
        }
        self._validate_incompatibilities(conflicts)
        _Ncbiblast2SeqCommandline._validate(self)
354 355
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print(cline)
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastp", **kwargs):
        """Define the blastp specific options and delegate to the parent.

        Arguments:
         - cmd - Name or path of the executable (default "blastp").
         - kwargs - Option values, passed unchanged to the parent class.
        """
        # Compare whole values rather than testing for a substring of
        # "0Ft2TtDd": the substring test rejected the documented "f",
        # accepted "" and "Ft", and raised TypeError for integers.
        comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
        self.parameters = [
            # General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value: value in ["blastp",
                                                             "blastp-short"],
                    equate=False),
            # equate=False matches every other option in this module, so
            # the value is given as "-matrix NAME" rather than
            # "-matrix=NAME".
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum score for words to be added to the BLAST lookup table (float).",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            # Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
412 413
class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastn.

    For example, to run a search against the "nt" nucleotide database using the
    FASTA nucleotide file "m_cold.fasta" as the query, with an expectation value
    cut off of 0.001, saving the output to a file in XML format:

    >>> from Bio.Blast.Applications import NcbiblastnCommandline
    >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
    ...                               evalue=0.001, out="m_cold.xml", outfmt=5)
    >>> cline
    NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
    >>> print(cline)
    blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastn", **kwargs):
        """Define the blastn specific options and delegate to the parent.

        Arguments:
         - cmd - Name or path of the executable (default "blastn").
         - kwargs - Option values, passed unchanged to the parent class.
        """
        self.parameters = [
            # Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in ["both",
                                                             "minus",
                                                             "plus"],
                    equate=False),
            # General search options:
            _Option(["-task", "task"],
                    """Task to execute (string, default 'megablast')

                    Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
                    (the default), or 'vecscreen'.""",
                    checker_function=lambda value: value in ['blastn',
                                                             'blastn-short',
                                                             'dc-megablast',
                                                             'megablast',
                                                             'vecscreen'],
                    equate=False),
            _Option(["-penalty", "penalty"],
                    "Penalty for a nucleotide mismatch (integer, at most zero).",
                    equate=False),
            _Option(["-reward", "reward"],
                    "Reward for a nucleotide match (integer, at least zero).",
                    equate=False),
            # TODO - Does this need an argument or is it a switch?
            # _Option(["-use_index", "use_index"],
            #         "Use MegaBLAST database index (boolean).",
            #         equate=False),
            _Option(["-index_name", "index_name"],
                    "MegaBLAST database index name.",
                    equate=False),
            # Query filtering options:
            _Option(["-dust", "dust"],
                    """Filter query sequence with DUST (string).

                    Format: 'yes', 'level window linker', or 'no' to disable.
                    Default = '20 64 1'.
                    """,
                    equate=False),
            _Option(["-filtering_db", "filtering_db"],
                    "BLAST database containing filtering elements (i.e. repeats).",
                    equate=False),
            _Option(["-window_masker_taxid", "window_masker_taxid"],
                    "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                    equate=False),
            _Option(["-window_masker_db", "window_masker_db"],
                    "Enable WindowMasker filtering using this repeats database (string).",
                    equate=False),
            # Restrict search or results:
            _Option(["-perc_identity", "perc_identity"],
                    "Percent identity (real, 0 to 100 inclusive).",
                    equate=False),
            # Discontiguous MegaBLAST options
            _Option(["-template_type", "template_type"],
                    """Discontiguous MegaBLAST template type (string).

                    Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
                    Requires: template_length.""",
                    checker_function=lambda value: value in ['coding', 'coding_and_optimal', 'optimal'],
                    equate=False),
            _Option(["-template_length", "template_length"],
                    """Discontiguous MegaBLAST template length (integer).

                    Allowed values: 16, 18, 21

                    Requires: template_type.""",
                    checker_function=lambda value: value in [16, 18, 21, '16', '18', '21'],
                    equate=False),
            # Extension options:
            _Switch(["-no_greedy", "no_greedy"],
                    "Use non-greedy dynamic programming extension"),
            _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                    "Minimum raw gapped score to keep an alignment in the "
                    "preliminary gapped and traceback stages (integer).",
                    equate=False),
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            _Option(["-off_diagonal_range", "off_diagonal_range"],
                    """Number of off-diagonals to search for the 2nd hit (integer).

                    Expects a positive integer, or 0 (default) to turn off.

                    Added in BLAST 2.2.23+
                    """,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check template_type and template_length are given together.

        Raises ValueError if exactly one of the two has a true value, then
        defers to the parent class validation.
        """
        if (self.template_type and not self.template_length) \
                or (self.template_length and not self.template_type):
            # The message used to name template_type twice.
            raise ValueError("Options template_type and template_length require each other.")
        _NcbiblastMain2SeqCommandline._validate(self)
534 535
class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).

    BLAST+ is the NCBI's C++ rewrite of BLAST, in which the single blastall
    tool was split into one tool per search type.  Use this wrapper where
    you would previously have used BlastallCommandline with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print(cline)
    blastx -query m_cold.fasta -db nr -evalue 0.001

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastx", **kwargs):
        """Define the blastx specific options and delegate to the parent."""
        strand_choices = ["both", "minus", "plus"]
        blastx_options = [
            # Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in strand_choices,
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    "Genetic code to use to translate query (integer, default 1).",
                    equate=False),
            # General search options:
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum score for words to be added to the BLAST lookup table (float).",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics for blastp, blastx, or tblastn:

                    D or d: default (equivalent to 2 )
                    0 or F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2 or T or t : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally

                    For programs other than tblastn, must either be absent or be D, F or 0
                    Default = `2'
                    """,
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            ]
        self.parameters = blastx_options
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
612 613
class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print(cline)
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="tblastn", **kwargs):
        """Define the tblastn specific options and delegate to the parent.

        Arguments:
         - cmd - Name or path of the executable (default "tblastn").
         - kwargs - Option values, passed unchanged to the parent class.
        """
        # Compare whole values rather than testing for a substring of
        # "0Ft12TtDd3": the substring test rejected the documented "f",
        # accepted "" and "12", and raised TypeError for integers.
        comp_based_stats_values = ("0", "F", "f", "1", "2", "T", "t",
                                   "D", "d", "3")
        self.parameters = [
            # General search options:
            # db_gencode applies to the subject/database sequences; the
            # old help text said "query", copied from query_gencode.
            _Option(["-db_gencode", "db_gencode"],
                    "Genetic code to use to translate subject sequences (integer, default 1).",
                    equate=False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    """Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

                    This was removed in BLAST 2.2.27+""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum score for words to be added to the BLAST lookup table (float).",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                    2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            # Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            # Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            # PSI-TBLASTN options:
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""",
                    filename=True,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
690 691
class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print(cline)
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="tblastx", **kwargs):
        """Define the tblastx specific options and delegate to the parent.

        Arguments:
         - cmd - Name or path of the executable (default "tblastx").
         - kwargs - Option values, passed unchanged to the parent class.
        """
        self.parameters = [
            # Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    "Genetic code to use to translate query (integer, default 1).",
                    equate=False),
            # General search options:
            # db_gencode applies to the subject/database sequences; the
            # old help text was a verbatim copy of the query_gencode text.
            _Option(["-db_gencode", "db_gencode"],
                    "Genetic code to use to translate subject sequences (integer, default 1).",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum score for words to be added to the BLAST lookup table (float).",
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
748 749
750 -class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
751 """Wrapper for the NCBI BLAST+ program psiblast. 752 753 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 754 replaced the old blastpgp tool with a similar tool psiblast. This wrapper 755 therefore replaces BlastpgpCommandline, the wrapper for blastpgp. 756 757 >>> from Bio.Blast.Applications import NcbipsiblastCommandline 758 >>> cline = NcbipsiblastCommandline(help=True) 759 >>> cline 760 NcbipsiblastCommandline(cmd='psiblast', help=True) 761 >>> print(cline) 762 psiblast -help 763 764 You would typically run the command line with cline() or via the Python 765 subprocess module, as described in the Biopython tutorial. 766 """
def __init__(self, cmd="psiblast", **kwargs):
    """Declare the psiblast-specific options and initialise the wrapper.

    Arguments:
     - cmd - value handed on as the command argument of the parent
       initialiser (default "psiblast").
     - kwargs - keyword arguments forwarded unchanged to
       _Ncbiblast2SeqCommandline.__init__.

    The option list is stored in self.parameters before the parent
    initialiser is called.
    """
    # Whole-value membership test.  A substring test against "0Ft2TtDd"
    # wrongly rejected the documented "f", accepted "" and fragments such
    # as "Ft", and raised TypeError for integer values like 2.
    comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
    self.parameters = [
        # General search options:
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum score for words to be added to the BLAST lookup table (float).",
                equate=False),
        _Option(["-comp_based_stats", "comp_based_stats"],
                """Use composition-based statistics (string, default 2, i.e. True).

                0, F or f: no composition-based statistics
                2, T or t, D or d : Composition-based score adjustment
                as in Bioinformatics 21:902-911, 2005, conditioned on
                sequence properties

                Note that tblastn also supports values of 1 and 3.""",
                checker_function=lambda value: str(value) in comp_based_stats_values,
                equate=False),
        # Query filtering options:
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
        # Extension options:
        _Option(["-gap_trigger", "gap_trigger"],
                "Number of bits to trigger gapping (float, default 22).",
                equate=False),
        # Miscellaneous options:
        _Switch(["-use_sw_tback", "use_sw_tback"],
                "Compute locally optimal Smith-Waterman alignments?"),
        # PSI-BLAST options:
        _Option(["-num_iterations", "num_iterations"],
                """Number of iterations to perform (integer, at least one).

                Default is one.
                Incompatible with: remote""",
                equate=False),
        _Option(["-out_pssm", "out_pssm"],
                "File name to store checkpoint file.",
                filename=True,
                equate=False),
        _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                "File name to store ASCII version of PSSM.",
                filename=True,
                equate=False),
        _Option(["-in_msa", "in_msa"],
                """File name of multiple sequence alignment to restart PSI-BLAST.

                Incompatible with: in_pssm, query""",
                filename=True,
                equate=False),
        _Option(["-msa_master_idx", "msa_master_idx"],
                """Index of sequence to use as master in MSA.

                Index (1-based) of sequence to use as the master in the
                multiple sequence alignment. If not specified, the first
                sequence is used.""",
                equate=False),
        _Option(["-in_pssm", "in_pssm"],
                """PSI-BLAST checkpoint file.

                Incompatible with: in_msa, query, phi_pattern""",
                filename=True,
                equate=False),
        # PSSM engine options:
        _Option(["-pseudocount", "pseudocount"],
                """Pseudo-count value used when constructing PSSM.

                Integer. Default is zero.""",
                equate=False),
        _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                "E-value inclusion threshold for pairwise alignments (float, default 0.002).",
                equate=False),
        _Switch(["-ignore_msa_master", "ignore_msa_master"],
                """Ignore the master sequence when creating PSSM

                Requires: in_msa
                Incompatible with: msa_master_idx, in_pssm, query, query_loc, phi_pattern
                """),
        # PHI-BLAST options:
        _Option(["-phi_pattern", "phi_pattern"],
                """File name containing pattern to search.

                Incompatible with: in_pssm""",
                filename=True,
                equate=False),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
859
def _validate(self):
    """Run the psiblast option-conflict check, then the parent validation."""
    # Mapping handed to _validate_incompatibilities; the entries mirror
    # the "Incompatible with" notes in the option help texts.
    conflicts = {
        "num_iterations": ["remote"],
        "in_msa": ["in_pssm", "query"],
        "in_pssm": ["in_msa", "query", "phi_pattern"],
        "ignore_msa_master": [
            "msa_master_idx",
            "in_pssm",
            "query",
            "query_loc",
            "phi_pattern",
        ],
    }
    self._validate_incompatibilities(conflicts)
    _Ncbiblast2SeqCommandline._validate(self)
869 870
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Wrapper for the NCBI BLAST+ program rpsblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name. This
    wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print(cline)
    rpsblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-specific options and initialise the wrapper.

        Arguments:
         - cmd - value handed on as the command argument of the parent
           initialiser (default "rpsblast").
         - kwargs - keyword arguments forwarded unchanged to
           _NcbiblastCommandline.__init__.
        """
        # Whole-value membership test.  A substring test against
        # "Dd0Ff1Tt" accepted "" and fragments such as "0F", and raised
        # TypeError for integer values like 1.
        comp_based_stats_values = ("D", "d", "0", "F", "f", "1", "T", "t")
        self.parameters = [
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            # Restrict search or results:
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            # General search options:
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics.

                    D or d: default (equivalent to 0 )
                    0 or F or f: Simplified Composition-based statistics as in
                    Bioinformatics 15:1000-1011, 1999
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001

                    Default = `0'
                    """,
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            # Misc options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the rpsblast option-conflict check, then the parent validation."""
        # Mirrors the "Incompatible with" notes in the option help texts.
        incompatibles = {"culling_limit": ["best_hit_overhang", "best_hit_score_edge"]}
        self._validate_incompatibilities(incompatibles)
        _NcbiblastCommandline._validate(self)
944 945
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Wrapper for the NCBI BLAST+ program rpstblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name, and a
    separate tool rpstblastn for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print(cline)
    rpstblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpstblastn", **kwargs):
        """Declare the rpstblastn-specific options and initialise the wrapper.

        Arguments:
         - cmd - value handed on as the command argument of the parent
           initialiser (default "rpstblastn").
         - kwargs - keyword arguments forwarded unchanged to
           _NcbiblastCommandline.__init__.
        """
        self.parameters = [
            # Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in ["both",
                                                             "minus",
                                                             "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    "Genetic code to use to translate query (integer, default 1).",
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            # Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
        ]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
990 991
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Wrapper for the NCBI BLAST+ program blast_formatter.

    With the release of BLAST 2.2.24+ (i.e. the BLAST suite rewritten in C++
    instead of C), the NCBI added the ASN.1 output format option to all the
    search tools, and extended the blast_formatter to support this as input.

    The blast_formatter command allows you to convert the ASN.1 output into
    the other output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print(cline)
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.

    Note that this wrapper is for the version of blast_formatter from BLAST
    2.2.24+ (or later) which is when the NCBI first announced the inclusion
    of this tool. There was actually an early version in BLAST 2.2.23+ (and
    possibly in older releases) but this did not have the -archive option
    (instead -rid is a mandatory argument), and is not supported by this
    wrapper.
    """

    def __init__(self, cmd="blast_formatter", **kwargs):
        """Declare the blast_formatter options and initialise the wrapper.

        Arguments:
         - cmd - value handed on as the command argument of the parent
           initialiser (default "blast_formatter").
         - kwargs - keyword arguments forwarded unchanged to
           _NcbibaseblastCommandline.__init__.
        """
        def _at_least_one(value):
            """Return True when value compares greater than or equal to 1."""
            return value >= 1

        # Input options
        rid_option = _Option(
            ["-rid", "rid"],
            "BLAST Request ID (RID), not compatible with archive arg",
            equate=False)
        archive_option = _Option(
            ["-archive", "archive"],
            "Archive file of results, not compatible with rid arg.",
            filename=True,
            equate=False)
        # Restrict search or results
        max_target_seqs_option = _Option(
            ["-max_target_seqs", "max_target_seqs"],
            "Maximum number of aligned sequences to keep",
            checker_function=_at_least_one,
            equate=False)

        self.parameters = [rid_option, archive_option, max_target_seqs_option]
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the rid/archive conflict check, then the parent validation."""
        conflicts = {"rid": ["archive"]}
        self._validate_incompatibilities(conflicts)
        _NcbibaseblastCommandline._validate(self)
1041 1042
class NcbideltablastCommandline(_Ncbiblast2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program deltablast (for proteins).

    This is a wrapper for the deltablast command line command included in
    the NCBI BLAST+ software (not present in the original BLAST).

    >>> from Bio.Blast.Applications import NcbideltablastCommandline
    >>> cline = NcbideltablastCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True)
    >>> cline
    NcbideltablastCommandline(cmd='deltablast', query='rosemary.pro', db='nr', evalue=0.001, remote=True)
    >>> print(cline)
    deltablast -query rosemary.pro -db nr -evalue 0.001 -remote

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="deltablast", **kwargs):
        """Declare the deltablast-specific options and initialise the wrapper.

        Arguments:
         - cmd - value handed on as the command argument of the parent
           initialiser (default "deltablast").
         - kwargs - keyword arguments forwarded unchanged to
           _Ncbiblast2SeqCommandline.__init__.
        """
        # Whole-value membership test.  A substring test against
        # "0Ft2TtDd" wrongly rejected the documented "f", accepted "" and
        # fragments such as "Ft", and raised TypeError for integers.
        comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
        self.parameters = [
            # General search options:
            # equate=False added so -matrix is declared the same way as
            # every other option in this module (it was the only one
            # relying on the _Option default).
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum score for words to be added to the BLAST lookup table (float).",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics.
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            # Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            # Extension options:
            _Option(["-gap_trigger", "gap_trigger"],
                    "Number of bits to trigger gapping Default = 22",
                    equate=False),
            # Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            # PSI-BLAST options
            _Option(["-num_iterations", "num_iterations"],
                    """Number of iterations to perform. (integer >=1, Default is 1)

                    Incompatible with: remote""",
                    equate=False),
            _Option(["-out_pssm", "out_pssm"],
                    "File name to store checkpoint file.",
                    filename=True,
                    equate=False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    "File name to store ASCII version of PSSM.",
                    filename=True,
                    equate=False),
            # PSSM engine options
            _Option(["-pseudocount", "pseudocount"],
                    "Pseudo-count value used when constructing PSSM (integer, default 0).",
                    equate=False),
            _Option(["-domain_inclusion_ethresh", "domain_inclusion_ethresh"],
                    """E-value inclusion threshold for alignments with conserved domains.

                    (float, Default is 0.05)""",
                    equate=False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    "E-value inclusion threshold for pairwise alignments (float, Default is 0.002).",
                    equate=False),
            # DELTA-BLAST options
            _Option(["-rpsdb", "rpsdb"],
                    "BLAST domain database name (string, Default = 'cdd_delta').",
                    equate=False),
            _Switch(["-show_domain_hits", "show_domain_hits"],
                    """Show domain hits?

                    Incompatible with: remote, subject"""),
        ]
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
1128 1129
def _test():
    """Execute this module's doctests with verbose reporting switched on."""
    from doctest import testmod
    testmod(verbose=1)
if __name__ == "__main__":
    # Executed directly as a script: run the doctest helper.
    _test()