Package Bio :: Package Blast :: Module Applications
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # All rights reserved. 
   4  # This code is part of the Biopython distribution and governed by its 
   5  # license.  Please see the LICENSE file that should have been included 
   6  # as part of this package. 
   7  """Definitions for interacting with BLAST related applications. 
   8   
   9  Obsolete wrappers for the old/classic NCBI BLAST tools (written in C): 
  10   
  11  - FastacmdCommandline 
  12  - BlastallCommandline 
  13  - BlastpgpCommandline 
  14  - RpsBlastCommandline 
  15   
  16  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  17   
  18  - NcbiblastpCommandline - Protein-Protein BLAST 
  19  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  20  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  21  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
   22  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  23  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  24  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  25  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  26   
  27  For further details, see: 
  28   
  29  Camacho et al. BLAST+: architecture and applications 
  30  BMC Bioinformatics 2009, 10:421 
  31  doi:10.1186/1471-2105-10-421 
  32  """ 
  33  from Bio.Application import _Option, AbstractCommandline, _Switch 
  34   
class FastacmdCommandline(AbstractCommandline):
    """Create a commandline for the fastacmd program from NCBI (OBSOLETE).

    Two options are defined, both mandatory:

     - database (-d), the database to retrieve from
     - search_string (-s), the id to search for
    """
    def __init__(self, cmd="fastacmd", **kwargs):
        """Define the fastacmd options and initialise the base class.

        cmd - name or path of the executable (default "fastacmd").
        kwargs - option values, keyed by the option names listed above.
        """
        self.parameters = [
            # The trailing False matches every other classic BLAST option in
            # this module, whose doctests render as "-d nr" (flag, space,
            # value).  NOTE(review): presumably _Option otherwise joins the
            # flag and value with "=" - confirm against Bio.Application.
            _Option(["-d", "database"], ["input"], None, 1,
                    "The database to retrieve from.", False),
            _Option(["-s", "search_string"], ["input"], None, 1,
                    "The id to search for.", False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
48 49
class _BlastCommandLine(AbstractCommandline):
    """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastall, rpsblast, blastpgp).
    """
    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared classic BLAST options to the subclass options.

        cmd - name or path of the executable; must not be None.
        kwargs - option values, passed on to AbstractCommandline.

        Subclasses are expected to set self.parameters before calling this.
        """
        assert cmd is not None
        extra_parameters = [
            _Switch(["--help", "help"], ["input"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
            _Option(["-d", "database"], ["input"], None, 1,
                    "The database to BLAST against.", False),
            _Option(["-i", "infile"], ["input", "file"], None, 1,
                    "The sequence to search with.", False),
            _Option(["-e", "expectation"], ["input"], None, 0,
                    "Expectation value cutoff.", False),
            _Option(["-m", "align_view"], ["input"], None, 0,
                    "Alignment view. Integer 0-11. Use 7 for XML output.",
                    False),
            _Option(["-o", "align_outfile", "outfile"], ["output", "file"], None, 0,
                    "Output file for alignment.", False),
            _Option(["-y", "xdrop_extension"], ["input"], None, 0,
                    "Dropoff for blast extensions.", False),
            _Option(["-F", "filter"], ["input"], None, 0,
                    "Filter query sequence with SEG? T/F", False),
            # The trailing False was missing here only; added so that -X is
            # rendered like every other option in this list.
            # NOTE(review): presumably the omission gave "-X=value" - confirm
            # against Bio.Application._Option.
            _Option(["-X", "xdrop"], ["input"], None, 0,
                    "Dropoff value (bits) for gapped alignments.", False),
            _Option(["-I", "show_gi"], ["input"], None, 0,
                    "Show GI's in deflines? T/F", False),
            _Option(["-J", "believe_query"], ["input"], None, 0,
                    "Believe the query defline? T/F", False),
            _Option(["-Z", "xdrop_final"], ["input"], None, 0,
                    "X dropoff for final gapped alignment.", False),
            _Option(["-z", "db_length"], ["input"], None, 0,
                    "Effective database length.", False),
            _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
                    "seqalign file to output.", False),
            _Option(["-v", "descriptions"], ["input"], None, 0,
                    "Number of one-line descriptions.", False),
            _Option(["-b", "alignments"], ["input"], None, 0,
                    "Number of alignments.", False),
            _Option(["-Y", "search_length"], ["input"], None, 0,
                    "Effective length of search space (use zero for the "
                    "real size).", False),
            _Option(["-T", "html"], ["input"], None, 0,
                    "Produce HTML output? T/F", False),
            _Option(["-U", "case_filter"], ["input"], None, 0,
                    "Use lower case filtering of FASTA sequence? T/F", False),
            _Option(["-a", "nprocessors"], ["input"], None, 0,
                    "Number of processors to use.", False),
            _Option(["-g", "gapped"], ["input"], None, 0,
                    "Whether to do a gapped alignment. T/F", False),
        ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Validate the options, skipping all checks when help is requested."""
        if self.help:
            #Don't want to check the normally mandatory arguments like db
            return
        AbstractCommandline._validate(self)
119 120
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared option definitions for blastall and blastpgp (PRIVATE).

    Intended purely as a base class: it contributes the options which
    blastall and blastpgp have in common, but which rpsblast lacks.
    """
    def __init__(self, cmd=None, **kwargs):
        """Prepend the blastall/blastpgp options to the subclass options.

        cmd - name or path of the executable; must not be None.
        kwargs - option values, passed up the class hierarchy.
        """
        assert cmd is not None
        shared_options = [
            _Option(["-G", "gap_open"], ["input"], None, 0,
                    "Gap open penalty", False),
            _Option(["-E", "gap_extend"], ["input"], None, 0,
                    "Gap extension penalty", False),
            _Option(["-A", "window_size"], ["input"], None, 0,
                    "Multiple hits window size", False),
            _Option(["-f", "hit_extend"], ["input"], None, 0,
                    "Threshold for extending hits.", False),
            _Option(["-K", "keep_hits"], ["input"], None, 0,
                    " Number of best hits from a region to keep.", False),
            _Option(["-W", "wordsize"], ["input"], None, 0,
                    "Word size", False),
            _Option(["-P", "passes"], ["input"], None, 0,
                    "Hits/passes. Integer 0-2. 0 for multiple hit, 1 for single hit (does not apply to blastn)",
                    False),
        ]
        # The shared options go first, just in case any of the subclass
        # arguments must come last on the command line.
        if hasattr(self, "parameters"):
            self.parameters = shared_options + self.parameters
        else:
            # The subclass should really have set this up already.
            self.parameters = shared_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
154 155
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now obsolete, and will be deprecated and
    removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    #TODO - This could use more checking for valid parameters to the program.
    def __init__(self, cmd="blastall", **kwargs):
        """Define the blastall specific options and initialise the base class.

        cmd - name or path of the executable (default "blastall").
        kwargs - option values, keyed by the option names.
        """
        self.parameters = [
            #Sorted in the same order as the output from blastall --help
            #which should make it easier to keep them up to date in future.
            #Note that some arguments are defined in the base classes (above).
            _Option(["-p", "program"], ["input"], None, 1,
                    "The blast program to use (e.g. blastp, blastn).", False),
            _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                    "Penalty for a nucleotide mismatch (blastn only).", False),
            _Option(["-r", "nuc_match"], ["input"], None, 0,
                    "Reward for a nucleotide match (blastn only).", False),
            _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                    "Query Genetic code to use.", False),
            _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                    "DB Genetic code (for tblast[nx] only).", False),
            _Option(["-M", "matrix"], ["input"], None, 0,
                    "Matrix to use", False),
            _Option(["-S", "strands"], ["input"], None, 0,
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.", False),
            _Option(["-l", "restrict_gi"], ["input"], None, 0,
                    "Restrict search of database to list of GI's.", False),
            _Option(["-R", "checkpoint"], ["input", "file"], None, 0,
                    "PSI-TBLASTN checkpoint input file.", False),
            _Option(["-n", "megablast"], ["input"], None, 0,
                    "MegaBlast search T/F.", False),
            #The old name "region_length" is for consistency with our
            #old blastall function wrapper:
            _Option(["-L", "region_length", "range_restriction"], ["input"],
                    None, 0,
                    """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter).""", False),
            #The original name for -w was misspelt.  The correctly spelt
            #"frame_shift_penalty" is now accepted as an additional name,
            #while the misspelt name is kept last so that existing code
            #using it keeps working.
            #NOTE(review): assumes the last name is the primary one and the
            #others are aliases, as with -o and -L - confirm against
            #Bio.Application.
            _Option(["-w", "frame_shift_penalty", "frame_shit_penalty"],
                    ["input"], None, 0,
                    "Frame shift penalty (OOF algorithm for blastx).", False),
            _Option(["-t", "largest_intron"], ["input"], None, 0,
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)", False),
            _Option(["-B", "num_concatenated_queries"], ["input"], None, 0,
                    "Number of concatenated queries, for blastn and tblastn.",
                    False),
            _Option(["-V", "oldengine"], ["input"], None, 0,
                    "Force use of the legacy BLAST engine.", False),
            _Option(["-C", "composition_based"], ["input"], None, 0,
                    """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0.""", False),
            _Option(["-s", "smith_waterman"], ["input"], None, 0,
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F", False),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
240 241
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Wrapper for the classic NCBI blastpgp command line tool (OBSOLETE).

    In BLAST+ (the C++ rewrite of the original C code) the NCBI replaced
    blastpgp with a renamed tool, psiblast, for which this module offers
    NcbipsiblastCommandline.

    As with blastpgp itself (and blastall), this wrapper is obsolete and
    will be deprecated and then removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    The command line would usually be run via the Python subprocess
    module, see the Biopython tutorial for details.
    """
    def __init__(self, cmd="blastpgp", **kwargs):
        """Define the blastpgp specific options and initialise the base class.

        cmd - name or path of the executable (default "blastpgp").
        kwargs - option values, keyed by the option names.
        """
        pgp_options = [
            _Option(["-C", "checkpoint_outfile"], ["output", "file"], None, 0,
                    "Output file for PSI-BLAST checkpointing.", False),
            _Option(["-R", "restart_infile"], ["input", "file"], None, 0,
                    "Input file for PSI-BLAST restart.", False),
            _Option(["-k", "hit_infile"], ["input", "file"], None, 0,
                    "Hit file for PHI-BLAST.", False),
            _Option(["-Q", "matrix_outfile"], ["output", "file"], None, 0,
                    "Output file for PSI-BLAST matrix in ASCII.", False),
            _Option(["-B", "align_infile"], ["input", "file"], None, 0,
                    "Input alignment file for PSI-BLAST restart.", False),
            _Option(["-S", "required_start"], ["input"], None, 0,
                    "Start of required region in query.", False),
            _Option(["-H", "required_end"], ["input"], None, 0,
                    "End of required region in query.", False),
            _Option(["-j", "npasses"], ["input"], None, 0,
                    "Number of passes", False),
            _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                    "Number of bits to trigger gapping.", False),
            _Option(["-c", "pseudocounts"], ["input"], None, 0,
                    "Pseudocounts constants for multiple passes.", False),
            _Option(["-h", "model_threshold"], ["input"], None, 0,
                    "E-value threshold to include in multipass model.", False),
            # Open question from the original author: does the old name
            # "region_length" for -L make sense here?
            _Option(["-L", "region_length"], ["input"], None, 0,
                    "Cost to decline alignment (disabled when zero).", False),
            _Option(["-M", "matrix"], ["input"], None, 0,
                    "Matrix (string, default BLOSUM62).", False),
            _Option(["-p", "program"], ["input"], None, 1,
                    "The blast program to use (e.g blastpgp, patseedp or seedp).", False),
        ]
        self.parameters = pgp_options
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
295 296
class RpsBlastCommandline(_BlastCommandLine):
    """Wrapper for the classic NCBI rpsblast command line tool (OBSOLETE).

    In BLAST+ (the C++ rewrite of the original C code) the NCBI replaced
    the old rpsblast with a new tool of the same name plus a second tool,
    rpstblastn, both of which take different command line arguments. This
    module offers NcbirpsblastCommandline and NcbirpstblastnCommandline
    as wrappers for those new tools.

    As with the old rpsblast itself (and blastall), this wrapper is obsolete
    and will be deprecated and then removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    The command line would usually be run via the Python subprocess
    module, see the Biopython tutorial for details.
    """
    def __init__(self, cmd="rpsblast", **kwargs):
        """Define the rpsblast specific options and initialise the base class.

        cmd - name or path of the executable (default "rpsblast").
        kwargs - option values, keyed by the option names.
        """
        rps_options = [
            # -N is also offered by blastpgp, but not by blastall.
            _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                    "Number of bits to trigger gapping.", False),
            # The blastall and blastpgp wrappers call their -P option
            # "passes".  If this is the same thing, we should be consistent!
            _Option(["-P", "multihit"], ["input"], None, 0,
                    "0 for multiple hit, 1 for single hit", False),
            _Option(["-l", "logfile"], ["output", "file"], None, 0,
                    "Logfile name.", False),
            _Option(["-p", "protein"], ["input"], None, 0,
                    "Query sequence is protein. T/F", False),
            _Option(["-L", "range_restriction"], ["input"], None, 0,
                    "Location on query sequence (string format start,end).",
                    False),
        ]
        self.parameters = rps_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
337 338
class _NcbiblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
    """
    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared BLAST+ options to the subclass options.

        cmd - name or path of the executable; must not be None.
        kwargs - option values, passed on to AbstractCommandline.

        Subclasses are expected to set self.parameters before calling this.
        """
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"], ["input"],
                    "Print USAGE and DESCRIPTION;  ignore other arguments."),
            _Switch(["-help", "help"], ["input"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description;  ignore other arguments."),
            _Switch(["-version", "version"], ["input"],
                    "Print version number;  ignore other arguments."),
            #Input query options:
            _Option(["-query", "query"], ["input", "file"], None, 0,
                    "The sequence to search with.", False), #Should this be required?
            _Option(["-query_loc", "query_loc"], ["input"], None, 0,
                    "Location on the query sequence (Format: start-stop)", False),
            #General search options:
            _Option(["-db", "db"], ["input"], None, 0,
                    "The database to BLAST against.", False), #Should this be required?
            _Option(["-out", "out"], ["output", "file"], None, 0,
                    "Output file for alignment.", False),
            _Option(["-evalue", "evalue"], ["input"], None, 0,
                    "Expectation value cutoff.", False),
            _Option(["-word_size", "word_size"], ["input"], None, 0,
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""", False),
            #BLAST-2-Sequences options:
            # - see subclass
            #Formatting options:
            _Option(["-outfmt", "outfmt"], ["input"], None, 0,
                    "Alignment view.  Integer 0-10.  Use 5 for XML output (differs from classic BLAST which used 7 for XML).",
                    False), #Did not include old aliases as meaning has changed!
            _Switch(["-show_gis", "show_gis"], ["input"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions", "num_descriptions"], ["input"], None, 0,
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""", False),
            #This help text used to refer to num_alignments itself, it now
            #points at the companion option num_descriptions instead:
            _Option(["-num_alignments", "num_alignments"], ["input"], None, 0,
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 200.
                    See also num_descriptions.""", False),
            _Switch(["-html", "html"], ["input"],
                    "Produce HTML output? See also the outfmt option."),
            #Query filtering options
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            #_Switch(["-soft_masking", "soft_masking"], ["input"],
            #        "Apply filtering locations as soft masks?"),
            _Switch(["-lcase_masking", "lcase_masking"], ["input"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            #Restrict search or results
            _Option(["-gilist", "gilist"], ["input", "file"], None, 0,
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    False),
            _Option(["-negative_gilist", "negative_gilist"], ["input", "file"], None, 0,
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    False),
            _Option(["-seqidlist", "seqidlist"], ["input", "file"], None, 0,
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    False),
            _Option(["-entrez_query", "entrez_query"], ["input"], None, 0,
                    "Restrict search with the given Entrez query (requires remote).", False),
            _Option(["-max_target_seqs", "max_target_seqs"], ["input"], None, 0,
                    """Maximum number of aligned sequences to keep.

                    Integer argument (at least one).""", False),
            #Statistical options
            _Option(["-dbsize", "dbsize"], ["input"], None, 0,
                    "Effective length of the database (integer)", False),
            _Option(["-searchsp", "searchsp"], ["input"], None, 0,
                    "Effective length of the search space (integer)", False),
            #Extension options
            _Option(["-xdrop_ungap", "xdrop_ungap"], ["input"], None, 0,
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    False),
            _Option(["-xdrop_gap", "xdrop_gap"], ["input"], None, 0,
                    "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                    False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"], ["input"], None, 0,
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    False),
            _Option(["-window_size", "window_size"], ["input"], None, 0,
                    "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                    False),
            # Search strategy options
            _Option(["-import_search_strategy", "import_search_strategy"],
                    ["input", "file"], None, 0,
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""", False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    ["output", "file"], None, 0,
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""", False),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"], ["input"],
                    "Should the query and subject defline(s) be parsed?"),
            _Option(["-num_threads", "num_threads"], ["input"], None, 0,
                    """Number of threads to use in the BLAST search.

                    Integer of at least one. Default is one.
                    Incompatible with: remote""", False),
            _Switch(["-remote", "remote"], ["input"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
        ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the option combination, then run the base class validation.

        Raises ValueError if two mutually exclusive options are both set,
        or if entrez_query is used without remote.
        """
        incompatibles = {"remote":["gilist", "negative_gilist", "num_threads"],
                         "import_search_strategy" : ["export_search_strategy"],
                         "gilist":["negative_gilist"],
                         "seqidlist":["gilist", "negative_gilist", "remote"]}
        self._validate_incompatibilities(incompatibles)
        if self.entrez_query and not self.remote:
            raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)

    def _validate_incompatibilities(self, incompatibles):
        """Raise ValueError if two mutually exclusive options are both set.

        incompatibles - dictionary mapping an option name to the list of
        option names which must not be used together with it.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible." \
                                         % (a,b))
487
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Shared options for BLAST+ tools offering two-sequence BLAST (PRIVATE).

    Intended purely as a base class: it contributes the options shared by
    the BLAST+ tools which support two-sequence BLAST (blastn, psiblast,
    etc), which excludes rpsblast and rpstblastn.
    """
    def __init__(self, cmd=None, **kwargs):
        """Prepend the two-sequence BLAST options to the subclass options.

        cmd - name or path of the executable; must not be None.
        kwargs - option values, passed up the class hierarchy.
        """
        assert cmd is not None
        two_seq_options = [
            # General search options:
            _Option(["-gapopen", "gapopen"], ["input"], None, 0,
                    "Cost to open a gap (integer).", False),
            _Option(["-gapextend", "gapextend"], ["input"], None, 0,
                    "Cost to extend a gap (integer).", False),
            # BLAST-2-Sequences options:
            _Option(["-subject", "subject"], ["input", "file"], None, 0,
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""", False),
            _Option(["-subject_loc", "subject_loc"], ["input"], None, 0,
                    """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, negative_gilist, remote.
                    See also subject.""", False),
            # Restrict search or results:
            _Option(["-culling_limit", "culling_limit"], ["input"], None, 0,
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at least this many
                    higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.""", False),
            _Option(["-best_hit_overhang", "best_hit_overhang"], ["input"], None, 0,
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""", False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"], ["input"], None, 0,
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""", False),
        ]
        # These options go first, just in case any of the subclass
        # arguments must come last on the command line.
        if hasattr(self, "parameters"):
            self.parameters = two_seq_options + self.parameters
        else:
            # The subclass should really have set this up already.
            self.parameters = two_seq_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject clashing options, then run the parent class validation."""
        database_limits = ["db", "gilist", "negative_gilist", "seqidlist"]
        clashes = {
            "subject_loc": database_limits + ["remote"],
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
            "subject": database_limits,
        }
        self._validate_incompatibilities(clashes)
        _NcbiblastCommandline._validate(self)
550
class NcbiblastpCommandline(_Ncbiblast2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastp", **kwargs):
        """Define the blastp specific options and initialise the base class.

        cmd - name or path of the executable (default "blastp").
        kwargs - option values, keyed by the option names.
        """
        self.parameters = [
            #General search options:
            _Option(["-task", "task"], ["input"],
                    lambda value : value in ["blastp", "blastp-short"], 0,
                    "Task to execute (string, blastp (default) or blastp-short).", False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
            #The checker compares against the individual documented values.
            #The old substring test against "0Ft2TtDd" rejected "f", yet
            #accepted "" and pieces such as "Ft", and raised TypeError for
            #the integers 0 and 2 (hence the str call).
            _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                    lambda value : str(value) in ("0", "F", "f",
                                                  "2", "T", "t", "D", "d"), 0,
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""", False),
            #Query filtering options:
            #The trailing full stop keeps the closing double quote of the
            #default value out of the end of the triple quoted string.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
            #Restrict search or results:
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: subject, subject_loc""", False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                    "Compute locally optimal Smith-Waterman alignments?"),
        ]
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject db_soft_mask with subject or subject_loc, then validate."""
        incompatibles = {"db_soft_mask":["subject", "subject_loc"]}
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
614 615
616 -class NcbiblastnCommandline(_Ncbiblast2SeqCommandline):
617 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). 618 619 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 620 replaced the old blastall tool with separate tools for each of the searches. 621 This wrapper therefore replaces BlastallCommandline with option -p blastn. 622 623 For example, to run a search against the "nt" nucleotide database using the 624 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value 625 cut off of 0.001, saving the output to a file in XML format: 626 627 >>> from Bio.Blast.Applications import NcbiblastnCommandline 628 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", 629 ... evalue=0.001, out="m_cold.xml", outfmt=5) 630 >>> cline 631 NcbiblastnCommandline(cmd='blastn', query='m_cold.fasta', db='nt', out='m_cold.xml', evalue=0.001, outfmt=5, strand='plus') 632 >>> print cline 633 blastn -query m_cold.fasta -db nt -out m_cold.xml -evalue 0.001 -outfmt 5 -strand plus 634 635 You would typically run the command line with the Python subprocess module, 636 as described in the Biopython tutorial. 637 """
638 - def __init__(self, cmd="blastn", **kwargs):
639 self.parameters = [ \ 640 #Input query options: 641 _Option(["-strand", "strand"], ["input"], 642 lambda value : value in ["both", "minus", "plus"],0, 643 """Query strand(s) to search against database/subject. 644 645 Values allowed are "both" (default), "minus", "plus".""", False), 646 #General search options: 647 _Option(["-task", "task"], ["input"], 648 lambda value : value in ['blastn', 'blastn-short', 'dc-megablast', 649 'megablast', 'vecscreen'], 0, 650 """Task to execute (string, default 'megablast') 651 652 Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast' 653 (the default), or 'vecscreen'.""", False), 654 _Option(["-penalty", "penalty"], ["input"], None, 0, 655 "Penalty for a nucleotide mismatch (integer, at most zero).", False), 656 _Option(["-reward", "reward"], ["input"], None, 0, 657 "Reward for a nucleotide match (integer, at least zero).", False), 658 #TODO - Does this need an argument or is it a switch? 659 #_Option(["-use_index", "use_index"], ["input"], None, 0, 660 # "Use MegaBLAST database index (boolean).", False), 661 _Option(["-index_name", "index_name"], ["input"], None, 0, 662 "MegaBLAST database index name.", False), 663 #Query filtering options: 664 _Option(["-dust", "dust"], ["input"], None, 0, 665 """Filter query sequence with DUST (string). 666 667 Format: 'yes', 'level window linker', or 'no' to disable. 668 Default = '20 64 1'. 669 """, False), 670 _Option(["-filtering_db", "filtering_db"], ["input"], None, 0, 671 "BLAST database containing filtering elements (i.e. 
repeats).", False), 672 _Option(["-window_masker_taxid", "window_masker_taxid"], ["input"], None, 0, 673 "Enable WindowMasker filtering using a Taxonomic ID (integer).", False), 674 _Option(["-window_masker_db", "window_masker_db"], ["input"], None, 0, 675 "Enable WindowMasker filtering using this repeats database (string).", False), 676 #Restrict search or results: 677 _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0, 678 """Filtering algorithm for soft masking (integer). 679 680 Filtering algorithm ID to apply to the BLAST database as soft masking. 681 682 Incompatible with: subject, subject_loc""", False), 683 _Option(["-perc_identity", "perc_identity"], ["input"], None, 0, 684 "Percent identity (real, 0 to 100 inclusive).", False), 685 #Discontiguous MegaBLAST options 686 _Option(["-template_type", "template_type"], ["input"], 687 lambda value : value in ['coding', 'coding_and_optimal','optimal'], 0, 688 """Discontiguous MegaBLAST template type (string). 689 690 Allowed values: 'coding', 'coding_and_optimal' or 'optimal' 691 Requires: template_length.""", False), 692 _Option(["-template_length", "template_length"], ["input"], 693 lambda value : value in [16,18,21,'16','18','21'], 0, 694 """Discontiguous MegaBLAST template length (integer). 695 696 Allowed values: 16, 18, 21 697 698 Requires: template_type.""", False), 699 #Extension options: 700 _Switch(["-no_greedy", "no_greedy"], ["input"], 701 "Use non-greedy dynamic programming extension"), 702 _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], ["input"], None, 0, 703 "Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages (integer).", False), 704 _Switch(["-ungapped", "ungapped"], ["input"], 705 "Perform ungapped alignment only?"), 706 _Option(["-off_diagonal_range", "off_diagonal_range"], ["input"], None, 0, 707 """Number of off-diagonals to search for the 2nd hit (integer). 708 709 Expects a positive integer, or 0 (default) to turn off. 
710 711 Added in BLAST 2.2.23+ 712 """, False), 713 ] 714 _Ncbiblast2SeqCommandline.