Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # 
  3  # This file is part of the Biopython distribution and governed by your 
  4  # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 
  5  # Please see the LICENSE file that should have been included as part of this 
  6  # package. 
  7  """Record classes to hold BLAST output. 
  8   
  9  Classes: 
 10  Blast              Holds all the information from a blast search. 
 11  PSIBlast           Holds all the information from a psi-blast search. 
 12   
 13  Header             Holds information from the header. 
 14  Description        Holds information about one hit description. 
 15  Alignment          Holds information about one alignment hit. 
 16  HSP                Holds information about one HSP. 
 17  MultipleAlignment  Holds information about a multiple alignment. 
 18  DatabaseReport     Holds information from the database report. 
 19  Parameters         Holds information from the parameters. 
 20   
 21  """ 
 22  # XXX finish printable BLAST output 
 23   
 24  from Bio.Seq import Seq 
 25  from Bio.SeqRecord import SeqRecord 
 26  from Bio.Align import MultipleSeqAlignment 
 27   
 28   
class Header(object):
    """Container for the values read from a BLAST report header.

    Members:
    application         Name of the BLAST flavor that generated this data.
    version             Version of BLAST used.
    date                Date this data was generated.
    reference           Reference for BLAST.

    query               Name of the query sequence.
    query_letters       Number of letters in the query sequence. (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database. (int)
    database_letters    Number of letters in the database. (int)

    Text members start out as empty strings, the counts as None.

    """

    def __init__(self):
        """Create a header with every member unset."""
        # Details of the program run.
        self.application = self.version = self.date = self.reference = ''

        # Query sequence.
        self.query = ''
        self.query_letters = None

        # Searched database.
        self.database = ''
        self.database_sequences = self.database_letters = None
60 61
class Description(object):
    """Holds one hit from the descriptions section of a report.

    Members:
    title           Title of the hit.
    score           Number of bits. (int)
    bits            Bit score. (float)
    e               E value. (float)
    num_alignments  Number of alignments for the same subject. (int)
    """

    def __init__(self):
        """Create a description with an empty title and no values."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return title, score and E value as one line of text.

        The title is left-justified in a 66 character column and the
        score right-justified in a 5 character column; longer values are
        not truncated.
        """
        columns = (
            str(self.title).ljust(66),
            str(self.score).rjust(5),
            str(self.e),
        )
        return " ".join(columns)
84 85
class DescriptionExt(Description):
    """Description record extended for BLASTXML version 2.

    Members:
    items           List of DescriptionExtItem
    """

    def __init__(self):
        """Set up the inherited members and an empty item list."""
        super(DescriptionExt, self).__init__()

        self.items = []

    def append_item(self, item):
        """Append an extended description record to ``items``.

        When no item has been stored yet, the string form of ``item``
        also becomes this description's title.
        """
        if not self.items:
            self.title = str(item)
        self.items.append(item)
104 105
class DescriptionExtItem(object):
    """Holds one record of a hit description for BLASTXML version 2.

    Members:
    id              Database identifier
    title           Title of the hit.
    accession       Initialised to None.
    taxid           Initialised to None.
    sciname         Initialised to None.
    """

    def __init__(self):
        """Create an item with every member set to None."""
        self.id = self.title = None
        self.accession = self.taxid = self.sciname = None

    def __str__(self):
        """Return the identifier and the title separated by one space."""
        return " ".join((str(self.id), str(self.title)))
125 126
class Alignment(object):
    """Holds one hit from the alignments section of a report.

    Members:
    title      Name.
    hit_id     Hit identifier. (str)
    hit_def    Hit definition. (str)
    length     Length. (int)
    hsps       A list of HSP objects.

    """

    def __init__(self):
        """Create an alignment with empty text members and no HSPs."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a ``Length = ...`` line.

        The title is split on newlines; every line after the first is
        prefixed with a single space, and the text ends with a newline.
        """
        pieces = self.title.split('\n') + ["Length = %s\n" % self.length]
        return '\n '.join(pieces)
152 153
class HSP(object):
    """Stores information about one hsp in an alignment hit.

    Members:
        - score           BLAST score of hit.  (float)
        - bits            Number of bits for that score.  (float)
        - expect          Expect value.  (float)
        - num_alignments  Number of alignments for same subject.  (int)
        - identities      Number of identities (int) if using the XML parser.
          Tuple of number of identities/total aligned (int, int)
          if using the (obsolete) plain text parser.
        - positives       Number of positives (int) if using the XML parser.
          Tuple of number of positives/total aligned (int, int)
          if using the (obsolete) plain text parser.
        - gaps            Number of gaps (int) if using the XML parser.
          Tuple of number of gaps/total aligned (int, int) if
          using the (obsolete) plain text parser.
        - align_length    Length of the alignment. (int)
        - strand          Tuple of (query, target) strand.
        - frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

        - query           The query sequence.
        - query_start     The start residue for the query sequence.  (1-based)
        - query_end       The end residue for the query sequence.  (1-based)
        - match           The match sequence.
        - sbjct           The sbjct sequence.
        - sbjct_start     The start residue for the sbjct sequence.  (1-based)
        - sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute::

                  score     expect     identities   positives    strand  frame
        BLASTP     X          X            X            X
        BLASTN     X          X            X            X          X
        BLASTX     X          X            X            X                  X
        TBLASTN    X          X            X            X                  X
        TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    # Column at which the sequence text starts on the "Query:" / "Sbjct:"
    # lines: the 6-character label, the start coordinate right-justified
    # in 8 characters, and one separating space.  The match line is
    # indented by the same amount so its symbols sit under the residues.
    _SEQUENCE_COLUMN = len("Query:") + 8 + 1

    def __init__(self):
        """Initialize the class."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    @staticmethod
    def _abbreviate(text):
        """Return the first 45 and last 3 characters of text around '...'."""
        return "%s...%s" % (text[:45], text[-3:])

    def __str__(self):
        """Return the BLAST HSP as a formatted string.

        The result has four lines: a summary of score, bits, expectation
        and alignment length, then the query, match and sbjct lines.  When
        align_length is 50 or more, each sequence is abbreviated to its
        first 45 and last 3 characters.  The match line is indented so
        that it lines up with the sequences above and below it.

        Raises TypeError if score, bits, expect or align_length is still
        None, because the summary line formats them as numbers.
        """
        lines = ["Score %i (%i bits), expectation %0.1e, alignment length %i"
                 % (self.score, self.bits, self.expect, self.align_length)]

        query, match, sbjct = str(self.query), str(self.match), str(self.sbjct)
        if self.align_length < 50:
            pass  # short alignments are shown in full
        else:
            query = self._abbreviate(query)
            match = self._abbreviate(match)
            sbjct = self._abbreviate(sbjct)

        lines.append("Query:%s %s %s" % (str(self.query_start).rjust(8),
                                         query,
                                         str(self.query_end)))
        lines.append(" " * self._SEQUENCE_COLUMN + match)
        lines.append("Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
                                         sbjct,
                                         str(self.sbjct_end)))
        return "\n".join(lines)
252 253
class MultipleAlignment(object):
    """Holds the rows of a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Create an object with an empty ``alignment`` list."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Build a Bio.Align MultipleSeqAlignment from the stored tuples.

        Instead of the tuples, this returns a MultipleSeqAlignment object
        through which the alignment can be manipulated and queried.

        alphabet is the alphabet for the sequences (for example
        IUPAC.IUPACProtein); it is passed to every Seq and to the
        MultipleSeqAlignment itself.

        Thanks to James Casbon for the code.
        """
        names = []
        chunks = []
        block_count = 0
        row = 0
        for label, _start, residues, _end in self.alignment:
            # A QUERY row opens a new alignment block.
            if label == 'QUERY':
                block_count += 1
                row = 0

            if block_count == 1:
                # First block: register each row and its first chunk.
                names.append(label)
                chunks.append(residues)
                continue

            # Any other block: extend the row at the same position.
            chunks[row] += residues
            row += 1

        result = MultipleSeqAlignment([], alphabet)
        for label, residues in zip(names, chunks):
            result.append(SeqRecord(Seq(residues, alphabet), label))

        return result
303 304
class Round(object):
    """Container for the results of one PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of Description
                        objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    """

    def __init__(self):
        """Create a round with no number, empty lists and no alignment."""
        self.number = self.multiple_alignment = None
        # Each list member gets its own fresh list.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
323 324
class DatabaseReport(object):
    """Container for the values of a database report.

    Members:
    database_name              List of database names.  (can have multiple
                               dbs)
    num_letters_in_database    Number of letters in the database; starts
                               out as an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     Starts out as 0.  XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Create a report with empty lists and unset parameters."""
        # Per-database values, each held in its own list.
        self.database_name, self.posted_date = [], []
        self.num_letters_in_database = []
        self.num_sequences_in_database = []

        # Statistics, ungapped and gapped.
        unset = (None, None, None)
        self.ka_params = unset
        self.gapped = 0
        self.ka_params_gap = unset
348 349
class Parameters(object):
    """Container for the values of the parameters section.

    Members:
    matrix              Name of the matrix.
    gap_penalties       Tuple of (open, extend) penalties.  (floats)
    sc_match            Match score for nucleotide-nucleotide comparison
    sc_mismatch         Mismatch penalty for nucleotide-nucleotide comparison
    num_hits            Number of hits to the database.  (int)
    num_sequences       Number of sequences.  (int)
    num_good_extends    Number of extensions.  (int)
    num_seqs_better_e   Number of sequences better than e-value.  (int)
    hsps_no_gap         Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped  Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped         Total number of HSP's gapped.  (int)
    query_length        Length of the query.  (int)
    query_id            Identifier of the query sequence. (str)
    database_length     Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift          Frameshift window.  Tuple of (int, float)
    threshold           Threshold.  (int)
    window_size         Window size.  (int)
    dropoff_1st_pass    Tuple of (score, bits).  (int, float)
    gap_x_dropoff       Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final Tuple of (score, bits).  (int, float)
    gap_trigger         Tuple of (score, bits).  (int, float)
    blast_cutoff        Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Create a parameter set with every value unset."""
        no_pair = (None, None)

        # Scoring scheme.
        self.matrix = ''
        self.gap_penalties = no_pair
        self.sc_match = self.sc_mismatch = None

        # Search counters.
        self.num_hits = self.num_sequences = None
        self.num_good_extends = self.num_seqs_better_e = None
        self.hsps_no_gap = self.hsps_prelim_gapped = None
        self.hsps_prelim_gapped_attemped = self.hsps_gapped = None

        # Query and database sizes.
        self.query_id = self.query_length = self.database_length = None
        self.effective_hsp_length = None
        self.effective_query_length = None
        self.effective_database_length = None
        self.effective_search_space = None
        self.effective_search_space_used = None

        # Thresholds and cutoffs.
        self.frameshift = no_pair
        self.threshold = self.window_size = None
        self.dropoff_1st_pass = no_pair
        self.gap_x_dropoff = no_pair
        self.gap_x_dropoff_final = no_pair
        self.gap_trigger = no_pair
        self.blast_cutoff = no_pair
414 415 416 # TODO - Add a friendly __str__ method to BLAST results
class Blast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Run each base initializer, then add the result members."""
        # Every base initializer is invoked explicitly, in the order the
        # bases are listed.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
436 437
class PSIBlast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one blastpgp search.

    Members:
    rounds       A list of Round objects.
    converged    Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Run each base initializer, then add the round members."""
        # Every base initializer is invoked explicitly, in the order the
        # bases are listed.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        self.converged = 0
455