Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # 
  3  # This file is part of the Biopython distribution and governed by your 
  4  # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 
  5  # Please see the LICENSE file that should have been included as part of this 
  6  # package. 
  7  """Record classes to hold BLAST output. 
  8   
  9  Classes: 
 10  Blast              Holds all the information from a blast search. 
 11  PSIBlast           Holds all the information from a psi-blast search. 
 12   
 13  Header             Holds information from the header. 
 14  Description        Holds information about one hit description. 
 15  Alignment          Holds information about one alignment hit. 
 16  HSP                Holds information about one HSP. 
 17  MultipleAlignment  Holds information about a multiple alignment. 
 18  DatabaseReport     Holds information from the database report. 
 19  Parameters         Holds information from the parameters. 
 20   
 21  """ 
 22  # XXX finish printable BLAST output 
 23   
 24  from Bio.Seq import Seq 
 25  from Bio.SeqRecord import SeqRecord 
 26  from Bio.Align import MultipleSeqAlignment 
 27   
 28   
class Header(object):
    """Hold the fields read from the header of a BLAST report.

    Members:
    application         Name of the BLAST flavor that generated the data.
    version             Version of BLAST that was used.
    date                Date on which the data was generated.
    reference           Reference for BLAST.

    query               Name of the query sequence.
    query_letters       Number of letters in the query sequence. (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database. (int)
    database_letters    Number of letters in the database. (int)

    """

    def __init__(self):
        """Start with empty strings for text fields and None for counts."""
        # Program identification.
        self.application = self.version = self.date = self.reference = ''

        # Query details.
        self.query, self.query_letters = '', None

        # Database details.
        self.database = ''
        self.database_sequences = self.database_letters = None
60 61
class Description(object):
    """Hold one hit from the descriptions section of a BLAST report.

    Members:
    title           Title of the hit.
    score           Number of bits. (int)
    bits            Bit score. (float)
    e               E value. (float)
    num_alignments  Number of alignments for the same subject. (int)
    """

    def __init__(self):
        """Start with an empty title and every numeric field unset."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return a one-line summary: title, score and E value."""
        # The title is left-justified to 66 columns and the score is
        # right-justified to 5; unset values are rendered through %s as "None".
        # NOTE(review): column separators are reproduced as single spaces --
        # confirm the intended widths.
        columns = (self.title, self.score, self.e)
        return "%-66s %5s %s" % columns
83 84
class Alignment(object):
    """Hold one hit from the alignments section of a BLAST report.

    Members:
    title    Name.
    hit_id   Hit identifier. (str)
    hit_def  Hit definition. (str)
    length   Length. (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Start with empty text fields, no length and a fresh HSP list."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        # A new list per instance, so hits never share their HSPs.
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line."""
        # NOTE(review): the continuation separator is reproduced as a newline
        # plus one space -- confirm the intended indent width.
        pieces = self.title.split('\n') + ["Length = %s\n" % self.length]
        return '\n '.join(pieces)
109 110
class HSP(object):
    """Stores information about one hsp in an alignment hit.

    Members:
        - score           BLAST score of hit.  (float)
        - bits            Number of bits for that score.  (float)
        - expect          Expect value.  (float)
        - num_alignments  Number of alignments for same subject.  (int)
        - identities      Number of identities (int) if using the XML parser.
                          Tuple of number of identities/total aligned (int, int)
                          if using the (obsolete) plain text parser.
        - positives       Number of positives (int) if using the XML parser.
                          Tuple of number of positives/total aligned (int, int)
                          if using the (obsolete) plain text parser.
        - gaps            Number of gaps (int) if using the XML parser.
                          Tuple of number of gaps/total aligned (int, int) if
                          using the (obsolete) plain text parser.
        - align_length    Length of the alignment. (int)
        - strand          Tuple of (query, target) strand.
        - frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

        - query           The query sequence.
        - query_start     The start residue for the query sequence.  (1-based)
        - query_end       The end residue for the query sequence.  (1-based)
        - match           The match sequence.
        - sbjct           The sbjct sequence.
        - sbjct_start     The start residue for the sbjct sequence.  (1-based)
        - sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute::

                  score  expect  identities  positives  strand  frame
        BLASTP      X      X         X           X
        BLASTN      X      X         X           X         X
        BLASTX      X      X         X           X                 X
        TBLASTN     X      X         X           X                 X
        TBLASTX     X      X         X           X                X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    def __init__(self):
        """Initialize the class with every field unset."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    @staticmethod
    def _preview(text, whole):
        """Return text in full if whole is true, else its first 45 and last 3 characters."""
        text = str(text)
        if whole:
            return text
        return "%s...%s" % (text[:45], text[-3:])

    def __str__(self):
        """Return a four-line summary: scores, query, match and sbjct.

        Alignments shorter than 50 columns are shown in full; longer ones
        are abbreviated to their first 45 and last 3 characters joined by
        "...".

        Raises:
            TypeError: if score, bits, expect or align_length is still None,
                because they are rendered with numeric format codes and
                align_length is compared against 50.
        """
        summary = ("Score %i (%i bits), expectation %0.1e, alignment length %i"
                   % (self.score, self.bits, self.expect, self.align_length))
        whole = self.align_length < 50

        # The residues on the Query/Sbjct lines begin after the 6-character
        # label, the 8-wide right-justified start position and one space.
        # The match line needs the same margin so that each match symbol
        # sits under the residue pair it describes.
        margin = " " * (len("Query:") + 8 + 1)

        return "\n".join([
            summary,
            "Query:%s %s %s" % (str(self.query_start).rjust(8),
                                self._preview(self.query, whole),
                                str(self.query_end)),
            margin + self._preview(self.match, whole),
            "Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
                                self._preview(self.sbjct, whole),
                                str(self.sbjct_end)),
        ])
208 209
class MultipleAlignment(object):
    """Hold the rows of a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Start with no alignment rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Build a MultipleSeqAlignment from the stored tuples.

        The stored rows come in blocks, each opening with a row named
        'QUERY'.  Rows of the first block define the sequences and their
        names; rows of every later block are appended, by position within
        the block, to the sequence text collected so far.

        alphabet is the specified alphabet for the sequences in the code (for
        example IUPAC.IUPACProtein).  It is passed both to the alignment and
        to each Seq.

        Thanks to James Casbon for the code.
        """
        names = []
        residues = []
        block_count = 0
        row = 0
        for label, _start, chunk, _end in self.alignment:
            if label == 'QUERY':
                # A QUERY row opens a new block; restart the row counter.
                block_count += 1
                row = 0

            if block_count == 1:
                # First block: register the sequence and its name.
                names.append(label)
                residues.append(chunk)
                continue

            # Any other block: extend the sequence at the same row position.
            residues[row] += chunk
            row += 1

        generic = MultipleSeqAlignment([], alphabet)
        for label, text in zip(names, residues):
            generic.append(SeqRecord(Seq(text, alphabet), label))
        return generic
259 260
class Round(object):
    """Hold the results of one PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of Description
                        objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    """

    def __init__(self):
        """Start with no round number, empty lists and no multiple alignment."""
        self.number = None
        # Three separate lists, one per member.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
279 280
class DatabaseReport(object):
    """Hold the fields of a BLAST database report.

    Members:
    database_name              List of database names.  (can have multiple
                               dbs)
    num_letters_in_database    Number of letters in the database.  Starts as
                               an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Start with empty lists, unset Karlin-Altschul tuples and gapped = 0."""
        # Every list-valued member gets its own list.
        self.database_name, self.posted_date = [], []
        self.num_letters_in_database, self.num_sequences_in_database = [], []

        unset_ka = (None, None, None)
        self.ka_params = unset_ka
        self.gapped = 0
        self.ka_params_gap = unset_ka
304 305
class Parameters(object):
    """Hold the fields of the parameters section of a BLAST report.

    Members:
    matrix                       Name of the matrix.
    gap_penalties                Tuple of (open, extend) penalties.  (floats)
    sc_match                     Match score for nucleotide-nucleotide
                                 comparison
    sc_mismatch                  Mismatch penalty for nucleotide-nucleotide
                                 comparison
    num_hits                     Number of hits to the database.  (int)
    num_sequences                Number of sequences.  (int)
    num_good_extends             Number of extensions.  (int)
    num_seqs_better_e            Number of sequences better than e-value.
                                 (int)
    hsps_no_gap                  Number of HSP's better, without gapping.
                                 (int)
    hsps_prelim_gapped           Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped                  Total number of HSP's gapped.  (int)
    query_length                 Length of the query.  (int)
    query_id                     Identifier of the query sequence.  (str)
    database_length              Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift                   Frameshift window.  Tuple of (int, float)
    threshold                    Threshold.  (int)
    window_size                  Window size.  (int)
    dropoff_1st_pass             Tuple of (score, bits).  (int, float)
    gap_x_dropoff                Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final          Tuple of (score, bits).  (int, float)
    gap_trigger                  Tuple of (score, bits).  (int, float)
    blast_cutoff                 Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Start with an empty matrix name and every other field unset."""
        unset_pair = (None, None)

        # Scoring scheme.
        self.matrix = ''
        self.gap_penalties = unset_pair
        self.sc_match = self.sc_mismatch = None

        # Search statistics.
        self.num_hits = self.num_sequences = None
        self.num_good_extends = self.num_seqs_better_e = None
        self.hsps_no_gap = self.hsps_prelim_gapped = None
        # "attemped" is the attribute's established spelling.
        self.hsps_prelim_gapped_attemped = self.hsps_gapped = None

        # Query and database sizes.
        self.query_id = self.query_length = self.database_length = None
        self.effective_hsp_length = None
        self.effective_query_length = self.effective_database_length = None
        self.effective_search_space = None
        self.effective_search_space_used = None

        # Heuristic settings.
        self.frameshift = unset_pair
        self.threshold = self.window_size = None
        self.dropoff_1st_pass = self.gap_x_dropoff = unset_pair
        self.gap_x_dropoff_final = self.gap_trigger = unset_pair
        self.blast_cutoff = unset_pair
370 371 372 # TODO - Add a friendly __str__ method to BLAST results
class Blast(Header, DatabaseReport, Parameters):
    """Hold everything parsed from one BLAST search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise each base class, then the search-specific members."""
        # Each base is initialised explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)

        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
392 393
class PSIBlast(Header, DatabaseReport, Parameters):
    """Hold everything parsed from one blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise each base class, then the PSI-BLAST specific members."""
        # Each base is initialised explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)

        self.rounds = []
        # Stored as the integer 0 until it is set otherwise.
        self.converged = 0
411