Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Record classes to hold BLAST output. 
  7   
  8  Classes: 
  9  Blast              Holds all the information from a blast search. 
 10  PSIBlast           Holds all the information from a psi-blast search. 
 11   
 12  Header             Holds information from the header. 
 13  Description        Holds information about one hit description. 
 14  Alignment          Holds information about one alignment hit. 
 15  HSP                Holds information about one HSP. 
 16  MultipleAlignment  Holds information about a multiple alignment. 
 17  DatabaseReport     Holds information from the database report. 
 18  Parameters         Holds information from the parameters. 
 19   
 20  """ 
 21  # XXX finish printable BLAST output 
 22   
 23  from Bio.Seq import Seq 
 24  from Bio.SeqRecord import SeqRecord 
 25  from Bio.Align import MultipleSeqAlignment 
 26   
 27   
class Header(object):
    """Holds the fields read from the header of a BLAST report.

    Members:
    application         The name of the BLAST flavor that generated this data.
    version             Version of blast used.
    date                Date this data was generated.
    reference           Reference for blast.

    query               Name of query sequence.
    query_letters       Number of letters in the query sequence. (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database. (int)
    database_letters    Number of letters in the database. (int)

    """

    def __init__(self):
        """Start with blank text fields and unset (None) counts."""
        # Program details.
        self.application = self.version = self.date = self.reference = ''

        # Query details.
        self.query, self.query_letters = '', None

        # Database details.
        self.database = ''
        self.database_sequences = self.database_letters = None
57 58
class Description(object):
    """Holds one entry from the descriptions section of a report.

    Members:
    title           Title of the hit.
    score           Number of bits. (int)
    bits            Bit score. (float)
    e               E value. (float)
    num_alignments  Number of alignments for the same subject. (int)
    """

    def __init__(self):
        """Start with an empty title and every numeric field unset (None)."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return a one-line summary: title, score and E value.

        The title is left-justified in a 66-character column and the score
        is right-justified in a 5-character column.
        """
        columns = (self.title, self.score, self.e)
        return "%-66s %5s %s" % columns
78 79
class Alignment(object):
    """Holds one hit from the alignments section of a report.

    Members:
    title    Name.
    hit_id   Hit identifier. (str)
    hit_def  Hit definition. (str)
    length   Length. (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Start with blank text fields, no length and an empty HSP list."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line.

        The title is split on newlines; every line after the first is
        prefixed by the join separator, and the result ends with a newline.
        """
        pieces = self.title.split('\n') + ["Length = %s\n" % self.length]
        # NOTE(review): this listing appears whitespace-collapsed, so the
        # continuation indent in the separator below may originally have been
        # wider - confirm against the upstream file.
        return '\n '.join(pieces)
102 103
class HSP(object):
    """Stores information about one hsp in an alignment hit.

    Members:
        - score           BLAST score of hit.  (float)
        - bits            Number of bits for that score.  (float)
        - expect          Expect value.  (float)
        - num_alignments  Number of alignments for same subject.  (int)
        - identities      Number of identities (int) if using the XML parser.
                          Tuple of number of identities/total aligned (int, int)
                          if using the (obsolete) plain text parser.
        - positives       Number of positives (int) if using the XML parser.
                          Tuple of number of positives/total aligned (int, int)
                          if using the (obsolete) plain text parser.
        - gaps            Number of gaps (int) if using the XML parser.
                          Tuple of number of gaps/total aligned (int, int) if
                          using the (obsolete) plain text parser.
        - align_length    Length of the alignment. (int)
        - strand          Tuple of (query, target) strand.
        - frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

        - query           The query sequence.
        - query_start     The start residue for the query sequence.  (1-based)
        - query_end       The end residue for the query sequence.  (1-based)
        - match           The match sequence.
        - sbjct           The sbjct sequence.
        - sbjct_start     The start residue for the sbjct sequence.  (1-based)
        - sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute::

                  score     expect     identities   positives    strand  frame
        BLASTP     X          X            X            X
        BLASTN     X          X            X            X          X
        BLASTX     X          X            X            X                  X
        TBLASTN    X          X            X            X                  X
        TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    def __init__(self):
        """Start with every statistic unset and every sequence empty.

        The identities, positives, gaps and strand fields start as
        (None, None) and frame starts as an empty tuple.
        """
        # Scores and statistics.
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        # Aligned sequences and their coordinates.
        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    def __str__(self):
        """Return a four-line, human-readable summary of the HSP.

        The first line gives score, bits, expectation and alignment length.
        The next three lines show the query, match and sbjct strings with
        their start/end positions.  When align_length is 50 or more, each
        string is abbreviated to its first 45 characters, "...", and its
        last 3 characters.

        Raises TypeError if score, bits, expect or align_length is still
        None, because they are formatted and compared as numbers.
        """
        lines = ["Score %i (%i bits), expectation %0.1e, alignment length %i"
                 % (self.score, self.bits, self.expect, self.align_length)]

        query, match, sbjct = str(self.query), str(self.match), str(self.sbjct)
        if self.align_length >= 50:
            query, match, sbjct = [
                "%s...%s" % (text[:45], text[-3:])
                for text in (query, match, sbjct)
            ]

        # The match line has no label or coordinate, so it is padded to the
        # width of the "Query:" / "Sbjct:" prefix (label + 8-wide start + one
        # space); otherwise the match symbols do not sit under the residues.
        match_indent = " " * (len("Query:") + 8 + 1)

        lines.append("Query:%s %s %s" % (str(self.query_start).rjust(8),
                                         query,
                                         str(self.query_end)))
        lines.append("%s%s" % (match_indent, match))
        lines.append("Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
                                         sbjct,
                                         str(self.sbjct_end)))
        return "\n".join(lines)
199 200
class MultipleAlignment(object):
    """Holds information about a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Start with an empty list of alignment rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Retrieve generic alignment object for the given alignment.

        Instead of the tuples, this returns a MultipleSeqAlignment object
        from Bio.Align, through which you can manipulate and query
        the object.

        alphabet is the specified alphabet for the sequences in the code (for
        example IUPAC.IUPACProtein).

        Rows are collected during the first block (the rows from the first
        'QUERY' entry up to the next one).  In every later block the
        sequence fragments are appended, by position within the block, to
        the rows collected so far.

        Thanks to James Casbon for the code.
        """
        # TODO - Switch to new Bio.Align.MultipleSeqAlignment class?
        names = []
        sequences = []
        block_count = 0
        row = 0
        for name, _start, fragment, _end in self.alignment:
            # QUERY is the first row of each alignment block.
            if name == 'QUERY':
                block_count += 1
                row = 0

            if block_count == 1:
                # First block: create the rows.
                sequences.append(fragment)
                names.append(name)
                continue

            # Later blocks: extend the row at the same position.
            sequences[row] += fragment
            row += 1

        generic = MultipleSeqAlignment([], alphabet)
        for name, sequence in zip(names, sequences):
            generic.append(SeqRecord(Seq(sequence, alphabet), name))

        return generic
248 249
class Round(object):
    """Holds the results of a single PSI-BLAST round.

    Members:
    number              Round number. (int)
    reused_seqs         Sequences in model, found again. List of Description
                        objects.
    new_seqs            Sequences not found, or below threshold. List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    """

    def __init__(self):
        """Start with no round number, empty lists and no multiple alignment."""
        self.number = None
        # Each list attribute gets its own fresh list.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
266 267
class DatabaseReport(object):
    """Holds the fields of a database report.

    Members:
    database_name              List of database names.  (can have multiple dbs)
    num_letters_in_database    Number of letters in the database.  (int)
                               Starts out as an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Start with empty lists, unset K-A parameters and gapped set to 0."""
        # Per-database values; each attribute gets its own fresh list.
        self.database_name = []
        self.posted_date = []
        self.num_letters_in_database = []
        self.num_sequences_in_database = []

        # (lambda, k, h) triples, ungapped then gapped.
        unset_triple = (None, None, None)
        self.ka_params = unset_triple
        self.gapped = 0
        self.ka_params_gap = unset_triple
289 290
class Parameters(object):
    """Holds the fields of the parameters section of a report.

    Members:
    matrix              Name of the matrix.
    gap_penalties       Tuple of (open, extend) penalties.  (floats)
    sc_match            Match score for nucleotide-nucleotide comparison
    sc_mismatch         Mismatch penalty for nucleotide-nucleotide comparison
    num_hits            Number of hits to the database.  (int)
    num_sequences       Number of sequences.  (int)
    num_good_extends    Number of extensions.  (int)
    num_seqs_better_e   Number of sequences better than e-value.  (int)
    hsps_no_gap         Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped  Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped         Total number of HSP's gapped.  (int)
    query_length        Length of the query.  (int)
    query_id            Identifier of the query sequence. (str)
    database_length     Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift          Frameshift window.  Tuple of (int, float)
    threshold           Threshold.  (int)
    window_size         Window size.  (int)
    dropoff_1st_pass    Tuple of (score, bits).  (int, float)
    gap_x_dropoff       Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final Tuple of (score, bits).  (int, float)
    gap_trigger         Tuple of (score, bits).  (int, float)
    blast_cutoff        Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Start with a blank matrix name and every other field unset.

        Single-valued fields start as None and two-part fields start as
        (None, None).
        """
        unset_pair = (None, None)

        # Scoring scheme.
        self.matrix = ''
        self.gap_penalties = unset_pair
        self.sc_match = None
        self.sc_mismatch = None

        # Search counters.
        self.num_hits = None
        self.num_sequences = None
        self.num_good_extends = None
        self.num_seqs_better_e = None
        self.hsps_no_gap = None
        self.hsps_prelim_gapped = None
        # The spelling "attemped" is the existing public attribute name.
        self.hsps_prelim_gapped_attemped = None
        self.hsps_gapped = None

        # Query and database sizes.
        self.query_id = None
        self.query_length = None
        self.database_length = None
        self.effective_hsp_length = None
        self.effective_query_length = None
        self.effective_database_length = None
        self.effective_search_space = None
        self.effective_search_space_used = None

        # Windows, thresholds and cutoffs.
        self.frameshift = unset_pair
        self.threshold = None
        self.window_size = None
        self.dropoff_1st_pass = unset_pair
        self.gap_x_dropoff = unset_pair
        self.gap_x_dropoff_final = unset_pair
        self.gap_trigger = unset_pair
        self.blast_cutoff = unset_pair
353 354 355 # TODO - Add a friendly __str__ method to BLAST results
class Blast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise all three base classes, then the hit containers."""
        # Each base initialiser is invoked explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
373 374
class PSIBlast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise all three base classes, then the round list and flag."""
        # Each base initialiser is invoked explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        # Stored as the integer 0 rather than False.
        self.converged = 0
390