Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Record classes to hold BLAST output. 
  7   
  8  Classes: 
  9  Blast              Holds all the information from a blast search. 
 10  PSIBlast           Holds all the information from a psi-blast search. 
 11   
 12  Header             Holds information from the header. 
 13  Description        Holds information about one hit description. 
 14  Alignment          Holds information about one alignment hit. 
 15  HSP                Holds information about one HSP. 
 16  MultipleAlignment  Holds information about a multiple alignment. 
 17  DatabaseReport     Holds information from the database report. 
 18  Parameters         Holds information from the parameters. 
 19   
 20  """ 
 21  # XXX finish printable BLAST output 
 22   
 23  from Bio.Align import Generic 
 24   
 25   
class Header(object):
    """Container for the fields read from the header of a BLAST report.

    Members:
    application         Name of the BLAST flavor that generated this data.
    version             Version of BLAST that was used.
    date                Date on which this data was generated.
    reference           Reference for BLAST.

    query               Name of the query sequence.
    query_letters       Number of letters in the query sequence.  (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database.  (int)
    database_letters    Number of letters in the database.  (int)

    """

    def __init__(self):
        """Create a header with empty text fields and unset (None) counts."""
        # Text fields.  Strings are immutable, so one shared '' is safe.
        self.application = self.version = self.date = self.reference = ''

        # Query section.
        self.query = ''
        self.query_letters = None

        # Database section.
        self.database = ''
        self.database_sequences = self.database_letters = None
55 56
class Description(object):
    """Holds one hit from the descriptions section of a BLAST report.

    Members:
    title           Title of the hit.
    score           Number of bits.  (int)
    bits            Bit score.  (float)
    e               E value.  (float)
    num_alignments  Number of alignments for the same subject.  (int)
    """

    def __init__(self):
        """Create a description with an empty title and unset values."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return a one-line summary: title, score and E value.

        The title is left-justified in a 66-character column and the score
        is right-justified in a 5-character column; values longer than
        their column are not truncated.
        """
        columns = (
            str(self.title).ljust(66),
            str(self.score).rjust(5),
            str(self.e),
        )
        return " ".join(columns)
76 77
class Alignment(object):
    """Holds one hit from the alignments section of a BLAST report.

    Members:
    title    Name.
    hit_id   Hit identifier.  (str)
    hit_def  Hit definition.  (str)
    length   Length.  (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Create an alignment with empty text fields and no HSPs."""
        # Strings are immutable, so the three text fields may share one ''.
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line.

        The title is split on newlines and every line after the first is
        prefixed by the separator's trailing whitespace.
        """
        pieces = self.title.split('\n') + ["Length = %s\n" % self.length]
        # NOTE(review): the separator is reproduced exactly as it appears in
        # this copy of the file; confirm against the upstream source that its
        # whitespace was not collapsed when the listing was extracted.
        return '\n '.join(pieces)
100 101
class HSP(object):
    """Stores information about one HSP (high-scoring pair) in an alignment hit.

    Members:
    score           BLAST score of hit.  (float)
    bits            Number of bits for that score.  (float)
    expect          Expect value.  (float)
    num_alignments  Number of alignments for same subject.  (int)
    identities      Number of identities (int) if using the XML parser.
                    Tuple of number of identities/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    positives       Number of positives (int) if using the XML parser.
                    Tuple of number of positives/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    gaps            Number of gaps (int) if using the XML parser.
                    Tuple of number of gaps/total aligned (int, int) if
                    using the (obsolete) plain text parser.
    align_length    Length of the alignment.  (int)
    strand          Tuple of (query, target) strand.
    frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

    query           The query sequence.
    query_start     The start residue for the query sequence.  (1-based)
    query_end       The end residue for the query sequence.  (1-based)
    match           The match sequence.
    sbjct           The sbjct sequence.
    sbjct_start     The start residue for the sbjct sequence.  (1-based)
    sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute:
              score     expect     identities   positives    strand  frame
    BLASTP     X          X            X            X
    BLASTN     X          X            X            X          X
    BLASTX     X          X            X            X                  X
    TBLASTN    X          X            X            X                  X
    TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    # Width of the right-justified start coordinate on Query:/Sbjct: rows.
    _START_WIDTH = 8
    # Column where sequence text begins on those rows: the 6-character
    # label, the start coordinate, and one separating space.
    _SEQ_COLUMN = len("Query:") + _START_WIDTH + 1

    def __init__(self):
        """Create an HSP with every value unset."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    @staticmethod
    def _shown(sequence, abbreviate):
        """Return sequence as text, abbreviated to head...tail if requested."""
        text = str(sequence)
        if abbreviate:
            # First 45 characters, an ellipsis, then the last 3 characters.
            return "%s...%s" % (text[:45], text[-3:])
        return text

    def __str__(self):
        """Return a four-line summary: scores, query, match and sbjct rows.

        Alignments shorter than 50 are printed in full; longer ones show
        the first 45 and last 3 characters of each row around "...".  The
        match row is indented so that it lines up under the sequence text
        of the Query:/Sbjct: rows.

        Raises TypeError if score, bits, expect or align_length is still
        None, because those are formatted as numbers.
        """
        summary = ("Score %i (%i bits), expectation %0.1e, alignment length %i"
                   % (self.score, self.bits, self.expect, self.align_length))
        abbreviate = not (self.align_length < 50)
        start_width = self._START_WIDTH

        query_row = "Query:%s %s %s" % (
            str(self.query_start).rjust(start_width),
            self._shown(self.query, abbreviate),
            str(self.query_end))
        # Pad to the sequence column; a single leading space would leave
        # the match symbols misaligned with the residues above and below.
        match_row = " " * self._SEQ_COLUMN + self._shown(self.match, abbreviate)
        sbjct_row = "Sbjct:%s %s %s" % (
            str(self.sbjct_start).rjust(start_width),
            self._shown(self.sbjct, abbreviate),
            str(self.sbjct_end))
        return "\n".join([summary, query_row, match_row, sbjct_row])
196 197
class MultipleAlignment(object):
    """Holds the rows of a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Create a multiple alignment with no rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Build a generic alignment object from the stored rows.

        Instead of the tuples, this returns an Alignment object from
        Bio.Align.Generic, through which you can manipulate and query
        the object.

        alphabet is the alphabet to use for the sequences (for example
        IUPAC.IUPACProtein); it is passed to Generic.Alignment.

        Rows are grouped into blocks, each starting at a row named
        'QUERY'.  The first block supplies the names and the initial
        sequence text; each row of a later block is appended, by position
        within its block, to the text collected so far.

        Thanks to James Casbon for the code.
        """
        #TODO - Switch to new Bio.Align.MultipleSeqAlignment class?
        names = []
        chunks = []
        block_count = 0
        row = 0
        for record in self.alignment:
            label, _start, residues, _end = record
            if label == 'QUERY':  # QUERY opens every alignment block
                block_count += 1
                row = 0

            if block_count != 1:
                # Not in the first block: extend the matching row.
                chunks[row] += residues
                row += 1
                continue

            # First block: register the row.
            chunks.append(residues)
            names.append(label)

        result = Generic.Alignment(alphabet)
        for index, label in enumerate(names):
            result.add_sequence(label, chunks[index])

        return result
245 246
class Round(object):
    """Holds the results of one PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of
                        Description objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    """

    def __init__(self):
        """Create a round with no number, empty lists and no alignment."""
        self.number = None
        # Three separate list objects, assigned left to right.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
263 264
class DatabaseReport(object):
    """Holds the values read from a database report.

    Members:
    database_name              List of database names.  (can have
                               multiple dbs)
    num_letters_in_database    Number of letters in the database,
                               initialised as an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Create a report with empty lists and unset parameters."""
        # Every list member gets its own fresh list object.
        for member in ('database_name', 'posted_date',
                       'num_letters_in_database',
                       'num_sequences_in_database'):
            setattr(self, member, [])

        unset_triple = (None,) * 3
        self.ka_params = unset_triple
        self.gapped = 0
        self.ka_params_gap = unset_triple
286 287
class Parameters(object):
    """Holds the values read from the parameters section.

    Members:
    matrix                       Name of the matrix.
    gap_penalties                Tuple of (open, extend) penalties.  (floats)
    sc_match                     Match score for nucleotide-nucleotide
                                 comparison
    sc_mismatch                  Mismatch penalty for nucleotide-nucleotide
                                 comparison
    num_hits                     Number of hits to the database.  (int)
    num_sequences                Number of sequences.  (int)
    num_good_extends             Number of extensions.  (int)
    num_seqs_better_e            Number of sequences better than e-value.
                                 (int)
    hsps_no_gap                  Number of HSP's better, without gapping.
                                 (int)
    hsps_prelim_gapped           Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped                  Total number of HSP's gapped.  (int)
    query_length                 Length of the query.  (int)
    query_id                     Identifier of the query sequence.  (str)
    database_length              Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift                   Frameshift window.  Tuple of (int, float)
    threshold                    Threshold.  (int)
    window_size                  Window size.  (int)
    dropoff_1st_pass             Tuple of (score, bits).  (int, float)
    gap_x_dropoff                Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final          Tuple of (score, bits).  (int, float)
    gap_trigger                  Tuple of (score, bits).  (int, float)
    blast_cutoff                 Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Create a parameter set in which every value is unset."""
        unset_pair = (None, None)

        self.matrix = ''
        self.gap_penalties = unset_pair

        # Scalar values, all starting out as None.  The spelling of
        # hsps_prelim_gapped_attemped is the attribute's actual name.
        for member in ('sc_match', 'sc_mismatch', 'num_hits',
                       'num_sequences', 'num_good_extends',
                       'num_seqs_better_e', 'hsps_no_gap',
                       'hsps_prelim_gapped', 'hsps_prelim_gapped_attemped',
                       'hsps_gapped', 'query_id', 'query_length',
                       'database_length', 'effective_hsp_length',
                       'effective_query_length',
                       'effective_database_length',
                       'effective_search_space',
                       'effective_search_space_used'):
            setattr(self, member, None)

        self.frameshift = unset_pair
        self.threshold = self.window_size = None

        # (score, bits) pairs.
        for member in ('dropoff_1st_pass', 'gap_x_dropoff',
                       'gap_x_dropoff_final', 'gap_trigger',
                       'blast_cutoff'):
            setattr(self, member, unset_pair)
350 351 352 #TODO - Add a friendly __str__ method to BLAST results
class Blast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one BLAST search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise each base class in turn, then the result members."""
        # Each base has its own independent __init__, so call them all
        # explicitly, in declaration order.
        for section in (Header, DatabaseReport, Parameters):
            section.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
370 371
class PSIBlast(Header, DatabaseReport, Parameters):
    """Holds everything produced by one blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise each base class in turn, then the round members."""
        # Each base has its own independent __init__, so call them all
        # explicitly, in declaration order.
        for section in (Header, DatabaseReport, Parameters):
            section.__init__(self)
        self.rounds = []
        self.converged = 0
387