1
2
3
4
5
6 """Record classes to hold BLAST output.
7
8 Classes:
9 Blast Holds all the information from a blast search.
10 PSIBlast Holds all the information from a psi-blast search.
11
12 Header Holds information from the header.
13 Description Holds information about one hit description.
14 Alignment Holds information about one alignment hit.
15 HSP Holds information about one HSP.
16 MultipleAlignment Holds information about a multiple alignment.
17 DatabaseReport Holds information from the database report.
18 Parameters Holds information from the parameters.
19
20 """
21
22
23 from Bio.Align import Generic
24
25
27 """Saves information from a blast header.
28
29 Members:
30 application The name of the BLAST flavor that generated this data.
31 version Version of blast used.
32 date Date this data was generated.
33 reference Reference for blast.
34
35 query Name of query sequence.
36 query_letters Number of letters in the query sequence. (int)
37
38 database Name of the database.
39 database_sequences Number of sequences in the database. (int)
40 database_letters Number of letters in the database. (int)
41
42 """
44 self.application = ''
45 self.version = ''
46 self.date = ''
47 self.reference = ''
48
49 self.query = ''
50 self.query_letters = None
51
52 self.database = ''
53 self.database_sequences = None
54 self.database_letters = None
55
56
58 """Stores information about one hit in the descriptions section.
59
60 Members:
61 title Title of the hit.
62 score Number of bits. (int)
63 bits Bit score. (float)
64 e E value. (float)
65 num_alignments Number of alignments for the same subject. (int)
66 """
68 self.title = ''
69 self.score = None
70 self.bits = None
71 self.e = None
72 self.num_alignments = None
73
75 return "%-66s %5s %s" % (self.title, self.score, self.e)
76
77
79 """Stores information about one hit in the alignments section.
80
81 Members:
82 title Name.
83 hit_id Hit identifier. (str)
84 hit_def Hit definition. (str)
85 length Length. (int)
86 hsps A list of HSP objects.
87
88 """
95
100
101
103 """Stores information about one hsp in an alignment hit.
104
105 Members:
106 score BLAST score of hit. (float)
107 bits Number of bits for that score. (float)
108 expect Expect value. (float)
109 num_alignments Number of alignments for same subject. (int)
110 identities Number of identities (int) if using the XML parser.
111 Tuple of number of identities/total aligned (int, int)
112 if using the (obsolete) plain text parser.
113 positives Number of positives (int) if using the XML parser.
114 Tuple of number of positives/total aligned (int, int)
115 if using the (obsolete) plain text parser.
116 gaps Number of gaps (int) if using the XML parser.
117 Tuple of number of gaps/total aligned (int, int) if
118 using the (obsolete) plain text parser.
119 align_length Length of the alignment. (int)
120 strand Tuple of (query, target) strand.
121 frame Tuple of 1 or 2 frame shifts, depending on the flavor.
122
123 query The query sequence.
124 query_start The start residue for the query sequence. (1-based)
125 query_end The end residue for the query sequence. (1-based)
126 match The match sequence.
127 sbjct The sbjct sequence.
128 sbjct_start The start residue for the sbjct sequence. (1-based)
129 sbjct_end The end residue for the sbjct sequence. (1-based)
130
131 Not all flavors of BLAST return values for every attribute:
132 score expect identities positives strand frame
133 BLASTP X X X X
134 BLASTN X X X X X
135 BLASTX X X X X X
136 TBLASTN X X X X X
137 TBLASTX X X X X X/X
138
139 Note: for BLASTX, the query sequence is shown as a protein sequence,
140 but the numbering is based on the nucleotides. Thus, the numbering
141 is 3x larger than the number of amino acid residues. A similar effect
142 can be seen for the sbjct sequence in TBLASTN, and for both sequences
143 in TBLASTX.
144
145 Also, for negative frames, the sequence numbering starts from
146 query_start and counts down.
147
148 """
150 self.score = None
151 self.bits = None
152 self.expect = None
153 self.num_alignments = None
154 self.identities = (None, None)
155 self.positives = (None, None)
156 self.gaps = (None, None)
157 self.align_length = None
158 self.strand = (None, None)
159 self.frame = ()
160
161 self.query = ''
162 self.query_start = None
163 self.query_end = None
164 self.match = ''
165 self.sbjct = ''
166 self.sbjct_start = None
167 self.sbjct_end = None
168
170 lines = ["Score %i (%i bits), expectation %0.1e, alignment length %i"
171 % (self.score, self.bits, self.expect, self.align_length)]
172 if self.align_length < 50:
173 lines.append("Query:%s %s %s" % (str(self.query_start).rjust(8),
174 str(self.query),
175 str(self.query_end)))
176 lines.append(" %s"
177 % (str(self.match)))
178 lines.append("Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
179 str(self.sbjct),
180 str(self.sbjct_end)))
181 else:
182 lines.append("Query:%s %s...%s %s"
183 % (str(self.query_start).rjust(8),
184 str(self.query)[:45],
185 str(self.query)[-3:],
186 str(self.query_end)))
187 lines.append(" %s...%s"
188 % (str(self.match)[:45],
189 str(self.match)[-3:]))
190 lines.append("Sbjct:%s %s...%s %s"
191 % (str(self.sbjct_start).rjust(8),
192 str(self.sbjct)[:45],
193 str(self.sbjct)[-3:],
194 str(self.sbjct_end)))
195 return "\n".join(lines)
196
197
199 """Holds information about a multiple alignment.
200
201 Members:
202 alignment A list of tuples (name, start residue, sequence, end residue).
203
204 The start residue is 1-based. It may be blank, if that sequence is
205 not aligned in the multiple alignment.
206
207 """
210
212 """Retrieve generic alignment object for the given alignment.
213
214 Instead of the tuples, this returns an Alignment object from
215 Bio.Align.Generic, through which you can manipulate and query
216 the object.
217
218 alphabet is the specified alphabet for the sequences in the code (for
219 example IUPAC.IUPACProtein).
220
221 Thanks to James Casbon for the code.
222 """
223
224 seq_parts = []
225 seq_names = []
226 parse_number = 0
227 n = 0
228 for name, start, seq, end in self.alignment:
229 if name == 'QUERY':
230 parse_number += 1
231 n = 0
232
233 if parse_number == 1:
234 seq_parts.append(seq)
235 seq_names.append(name)
236 else:
237 seq_parts[n] += seq
238 n += 1
239
240 generic = Generic.Alignment(alphabet)
241 for (name,seq) in zip(seq_names,seq_parts):
242 generic.add_sequence(name, seq)
243
244 return generic
245
246
248 """Holds information from a PSI-BLAST round.
249
250 Members:
251 number Round number. (int)
252 reused_seqs Sequences in model, found again. List of Description objects.
253 new_seqs Sequences not found previously, or not previously below threshold. List of Description objects.
254 alignments A list of Alignment objects.
255 multiple_alignment A MultipleAlignment object.
256 """
258 self.number = None
259 self.reused_seqs = []
260 self.new_seqs = []
261 self.alignments = []
262 self.multiple_alignment = None
263
264
266 """Holds information about a database report.
267
268 Members:
269 database_name List of database names. (can have multiple dbs)
270 num_letters_in_database Number of letters in the database. (int)
271 num_sequences_in_database List of number of sequences in the database.
272 posted_date List of the dates the databases were posted.
273 ka_params A tuple of (lambda, k, h) values. (floats)
274 gapped # XXX this isn't set right!
275 ka_params_gap A tuple of (lambda, k, h) values. (floats)
276
277 """
286
287
289 """Holds information about the parameters.
290
291 Members:
292 matrix Name of the matrix.
293 gap_penalties Tuple of (open, extend) penalties. (floats)
294 sc_match Match score for nucleotide-nucleotide comparison
295 sc_mismatch Mismatch penalty for nucleotide-nucleotide comparison
296 num_hits Number of hits to the database. (int)
297 num_sequences Number of sequences. (int)
298 num_good_extends Number of extensions. (int)
299 num_seqs_better_e Number of sequences better than e-value. (int)
300 hsps_no_gap Number of HSP's better, without gapping. (int)
301 hsps_prelim_gapped Number of HSP's gapped in prelim test. (int)
302 hsps_prelim_gapped_attemped Number of HSP's attempted in prelim. (int)
303 hsps_gapped Total number of HSP's gapped. (int)
304 query_length Length of the query. (int)
305 query_id Identifier of the query sequence. (str)
306 database_length Number of letters in the database. (int)
307 effective_hsp_length Effective HSP length. (int)
308 effective_query_length Effective length of query. (int)
309 effective_database_length Effective length of database. (int)
310 effective_search_space Effective search space. (int)
311 effective_search_space_used Effective search space used. (int)
312 frameshift Frameshift window. Tuple of (int, float)
313 threshold Threshold. (int)
314 window_size Window size. (int)
315 dropoff_1st_pass Tuple of (score, bits). (int, float)
316 gap_x_dropoff Tuple of (score, bits). (int, float)
317 gap_x_dropoff_final Tuple of (score, bits). (int, float)
318 gap_trigger Tuple of (score, bits). (int, float)
319 blast_cutoff Tuple of (score, bits). (int, float)
320 """
350
351
352
353 -class Blast(Header, DatabaseReport, Parameters):
354 """Saves the results from a blast search.
355
356 Members:
357 descriptions A list of Description objects.
358 alignments A list of Alignment objects.
359 multiple_alignment A MultipleAlignment object.
360 + members inherited from base classes
361
362 """
370
371
372 -class PSIBlast(Header, DatabaseReport, Parameters):
373 """Saves the results from a blastpgp search.
374
375 Members:
376 rounds A list of Round objects.
377 converged Whether the search converged.
378 + members inherited from base classes
379
380 """
387