1
2
3
4
5
6 """Bio.SearchIO objects to model high scoring regions between query and hit."""
7
8 import warnings
9 from operator import ge, le
10
11 from Bio import BiopythonWarning
12 from Bio.Align import MultipleSeqAlignment
13 from Bio.Alphabet import single_letter_alphabet
14 from Bio.Seq import Seq
15 from Bio.SeqRecord import SeqRecord
16
17 from Bio._utils import getattr_str, trim_str
18 from Bio.SearchIO._utils import singleitem, allitems, fullcascade, \
19 fragcascade
20
21 from _base import _BaseHSP
22
23
25
26 """Class representing high-scoring region(s) between query and hit.
27
28 HSP (high-scoring pair) objects are contained by Hit objects (see Hit).
29 In most cases, HSP objects store the bulk of the statistics and results
30 (e.g. e-value, bitscores, query sequence, etc.) produced by a search
31 program.
32
33 Depending on the search output file format, a given HSP will contain one
34 or more HSPFragment object(s). Examples of search programs that produce HSP
35 with one HSPFragments are BLAST, HMMER, and FASTA. Other programs such as
36 BLAT or Exonerate may produce HSPs containing more than one HSPFragment.
37 However, their native terminologies may differ: in BLAT these fragments
38 are called 'blocks' while in Exonerate they are called exons or NER.
39
40 Here are examples from each type of HSP. The first one comes from a BLAST
41 search:
42
43 >>> from Bio import SearchIO
44 >>> blast_qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
45 >>> blast_hsp = blast_qresult[1][0] # the first HSP from the second hit
46 >>> blast_hsp
47 HSP(hit_id='gi|301171311|ref|NR_035856.1|', query_id='33211', 1 fragments)
48 >>> print blast_hsp
49 Query: 33211 mir_1
50 Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
51 Query range: [1:61] (1)
52 Hit range: [0:60] (1)
53 Quick stats: evalue 1.7e-22; bitscore 109.49
54 Fragments: 1 (60 columns)
55 Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
56 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
57 Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
58
59 For HSPs with a single HSPFragment, you can invoke `print` on it and see the
60 underlying sequence alignment, if it exists. This is not the case for HSPs
61 with more than one HSPFragment. Below is an example, using an HSP from a
62 BLAT search. Invoking `print` on these HSPs will instead show a table of the
63 HSPFragment objects it contains:
64
65 >>> blat_qresult = SearchIO.read('Blat/mirna.pslx', 'blat-psl', pslx=True)
66 >>> blat_hsp = blat_qresult[1][0] # the first HSP from the second hit
67 >>> blat_hsp
68 HSP(hit_id='chr11', query_id='blat_1', 2 fragments)
69 >>> print blat_hsp
70 Query: blat_1 <unknown description>
71 Hit: chr11 <unknown description>
72 Query range: [42:67] (-1)
73 Hit range: [59018929:59018955] (1)
74 Quick stats: evalue ?; bitscore ?
75 Fragments: --- -------------- ---------------------- ----------------------
76 # Span Query range Hit range
77 --- -------------- ---------------------- ----------------------
78 0 6 [61:67] [59018929:59018935]
79 1 16 [42:58] [59018939:59018955]
80
81 Notice that in HSPs with more than one HSPFragments, the HSP's `query_range`
82 `hit_range` properties encompasses all fragments it contains.
83
84 You can check whether an HSP has more than one HSPFragments or not using the
85 `is_fragmented` property:
86
87 >>> blast_hsp.is_fragmented
88 False
89 >>> blat_hsp.is_fragmented
90 True
91
92 Since HSP objects are also containers similar to Python lists, you can
93 access a single fragment in an HSP using its integer index:
94
95 >>> blat_fragment = blat_hsp[0]
96 >>> print blat_fragment
97 Query: blat_1 <unknown description>
98 Hit: chr11 <unknown description>
99 Query range: [61:67] (-1)
100 Hit range: [59018929:59018935] (1)
101 Fragments: 1 (6 columns)
102 Query - tatagt
103 Hit - tatagt
104
105 This applies to HSPs objects with a single fragment as well:
106
107 >>> blast_fragment = blast_hsp[0]
108 >>> print blast_fragment
109 Query: 33211 mir_1
110 Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
111 Query range: [1:61] (1)
112 Hit range: [0:60] (1)
113 Fragments: 1 (60 columns)
114 Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
115 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
116 Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
117
118 Regardless of the search output file format, HSP objects provide the
119 properties listed below. These properties always return values in a list,
120 due to the HSP object itself being a list-like container. However, for
121 HSP objects with a single HSPFragment, shortcut properties that fetches
122 the item from the list are also provided.
123
124 +----------------------+---------------------+-----------------------------+
125 | Property | Shortcut | Value |
126 +======================+=====================+=============================+
127 | aln_all | aln | HSP alignments as |
128 | | | MultipleSeqAlignment object |
129 +----------------------+---------------------+-----------------------------+
130 | aln_annotation_all | aln_annotation | dictionary of annotation(s) |
131 | | | of all fragments' alignments|
132 +----------------------+---------------------+-----------------------------+
133 | fragments | fragment | HSPFragment objects |
134 +----------------------+---------------------+-----------------------------+
135 | hit_all | hit | hit sequence as SeqRecord |
136 | | | objects |
137 +----------------------+---------------------+-----------------------------+
138 | hit_features_all | hit_features | SeqFeatures of all hit |
139 | | | fragments |
140 +----------------------+---------------------+-----------------------------+
141 | hit_start_all | hit_start* | start coordinates of the |
142 | | | hit fragments |
143 +----------------------+---------------------+-----------------------------+
144 | hit_end_all | hit_end* | end coordinates of the hit |
145 | | | fragments |
146 +----------------------+---------------------+-----------------------------+
147 | hit_span_all | hit_span* | sizes of each hit fragments |
148 +----------------------+---------------------+-----------------------------+
149 | hit_strand_all | hit_strand | strand orientations of the |
150 | | | hit fragments |
151 +----------------------+---------------------+-----------------------------+
152 | hit_frame_all | hit_frame | reading frames of the hit |
153 | | | fragments |
154 +----------------------+---------------------+-----------------------------+
155 | hit_range_all | hit_range | tuples of start and end |
156 | | | coordinates of each hit |
157 | | | fragment |
158 +----------------------+---------------------+-----------------------------+
159 | query_all | query | query sequence as SeqRecord |
160 | | | object |
161 +----------------------+---------------------+-----------------------------+
162 | query_features_all | query_features | SeqFeatures of all query |
163 | | | fragments |
164 +----------------------+---------------------+-----------------------------+
165 | query_start_all | query_start* | start coordinates of the |
166 | | | fragments |
167 +----------------------+---------------------+-----------------------------+
168 | query_end_all | query_end* | end coordinates of the |
169 | | | query fragments |
170 +----------------------+---------------------+-----------------------------+
171 | query_span_all | query_span* | sizes of each query |
172 | | | fragments |
173 +----------------------+---------------------+-----------------------------+
174 | query_strand_all | query_strand | strand orientations of the |
175 | | | query fragments |
176 +----------------------+---------------------+-----------------------------+
177 | query_frame_all | query_frame | reading frames of the query |
178 | | | fragments |
179 +----------------------+---------------------+-----------------------------+
180 | query_range_all | query_range | tuples of start and end |
181 | | | coordinates of each query |
182 | | | fragment |
183 +----------------------+---------------------+-----------------------------+
184 * may be used in HSPs with multiple fragments
185
186 For all types of HSP objects, the property will return the values in a list.
187 Shorcuts are only applicable for HSPs with one fragment. Except the ones
188 noted, if they are used on an HSP with more than one fragments, an exception
189 will be raised.
190
191 For properties that may be used in HSPs with multiple or single fragments
192 (`*_start`, `*_end`, and `*_span` properties), their interpretation depends
193 on how many fragment the HSP has:
194
195 +------------+---------------------------------------------------+
196 | Property | Value |
197 +============+===================================================+
198 | hit_start | smallest coordinate value of all hit fragments |
199 +------------+---------------------------------------------------+
200 | hit_end | largest coordinate value of all hit fragments |
201 +------------+---------------------------------------------------+
202 | hit_span | difference between `hit_start` and `hit_end` |
203 +------------+---------------------------------------------------+
204 | query_start| smallest coordinate value of all query fragments |
205 +------------+---------------------------------------------------+
206 | query_end | largest coordinate value of all query fragments |
207 +------------+---------------------------------------------------+
208 | query_span | difference between `query_start` and `query_end` |
209 +------------+---------------------------------------------------+
210
211 In addition to the objects listed above, HSP objects also provide the
212 following properties:
213
214 +--------------------+------------------------------------------------------+
215 | Property | Value |
216 +====================+======================================================+
217 | aln_span | total number of residues in all HSPFragment objects |
218 +--------------------+------------------------------------------------------+
219 | alphabet | alphabet used in hit and query SeqRecord objects |
220 +--------------------+------------------------------------------------------+
221 | is_fragmented | boolean, whether there are multiple fragments or not |
222 +--------------------+------------------------------------------------------+
223 | hit_id | ID of the hit sequence |
224 +--------------------+------------------------------------------------------+
225 | hit_description | description of the hit sequence |
226 +--------------------+------------------------------------------------------+
227 | hit_inter_ranges | list of hit sequence coordinates of the regions |
228 | | between fragments |
229 +--------------------+------------------------------------------------------+
230 | hit_inter_spans | list of lengths of the regions between hit fragments |
231 +--------------------+------------------------------------------------------+
232 | query_id | ID of the query sequence |
233 +--------------------+------------------------------------------------------+
234 | query_description | description of the query sequence |
235 +--------------------+------------------------------------------------------+
236 | query_inter_ranges | list of query sequence coordinates of the regions |
237 | | between fragments |
238 +--------------------+------------------------------------------------------+
239 | query_inter_spans | list of lengths of the regions between query |
240 | |fragments |
241 +--------------------+------------------------------------------------------+
242
243 """
244
245
246 _NON_STICKY_ATTRS = ('_items', )
247
249 """Initializes an HSP object.
250
251 Arguments:
252 fragments -- List of HSPFragment objects.
253
254 HSP objects must be initialized with a list containing at least one
255 HSPFragment object. If multiple HSPFragment objects are used for
256 initialization, they must all have the same `query_id`,
257 `query_description`, `hit_id`, `hit_description`, and alphabet
258 properties.
259
260 """
261 if not fragments:
262 raise ValueError("HSP objects must have at least one HSPFragment "
263 "object.")
264
265 for attr in ('query_id', 'query_description', 'hit_id',
266 'hit_description', 'alphabet'):
267 if len(set([getattr(frag, attr) for frag in fragments])) != 1:
268 raise ValueError("HSP object can not contain fragments with "
269 "more than one %s." % attr)
270
271 self._items = []
272 for fragment in fragments:
273 self._validate_fragment(fragment)
274 self._items.append(fragment)
275
277 return "%s(hit_id=%r, query_id=%r, %r fragments)" % \
278 (self.__class__.__name__, self.hit_id, self.query_id, len(self))
279
281 return iter(self._items)
282
285
287 return len(self._items)
288
290 return bool(self._items)
291
293
294 lines = []
295
296 statline = []
297
298 evalue = getattr_str(self, 'evalue', fmt='%.2g')
299 statline.append('evalue ' + evalue)
300
301 bitscore = getattr_str(self, 'bitscore', fmt='%.2f')
302 statline.append('bitscore ' + bitscore)
303 lines.append('Quick stats: ' + '; '.join(statline))
304
305 if len(self.fragments) == 1:
306 return '\n'.join([self._str_hsp_header(), '\n'.join(lines),
307 self.fragments[0]._str_aln()])
308 else:
309 lines.append(' Fragments: %s %s %s %s' %
310 ('-'*3, '-'*14, '-'*22, '-'*22))
311 pattern = '%16s %14s %22s %22s'
312 lines.append(pattern % ('#', 'Span', 'Query range', 'Hit range'))
313 lines.append(pattern % ('-'*3, '-'*14, '-'*22, '-'*22))
314 for idx, block in enumerate(self.fragments):
315
316
317 aln_span = getattr_str(block, 'aln_span')
318
319 query_start = getattr_str(block, 'query_start')
320 query_end = getattr_str(block, 'query_end')
321 query_range = '[%s:%s]' % (query_start, query_end)
322
323 query_range = trim_str(query_range, 22, '~]')
324
325 hit_start = getattr_str(block, 'hit_start')
326 hit_end = getattr_str(block, 'hit_end')
327 hit_range = '[%s:%s]' % (hit_start, hit_end)
328 hit_range = trim_str(hit_range, 22, '~]')
329
330 lines.append(pattern % (str(idx), aln_span, query_range, hit_range))
331
332 return self._str_hsp_header() + '\n' + '\n'.join(lines)
333
335
336 if isinstance(idx, slice):
337 obj = self.__class__(self._items[idx])
338 self._transfer_attrs(obj)
339 return obj
340 return self._items[idx]
341
351
353
354
355 del self._items[idx]
356
358 if not isinstance(fragment, HSPFragment):
359 raise TypeError("HSP objects can only contain HSPFragment "
360 "objects.")
361
367
368 aln_span = property(fget=_aln_span_get,
369 doc="""Total number of columns in all HSPFragment objects.""")
370
371
373 assert seq_type in ('hit', 'query')
374 assert coord_type in ('start', 'end')
375 coord_name = '%s_%s' % (seq_type, coord_type)
376 coords = [getattr(frag, coord_name) for frag in self.fragments]
377 if None in coords:
378 warnings.warn("'None' exist in %s coordinates; ignored" %
379 (coord_name), BiopythonWarning)
380 return coords
381
384
385 hit_start = property(fget=_hit_start_get,
386 doc="""Smallest coordinate value of all hit fragments""")
387
390
391 query_start = property(fget=_query_start_get,
392 doc="""Smallest coordinate value of all query fragments""")
393
396
397 hit_end = property(fget=_hit_end_get,
398 doc="""Largest coordinate value of all hit fragments""")
399
402
403 query_end = property(fget=_query_end_get,
404 doc="""Largest coordinate value of all hit fragments""")
405
406
408 try:
409 return self.hit_end - self.hit_start
410 except TypeError:
411 return None
412
413 hit_span = property(fget=_hit_span_get,
414 doc="""The number of hit residues covered by the HSP.""")
415
421
422 query_span = property(fget=_query_span_get,
423 doc="""The number of query residues covered by the HSP.""")
424
427
428 hit_range = property(fget=_hit_range_get,
429 doc="""Tuple of HSP hit start and end coordinates.""")
430
433
434 query_range = property(fget=_query_range_get,
435 doc="""Tuple of HSP query start and end coordinates.""")
436
438
439 assert seq_type in ('query', 'hit')
440 strand = getattr(self, '%s_strand_all' % seq_type)[0]
441 coords = getattr(self, '%s_range_all' % seq_type)
442
443
444
445 if strand == -1:
446 startfunc, endfunc = min, max
447 else:
448 startfunc, endfunc = max, min
449 inter_coords = []
450 for idx, coord in enumerate(coords[:-1]):
451 start = startfunc(coords[idx])
452 end = endfunc(coords[idx+1])
453 inter_coords.append((min(start, end), max(start, end)))
454
455 return inter_coords
456
459
460 hit_inter_ranges = property(fget=_hit_inter_ranges_get,
461 doc="""Hit sequence coordinates of the regions between fragments""")
462
465
466 query_inter_ranges = property(fget=_query_inter_ranges_get,
467 doc="""Query sequence coordinates of the regions between fragments""")
468
470 assert seq_type in ('query', 'hit')
471 attr_name = '%s_inter_ranges' % seq_type
472 return [coord[1] - coord[0] for coord in getattr(self, attr_name)]
473
476
477 hit_inter_spans = property(fget=_hit_inter_spans_get,
478 doc="""Lengths of regions between hit fragments""")
479
482
483 query_inter_spans = property(fget=_query_inter_spans_get,
484 doc="""Lengths of regions between query fragments""")
485
486
487
488
489 is_fragmented = property(lambda self: len(self) > 1,
490 doc="""Whether the HSP has more than one HSPFragment objects""")
491
492
493 hit_description = fullcascade('hit_description',
494 doc="""Description of the hit sequence""")
495
496 query_description = fullcascade('query_description',
497 doc="""Description of the query sequence""")
498
499 hit_id = fullcascade('hit_id',
500 doc="""ID of the hit sequence""")
501
502 query_id = fullcascade('query_id',
503 doc="""ID of the query sequence""")
504
505 alphabet = fullcascade('alphabet',
506 doc="""Alphabet used in hit and query SeqRecord objects""")
507
508
509 fragment = singleitem(
510 doc="""HSPFragment object, first fragment""")
511
512 hit = singleitem('hit',
513 doc="""Hit sequence as a SeqRecord object, first fragment""")
514
515 query = singleitem('query',
516 doc="""Query sequence as a SeqRecord object, first fragment""")
517
518 aln = singleitem('aln',
519 doc="""Alignment of the first fragment as a MultipleSeqAlignment object""")
520
521 aln_annotation = singleitem('aln_annotation',
522 doc="""Dictionary of annotation(s) of the first fragment's alignment""")
523
524 hit_features = singleitem('hit_features',
525 doc="""Hit sequence features, first fragment""")
526
527 query_features = singleitem('query_features',
528 doc="""Query sequence features, first fragment""")
529
530 hit_strand = singleitem('hit_strand',
531 doc="""Hit strand orientation, first fragment""")
532
533 query_strand = singleitem('query_strand',
534 doc="""Query strand orientation, first fragment""")
535
536 hit_frame = singleitem('hit_frame',
537 doc="""Hit sequence reading frame, first fragment""")
538
539 query_frame = singleitem('query_frame',
540 doc="""Query sequence reading frame, first fragment""")
541
542
543 fragments = allitems(doc="""List of all HSPFragment objects""")
544
545 hit_all = allitems('hit',
546 doc="""List of all fragments' hit sequences as SeqRecord objects""")
547
548 query_all = allitems('query',
549 doc="""List of all fragments' query sequences as SeqRecord objects""")
550
551 aln_all = allitems('aln',
552 doc="""List of all fragments' alignments as MultipleSeqAlignment objects""")
553
554 aln_annotation_all = allitems('aln_annotation',
555 doc="""Dictionary of annotation(s) of all fragments' alignments""")
556
557 hit_features_all = allitems('hit_features',
558 doc="""List of all hit sequence features""")
559
560 query_features_all = allitems('query_features',
561 doc="""List of all query sequence features""")
562
563 hit_strand_all = allitems('hit_strand',
564 doc="""List of all fragments' hit sequence strands""")
565
566 query_strand_all = allitems('query_strand',
567 doc="""List of all fragments' query sequence strands""")
568
569 hit_frame_all = allitems('hit_frame',
570 doc="""List of all fragments' hit sequence reading frames""")
571
572 query_frame_all = allitems('query_frame',
573 doc="""List of all fragments' query sequence reading frames""")
574
575 hit_start_all = allitems('hit_start',
576 doc="""List of all fragments' hit start coordinates""")
577
578 query_start_all = allitems('query_starts',
579 doc="""List of all fragments' query start coordinates""")
580
581 hit_end_all = allitems('hit_ends',
582 doc="""List of all fragments' hit end coordinates""")
583
584 query_end_all = allitems('query_ends',
585 doc="""List of all fragments' query end coordinates""")
586
587 hit_span_all = allitems('hit_span',
588 doc="""List of all fragments' hit sequence size""")
589
590 query_span_all = allitems('query_span',
591 doc="""List of all fragments' query sequence size""")
592
593 hit_range_all = allitems('hit_range',
594 doc="""List of all fragments' hit start and end coordinates""")
595
596 query_range_all = allitems('query_range',
597 doc="""List of all fragments' query start and end coordinates""")
598
599
601
602 """Class representing a contiguous alignment of hit-query sequence.
603
604 HSPFragment forms the core of any parsed search output file. Depending on
605 the search output file format, it may contain the actual query and/or hit
606 sequences that produces the search hits. These sequences are stored as
607 SeqRecord objects (see SeqRecord):
608
609 >>> from Bio import SearchIO
610 >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
611 >>> fragment = qresult[0][0][0] # first hit, first hsp, first fragment
612 >>> print fragment
613 Query: 33211 mir_1
614 Hit: gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 520b (MIR520...
615 Query range: [0:61] (1)
616 Hit range: [0:61] (1)
617 Fragments: 1 (61 columns)
618 Query - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
619 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
620 Hit - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
621
622 # the query sequence is a SeqRecord object
623 >>> fragment.query.__class__
624 <class 'Bio.SeqRecord.SeqRecord'>
625 >>> print fragment.query
626 ID: 33211
627 Name: aligned query sequence
628 Description: mir_1
629 Number of features: 0
630 Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG', DNAAlphabet())
631
632 # the hit sequence is a SeqRecord object as well
633 >>> fragment.hit.__class__
634 <class 'Bio.SeqRecord.SeqRecord'>
635 >>> print fragment.hit
636 ID: gi|262205317|ref|NR_030195.1|
637 Name: aligned hit sequence
638 Description: Homo sapiens microRNA 520b (MIR520B), microRNA
639 Number of features: 0
640 Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG', DNAAlphabet())
641
642 # when both query and hit are present, we get a MultipleSeqAlignment object
643 >>> fragment.aln.__class__
644 <class 'Bio.Align.MultipleSeqAlignment'>
645 >>> print fragment.aln
646 DNAAlphabet() alignment with 2 rows and 61 columns
647 CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG 33211
648 CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG gi|262205317|ref|NR_030195.1|
649
650 """
651
654
655 self._alphabet = alphabet
656 self.aln_annotation = {}
657
658 self._hit_id = hit_id
659 self._query_id = query_id
660
661 for seq_type in ('query', 'hit'):
662
663 setattr(self, '_%s_description' % seq_type, '<unknown description>')
664 setattr(self, '_%s_features' % seq_type, [])
665
666 for attr in ('strand', 'frame', 'start', 'end'):
667 setattr(self, '%s_%s' % (seq_type, attr), None)
668
669 if eval(seq_type):
670 setattr(self, seq_type, eval(seq_type))
671 else:
672 setattr(self, seq_type, None)
673
675 info = "hit_id=%r, query_id=%r" % (self.hit_id, self.query_id)
676 try:
677 info += ", %i columns" % len(self)
678 except AttributeError:
679 pass
680 return "%s(%s)" % (self.__class__.__name__, info)
681
684
687
689 if self.aln is not None:
690 obj = self.__class__(
691 hit_id=self.hit_id, query_id=self.query_id,
692 alphabet=self.alphabet)
693
694
695
696 if self.query is not None:
697 obj.query = self.query[idx]
698 obj.query_features = obj.query.features
699 if self.hit is not None:
700 obj.hit = self.hit[idx]
701 obj.hit_features = obj.hit.features
702
703 for attr in ('description', 'strand', 'frame'):
704 for seq_type in ('hit', 'query'):
705 attr_name = '%s_%s' % (seq_type, attr)
706 self_val = getattr(self, attr_name)
707 setattr(obj, attr_name, self_val)
708
709
710 obj.aln_annotation = {}
711 for key, value in self.aln_annotation.items():
712 assert len(value[idx]) == len(obj)
713 obj.aln_annotation[key] = value[idx]
714 return obj
715 else:
716 raise TypeError("Slicing for HSP objects without "
717 "alignment is not supported.")
718
720 lines = []
721
722 aln_span = getattr_str(self, 'aln_span')
723 lines.append(' Fragments: 1 (%s columns)' % aln_span)
724
725 if self.query is not None and self.hit is not None:
726 try:
727 qseq = str(self.query.seq)
728 except AttributeError:
729 qseq = '?'
730 try:
731 hseq = str(self.hit.seq)
732 except AttributeError:
733 hseq = '?'
734
735
736 homol = ''
737 if 'homology' in self.aln_annotation:
738 homol = self.aln_annotation['homology']
739
740 if self.aln_span <= 67:
741 lines.append("%10s - %s" % ('Query', qseq))
742 if homol:
743 lines.append(" %s" % homol)
744 lines.append("%10s - %s" % ('Hit', hseq))
745 else:
746
747
748 if self.aln_span - 66 > 3:
749 cont = '~' * 3
750 else:
751 cont = '~' * (self.aln_span - 66)
752 lines.append("%10s - %s%s%s" % ('Query',
753 qseq[:59], cont, qseq[-5:]))
754 if homol:
755 lines.append(" %s%s%s" %
756 (homol[:59], cont, homol[-5:]))
757 lines.append("%10s - %s%s%s" % ('Hit',
758 hseq[:59], cont, hseq[-5:]))
759
760 return '\n'.join(lines)
761
762
764 """Checks the given sequence for attribute setting
765
766 Arguments:
767 seq -- String or SeqRecord to check
768 seq_type -- String of sequence type, must be 'hit' or 'query'
769
770 """
771 assert seq_type in ('hit', 'query')
772 if seq is None:
773 return seq
774 else:
775 if not isinstance(seq, (basestring, SeqRecord)):
776 raise TypeError("%s sequence must be a string or a SeqRecord"
777 " object." % seq_type)
778
779 opp_type = 'hit' if seq_type == 'query' else 'query'
780 opp_seq = getattr(self, '_%s' % opp_type, None)
781 if opp_seq is not None:
782 if len(seq) != len(opp_seq):
783 raise ValueError("Sequence lengths do not match. Expected: "
784 "%r (%s); found: %r (%s)." % (len(opp_seq), opp_type,
785 len(seq), seq_type))
786
787 seq_id = getattr(self, '%s_id' % seq_type)
788 seq_desc = getattr(self, '%s_description' % seq_type)
789 seq_feats = getattr(self, '%s_features' % seq_type)
790 seq_name = 'aligned %s sequence' % seq_type
791
792 if isinstance(seq, SeqRecord):
793 seq.id = seq_id
794 seq.description = seq_desc
795 seq.name = seq_name
796 seq.features = seq_feats
797 seq.seq.alphabet = self.alphabet
798 elif isinstance(seq, basestring):
799 seq = SeqRecord(Seq(seq, self.alphabet), id=seq_id, name=seq_name,
800 description=seq_desc, features=seq_feats)
801
802 return seq
803
806
809
810 hit = property(fget=_hit_get, fset=_hit_set,
811 doc="""Hit sequence as a SeqRecord object, defaults to None""")
812
815
818
819 query = property(fget=_query_get, fset=_query_set,
820 doc="""Query sequence as a SeqRecord object, defaults to None""")
821
831
832 aln = property(fget=_aln_get,
833 doc="""Query-hit alignment as a MultipleSeqAlignment object,
834 defaults to None""")
835
837 return self._alphabet
838
849
850 alphabet = property(fget=_alphabet_get, fset=_alphabet_set,
851 doc="""Alphabet object used in the fragment's sequences and alignment,
852 defaults to single_letter_alphabet""")
853
855
856
857
858 if not hasattr(self, '_aln_span'):
859 if self.query is not None:
860 self._aln_span = len(self.query)
861 elif self.hit is not None:
862 self._aln_span = len(self.hit)
863
864 return self._aln_span
865
867 self._aln_span = value
868
869 aln_span = property(fget=_aln_span_get, fset=_aln_span_set,
870 doc="""The number of alignment columns covered by the fragment""")
871
872
873 hit_description = fragcascade('description', 'hit',
874 doc="""Hit sequence description""")
875
876 query_description = fragcascade('description', 'query',
877 doc="""Query sequence description""")
878
879 hit_id = fragcascade('id', 'hit',
880 doc="""Hit sequence ID""")
881
882 query_id = fragcascade('id', 'query',
883 doc="""Query sequence ID""")
884
885 hit_features = fragcascade('features', 'hit',
886 doc="""Hit sequence features""")
887
888 query_features = fragcascade('features', 'query',
889 doc="""Query sequence features""")
890
891
893
894 if not strand in (-1, 0, 1, None):
895 raise ValueError("Strand should be -1, 0, 1, or None; not %r" %
896 strand)
897 return strand
898
900 assert seq_type in ('hit', 'query')
901 strand = getattr(self, '_%s_strand' % seq_type)
902
903 if strand is None:
904
905 frame = getattr(self, '%s_frame' % seq_type)
906 if frame is not None:
907 try:
908 strand = frame // abs(frame)
909 except ZeroDivisionError:
910 strand = 0
911 setattr(self, '%s_strand' % seq_type, strand)
912
913 return strand
914
917
920
921 hit_strand = property(fget=_hit_strand_get, fset=_hit_strand_set,
922 doc="""Hit sequence strand, defaults to None""")
923
926
929
930 query_strand = property(fget=_query_strand_get, fset=_query_strand_set,
931 doc="""Query sequence strand, defaults to None""")
932
933
935 if not frame in (-3, -2, -1, 0, 1, 2, 3, None):
936 raise ValueError("Strand should be an integer between -3 and 3, "
937 "or None; not %r" % frame)
938 return frame
939
941 return self._hit_frame
942
945
946 hit_frame = property(fget=_hit_frame_get, fset=_hit_frame_set,
947 doc="""Hit sequence reading frame, defaults to None""")
948
950 return self._query_frame
951
954
955 query_frame = property(fget=_query_frame_get, fset=_query_frame_set,
956 doc="""Query sequence reading frame, defaults to None""")
957
958
960
961 if coord is None:
962 return coord
963 assert isinstance(coord, int)
964
965 try:
966 opp_coord = getattr(self, opp_coord_name)
967 except AttributeError:
968 return coord
969
970 if opp_coord is None:
971 return coord
972
973 else:
974 assert op(coord, opp_coord)
975 return coord
976
978 return self._hit_start
979
982
983 hit_start = property(fget=_hit_start_get, fset=_hit_start_set,
984 doc="""Hit sequence start coordinate, defaults to None""")
985
987 return self._query_start
988
991
992 query_start = property(fget=_query_start_get, fset=_query_start_set,
993 doc="""Query sequence start coordinate, defaults to None""")
994
997
1000
1001 hit_end = property(fget=_hit_end_get, fset=_hit_end_set,
1002 doc="""Hit sequence start coordinate, defaults to None""")
1003
1005 return self._query_end
1006
1009
1010 query_end = property(fget=_query_end_get, fset=_query_end_set,
1011 doc="""Query sequence end coordinate, defaults to None""")
1012
1013
1015 try:
1016 return self.hit_end - self.hit_start
1017 except TypeError:
1018 return None
1019
1020 hit_span = property(fget=_hit_span_get,
1021 doc="""The number of residues covered by the hit sequence""")
1022
1028
1029 query_span = property(fget=_query_span_get,
1030 doc="""The number of residues covered by the query sequence""")
1031
1034
1035 hit_range = property(fget=_hit_range_get,
1036 doc="""Tuple of hit start and end coordinates""")
1037
1040
1041 query_range = property(fget=_query_range_get,
1042 doc="""Tuple of query start and end coordinates""")
1043
1044
1045
1046 if __name__ == "__main__":
1047 from Bio._utils import run_doctest
1048 run_doctest()
1049