Package Bio :: Package SearchIO :: Package _model :: Module hit
[hide private]
[frames | no frames]

Source Code for Module Bio.SearchIO._model.hit

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO object to model a single database hit.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from itertools import chain 
 11   
 12  from Bio._py3k import filter 
 13   
 14  from Bio._utils import getattr_str, trim_str 
 15  from Bio.SearchIO._utils import allitems, optionalcascade 
 16   
 17  from ._base import _BaseSearchObject 
 18  from .hsp import HSP 
 19   
 20   
 21  __docformat__ = "restructuredtext en" 
class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]
                  1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it
    contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: ``append``, ``index``, ``pop``, and ``sort``.
    Consult their respective documentations for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initializes a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties; otherwise ``ValueError`` is raised.
        ``TypeError`` is raised if any item is not an HSP instance.
        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None

        # The input is traversed several times below, so materialize it once.
        # Without this, a one-shot iterator (e.g. a generator) would be
        # exhausted by the first consistency check and yield an empty Hit.
        hsps = list(hsps)

        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit
            # objects. This makes it easier to work with file formats with
            # unpredictable hit-hsp ordering. The empty hit object itself is
            # nonfunctional, however, since all its cascading properties are
            # empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                                 "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it, so no separate
            # validation call is needed here
            self.append(hsp)

    def __repr__(self):
        """Return a short summary: hit ID, query ID and number of HSPs."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                                                     len(self))

    def __iter__(self):
        """Iterate over the contained HSP objects."""
        return iter(self.hsps)

    def __len__(self):
        """Return the number of contained HSP objects."""
        return len(self.hsps)

    # Python 3:
    def __bool__(self):
        """Return True if the Hit contains at least one HSP."""
        return bool(self.hsps)

    # Python 2:
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        """Return True if the given HSP is stored in this Hit."""
        return hsp in self._items

    def __str__(self):
        """Return a human-readable summary of the Hit and a table of its HSPs.

        The summary shows the query ID and description, the hit ID (with
        ``seq_len`` in parentheses when that attribute is set) and
        description, and one table row per HSP.
        """
        # NOTE(review): the spacing inside the string literals below is
        # reproduced exactly as found, and the doctest tables in this class's
        # docstrings assume exactly this spacing -- confirm the intended
        # column alignment.
        lines = []

        # set query id line
        qid_line = 'Query: %s' % self.query_id
        if self.query_description:
            qid_line += trim_str('\n %s' % self.query_description, 80, '...')
        lines.append(qid_line)

        # set hit id line
        hid_line = ' Hit: %s' % self.id
        if hasattr(self, 'seq_len'):
            hid_line += ' (%i)' % self.seq_len
        if self.description:
            hid_line += trim_str('\n %s' % self.description, 80, '...')
        lines.append(hid_line)

        # set hsp line and table
        if not self.hsps:
            lines.append(' HSPs: ?')
        else:
            # one horizontal rule per column, matching the widths in `pattern`
            rules = ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15, '-'*21)
            lines.append(' HSPs: %s %s %s %s %s %s' % rules)
            pattern = '%11s %8s %9s %6s %15s %21s'
            lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
                                    'Query range', 'Hit range'))
            lines.append(pattern % rules)
            for idx, hsp in enumerate(self.hsps):
                # evalue
                evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
                # bitscore
                bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
                # alignment length
                aln_span = getattr_str(hsp, 'aln_span')
                # query region
                query_start = getattr_str(hsp, 'query_start')
                query_end = getattr_str(hsp, 'query_end')
                query_range = '[%s:%s]' % (query_start, query_end)
                # max column length is 15
                query_range = trim_str(query_range, 15, '~]')
                # hit region
                hit_start = getattr_str(hsp, 'hit_start')
                hit_end = getattr_str(hsp, 'hit_end')
                hit_range = '[%s:%s]' % (hit_start, hit_end)
                # max column length is 21
                hit_range = trim_str(hit_range, 21, '~]')
                # append the hsp row
                lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                                        query_range, hit_range))

        return '\n'.join(lines)

    def __getitem__(self, idx):
        """Return the HSP at an integer index, or a new Hit for a slice."""
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Validate the given HSP (or list/tuple of HSPs) and store it at idx.

        Raises ``TypeError`` or ``ValueError`` from validation before anything
        is stored.
        """
        # handle case if hsps is a list of hsp
        if isinstance(hsps, (list, tuple)):
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Delete the HSP(s) at the given index or slice."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validates an HSP object.

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id; the hit and query
        descriptions are compared in the same way. A property of the Hit that
        is still ``None`` is set from the HSP instead of being compared.

        Raises ``TypeError`` if ``hsp`` is not an HSP instance and
        ``ValueError`` on the first mismatching property.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work, the property checks
        # are skipped while the Hit holds no HSP yet
        if not self._items:
            return

        # (Hit property, HSP attribute, label used in the error message),
        # checked in this order
        checks = (
            ('id', 'hit_id', 'hit ID'),
            ('description', 'hit_description', 'hit description'),
            ('query_id', 'query_id', 'query ID'),
            ('query_description', 'query_description', 'query description'),
        )
        for own_attr, hsp_attr, label in checks:
            expected = getattr(self, own_attr)
            found = getattr(hsp, hsp_attr)
            if expected is None:
                # nothing to compare against yet: adopt the HSP's value
                setattr(self, own_attr, found)
            elif found != expected:
                raise ValueError("Expected HSP with %s %r, "
                                 "found %r instead." % (label, expected,
                                                        found))

    # properties #
    description = optionalcascade('_description', 'hit_description',
                                  """Hit description""")
    query_description = optionalcascade('_query_description',
                                        'query_description',
                                        """Description of the query that produced the hit""")
    id = optionalcascade('_id', 'hit_id', """Hit ID string.""")
    query_id = optionalcascade('_query_id', 'query_id',
                               """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """All IDs of the Hit: the primary ID followed by the alternative ones"""
        return [self.id] + self._id_alt

    @property
    def description_all(self):
        """All descriptions of the Hit: the primary one followed by the alternatives"""
        return [self.description] + self._description_alt

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # flatten the per-HSP fragment sequences without unpacking them all
        # into call arguments
        return list(chain.from_iterable(self._items))

    # public methods #
    def append(self, hsp):
        """Adds a HSP object to the end of Hit.

        :param hsp: object to append
        :type hsp: HSP

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Creates a new Hit object whose HSP objects pass the filter
        function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool

        ``filter`` is analogous to Python's built-in ``filter`` function,
        except that instead of returning a list it returns a ``Hit`` object.
        If no HSP passes the filter, ``None`` is returned instead. Here is an
        example of using ``filter`` to select for HSPs having bitscores bigger
        than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
             mir_1
             Hit: gi|301171322|ref|NR_035857.1| (86)
             Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ---- -------- --------- ------ --------------- ---------------------
                      #  E-value Bit score   Span     Query range             Hit range
                   ---- -------- --------- ------ --------------- ---------------------
                      0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if not hsps:
            # nothing passed the filter: there is no Hit to build
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Returns the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        Raises ``ValueError`` if the HSP is not in the Hit.

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Creates a new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP

        ``map`` is analogous to Python's built-in ``map`` function. It is
        applied to all HSPs contained in the Hit object and returns a new Hit
        object. If ``func`` is None the HSPs are carried over unchanged, and
        if the Hit has no HSPs ``None`` is returned.

        """
        # work on a shallow copy so the stored list is not the one iterated
        hsps = self.hsps[:]
        if func is not None:
            hsps = [func(hsp) for hsp in hsps]
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Removes and returns the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sorts the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or no
        :type reverse: bool
        :param in_place: whether to do in-place sorting or no
        :type in_place: bool

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method, and returns ``None`` in that case. If you set the ``in_place``
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
            return None
        hsps = sorted(self.hsps, key=key, reverse=reverse)
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj
if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()