Package Bio :: Package SearchIO :: Package _model :: Module hit
[hide private]
[frames] | [no frames]

Source Code for Module Bio.SearchIO._model.hit

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO object to model a single database hit.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from itertools import chain 
 11   
 12  from Bio._py3k import filter 
 13   
 14  from Bio._utils import getattr_str, trim_str 
 15  from Bio.SearchIO._utils import allitems, optionalcascade 
 16   
 17  from ._base import _BaseSearchObject 
 18  from .hsp import HSP 
class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]
                  1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it
    contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: ``append``, ``index``, ``pop``, and ``sort``.
    Consult their respective documentations for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initializes a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties; otherwise ``ValueError`` is raised.
        ``TypeError`` is raised if any item is not an HSP instance.

        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None

        # Materialise the input once: it is traversed several times below,
        # and a single-use iterator (e.g. a generator) would otherwise be
        # exhausted by the first pass, silently producing an empty Hit.
        hsps = list(hsps)

        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit
            # objects. This makes it easier to work with file formats with
            # unpredictable hit-hsp ordering. The empty hit object itself is
            # nonfunctional, however, since all its cascading properties are
            # empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                                 "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Return a short representation with the IDs and the HSP count."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                                                     len(self))

    def __iter__(self):
        """Iterate over the contained HSP objects."""
        return iter(self.hsps)

    def __len__(self):
        """Return the number of contained HSP objects."""
        return len(self.hsps)

    # Python 3:
    def __bool__(self):
        """Return True if the Hit contains at least one HSP."""
        return bool(self.hsps)

    # Python 2:
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        """Return True if the given HSP object is stored in this Hit."""
        return hsp in self._items

    def __str__(self):
        """Return a multi-line, human-readable summary of the Hit.

        The summary consists of the query line, the hit line and either a
        table with one row per HSP or a ``?`` placeholder when the Hit holds
        no HSPs.

        """
        # NOTE(review): the runs of spaces inside the string literals below
        # are reproduced exactly as found in the reviewed copy; that copy may
        # have had whitespace collapsed, so confirm the spacing against the
        # canonical source before relying on column alignment.
        lines = []

        # set query id line
        qid_line = 'Query: %s' % self.query_id
        if self.query_description:
            qid_line += trim_str('\n %s' % self.query_description, 80, '...')
        lines.append(qid_line)

        # set hit id line
        hid_line = ' Hit: %s' % self.id
        if hasattr(self, 'seq_len'):
            hid_line += ' (%i)' % self.seq_len
        if self.description:
            hid_line += trim_str('\n %s' % self.description, 80, '...')
        lines.append(hid_line)

        # set hsp line and table
        if not self.hsps:
            lines.append(' HSPs: ?')
            return '\n'.join(lines)

        # one run of dashes per column, shared by both separator rows
        rules = ('-' * 4, '-' * 8, '-' * 9, '-' * 6, '-' * 15, '-' * 21)
        pattern = '%11s %8s %9s %6s %15s %21s'
        lines.append(' HSPs: %s %s %s %s %s %s' % rules)
        lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
                                'Query range', 'Hit range'))
        lines.append(pattern % rules)

        for idx, hsp in enumerate(self.hsps):
            evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
            bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
            # alignment length
            aln_span = getattr_str(hsp, 'aln_span')
            # query region; trimmed to the 15-character column width
            query_range = '[%s:%s]' % (getattr_str(hsp, 'query_start'),
                                       getattr_str(hsp, 'query_end'))
            query_range = trim_str(query_range, 15, '~]')
            # hit region; trimmed to the 21-character column width
            hit_range = '[%s:%s]' % (getattr_str(hsp, 'hit_start'),
                                     getattr_str(hsp, 'hit_end'))
            hit_range = trim_str(hit_range, 21, '~]')
            # append the hsp row
            lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                                    query_range, hit_range))

        return '\n'.join(lines)

    def __getitem__(self, idx):
        """Return the HSP at an integer index, or a new Hit for a slice."""
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Replace the item(s) at ``idx`` after validating the new value(s).

        A list or tuple is validated element by element; any other value is
        validated as a single HSP.

        """
        # NOTE(review): the value is stored as given, so a list assigned to
        # an integer index ends up nested inside the Hit, and a single HSP
        # assigned to a slice is handed to list slice assignment as an
        # iterable. Callers are presumably expected to pair int indices with
        # a single HSP and slices with a list/tuple -- confirm.
        if isinstance(hsps, (list, tuple)):
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Remove the HSP(s) at the given index or slice."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validates an HSP object.

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id. The hit and query
        descriptions are checked in the same way.

        Raises ``TypeError`` if ``hsp`` is not an HSP instance and
        ``ValueError`` if one of its values differs from the Hit's. When the
        Hit's own value is ``None``, it is adopted from the HSP instead.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if not self._items:
            return

        # (Hit property, HSP attribute, label used in the error message)
        checks = (
            ('id', 'hit_id', 'hit ID'),
            ('description', 'hit_description', 'hit description'),
            ('query_id', 'query_id', 'query ID'),
            ('query_description', 'query_description', 'query description'),
        )
        for hit_attr, hsp_attr, label in checks:
            expected = getattr(self, hit_attr)
            found = getattr(hsp, hsp_attr)
            if expected is None:
                setattr(self, hit_attr, found)
            elif found != expected:
                raise ValueError("Expected HSP with %s %r, "
                                 "found %r instead." % (label, expected,
                                                        found))

    # properties #
    description = optionalcascade('_description', 'hit_description',
                                  """Hit description""")
    query_description = optionalcascade(
        '_query_description', 'query_description',
        """Description of the query that produced the hit""")
    id = optionalcascade('_id', 'hit_id', """Hit ID string.""")
    query_id = optionalcascade(
        '_query_id', 'query_id',
        """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """Primary ID of the Hit followed by its alternative ID(s)"""
        return [self.id] + self._id_alt

    @property
    def description_all(self):
        """Primary description of the Hit followed by its alternative ones"""
        return [self.description] + self._description_alt

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # each stored HSP is iterated and the results are flattened
        return list(chain.from_iterable(self._items))

    # public methods #
    def append(self, hsp):
        """Adds a HSP object to the end of Hit.

        :param hsp: object to append
        :type hsp: HSP

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Creates a new Hit object whose HSP objects pass the filter
        function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool

        ``filter`` is analogous to Python's built-in ``filter`` function,
        except that it returns a ``Hit`` object. If no HSP passes the filter,
        ``None`` is returned instead of an empty Hit. Here is an example of
        using ``filter`` to select for HSPs having bitscores bigger than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
             mir_1
             Hit: gi|301171322|ref|NR_035857.1| (86)
             Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ---- -------- --------- ------ --------------- ---------------------
                      #  E-value Bit score   Span     Query range             Hit range
                   ---- -------- --------- ------ --------------- ---------------------
                      0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Returns the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Creates a new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP

        ``map`` is analogous to Python's built-in ``map`` function. It is
        applied to all HSPs contained in the Hit object and returns a new Hit
        object. When ``func`` is None the HSPs are carried over unchanged. If
        the Hit holds no HSPs, ``None`` is returned.

        """
        # slicing creates a shallow copy of the HSP list
        hsps = self.hsps[:]
        if func is not None:
            hsps = [func(x) for x in hsps]
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Removes and returns the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sorts the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or not
        :type reverse: bool
        :param in_place: whether to do in-place sorting or not
        :type in_place: bool

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method, and returns ``None`` in that case. If you set the ``in_place``
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
            return None

        hsps = self.hsps[:]
        hsps.sort(key=key, reverse=reverse)
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj


# Script entry point: when this file is executed directly rather than
# imported as a module, run the doctests embedded in the docstrings above.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()