Package Bio :: Package SearchIO :: Package _model :: Module hit
[hide private]
[frames] | no frames]

Source Code for Module Bio.SearchIO._model.hit

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO object to model a single database hit.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from itertools import chain 
 11   
 12  from Bio._py3k import filter 
 13   
 14  from Bio._utils import getattr_str, trim_str 
 15  from Bio.SearchIO._utils import allitems, optionalcascade 
 16   
 17  from ._base import _BaseSearchObject 
 18  from .hsp import HSP 
class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    are themselves containers for HSP objects and will normally contain at
    least one HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]
                  1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it
    contains::

        >>> len(hit)
        2

    Hit objects behave much like Python lists. You can retrieve an HSP
    object inside a Hit using the HSP's integer index. Hit objects can also be
    sliced, which will return a new Hit object containing only the sliced
    HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: ``append``, ``index``, ``pop``, and ``sort``.
    Consult their respective documentation for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string
        :raises ValueError: if the given HSPs disagree on ``query_id``,
            ``query_description``, ``hit_id``, or ``hit_description``
        :raises TypeError: if any given item is not an HSP object

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties.
        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None

        # Materialize once: ``hsps`` is walked several times below, so a
        # single-use iterator (e.g. a generator) would otherwise be exhausted
        # by the first consistency check and the Hit would end up empty.
        hsps = list(hsps)

        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit
            # objects. This makes it easier to work with file formats with
            # unpredictable hit-hsp ordering. The empty hit object itself is
            # nonfunctional, however, since all its cascading properties are
            # empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                                 "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Return a short summary: hit ID, query ID, and number of HSPs."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                                                     len(self))

    def __iter__(self):
        """Iterate over the contained HSP objects."""
        return iter(self.hsps)

    def __len__(self):
        """Return the number of contained HSP objects."""
        return len(self.hsps)

    # Python 3:
    def __bool__(self):
        """Return True if the Hit contains at least one HSP."""
        return bool(self.hsps)

    # Python 2:
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        """Return True if the given HSP is stored in this Hit."""
        return hsp in self._items

    def __str__(self):
        """Return a human-readable summary of the Hit and a table of its HSPs."""
        lines = []

        # set query id line
        qid_line = 'Query: %s' % self.query_id
        if self.query_description:
            qid_line += trim_str('\n %s' % self.query_description, 80, '...')
        lines.append(qid_line)

        # set hit id line
        hid_line = ' Hit: %s' % self.id
        # seq_len is optional; it is only shown when it has been set
        if hasattr(self, 'seq_len'):
            hid_line += ' (%i)' % self.seq_len
        if self.description:
            hid_line += trim_str('\n %s' % self.description, 80, '...')
        lines.append(hid_line)

        # set hsp line and table
        if not self.hsps:
            lines.append(' HSPs: ?')
        else:
            # one ruler per column, as wide as that column
            rulers = ('-' * 4, '-' * 8, '-' * 9, '-' * 6, '-' * 15, '-' * 21)
            lines.append(' HSPs: %s %s %s %s %s %s' % rulers)
            pattern = '%11s %8s %9s %6s %15s %21s'
            lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
                                    'Query range', 'Hit range'))
            lines.append(pattern % rulers)
            for idx, hsp in enumerate(self.hsps):
                # evalue
                evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
                # bitscore
                bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
                # alignment length
                aln_span = getattr_str(hsp, 'aln_span')
                # query region
                query_start = getattr_str(hsp, 'query_start')
                query_end = getattr_str(hsp, 'query_end')
                query_range = '[%s:%s]' % (query_start, query_end)
                # max column length is 15
                query_range = trim_str(query_range, 15, '~]')
                # hit region
                hit_start = getattr_str(hsp, 'hit_start')
                hit_end = getattr_str(hsp, 'hit_end')
                hit_range = '[%s:%s]' % (hit_start, hit_end)
                # max column length is 21
                hit_range = trim_str(hit_range, 21, '~]')
                # append the hsp row
                lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                                        query_range, hit_range))

        return '\n'.join(lines)

    def __getitem__(self, idx):
        """Return the HSP at an integer index, or a new Hit for a slice."""
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Replace the item(s) at ``idx`` after validating the new HSP(s).

        :raises TypeError: if any new item is not an HSP object
        :raises ValueError: if any new HSP does not match this Hit
        """
        # handle case if hsps is a list of hsp
        # NOTE(review): a list/tuple is stored as given, so it is only
        # meaningful together with a slice index -- confirm against callers.
        if isinstance(hsps, (list, tuple)):
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Delete the HSP(s) at the given index or slice."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validate an HSP object.

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id. The hit and query
        descriptions are compared in the same way.

        :raises TypeError: if ``hsp`` is not an HSP object
        :raises ValueError: if ``hsp`` disagrees with a value this Hit
            already has
        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        # (nothing is compared until the Hit holds at least one HSP)
        if not self._items:
            return

        # (attribute on the Hit, attribute on the HSP, label used in errors)
        checks = (
            ('id', 'hit_id', 'hit ID'),
            ('description', 'hit_description', 'hit description'),
            ('query_id', 'query_id', 'query ID'),
            ('query_description', 'query_description', 'query description'),
        )
        for own_attr, hsp_attr, label in checks:
            expected = getattr(self, own_attr)
            found = getattr(hsp, hsp_attr)
            if expected is None:
                # the Hit has no value yet: adopt the one from the HSP
                setattr(self, own_attr, found)
            elif found != expected:
                raise ValueError("Expected HSP with %s %r, "
                                 "found %r instead." % (label, expected, found))

    # properties #
    description = optionalcascade('_description', 'hit_description',
                                  """Hit description""")
    query_description = optionalcascade('_query_description',
                                        'query_description',
                                        """Description of the query that produced the hit""")
    id = optionalcascade('_id', 'hit_id', """Hit ID string.""")
    query_id = optionalcascade('_query_id', 'query_id',
                               """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """Primary ID followed by the alternative ID(s) of the Hit"""
        return [self.id] + self._id_alt

    @property
    def description_all(self):
        """Primary description followed by the alternative descriptions of the Hit"""
        return [self.description] + self._description_alt

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # each HSP is iterated to yield its fragments; flatten them in order
        return list(chain.from_iterable(self._items))

    # public methods #
    def append(self, hsp):
        """Add an HSP object to the end of the Hit.

        :param hsp: object to append
        :type hsp: HSP
        :raises TypeError: if ``hsp`` is not an HSP object
        :raises ValueError: if ``hsp`` does not match this Hit

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Create a new Hit object whose HSP objects pass the filter function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool
        :returns: a new Hit with the passing HSPs, or None if no HSP passes

        ``filter`` is analogous to Python's built-in ``filter`` function, except
        that instead of returning a list it returns a ``Hit`` object. Here is an
        example of using ``filter`` to select for HSPs having bitscores bigger
        than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
             mir_1
             Hit: gi|301171322|ref|NR_035857.1| (86)
             Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ---- -------- --------- ------ --------------- ---------------------
                      #  E-value Bit score   Span     Query range             Hit range
                   ---- -------- --------- ------ --------------- ---------------------
                      0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if not hsps:
            # nothing passed the filter: no Hit is built
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Return the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Create a new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP
        :returns: a new Hit with the mapped HSPs, or None if this Hit has
            no HSPs

        ``map`` is analogous to Python's built-in ``map`` function. It is
        applied to all HSPs contained in the Hit object and returns a new Hit
        object.

        """
        # work on a shallow copy of the HSP list
        hsps = self.hsps[:]
        if func is not None:
            hsps = [func(hsp) for hsp in hsps]
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or not
        :type reverse: bool
        :param in_place: whether to do in-place sorting or not
        :type in_place: bool
        :returns: None when sorting in place, otherwise a new sorted Hit

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method. If you set the ``in_place`` argument to False, it will
        return a new, sorted Hit object and keep the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
        else:
            hsps = self.hsps[:]
            hsps.sort(key=key, reverse=reverse)
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj