Package Bio :: Package SearchIO :: Package _model :: Module hit
[hide private]
[frames | no frames]

Source Code for Module Bio.SearchIO._model.hit

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO object to model a single database hit.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from itertools import chain 
 11   
 12  from Bio._py3k import filter 
 13   
 14  from Bio._utils import getattr_str, trim_str 
 15  from Bio.SearchIO._utils import allitems, optionalcascade 
 16   
 17  from ._base import _BaseSearchObject 
 18  from .hsp import HSP 
class Hit(_BaseSearchObject):

    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke `print` on it:

    >>> from Bio import SearchIO
    >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
    >>> hit = qresult[3]
    >>> print(hit)
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  8.9e-20    100.47     60          [1:61]               [13:73]
              1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke `len` on a Hit object to see how many HSP objects it
    contains:

    >>> len(hit)
    2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs:

    # HSP items inside the Hit can be retrieved using its integer index
    >>> hit[0]
    HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

    # slicing returns a new Hit
    >>> hit
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
    >>> hit[:1]
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
    >>> print(hit[1:])
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide `filter` and `map` methods, which are analogous to
    Python's built-in `filter` and `map` except that they return a new Hit
    object instead of a list.

    Here is an example of using `filter` to select for HSPs whose e-value is
    less than 1e-10:

    >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
    >>> filtered_hit = hit.filter(evalue_filter)
    >>> len(hit)
    2
    >>> len(filtered_hit)
    1
    >>> print(filtered_hit)
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: `append`, `index`, `pop`, and `sort`.
    Consult their respective documentations for more details and examples of
    their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        Arguments:
        hsps -- Iterable of HSP objects (defaults to no HSPs).
        id -- String of the Hit ID
        query_id -- String of the Hit's query ID

        If multiple HSP objects are used for initialization, they must all
        have the same `query_id`, `query_description`, `hit_id`, and
        `hit_description` properties; a ValueError is raised otherwise.
        """
        # Materialize the input first: the consistency check below walks
        # `hsps` once per attribute, so a one-shot iterator would be spent
        # after the first pass and the Hit would silently end up empty.
        hsps = list(hsps)

        # default attribute values
        self._id = id
        self._query_id = query_id
        self._description = None
        self._query_description = None

        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit
            # objects. This makes it easier to work with file formats with
            # unpredictable hit-hsp ordering. The empty hit object itself is
            # nonfunctional, however, since all its cascading properties are
            # empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                                 "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it, so no separate
            # validation call is needed here
            self.append(hsp)

    def __repr__(self):
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                                                     len(self))

    def __iter__(self):
        return iter(self.hsps)

    def __len__(self):
        return len(self.hsps)

    #Python 3:
    def __bool__(self):
        return bool(self.hsps)

    #Python 2:
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        return hsp in self._items

    def __str__(self):
        """Return a human-readable summary: query, hit, and an HSP table."""
        # NOTE(review): the spacing inside the string literals below is
        # reproduced exactly as found; confirm the intended column alignment
        # before changing any of it.
        lines = []

        # set query id line
        qid_line = 'Query: %s' % self.query_id
        if self.query_description:
            qid_line += trim_str('\n %s' %
                                 self.query_description, 80, '...')
        lines.append(qid_line)

        # set hit id line
        hid_line = ' Hit: %s' % self.id
        if hasattr(self, 'seq_len'):
            hid_line += ' (%i)' % self.seq_len
        if self.description:
            hid_line += trim_str('\n %s' % self.description,
                                 80, '...')
        lines.append(hid_line)

        # set hsp line and table
        if not self.hsps:
            lines.append(' HSPs: ?')
        else:
            lines.append(' HSPs: %s %s %s %s %s %s' %
                         ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15, '-'*21))
            pattern = '%11s %8s %9s %6s %15s %21s'
            lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
                                    'Query range', 'Hit range'))
            lines.append(pattern % ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15,
                                    '-'*21))
            for idx, hsp in enumerate(self.hsps):
                # evalue
                evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
                # bitscore
                bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
                # alignment length
                aln_span = getattr_str(hsp, 'aln_span')
                # query region
                query_start = getattr_str(hsp, 'query_start')
                query_end = getattr_str(hsp, 'query_end')
                query_range = '[%s:%s]' % (query_start, query_end)
                # max column length is 15
                query_range = trim_str(query_range, 15, '~]')
                # hit region
                hit_start = getattr_str(hsp, 'hit_start')
                hit_end = getattr_str(hsp, 'hit_end')
                hit_range = '[%s:%s]' % (hit_start, hit_end)
                # max column length is 21
                hit_range = trim_str(hit_range, 21, '~]')
                # append the hsp row
                lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                                        query_range, hit_range))

        return '\n'.join(lines)

    def __getitem__(self, idx):
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        # handle case if hsps is a list of hsp
        if isinstance(hsps, (list, tuple)):
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        del self._items[idx]

    ## hsp properties ##
    def _validate_hsp(self, hsp):
        """Validate an HSP object before it is stored in the Hit.

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id. The hit and query
        descriptions are checked the same way. Any of these four values that
        is still None on the Hit is filled in from the HSP instead of being
        compared.

        Raises TypeError if `hsp` is not an HSP instance, and ValueError if
        one of the compared values differs. The value checks are skipped
        entirely while the Hit holds no HSPs.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if self._items:
            if self.id is not None:
                if hsp.hit_id != self.id:
                    raise ValueError("Expected HSP with hit ID %r, "
                                     "found %r instead." %
                                     (self.id, hsp.hit_id))
            else:
                self.id = hsp.hit_id

            if self.description is not None:
                if hsp.hit_description != self.description:
                    raise ValueError("Expected HSP with hit description %r, "
                                     "found %r instead." %
                                     (self.description, hsp.hit_description))
            else:
                self.description = hsp.hit_description

            if self.query_id is not None:
                if hsp.query_id != self.query_id:
                    raise ValueError("Expected HSP with query ID %r, "
                                     "found %r instead." %
                                     (self.query_id, hsp.query_id))
            else:
                self.query_id = hsp.query_id

            if self.query_description is not None:
                if hsp.query_description != self.query_description:
                    raise ValueError("Expected HSP with query description "
                                     "%r, found %r instead." %
                                     (self.query_description,
                                      hsp.query_description))
            else:
                self.query_description = hsp.query_description

    ## properties ##
    description = optionalcascade('_description', 'hit_description',
                                  """Hit description""")
    query_description = optionalcascade('_query_description',
                                        'query_description',
                                        """Description of the query that produced the hit""")
    id = optionalcascade('_id', 'hit_id', """Hit ID string.""")
    query_id = optionalcascade('_query_id', 'query_id',
                               """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # each HSP is iterated to yield its fragments; flatten them lazily
        # instead of unpacking every HSP into an argument list
        return list(chain.from_iterable(self._items))

    ## public methods ##
    def append(self, hsp):
        """Add an HSP object to the end of the Hit.

        Arguments:
        hsp -- HSP object to append.

        Any HSP object appended must have the same `hit_id` property as the
        Hit object's `id` property and the same `query_id` property as the
        Hit object's `query_id` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Create a new Hit object whose HSP objects pass the filter
        function.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter,
                does a boolean check, and returns True or False.

        `filter` is analogous to Python's built-in `filter` function, except
        that instead of returning a list it returns a `Hit` object. If no HSP
        passes the filter, None is returned instead of an empty Hit. Here is
        an example of using `filter` to select for HSPs having bitscores
        bigger than 60:

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
        >>> filtered_hit = hit.filter(bitscore_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj
        # nothing passed the filter
        return None

    def index(self, hsp):
        """Return the index of a given HSP object, zero-based.

        Arguments:
        hsp -- HSP object to be looked up.

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Create a new Hit object, mapping the given function to its HSPs.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter
                and also returns a HSP object.

        `map` is analogous to Python's built-in `map` function. It is applied
        to all HSPs contained in the Hit object and returns a new Hit object.
        If `func` is None the HSPs are carried over unchanged. If the Hit
        contains no HSPs, None is returned.

        """
        if func is not None:
            # iterate over a shallow copy of the HSP list
            hsps = [func(x) for x in self.hsps[:]]
        else:
            hsps = self.hsps[:]
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj
        # no HSPs to map over
        return None

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        Arguments:
        index -- Integer denoting the index of the HSP object to remove.

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        Arguments:
        key -- Function used to sort the HSP objects.
        reverse -- Boolean, whether to reverse the sorting or not.
        in_place -- Boolean, whether to perform sorting in place (in the same
                    object) or not (creating a new object).

        `sort` defaults to sorting in-place, to mimic Python's `list.sort`
        method; in that case nothing is returned. If you set the `in_place`
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
        else:
            hsps = self.hsps[:]
            hsps.sort(key=key, reverse=reverse)
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj


# Executed directly as a script (rather than imported): run the doctests
# embedded in this module's docstrings.
if __name__ == "__main__":
    from Bio import _utils
    _utils.run_doctest()