Package Bio :: Package SearchIO :: Package _model :: Module hit
[hide private]
[frames] | no frames]

Source Code for Module Bio.SearchIO._model.hit

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO object to model a single database hit.""" 
  7   
  8  from itertools import chain 
  9   
 10  from Bio._utils import getattr_str, trim_str 
 11  from Bio.SearchIO._utils import allitems, optionalcascade 
 12   
 13  from _base import _BaseSearchObject 
 14  from hsp import HSP 
class Hit(_BaseSearchObject):

    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    are themselves containers for HSP objects and will normally contain at
    least one HSP.

    To have a quick look at a Hit and its contents, invoke `print` on it:

    >>> from Bio import SearchIO
    >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
    >>> hit = qresult[3]
    >>> print hit
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  8.9e-20    100.47     60          [1:61]               [13:73]
              1  3.3e-06     55.39     60          [0:60]               [13:73]

    You can invoke `len` on a Hit object to see how many HSP objects it
    contains:

    >>> len(hit)
    2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs:

    # HSP items inside the Hit can be retrieved using its integer index
    >>> hit[0]
    HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

    # slicing returns a new Hit
    >>> hit
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
    >>> hit[:1]
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
    >>> print hit[1:]
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  3.3e-06     55.39     60          [0:60]               [13:73]

    Hit objects provide `filter` and `map` methods, which are analogous to
    Python's built-in `filter` and `map` except that they return a new Hit
    object instead of a list.

    Here is an example of using `filter` to select for HSPs whose e-value is
    less than 1e-10:

    >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
    >>> filtered_hit = hit.filter(evalue_filter)
    >>> len(hit)
    2
    >>> len(filtered_hit)
    1
    >>> print filtered_hit
    Query: 33211
     mir_1
     Hit: gi|301171322|ref|NR_035857.1| (86)
     Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ---- -------- --------- ------ --------------- ---------------------
              #  E-value Bit score   Span     Query range             Hit range
           ---- -------- --------- ------ --------------- ---------------------
              0  8.9e-20    100.47     60          [1:61]               [13:73]

    There are also other methods which are counterparts of Python lists'
    methods with the same names: `append`, `index`, `pop`, and `sort`. Consult
    their respective documentation for more details and examples of their
    usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=()):
        """Initializes a Hit object.

        Arguments:
        hsps -- Iterable of HSP objects (typically a list).

        Hit objects are meant to be initialized with at least one HSP object.
        If multiple HSP objects are used for initialization, they must all
        have the same `query_id`, `query_description`, `hit_id`, and
        `hit_description` properties; otherwise a ValueError is raised. A
        TypeError is raised if any item is not an HSP object.

        """
        # Materialize once: the input is walked several times below, which
        # would silently exhaust a generator or any other one-shot iterable.
        hsps = list(hsps)

        for attr in ('query_id', 'query_description', 'hit_id',
                'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit
            # objects. This makes it easier to work with file formats with
            # unpredictable hit-hsp ordering. The empty hit object itself is
            # nonfunctional, however, since all its cascading properties are
            # empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                        "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Returns a short summary: hit ID, query ID and number of HSPs."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                len(self))

    def __iter__(self):
        """Iterates over the contained HSP objects."""
        return iter(self.hsps)

    def __len__(self):
        """Returns the number of contained HSP objects."""
        return len(self.hsps)

    def __nonzero__(self):
        """Returns True if the Hit contains at least one HSP."""
        return bool(self.hsps)

    def __contains__(self, hsp):
        """Returns True if the given HSP object is stored in the Hit."""
        return hsp in self._items

    def __str__(self):
        """Returns a multi-line, human-readable summary of the Hit.

        The summary lists the query ID and description, the hit ID (with its
        sequence length, if the `seq_len` attribute is set) and description,
        and a table with one row per HSP.

        """
        lines = []

        # NOTE(review): the 'Query:' and ' Hit:' / ' HSPs:' labels below have
        # different widths; confirm the leading whitespace in these literals
        # gives the intended alignment.

        # set query id line
        qid_line = 'Query: %s' % self.query_id
        if self.query_description:
            qid_line += trim_str('\n %s' %
                    self.query_description, 80, '...')
        lines.append(qid_line)

        # set hit id line
        hid_line = ' Hit: %s' % self.id
        if hasattr(self, 'seq_len'):
            hid_line += ' (%i)' % self.seq_len
        if self.description:
            hid_line += trim_str('\n %s' % self.description,
                    80, '...')
        lines.append(hid_line)

        # set hsp line and table
        if not self.hsps:
            lines.append(' HSPs: ?')
            return '\n'.join(lines)

        # horizontal rules, one per column; widths match `pattern` below
        rules = ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15, '-'*21)
        lines.append(' HSPs: %s %s %s %s %s %s' % rules)
        pattern = '%11s %8s %9s %6s %15s %21s'
        lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
                'Query range', 'Hit range'))
        lines.append(pattern % rules)
        for idx, hsp in enumerate(self.hsps):
            # evalue
            evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
            # bitscore
            bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
            # alignment length
            aln_span = getattr_str(hsp, 'aln_span')
            # query region, trimmed to its column width (15)
            query_start = getattr_str(hsp, 'query_start')
            query_end = getattr_str(hsp, 'query_end')
            query_range = '[%s:%s]' % (query_start, query_end)
            query_range = trim_str(query_range, 15, '~]')
            # hit region, trimmed to its column width (21)
            hit_start = getattr_str(hsp, 'hit_start')
            hit_end = getattr_str(hsp, 'hit_end')
            hit_range = '[%s:%s]' % (hit_start, hit_end)
            hit_range = trim_str(hit_range, 21, '~]')
            # append the hsp row
            lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                    query_range, hit_range))

        return '\n'.join(lines)

    def __getitem__(self, idx):
        """Returns the HSP at an integer index, or a new Hit for a slice."""
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Replaces the HSP(s) stored at the given index or slice.

        Arguments:
        idx -- Integer index or slice object.
        hsps -- A single HSP object when `idx` is an integer; a list or tuple
                of HSP objects when `idx` is a slice.

        Raises TypeError if the value does not match the kind of index or if
        any item is not an HSP object, and ValueError if an HSP's IDs do not
        match the Hit's.

        """
        if isinstance(idx, slice):
            # HSP objects are themselves iterable (see `fragments`), so
            # assigning a lone HSP to a slice would silently splice its
            # fragments into the HSP list. Insist on a real sequence of HSPs.
            if not isinstance(hsps, (list, tuple)):
                raise TypeError("Slice assignment requires a list or tuple "
                        "of HSP objects.")
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            # A single position holds exactly one HSP; _validate_hsp rejects
            # anything else (including a list) with a TypeError, so a list
            # can no longer end up stored as if it were an HSP.
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Deletes the HSP(s) at the given index or slice."""
        del self._items[idx]

    ## hsp properties ##
    def _validate_hsp(self, hsp):
        """Validates an HSP object.

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id.

        Raises TypeError if `hsp` is not an HSP instance, and ValueError if
        either ID differs from the Hit's. The ID checks are skipped while the
        Hit is still empty.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if self._items:
            if hsp.hit_id != self.id:
                raise ValueError("Expected HSP with hit ID %r, "
                        "found %r instead." % (self.id, hsp.hit_id))
            if hsp.query_id != self.query_id:
                raise ValueError("Expected HSP with query ID %r, "
                        "found %r instead." % (self.query_id, hsp.query_id))

    ## properties ##
    description = optionalcascade('hit_description', """Hit description""")
    query_description = optionalcascade('query_description',
            """Description of the query that produced the hit""")
    id = optionalcascade('hit_id', """Hit ID string.""")
    query_id = optionalcascade('query_id',
            """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # every HSP iterates over its own fragments; flatten them in order
        return list(chain(*self._items))

    ## public methods ##
    def append(self, hsp):
        """Adds a HSP object to the end of Hit.

        Arguments:
        hsp -- HSP object to append.

        Any HSP object appended must have the same `hit_id` property as the
        Hit object's `id` property and the same `query_id` property as the
        Hit object's `query_id` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Creates a new Hit object whose HSP objects pass the filter
        function.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter,
                does a boolean check, and returns True or False. If None,
                HSPs are kept according to their own truth value, as with
                the built-in `filter`.

        `filter` is analogous to Python's built-in `filter` function, except
        that instead of returning a list it returns a `Hit` object. If no HSP
        passes the filter, None is returned. Here is an example of using
        `filter` to select for HSPs having bitscores bigger than 60:

        >>> from Bio import SearchIO
        >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
        >>> hit = qresult[3]
        >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
        >>> filtered_hit = hit.filter(bitscore_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print filtered_hit
        Query: 33211
         mir_1
         Hit: gi|301171322|ref|NR_035857.1| (86)
         Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ---- -------- --------- ------ --------------- ---------------------
                  #  E-value Bit score   Span     Query range             Hit range
               ---- -------- --------- ------ --------------- ---------------------
                  0  8.9e-20    100.47     60          [1:61]               [13:73]

        """
        # Force a real list: the emptiness test below must not depend on the
        # built-in `filter` returning a list rather than a lazy iterator.
        hsps = list(filter(func, self.hsps))
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def index(self, hsp):
        """Returns the index of a given HSP object, zero-based.

        Arguments:
        hsp -- HSP object to be looked up.

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Creates a new Hit object, mapping the given function to its HSPs.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter
                and also returns a HSP object. If None, the HSPs are carried
                over unchanged.

        `map` is analogous to Python's built-in `map` function. It is applied
        to all HSPs contained in the Hit object and returns a new Hit object.
        If the Hit contains no HSPs, None is returned.

        """
        # work on a shallow copy of the HSP list
        if func is None:
            hsps = self.hsps[:]
        else:
            hsps = [func(hsp) for hsp in self.hsps[:]]
        if not hsps:
            return None
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj

    def pop(self, index=-1):
        """Removes and returns the HSP object at the specified index.

        Arguments:
        index -- Integer denoting the index of the HSP object to remove.
                 Defaults to the last HSP.

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sorts the HSP objects.

        Arguments:
        key -- Function used to sort the HSP objects.
        reverse -- Boolean, whether to reverse the sorting or not.
        in_place -- Boolean, whether to perform sorting in place (in the same
                    object) or not (creating a new object).

        `sort` defaults to sorting in-place, to mimic Python's `list.sort`
        method; in that case nothing is returned. If you set the `in_place`
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
            return None
        hsps = self.hsps[:]
        hsps.sort(key=key, reverse=reverse)
        obj = self.__class__(hsps)
        self._transfer_attrs(obj)
        return obj


# When this file is executed directly rather than imported, run the doctests
# embedded in its docstrings.
if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()