Package Bio :: Package motifs :: Module thresholds
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.thresholds

  1  # Copyright 2008 by Norbert Dojer.  All rights reserved. 
  2  # Adapted by Bartek Wilczynski. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Approximate calculation of appropriate thresholds for motif finding 
  7  """ 
  8   
  9   
10 -class ScoreDistribution(object):
11 """ Class representing approximate score distribution for a given motif. 12 13 Utilizes a dynamic programming approch to calculate the distribution of 14 scores with a predefined precision. Provides a number of methods for calculating 15 thresholds for motif occurences. 16 """
17 - def __init__(self, motif=None, precision=10**3, pssm=None, background=None):
18 if pssm is None: 19 self.min_score = min(0.0, motif.min_score()) 20 self.interval = max(0.0, motif.max_score())-self.min_score 21 self.n_points = precision * motif.length 22 self.ic=motif.ic() 23 else: 24 self.min_score = min(0.0, pssm.min) 25 self.interval = max(0.0, pssm.max)-self.min_score 26 self.n_points = precision * pssm.length 27 self.ic = pssm.mean(background) 28 self.step = self.interval/(self.n_points-1) 29 self.mo_density = [0.0]*self.n_points 30 self.mo_density[-self._index_diff(self.min_score)] = 1.0 31 self.bg_density = [0.0]*self.n_points 32 self.bg_density[-self._index_diff(self.min_score)] = 1.0 33 if pssm is None: 34 for lo, mo in zip(motif.log_odds(), motif.pwm()): 35 self.modify(lo, mo, motif.background) 36 else: 37 for position in range(pssm.length): 38 mo_new=[0.0]*self.n_points 39 bg_new=[0.0]*self.n_points 40 lo = pssm[:, position] 41 for letter, score in lo.items(): 42 bg = background[letter] 43 mo = pow(2, pssm[letter, position]) * bg 44 d=self._index_diff(score) 45 for i in range(self.n_points): 46 mo_new[self._add(i, d)]+=self.mo_density[i]*mo 47 bg_new[self._add(i, d)]+=self.bg_density[i]*bg 48 self.mo_density=mo_new 49 self.bg_density=bg_new
50
51 - def _index_diff(self, x, y=0.0):
52 return int((x-y+0.5*self.step)//self.step)
53
54 - def _add(self, i, j):
55 return max(0, min(self.n_points-1, i+j))
56
57 - def modify(self, scores, mo_probs, bg_probs):
58 mo_new=[0.0]*self.n_points 59 bg_new=[0.0]*self.n_points 60 for k, v in scores.items(): 61 d=self._index_diff(v) 62 for i in range(self.n_points): 63 mo_new[self._add(i, d)]+=self.mo_density[i]*mo_probs[k] 64 bg_new[self._add(i, d)]+=self.bg_density[i]*bg_probs[k] 65 self.mo_density=mo_new 66 self.bg_density=bg_new
67
68 - def threshold_fpr(self, fpr):
69 """ 70 Approximate the log-odds threshold which makes the type I error (false positive rate). 71 """ 72 i=self.n_points 73 prob=0.0 74 while prob<fpr: 75 i-=1 76 prob+=self.bg_density[i] 77 return self.min_score+i*self.step
78
79 - def threshold_fnr(self, fnr):
80 """ 81 Approximate the log-odds threshold which makes the type II error (false negative rate). 82 """ 83 i=-1 84 prob=0.0 85 while prob<fnr: 86 i+=1 87 prob+=self.mo_density[i] 88 return self.min_score+i*self.step
89
90 - def threshold_balanced(self, rate_proportion=1.0, return_rate=False):
91 """ 92 Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion 93 """ 94 i=self.n_points 95 fpr=0.0 96 fnr=1.0 97 while fpr*rate_proportion<fnr: 98 i-=1 99 fpr+=self.bg_density[i] 100 fnr-=self.mo_density[i] 101 if return_rate: 102 return self.min_score+i*self.step, fpr 103 else: 104 return self.min_score+i*self.step
105
106 - def threshold_patser(self):
107 """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software. 108 109 It selects such a threshold that the log(fpr)=-ic(M) 110 note: the actual patser software uses natural logarithms instead of log_2, so the numbers 111 are not directly comparable. 112 """ 113 return self.threshold_fpr(fpr=2**-self.ic)
114