[frames] | no frames]

# Source Code for Module Bio.Motif.Thresholds

``` 1  # Copyright 2008 by Norbert Dojer.  All rights reserved.
2  # Adapted by Bartek Wilczynski.
3  # This code is part of the Biopython distribution and governed by its
5  # as part of this package.
6  """Approximate calculation of appropriate thresholds for motif finding
7  """
8  import math
9  import random
10
11
12 -class ScoreDistribution(object):
13      """ Class representing approximate score distribution for a given motif.
14
15      Utilizes a dynamic programming approch to calculate the distribution of
16      scores with a predefined precision. Provides a number of methods for calculating
17      thresholds for motif occurences.
18      """
19 -    def __init__(self, motif, precision=10 ** 3):
20          self.min_score = min(0.0, motif.min_score())
21          self.interval = max(0.0, motif.max_score()) - self.min_score
22          self.n_points = precision * motif.length
23          self.step = self.interval / (self.n_points - 1)
24          self.mo_density = [0.0] * self.n_points
25          self.mo_density[-self._index_diff(self.min_score)] = 1.0
26          self.bg_density = [0.0] * self.n_points
27          self.bg_density[-self._index_diff(self.min_score)] = 1.0
28          self.ic = motif.ic()
29          for lo, mo in zip(motif.log_odds(), motif.pwm()):
30              self.modify(lo, mo, motif.background)
31
32 -    def _index_diff(self, x, y=0.0):
33          return int((x - y + 0.5 * self.step) // self.step)
34
35 -    def _add(self, i, j):
36          return max(0, min(self.n_points - 1, i + j))
37
38 -    def modify(self, scores, mo_probs, bg_probs):
39          mo_new = [0.0] * self.n_points
40          bg_new = [0.0] * self.n_points
41          for k, v in scores.items():
42              d = self._index_diff(v)
43              for i in range(self.n_points):
44                  mo_new[self._add(i, d)] += self.mo_density[i] * mo_probs[k]
45                  bg_new[self._add(i, d)] += self.bg_density[i] * bg_probs[k]
46          self.mo_density = mo_new
47          self.bg_density = bg_new
48
49 -    def threshold_fpr(self, fpr):
50          """
51          Approximate the log-odds threshold which makes the type I error (false positive rate).
52          """
53          i = self.n_points
54          prob = 0.0
55          while prob < fpr:
56              i -= 1
57              prob += self.bg_density[i]
58          return self.min_score + i * self.step
59
60 -    def threshold_fnr(self, fnr):
61          """
62          Approximate the log-odds threshold which makes the type II error (false negative rate).
63          """
64          i = -1
65          prob = 0.0
66          while prob < fnr:
67              i += 1
68              prob += self.mo_density[i]
69          return self.min_score + i * self.step
70
71 -    def threshold_balanced(self, rate_proportion=1.0, return_rate=False):
72          """
73          Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion
74          """
75          i = self.n_points
76          fpr = 0.0
77          fnr = 1.0
78          while fpr * rate_proportion < fnr:
79              i -= 1
80              fpr += self.bg_density[i]
81              fnr -= self.mo_density[i]
82          if return_rate:
83              return self.min_score + i * self.step, fpr
84          else:
85              return self.min_score + i * self.step
86
87 -    def threshold_patser(self):
88          """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software.
89
90          It selects such a threshold that the log(fpr)=-ic(M)
91          note: the actual patser software uses natural logarithms instead of log_2, so the numbers
92          are not directly comparable.
93          """
94          return self.threshold_fpr(fpr=2 ** -self.ic)
95
<!--
expandto(location.href);
// -->

```

 Generated by Epydoc 3.0.1 on Wed Oct 21 18:04:45 2015 http://epydoc.sourceforge.net