Package Bio :: Package Statistics :: Module lowess
[hide private]
[frames] | no frames]

Source Code for Module Bio.Statistics.lowess

  1  # Copyright 2004-2008 by M de Hoon. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Implements the Lowess function for nonparametric regression. 
  7   
  8  Functions: 
  9  lowess        Fit a smooth nonparametric regression curve to a scatterplot. 
 10   
 11  For more information, see 
 12   
 13  William S. Cleveland: "Robust locally weighted regression and smoothing 
 14  scatterplots", Journal of the American Statistical Association, December 1979, 
 15  volume 74, number 368, pp. 829-836. 
 16   
 17  William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An 
 18  approach to regression analysis by local fitting", Journal of the American 
 19  Statistical Association, September 1988, volume 83, number 403, pp. 596-610. 
 20  """ 
 21   
 22  from __future__ import print_function 
 23   
 24  from Bio._py3k import range 
 25   
 26  import numpy 
 27   
 28   
 29  try: 
 30      from Bio.Cluster import median 
 31      # The function median in Bio.Cluster is faster than the function median 
 32      # in NumPy, as it does not require a full sort. 
 33  except ImportError as x: 
 34      # Use the median function in NumPy if Bio.Cluster is not available 
 35      from numpy import median 
 36   
 37   
38 -def lowess(x, y, f=2. / 3., iter=3):
39 """lowess(x, y, f=2./3., iter=3) -> yest 40 41 Lowess smoother: Robust locally weighted regression. 42 The lowess function fits a nonparametric regression curve to a scatterplot. 43 The arrays x and y contain an equal number of elements; each pair 44 (x[i], y[i]) defines a data point in the scatterplot. The function returns 45 the estimated (smooth) values of y. 46 47 The smoothing span is given by f. A larger value for f will result in a 48 smoother curve. The number of robustifying iterations is given by iter. The 49 function will run faster with a smaller number of iterations. 50 51 x and y should be numpy float arrays of equal length. The return value is 52 also a numpy float array of that length. 53 54 e.g. 55 >>> import numpy 56 >>> x = numpy.array([4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 57 ... 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16, 58 ... 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, 59 ... 20, 22, 23, 24, 24, 24, 24, 25], numpy.float) 60 >>> y = numpy.array([2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 61 ... 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40, 62 ... 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56, 63 ... 64, 66, 54, 70, 92, 93, 120, 85], numpy.float) 64 >>> result = lowess(x, y) 65 >>> len(result) 66 50 67 >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1])) 68 [4.85, ..., 84.98] 69 """ 70 n = len(x) 71 r = int(numpy.ceil(f * n)) 72 h = [numpy.sort(abs(x - x[i]))[r] for i in range(n)] 73 w = numpy.clip(abs(([x] - numpy.transpose([x])) / h), 0.0, 1.0) 74 w = 1 - w * w * w 75 w = w * w * w 76 yest = numpy.zeros(n) 77 delta = numpy.ones(n) 78 for iteration in range(iter): 79 for i in range(n): 80 weights = delta * w[:, i] 81 weights_mul_x = weights * x 82 b1 = numpy.dot(weights, y) 83 b2 = numpy.dot(weights_mul_x, y) 84 A11 = sum(weights) 85 A12 = sum(weights_mul_x) 86 A21 = A12 87 A22 = numpy.dot(weights_mul_x, x) 88 determinant = A11 * A22 - A12 * A21 89 beta1 = (A22 * b1 - A12 * b2) / determinant 90 beta2 = (A11 * b2 - A21 * b1) / determinant 91 yest[i] = beta1 + beta2 * x[i] 92 residuals = y - yest 93 s = median(abs(residuals)) 94 delta[:] = numpy.clip(residuals / (6 * s), -1, 1) 95 delta[:] = 1 - delta * delta 96 delta[:] = delta * delta 97 return yest
98 99
100 -def _test():
101 """Run the Bio.Statistics.lowess module's doctests.""" 102 print("Running doctests...") 103 import doctest 104 doctest.testmod() 105 print("Done")
106 107 if __name__ == "__main__": 108 _test() 109