1
2
3
4
5
6 """
7 This module implements the Lowess function for nonparametric regression.
8
9 Functions:
10 lowess Fit a smooth nonparametric regression curve to a scatterplot.
11
12 For more information, see
13
14 William S. Cleveland: "Robust locally weighted regression and smoothing
15 scatterplots", Journal of the American Statistical Association, December 1979,
16 volume 74, number 368, pp. 829-836.
17
18 William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
19 approach to regression analysis by local fitting", Journal of the American
20 Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
21 """
22
23 import numpy
24
25 try:
26 from Bio.Cluster import median
27
28
29 except ImportError, x:
30
31 from numpy import median
32
33
34 -def lowess(x, y, f=2. / 3., iter=3):
35 """lowess(x, y, f=2./3., iter=3) -> yest
36
37 Lowess smoother: Robust locally weighted regression.
38 The lowess function fits a nonparametric regression curve to a scatterplot.
39 The arrays x and y contain an equal number of elements; each pair
40 (x[i], y[i]) defines a data point in the scatterplot. The function returns
41 the estimated (smooth) values of y.
42
43 The smoothing span is given by f. A larger value for f will result in a
44 smoother curve. The number of robustifying iterations is given by iter. The
45 function will run faster with a smaller number of iterations.
46
47 x and y should be numpy float arrays of equal length. The return value is
48 also a numpy float array of that length.
49
50 e.g.
51 >>> import numpy
52 >>> x = numpy.array([4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12,
53 ... 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
54 ... 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
55 ... 20, 22, 23, 24, 24, 24, 24, 25], numpy.float)
56 >>> y = numpy.array([2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
57 ... 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
58 ... 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
59 ... 64, 66, 54, 70, 92, 93, 120, 85], numpy.float)
60 >>> result = lowess(x, y)
61 >>> len(result)
62 50
63 >>> print "[%0.2f, ..., %0.2f]" % (result[0], result[-1])
64 [4.85, ..., 84.98]
65 """
66 n = len(x)
67 r = int(numpy.ceil(f * n))
68 h = [numpy.sort(abs(x - x[i]))[r] for i in range(n)]
69 w = numpy.clip(abs(([x] - numpy.transpose([x])) / h), 0.0, 1.0)
70 w = 1 - w * w * w
71 w = w * w * w
72 yest = numpy.zeros(n)
73 delta = numpy.ones(n)
74 for iteration in range(iter):
75 for i in xrange(n):
76 weights = delta * w[:, i]
77 weights_mul_x = weights * x
78 b1 = numpy.dot(weights, y)
79 b2 = numpy.dot(weights_mul_x, y)
80 A11 = sum(weights)
81 A12 = sum(weights_mul_x)
82 A21 = A12
83 A22 = numpy.dot(weights_mul_x, x)
84 determinant = A11 * A22 - A12 * A21
85 beta1 = (A22 * b1 - A12 * b2) / determinant
86 beta2 = (A11 * b2 - A21 * b1) / determinant
87 yest[i] = beta1 + beta2 * x[i]
88 residuals = y - yest
89 s = median(abs(residuals))
90 delta[:] = numpy.clip(residuals / (6 * s), -1, 1)
91 delta[:] = 1 - delta * delta
92 delta[:] = delta * delta
93 return yest
94
95
97 """Run the Bio.Statistics.lowess module's doctests."""
98 print "Running doctests..."
99 import doctest
100 doctest.testmod()
101 print "Done"
102
103 if __name__ == "__main__":
104 _test()
105