[frames] | no frames]

# Source Code for Module Bio.KDTree.KDTree'

```  1  # Copyright 2004 by Thomas Hamelryck.
3  # This code is part of the Biopython distribution and governed by its
5  # as part of this package.
6  """KD tree data structure for searching N-dimensional vectors.
7
8  The KD tree data structure can be used for all kinds of searches that
9  involve N-dimensional vectors, e.g.  neighbor searches (find all points
10  within a radius of a given point) or finding all point pairs in a set
11  that are within a certain radius of each other. See "Computational Geometry:
12  Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars,
13  Otfried Schwarzkopf). Author: Thomas Hamelryck.
14  """
15
16  from __future__ import print_function
17
18  from numpy import sum, sqrt, array
19  from numpy import random
20
21  from Bio.KDTree import _CKDTree
22
23
24 -def _dist(p, q):
25      diff = p - q
26      return sqrt(sum(diff * diff))
27
28
29 -def _neighbor_test(nr_points, dim, bucket_size, radius):
30      """Test all fixed radius neighbor search.
31
32      Test all fixed radius neighbor search using the
33      KD tree C module.
34
35      Arguments:
36       - nr_points: number of points used in test
37       - dim: dimension of coords
38       - bucket_size: nr of points per tree node
40
41      Returns true if the test passes.
42      """
43      # KD tree search
44      kdt = _CKDTree.KDTree(dim, bucket_size)
45      coords = random.random((nr_points, dim))
46      kdt.set_data(coords)
48      r = [neighbor.radius for neighbor in neighbors]
49      if r is None:
50          l1 = 0
51      else:
52          l1 = len(r)
53      # now do a slow search to compare results
55      r = [neighbor.radius for neighbor in neighbors]
56      if r is None:
57          l2 = 0
58      else:
59          l2 = len(r)
60      if l1 == l2:
61          # print("Passed.")
62          return True
63      else:
64          print("Not passed: %i != %i." % (l1, l2))
65          return False
66
67
68 -def _test(nr_points, dim, bucket_size, radius):
69      """Test neighbor search.
70
71      Test neighbor search using the KD tree C module.
72
73      Arguments:
74       - nr_points: number of points used in test
75       - dim: dimension of coords
76       - bucket_size: nr of points per tree node
78
79      Returns true if the test passes.
80      """
81      # kd tree search
82      kdt = _CKDTree.KDTree(dim, bucket_size)
83      coords = random.random((nr_points, dim))
84      center = coords[0]
85      kdt.set_data(coords)
87      r = kdt.get_indices()
88      if r is None:
89          l1 = 0
90      else:
91          l1 = len(r)
92      l2 = 0
93      # now do a manual search to compare results
94      for i in range(0, nr_points):
95          p = coords[i]
96          if _dist(p, center) <= radius:
97              l2 = l2 + 1
98      if l1 == l2:
99          # print("Passed.")
100          return True
101      else:
102          print("Not passed: %i != %i." % (l1, l2))
103          return False
104
105
106 -class KDTree(object):
107      """KD tree implementation (C++, SWIG python wrapper)
108
109      The KD tree data structure can be used for all kinds of searches that
110      involve N-dimensional vectors, e.g.  neighbor searches (find all points
111      within a radius of a given point) or finding all point pairs in a set
112      that are within a certain radius of each other.
113
114      Reference:
115
116      Computational Geometry: Algorithms and Applications
117      Second Edition
118      Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf
120      2nd rev. ed. 2000.
121      ISBN: 3-540-65620-0
122
123      The KD tree data structure is described in chapter 5, pg. 99.
124
125      The following article made clear to me that the nodes should
126      contain more than one point (this leads to dramatic speed
127      improvements for the "all fixed radius neighbor search", see
128      below):
129
130      JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM
131      Symposium on Computational Geometry, vol. 91. San Francisco, 1990
132
133      This KD implementation also performs a "all fixed radius neighbor search",
134      i.e. it can find all point pairs in a set that are within a certain radius
135      of each other. As far as I know the algorithm has not been published.
136      """
137
138 -    def __init__(self, dim, bucket_size=1):
139          self.dim = dim
140          self.kdt = _CKDTree.KDTree(dim, bucket_size)
141          self.built = 0
142
143      # Set data
144
145 -    def set_coords(self, coords):
146          """Add the coordinates of the points.
147
148          Arguments:
149           - coords: two dimensional NumPy array. E.g. if the points
150             have dimensionality D and there are N points, the coords
151             array should be NxD dimensional.
152
153          """
154          if coords.min() <= -1e6 or coords.max() >= 1e6:
155              raise Exception("Points should lie between -1e6 and 1e6")
156          if len(coords.shape) != 2 or coords.shape[1] != self.dim:
157              raise Exception("Expected a Nx%i NumPy array" % self.dim)
158          self.kdt.set_data(coords)
159          self.built = 1
160
161      # Fixed radius search for a point
162
163 -    def search(self, center, radius):
164          """Search all points within radius of center.
165
166          Arguments:
167           - center: one dimensional NumPy array. E.g. if the points have
168             dimensionality D, the center array should be D dimensional.
170
171          """
172          if not self.built:
173              raise Exception("No point set specified")
174          if center.shape != (self.dim,):
175              raise Exception("Expected a %i-dimensional NumPy array"
176                              % self.dim)
178
181
182          Return the list of distances from center after
183          a neighbor search.
184          """
186          if a is None:
187              return []
188          return a
189
190 -    def get_indices(self):
191          """Return the list of indices.
192
193          Return the list of indices after a neighbor search.
194          The indices refer to the original coords NumPy array. The
195          coordinates with these indices were within radius of center.
196
197          For an index pair, the first index<second index.
198          """
199          a = self.kdt.get_indices()
200          if a is None:
201              return []
202          return a
203
204      # Fixed radius search for all points
205
207          """All fixed neighbor search.
208
209          Search all point pairs that are within radius.
210
211          Arguments:
213
214          """
215          if not self.built:
216              raise Exception("No point set specified")
218
219 -    def all_get_indices(self):
220          """Return All Fixed Neighbor Search results.
221
222          Return a Nx2 dim NumPy array containing
223          the indices of the point pairs, where N
224          is the number of neighbor pairs.
225          """
226          a = array([[neighbor.index1, neighbor.index2] for neighbor in self.neighbors])
227          return a
228
230          """Return All Fixed Neighbor Search results.
231
232          Return an N-dim array containing the distances
233          of all the point pairs, where N is the number
234          of neighbor pairs..
235          """
236          return [neighbor.radius for neighbor in self.neighbors]
237
238
239  if __name__ == "__main__":
240
241      nr_points = 100000
242      dim = 3
243      bucket_size = 10
245
246      coords = 200 * random.random((nr_points, dim))
247
248      kdtree = KDTree(dim, bucket_size)
249
250      # enter coords
251      kdtree.set_coords(coords)
252
253      # Find all point pairs within radius
254
256
257      # get indices & radii of points
258
259      # indices is a list of tuples. Each tuple contains the
260      # two indices of a point pair within query_radius of
261      # each other.
262      indices = kdtree.all_get_indices()
264
266
267      # Do 10 individual queries
268
269      for i in range(0, 10):
270          # pick a random center
271          center = random.random(dim)
272
273          # search neighbors
275
276          # get indices & radii of points
277          indices = kdtree.get_indices()