Package Bio :: Package KDTree :: Module KDTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.KDTree.KDTree

  1  # Copyright 2004 by Thomas Hamelryck. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """KD tree data structure for searching N-dimensional vectors. 
  7   
  8  The KD tree data structure can be used for all kinds of searches that 
  9  involve N-dimensional vectors, e.g.  neighbor searches (find all points 
 10  within a radius of a given point) or finding all point pairs in a set 
 11  that are within a certain radius of each other. See "Computational Geometry: 
 12  Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars, 
 13  Otfried Schwarzkopf). Author: Thomas Hamelryck. 
 14  """ 
 15   
 16  from __future__ import print_function 
 17   
 18  from numpy import sum, sqrt, array 
 19  from numpy import random 
 20   
 21  from Bio.KDTree import _CKDTree 
 22   
 23   
24 -def _dist(p, q):
25 diff = p - q 26 return sqrt(sum(diff * diff))
27 28
29 -def _neighbor_test(nr_points, dim, bucket_size, radius):
30 """Test all fixed radius neighbor search. 31 32 Test all fixed radius neighbor search using the 33 KD tree C module. 34 35 Arguments: 36 - nr_points: number of points used in test 37 - dim: dimension of coords 38 - bucket_size: nr of points per tree node 39 - radius: radius of search (typically 0.05 or so) 40 41 Returns true if the test passes. 42 """ 43 # KD tree search 44 kdt = _CKDTree.KDTree(dim, bucket_size) 45 coords = random.random((nr_points, dim)) 46 kdt.set_data(coords) 47 neighbors = kdt.neighbor_search(radius) 48 r = [neighbor.radius for neighbor in neighbors] 49 if r is None: 50 l1 = 0 51 else: 52 l1 = len(r) 53 # now do a slow search to compare results 54 neighbors = kdt.neighbor_simple_search(radius) 55 r = [neighbor.radius for neighbor in neighbors] 56 if r is None: 57 l2 = 0 58 else: 59 l2 = len(r) 60 if l1 == l2: 61 # print("Passed.") 62 return True 63 else: 64 print("Not passed: %i != %i." % (l1, l2)) 65 return False
66 67
68 -def _test(nr_points, dim, bucket_size, radius):
69 """Test neighbor search. 70 71 Test neighbor search using the KD tree C module. 72 73 Arguments: 74 - nr_points: number of points used in test 75 - dim: dimension of coords 76 - bucket_size: nr of points per tree node 77 - radius: radius of search (typically 0.05 or so) 78 79 Returns true if the test passes. 80 """ 81 # kd tree search 82 kdt = _CKDTree.KDTree(dim, bucket_size) 83 coords = random.random((nr_points, dim)) 84 center = coords[0] 85 kdt.set_data(coords) 86 kdt.search_center_radius(center, radius) 87 r = kdt.get_indices() 88 if r is None: 89 l1 = 0 90 else: 91 l1 = len(r) 92 l2 = 0 93 # now do a manual search to compare results 94 for i in range(0, nr_points): 95 p = coords[i] 96 if _dist(p, center) <= radius: 97 l2 = l2 + 1 98 if l1 == l2: 99 # print("Passed.") 100 return True 101 else: 102 print("Not passed: %i != %i." % (l1, l2)) 103 return False
104 105
106 -class KDTree(object):
107 """KD tree implementation (C++, SWIG python wrapper) 108 109 The KD tree data structure can be used for all kinds of searches that 110 involve N-dimensional vectors, e.g. neighbor searches (find all points 111 within a radius of a given point) or finding all point pairs in a set 112 that are within a certain radius of each other. 113 114 Reference: 115 116 Computational Geometry: Algorithms and Applications 117 Second Edition 118 Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf 119 published by Springer-Verlag 120 2nd rev. ed. 2000. 121 ISBN: 3-540-65620-0 122 123 The KD tree data structure is described in chapter 5, pg. 99. 124 125 The following article made clear to me that the nodes should 126 contain more than one point (this leads to dramatic speed 127 improvements for the "all fixed radius neighbor search", see 128 below): 129 130 JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM 131 Symposium on Computational Geometry, vol. 91. San Francisco, 1990 132 133 This KD implementation also performs a "all fixed radius neighbor search", 134 i.e. it can find all point pairs in a set that are within a certain radius 135 of each other. As far as I know the algorithm has not been published. 136 """ 137
138 - def __init__(self, dim, bucket_size=1):
139 self.dim = dim 140 self.kdt = _CKDTree.KDTree(dim, bucket_size) 141 self.built = 0
142 143 # Set data 144
145 - def set_coords(self, coords):
146 """Add the coordinates of the points. 147 148 Arguments: 149 - coords: two dimensional NumPy array. E.g. if the points 150 have dimensionality D and there are N points, the coords 151 array should be NxD dimensional. 152 """ 153 if coords.min() <= -1e6 or coords.max() >= 1e6: 154 raise Exception("Points should lie between -1e6 and 1e6") 155 if len(coords.shape) != 2 or coords.shape[1] != self.dim: 156 raise Exception("Expected a Nx%i NumPy array" % self.dim) 157 self.kdt.set_data(coords) 158 self.built = 1
159 160 # Fixed radius search for a point 161
162 - def search(self, center, radius):
163 """Search all points within radius of center. 164 165 Arguments: 166 - center: one dimensional NumPy array. E.g. if the points have 167 dimensionality D, the center array should be D dimensional. 168 - radius: float>0 169 """ 170 if not self.built: 171 raise Exception("No point set specified") 172 if center.shape != (self.dim,): 173 raise Exception("Expected a %i-dimensional NumPy array" 174 % self.dim) 175 self.kdt.search_center_radius(center, radius)
176
177 - def get_radii(self):
178 """Return radii. 179 180 Return the list of distances from center after 181 a neighbor search. 182 """ 183 a = self.kdt.get_radii() 184 if a is None: 185 return [] 186 return a
187
188 - def get_indices(self):
189 """Return the list of indices. 190 191 Return the list of indices after a neighbor search. 192 The indices refer to the original coords NumPy array. The 193 coordinates with these indices were within radius of center. 194 195 For an index pair, the first index<second index. 196 """ 197 a = self.kdt.get_indices() 198 if a is None: 199 return [] 200 return a
201 202 # Fixed radius search for all points 203
204 - def all_search(self, radius):
205 """All fixed neighbor search. 206 207 Search all point pairs that are within radius. 208 209 Arguments: 210 - radius: float (>0) 211 """ 212 if not self.built: 213 raise Exception("No point set specified") 214 self.neighbors = self.kdt.neighbor_search(radius)
215
216 - def all_get_indices(self):
217 """Return All Fixed Neighbor Search results. 218 219 Return a Nx2 dim NumPy array containing 220 the indices of the point pairs, where N 221 is the number of neighbor pairs. 222 """ 223 a = array([[neighbor.index1, neighbor.index2] for neighbor in self.neighbors]) 224 return a
225
226 - def all_get_radii(self):
227 """Return All Fixed Neighbor Search results. 228 229 Return an N-dim array containing the distances 230 of all the point pairs, where N is the number 231 of neighbor pairs.. 232 """ 233 return [neighbor.radius for neighbor in self.neighbors]
234 235 if __name__ == "__main__": 236 237 nr_points = 100000 238 dim = 3 239 bucket_size = 10 240 query_radius = 10 241 242 coords = 200 * random.random((nr_points, dim)) 243 244 kdtree = KDTree(dim, bucket_size) 245 246 # enter coords 247 kdtree.set_coords(coords) 248 249 # Find all point pairs within radius 250 251 kdtree.all_search(query_radius) 252 253 # get indices & radii of points 254 255 # indices is a list of tuples. Each tuple contains the 256 # two indices of a point pair within query_radius of 257 # each other. 258 indices = kdtree.all_get_indices() 259 radii = kdtree.all_get_radii() 260 261 print("Found %i point pairs within radius %f." % (len(indices), query_radius)) 262 263 # Do 10 individual queries 264 265 for i in range(0, 10): 266 # pick a random center 267 center = random.random(dim) 268 269 # search neighbors 270 kdtree.search(center, query_radius) 271 272 # get indices & radii of points 273 indices = kdtree.get_indices() 274 radii = kdtree.get_radii() 275 276 x, y, z = center 277 print("Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z)) 278