Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Classify protein backbone structure according to Kolodny et al's fragment 
  7  libraries. 
  8   
  9  It can be regarded as a form of objective secondary structure classification. 
 10  Only fragments of length 5 or 7 are supported (ie. there is a 'central' 
 11  residue). 
 12   
 13  Full reference: 
 14   
 15  Kolodny R, Koehl P, Guibas L, Levitt M. 
 16  Small libraries of protein fragments model native protein structures accurately. 
 17  J Mol Biol. 2002 323(2):297-307. 
 18   
 19  The definition files of the fragments can be obtained from: 
 20   
 21  U{http://csb.stanford.edu/~rachel/fragments/} 
 22   
 23  You need these files to use this module. 
 24   
 25  The following example uses the library with 10 fragments of length 5. 
 26  The library files can be found in directory 'fragment_data'. 
 27   
 28      >>> model = structure[0] 
 29      >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data") 
 30      >>> fragment = fm[residue] 
 31  """ 
 32   
 33  from __future__ import print_function 
 34   
 35  import numpy 
 36   
 37  from Bio.SVDSuperimposer import SVDSuperimposer 
 38   
 39  from Bio.PDB import Selection 
 40  from Bio.PDB.PDBExceptions import PDBException 
 41  from Bio.PDB.PDBParser import PDBParser 
 42  from Bio.PDB.Polypeptide import PPBuilder 
 43   
 44   
 45  # fragment file (lib_SIZE_z_LENGTH.txt) 
 46  # SIZE=number of fragments 
 47  # LENGTH=length of fragment (4,5,6,7) 
 48  _FRAGMENT_FILE = "lib_%s_z_%s.txt" 
 49   
 50   
51 -def _read_fragments(size, length, dir="."):
52 """ 53 Read a fragment spec file (available from 54 U{http://csb.stanford.edu/rachel/fragments/} 55 and return a list of Fragment objects. 56 57 @param size: number of fragments in the library 58 @type size: int 59 60 @param length: length of the fragments 61 @type length: int 62 63 @param dir: directory where the fragment spec files can be found 64 @type dir: string 65 """ 66 filename = (dir + "/" + _FRAGMENT_FILE) % (size, length) 67 with open(filename, "r") as fp: 68 flist = [] 69 # ID of fragment=rank in spec file 70 fid = 0 71 for l in fp.readlines(): 72 # skip comment and blank lines 73 if l[0] == "*" or l[0] == "\n": 74 continue 75 sl = l.split() 76 if sl[1] == "------": 77 # Start of fragment definition 78 f = Fragment(length, fid) 79 flist.append(f) 80 # increase fragment id (rank) 81 fid += 1 82 continue 83 # Add CA coord to Fragment 84 coord = numpy.array([float(x) for x in sl[0:3]]) 85 # XXX= dummy residue name 86 f.add_residue("XXX", coord) 87 return flist
88 89
90 -class Fragment(object):
91 """ 92 Represent a polypeptide C-alpha fragment. 93 """
94 - def __init__(self, length, fid):
95 """ 96 @param length: length of the fragment 97 @type length: int 98 99 @param fid: id for the fragment 100 @type fid: int 101 """ 102 # nr of residues in fragment 103 self.length = length 104 # nr of residues added 105 self.counter = 0 106 self.resname_list = [] 107 # CA coordinate matrix 108 self.coords_ca = numpy.zeros((length, 3), "d") 109 self.fid = fid
110
111 - def get_resname_list(self):
112 """ 113 @return: the residue names 114 @rtype: [string, string,...] 115 """ 116 return self.resname_list
117
118 - def get_id(self):
119 """ 120 @return: id for the fragment 121 @rtype: int 122 """ 123 return self.fid
124
125 - def get_coords(self):
126 """ 127 @return: the CA coords in the fragment 128 @rtype: Numeric (Nx3) array 129 """ 130 return self.coords_ca
131
132 - def add_residue(self, resname, ca_coord):
133 """ 134 @param resname: residue name (eg. GLY). 135 @type resname: string 136 137 @param ca_coord: the c-alpha coorinates of the residues 138 @type ca_coord: Numeric array with length 3 139 """ 140 if self.counter >= self.length: 141 raise PDBException("Fragment boundary exceeded.") 142 self.resname_list.append(resname) 143 self.coords_ca[self.counter] = ca_coord 144 self.counter = self.counter + 1
145
146 - def __len__(self):
147 """ 148 @return: length of fragment 149 @rtype: int 150 """ 151 return self.length
152
153 - def __sub__(self, other):
154 """ 155 Return rmsd between two fragments. 156 157 Example: 158 >>> rmsd=fragment1-fragment2 159 160 @return: rmsd between fragments 161 @rtype: float 162 """ 163 sup = SVDSuperimposer() 164 sup.set(self.coords_ca, other.coords_ca) 165 sup.run() 166 return sup.get_rms()
167
168 - def __repr__(self):
169 """ 170 Returns <Fragment length=L id=ID> where L=length of fragment 171 and ID the identifier (rank in the library). 172 """ 173 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
174 175
176 -def _make_fragment_list(pp, length):
177 """ 178 Dice up a peptide in fragments of length "length". 179 180 @param pp: a list of residues (part of one peptide) 181 @type pp: [L{Residue}, L{Residue}, ...] 182 183 @param length: fragment length 184 @type length: int 185 """ 186 frag_list = [] 187 for i in range(0, len(pp) - length + 1): 188 f = Fragment(length, -1) 189 for j in range(0, length): 190 residue = pp[i + j] 191 resname = residue.get_resname() 192 if residue.has_id("CA"): 193 ca = residue["CA"] 194 else: 195 raise PDBException("CHAINBREAK") 196 if ca.is_disordered(): 197 raise PDBException("CHAINBREAK") 198 ca_coord = ca.get_coord() 199 f.add_residue(resname, ca_coord) 200 frag_list.append(f) 201 return frag_list
202 203
204 -def _map_fragment_list(flist, reflist):
205 """ 206 Map all frgaments in flist to the closest 207 (in RMSD) fragment in reflist. 208 209 Returns a list of reflist indices. 210 211 @param flist: list of protein fragments 212 @type flist: [L{Fragment}, L{Fragment}, ...] 213 214 @param reflist: list of reference (ie. library) fragments 215 @type reflist: [L{Fragment}, L{Fragment}, ...] 216 """ 217 mapped = [] 218 for f in flist: 219 rank = [] 220 for i in range(0, len(reflist)): 221 rf = reflist[i] 222 rms = f - rf 223 rank.append((rms, rf)) 224 rank.sort() 225 fragment = rank[0][1] 226 mapped.append(fragment) 227 return mapped
228 229
230 -class FragmentMapper(object):
231 """ 232 Map polypeptides in a model to lists of representative fragments. 233 """
234 - def __init__(self, model, lsize=20, flength=5, fdir="."):
235 """Create instance of FragmentMapper 236 237 @param model: the model that will be mapped 238 @type model: L{Model} 239 240 @param lsize: number of fragments in the library 241 @type lsize: int 242 243 @param flength: length of fragments in the library 244 @type flength: int 245 246 @param fdir: directory where the definition files are 247 found (default=".") 248 @type fdir: string 249 """ 250 if flength == 5: 251 self.edge = 2 252 elif flength == 7: 253 self.edge = 3 254 else: 255 raise PDBException("Fragment length should be 5 or 7.") 256 self.flength = flength 257 self.lsize = lsize 258 self.reflist = _read_fragments(lsize, flength, fdir) 259 self.model = model 260 self.fd = self._map(self.model)
261
262 - def _map(self, model):
263 """ 264 @param model: the model that will be mapped 265 @type model: L{Model} 266 """ 267 ppb = PPBuilder() 268 ppl = ppb.build_peptides(model) 269 fd = {} 270 for pp in ppl: 271 try: 272 # make fragments 273 flist = _make_fragment_list(pp, self.flength) 274 # classify fragments 275 mflist = _map_fragment_list(flist, self.reflist) 276 for i in range(0, len(pp)): 277 res = pp[i] 278 if i < self.edge: 279 # start residues 280 continue 281 elif i >= (len(pp) - self.edge): 282 # end residues 283 continue 284 else: 285 # fragment 286 index = i - self.edge 287 assert(index >= 0) 288 fd[res] = mflist[index] 289 except PDBException as why: 290 if why == 'CHAINBREAK': 291 # Funny polypeptide - skip 292 pass 293 else: 294 raise PDBException(why) 295 return fd
296
297 - def has_key(self, res):
298 """(Obsolete) 299 300 @type res: L{Residue} 301 """ 302 import warnings 303 from Bio import BiopythonDeprecationWarning 304 warnings.warn("has_key is deprecated; use 'res in object' instead", BiopythonDeprecationWarning) 305 return (res in self)
306
307 - def __contains__(self, res):
308 """True if the given residue is in any of the mapped fragments. 309 310 @type res: L{Residue} 311 """ 312 return (res in self.fd)
313
314 - def __getitem__(self, res):
315 """ 316 @type res: L{Residue} 317 318 @return: fragment classification 319 @rtype: L{Fragment} 320 """ 321 return self.fd[res]
322 323 324 if __name__ == "__main__": 325 326 import sys 327 328 p = PDBParser() 329 s = p.get_structure("X", sys.argv[1]) 330 m = s[0] 331 fm = FragmentMapper(m, 10, 5, "levitt_data") 332 333 for r in Selection.unfold_entities(m, "R"): 334 print("%s:" % r) 335 if r in fm: 336 print(fm[r]) 337