Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Classify protein backbone structure according to Kolodny et al's fragment 
  7  libraries. 
  8   
  9  It can be regarded as a form of objective secondary structure classification. 
 10  Only fragments of length 5 or 7 are supported (ie. there is a 'central' 
 11  residue). 
 12   
 13  Full reference: 
 14   
 15  Kolodny R, Koehl P, Guibas L, Levitt M. 
 16  Small libraries of protein fragments model native protein structures accurately. 
 17  J Mol Biol. 2002 323(2):297-307. 
 18   
 19  The definition files of the fragments can be obtained from: 
 20   
 21  http://github.com/csblab/fragments/ 
 22   
 23  You need these files to use this module. 
 24   
 25  The following example uses the library with 10 fragments of length 5. 
 26  The library files can be found in directory 'fragment_data'. 
 27   
 28      >>> model = structure[0] 
 29      >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data") 
 30      >>> fragment = fm[residue] 
 31   
 32  """ 
 33   
 34  from __future__ import print_function 
 35   
 36  import numpy 
 37   
 38  from Bio.SVDSuperimposer import SVDSuperimposer 
 39   
 40  from Bio.PDB import Selection 
 41  from Bio.PDB.PDBExceptions import PDBException 
 42  from Bio.PDB.PDBParser import PDBParser 
 43  from Bio.PDB.Polypeptide import PPBuilder 
 44   
 45   
 46  # fragment file (lib_SIZE_z_LENGTH.txt) 
 47  # SIZE=number of fragments 
 48  # LENGTH=length of fragment (4,5,6,7) 
 49  _FRAGMENT_FILE = "lib_%s_z_%s.txt" 
 50   
 51   
52 -def _read_fragments(size, length, dir="."):
53 """Read a fragment spec file (PRIVATE). 54 55 Read a fragment spec file available from 56 http://github.com/csblab/fragments/ 57 and return a list of Fragment objects. 58 59 @param size: number of fragments in the library 60 @type size: int 61 62 @param length: length of the fragments 63 @type length: int 64 65 @param dir: directory where the fragment spec files can be found 66 @type dir: string 67 """ 68 filename = (dir + "/" + _FRAGMENT_FILE) % (size, length) 69 with open(filename, "r") as fp: 70 flist = [] 71 # ID of fragment=rank in spec file 72 fid = 0 73 for l in fp.readlines(): 74 # skip comment and blank lines 75 if l[0] == "*" or l[0] == "\n": 76 continue 77 sl = l.split() 78 if sl[1] == "------": 79 # Start of fragment definition 80 f = Fragment(length, fid) 81 flist.append(f) 82 # increase fragment id (rank) 83 fid += 1 84 continue 85 # Add CA coord to Fragment 86 coord = numpy.array([float(x) for x in sl[0:3]]) 87 # XXX= dummy residue name 88 f.add_residue("XXX", coord) 89 return flist
90 91
92 -class Fragment(object):
93 """Represent a polypeptide C-alpha fragment.""" 94
95 - def __init__(self, length, fid):
96 """Initialize fragment object. 97 98 @param length: length of the fragment 99 @type length: int 100 101 @param fid: id for the fragment 102 @type fid: int 103 """ 104 # nr of residues in fragment 105 self.length = length 106 # nr of residues added 107 self.counter = 0 108 self.resname_list = [] 109 # CA coordinate matrix 110 self.coords_ca = numpy.zeros((length, 3), "d") 111 self.fid = fid
112
113 - def get_resname_list(self):
114 """Get residue list. 115 116 @return: the residue names 117 @rtype: [string, string,...] 118 """ 119 return self.resname_list
120
121 - def get_id(self):
122 """Get identifier for the fragment. 123 124 @return: id for the fragment 125 @rtype: int 126 """ 127 return self.fid
128
129 - def get_coords(self):
130 """Get the CA coordinates in the fragment. 131 132 @return: the CA coords in the fragment 133 @rtype: Numeric (Nx3) array 134 """ 135 return self.coords_ca
136
137 - def add_residue(self, resname, ca_coord):
138 """Add a residue. 139 140 @param resname: residue name (eg. GLY). 141 @type resname: string 142 143 @param ca_coord: the c-alpha coorinates of the residues 144 @type ca_coord: Numeric array with length 3 145 """ 146 if self.counter >= self.length: 147 raise PDBException("Fragment boundary exceeded.") 148 self.resname_list.append(resname) 149 self.coords_ca[self.counter] = ca_coord 150 self.counter = self.counter + 1
151
152 - def __len__(self):
153 """Return lengt of the fragment.""" 154 return self.length
155
156 - def __sub__(self, other):
157 """Return rmsd between two fragments. 158 159 Example: 160 161 >>> rmsd=fragment1-fragment2 162 163 @return: rmsd between fragments 164 @rtype: float 165 """ 166 sup = SVDSuperimposer() 167 sup.set(self.coords_ca, other.coords_ca) 168 sup.run() 169 return sup.get_rms()
170
171 - def __repr__(self):
172 """String representing the fragment object. 173 174 Returns <Fragment length=L id=ID> where L=length of fragment 175 and ID the identifier (rank in the library). 176 """ 177 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
178 179
180 -def _make_fragment_list(pp, length):
181 """Dice up a peptide in fragments of length "length". 182 183 @param pp: a list of residues (part of one peptide) 184 @type pp: [L{Residue}, L{Residue}, ...] 185 186 @param length: fragment length 187 @type length: int 188 """ 189 frag_list = [] 190 for i in range(0, len(pp) - length + 1): 191 f = Fragment(length, -1) 192 for j in range(0, length): 193 residue = pp[i + j] 194 resname = residue.get_resname() 195 if residue.has_id("CA"): 196 ca = residue["CA"] 197 else: 198 raise PDBException("CHAINBREAK") 199 if ca.is_disordered(): 200 raise PDBException("CHAINBREAK") 201 ca_coord = ca.get_coord() 202 f.add_residue(resname, ca_coord) 203 frag_list.append(f) 204 return frag_list
205 206
207 -def _map_fragment_list(flist, reflist):
208 """Map flist fragments to closest entry in reflist (PRIVATE). 209 210 Map all frgaments in flist to the closest (in RMSD) fragment in reflist. 211 212 Returns a list of reflist indices. 213 214 @param flist: list of protein fragments 215 @type flist: [L{Fragment}, L{Fragment}, ...] 216 217 @param reflist: list of reference (ie. library) fragments 218 @type reflist: [L{Fragment}, L{Fragment}, ...] 219 """ 220 mapped = [] 221 for f in flist: 222 rank = [] 223 for i in range(0, len(reflist)): 224 rf = reflist[i] 225 rms = f - rf 226 rank.append((rms, rf)) 227 rank.sort() 228 fragment = rank[0][1] 229 mapped.append(fragment) 230 return mapped
231 232
233 -class FragmentMapper(object):
234 """Map polypeptides in a model to lists of representative fragments.""" 235
236 - def __init__(self, model, lsize=20, flength=5, fdir="."):
237 """Create instance of FragmentMapper 238 239 @param model: the model that will be mapped 240 @type model: L{Model} 241 242 @param lsize: number of fragments in the library 243 @type lsize: int 244 245 @param flength: length of fragments in the library 246 @type flength: int 247 248 @param fdir: directory where the definition files are 249 found (default=".") 250 @type fdir: string 251 """ 252 if flength == 5: 253 self.edge = 2 254 elif flength == 7: 255 self.edge = 3 256 else: 257 raise PDBException("Fragment length should be 5 or 7.") 258 self.flength = flength 259 self.lsize = lsize 260 self.reflist = _read_fragments(lsize, flength, fdir) 261 self.model = model 262 self.fd = self._map(self.model)
263
264 - def _map(self, model):
265 """@param model: the model that will be mapped 266 @type model: L{Model} 267 """ 268 ppb = PPBuilder() 269 ppl = ppb.build_peptides(model) 270 fd = {} 271 for pp in ppl: 272 try: 273 # make fragments 274 flist = _make_fragment_list(pp, self.flength) 275 # classify fragments 276 mflist = _map_fragment_list(flist, self.reflist) 277 for i in range(0, len(pp)): 278 res = pp[i] 279 if i < self.edge: 280 # start residues 281 continue 282 elif i >= (len(pp) - self.edge): 283 # end residues 284 continue 285 else: 286 # fragment 287 index = i - self.edge 288 assert(index >= 0) 289 fd[res] = mflist[index] 290 except PDBException as why: 291 if why == 'CHAINBREAK': 292 # Funny polypeptide - skip 293 pass 294 else: 295 raise PDBException(why) 296 return fd
297
298 - def has_key(self, res):
299 """(Obsolete) 300 301 @type res: L{Residue} 302 """ 303 import warnings 304 from Bio import BiopythonDeprecationWarning 305 warnings.warn("has_key is deprecated; use 'res in object' instead", BiopythonDeprecationWarning) 306 return (res in self)
307
308 - def __contains__(self, res):
309 """True if the given residue is in any of the mapped fragments. 310 311 @type res: L{Residue} 312 """ 313 return (res in self.fd)
314
315 - def __getitem__(self, res):
316 """Get an entry. 317 318 @type res: L{Residue} 319 320 @return: fragment classification 321 @rtype: L{Fragment} 322 """ 323 return self.fd[res]
324