Package Bio :: Package PDB :: Module ResidueDepth'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.ResidueDepth'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # Copyright (C) 2017, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com) 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Calculation of residue depth using command line tool MSMS. 
  8   
  9  This module uses Michel Sanner's MSMS program for the surface calculation. 
 10  See: http://mgltools.scripps.edu/packages/MSMS 
 11   
 12  Residue depth is the average distance of the atoms of a residue from 
 13  the solvent accessible surface. 
 14   
 15  Residue Depth: 
 16   
 17      >>> rd = ResidueDepth(model) 
 18      >>> print(rd[(chain_id, res_id)]) 
 19   
 20  Direct MSMS interface, typical use: 
 21   
 22      >>> parser = PDBParser() 
 23      >>> structure = parser.get_structure('1FAT.pdb') 
 24      >>> model = structure[0] 
 25      >>> surface = get_surface(model) 
 26   
 27  The surface is a Numeric array with all the surface vertices. 
 28   
 29  Distance to surface: 
 30   
 31      >>> dist = min_dist(coord, surface) 
 32   
 33  where coord is the coord of an atom within the volume bound by 
 34  the surface (ie. atom depth). 
 35   
 36  To calculate the residue depth (average atom depth of the atoms 
 37  in a residue): 
 38   
 39      >>> rd = residue_depth(residue, surface) 
 40   
 41  """ 
 42   
 43  from __future__ import print_function 
 44   
 45  import os 
 46  import tempfile 
 47  import warnings 
 48   
 49  import numpy 
 50   
 51  from Bio.PDB import PDBParser 
 52  from Bio.PDB import Selection 
 53  from Bio.PDB.AbstractPropertyMap import AbstractPropertyMap 
 54  from Bio.PDB.Polypeptide import is_aa 
 55   
 56  from Bio import BiopythonWarning 
 57  from Bio import BiopythonDeprecationWarning 
 58   
 59  # PDB_TO_XYZR is a BASH script and will not run on Windows 
 60  # Since it only reads atmtypenumbers to a mapping structure we can replicate 
 61  # that functionality here and avoid this dependency altogether. 
 62  # 
 63  # Description of PDB_TO_XYZR 
 64  # Maps residue type and atom name pairs into Connolly ".atm" numeric codes 
 65  #  as used in MS and AMS, and into actual radius values 
 66  # 
 67  # In case of missing radius, use 0.01 
 68  # 
 69  # Table 1: Atom Type to radius 
 70  _atomic_radii = { 
 71      #   atom num dist  Rexplicit Runited-atom 
 72      1: (0.57, 1.40, 1.40), 
 73      2: (0.66, 1.40, 1.60), 
 74      3: (0.57, 1.40, 1.40), 
 75      4: (0.70, 1.54, 1.70), 
 76      5: (0.70, 1.54, 1.80), 
 77      6: (0.70, 1.54, 2.00), 
 78      7: (0.77, 1.74, 2.00), 
 79      8: (0.77, 1.74, 2.00), 
 80      9: (0.77, 1.74, 2.00), 
 81      10: (0.67, 1.74, 1.74), 
 82      11: (0.70, 1.74, 1.86), 
 83      12: (1.04, 1.80, 1.85), 
 84      13: (1.04, 1.80, 1.80),  # P, S, and LonePairs 
 85      14: (0.70, 1.54, 1.54),  # non-protonated nitrogens 
 86      15: (0.37, 1.20, 1.20),  # H, D  hydrogen and deuterium 
 87      16: (0.70, 0.00, 1.50),  # obsolete entry, purpose unknown 
 88      17: (3.50, 5.00, 5.00),  # pseudoatom - big ball 
 89      18: (1.74, 1.97, 1.97),  # Ca calcium 
 90      19: (1.25, 1.40, 1.40),  # Zn zinc    (traditional radius) 
 91      20: (1.17, 1.40, 1.40),  # Cu copper  (traditional radius) 
 92      21: (1.45, 1.30, 1.30),  # Fe heme iron 
 93      22: (1.41, 1.49, 1.49),  # Cd cadmium 
 94      23: (0.01, 0.01, 0.01),  # pseudoatom - tiny dot 
 95      24: (0.37, 1.20, 0.00),  # hydrogen vanishing if united-atoms 
 96      25: (1.16, 1.24, 1.24),  # Fe not in heme 
 97      26: (1.36, 1.60, 1.60),  # Mg magnesium 
 98      27: (1.17, 1.24, 1.24),  # Mn manganese 
 99      28: (1.16, 1.25, 1.25),  # Co cobalt 
100      29: (1.17, 2.15, 2.15),  # Se selenium 
101      30: (3.00, 3.00, 3.00),  # obsolete entry, original purpose unknown 
102      31: (1.15, 1.15, 1.15),  # Yb ytterbium +3 ion --- wild guess only 
103      38: (0.95, 1.80, 1.80),  # obsolete entry, original purpose unknown 
104  } 
105   
106  # Table 2: Resname/Aname to Atom Type 
107  # MSMS uses an awk/gawk pattern matching strategy that we cannot replicate 
108  # We will take advantage of our parser to help us in the mapping. 
109   
110   
111 -def _get_atom_radius(atom, rtype='united'):
112 """Translates an atom object to an atomic radius defined in MSMS. 113 114 Uses information from the parent residue and the atom object to define 115 the atom type. 116 117 Returns the radius (float) according to the selected type: 118 - explicit (reads hydrogens) 119 - united (default) 120 121 """ 122 if rtype == 'explicit': 123 typekey = 1 124 elif rtype == 'united': 125 typekey = 2 126 else: 127 raise ValueError("Radius type (%r) not understood. " 128 "Must be 'explicit' or 'united'" % rtype) 129 130 resname = atom.parent.resname 131 het_atm = atom.parent.id[0] 132 133 at_name = atom.name 134 at_elem = atom.element 135 136 # Hydrogens 137 if at_elem == 'H' or at_elem == 'D': 138 return _atomic_radii[15][typekey] 139 # HETATMs 140 elif het_atm == 'W' and at_elem == 'O': 141 return _atomic_radii[2][typekey] 142 elif het_atm != ' ' and at_elem == 'CA': 143 return _atomic_radii[18][typekey] 144 elif het_atm != ' ' and at_elem == 'CD': 145 return _atomic_radii[22][typekey] 146 elif resname == 'ACE' and at_name == 'CA': 147 return _atomic_radii[9][typekey] 148 # Main chain atoms 149 elif at_name == 'N': 150 return _atomic_radii[4][typekey] 151 elif at_name == 'CA': 152 return _atomic_radii[7][typekey] 153 elif at_name == 'C': 154 return _atomic_radii[10][typekey] 155 elif at_name == 'O': 156 return _atomic_radii[1][typekey] 157 elif at_name == 'P': 158 return _atomic_radii[13][typekey] 159 # CB atoms 160 elif at_name == 'CB' and resname == 'ALA': 161 return _atomic_radii[9][typekey] 162 elif at_name == 'CB' and resname in set(('ILE', 'THR', 'VAL')): 163 return _atomic_radii[7][typekey] 164 elif at_name == 'CB': 165 return _atomic_radii[8][typekey] 166 # CG atoms 167 elif at_name == 'CG' and resname in set(('ASN', 'ASP', 'ASX', 'HIS', 'HIP', 168 'HIE', 'HID', 'HISN', 'HISL', 169 'LEU', 'PHE', 'TRP', 'TYR')): 170 return _atomic_radii[10][typekey] 171 elif at_name == 'CG' and resname == 'LEU': 172 return _atomic_radii[7][typekey] 173 elif at_name == 'CG': 174 return _atomic_radii[8][typekey] 175 # General amino acids in alphabetical order 176 elif resname == 'GLN' and at_elem == 'O': 177 return _atomic_radii[3][typekey] 178 elif resname == 'ACE' and at_name == 'CH3': 179 return _atomic_radii[9][typekey] 180 elif resname == 'ARG' and at_name == 'CD': 181 return _atomic_radii[8][typekey] 182 elif resname == 'ARG' and at_name in set(('NE', 'RE')): 183 return _atomic_radii[4][typekey] 184 elif resname == 'ARG' and at_name == 'CZ': 185 return _atomic_radii[10][typekey] 186 elif resname == 'ARG' and at_name.startswith(('NH', 'RH')): 187 return _atomic_radii[5][typekey] 188 elif resname == 'ASN' and at_name == 'OD1': 189 return _atomic_radii[1][typekey] 190 elif resname == 'ASN' and at_name == 'ND2': 191 return _atomic_radii[5][typekey] 192 elif resname == 'ASN' and at_name.startswith('AD'): 193 return _atomic_radii[3][typekey] 194 elif resname == 'ASP' and at_name.startswith(('OD', 'ED')): 195 return _atomic_radii[3][typekey] 196 elif resname == 'ASX' and at_name.startswith('OD1'): 197 return _atomic_radii[1][typekey] 198 elif resname == 'ASX' and at_name == 'ND2': 199 return _atomic_radii[3][typekey] 200 elif resname == 'ASX' and at_name.startswith(('OD', 'AD')): 201 return _atomic_radii[3][typekey] 202 elif resname in set(('CYS', 'CYX', 'CYM')) and at_name == 'SG': 203 return _atomic_radii[13][typekey] 204 elif resname in set(('CYS', 'MET')) and at_name.startswith('LP'): 205 return _atomic_radii[13][typekey] 206 elif resname == 'CUH' and at_name == 'SG': 207 return _atomic_radii[12][typekey] 208 elif resname == 'GLU' and at_name.startswith(('OE', 'EE')): 209 return _atomic_radii[3][typekey] 210 elif resname in set(('GLU', 'GLN', 'GLX')) and at_name == 'CD': 211 return _atomic_radii[10][typekey] 212 elif resname == 'GLN' and at_name == 'OE1': 213 return _atomic_radii[1][typekey] 214 elif resname == 'GLN' and at_name == 'NE2': 215 return _atomic_radii[5][typekey] 216 elif resname in set(('GLN', 'GLX')) and at_name.startswith('AE'): 217 return _atomic_radii[3][typekey] 218 # Histdines and friends 219 # There are 4 kinds of HIS rings: HIS (no protons), HID (proton on Delta), 220 # HIE (proton on epsilon), and HIP (protons on both) 221 # Protonated nitrogens are numbered 4, else 14 222 # HIS is treated here as the same as HIE 223 # 224 # HISL is a deprotonated HIS (the L means liganded) 225 elif resname in set(('HIS', 'HID', 'HIE', 'HIP', 'HISL')) and at_name in set(('CE1', 'CD2')): # noqa: E501 226 return _atomic_radii[11][typekey] 227 elif resname in set(('HIS', 'HID', 'HIE', 'HISL')) and at_name == 'ND1': 228 return _atomic_radii[14][typekey] 229 elif resname in set(('HID', 'HIP')) and at_name in set(('ND1', 'RD1')): 230 return _atomic_radii[4][typekey] 231 elif resname in set(('HIS', 'HIE', 'HIP')) and at_name in set(('NE2', 'RE2')): 232 return _atomic_radii[4][typekey] 233 elif resname in set(('HID', 'HISL')) and at_name in set(('NE2', 'RE2')): 234 return _atomic_radii[14][typekey] 235 elif resname in set(('HIS', 'HID', 'HIP', 'HISL')) and at_name.startswith(('AD', 'AE')): # noqa: E501 236 return _atomic_radii[4][typekey] 237 # More amino acids 238 elif resname == 'ILE' and at_name == 'CG1': 239 return _atomic_radii[8][typekey] 240 elif resname == 'ILE' and at_name == 'CG2': 241 return _atomic_radii[9][typekey] 242 elif resname == 'ILE' and at_name in set(('CD', 'CD1')): 243 return _atomic_radii[9][typekey] 244 elif resname == 'LEU' and at_name.startswith('CD'): 245 return _atomic_radii[9][typekey] 246 elif resname == 'LYS' and at_name in set(('CG', 'CD', 'CE')): 247 return _atomic_radii[8][typekey] 248 elif resname == 'LYS' and at_name in set(('NZ', 'KZ')): 249 return _atomic_radii[6][typekey] 250 elif resname == 'MET' and at_name == 'SD': 251 return _atomic_radii[13][typekey] 252 elif resname == 'MET' and at_name == 'CE': 253 return _atomic_radii[9][typekey] 254 elif resname == 'PHE' and at_name.startswith(('CD', 'CE', 'CZ')): 255 return _atomic_radii[11][typekey] 256 elif resname == 'PRO' and at_name in set(('CG', 'CD')): 257 return _atomic_radii[8][typekey] 258 elif resname == 'CSO' and at_name in set(('SE', 'SEG')): 259 return _atomic_radii[9][typekey] 260 elif resname == 'CSO' and at_name.startswith('OD'): 261 return _atomic_radii[3][typekey] 262 elif resname == 'SER' and at_name == 'OG': 263 return _atomic_radii[2][typekey] 264 elif resname == 'THR' and at_name == 'OG1': 265 return _atomic_radii[2][typekey] 266 elif resname == 'THR' and at_name == 'CG2': 267 return _atomic_radii[9][typekey] 268 elif resname == 'TRP' and at_name == 'CD1': 269 return _atomic_radii[11][typekey] 270 elif resname == 'TRP' and at_name in set(('CD2', 'CE2')): 271 return _atomic_radii[10][typekey] 272 elif resname == 'TRP' and at_name == 'NE1': 273 return _atomic_radii[4][typekey] 274 elif resname == 'TRP' and at_name in set(('CE3', 'CZ2', 'CZ3', 'CH2')): 275 return _atomic_radii[11][typekey] 276 elif resname == 'TYR' and at_name in set(('CD1', 'CD2', 'CE1', 'CE2')): 277 return _atomic_radii[11][typekey] 278 elif resname == 'TYR' and at_name == 'CZ': 279 return _atomic_radii[10][typekey] 280 elif resname == 'TYR' and at_name == 'OH': 281 return _atomic_radii[2][typekey] 282 elif resname == 'VAL' and at_name in set(('CG1', 'CG2')): 283 return _atomic_radii[9][typekey] 284 elif at_name in set(('CD', 'CD')): 285 return _atomic_radii[8][typekey] 286 # Co-factors, and other weirdos 287 elif resname in set(('FS3', 'FS4')) and at_name.startswith('FE') \ 288 and at_name.endswith(('1', '2', '3', '4', '5', '6', '7')): 289 return _atomic_radii[21][typekey] 290 elif resname in set(('FS3', 'FS4')) and at_name.startswith('S') \ 291 and at_name.endswith(('1', '2', '3', '4', '5', '6', '7')): 292 return _atomic_radii[13][typekey] 293 elif resname == 'FS3' and at_name == 'OXO': 294 return _atomic_radii[1][typekey] 295 elif resname == 'FEO' and at_name in set(('FE1', 'FE2')): 296 return _atomic_radii[21][typekey] 297 elif resname == 'HEM' and at_name in set(('O1', 'O2')): 298 return _atomic_radii[1][typekey] 299 elif resname == 'HEM' and at_name == 'FE': 300 return _atomic_radii[21][typekey] 301 elif resname == 'HEM' and at_name in set(('CHA', 'CHB', 'CHC', 'CHD', 302 'CAB', 'CAC', 'CBB', 'CBC')): 303 return _atomic_radii[11][typekey] 304 elif resname == 'HEM' and at_name in set(('NA', 'NB', 'NC', 'ND', 305 'N A', 'N B', 'N C', 'N D')): 306 return _atomic_radii[14][typekey] 307 elif resname == 'HEM' and at_name in set(('C1A', 'C1B', 'C1C', 'C1D', 308 'C2A', 'C2B', 'C2C', 'C2D', 309 'C3A', 'C3B', 'C3C', 'C3D', 310 'C4A', 'C4B', 'C4C', 'C4D', 311 'CGA', 'CGD')): 312 return _atomic_radii[10][typekey] 313 elif resname == 'HEM' and at_name in set(('CMA', 'CMB', 'CMC', 'CMD')): 314 return _atomic_radii[9][typekey] 315 elif resname == 'HEM' and at_name == 'OH2': 316 return _atomic_radii[2][typekey] 317 elif resname == 'AZI' and at_name in set(('N1', 'N2', 'N3')): 318 return _atomic_radii[14][typekey] 319 elif resname == 'MPD' and at_name in set(('C1', 'C5', 'C6')): 320 return _atomic_radii[9][typekey] 321 elif resname == 'MPD' and at_name == 'C2': 322 return _atomic_radii[10][typekey] 323 elif resname == 'MPD' and at_name == 'C3': 324 return _atomic_radii[8][typekey] 325 elif resname == 'MPD' and at_name == 'C4': 326 return _atomic_radii[7][typekey] 327 elif resname == 'MPD' and at_name in set(('O7', 'O8')): 328 return _atomic_radii[2][typekey] 329 elif resname in set(('SO4', 'SUL')) and at_name == 'S': 330 return _atomic_radii[13][typekey] 331 elif resname in set(('SO4', 'SUL', 'PO4', 'PHO')) and at_name in set(('O1', 'O2', 'O3', 'O4')): # noqa: E501 332 return _atomic_radii[3][typekey] 333 elif resname == 'PC ' and at_name in set(('O1', 'O2', 'O3', 'O4')): 334 return _atomic_radii[3][typekey] 335 elif resname == 'PC ' and at_name == 'P1': 336 return _atomic_radii[13][typekey] 337 elif resname == 'PC ' and at_name in set(('C1', 'C2')): 338 return _atomic_radii[8][typekey] 339 elif resname == 'PC ' and at_name in set(('C3', 'C4', 'C5')): 340 return _atomic_radii[9][typekey] 341 elif resname == 'PC ' and at_name == 'N1': 342 return _atomic_radii[14][typekey] 343 elif resname == 'BIG' and at_name == 'BAL': 344 return _atomic_radii[17][typekey] 345 elif resname in set(('POI', 'DOT')) and at_name in set(('POI', 'DOT')): 346 return _atomic_radii[23][typekey] 347 elif resname == 'FMN' and at_name in set(('N1', 'N5', 'N10')): 348 return _atomic_radii[4][typekey] 349 elif resname == 'FMN' and at_name in set(('C2', 'C4', 'C7', 'C8', 'C10', 350 'C4A', 'C5A', 'C9A')): 351 return _atomic_radii[10][typekey] 352 elif resname == 'FMN' and at_name in set(('O2', 'O4')): 353 return _atomic_radii[1][typekey] 354 elif resname == 'FMN' and at_name == 'N3': 355 return _atomic_radii[14][typekey] 356 elif resname == 'FMN' and at_name in set(('C6', 'C9')): 357 return _atomic_radii[11][typekey] 358 elif resname == 'FMN' and at_name in set(('C7M', 'C8M')): 359 return _atomic_radii[9][typekey] 360 elif resname == 'FMN' and at_name.startswith(('C1', 'C2', 'C3', 'C4', 'C5')): # noqa: E501 361 return _atomic_radii[8][typekey] 362 elif resname == 'FMN' and at_name.startswith(('O2', 'O3', 'O4')): 363 return _atomic_radii[2][typekey] 364 elif resname == 'FMN' and at_name.startswith('O5'): 365 return _atomic_radii[3][typekey] 366 elif resname == 'FMN' and at_name in set(('OP1', 'OP2', 'OP3')): 367 return _atomic_radii[3][typekey] 368 elif resname in set(('ALK', 'MYR')) and at_name == 'OT1': 369 return _atomic_radii[3][typekey] 370 elif resname in set(('ALK', 'MYR')) and at_name == 'C01': 371 return _atomic_radii[10][typekey] 372 elif resname == 'ALK' and at_name == 'C16': 373 return _atomic_radii[9][typekey] 374 elif resname == 'MYR' and at_name == 'C14': 375 return _atomic_radii[9][typekey] 376 elif resname in set(('ALK', 'MYR')) and at_name.startswith('C'): 377 return _atomic_radii[8][typekey] 378 # Metals 379 elif at_elem == 'CU': 380 return _atomic_radii[20][typekey] 381 elif at_elem == 'ZN': 382 return _atomic_radii[19][typekey] 383 elif at_elem == 'MN': 384 return _atomic_radii[27][typekey] 385 elif at_elem == 'FE': 386 return _atomic_radii[25][typekey] 387 elif at_elem == 'MG': 388 return _atomic_radii[26][typekey] 389 elif at_elem == 'CO': 390 return _atomic_radii[28][typekey] 391 elif at_elem == 'SE': 392 return _atomic_radii[29][typekey] 393 elif at_elem == 'YB': 394 return _atomic_radii[31][typekey] 395 # Others 396 elif at_name == 'SEG': 397 return _atomic_radii[9][typekey] 398 elif at_name == 'OXT': 399 return _atomic_radii[3][typekey] 400 # Catch-alls 401 elif at_name.startswith(('OT', 'E')): 402 return _atomic_radii[3][typekey] 403 elif at_name.startswith('S'): 404 return _atomic_radii[13][typekey] 405 elif at_name.startswith('C'): 406 return _atomic_radii[7][typekey] 407 elif at_name.startswith('A'): 408 return _atomic_radii[11][typekey] 409 elif at_name.startswith('O'): 410 return _atomic_radii[1][typekey] 411 elif at_name.startswith(('N', 'R')): 412 return _atomic_radii[4][typekey] 413 elif at_name.startswith('K'): 414 return _atomic_radii[6][typekey] 415 elif at_name in set(('PA', 'PB', 'PC', 'PD')): 416 return _atomic_radii[13][typekey] 417 elif at_name.startswith('P'): 418 return _atomic_radii[13][typekey] 419 elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('O'): # noqa: E501 420 return _atomic_radii[1][typekey] 421 elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('N'): # noqa: E501 422 return _atomic_radii[4][typekey] 423 elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('C'): # noqa: E501 424 return _atomic_radii[7][typekey] 425 elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('P'): # noqa: E501 426 return _atomic_radii[13][typekey] 427 elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('H'): # noqa: E501 428 return _atomic_radii[15][typekey] 429 else: 430 warnings.warn('{}:{} not in radii library.'.format(at_name, resname), 431 BiopythonWarning) 432 return 0.01
433 434
435 -def _read_vertex_array(filename):
436 """Read the vertex list into a Numeric array.""" 437 with open(filename, "r") as fp: 438 vertex_list = [] 439 for l in fp: 440 sl = l.split() 441 if len(sl) != 9: 442 # skip header 443 continue 444 vl = [float(x) for x in sl[0:3]] 445 vertex_list.append(vl) 446 return numpy.array(vertex_list)
447 448
449 -def get_surface(model, PDB_TO_XYZR=None, MSMS="msms"):
450 """Represent molecular surface as a vertex list array. 451 452 Return a Numpy array that represents the vertex list of the 453 molecular surface. 454 455 Arguments: 456 - PDB_TO_XYZR - deprecated, ignore this. 457 - MSMS - msms executable (used as argument to os.system) 458 459 """ 460 # Issue warning if PDB_TO_XYZR is given 461 if PDB_TO_XYZR is not None: 462 warnings.warn(("PDB_TO_XYZR argument will be deprecated soon" 463 " in favor of an internal mapping algorithm."), 464 BiopythonDeprecationWarning) 465 466 # Replace pdb_to_xyzr 467 # Make x,y,z,radius file 468 atom_list = Selection.unfold_entities(model, 'A') 469 470 xyz_tmp = tempfile.mktemp() 471 with open(xyz_tmp, 'w') as pdb_to_xyzr: 472 for atom in atom_list: 473 x, y, z = atom.coord 474 radius = _get_atom_radius(atom, rtype='united') 475 print('{:6.3f}\t{:6.3f}\t{:6.3f}\t{:1.2f}'.format(x, y, z, radius), 476 file=pdb_to_xyzr) 477 478 # make surface 479 surface_tmp = tempfile.mktemp() 480 MSMS = MSMS + " -probe_radius 1.5 -if %s -of %s > " + tempfile.mktemp() 481 make_surface = MSMS % (xyz_tmp, surface_tmp) 482 os.system(make_surface) 483 surface_file = surface_tmp + ".vert" 484 assert os.path.isfile(surface_file), \ 485 "Failed to generate surface file using command:\n%s" % make_surface 486 487 # read surface vertices from vertex file 488 surface = _read_vertex_array(surface_file) 489 return surface
490 491
492 -def min_dist(coord, surface):
493 """Return minimum distance between coord and surface.""" 494 d = surface - coord 495 d2 = numpy.sum(d * d, 1) 496 return numpy.sqrt(min(d2))
497 498
499 -def residue_depth(residue, surface):
500 """Residue depth as average depth of all its atoms. 501 502 Return average distance to surface for all atoms in a residue, 503 ie. the residue depth. 504 """ 505 atom_list = residue.get_unpacked_list() 506 length = len(atom_list) 507 d = 0 508 for atom in atom_list: 509 coord = atom.get_coord() 510 d = d + min_dist(coord, surface) 511 return d / length
512 513
514 -def ca_depth(residue, surface):
515 if not residue.has_id("CA"): 516 return None 517 ca = residue["CA"] 518 coord = ca.get_coord() 519 return min_dist(coord, surface)
520 521
522 -class ResidueDepth(AbstractPropertyMap):
523 """Calculate residue and CA depth for all residues.""" 524
525 - def __init__(self, model, pdb_file=None):
526 527 # Issue warning if pdb_file is given 528 if pdb_file is not None: 529 warnings.warn(("ResidueDepth no longer requires a pdb file." 530 " This argument will be removed in a future release" 531 " of Biopython."), 532 BiopythonDeprecationWarning) 533 534 depth_dict = {} 535 depth_list = [] 536 depth_keys = [] 537 # get_residue 538 residue_list = Selection.unfold_entities(model, 'R') 539 # make surface from PDB file using MSMS 540 surface = get_surface(model) 541 # calculate rdepth for each residue 542 for residue in residue_list: 543 if not is_aa(residue): 544 continue 545 rd = residue_depth(residue, surface) 546 ca_rd = ca_depth(residue, surface) 547 # Get the key 548 res_id = residue.get_id() 549 chain_id = residue.get_parent().get_id() 550 depth_dict[(chain_id, res_id)] = (rd, ca_rd) 551 depth_list.append((residue, (rd, ca_rd))) 552 depth_keys.append((chain_id, res_id)) 553 # Update xtra information 554 residue.xtra['EXP_RD'] = rd 555 residue.xtra['EXP_RD_CA'] = ca_rd 556 AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
557 558 559 if __name__ == "__main__": 560 561 import sys 562 563 p = PDBParser() 564 s = p.get_structure("X", sys.argv[1]) 565 model = s[0] 566 567 rd = ResidueDepth(model) 568 569 for item in rd: 570 print(item) 571