Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parser for PDB files.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  import warnings 
 11   
 12  try: 
 13      import numpy 
 14  except: 
 15      from Bio import MissingPythonDependencyError 
 16      raise MissingPythonDependencyError( 
 17          "Install NumPy if you want to use the PDB parser.") 
 18   
 19  from Bio.File import as_handle 
 20   
 21  from Bio.PDB.PDBExceptions import PDBConstructionException 
 22  from Bio.PDB.PDBExceptions import PDBConstructionWarning 
 23   
 24  from Bio.PDB.StructureBuilder import StructureBuilder 
 25  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 26   
 27   
 28  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 29   
 30   
31 -class PDBParser(object):
32 """ 33 Parse a PDB file and return a Structure object. 34 """ 35
36 - def __init__(self, PERMISSIVE=True, get_header=False, 37 structure_builder=None, QUIET=False):
38 """ 39 The PDB parser call a number of standard methods in an aggregated 40 StructureBuilder object. Normally this object is instanciated by the 41 PDBParser object itself, but if the user provides his/her own 42 StructureBuilder object, the latter is used instead. 43 44 Arguments: 45 46 o PERMISSIVE - Evaluated as a Boolean. If false, exceptions in 47 constructing the SMCRA data structure are fatal. If true (DEFAULT), 48 the exceptions are caught, but some residues or atoms will be missing. 49 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. 50 51 o structure_builder - an optional user implemented StructureBuilder class. 52 53 o QUIET - Evaluated as a Boolean. If true, warnings issued in constructing 54 the SMCRA data will be suppressed. If false (DEFAULT), they will be shown. 55 These warnings might be indicative of problems in the PDB file! 56 """ 57 if structure_builder is not None: 58 self.structure_builder = structure_builder 59 else: 60 self.structure_builder = StructureBuilder() 61 self.header = None 62 self.trailer = None 63 self.line_counter = 0 64 self.PERMISSIVE = bool(PERMISSIVE) 65 self.QUIET = bool(QUIET)
66 67 # Public methods 68
69 - def get_structure(self, id, file):
70 """Return the structure. 71 72 Arguments: 73 o id - string, the id that will be used for the structure 74 o file - name of the PDB file OR an open filehandle 75 """ 76 77 if self.QUIET: 78 warning_list = warnings.filters[:] 79 warnings.filterwarnings("ignore", category=PDBConstructionWarning) 80 81 self.header = None 82 self.trailer = None 83 # Make a StructureBuilder instance (pass id of structure as parameter) 84 self.structure_builder.init_structure(id) 85 86 with as_handle(file) as handle: 87 self._parse(handle.readlines()) 88 89 self.structure_builder.set_header(self.header) 90 # Return the Structure instance 91 structure = self.structure_builder.get_structure() 92 93 if self.QUIET: 94 warnings.filters = warning_list 95 96 return structure
97
98 - def get_header(self):
99 "Return the header." 100 return self.header
101
102 - def get_trailer(self):
103 "Return the trailer." 104 return self.trailer
105 106 # Private methods 107
108 - def _parse(self, header_coords_trailer):
109 "Parse the PDB file." 110 # Extract the header; return the rest of the file 111 self.header, coords_trailer = self._get_header(header_coords_trailer) 112 # Parse the atomic data; return the PDB file trailer 113 self.trailer = self._parse_coordinates(coords_trailer)
114
115 - def _get_header(self, header_coords_trailer):
116 "Get the header of the PDB file, return the rest." 117 structure_builder = self.structure_builder 118 i = 0 119 for i in range(0, len(header_coords_trailer)): 120 structure_builder.set_line_counter(i + 1) 121 line = header_coords_trailer[i] 122 record_type = line[0:6] 123 if record_type == "ATOM " or record_type == "HETATM" or record_type == "MODEL ": 124 break 125 header = header_coords_trailer[0:i] 126 # Return the rest of the coords+trailer for further processing 127 self.line_counter = i 128 coords_trailer = header_coords_trailer[i:] 129 header_dict = _parse_pdb_header_list(header) 130 return header_dict, coords_trailer
131
132 - def _parse_coordinates(self, coords_trailer):
133 "Parse the atomic data in the PDB file." 134 local_line_counter = 0 135 structure_builder = self.structure_builder 136 current_model_id = 0 137 # Flag we have an open model 138 model_open = 0 139 current_chain_id = None 140 current_segid = None 141 current_residue_id = None 142 current_resname = None 143 for i in range(0, len(coords_trailer)): 144 line = coords_trailer[i] 145 record_type = line[0:6] 146 global_line_counter = self.line_counter + local_line_counter + 1 147 structure_builder.set_line_counter(global_line_counter) 148 if record_type == "ATOM " or record_type == "HETATM": 149 # Initialize the Model - there was no explicit MODEL record 150 if not model_open: 151 structure_builder.init_model(current_model_id) 152 current_model_id += 1 153 model_open = 1 154 fullname = line[12:16] 155 # get rid of whitespace in atom names 156 split_list = fullname.split() 157 if len(split_list) != 1: 158 # atom name has internal spaces, e.g. " N B ", so 159 # we do not strip spaces 160 name = fullname 161 else: 162 # atom name is like " CA ", so we can strip spaces 163 name = split_list[0] 164 altloc = line[16] 165 resname = line[17:20] 166 chainid = line[21] 167 try: 168 serial_number = int(line[6:11]) 169 except: 170 serial_number = 0 171 resseq = int(line[22:26].split()[0]) # sequence identifier 172 icode = line[26] # insertion code 173 if record_type == "HETATM": # hetero atom flag 174 if resname == "HOH" or resname == "WAT": 175 hetero_flag = "W" 176 else: 177 hetero_flag = "H" 178 else: 179 hetero_flag = " " 180 residue_id = (hetero_flag, resseq, icode) 181 # atomic coordinates 182 try: 183 x = float(line[30:38]) 184 y = float(line[38:46]) 185 z = float(line[46:54]) 186 except: 187 # Should we allow parsing to continue in permissive mode? 188 # If so, what coordinates should we default to? Easier to abort! 189 raise PDBConstructionException("Invalid or missing coordinate(s) at line %i." 190 % global_line_counter) 191 coord = numpy.array((x, y, z), "f") 192 # occupancy & B factor 193 try: 194 occupancy = float(line[54:60]) 195 except: 196 self._handle_PDB_exception("Invalid or missing occupancy", 197 global_line_counter) 198 occupancy = None # Rather than arbitrary zero or one 199 try: 200 bfactor = float(line[60:66]) 201 except: 202 self._handle_PDB_exception("Invalid or missing B factor", 203 global_line_counter) 204 bfactor = 0.0 # The PDB use a default of zero if the data is missing 205 segid = line[72:76] 206 element = line[76:78].strip() 207 if current_segid != segid: 208 current_segid = segid 209 structure_builder.init_seg(current_segid) 210 if current_chain_id != chainid: 211 current_chain_id = chainid 212 structure_builder.init_chain(current_chain_id) 213 current_residue_id = residue_id 214 current_resname = resname 215 try: 216 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 217 except PDBConstructionException as message: 218 self._handle_PDB_exception(message, global_line_counter) 219 elif current_residue_id != residue_id or current_resname != resname: 220 current_residue_id = residue_id 221 current_resname = resname 222 try: 223 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 224 except PDBConstructionException as message: 225 self._handle_PDB_exception(message, global_line_counter) 226 # init atom 227 try: 228 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 229 fullname, serial_number, element) 230 except PDBConstructionException as message: 231 self._handle_PDB_exception(message, global_line_counter) 232 elif record_type == "ANISOU": 233 anisou = [float(x) for x in (line[28:35], line[35:42], line[43:49], 234 line[49:56], line[56:63], line[63:70])] 235 # U's are scaled by 10^4 236 anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f") 237 structure_builder.set_anisou(anisou_array) 238 elif record_type == "MODEL ": 239 try: 240 serial_num = int(line[10:14]) 241 except: 242 self._handle_PDB_exception("Invalid or missing model serial number", 243 global_line_counter) 244 serial_num = 0 245 structure_builder.init_model(current_model_id, serial_num) 246 current_model_id += 1 247 model_open = 1 248 current_chain_id = None 249 current_residue_id = None 250 elif record_type == "END " or record_type == "CONECT": 251 # End of atomic data, return the trailer 252 self.line_counter += local_line_counter 253 return coords_trailer[local_line_counter:] 254 elif record_type == "ENDMDL": 255 model_open = 0 256 current_chain_id = None 257 current_residue_id = None 258 elif record_type == "SIGUIJ": 259 # standard deviation of anisotropic B factor 260 siguij = [float(x) for x in (line[28:35], line[35:42], line[42:49], 261 line[49:56], line[56:63], line[63:70])] 262 # U sigma's are scaled by 10^4 263 siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f") 264 structure_builder.set_siguij(siguij_array) 265 elif record_type == "SIGATM": 266 # standard deviation of atomic positions 267 sigatm = [float(x) for x in (line[30:38], line[38:45], line[46:54], 268 line[54:60], line[60:66])] 269 sigatm_array = numpy.array(sigatm, "f") 270 structure_builder.set_sigatm(sigatm_array) 271 local_line_counter += 1 272 # EOF (does not end in END or CONECT) 273 self.line_counter = self.line_counter + local_line_counter 274 return []
275
276 - def _handle_PDB_exception(self, message, line_counter):
277 """ 278 This method catches an exception that occurs in the StructureBuilder 279 object (if PERMISSIVE), or raises it again, this time adding the 280 PDB line number to the error message. 281 """ 282 message = "%s at line %i." % (message, line_counter) 283 if self.PERMISSIVE: 284 # just print a warning - some residues/atoms may be missing 285 warnings.warn("PDBConstructionException: %s\n" 286 "Exception ignored.\n" 287 "Some atoms or residues may be missing in the data structure." 288 % message, PDBConstructionWarning) 289 else: 290 # exceptions are fatal - raise again with new message (including line nr) 291 raise PDBConstructionException(message)
292 293 294 if __name__ == "__main__": 295 296 import sys 297 298 p = PDBParser(PERMISSIVE=True) 299 300 filename = sys.argv[1] 301 s = p.get_structure("scr", filename) 302 303 for m in s: 304 p = m.get_parent() 305 assert(p is s) 306 for c in m: 307 p = c.get_parent() 308 assert(p is m) 309 for r in c: 310 print(r) 311 p = r.get_parent() 312 assert(p is c) 313 for a in r: 314 p = a.get_parent() 315 if not p is r: 316 print("%s %s" % (p, r)) 317