Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parser for PDB files.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  import warnings 
 11   
 12  try: 
 13      import numpy 
 14  except: 
 15      from Bio import MissingPythonDependencyError 
 16      raise MissingPythonDependencyError( 
 17          "Install NumPy if you want to use the PDB parser.") 
 18   
 19  from Bio.File import as_handle 
 20   
 21  from Bio.PDB.PDBExceptions import PDBConstructionException 
 22  from Bio.PDB.PDBExceptions import PDBConstructionWarning 
 23   
 24  from Bio.PDB.StructureBuilder import StructureBuilder 
 25  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 26   
 27   
 28  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 29   
 30   
31 -class PDBParser(object):
32 """Parse a PDB file and return a Structure object.""" 33
34 - def __init__(self, PERMISSIVE=True, get_header=False, 35 structure_builder=None, QUIET=False):
36 """Create a PDBParser object. 37 38 The PDB parser call a number of standard methods in an aggregated 39 StructureBuilder object. Normally this object is instanciated by the 40 PDBParser object itself, but if the user provides his/her own 41 StructureBuilder object, the latter is used instead. 42 43 Arguments: 44 - PERMISSIVE - Evaluated as a Boolean. If false, exceptions in 45 constructing the SMCRA data structure are fatal. If true (DEFAULT), 46 the exceptions are caught, but some residues or atoms will be missing. 47 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. 48 - structure_builder - an optional user implemented StructureBuilder class. 49 - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing 50 the SMCRA data will be suppressed. If false (DEFAULT), they will be shown. 51 These warnings might be indicative of problems in the PDB file! 52 """ 53 if structure_builder is not None: 54 self.structure_builder = structure_builder 55 else: 56 self.structure_builder = StructureBuilder() 57 self.header = None 58 self.trailer = None 59 self.line_counter = 0 60 self.PERMISSIVE = bool(PERMISSIVE) 61 self.QUIET = bool(QUIET)
62 63 # Public methods 64
65 - def get_structure(self, id, file):
66 """Return the structure. 67 68 Arguments: 69 - id - string, the id that will be used for the structure 70 - file - name of the PDB file OR an open filehandle 71 """ 72 with warnings.catch_warnings(): 73 if self.QUIET: 74 warnings.filterwarnings("ignore", category=PDBConstructionWarning) 75 76 self.header = None 77 self.trailer = None 78 # Make a StructureBuilder instance (pass id of structure as parameter) 79 self.structure_builder.init_structure(id) 80 81 with as_handle(file) as handle: 82 self._parse(handle.readlines()) 83 84 self.structure_builder.set_header(self.header) 85 # Return the Structure instance 86 structure = self.structure_builder.get_structure() 87 88 return structure
89
90 - def get_header(self):
91 """Return the header.""" 92 return self.header
93
94 - def get_trailer(self):
95 """Return the trailer.""" 96 return self.trailer
97 98 # Private methods 99
100 - def _parse(self, header_coords_trailer):
101 """Parse the PDB file (PRIVATE).""" 102 # Extract the header; return the rest of the file 103 self.header, coords_trailer = self._get_header(header_coords_trailer) 104 # Parse the atomic data; return the PDB file trailer 105 self.trailer = self._parse_coordinates(coords_trailer)
106
107 - def _get_header(self, header_coords_trailer):
108 """Get the header of the PDB file, return the rest (PRIVATE).""" 109 structure_builder = self.structure_builder 110 i = 0 111 for i in range(0, len(header_coords_trailer)): 112 structure_builder.set_line_counter(i + 1) 113 line = header_coords_trailer[i] 114 record_type = line[0:6] 115 if record_type == "ATOM " or record_type == "HETATM" or record_type == "MODEL ": 116 break 117 header = header_coords_trailer[0:i] 118 # Return the rest of the coords+trailer for further processing 119 self.line_counter = i 120 coords_trailer = header_coords_trailer[i:] 121 header_dict = _parse_pdb_header_list(header) 122 return header_dict, coords_trailer
123
124 - def _parse_coordinates(self, coords_trailer):
125 """Parse the atomic data in the PDB file (PRIVATE).""" 126 local_line_counter = 0 127 structure_builder = self.structure_builder 128 current_model_id = 0 129 # Flag we have an open model 130 model_open = 0 131 current_chain_id = None 132 current_segid = None 133 current_residue_id = None 134 current_resname = None 135 for i in range(0, len(coords_trailer)): 136 line = coords_trailer[i] 137 record_type = line[0:6] 138 global_line_counter = self.line_counter + local_line_counter + 1 139 structure_builder.set_line_counter(global_line_counter) 140 if record_type == "ATOM " or record_type == "HETATM": 141 # Initialize the Model - there was no explicit MODEL record 142 if not model_open: 143 structure_builder.init_model(current_model_id) 144 current_model_id += 1 145 model_open = 1 146 fullname = line[12:16] 147 # get rid of whitespace in atom names 148 split_list = fullname.split() 149 if len(split_list) != 1: 150 # atom name has internal spaces, e.g. " N B ", so 151 # we do not strip spaces 152 name = fullname 153 else: 154 # atom name is like " CA ", so we can strip spaces 155 name = split_list[0] 156 altloc = line[16] 157 resname = line[17:20] 158 chainid = line[21] 159 try: 160 serial_number = int(line[6:11]) 161 except: 162 serial_number = 0 163 resseq = int(line[22:26].split()[0]) # sequence identifier 164 icode = line[26] # insertion code 165 if record_type == "HETATM": # hetero atom flag 166 if resname == "HOH" or resname == "WAT": 167 hetero_flag = "W" 168 else: 169 hetero_flag = "H" 170 else: 171 hetero_flag = " " 172 residue_id = (hetero_flag, resseq, icode) 173 # atomic coordinates 174 try: 175 x = float(line[30:38]) 176 y = float(line[38:46]) 177 z = float(line[46:54]) 178 except: 179 # Should we allow parsing to continue in permissive mode? 180 # If so, what coordinates should we default to? Easier to abort! 181 raise PDBConstructionException("Invalid or missing coordinate(s) at line %i." 182 % global_line_counter) 183 coord = numpy.array((x, y, z), "f") 184 # occupancy & B factor 185 try: 186 occupancy = float(line[54:60]) 187 except: 188 self._handle_PDB_exception("Invalid or missing occupancy", 189 global_line_counter) 190 occupancy = None # Rather than arbitrary zero or one 191 if occupancy is not None and occupancy < 0: 192 # TODO - Should this be an error in strict mode? 193 # self._handle_PDB_exception("Negative occupancy", 194 # global_line_counter) 195 # This uses fixed text so the warning occurs once only: 196 warnings.warn("Negative occupancy in one or more atoms", PDBConstructionWarning) 197 try: 198 bfactor = float(line[60:66]) 199 except: 200 self._handle_PDB_exception("Invalid or missing B factor", 201 global_line_counter) 202 bfactor = 0.0 # The PDB use a default of zero if the data is missing 203 segid = line[72:76] 204 element = line[76:78].strip() 205 if current_segid != segid: 206 current_segid = segid 207 structure_builder.init_seg(current_segid) 208 if current_chain_id != chainid: 209 current_chain_id = chainid 210 structure_builder.init_chain(current_chain_id) 211 current_residue_id = residue_id 212 current_resname = resname 213 try: 214 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 215 except PDBConstructionException as message: 216 self._handle_PDB_exception(message, global_line_counter) 217 elif current_residue_id != residue_id or current_resname != resname: 218 current_residue_id = residue_id 219 current_resname = resname 220 try: 221 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 222 except PDBConstructionException as message: 223 self._handle_PDB_exception(message, global_line_counter) 224 # init atom 225 try: 226 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 227 fullname, serial_number, element) 228 except PDBConstructionException as message: 229 self._handle_PDB_exception(message, global_line_counter) 230 elif record_type == "ANISOU": 231 anisou = [float(x) for x in (line[28:35], line[35:42], line[43:49], 232 line[49:56], line[56:63], line[63:70])] 233 # U's are scaled by 10^4 234 anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f") 235 structure_builder.set_anisou(anisou_array) 236 elif record_type == "MODEL ": 237 try: 238 serial_num = int(line[10:14]) 239 except: 240 self._handle_PDB_exception("Invalid or missing model serial number", 241 global_line_counter) 242 serial_num = 0 243 structure_builder.init_model(current_model_id, serial_num) 244 current_model_id += 1 245 model_open = 1 246 current_chain_id = None 247 current_residue_id = None 248 elif record_type == "END " or record_type == "CONECT": 249 # End of atomic data, return the trailer 250 self.line_counter += local_line_counter 251 return coords_trailer[local_line_counter:] 252 elif record_type == "ENDMDL": 253 model_open = 0 254 current_chain_id = None 255 current_residue_id = None 256 elif record_type == "SIGUIJ": 257 # standard deviation of anisotropic B factor 258 siguij = [float(x) for x in (line[28:35], line[35:42], line[42:49], 259 line[49:56], line[56:63], line[63:70])] 260 # U sigma's are scaled by 10^4 261 siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f") 262 structure_builder.set_siguij(siguij_array) 263 elif record_type == "SIGATM": 264 # standard deviation of atomic positions 265 sigatm = [float(x) for x in (line[30:38], line[38:45], line[46:54], 266 line[54:60], line[60:66])] 267 sigatm_array = numpy.array(sigatm, "f") 268 structure_builder.set_sigatm(sigatm_array) 269 local_line_counter += 1 270 # EOF (does not end in END or CONECT) 271 self.line_counter = self.line_counter + local_line_counter 272 return []
273
274 - def _handle_PDB_exception(self, message, line_counter):
275 """Handle exception (PRIVATE). 276 277 This method catches an exception that occurs in the StructureBuilder 278 object (if PERMISSIVE), or raises it again, this time adding the 279 PDB line number to the error message. 280 """ 281 message = "%s at line %i." % (message, line_counter) 282 if self.PERMISSIVE: 283 # just print a warning - some residues/atoms may be missing 284 warnings.warn("PDBConstructionException: %s\n" 285 "Exception ignored.\n" 286 "Some atoms or residues may be missing in the data structure." 287 % message, PDBConstructionWarning) 288 else: 289 # exceptions are fatal - raise again with new message (including line nr) 290 raise PDBConstructionException(message)
291 292 293 if __name__ == "__main__": 294 295 import sys 296 297 p = PDBParser(PERMISSIVE=True) 298 299 filename = sys.argv[1] 300 s = p.get_structure("scr", filename) 301 302 for m in s: 303 p = m.get_parent() 304 assert(p is s) 305 for c in m: 306 p = c.get_parent() 307 assert(p is m) 308 for r in c: 309 print(r) 310 p = r.get_parent() 311 assert(p is c) 312 for a in r: 313 p = a.get_parent() 314 if p is not r: 315 print("%s %s" % (p, r)) 316