Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parser for PDB files.""" 
  7   
  8  from __future__ import print_function 
  9   
 10  import warnings 
 11   
 12  try: 
 13      import numpy 
 14  except ImportError: 
 15      from Bio import MissingPythonDependencyError 
 16      raise MissingPythonDependencyError( 
 17          "Install NumPy if you want to use the PDB parser.") 
 18   
 19  from Bio.File import as_handle 
 20   
 21  from Bio.PDB.PDBExceptions import PDBConstructionException 
 22  from Bio.PDB.PDBExceptions import PDBConstructionWarning 
 23   
 24  from Bio.PDB.StructureBuilder import StructureBuilder 
 25  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 26   
 27   
 28  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 29   
 30   
31 -class PDBParser(object):
32 """Parse a PDB file and return a Structure object.""" 33
34 - def __init__(self, PERMISSIVE=True, get_header=False, 35 structure_builder=None, QUIET=False):
36 """Create a PDBParser object. 37 38 The PDB parser call a number of standard methods in an aggregated 39 StructureBuilder object. Normally this object is instanciated by the 40 PDBParser object itself, but if the user provides his/her own 41 StructureBuilder object, the latter is used instead. 42 43 Arguments: 44 - PERMISSIVE - Evaluated as a Boolean. If false, exceptions in 45 constructing the SMCRA data structure are fatal. If true (DEFAULT), 46 the exceptions are caught, but some residues or atoms will be missing. 47 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. 48 - structure_builder - an optional user implemented StructureBuilder class. 49 - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing 50 the SMCRA data will be suppressed. If false (DEFAULT), they will be shown. 51 These warnings might be indicative of problems in the PDB file! 52 53 """ 54 if structure_builder is not None: 55 self.structure_builder = structure_builder 56 else: 57 self.structure_builder = StructureBuilder() 58 self.header = None 59 self.trailer = None 60 self.line_counter = 0 61 self.PERMISSIVE = bool(PERMISSIVE) 62 self.QUIET = bool(QUIET)
63 64 # Public methods 65
66 - def get_structure(self, id, file):
67 """Return the structure. 68 69 Arguments: 70 - id - string, the id that will be used for the structure 71 - file - name of the PDB file OR an open filehandle 72 73 """ 74 with warnings.catch_warnings(): 75 if self.QUIET: 76 warnings.filterwarnings("ignore", category=PDBConstructionWarning) 77 78 self.header = None 79 self.trailer = None 80 # Make a StructureBuilder instance (pass id of structure as parameter) 81 self.structure_builder.init_structure(id) 82 83 with as_handle(file, mode='rU') as handle: 84 self._parse(handle.readlines()) 85 86 self.structure_builder.set_header(self.header) 87 # Return the Structure instance 88 structure = self.structure_builder.get_structure() 89 90 return structure
91
92 - def get_header(self):
93 """Return the header.""" 94 return self.header
95
96 - def get_trailer(self):
97 """Return the trailer.""" 98 return self.trailer
99 100 # Private methods 101
102 - def _parse(self, header_coords_trailer):
103 """Parse the PDB file (PRIVATE).""" 104 # Extract the header; return the rest of the file 105 self.header, coords_trailer = self._get_header(header_coords_trailer) 106 # Parse the atomic data; return the PDB file trailer 107 self.trailer = self._parse_coordinates(coords_trailer)
108
109 - def _get_header(self, header_coords_trailer):
110 """Get the header of the PDB file, return the rest (PRIVATE).""" 111 structure_builder = self.structure_builder 112 i = 0 113 for i in range(0, len(header_coords_trailer)): 114 structure_builder.set_line_counter(i + 1) 115 line = header_coords_trailer[i] 116 record_type = line[0:6] 117 if record_type == "ATOM " or record_type == "HETATM" or record_type == "MODEL ": 118 break 119 header = header_coords_trailer[0:i] 120 # Return the rest of the coords+trailer for further processing 121 self.line_counter = i 122 coords_trailer = header_coords_trailer[i:] 123 header_dict = _parse_pdb_header_list(header) 124 return header_dict, coords_trailer
125
126 - def _parse_coordinates(self, coords_trailer):
127 """Parse the atomic data in the PDB file (PRIVATE).""" 128 local_line_counter = 0 129 structure_builder = self.structure_builder 130 current_model_id = 0 131 # Flag we have an open model 132 model_open = 0 133 current_chain_id = None 134 current_segid = None 135 current_residue_id = None 136 current_resname = None 137 for i in range(0, len(coords_trailer)): 138 line = coords_trailer[i].rstrip('\n') 139 record_type = line[0:6] 140 global_line_counter = self.line_counter + local_line_counter + 1 141 structure_builder.set_line_counter(global_line_counter) 142 if record_type == "ATOM " or record_type == "HETATM": 143 # Initialize the Model - there was no explicit MODEL record 144 if not model_open: 145 structure_builder.init_model(current_model_id) 146 current_model_id += 1 147 model_open = 1 148 fullname = line[12:16] 149 # get rid of whitespace in atom names 150 split_list = fullname.split() 151 if len(split_list) != 1: 152 # atom name has internal spaces, e.g. " N B ", so 153 # we do not strip spaces 154 name = fullname 155 else: 156 # atom name is like " CA ", so we can strip spaces 157 name = split_list[0] 158 altloc = line[16] 159 resname = line[17:20] 160 chainid = line[21] 161 try: 162 serial_number = int(line[6:11]) 163 except Exception: 164 serial_number = 0 165 resseq = int(line[22:26].split()[0]) # sequence identifier 166 icode = line[26] # insertion code 167 if record_type == "HETATM": # hetero atom flag 168 if resname == "HOH" or resname == "WAT": 169 hetero_flag = "W" 170 else: 171 hetero_flag = "H" 172 else: 173 hetero_flag = " " 174 residue_id = (hetero_flag, resseq, icode) 175 # atomic coordinates 176 try: 177 x = float(line[30:38]) 178 y = float(line[38:46]) 179 z = float(line[46:54]) 180 except Exception: 181 # Should we allow parsing to continue in permissive mode? 182 # If so, what coordinates should we default to? Easier to abort! 183 raise PDBConstructionException("Invalid or missing coordinate(s) at line %i." 184 % global_line_counter) 185 coord = numpy.array((x, y, z), "f") 186 # occupancy & B factor 187 try: 188 occupancy = float(line[54:60]) 189 except Exception: 190 self._handle_PDB_exception("Invalid or missing occupancy", 191 global_line_counter) 192 occupancy = None # Rather than arbitrary zero or one 193 if occupancy is not None and occupancy < 0: 194 # TODO - Should this be an error in strict mode? 195 # self._handle_PDB_exception("Negative occupancy", 196 # global_line_counter) 197 # This uses fixed text so the warning occurs once only: 198 warnings.warn("Negative occupancy in one or more atoms", PDBConstructionWarning) 199 try: 200 bfactor = float(line[60:66]) 201 except Exception: 202 self._handle_PDB_exception("Invalid or missing B factor", 203 global_line_counter) 204 bfactor = 0.0 # The PDB use a default of zero if the data is missing 205 segid = line[72:76] 206 element = line[76:78].strip().upper() 207 if current_segid != segid: 208 current_segid = segid 209 structure_builder.init_seg(current_segid) 210 if current_chain_id != chainid: 211 current_chain_id = chainid 212 structure_builder.init_chain(current_chain_id) 213 current_residue_id = residue_id 214 current_resname = resname 215 try: 216 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 217 except PDBConstructionException as message: 218 self._handle_PDB_exception(message, global_line_counter) 219 elif current_residue_id != residue_id or current_resname != resname: 220 current_residue_id = residue_id 221 current_resname = resname 222 try: 223 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 224 except PDBConstructionException as message: 225 self._handle_PDB_exception(message, global_line_counter) 226 # init atom 227 try: 228 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 229 fullname, serial_number, element) 230 except PDBConstructionException as message: 231 self._handle_PDB_exception(message, global_line_counter) 232 elif record_type == "ANISOU": 233 anisou = [float(x) for x in (line[28:35], line[35:42], line[43:49], 234 line[49:56], line[56:63], line[63:70])] 235 # U's are scaled by 10^4 236 anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f") 237 structure_builder.set_anisou(anisou_array) 238 elif record_type == "MODEL ": 239 try: 240 serial_num = int(line[10:14]) 241 except Exception: 242 self._handle_PDB_exception("Invalid or missing model serial number", 243 global_line_counter) 244 serial_num = 0 245 structure_builder.init_model(current_model_id, serial_num) 246 current_model_id += 1 247 model_open = 1 248 current_chain_id = None 249 current_residue_id = None 250 elif record_type == "END " or record_type == "CONECT": 251 # End of atomic data, return the trailer 252 self.line_counter += local_line_counter 253 return coords_trailer[local_line_counter:] 254 elif record_type == "ENDMDL": 255 model_open = 0 256 current_chain_id = None 257 current_residue_id = None 258 elif record_type == "SIGUIJ": 259 # standard deviation of anisotropic B factor 260 siguij = [float(x) for x in (line[28:35], line[35:42], line[42:49], 261 line[49:56], line[56:63], line[63:70])] 262 # U sigma's are scaled by 10^4 263 siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f") 264 structure_builder.set_siguij(siguij_array) 265 elif record_type == "SIGATM": 266 # standard deviation of atomic positions 267 sigatm = [float(x) for x in (line[30:38], line[38:45], line[46:54], 268 line[54:60], line[60:66])] 269 sigatm_array = numpy.array(sigatm, "f") 270 structure_builder.set_sigatm(sigatm_array) 271 local_line_counter += 1 272 # EOF (does not end in END or CONECT) 273 self.line_counter = self.line_counter + local_line_counter 274 return []
275
276 - def _handle_PDB_exception(self, message, line_counter):
277 """Handle exception (PRIVATE). 278 279 This method catches an exception that occurs in the StructureBuilder 280 object (if PERMISSIVE), or raises it again, this time adding the 281 PDB line number to the error message. 282 """ 283 message = "%s at line %i." % (message, line_counter) 284 if self.PERMISSIVE: 285 # just print a warning - some residues/atoms may be missing 286 warnings.warn("PDBConstructionException: %s\n" 287 "Exception ignored.\n" 288 "Some atoms or residues may be missing in the data structure." 289 % message, PDBConstructionWarning) 290 else: 291 # exceptions are fatal - raise again with new message (including line nr) 292 raise PDBConstructionException(message)
293