Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parser for PDB files.""" 
  7   
  8  # For using with statement in Python 2.5 or Jython 
  9  from __future__ import with_statement 
 10   
 11  import warnings 
 12   
 13  import numpy 
 14   
 15  from Bio.File import as_handle 
 16   
 17  from Bio.PDB.PDBExceptions import PDBConstructionException 
 18  from Bio.PDB.PDBExceptions import PDBConstructionWarning 
 19   
 20  from Bio.PDB.StructureBuilder import StructureBuilder 
 21  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 22   
 23   
 24  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 25   
 26   
27 -class PDBParser(object):
28 """ 29 Parse a PDB file and return a Structure object. 30 """ 31
32 - def __init__(self, PERMISSIVE=True, get_header=False, 33 structure_builder=None, QUIET=False):
34 """ 35 The PDB parser call a number of standard methods in an aggregated 36 StructureBuilder object. Normally this object is instanciated by the 37 PDBParser object itself, but if the user provides his/her own 38 StructureBuilder object, the latter is used instead. 39 40 Arguments: 41 42 o PERMISSIVE - Evaluated as a Boolean. If false, exceptions in 43 constructing the SMCRA data structure are fatal. If true (DEFAULT), 44 the exceptions are caught, but some residues or atoms will be missing. 45 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. 46 47 o structure_builder - an optional user implemented StructureBuilder class. 48 49 o QUIET - Evaluated as a Boolean. If true, warnings issued in constructing 50 the SMCRA data will be suppressed. If false (DEFAULT), they will be shown. 51 These warnings might be indicative of problems in the PDB file! 52 """ 53 if structure_builder is not None: 54 self.structure_builder = structure_builder 55 else: 56 self.structure_builder = StructureBuilder() 57 self.header = None 58 self.trailer = None 59 self.line_counter = 0 60 self.PERMISSIVE = bool(PERMISSIVE) 61 self.QUIET = bool(QUIET)
62 63 # Public methods 64
65 - def get_structure(self, id, file):
66 """Return the structure. 67 68 Arguments: 69 o id - string, the id that will be used for the structure 70 o file - name of the PDB file OR an open filehandle 71 """ 72 73 if self.QUIET: 74 warning_list = warnings.filters[:] 75 warnings.filterwarnings("ignore", category=PDBConstructionWarning) 76 77 self.header = None 78 self.trailer = None 79 # Make a StructureBuilder instance (pass id of structure as parameter) 80 self.structure_builder.init_structure(id) 81 82 with as_handle(file) as handle: 83 self._parse(handle.readlines()) 84 85 self.structure_builder.set_header(self.header) 86 # Return the Structure instance 87 structure = self.structure_builder.get_structure() 88 89 if self.QUIET: 90 warnings.filters = warning_list 91 92 return structure
93
94 - def get_header(self):
95 "Return the header." 96 return self.header
97
98 - def get_trailer(self):
99 "Return the trailer." 100 return self.trailer
101 102 # Private methods 103
104 - def _parse(self, header_coords_trailer):
105 "Parse the PDB file." 106 # Extract the header; return the rest of the file 107 self.header, coords_trailer = self._get_header(header_coords_trailer) 108 # Parse the atomic data; return the PDB file trailer 109 self.trailer = self._parse_coordinates(coords_trailer)
110
111 - def _get_header(self, header_coords_trailer):
112 "Get the header of the PDB file, return the rest." 113 structure_builder = self.structure_builder 114 i = 0 115 for i in range(0, len(header_coords_trailer)): 116 structure_builder.set_line_counter(i + 1) 117 line = header_coords_trailer[i] 118 record_type = line[0:6] 119 if record_type == "ATOM " or record_type == "HETATM" or record_type == "MODEL ": 120 break 121 header = header_coords_trailer[0:i] 122 # Return the rest of the coords+trailer for further processing 123 self.line_counter = i 124 coords_trailer = header_coords_trailer[i:] 125 header_dict = _parse_pdb_header_list(header) 126 return header_dict, coords_trailer
127
128 - def _parse_coordinates(self, coords_trailer):
129 "Parse the atomic data in the PDB file." 130 local_line_counter = 0 131 structure_builder = self.structure_builder 132 current_model_id = 0 133 # Flag we have an open model 134 model_open = 0 135 current_chain_id = None 136 current_segid = None 137 current_residue_id = None 138 current_resname = None 139 for i in range(0, len(coords_trailer)): 140 line = coords_trailer[i] 141 record_type = line[0:6] 142 global_line_counter = self.line_counter + local_line_counter + 1 143 structure_builder.set_line_counter(global_line_counter) 144 if record_type == "ATOM " or record_type == "HETATM": 145 # Initialize the Model - there was no explicit MODEL record 146 if not model_open: 147 structure_builder.init_model(current_model_id) 148 current_model_id += 1 149 model_open = 1 150 fullname = line[12:16] 151 # get rid of whitespace in atom names 152 split_list = fullname.split() 153 if len(split_list) != 1: 154 # atom name has internal spaces, e.g. " N B ", so 155 # we do not strip spaces 156 name = fullname 157 else: 158 # atom name is like " CA ", so we can strip spaces 159 name = split_list[0] 160 altloc = line[16] 161 resname = line[17:20] 162 chainid = line[21] 163 try: 164 serial_number = int(line[6:11]) 165 except: 166 serial_number = 0 167 resseq = int(line[22:26].split()[0]) # sequence identifier 168 icode = line[26] # insertion code 169 if record_type == "HETATM": # hetero atom flag 170 if resname == "HOH" or resname == "WAT": 171 hetero_flag = "W" 172 else: 173 hetero_flag = "H" 174 else: 175 hetero_flag = " " 176 residue_id = (hetero_flag, resseq, icode) 177 # atomic coordinates 178 try: 179 x = float(line[30:38]) 180 y = float(line[38:46]) 181 z = float(line[46:54]) 182 except: 183 # Should we allow parsing to continue in permissive mode? 184 # If so, what coordinates should we default to? Easier to abort! 185 raise PDBConstructionException("Invalid or missing coordinate(s) at line %i." 186 % global_line_counter) 187 coord = numpy.array((x, y, z), "f") 188 # occupancy & B factor 189 try: 190 occupancy = float(line[54:60]) 191 except: 192 self._handle_PDB_exception("Invalid or missing occupancy", 193 global_line_counter) 194 occupancy = 0.0 # Is one or zero a good default? 195 try: 196 bfactor = float(line[60:66]) 197 except: 198 self._handle_PDB_exception("Invalid or missing B factor", 199 global_line_counter) 200 bfactor = 0.0 # The PDB use a default of zero if the data is missing 201 segid = line[72:76] 202 element = line[76:78].strip() 203 if current_segid != segid: 204 current_segid = segid 205 structure_builder.init_seg(current_segid) 206 if current_chain_id != chainid: 207 current_chain_id = chainid 208 structure_builder.init_chain(current_chain_id) 209 current_residue_id = residue_id 210 current_resname = resname 211 try: 212 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 213 except PDBConstructionException, message: 214 self._handle_PDB_exception(message, global_line_counter) 215 elif current_residue_id != residue_id or current_resname != resname: 216 current_residue_id = residue_id 217 current_resname = resname 218 try: 219 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 220 except PDBConstructionException, message: 221 self._handle_PDB_exception(message, global_line_counter) 222 # init atom 223 try: 224 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 225 fullname, serial_number, element) 226 except PDBConstructionException, message: 227 self._handle_PDB_exception(message, global_line_counter) 228 elif record_type == "ANISOU": 229 anisou = map(float, (line[28:35], line[35:42], line[43:49], 230 line[49:56], line[56:63], line[63:70])) 231 # U's are scaled by 10^4 232 anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f") 233 structure_builder.set_anisou(anisou_array) 234 elif record_type == "MODEL ": 235 try: 236 serial_num = int(line[10:14]) 237 except: 238 self._handle_PDB_exception("Invalid or missing model serial number", 239 global_line_counter) 240 serial_num = 0 241 structure_builder.init_model(current_model_id, serial_num) 242 current_model_id += 1 243 model_open = 1 244 current_chain_id = None 245 current_residue_id = None 246 elif record_type == "END " or record_type == "CONECT": 247 # End of atomic data, return the trailer 248 self.line_counter += local_line_counter 249 return coords_trailer[local_line_counter:] 250 elif record_type == "ENDMDL": 251 model_open = 0 252 current_chain_id = None 253 current_residue_id = None 254 elif record_type == "SIGUIJ": 255 # standard deviation of anisotropic B factor 256 siguij = map(float, (line[28:35], line[35:42], line[42:49], 257 line[49:56], line[56:63], line[63:70])) 258 # U sigma's are scaled by 10^4 259 siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f") 260 structure_builder.set_siguij(siguij_array) 261 elif record_type == "SIGATM": 262 # standard deviation of atomic positions 263 sigatm = map(float, (line[30:38], line[38:45], line[46:54], 264 line[54:60], line[60:66])) 265 sigatm_array = numpy.array(sigatm, "f") 266 structure_builder.set_sigatm(sigatm_array) 267 local_line_counter += 1 268 # EOF (does not end in END or CONECT) 269 self.line_counter = self.line_counter + local_line_counter 270 return []
271
272 - def _handle_PDB_exception(self, message, line_counter):
273 """ 274 This method catches an exception that occurs in the StructureBuilder 275 object (if PERMISSIVE), or raises it again, this time adding the 276 PDB line number to the error message. 277 """ 278 message = "%s at line %i." % (message, line_counter) 279 if self.PERMISSIVE: 280 # just print a warning - some residues/atoms may be missing 281 warnings.warn("PDBConstructionException: %s\n" 282 "Exception ignored.\n" 283 "Some atoms or residues may be missing in the data structure." 284 % message, PDBConstructionWarning) 285 else: 286 # exceptions are fatal - raise again with new message (including line nr) 287 raise PDBConstructionException(message)
288 289 290 if __name__ == "__main__": 291 292 import sys 293 294 p = PDBParser(PERMISSIVE=True) 295 296 filename = sys.argv[1] 297 s = p.get_structure("scr", filename) 298 299 for m in s: 300 p = m.get_parent() 301 assert(p is s) 302 for c in m: 303 p = c.get_parent() 304 assert(p is m) 305 for r in c: 306 print r 307 p = r.get_parent() 308 assert(p is c) 309 for a in r: 310 p = a.get_parent() 311 if not p is r: 312 print p, r 313