Package Bio :: Package PDB :: Module MMCIFParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.MMCIFParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """mmCIF parser""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from string import ascii_letters 
 11   
 12  import numpy 
 13   
 14  from Bio._py3k import range 
 15   
 16  from Bio.PDB.MMCIF2Dict import MMCIF2Dict 
 17  from Bio.PDB.StructureBuilder import StructureBuilder 
 18  from Bio.PDB.PDBExceptions import PDBConstructionException 
 19   
 20   
21 -class MMCIFParser(object):
22 - def get_structure(self, structure_id, filename):
23 self._mmcif_dict=MMCIF2Dict(filename) 24 self._structure_builder=StructureBuilder() 25 self._build_structure(structure_id) 26 return self._structure_builder.get_structure()
27
28 - def _build_structure(self, structure_id):
29 mmcif_dict=self._mmcif_dict 30 atom_id_list=mmcif_dict["_atom_site.label_atom_id"] 31 residue_id_list=mmcif_dict["_atom_site.label_comp_id"] 32 try: 33 element_list = mmcif_dict["_atom_site.type_symbol"] 34 except KeyError: 35 element_list = None 36 seq_id_list=mmcif_dict["_atom_site.label_seq_id"] 37 chain_id_list=mmcif_dict["_atom_site.label_asym_id"] 38 x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]] 39 y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]] 40 z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]] 41 alt_list=mmcif_dict["_atom_site.label_alt_id"] 42 b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"] 43 occupancy_list=mmcif_dict["_atom_site.occupancy"] 44 fieldname_list=mmcif_dict["_atom_site.group_PDB"] 45 try: 46 serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]] 47 except KeyError: 48 # No model number column 49 serial_list = None 50 except ValueError: 51 # Invalid model number (malformed file) 52 raise PDBConstructionException("Invalid model number") 53 try: 54 aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"] 55 aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"] 56 aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"] 57 aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"] 58 aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"] 59 aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"] 60 aniso_flag=1 61 except KeyError: 62 # no anisotropic B factors 63 aniso_flag=0 64 # if auth_seq_id is present, we use this. 65 # Otherwise label_seq_id is used. 66 if "_atom_site.auth_seq_id" in mmcif_dict: 67 seq_id_list=mmcif_dict["_atom_site.auth_seq_id"] 68 else: 69 seq_id_list=mmcif_dict["_atom_site.label_seq_id"] 70 # Now loop over atoms and build the structure 71 current_chain_id=None 72 current_residue_id=None 73 structure_builder=self._structure_builder 74 structure_builder.init_structure(structure_id) 75 structure_builder.init_seg(" ") 76 # Historically, Biopython PDB parser uses model_id to mean array index 77 # so serial_id means the Model ID specified in the file 78 current_model_id = 0 79 current_serial_id = 0 80 for i in range(0, len(atom_id_list)): 81 x=x_list[i] 82 y=y_list[i] 83 z=z_list[i] 84 resname=residue_id_list[i] 85 chainid=chain_id_list[i] 86 altloc=alt_list[i] 87 if altloc==".": 88 altloc=" " 89 resseq=seq_id_list[i] 90 name=atom_id_list[i] 91 # occupancy & B factor 92 try: 93 tempfactor=float(b_factor_list[i]) 94 except ValueError: 95 raise PDBConstructionException("Invalid or missing B factor") 96 try: 97 occupancy=float(occupancy_list[i]) 98 except ValueError: 99 raise PDBConstructionException("Invalid or missing occupancy") 100 fieldname=fieldname_list[i] 101 if fieldname=="HETATM": 102 hetatm_flag="H" 103 else: 104 hetatm_flag=" " 105 if serial_list is not None: 106 # model column exists; use it 107 serial_id = serial_list[i] 108 if current_serial_id != serial_id: 109 # if serial changes, update it and start new model 110 current_serial_id = serial_id 111 structure_builder.init_model(current_model_id, current_serial_id) 112 current_model_id += 1 113 else: 114 # no explicit model column; initialize single model 115 structure_builder.init_model(current_model_id) 116 if current_chain_id!=chainid: 117 current_chain_id=chainid 118 structure_builder.init_chain(current_chain_id) 119 current_residue_id=resseq 120 icode, int_resseq=self._get_icode(resseq) 121 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 122 icode) 123 elif current_residue_id!=resseq: 124 current_residue_id=resseq 125 icode, int_resseq=self._get_icode(resseq) 126 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 127 icode) 128 coord=numpy.array((x, y, z), 'f') 129 element = element_list[i] if element_list else None 130 structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc, 131 name, element=element) 132 if aniso_flag==1: 133 u=(aniso_u11[i], aniso_u12[i], aniso_u13[i], 134 aniso_u22[i], aniso_u23[i], aniso_u33[i]) 135 mapped_anisou = [float(x) for x in u] 136 anisou_array=numpy.array(mapped_anisou, 'f') 137 structure_builder.set_anisou(anisou_array) 138 # Now try to set the cell 139 try: 140 a=float(mmcif_dict["_cell.length_a"]) 141 b=float(mmcif_dict["_cell.length_b"]) 142 c=float(mmcif_dict["_cell.length_c"]) 143 alpha=float(mmcif_dict["_cell.angle_alpha"]) 144 beta=float(mmcif_dict["_cell.angle_beta"]) 145 gamma=float(mmcif_dict["_cell.angle_gamma"]) 146 cell=numpy.array((a, b, c, alpha, beta, gamma), 'f') 147 spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"] 148 spacegroup=spacegroup[1:-1] # get rid of quotes!! 149 if spacegroup is None: 150 raise Exception 151 structure_builder.set_symmetry(spacegroup, cell) 152 except: 153 pass # no cell found, so just ignore
154
155 - def _get_icode(self, resseq):
156 """Tries to return the icode. In MMCIF files this is just part of 157 resseq! In PDB files, it's a separate field.""" 158 last_resseq_char=resseq[-1] 159 if last_resseq_char in ascii_letters: 160 icode=last_resseq_char 161 int_resseq=int(resseq[0:-1]) 162 else: 163 icode=" " 164 int_resseq=int(resseq) 165 return icode, int_resseq
166 167 168 if __name__=="__main__": 169 import sys 170 171 if len(sys.argv) != 2: 172 print("Usage: python MMCIFparser.py filename") 173 raise SystemExit 174 filename=sys.argv[1] 175 176 p=MMCIFParser() 177 178 structure=p.get_structure("test", filename) 179 180 for model in structure.get_list(): 181 print(model) 182 for chain in model.get_list(): 183 print(chain) 184 print("Found %d residues." % len(chain.get_list())) 185