Package Bio :: Package PDB :: Module StructureAlignment'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.StructureAlignment'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Map the residues of two structures to each other based on a FASTA alignment 
  7  file. 
  8  """ 
  9   
 10  from __future__ import print_function 
 11   
 12  from Bio.Data import SCOPData 
 13   
 14  from Bio.PDB import Selection 
 15  from Bio.PDB.Polypeptide import is_aa 
 16   
 17  __docformat__ = "restructuredtext en" 
 18   
 19   
20 -class StructureAlignment(object):
21 """ 22 This class aligns two structures based on an alignment of their 23 sequences. 24 """
25 - def __init__(self, fasta_align, m1, m2, si=0, sj=1):
26 """ 27 Attributes: 28 29 - fasta_align --- Alignment object 30 - m1, m2 --- two models 31 - si, sj --- the sequences in the Alignment object that 32 correspond to the structures 33 """ 34 l = fasta_align.get_alignment_length() 35 # Get the residues in the models 36 rl1 = Selection.unfold_entities(m1, 'R') 37 rl2 = Selection.unfold_entities(m2, 'R') 38 # Residue positions 39 p1 = 0 40 p2 = 0 41 # Map equivalent residues to each other 42 map12 = {} 43 map21 = {} 44 # List of residue pairs (None if -) 45 duos = [] 46 for i in range(0, l): 47 column = fasta_align.get_column(i) 48 aa1 = column[si] 49 aa2 = column[sj] 50 if aa1 != "-": 51 # Position in seq1 is not - 52 while True: 53 # Loop until an aa is found 54 r1 = rl1[p1] 55 p1 = p1 + 1 56 if is_aa(r1): 57 break 58 self._test_equivalence(r1, aa1) 59 else: 60 r1 = None 61 if aa2 != "-": 62 # Position in seq2 is not - 63 while True: 64 # Loop until an aa is found 65 r2 = rl2[p2] 66 p2 = p2 + 1 67 if is_aa(r2): 68 break 69 self._test_equivalence(r2, aa2) 70 else: 71 r2 = None 72 if r1: 73 # Map residue in seq1 to its equivalent in seq2 74 map12[r1] = r2 75 if r2: 76 # Map residue in seq2 to its equivalent in seq1 77 map21[r2] = r1 78 # Append aligned pair (r is None if gap) 79 duos.append((r1, r2)) 80 self.map12 = map12 81 self.map21 = map21 82 self.duos = duos
83
84 - def _test_equivalence(self, r1, aa1):
85 "Test if aa in sequence fits aa in structure." 86 resname = r1.get_resname() 87 resname = SCOPData.protein_letters_3to1[resname] 88 assert(aa1 == resname)
89
90 - def get_maps(self):
91 """ 92 Return two dictionaries that map a residue in one structure to 93 the equivealent residue in the other structure. 94 """ 95 return self.map12, self.map21
96
97 - def get_iterator(self):
98 """ 99 Iterator over all residue pairs. 100 """ 101 for i in range(0, len(self.duos)): 102 yield self.duos[i]
103 104 105 if __name__ == "__main__": 106 import sys 107 from Bio.Alphabet import generic_protein 108 from Bio import AlignIO 109 from Bio.PDB import PDBParser 110 111 if len(sys.argv) != 4: 112 print("Expects three arguments,") 113 print(" - FASTA alignment filename (expect two sequences)") 114 print(" - PDB file one") 115 print(" - PDB file two") 116 sys.exit() 117 118 # The alignment 119 fa = AlignIO.read(open(sys.argv[1]), "fasta", generic_protein) 120 121 pdb_file1 = sys.argv[2] 122 pdb_file2 = sys.argv[3] 123 124 # The structures 125 p = PDBParser() 126 s1 = p.get_structure('1', pdb_file1) 127 p = PDBParser() 128 s2 = p.get_structure('2', pdb_file2) 129 130 # Get the models 131 m1 = s1[0] 132 m2 = s2[0] 133 134 al = StructureAlignment(fa, m1, m2) 135 136 # Print aligned pairs (r is None if gap) 137 for (r1, r2) in al.get_iterator(): 138 print("%s %s" % (r1, r2)) 139