Package Bio :: Package PDB :: Module StructureAlignment'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.StructureAlignment'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Map the residues of two structures to each other based on a FASTA alignment 
  7  file. 
  8  """ 
  9   
 10  from __future__ import print_function 
 11   
 12  from Bio.Data import SCOPData 
 13   
 14  from Bio.PDB import Selection 
 15  from Bio.PDB.Polypeptide import is_aa 
 16   
 17   
18 -class StructureAlignment(object):
19 """ 20 This class aligns two structures based on an alignment of their 21 sequences. 22 """
23 - def __init__(self, fasta_align, m1, m2, si=0, sj=1):
24 """ 25 Attributes: 26 27 - fasta_align --- Alignment object 28 - m1, m2 --- two models 29 - si, sj --- the sequences in the Alignment object that 30 correspond to the structures 31 """ 32 l = fasta_align.get_alignment_length() 33 # Get the residues in the models 34 rl1 = Selection.unfold_entities(m1, 'R') 35 rl2 = Selection.unfold_entities(m2, 'R') 36 # Residue positions 37 p1 = 0 38 p2 = 0 39 # Map equivalent residues to each other 40 map12 = {} 41 map21 = {} 42 # List of residue pairs (None if -) 43 duos = [] 44 for i in range(0, l): 45 column = fasta_align.get_column(i) 46 aa1 = column[si] 47 aa2 = column[sj] 48 if aa1 != "-": 49 # Position in seq1 is not - 50 while True: 51 # Loop until an aa is found 52 r1 = rl1[p1] 53 p1 = p1 + 1 54 if is_aa(r1): 55 break 56 self._test_equivalence(r1, aa1) 57 else: 58 r1 = None 59 if aa2 != "-": 60 # Position in seq2 is not - 61 while True: 62 # Loop until an aa is found 63 r2 = rl2[p2] 64 p2 = p2 + 1 65 if is_aa(r2): 66 break 67 self._test_equivalence(r2, aa2) 68 else: 69 r2 = None 70 if r1: 71 # Map residue in seq1 to its equivalent in seq2 72 map12[r1] = r2 73 if r2: 74 # Map residue in seq2 to its equivalent in seq1 75 map21[r2] = r1 76 # Append aligned pair (r is None if gap) 77 duos.append((r1, r2)) 78 self.map12 = map12 79 self.map21 = map21 80 self.duos = duos
81
82 - def _test_equivalence(self, r1, aa1):
83 "Test if aa in sequence fits aa in structure." 84 resname = r1.get_resname() 85 resname = SCOPData.protein_letters_3to1[resname] 86 assert(aa1 == resname)
87
88 - def get_maps(self):
89 """ 90 Return two dictionaries that map a residue in one structure to 91 the equivealent residue in the other structure. 92 """ 93 return self.map12, self.map21
94
95 - def get_iterator(self):
96 """ 97 Iterator over all residue pairs. 98 """ 99 for i in range(0, len(self.duos)): 100 yield self.duos[i]
101 102 103 if __name__ == "__main__": 104 import sys 105 from Bio.Alphabet import generic_protein 106 from Bio import AlignIO 107 from Bio.PDB import PDBParser 108 109 if len(sys.argv) != 4: 110 print("Expects three arguments,") 111 print(" - FASTA alignment filename (expect two sequences)") 112 print(" - PDB file one") 113 print(" - PDB file two") 114 sys.exit() 115 116 # The alignment 117 fa = AlignIO.read(open(sys.argv[1]), "fasta", generic_protein) 118 119 pdb_file1 = sys.argv[2] 120 pdb_file2 = sys.argv[3] 121 122 # The structures 123 p = PDBParser() 124 s1 = p.get_structure('1', pdb_file1) 125 p = PDBParser() 126 s2 = p.get_structure('2', pdb_file2) 127 128 # Get the models 129 m1 = s1[0] 130 m2 = s2[0] 131 132 al = StructureAlignment(fa, m1, m2) 133 134 # Print aligned pairs (r is None if gap) 135 for (r1, r2) in al.get_iterator(): 136 print("%s %s" % (r1, r2)) 137