Package Bio :: Package FSSP :: Module FSSPTools
[hide private]
[frames] | no frames]

Source Code for Module Bio.FSSP.FSSPTools

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Several routines used to extract information from FSSP sections. 
  5   
  6  filter: filters a passed summary section and alignment section according to a numeric 
  7          attribute in the summary section. Returns new summary and alignment sections 
  8   
  9  For example, to keep only those records whose zscore lies between 4.0 and 
 10  7.5 (both bounds inclusive): 
 11   
 12  new_sum, new_align = filter(sum, align, 'zscore', 4, 7.5) 
 13  """ 
 14   
 15  from Bio import FSSP 
 16  import copy 
 17  from Bio.Align import MultipleSeqAlignment 
 18  from Bio import Alphabet 
 19  from Bio.Seq import Seq 
 20  from Bio.SeqRecord import SeqRecord 
 21   
 22   
class FSSPAlign(MultipleSeqAlignment):
    """MultipleSeqAlignment subclass that can attach numbering tables to records."""

    def _add_numbering_table(self, new_record):
        """Store two fresh, empty tables in ``new_record.annotations``.

        The tables are placed under the keys ``'abs2pdb'`` and ``'pdb2abs'``
        (presumably absolute-to-PDB and PDB-to-absolute residue numbering;
        nothing in this method fills them in). Each key receives its own
        independent dictionary.
        """
        for table_key in ('abs2pdb', 'pdb2abs'):
            new_record.annotations[table_key] = {}
27 28
class FSSPMultAlign(dict):
    """Dictionary subclass that also carries three auxiliary containers."""

    def __init__(self):
        """Create an empty mapping with empty ``abs_res``, ``pdb_res`` and ``data``.

        ``abs_res`` and ``pdb_res`` start as separate empty lists and ``data``
        as an empty dictionary; the mapping itself starts with no items.
        """
        self.abs_res, self.pdb_res = [], []
        self.data = {}
34 35
def mult_align(sum_dict, align_dict):
    """Returns a biopython multiple alignment instance (MultipleSeqAlignment).

    One sequence is built per protein number found in the alignment entry at
    absolute position 1. For every absolute position from 1 to
    ``len(align_dict)`` the ``aa`` attribute of each protein's entry at that
    position is appended to that protein's sequence.

    Arguments:
     - sum_dict - summary section, indexed by protein number; each entry
       must provide ``pdb2`` and ``chain2`` attributes, which are
       concatenated and passed to ``SeqRecord`` as its second argument.
     - align_dict - alignment section; must support ``len()`` and
       ``abs(position)``, whose result exposes a ``pos_align_dict`` mapping
       of protein number to an entry with an ``aa`` attribute.

    Records are appended to the alignment in sorted protein-number order.
    """
    # Collect residues in lists and join once at the end; repeated ``+=`` on
    # a string stored in a dict copies the whole string at every position.
    residues = {}
    for prot_num in align_dict.abs(1).pos_align_dict:
        residues[prot_num] = []

    for pos_num in range(1, len(align_dict) + 1):
        # Look the position up once rather than once per protein.
        pos_align = align_dict.abs(pos_num).pos_align_dict
        for prot_num in pos_align:
            residues[prot_num].append(pos_align[prot_num].aa)

    alpha = Alphabet.Gapped(Alphabet.IUPAC.extended_protein)
    fssp_align = MultipleSeqAlignment([], alphabet=alpha)
    for prot_num in sorted(residues):
        summary = sum_dict[prot_num]
        fssp_align.append(SeqRecord(Seq(''.join(residues[prot_num]), alpha),
                                    summary.pdb2 + summary.chain2))
    return fssp_align

#
# Warning: filter() below really slows down when filtering large FSSP files.
# The reason is the use of copy.deepcopy() to copy align_dict into
# new_align_dict. I have to figure out something better.
# It took me ~160 seconds for the largest FSSP file (1reqA.fssp).
#

def filter(sum_dict, align_dict, filter_attribute, low_bound, high_bound):
    """Select the proteins whose summary attribute falls inside a range.

    A protein is kept when the attribute named ``filter_attribute`` on its
    summary entry is at least ``low_bound`` and at most ``high_bound``.

    Returns a ``(summary, alignment)`` pair: a new ``FSSP.FSSPSumDict`` holding
    the kept summary entries, and a deep copy of ``align_dict`` in which every
    position's ``pos_align_dict`` has been replaced by one containing only the
    kept proteins. The per-protein entries placed there are the objects from
    the original ``align_dict``, not copies.
    """
    kept_summary = FSSP.FSSPSumDict()
    trimmed_align = copy.deepcopy(align_dict)

    for prot_id in sum_dict:
        record = sum_dict[prot_id]
        value = getattr(record, filter_attribute)
        if low_bound <= value <= high_bound:
            kept_summary[prot_id] = record

    kept_ids = sorted(kept_summary)
    for pos in trimmed_align.abs_res_dict:
        source_column = align_dict.abs(pos).pos_align_dict
        trimmed_align.abs(pos).pos_align_dict = {
            prot_id: source_column[prot_id] for prot_id in kept_ids
        }
    return kept_summary, trimmed_align
82 83
def name_filter(sum_dict, align_dict, name_list):
    """Select the proteins whose name appears in ``name_list``.

    A protein's name is the concatenation of the ``pdb2`` and ``chain2``
    attributes of its summary entry.

    Returns a ``(summary, alignment)`` pair: a new ``FSSP.FSSPSumDict`` holding
    the matching summary entries, and a deep copy of ``align_dict`` in which
    every position's ``pos_align_dict`` has been replaced by one containing
    only the matching proteins. The per-protein entries placed there are the
    objects from the original ``align_dict``, not copies.
    """
    kept_summary = FSSP.FSSPSumDict()
    trimmed_align = copy.deepcopy(align_dict)

    # Outer loop over the requested names, so matches are added in
    # name_list order.
    for wanted_name in name_list:
        for prot_id in sum_dict:
            record = sum_dict[prot_id]
            if record.pdb2 + record.chain2 == wanted_name:
                kept_summary[prot_id] = record

    kept_ids = sorted(kept_summary)
    for pos in trimmed_align.abs_res_dict:
        source_column = align_dict.abs(pos).pos_align_dict
        trimmed_align.abs(pos).pos_align_dict = {
            prot_id: source_column[prot_id] for prot_id in kept_ids
        }
    return kept_summary, trimmed_align
101