Package Bio :: Package FSSP :: Module FSSPTools
[hide private]
[frames] | no frames]

Source Code for Module Bio.FSSP.FSSPTools

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Several routines used to extract information from FSSP sections. 
  5   
  6  filter: filters a passed summary section and alignment section according to a numeric 
  7          attribute in the summary section. Returns new summary and alignment sections 
  8   
  9  For example, to filter in only  those records which have a zscore greater than 
 10  4.0 and lesser than 7.5: 
 11   
 12  new_sum, new_align = filter(sum, align, 'zscore', 4, 7.5) 
 13  """ 
 14   
 15  from Bio import FSSP 
 16  import copy 
 17  from Bio.Align import MultipleSeqAlignment 
 18  from Bio import Alphabet 
 19  from Bio.Seq import Seq 
 20  from Bio.SeqRecord import SeqRecord 
 21   
 22   
23 -class FSSPAlign(MultipleSeqAlignment):
24 - def _add_numbering_table(self, new_record):
25 new_record.annotations['abs2pdb'] = {} 26 new_record.annotations['pdb2abs'] = {}
27 28
29 -class FSSPMultAlign(dict):
30 - def __init__(self):
31 """Initialize the class.""" 32 self.abs_res = [] 33 self.pdb_res = [] 34 self.data = {}
35 36
37 -def mult_align(sum_dict, align_dict):
38 """Return multiple alignment instance (MultipleSeqAlignment).""" 39 mult_align_dict = {} 40 for j in align_dict.abs(1).pos_align_dict: 41 mult_align_dict[j] = '' 42 43 for i in range(1, len(align_dict) + 1): 44 # loop on positions 45 for j in align_dict.abs(i).pos_align_dict: 46 # loop within a position 47 mult_align_dict[j] += align_dict.abs(i).pos_align_dict[j].aa 48 alpha = Alphabet.Gapped(Alphabet.IUPAC.extended_protein) 49 fssp_align = MultipleSeqAlignment([], alphabet=alpha) 50 for i in sorted(mult_align_dict): 51 fssp_align.append(SeqRecord(Seq(mult_align_dict[i], alpha), 52 sum_dict[i].pdb2 + sum_dict[i].chain2)) 53 return fssp_align
54 55 # 56 # Warning: this function really slows down when filtering large FSSP files. 57 # The reason is the use of copy.deepcopy() to copy align_dict into 58 # new_align_dict. I have to figure out something better. 59 # Took me ~160 seconds for the largest FSSP file (1reqA.fssp) 60 # 61 62
63 -def filter(sum_dict, align_dict, filter_attribute, low_bound, high_bound):
64 """Filter a passed summary section and alignment section. 65 66 Filter according to a numeric attribute in the summary section. 67 Return new summary and alignment sections. 68 """ 69 new_sum_dict = FSSP.FSSPSumDict() 70 new_align_dict = copy.deepcopy(align_dict) 71 # for i in align_dict: 72 # new_align_dict[i] = copy.copy(align_dict[i]) 73 # new_align_dict = copy.copy(align_dict) 74 for prot_num in sum_dict: 75 attr_value = getattr(sum_dict[prot_num], filter_attribute) 76 if attr_value >= low_bound and attr_value <= high_bound: 77 new_sum_dict[prot_num] = sum_dict[prot_num] 78 prot_numbers = sorted(new_sum_dict) 79 for pos_num in new_align_dict.abs_res_dict: 80 new_align_dict.abs(pos_num).pos_align_dict = {} 81 for prot_num in prot_numbers: 82 new_align_dict.abs(pos_num).pos_align_dict[prot_num] = \ 83 align_dict.abs(pos_num).pos_align_dict[prot_num] 84 return new_sum_dict, new_align_dict
85 86
87 -def name_filter(sum_dict, align_dict, name_list):
88 """Filter summary and alignment blocks for given names only. 89 90 Accepts a list of names. Returns a new Summary block and Alignment block which 91 contain the info only for those names passed. 92 """ 93 new_sum_dict = FSSP.FSSPSumDict() 94 new_align_dict = copy.deepcopy(align_dict) 95 for cur_pdb_name in name_list: 96 for prot_num in sum_dict: 97 if sum_dict[prot_num].pdb2 + sum_dict[prot_num].chain2 == cur_pdb_name: 98 new_sum_dict[prot_num] = sum_dict[prot_num] 99 prot_numbers = sorted(new_sum_dict) 100 for pos_num in new_align_dict.abs_res_dict: 101 new_align_dict.abs(pos_num).pos_align_dict = {} 102 for prot_num in prot_numbers: 103 new_align_dict.abs(pos_num).pos_align_dict[prot_num] = \ 104 align_dict.abs(pos_num).pos_align_dict[prot_num] 105 return new_sum_dict, new_align_dict
106