Package Bio :: Package FSSP :: Module FSSPTools
[hide private]
[frames] | no frames]

Source Code for Module Bio.FSSP.FSSPTools

 1  # This code is part of the Biopython distribution and governed by its 
 2  # license.  Please see the LICENSE file that should have been included 
 3  # as part of this package. 
 4  # 
 5   
 6  from Bio import FSSP 
 7  import copy 
 8  from Bio.Align import MultipleSeqAlignment 
 9  from Bio import Alphabet 
10  from Bio.Seq import Seq 
11  from Bio.SeqRecord import SeqRecord 
12   
13   
14 -class FSSPAlign(MultipleSeqAlignment):
15 - def _add_numbering_table(self, new_record):
16 new_record.annotations['abs2pdb'] = {} 17 new_record.annotations['pdb2abs'] = {}
18 19
20 -class FSSPMultAlign(dict):
21 - def __init__(self):
22 self.abs_res = [] 23 self.pdb_res = [] 24 self.data = {}
25 26
27 -def mult_align(sum_dict, align_dict):
28 """Returns a biopython multiple alignment instance (MultipleSeqAlignment)""" 29 mult_align_dict = {} 30 for j in align_dict.abs(1).pos_align_dict: 31 mult_align_dict[j] = '' 32 33 for i in range(1, len(align_dict)+1): 34 # loop on positions 35 for j in align_dict.abs(i).pos_align_dict: 36 # loop within a position 37 mult_align_dict[j] += align_dict.abs(i).pos_align_dict[j].aa 38 alpha = Alphabet.Gapped(Alphabet.IUPAC.extended_protein) 39 fssp_align = MultipleSeqAlignment([], alphabet=alpha) 40 for i in sorted(mult_align_dict): 41 fssp_align.append(SeqRecord(Seq(mult_align_dict[i], alpha), 42 sum_dict[i].pdb2+sum_dict[i].chain2)) 43 return fssp_align
44 45 46 # Several routines used to extract information from FSSP sections 47 # filter: 48 # filters a passed summary section and alignment section according to a numeric 49 # attribute in the summary section. Returns new summary and alignment sections 50 # For example, to filter in only those records which have a zscore greater than 51 # 4.0 and lesser than 7.5: 52 # new_sum, new_align = filter(sum, align, 'zscore', 4, 7.5) 53 # 54 # Warning: this function really slows down when filtering large FSSP files. 55 # The reason is the use of copy.deepcopy() to copy align_dict into 56 # new_align_dict. I have to figure out something better. 57 # Took me ~160 seconds for the largest FSSP file (1reqA.fssp) 58 # 59
60 -def filter(sum_dict, align_dict, filter_attribute, low_bound, high_bound):
61 """filters a passed summary section and alignment section according to a numeric 62 attribute in the summary section. Returns new summary and alignment sections""" 63 new_sum_dict = FSSP.FSSPSumDict() 64 new_align_dict = copy.deepcopy(align_dict) 65 # for i in align_dict: 66 # new_align_dict[i] = copy.copy(align_dict[i]) 67 # new_align_dict = copy.copy(align_dict) 68 for prot_num in sum_dict: 69 attr_value = getattr(sum_dict[prot_num], filter_attribute) 70 if attr_value >= low_bound and attr_value <= high_bound: 71 new_sum_dict[prot_num] = sum_dict[prot_num] 72 prot_numbers = sorted(new_sum_dict) 73 for pos_num in new_align_dict.abs_res_dict: 74 new_align_dict.abs(pos_num).pos_align_dict = {} 75 for prot_num in prot_numbers: 76 new_align_dict.abs(pos_num).pos_align_dict[prot_num] = \ 77 align_dict.abs(pos_num).pos_align_dict[prot_num] 78 return new_sum_dict, new_align_dict
79 80
81 -def name_filter(sum_dict, align_dict, name_list):
82 """ Accepts a list of names. Returns a new Summary block and Alignment block which 83 contain the info only for those names passed.""" 84 new_sum_dict = FSSP.FSSPSumDict() 85 new_align_dict = copy.deepcopy(align_dict) 86 for cur_pdb_name in name_list: 87 for prot_num in sum_dict: 88 if sum_dict[prot_num].pdb2+sum_dict[prot_num].chain2 == cur_pdb_name: 89 new_sum_dict[prot_num] = sum_dict[prot_num] 90 prot_numbers = sorted(new_sum_dict) 91 for pos_num in new_align_dict.abs_res_dict: 92 new_align_dict.abs(pos_num).pos_align_dict = {} 93 for prot_num in prot_numbers: 94 new_align_dict.abs(pos_num).pos_align_dict[prot_num] = \ 95 align_dict.abs(pos_num).pos_align_dict[prot_num] 96 return new_sum_dict, new_align_dict
97