Package Bio :: Package SubsMat :: Module FreqTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.SubsMat.FreqTable

 1  # This code is part of the Biopython distribution and governed by its 
 2  # license.  Please see the LICENSE file that should have been included 
 3  # as part of this package. 
 4  # Copyright Iddo Friedberg idoerg@cc.huji.ac.il 
 5  """A class to handle frequency tables 
 6   
 7  Methods to read a letter frequency or a letter count file: 
 8  Example files for a DNA alphabet: 
 9   
10  A count file (whitespace separated): 
11   
12  A  50 
13  C  37 
14  G  23 
15  T  58 
16   
17  The same info as a frequency file: 
18   
19  A 0.2976 
20  C 0.2202 
21  G 0.1369 
22  T 0.3452 
23   
24  Functions: 
25    read_count(f): read a count file from stream f. Then convert to 
26    frequencies 
27    read_freq(f): read a frequency data file from stream f. Of course, we then 
28    don't have the counts, but it is usually the letter frequencies which are 
29    interesting. 
30   
31  Methods: 
32    (all internal) 
33  Attributes: 
34    alphabet: The IUPAC alphabet set (or any other) whose letters you are 
35    using. Common sets are: IUPAC.protein (20-letter protein), 
36    IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more. 
37    data: frequency dictionary. 
38    count: count dictionary. Empty if no counts are provided. 
39   
40  Example of use: 
41    >>> from Bio.SubsMat import FreqTable 
42    >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ) 
43    >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT) 
44    >>> ftab = FreqTable.read_count(open('myDNACountFile')) 
45   
46  """ 
47   
48  from Bio import Alphabet 
49  COUNT = 1 
50  FREQ = 2 
51   
52   
class FreqTable(dict):
    """Letter frequency table: a dict mapping each letter to its frequency.

    Attributes:
        alphabet: the alphabet passed to the constructor. When none (or a
            falsy one) is given, a generic ``Alphabet.Alphabet()`` whose
            ``letters`` attribute is set to the table's keys joined in
            sorted order.
        count: the count dictionary the frequencies were derived from (the
            caller's own dict, not a copy), or an empty dict when the table
            was built directly from frequencies.
    """

    def _freq_from_count(self):
        """Store count / total in the table for every entry of self.count.

        An empty count dict leaves the table empty.  A non-empty dict whose
        counts sum to zero raises ZeroDivisionError.
        """
        # float() keeps the division a true division for integer counts.
        total = float(sum(self.count.values()))
        for letter, n in self.count.items():
            self[letter] = n / total

    def _alphabet_from_input(self):
        """Return the table's keys joined into one string, in sorted order."""
        return "".join(sorted(self))

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build the table from a count or a frequency dictionary.

        Arguments:
            in_dict: dictionary mapping letters to counts or frequencies.
            dict_type: COUNT if in_dict holds counts (they are converted to
                frequencies), FREQ if it already holds frequencies.
            alphabet: optional alphabet object; when omitted or falsy, one
                is derived from the letters found in the table.

        Raises:
            ValueError: if dict_type is neither COUNT nor FREQ.
        """
        self.alphabet = alphabet
        if dict_type == COUNT:
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if not alphabet:
            # The letters can only be collected once the table is populated.
            self.alphabet = Alphabet.Alphabet()
            self.alphabet.letters = self._alphabet_from_input()
79 80
def read_count(f):
    """Read a letter count file from stream f and return a FreqTable.

    Each data line holds a letter and an integer count separated by
    whitespace.  Blank or whitespace-only lines are skipped.  The counts
    are converted to frequencies by FreqTable.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields,
            or if the count is not an integer.
    """
    count = {}
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing newline at end of file) carries
            # no data; skipping it avoids an unpacking error below.
            continue
        key, value = fields
        count[key] = int(value)
    return FreqTable(count, COUNT)
88 89
def read_freq(f):
    """Read a letter frequency file from stream f and return a FreqTable.

    Each data line holds a letter and a floating point frequency separated
    by whitespace.  Blank or whitespace-only lines are skipped.  No counts
    are available for a table built this way.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields,
            or if the frequency is not a valid float.
    """
    freq_dict = {}
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing newline at end of file) carries
            # no data; skipping it avoids an unpacking error below.
            continue
        key, value = fields
        freq_dict[key] = float(value)
    return FreqTable(freq_dict, FREQ)
96