Package Bio :: Package Alphabet :: Module Reduced
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.Reduced

  1  # Copyright 2004 by Iddo Friedberg. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Reduced alphabets which lump together several amino-acids into one letter. 
  8   
  9  Reduced (redundant or simplified) alphabets are used to represent protein sequences using an 
 10  alternative alphabet which lumps together several amino-acids into one letter, based 
 11  on physico-chemical traits. For example, all the aliphatics (I,L,V) are usually 
 12  quite interchangeable, so many sequence studies group them into one letter.
 13   
 14  Examples of reduced alphabets are available in: 
 15   
 16  http://viscose.ifg.uni-muenster.de/html/alphabets.html 
 17   
 18  The Murphy tables are from here: 
 19   
 20  Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid 
 21  alphabets for protein fold recognition and implications for folding. 
 22  Protein Eng. 13(3):149-152 
 23   
 24  Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of 
 25  the tables here, or a user-defined table. 
 26  """ 
 27   
 28  from Bio import Alphabet 
 29   
 30   
 31  murphy_15_tab = {"L": "L", 
 32                   "V": "L", 
 33                   "I": "L", 
 34                   "M": "L", 
 35                   "C": "C", 
 36                   "A": "A", 
 37                   "G": "G", 
 38                   "S": "S", 
 39                   "T": "T", 
 40                   "P": "P", 
 41                   "F": "F", 
 42                   "Y": "F", 
 43                   "W": "W", 
 44                   "E": "E", 
 45                   "D": "D", 
 46                   "N": "N", 
 47                   "Q": "Q", 
 48                   "K": "K", 
 49                   "R": "K", 
 50                   "H": "H"} 
 51   
 52   
class Murphy15(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 15 letters (the values used in murphy_15_tab)."""

    letters = "LCAGSTPFWEDNQKH"
    # One alphabet symbol per character of ``letters``, i.e. 15.
    size = len(letters)
# Ready-made instance of the 15-letter Murphy alphabet.
murphy_15 = Murphy15()

# Murphy 10-letter reduction table: maps each of the 20 amino-acid
# one-letter codes to the letter that represents its group.
# Each pair below is (representative letter, residues merged into it).
murphy_10_tab = {
    residue: representative
    for representative, residues in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in residues
}
class Murphy10(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 10 letters (the values used in murphy_10_tab)."""

    letters = "LCAGSPFEKH"
    # One alphabet symbol per character of ``letters``, i.e. 10.
    size = len(letters)
# Ready-made instance of the 10-letter Murphy alphabet.
murphy_10 = Murphy10()

# Murphy 8-letter reduction table: maps each of the 20 amino-acid
# one-letter codes to the letter that represents its group.
# Each pair below is (representative letter, residues merged into it).
murphy_8_tab = {
    residue: representative
    for representative, residues in (
        ("L", "LVIMC"),
        ("A", "AG"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in residues
}
class Murphy8(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 8 letters (the values used in murphy_8_tab)."""

    letters = "LASPFEKH"
    # One alphabet symbol per character of ``letters``, i.e. 8.
    size = len(letters)
# Ready-made instance of the 8-letter Murphy alphabet.
murphy_8 = Murphy8()

# Murphy 4-letter reduction table: maps each of the 20 amino-acid
# one-letter codes to the letter that represents its group.
# Each pair below is (representative letter, residues merged into it).
murphy_4_tab = {
    residue: representative
    for representative, residues in (
        ("L", "LVIMC"),
        ("A", "AGSTP"),
        ("F", "FYW"),
        ("E", "EDNQKRH"),
    )
    for residue in residues
}
class Murphy4(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 4 letters (the values used in murphy_4_tab)."""

    letters = "LAFE"
    # One alphabet symbol per character of ``letters``, i.e. 4.
    size = len(letters)
# Ready-made instance of the 4-letter Murphy alphabet.
murphy_4 = Murphy4()

# Two-letter HP reduction table: every amino-acid one-letter code maps
# to either "P" or "H".
# Each pair below is (representative letter, residues merged into it).
hp_model_tab = {
    residue: representative
    for representative, residues in (
        ("P", "AGTSNQDEHRKP"),  # Hydrophilic
        ("H", "CMFILVWY"),  # Hydrophobic
    )
    for residue in residues
}
class HPModel(Alphabet.ProteinAlphabet):
    """Two-letter protein alphabet, "H" and "P" (the values used in hp_model_tab)."""

    letters = "HP"
    # One alphabet symbol per character of ``letters``, i.e. 2.
    size = len(letters)
# Ready-made instance of the two-letter HP alphabet.
hp_model = HPModel()

# Five-letter reduction table: every amino-acid one-letter code maps to
# the letter of its class.
# Each pair below is (class letter, residues merged into it).
pc_5_table = {
    residue: representative
    for representative, residues in (
        ("A", "IVL"),  # Aliphatic
        ("R", "FYWH"),  # Aromatic
        ("C", "KRDE"),  # Charged
        ("T", "GACS"),  # Tiny
        ("D", "TMQNP"),  # Diverse
    )
    for residue in residues
}
class PC5(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 5 letters (the values used in pc_5_table)."""

    letters = "ARCTD"
    # One alphabet symbol per character of ``letters``, i.e. 5.
    size = len(letters)
# Ready-made instance of the 5-letter PC5 alphabet, mirroring the instance
# that follows every other alphabet class in this module.  ``hp_model`` is
# already created right after HPModel, so it is not re-created here.
pc5 = PC5()