Package Bio :: Package HMM :: Module DynamicProgramming
[hide private]
[frames] | [no frames]

Source Code for Module Bio.HMM.DynamicProgramming

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  """Dynamic Programming algorithms for general usage. 
  7   
  8  This module contains classes which implement Dynamic Programming 
  9  algorithms that can be used generally. 
 10  """ 
 11   
 12  from Bio._py3k import range 
 13   
 14   
class AbstractDPAlgorithms(object):
    """An abstract class to calculate forward and backward probabilities.

    This class should not be instantiated directly, but should be used
    through a derived class which implements proper scaling of variables.

    This class is just meant to encapsulate the basic forward and backward
    algorithms, and allow derived classes to deal with the problems of
    multiplying probabilities.

    Derived classes must implement:

    - _forward_recursion -- Calculate the forward values in the recursion
      using some kind of technique for preventing underflow errors.
    - _backward_recursion -- Calculate the backward values in the recursion
      step using some technique to prevent underflow errors.

    """

    def __init__(self, markov_model, sequence):
        """Initialize to calculate forward and backward probabilities.

        Arguments:
        - markov_model -- The current Markov model we are working with.
        - sequence -- A training sequence containing a set of emissions.

        """
        self._mm = markov_model
        self._seq = sequence

    def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
        """Calculate the forward recursion value (subclass responsibility)."""
        raise NotImplementedError("Subclasses must implement")

    def forward_algorithm(self):
        """Calculate sequence probability using the forward algorithm.

        This implements the forward algorithm, as described on p57-58 of
        Durbin et al.

        Returns:
        - A dictionary containing the forward variables. This has keys of the
          form (state letter, position in the training sequence), and values
          containing the calculated forward variable.
        - The calculated probability of the sequence.

        """
        # all of the different letters that the state path can be in
        state_letters = self._seq.states.alphabet.letters

        # -- initialize the algorithm
        #
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        #
        forward_var = {}
        # f_{0}(0) = 1
        forward_var[(state_letters[0], -1)] = 1
        # f_{k}(0) = 0, for k > 0
        for k in range(1, len(state_letters)):
            forward_var[(state_letters[k], -1)] = 0

        # -- now do the recursion step
        # loop over the training sequence
        # Recursion step: (i = 1 .. L)
        for i in range(len(self._seq.emissions)):
            # now loop over the letters in the state path
            for main_state in state_letters:
                # calculate the forward value using the appropriate
                # method to prevent underflow errors
                forward_value = self._forward_recursion(main_state, i,
                                                        forward_var)

                # None signals "no transition possible for this state";
                # we simply leave that (state, position) key unset
                if forward_value is not None:
                    forward_var[(main_state, i)] = forward_value

        # -- termination step - calculate the probability of the sequence
        first_state = state_letters[0]
        seq_prob = 0

        for state_item in state_letters:
            # f_{k}(L)
            forward_value = forward_var[(state_item,
                                         len(self._seq.emissions) - 1)]
            # a_{k0} -- transition back to the model's first state
            transition_value = self._mm.transition_prob[(state_item,
                                                         first_state)]

            seq_prob += forward_value * transition_value

        return forward_var, seq_prob

    def _backward_recursion(self, cur_state, sequence_pos, backward_vars):
        """Calculate the backward recursion value (subclass responsibility)."""
        # NOTE: parameter renamed from the misleading `forward_vars` -- this
        # recursion is fed the backward variables, as the subclass does.
        raise NotImplementedError("Subclasses must implement")

    def backward_algorithm(self):
        """Calculate sequence probability using the backward algorithm.

        This implements the backward algorithm, as described on p58-59 of
        Durbin et al.

        Returns:
        - A dictionary containing the backwards variables. This has keys
          of the form (state letter, position in the training sequence),
          and values containing the calculated backward variable.

        """
        # all of the different letters that the state path can be in
        state_letters = self._seq.states.alphabet.letters

        # -- initialize the algorithm
        #
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        #
        backward_var = {}

        # b_{k}(L) = a_{k0} for all k
        for state in state_letters:
            backward_var[(state, len(self._seq.emissions) - 1)] = \
                self._mm.transition_prob[(state, state_letters[0])]

        # -- recursion
        # loop over the training sequence backwards
        # Recursion step: (i = L - 1 ... 1)
        for i in range(len(self._seq.emissions) - 2, -1, -1):
            # now loop over the letters in the state path
            for main_state in state_letters:
                # calculate the backward value using the appropriate
                # method to prevent underflow errors
                backward_value = self._backward_recursion(main_state, i,
                                                          backward_var)

                if backward_value is not None:
                    backward_var[(main_state, i)] = backward_value

        # skip the termination step to avoid recalculations -- you should
        # get sequence probabilities using the forward algorithm

        return backward_var
161 162
class ScaledDPAlgorithms(AbstractDPAlgorithms):
    """Implement forward and backward algorithms using a rescaling approach.

    This scales the f and b variables, so that they remain within a
    manageable numerical interval during calculations. This approach is
    described in Durbin et al. on p 78.

    This approach is a little more straightforward than log transformation
    but may still give underflow errors for some types of models. In these
    cases, the LogDPAlgorithms class should be used.
    """

    def __init__(self, markov_model, sequence):
        """Initialize the scaled approach to calculating probabilities.

        Arguments:
        - markov_model -- The current Markov model we are working with.
        - sequence -- A TrainingSequence object that must have a
          set of emissions to work with.

        """
        AbstractDPAlgorithms.__init__(self, markov_model, sequence)

        # Cache of scaling values keyed by sequence position, shared by the
        # forward and backward recursions so each s_{i} is computed once.
        self._s_values = {}

    def _calculate_s_value(self, seq_pos, previous_vars):
        """Calculate the next scaling variable for a sequence position.

        This utilizes the approach of choosing s values such that the
        sum of all of the scaled f values is equal to 1.

        Arguments:
        - seq_pos -- The current position we are at in the sequence.
        - previous_vars -- All of the forward or backward variables
          calculated so far.

        Returns:
        - The calculated scaling variable for the sequence item.

        """
        # all of the different letters the state can have
        state_letters = self._seq.states.alphabet.letters

        # loop over all of the possible states
        s_value = 0
        for main_state in state_letters:
            emission = self._mm.emission_prob[(main_state,
                                               self._seq.emissions[seq_pos])]

            # now sum over all of the previous vars and transitions
            trans_and_var_sum = 0
            for second_state in self._mm.transitions_from(main_state):
                # the value of the previous f or b value
                var_value = previous_vars[(second_state, seq_pos - 1)]

                # the transition probability
                trans_value = self._mm.transition_prob[(second_state,
                                                        main_state)]

                trans_and_var_sum += (var_value * trans_value)

            s_value += (emission * trans_and_var_sum)

        return s_value

    def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
        """Calculate the value of the forward recursion.

        Arguments:
        - cur_state -- The letter of the state we are calculating the
          forward variable for.
        - sequence_pos -- The position we are at in the training seq.
        - forward_vars -- The current set of forward variables

        Returns the scaled forward variable, or None when cur_state has
        no transitions.
        """
        # calculate the s value, if we haven't done so already (ie. during
        # a previous forward or backward recursion)
        if sequence_pos not in self._s_values:
            self._s_values[sequence_pos] = \
                self._calculate_s_value(sequence_pos, forward_vars)

        # e_{l}(x_{i})
        seq_letter = self._seq.emissions[sequence_pos]
        cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
        # divide by the scaling value
        scale_emission_prob = (float(cur_emission_prob) /
                               float(self._s_values[sequence_pos]))

        # loop over all of the possible states at the position
        #
        # NOTE(review): Durbin's forward recursion sums over states k with a
        # transition INTO l (cur_state); transitions_from(cur_state) only
        # matches when the model's transition structure is symmetric --
        # confirm against the MarkovModel implementation.
        state_pos_sum = 0
        have_transition = False
        for second_state in self._mm.transitions_from(cur_state):
            have_transition = True

            # get the previous forward_var values
            # f_{k}(i - 1)
            prev_forward = forward_vars[(second_state, sequence_pos - 1)]

            # a_{kl}
            cur_trans_prob = self._mm.transition_prob[(second_state,
                                                       cur_state)]
            state_pos_sum += prev_forward * cur_trans_prob

        # if we have the possibility of having a transition
        # return the recursion value
        if have_transition:
            return scale_emission_prob * state_pos_sum
        else:
            return None

    def _backward_recursion(self, cur_state, sequence_pos, backward_vars):
        """Calculate the value of the backward recursion.

        Arguments:
        - cur_state -- The letter of the state we are calculating the
          backward variable for.
        - sequence_pos -- The position we are at in the training seq.
        - backward_vars -- The current set of backward variables

        Returns the scaled backward variable, or None when cur_state has
        no transitions.
        """
        # calculate the s value, if we haven't done so already (ie. during
        # a previous forward or backward recursion)
        if sequence_pos not in self._s_values:
            self._s_values[sequence_pos] = \
                self._calculate_s_value(sequence_pos, backward_vars)

        # x_{i + 1} -- the next emission is the same for every term, so
        # look it up once outside the loop
        seq_letter = self._seq.emissions[sequence_pos + 1]

        # loop over all of the possible states at the position
        state_pos_sum = 0
        have_transition = False
        for second_state in self._mm.transitions_from(cur_state):
            have_transition = True

            # e_{l}(x_{i + 1})
            #
            # BUGFIX: the emission probability must be that of the next
            # state l (second_state), not the current state k, to match
            # the backward recursion b_k(i) = sum_l a_kl e_l(x_{i+1})
            # b_l(i+1) of Durbin et al p58-59 (the old code used
            # cur_state here, contradicting its own e_{l} annotation).
            cur_emission_prob = self._mm.emission_prob[(second_state,
                                                        seq_letter)]

            # get the previous backward_var value
            # b_{l}(i + 1)
            prev_backward = backward_vars[(second_state, sequence_pos + 1)]

            # the transition probability -- a_{kl}
            cur_transition_prob = self._mm.transition_prob[(cur_state,
                                                            second_state)]

            state_pos_sum += (cur_emission_prob * prev_backward *
                              cur_transition_prob)

        # if we have a probability for a transition, return it
        if have_transition:
            return state_pos_sum / float(self._s_values[sequence_pos])
        # otherwise we have no probability (ie. we can't do this transition)
        # and return None
        else:
            return None
316 317
class LogDPAlgorithms(AbstractDPAlgorithms):
    """Forward/backward algorithms via summed log probabilities.

    The intended implementation computes sums of log-transformed
    probabilities, using a lookup table for common values, which avoids
    the underflow problems of working with raw probabilities.

    XXX This is not implemented yet!
    """

    def __init__(self, markov_model, sequence):
        """Initialize -- unconditionally raises, as this is still a stub."""
        raise NotImplementedError("Haven't coded this yet...")