Package Bio :: Package NeuralNetwork :: Package Gene :: Module Pattern
[hide private]
[frames] | no frames]

Source Code for Module Bio.NeuralNetwork.Gene.Pattern

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  """Generic functionality useful for all gene representations. 
  7   
  8  This module contains classes which can be used for all the different 
  9  types of patterns available for representing gene information (ie. motifs, 
 10  signatures and schemas). These are the general classes which should 
 11  handle any of the different specific patterns. 
 12  """ 
 13  # standard library 
 14  import random 
 15   
 16  # biopython 
 17  from Bio.Alphabet import _verify_alphabet 
 18  from Bio.Seq import Seq, MutableSeq 
 19   
 20  __docformat__ = "restructuredtext en" 
 21   
 22   
class PatternIO(object):
    """Read and write patterns to and from file handles.

    A minimal persistence layer: one pattern is stored per line, and the
    items of a signature (a list or tuple of patterns) are joined with
    ``separator`` on that line.
    """

    def __init__(self, alphabet=None):
        """Set up the reader/writer.

        Arguments:

        - alphabet - Optional alphabet that patterns should follow. When
          one is given, every pattern read back is verified against it.

        Attributes:

        - separator - Character placed between the items of a signature
          when it is written, and split on when it is read back. It should
          not occur in the alphabet of the sequences themselves.
        """
        self.separator = ";"
        self._alphabet = alphabet

    def write(self, pattern_list, output_handle):
        """Write each pattern in pattern_list to output_handle, one per line.

        Lists and tuples are treated as signatures and their items are
        joined with the separator; anything else is written as-is.
        """
        for entry in pattern_list:
            is_signature = isinstance(entry, (list, tuple))
            text = self.separator.join(entry) if is_signature else entry
            output_handle.write("%s\n" % text)

    def write_seq(self, seq_pattern_list, output_handle):
        """Write Seq or MutableSeq patterns to output_handle as strings.

        Every item is converted to a plain string first, so nothing is
        written if any item has an unexpected type.

        Raises ValueError for an item that is neither a Seq nor a
        MutableSeq.
        """
        as_strings = []
        for item in seq_pattern_list:
            if isinstance(item, MutableSeq):
                # mutable sequences go through an immutable Seq first
                item = item.toseq()
            elif not isinstance(item, Seq):
                raise ValueError("Unexpected pattern type %r" % item)
            as_strings.append(str(item))

        self.write(as_strings, output_handle)

    def read(self, input_handle):
        """Read patterns back from input_handle, one per line.

        Trailing whitespace is stripped from each line. A line containing
        the separator is returned as a tuple of its items (a signature);
        any other line is returned as a single string.

        Raises ValueError if an alphabet was supplied and a pattern item
        does not verify against it.
        """
        patterns = []

        line = input_handle.readline()
        while line:
            entry = line.rstrip()
            if self.separator in entry:
                # a signature: split it back into its items
                entry = tuple(entry.split(self.separator))

            if self._alphabet is not None:
                # wrap a lone pattern so both shapes are checked the same way
                members = entry if isinstance(entry, tuple) else [entry]
                for member in members:
                    if not _verify_alphabet(Seq(member, self._alphabet)):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (entry, self._alphabet))

            patterns.append(entry)
            line = input_handle.readline()

        return patterns
113 114
class PatternRepository(object):
    """Hold a ranked list of specific patterns found in sequences.

    This is designed to be a general holder for a set of patterns and
    should be subclassed for specific implementations (ie. holding Motifs
    or Signatures).
    """

    def __init__(self, pattern_info):
        """Initialize a repository with patterns.

        Arguments:

        - pattern_info - A representation of all of the patterns found in
          a finder search. This should be a dictionary, where the keys
          are patterns, and the values are the number of times a pattern is
          found.

        The patterns are represented internally as a list of two-tuples,
        where the first element is the number of times a pattern occurs,
        and the second is the pattern itself. The list is kept sorted from
        most to least frequent, which makes it easy to return the top N
        patterns.
        """
        self._pattern_dict = pattern_info

        # (count, pattern) pairs, most frequent first
        self._pattern_list = [(count, pattern)
                              for pattern, count in pattern_info.items()]
        self._pattern_list.sort(reverse=True)

    def get_all(self):
        """Retrieve all of the patterns, most frequent first."""
        return [pattern for _count, pattern in self._pattern_list]

    def get_random(self, num_patterns):
        """Retrieve the specified number of distinct patterns at random.

        Arguments:

        - num_patterns - The total number of patterns to return.

        Raises ValueError if more patterns are requested than the
        repository currently holds; without this check the selection loop
        below could never finish.
        """
        available = len(self._pattern_list)
        if num_patterns > available:
            raise ValueError("Cannot pick %s random patterns, only %s available"
                             % (num_patterns, available))

        chosen = []
        seen = set()
        while len(chosen) < num_patterns:
            # pick a pattern, and only add it if it is not already present
            pattern = random.choice(self._pattern_list)[1]
            if pattern not in seen:
                seen.add(pattern)
                chosen.append(pattern)

        return chosen

    def get_top_percentage(self, percent):
        """Return the top fraction of the patterns.

        Arguments:

        - percent - Fraction of the repository to return; the number of
          patterns is int(total * percent), taken from the top of the
          list.
        """
        all_patterns = self.get_all()
        num_to_return = int(len(all_patterns) * percent)
        return all_patterns[:num_to_return]

    def get_top(self, num_patterns):
        """Return the specified number of most frequently occurring patterns.

        Arguments:

        - num_patterns - The number of patterns to return.
        """
        return [pattern
                for _count, pattern in self._pattern_list[:num_patterns]]

    def get_differing(self, top_num, bottom_num):
        """Retrieve patterns that are at the extreme ranges.

        This returns both patterns at the top of the list (ie. the same as
        returned by get_top) and at the bottom of the list. This
        is especially useful for patterns that are the differences between
        two sets of patterns.

        Arguments:

        - top_num - The number of patterns to take from the top of the list.

        - bottom_num - The number of patterns to take from the bottom of
          the list. Zero takes nothing from the bottom.

        The two slices are taken independently, so a pattern is returned
        twice if the top and bottom ranges overlap.
        """
        top = self._pattern_list[:top_num]
        # a slice starting at -0 is the whole list, so zero needs its own case
        bottom = self._pattern_list[-bottom_num:] if bottom_num != 0 else []

        return [pattern for _count, pattern in top + bottom]

    def remove_polyA(self, at_percentage=.9):
        """Remove patterns which are likely due to polyA tails from the list.

        This is just a helper function to remove patterns which are likely
        just due to polyA tails, and thus are not really great motifs.
        This will also get rid of stuff like ATATAT, which might be a
        useful motif, so use at your own discretion.

        Only the ranked list is filtered; count() still reports the
        original number of occurrences for a removed pattern.

        XXX Could we write a more general function, based on info content
        or something like that?

        Arguments:

        - at_percentage - The fraction of A and T residues in a pattern
          above which it is removed.
        """
        kept = []
        for pattern_info in self._pattern_list:
            pattern = pattern_info[1]
            if len(pattern) == 0:
                # an empty pattern has no residues; avoid dividing by zero
                at_fraction = 0.0
            else:
                at_total = pattern.count('A') + pattern.count('T')
                at_fraction = float(at_total) / len(pattern)

            if not at_fraction > at_percentage:
                kept.append(pattern_info)

        # replace the contents in place so the list object stays the same
        self._pattern_list[:] = kept

    def count(self, pattern):
        """Return the number of times the specified pattern is found.

        Patterns that were never recorded count as 0.
        """
        try:
            return self._pattern_dict[pattern]
        except KeyError:
            return 0
262