Package Bio :: Package NeuralNetwork :: Package Gene :: Module Pattern
[hide private]
[frames] | no frames]

Source Code for Module Bio.NeuralNetwork.Gene.Pattern

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  """Generic functionality useful for all gene representations. 
  7   
  8  This module contains classes which can be used for all the different 
  9  types of patterns available for representing gene information (ie. motifs, 
 10  signatures and schemas). These are the general classes which should be 
 11  able to handle any of the different specific patterns. 
 12  """ 
 13  # standard library 
 14  import random 
 15   
 16  # biopython 
 17  from Bio.Alphabet import _verify_alphabet 
 18  from Bio.Seq import Seq, MutableSeq 
 19   
 20   
class PatternIO(object):
    """Allow reading and writing of patterns to files.

    This just defines a simple persistence class for patterns, making
    it easy to write them to a file and read them back.
    """

    def __init__(self, alphabet=None):
        """Initialize the reader and writer class.

        Arguments:

        o alphabet - An optional argument specifying the alphabet
        which patterns should follow. If an alphabet is set it'll be used
        to verify that all patterns follow it.

        Attributes:

        o separator - A character to use in separating items in a signature
        when it is written to a file and read back. This character should
        not be in the possible alphabet of the sequences, or there will
        be trouble.
        """
        self._alphabet = alphabet

        self.separator = ";"

    def write(self, pattern_list, output_handle):
        """Write a list of patterns to the given handle.

        Each pattern is written on its own line. Patterns that are lists
        or tuples (signatures) have their items joined with the separator
        first; anything else is written as it is.

        Arguments:

        o pattern_list - The patterns to write, one per output line.

        o output_handle - An object with a write() method.
        """
        for pattern in pattern_list:
            # signatures are flattened to a single separator-joined string
            if isinstance(pattern, (list, tuple)):
                string_pattern = self.separator.join(pattern)
            # the normal case, the pattern is written directly
            else:
                string_pattern = pattern

            output_handle.write("%s\n" % string_pattern)

    def write_seq(self, seq_pattern_list, output_handle):
        """Convenience function to write Seq objects to a file.

        This can take Seqs and MutableSeqs, and write them to a file
        as strings.

        Arguments:

        o seq_pattern_list - The Seq or MutableSeq objects to write.

        o output_handle - An object with a write() method.

        Raises ValueError if an item is neither a Seq nor a MutableSeq.
        """
        # convert the seq patterns into just string patterns
        all_patterns = []

        for seq_pattern in seq_pattern_list:
            if isinstance(seq_pattern, MutableSeq):
                all_patterns.append(str(seq_pattern.toseq()))
            elif isinstance(seq_pattern, Seq):
                all_patterns.append(str(seq_pattern))
            else:
                # Wrap the operand in a tuple: a bare tuple (for instance
                # a signature passed by mistake) would otherwise be unpacked
                # by the % operator and break the error message itself.
                raise ValueError("Unexpected pattern type %r"
                                 % (seq_pattern,))

        self.write(all_patterns, output_handle)

    def read(self, input_handle):
        """Read patterns from the specified handle.

        Every line is stripped of trailing whitespace. A line containing
        the separator is split on it and returned as a tuple (a signature),
        any other line is returned as a plain string.

        If an alphabet was given at initialization, every pattern item is
        checked against it.

        Arguments:

        o input_handle - An object with a readline() method.

        Returns a list of the patterns, in the order they were read.

        Raises ValueError if a pattern item fails the alphabet check.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            # an empty result from readline() marks the end of the input
            if not cur_line:
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if isinstance(cur_pattern, tuple):
                    test_pattern = cur_pattern
                else:
                    test_pattern = [cur_pattern]

                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not _verify_alphabet(pattern_seq):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
111 112
class PatternRepository(object):
    """This holds a list of specific patterns found in sequences.

    This is designed to be a general holder for a set of patterns and
    should be subclassed for specific implementations (ie. holding Motifs
    or Signatures).
    """

    def __init__(self, pattern_info):
        """Initialize a repository with patterns.

        Arguments:

        o pattern_info - A representation of all of the patterns found in
        a *Finder search. This should be a dictionary, where the keys
        are patterns, and the values are the number of times a pattern is
        found.

        The patterns are represented internally as a list of two
        tuples, where the first element is the number of times a pattern
        occurs, and the second is the pattern itself. This makes it easy
        to sort the list and return the top N patterns.
        """
        self._pattern_dict = pattern_info

        # create the list representation, ordered from the highest
        # (count, pattern) tuple to the lowest
        self._pattern_list = [(self._pattern_dict[pattern_name], pattern_name)
                              for pattern_name in self._pattern_dict]
        self._pattern_list.sort(reverse=True)

    def get_all(self):
        """Retrieve all of the patterns in the repository.

        The patterns are returned in the order of the internal list,
        highest count first.
        """
        return [pattern for _, pattern in self._pattern_list]

    def get_random(self, num_patterns):
        """Retrieve the specified number of patterns randomly.

        Randomly selects distinct patterns from the list and returns them.

        Arguments:

        o num_patterns - The total number of patterns to return. Zero or
        a negative number gives an empty list.

        Raises ValueError if more patterns are requested than the
        repository currently holds, since the request can never be
        satisfied with distinct patterns.
        """
        if num_patterns <= 0:
            return []

        available = len(self._pattern_list)
        if num_patterns > available:
            raise ValueError("Requested %s random patterns, but only %s "
                             "are available" % (num_patterns, available))

        # sample() picks without replacement, so no duplicate check is needed
        chosen = random.sample(self._pattern_list, num_patterns)
        return [pattern for _, pattern in chosen]

    def get_top_percentage(self, percent):
        """Return a percentage of the patterns.

        This returns the top 'percent' percentage of the patterns in the
        repository.

        Arguments:

        o percent - The fraction of the patterns to return. It is
        multiplied by the number of patterns and truncated to an integer.
        """
        all_patterns = self.get_all()

        num_to_return = int(len(all_patterns) * percent)

        return all_patterns[:num_to_return]

    def get_top(self, num_patterns):
        """Return the specified number of most frequently occurring patterns.

        Arguments:

        o num_patterns - The number of patterns to return.
        """
        return [pattern for _, pattern in self._pattern_list[:num_patterns]]

    def get_differing(self, top_num, bottom_num):
        """Retrieve patterns that are at the extreme ranges.

        This returns both patterns at the top of the list (ie. the same as
        returned by get_top) and at the bottom of the list. This
        is especially useful for patterns that are the differences between
        two sets of patterns.

        Arguments:

        o top_num - The number of patterns to take from the top of the list.

        o bottom_num - The number of patterns to take from the bottom of
        the list. Zero (or less) takes nothing from the bottom.
        """
        # first get from the top of the list
        top_items = self._pattern_list[:top_num]

        # then from the bottom; a slice of [-0:] would be the whole list,
        # so a non-positive count has to be handled explicitly
        if bottom_num > 0:
            bottom_items = self._pattern_list[-bottom_num:]
        else:
            bottom_items = []

        return [pattern for _, pattern in top_items + bottom_items]

    def remove_polyA(self, at_percentage=.9):
        """Remove patterns which are likely due to polyA tails from the lists.

        This is just a helper function to remove patterns which are likely
        just due to polyA tails, and thus are not really great motifs.
        This will also get rid of stuff like ATATAT, which might be a
        useful motif, so use at your own discretion.

        Only the internal pattern list is changed; the counts reported by
        count() are left as they are.

        XXX Could we write a more general function, based on info content
        or something like that?

        Arguments:

        o at_percentage - The percentage of A and T residues in a pattern
        that qualifies it for being removed.
        """
        kept = []
        for pattern_info in self._pattern_list:
            pattern = pattern_info[1]
            length = len(pattern)
            # an empty pattern has no residues, so it can not be AT rich
            if length == 0:
                pattern_at = 0.0
            else:
                pattern_at = (float(pattern.count('A') + pattern.count('T'))
                              / length)

            if not pattern_at > at_percentage:
                kept.append(pattern_info)

        # update the master list in place, keeping the same list object
        self._pattern_list[:] = kept

    def count(self, pattern):
        """Return the number of times the specified pattern is found.

        Patterns which are not in the repository have a count of zero.
        """
        try:
            return self._pattern_dict[pattern]
        except KeyError:
            return 0
260