Package Bio :: Package motifs :: Module meme
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.meme

  1  # Copyright 2008 by Bartek Wilczynski 
  2  # Adapted from  Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from __future__ import print_function 
  8   
  9  from Bio.Alphabet import IUPAC 
 10  from Bio import Seq 
 11  from Bio import motifs 
 12   
 13   
def read(handle):
    """Parse the text output of the MEME program into a meme.Record object.

    The handle is consumed line by line: first the run header (MEME
    version, training-set data file, alphabet, sequence names and the
    command line), then one block per motif until the line starting with
    "SUMMARY OF MOTIFS" is reached.

    Arguments:
     - handle - an iterator over the lines of a MEME text output file,
       typically an open file object.

    Returns a Record whose items are the Motif objects found in the
    file, in file order.

    Raises ValueError if the stream ends early or a line does not have
    the expected layout.

    Example:

    >>> from Bio.motifs import meme
    >>> with open("meme.output.txt") as f:
    ...     record = meme.read(f)
    >>> for motif in record:
    ...     for instance in motif.instances:
    ...         print(instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue)

    """
    record = Record()
    # The header sections are read in the order they occur in the file.
    __read_version(record, handle)
    __read_datafile(record, handle)
    __read_alphabet(record, handle)
    __read_sequences(record, handle)
    __read_command(record, handle)
    # Skip ahead to the statistics line of the first motif.
    for line in handle:
        if line.startswith('MOTIF 1'):
            break
    else:
        raise ValueError('Unexpected end of stream')
    alphabet = record.alphabet
    # With 'revcomp' in the command line, each site row carries an extra
    # strand column.
    revcomp = 'revcomp' in record.command
    while True:
        # Here 'line' is always the "MOTIF ..." statistics line of the
        # motif about to be parsed.
        length, num_occurrences, evalue = __read_motif_statistics(line)
        name = __read_motif_name(handle)
        instances = __read_motif_sequences(handle, name, alphabet, length, revcomp)
        motif = Motif(alphabet, instances)
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)
        __skip_unused_lines(handle)
        try:
            line = next(handle)
        except StopIteration:
            raise ValueError('Unexpected end of stream: Expected to find new motif, or the summary of motifs')
        if line.startswith("SUMMARY OF MOTIFS"):
            break
        if not line.startswith('MOTIF'):
            raise ValueError("Line does not start with 'MOTIF':\n%s" % line)
    return record
60 61
class Motif(motifs.Motif):
    """Motif as reported in MEME (and MAST) output.

    On top of the generic motifs.Motif this class carries the values
    that MEME reports for each motif: its name, its E-value and the
    number of occurrences.
    """

    def __init__(self, alphabet=None, instances=None):
        """Initialize the base motif and the MEME-specific attributes."""
        motifs.Motif.__init__(self, alphabet, instances)
        # Placeholder values; the parser overwrites them per motif.
        self.name = None
        self.num_occurrences = 0
        self.evalue = 0.0
74 75
class Instance(Seq.Seq):
    """One occurrence (site) of a MEME motif, with its annotation.

    An Instance is a Seq holding the site sequence, extended with the
    name of the motif and of the sequence it was found in, its start
    position, strand, length and p-value.
    """

    def __init__(self, *args, **kwds):
        """Build the underlying Seq and set default annotation values."""
        Seq.Seq.__init__(self, *args, **kwds)
        # Defaults only; the parser fills in the real values per site.
        self.motif_name = ""
        self.sequence_name = ""
        self.strand = 0
        self.start = 0
        self.length = 0
        self.pvalue = 1.0
87 88
class Record(list):
    """A class for holding the results of a MEME run.

    A Record is a list of the motifs found in a MEME output file,
    together with the metadata of the run: the MEME version, the data
    file, the command line, the alphabet and the sequence names.

    Because Record inherits from list, individual motifs can be accessed
    by their index. In addition, indexing with a string looks a motif up
    by its name; if no motif carries that name the lookup returns None.

    >>> from Bio import motifs
    >>> with open("meme.output.txt") as f:
    ...     record = motifs.parse(f, 'MEME')
    >>> motif = record[0]
    >>> print(motif.name)
    Motif 1
    >>> motif = record['Motif 1']
    >>> print(motif.name)
    Motif 1
    """

    def __init__(self):
        """Create an empty record with blank run metadata."""
        list.__init__(self)
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = None
        self.sequences = []

    def __getitem__(self, key):
        """Return a motif by name (str key) or by list index/slice.

        A string key returns the first motif whose name equals the key,
        or None when there is no such motif. Any other key is handled
        by list indexing and raises the usual list errors.
        """
        if isinstance(key, str):
            for motif in self:
                if motif.name == key:
                    return motif
            # Kept for backward compatibility: a miss has always
            # produced None rather than an exception.
            return None
        return list.__getitem__(self, key)
125 126 127 # Everything below is private 128 129
def __read_version(record, handle):
    """Find the 'MEME version' line and store the version in record.

    Lines are consumed from handle up to and including the first line
    that starts with 'MEME version'. The third whitespace-separated
    word of that line is stored as record.version.

    Raises ValueError if no such line is found.
    """
    for text in handle:
        if text.startswith('MEME version'):
            record.version = text.split()[2]
            return
    raise ValueError("Improper input file. File should contain a line starting MEME version.")
139 140
def __read_datafile(record, handle):
    """Read the TRAINING SET section header and store the data file name.

    Skips ahead to the line starting with 'TRAINING SET', then expects a
    line of asterisks followed by the 'DATAFILE= ...' line. The text
    after 'DATAFILE= ' is stored as record.datafile.

    Raises ValueError if the stream ends early or a line is not the one
    expected.
    """
    def expect(prefix):
        # Fetch the next line and insist that it starts with prefix.
        try:
            text = next(handle)
        except StopIteration:
            raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)
        if not text.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, text))
        return text

    # any() stops consuming at the first matching line.
    if not any(text.startswith('TRAINING SET') for text in handle):
        raise ValueError("Unexpected end of stream: 'TRAINING SET' not found.")
    expect('****')
    record.datafile = expect('DATAFILE').strip().replace('DATAFILE= ', '')
162 163
def __read_alphabet(record, handle):
    """Read the 'ALPHABET= ...' line and store the alphabet in record.

    The next line of handle must start with 'ALPHABET'. If the letters
    after 'ALPHABET= ' are exactly 'ACGT', record.alphabet is set to
    IUPAC.unambiguous_dna; any other value selects IUPAC.protein.

    Raises ValueError if the stream ends or the line is not the
    ALPHABET line.
    """
    try:
        text = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'ALPHABET'")
    if not text.startswith('ALPHABET'):
        raise ValueError("Line does not start with 'ALPHABET':\n%s" % text)
    letters = text.strip().replace('ALPHABET= ', '')
    record.alphabet = IUPAC.unambiguous_dna if letters == 'ACGT' else IUPAC.protein
178 179
def __read_sequences(record, handle):
    """Read the table of sequence names and append them to record.sequences.

    Expects the 'Sequence name' title line and a dashed rule, then reads
    table rows until a line starting with '***'. The first word of each
    row is a sequence name; a row of exactly six words holds two
    entries, and its fourth word is the second name.

    Raises ValueError if the stream ends early or a header line is not
    the one expected.
    """
    def expect(prefix):
        # Fetch the next line and insist that it starts with prefix.
        try:
            text = next(handle)
        except StopIteration:
            raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)
        if not text.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, text))

    expect('Sequence name')
    expect('----')
    for row in handle:
        if row.startswith('***'):
            return
        fields = row.split()
        record.sequences.append(fields[0])
        if len(fields) == 6:
            record.sequences.append(fields[3])
    raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
203 204
def __read_command(record, handle):
    """Find the 'command:' line and store the command line in record.

    Lines are consumed from handle up to and including the first line
    that starts with 'command:'. That line, stripped and with the
    'command: ' label removed, is stored as record.command.

    Raises ValueError if no such line is found.
    """
    # The generator is only advanced up to the first matching line.
    found = next((text for text in handle if text.startswith('command:')), None)
    if found is None:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'command'")
    record.command = found.strip().replace('command: ', '')
214 215
def __read_motif_statistics(line):
    """Extract width, number of sites and E-value from a MOTIF line.

    The first five characters of line are dropped and the rest is split
    on whitespace. Words 3, 6 and 12 (zero-based) are taken as the motif
    length, the number of occurrences and the E-value.

    Returns the tuple (length, num_occurrences, evalue) as
    (int, int, float).
    """
    tokens = line[5:].split()
    return int(tokens[3]), int(tokens[6]), float(tokens[12])
223 224
def __read_motif_name(handle):
    """Return the motif name from the 'sorted by position p-value' line.

    Lines are consumed from handle up to and including the first line
    containing 'sorted by position p-value'. The name is the first two
    words of that line joined by a single space.

    Raises ValueError if no such line is found.
    """
    for text in handle:
        if 'sorted by position p-value' in text:
            return " ".join(text.split()[:2])
    raise ValueError('Unexpected end of stream: Failed to find motif name')
235 236
def __read_motif_sequences(handle, motif_name, alphabet, length, revcomp):
    """Read the table of sites of one motif and return them as Instances.

    Expects a dashed rule, the 'Sequence name' title line and another
    dashed rule, then reads one site per row until the closing dashed
    rule.

    Arguments:
     - handle - iterator over the lines of the MEME output.
     - motif_name - name stored on every instance as motif_name.
     - alphabet - alphabet passed to each Instance and to the result.
     - length - expected length of every site sequence.
     - revcomp - if true, each row has a strand column after the
       sequence name; otherwise the strand is taken to be '+'.

    Returns motifs.Instances(instances, alphabet).

    Raises ValueError if the stream ends early, a header line is not
    the one expected, or a site sequence does not have the expected
    length.
    """
    header = (
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
        ('Sequence name', "Unexpected end of stream: Expected to find line starting with 'Sequence name'"),
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
    )
    for prefix, eof_message in header:
        try:
            line = next(handle)
        except StopIteration:
            raise ValueError(eof_message)
        if not line.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, line))
    instances = []
    for line in handle:
        if line.startswith('---'):
            break
        words = line.split()
        # Removing the strand column (when present) lets the remaining
        # columns be indexed the same way in both layouts.
        if revcomp:
            strand = words.pop(1)
        else:
            strand = '+'
        sequence = words[4]
        # Validate with an explicit exception: an assert would vanish
        # under "python -O" and would not match the ValueError raised
        # for all other malformed input.
        if len(sequence) != length:
            raise ValueError("Motif site '%s' has length %s, expected %s:\n%s"
                             % (sequence, len(sequence), length, line))
        instance = Instance(sequence, alphabet)
        instance.motif_name = motif_name
        instance.sequence_name = words[0]
        instance.start = int(words[1])
        instance.pvalue = float(words[2])
        instance.strand = strand
        instance.length = length
        instances.append(instance)
    else:
        raise ValueError('Unexpected end of stream')
    return motifs.Instances(instances, alphabet)
279 280
def __skip_unused_lines(handle):
    """Skip the parts of a motif block that the parser does not use.

    Consumes, in order: everything up to the 'log-odds matrix' line and
    the dashed rule after it, everything up to the 'letter-probability
    matrix' line and the dashed rule after it, everything up to the
    'Time' line, one blank line, one line of asterisks, any further
    blank lines, and a second line of asterisks.

    Raises ValueError if the stream ends early or a line is not the one
    expected.
    """
    eof_template = "Unexpected end of stream: Expected to find line starting with '%s'"
    not_stars_template = "Line does not start with '***':\n%s"

    # Landmarks that are searched for, skipping whatever precedes them.
    # any() stops consuming at the first matching line.
    for marker in ('log-odds matrix', '---', 'letter-probability matrix', '---', 'Time'):
        if not any(text.startswith(marker) for text in handle):
            raise ValueError(eof_template % marker)

    # Directly after the 'Time' line: exactly one blank line ...
    try:
        text = next(handle)
    except StopIteration:
        raise ValueError('Unexpected end of stream: Expected to find blank line')
    if text.strip():
        raise ValueError("Expected blank line, but got:\n%s" % text)

    # ... followed by a line of asterisks.
    try:
        text = next(handle)
    except StopIteration:
        raise ValueError(eof_template % '***')
    if not text.startswith('***'):
        raise ValueError(not_stars_template % text)

    # The next non-blank line must be a line of asterisks as well.
    text = next((candidate for candidate in handle if candidate.strip()), None)
    if text is None:
        raise ValueError(eof_template % '***')
    if not text.startswith('***'):
        raise ValueError(not_stars_template % text)
326