Package Bio :: Package motifs :: Module mast
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.mast

  1  # Copyright 2008 by Bartek Wilczynski. 
  2  # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from __future__ import print_function 
  8   
  9  from Bio.Alphabet import IUPAC 
 10  from Bio.motifs import meme 
 11   
 12   
class Record(list):
    """Container for everything parsed out of one MAST output file.

    A mast.Record stores the matches MAST found between motifs and
    sequences.  The Record itself is a list of the motifs (objects of the
    class meme.Motif), so a motif can be fetched by its position.  Indexing
    with a string instead looks the motif up by its name:

    >>> from Bio import motifs
    >>> with open("mast.output.txt") as f:
    ...     record = motifs.parse(f, 'MAST')
    >>> motif = record[0]
    >>> print(motif.name)
    1
    >>> motif = record['1']
    >>> print(motif.name)
    1

    Attributes (all start out empty and are filled in by the parser):

     - version   - string, initially ""
     - database  - string, initially ""
     - alphabet  - initially None
     - sequences - list, initially empty
     - diagrams  - dict, initially empty
    """

    def __init__(self):
        """Create an empty record with no motifs and blank attributes."""
        self.version = ""
        self.database = ""
        self.alphabet = None
        self.sequences = []
        self.diagrams = {}

    def __getitem__(self, key):
        """Return a motif by list index/slice, or by name if key is a str.

        A string key returns the first motif whose ``name`` equals the key,
        or None when no motif carries that name.  Any other key is handed
        to the normal list indexing.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)
        return next((motif for motif in self if motif.name == key), None)
48 49
def read(handle):
    """Parse MAST output from ``handle`` and return it as a Record.

    A fresh Record is created and passed, together with the handle, through
    the section parsers in file order: version line, database and motifs,
    then sections I, II and III.  Each parser continues reading from where
    the previous one stopped.
    """
    record = Record()
    section_parsers = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for parse_section in section_parsers:
        parse_section(record, handle)
    return record
59 60 61 # Everything below is private 62 63
def __read_version(record, handle):
    """Find the 'MAST version' line and store the version on the record.

    Lines are consumed from ``handle`` up to and including the first one
    that contains the text 'MAST version'.  The third whitespace-separated
    word of that line is saved as ``record.version``.

    Raises ValueError if the handle runs out before such a line is seen.
    """
    banner = next((text for text in handle if "MAST version" in text), None)
    if banner is None:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    # split() without arguments already ignores leading/trailing whitespace.
    record.version = banner.split()[2]
71 72
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' part of the file into the record.

    Sets ``record.database`` from the second word of the DATABASE line and
    ``record.alphabet`` from the third word ('(nucleotide)' or '(peptide)';
    any other value leaves the alphabet untouched).  Every row of the motif
    table, up to the first blank line, is appended to the record as a
    meme.Motif with its ``name`` and ``length`` filled in.

    Raises ValueError if the row of asterisks, the DATABASE line or the
    dashed rule under the motif table header is not where it is expected.
    """
    # Skip ahead to the section title.
    for title in handle:
        if title.startswith('DATABASE AND MOTIFS'):
            break
    stars = next(handle)
    if not stars.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % stars)

    database_line = next(handle)
    if 'DATABASE' not in database_line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % database_line)
    fields = database_line.split()
    record.database = fields[1]
    alphabet_tag = fields[2]
    if alphabet_tag == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif alphabet_tag == '(peptide)':
        record.alphabet = IUPAC.protein

    # Move on to the motif table: a header line followed by a dashed rule.
    for header in handle:
        if 'MOTIF WIDTH' in header:
            break
    rule = next(handle)
    if '----' not in rule:
        raise ValueError("Line does not contain '----':\n%s" % rule)

    # One motif per row; a blank line closes the table.
    for row in handle:
        columns = row.split()
        if not columns:
            break
        motif = meme.Motif(record.alphabet)
        motif.name = columns[0]
        motif.length = int(columns[1])
        # columns[2] contains the best possible match
        record.append(motif)
104 105
def __read_section_i(record, handle):
    """Parse SECTION I and collect the sequence names.

    After the 'SECTION I:' title and the 'SEQUENCE NAME' column header, a
    rule starting with '---' is expected.  The first word of every following
    line, up to the first blank line, is appended to ``record.sequences``.
    The line after that blank line must start with '****'.

    Raises ValueError if either expected rule line is missing.
    """
    for title in handle:
        if title.startswith('SECTION I:'):
            break
    for header in handle:
        if header.startswith('SEQUENCE NAME'):
            break
    rule = next(handle)
    if not rule.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % rule)

    for row in handle:
        if not row.strip():
            break
        # Only the name is kept; the rest of the row (description, E-value,
        # length) is not stored.
        name, _remainder = row.split(None, 1)
        record.sequences.append(name)

    closing = next(handle)
    if not closing.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % closing)
125 126
def __read_section_ii(record, handle):
    """Parse SECTION II and store the motif diagram of each sequence.

    After the 'SECTION II:' title and the 'SEQUENCE NAME' column header, a
    rule starting with '---' is expected.  Each following line, up to the
    first blank line, is one of:

     - a line of exactly three words (sequence name, p-value, diagram),
       which starts a new entry ``record.diagrams[name] = diagram``;
     - a line beginning with a space, which continues the diagram of the
       most recent sequence and is appended to it with surrounding
       whitespace removed.

    The line after the blank line must start with '****'.

    Raises ValueError if an expected rule line is missing, if a sequence
    line does not have exactly three words, or if a continuation line
    appears before any sequence line.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    # Name of the sequence whose diagram is currently being assembled.
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # Without this check the lookup below would fail with an
                # UnboundLocalError instead of reporting the bad input.
                raise ValueError("Diagram continuation line found before any sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            # The p-value column is not stored.
            sequence, _pvalue, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
149 150
def __read_section_iii(record, handle):
    """Skip over the start of SECTION III without storing anything.

    Lines are consumed from ``handle`` through, in order: the 'SECTION III:'
    title, the next line starting with '****', the next line starting with
    '*****', and finally the first non-blank line after that.  ``record`` is
    not modified.  If the handle runs out at any point the function simply
    returns.
    """
    def skip_through(is_target):
        # Consume lines up to and including the first one that matches.
        for text in handle:
            if is_target(text):
                return

    skip_through(lambda text: text.startswith('SECTION III:'))
    skip_through(lambda text: text.startswith('****'))
    skip_through(lambda text: text.startswith('*****'))
    skip_through(lambda text: bool(text.strip()))
164