Package Bio :: Package motifs :: Module mast
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.mast

  1  # Copyright 2008 by Bartek Wilczynski. 
  2  # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from __future__ import print_function 
  8   
  9  from Bio.Alphabet import IUPAC 
 10  from Bio.motifs import meme 
 11   
 12   
class Record(list):
    """Container for the outcome of a single MAST run.

    A mast.Record stores data about the matches between motifs and
    sequences. Every motif kept in the Record is a meme.Motif object.

    Because mast.Record derives from list, motifs can be fetched by their
    index. A motif can also be looked up through its name:

    >>> from Bio import motifs
    >>> with open("mast.output.txt") as f:
    ...     record = motifs.parse(f, 'MAST')
    >>> motif = record[0]
    >>> print(motif.name)
    1
    >>> motif = record['1']
    >>> print(motif.name)
    1
    """

    def __init__(self):
        """Start with no motifs and empty MAST metadata."""
        self.alphabet = None
        self.diagrams = {}
        self.database = ""
        self.version = ""
        self.sequences = []

    def __getitem__(self, key):
        """Return the motif at a list index, or the first motif named ``key``.

        A ``str`` key is compared against each motif's ``name``; when no
        motif carries that name the result is ``None``. Any other key is
        handed to ``list.__getitem__``.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)
        for candidate in self:
            if candidate.name == key:
                return candidate
        return None
48 49
def read(handle):
    """Parse MAST output from ``handle`` and return it as a Record.

    A fresh Record is created and passed, together with ``handle``, to each
    private reader in turn: the version line, the database and motif table,
    then sections I, II and III. Every reader continues from wherever the
    previous one stopped in ``handle``.
    """
    record = Record()
    stages = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for stage in stages:
        stage(record, handle)
    return record
59 60 61 # Everything below is private 62 63
def __read_version(record, handle):
    """Find the 'MAST version' line and store its third word as the version.

    Lines are consumed from ``handle`` up to and including the first one
    containing 'MAST version'. Raises ValueError when no such line exists.
    """
    for text in handle:
        if "MAST version" not in text:
            continue
        # split() without arguments already ignores surrounding whitespace.
        record.version = text.split()[2]
        return
    raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
71 72
def __read_database_and_motifs(record, handle):
    """Read the 'DATABASE AND MOTIFS' block into ``record``.

    ``record.database`` is set from the second word of the DATABASE line.
    ``record.alphabet`` is set when the third word is '(nucleotide)' or
    '(peptide)' and is left untouched otherwise. One meme.Motif is appended
    to ``record`` per row of the 'MOTIF WIDTH' table: its name comes from
    the first column and its length from the second.

    Raises ValueError when an expected line is malformed, when a row has too
    few columns, or when the input ends before the block is complete.
    """
    def next_line():
        # next() with a default, so a truncated file is reported as a
        # ValueError like every other format problem instead of leaking
        # StopIteration (which silently ends an enclosing map()/generator).
        text = next(handle, None)
        if text is None:
            raise ValueError("Unexpected end of input while reading "
                             "the 'DATABASE AND MOTIFS' block")
        return text

    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    line = next_line()
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next_line()
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    if len(words) < 3:
        raise ValueError("Line does not contain a database name and type:\n%s" % line)
    record.database = words[1]
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    line = next_line()
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    for line in handle:
        if not line.strip():
            # A blank line closes the motif table.
            break
        words = line.strip().split()
        if len(words) < 2:
            raise ValueError("Motif line does not contain a name and a width:\n%s" % line)
        motif = meme.Motif(record.alphabet)
        motif.name = words[0]
        motif.length = int(words[1])
        # words[2] contains the best possible match
        record.append(motif)
104 105
def __read_section_i(record, handle):
    """Read SECTION I and collect the sequence names it lists.

    Skips ahead to the 'SECTION I:' header and then to the 'SEQUENCE NAME'
    column header, checks the '---' ruler beneath it, and appends the first
    whitespace-separated field of every following row to
    ``record.sequences`` until a blank line. The line after that blank line
    must start with '****'.

    Raises ValueError when the ruler or the closing '****' line is missing
    or malformed, including when the input ends early.
    """
    def next_line():
        # next() with a default, so a truncated file is reported as a
        # ValueError like every other format problem instead of leaking
        # StopIteration (which silently ends an enclosing map()/generator).
        text = next(handle, None)
        if text is None:
            raise ValueError("Unexpected end of input while reading SECTION I")
        return text

    for line in handle:
        if line.startswith('SECTION I:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    line = next_line()
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            # A blank line closes the table.
            break
        # Only the name is kept; the rest of the row is not stored.
        record.sequences.append(line.split(None, 1)[0])
    line = next_line()
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
125 126
def __read_section_ii(record, handle):
    """Read SECTION II and store one motif diagram per sequence.

    Skips ahead to the 'SECTION II:' header and then to the 'SEQUENCE NAME'
    column header, and checks the '---' ruler beneath it. Each following row
    must hold exactly three fields (name, p-value, diagram); the diagram is
    stored in ``record.diagrams`` under the name. A row that starts with a
    space is a continuation and is appended to the diagram of the most
    recent sequence. A blank line ends the table, and the line after it must
    start with '****'.

    Raises ValueError when a line is malformed, when a continuation row
    comes before any sequence row, or when the input ends early.
    """
    def next_line():
        # next() with a default, so a truncated file is reported as a
        # ValueError like every other format problem instead of leaking
        # StopIteration (which silently ends an enclosing map()/generator).
        text = next(handle, None)
        if text is None:
            raise ValueError("Unexpected end of input while reading SECTION II")
        return text

    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    line = next_line()
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # Nothing to attach the fragment to; this used to surface
                # as an opaque KeyError(None).
                raise ValueError("Diagram continuation line without a "
                                 "preceding sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            # The three-way unpack also validates the column count; the
            # p-value itself is not stored.
            sequence, _pvalue, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next_line()
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
150 151
def __read_section_iii(record, handle):
    """Advance ``handle`` through the start of SECTION III; store nothing.

    Lines are consumed up to and including, in order: the 'SECTION III:'
    header, the next line starting with '****', the next line starting with
    '*****', and finally the first non-blank line after that. ``record`` is
    not modified, and running out of input is not treated as an error.
    """
    stop_conditions = (
        lambda text: text.startswith('SECTION III:'),
        lambda text: text.startswith('****'),
        lambda text: text.startswith('*****'),
        lambda text: text.strip(),
    )
    for reached in stop_conditions:
        for text in handle:
            if reached(text):
                break
165