Package Bio :: Package KEGG :: Package Enzyme
[hide private]
[frames | no frames]

Source Code for Package Bio.KEGG.Enzyme

  1  # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved. 
  2  # Copyright 2007 by Michiel de Hoon.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Code to work with the KEGG Enzyme database. 
  8   
  9  Functions: 
 10  parse - Returns an iterator giving Record objects. 
 11   
 12  Classes: 
 13  Record               -- Holds the information from a KEGG Enzyme record. 
 14  """ 
 15   
 16  from __future__ import print_function 
 17   
 18  from Bio.KEGG import _write_kegg 
 19  from Bio.KEGG import _wrap_kegg 
 20   
 21   
 22  # Set up line wrapping rules (see Bio.KEGG._wrap_kegg) 
 23  rxn_wrap = [0, "", 
 24              (" + ", "", 1, 1), 
 25              (" = ", "", 1, 1), 
 26              (" ", "$", 1, 1), 
 27              ("-", "$", 1, 1)] 
 28  name_wrap = [0, "", 
 29               (" ", "$", 1, 1), 
 30               ("-", "$", 1, 1)] 
 31  id_wrap = lambda indent: [indent, "", (" ", "", 1, 0)] 
 32  struct_wrap = lambda indent: [indent, "", ("  ", "", 1, 1)] 
 33   
 34   
class Record(object):
    """Holds info from a KEGG Enzyme record.

    Members:
    entry       The EC number (without the 'EC ').
    name        A list of the enzyme names.
    classname   A list of the classification terms.
    sysname     A list of the systematic name strings of the enzyme.
    reaction    A list of the reaction description strings.
    substrate   A list of the substrates.
    product     A list of the products.
    inhibitor   A list of the inhibitors.
    cofactor    A list of the cofactors.
    effector    A list of the effectors.
    comment     A list of the comment strings.
    pathway     A list of 3-tuples: (database, id, pathway)
    genes       A list of 2-tuples: (organism, list of gene ids)
    disease     A list of 3-tuples: (database, id, disease)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of db ids)
    """

    def __init__(self):
        """Create a new, empty Record."""
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record.

        The per-field sections are concatenated in a fixed order and the
        result is terminated by the "///" record separator.
        """
        sections = [
            self._entry(),
            self._name(),
            self._classname(),
            self._sysname(),
            self._reaction(),
            self._substrate(),
            self._product(),
            self._inhibitor(),
            self._cofactor(),
            self._effector(),
            self._comment(),
            self._pathway(),
            self._genes(),
            self._disease(),
            self._structures(),
            self._dblinks(),
        ]
        return "".join(sections) + "///"

    def _entry(self):
        """Format the ENTRY section ("EC " is prepended to the EC number)."""
        return _write_kegg("ENTRY",
                           ["EC " + self.entry])

    def _name(self):
        """Format the NAME section, one wrapped item per enzyme name."""
        return _write_kegg("NAME",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.name])

    def _classname(self):
        """Format the CLASS section (items are passed through unwrapped)."""
        return _write_kegg("CLASS",
                           self.classname)

    def _sysname(self):
        """Format the SYSNAME section."""
        return _write_kegg("SYSNAME",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.sysname])

    def _reaction(self):
        """Format the REACTION section using the reaction wrap rule."""
        return _write_kegg("REACTION",
                           [_wrap_kegg(line, wrap_rule=rxn_wrap)
                            for line in self.reaction])

    def _substrate(self):
        """Format the SUBSTRATE section."""
        return _write_kegg("SUBSTRATE",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.substrate])

    def _product(self):
        """Format the PRODUCT section."""
        return _write_kegg("PRODUCT",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.product])

    def _inhibitor(self):
        """Format the INHIBITOR section."""
        return _write_kegg("INHIBITOR",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.inhibitor])

    def _cofactor(self):
        """Format the COFACTOR section."""
        return _write_kegg("COFACTOR",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.cofactor])

    def _effector(self):
        """Format the EFFECTOR section."""
        return _write_kegg("EFFECTOR",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.effector])

    def _comment(self):
        """Format the COMMENT section (wrapped on spaces, no indent)."""
        return _write_kegg("COMMENT",
                           [_wrap_kegg(line, wrap_rule=id_wrap(0))
                            for line in self.comment])

    def _pathway(self):
        """Format the PATHWAY section from (database, id, pathway) tuples."""
        # Two spaces separate the id from the name; the continuation
        # indent of 16 matches the width of a "PATH: map00010  " prefix.
        items = [entry[0] + ": " + entry[1] + "  " + entry[2]
                 for entry in self.pathway]
        return _write_kegg("PATHWAY",
                           [_wrap_kegg(line, wrap_rule=id_wrap(16))
                            for line in items])

    def _genes(self):
        """Format the GENES section from (organism, gene ids) tuples."""
        items = [entry[0] + ": " + " ".join(entry[1])
                 for entry in self.genes]
        return _write_kegg("GENES",
                           [_wrap_kegg(line, wrap_rule=id_wrap(5))
                            for line in items])

    def _disease(self):
        """Format the DISEASE section from (database, id, disease) tuples."""
        # Two spaces separate the id from the name; the continuation
        # indent of 13 matches the width of a "MIM: 103700  " prefix.
        items = [entry[0] + ": " + entry[1] + "  " + entry[2]
                 for entry in self.disease]
        return _write_kegg("DISEASE",
                           [_wrap_kegg(line, wrap_rule=id_wrap(13))
                            for line in items])

    def _structures(self):
        """Format the STRUCTURES section from (database, ids) tuples."""
        # Ids are joined (and terminated) with the two-space separator
        # that struct_wrap splits on.
        items = [entry[0] + ": " + "  ".join(entry[1]) + "  "
                 for entry in self.structures]
        return _write_kegg("STRUCTURES",
                           [_wrap_kegg(line, wrap_rule=struct_wrap(5))
                            for line in items])

    def _dblinks(self):
        """Format the DBLINKS section from (database, db ids) tuples."""
        # __str__ requires this method. NOTE(review): the indent of 9 is
        # taken from the upstream Biopython implementation - confirm.
        items = [entry[0] + ": " + " ".join(entry[1])
                 for entry in self.dblinks]
        return _write_kegg("DBLINKS",
                           [_wrap_kegg(line, wrap_rule=id_wrap(9))
                            for line in items])
186
196 197
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> with open("KEGG/enzyme.sample") as handle:
    ...     for record in parse(handle):
    ...         print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase

    A record is yielded each time a line starting with "///" is read;
    data after the last "///" terminator is not yielded.
    """
    record = Record()
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        # The first 12 columns hold the field keyword, padded with spaces.
        # A blank keyword column marks a continuation line that belongs to
        # the most recently seen keyword.
        if line[:12] != "            ":
            keyword = line[:12]
        data = line[12:].strip()
        if keyword == "ENTRY       ":
            # The EC number is the second word of the ENTRY data.
            words = data.split()
            record.entry = words[1]
        elif keyword == "CLASS       ":
            record.classname.append(data)
        elif keyword == "COFACTOR    ":
            record.cofactor.append(data)
        elif keyword == "COMMENT     ":
            record.comment.append(data)
        elif keyword == "DBLINKS     ":
            if ":" in data:
                # Split on the first colon only so that a colon inside
                # the values does not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation: more ids for the last database.
                key, values = record.dblinks[-1]
                values.extend(data.split())
                record.dblinks[-1] = (key, values)
        elif keyword == "DISEASE     ":
            if ":" in data:
                database, data = data.split(":", 1)
                number, name = data.split(None, 1)
                record.disease.append((database, number, name))
            else:
                # Continuation: the disease name wrapped onto a new line.
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "EFFECTOR    ":
            record.effector.append(data.strip(";"))
        elif keyword == "GENES       ":
            # A new organism line starts with a 3- or 4-letter code
            # followed by ": "; anything else continues the last one.
            if data[3:5] == ': ' or data[4:6] == ': ':
                key, values = data.split(":", 1)
                # Drop the parenthesised suffix attached to each gene id.
                values = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, values))
            else:
                key, values = record.genes[-1]
                values.extend(value.split("(")[0] for value in data.split())
                record.genes[-1] = (key, values)
        elif keyword == "INHIBITOR   ":
            record.inhibitor.append(data.strip(";"))
        elif keyword == "NAME        ":
            record.name.append(data.strip(";"))
        elif keyword == "PATHWAY     ":
            if data[:5] == 'PATH:':
                _, map_num, name = data.split(None, 2)
                pathway = ('PATH', map_num, name)
            else:
                ec_num, name = data.split(None, 1)
                pathway = ('PATH', ec_num, name)
            record.pathway.append(pathway)
        elif keyword == "PRODUCT     ":
            record.product.append(data.strip(";"))
        elif keyword == "REACTION    ":
            record.reaction.append(data.strip(";"))
        elif keyword == "STRUCTURES  ":
            if data[:4] == 'PDB:':
                database = data[:3]
                accessions = data[4:].split()
                record.structures.append((database, accessions))
            else:
                # Continuation: more accessions for the last database.
                database, accessions = record.structures[-1]
                accessions.extend(data.split())
                record.structures[-1] = (database, accessions)
        elif keyword == "SUBSTRATE   ":
            record.substrate.append(data.strip(";"))
        elif keyword == "SYSNAME     ":
            record.sysname.append(data.strip(";"))
310 311
def read(handle):
    """Parse a KEGG Enzyme file with exactly one entry.

    A ValueError is raised when the handle yields no record at all, or
    when it yields more than one.  For example:

    >>> with open("KEGG/enzyme.new") as handle:
    ...     record = read(handle)
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    6.2.1.25 benzoate---CoA ligase
    """
    records = parse(handle)
    only = next(records, None)
    if only is None:
        raise ValueError("No records found in handle")
    # Pull once more: anything other than exhaustion means extra records.
    extra = next(records, None)
    if extra is not None:
        raise ValueError("More than one record found in handle")
    return only
# Run the doctests embedded in this module when it is executed as a script.
if __name__ == "__main__":
    from Bio import _utils
    _utils.run_doctest()