Package Bio :: Package SCOP :: Module Cla
[hide private]
[frames | no frames]

Source Code for Module Bio.SCOP.Cla

  1  # Copyright 2001 by Gavin E. Crooks.  All rights reserved. 
  2  # Modifications Copyright 2010 Jeffrey Finkelstein. All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """ Handle the SCOP CLAssification file, which describes SCOP domains. 
  9   
 10  The file format is described in the SCOP 
 11  "release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html 
 12  The latest CLA file can be found 
 13  "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ 
 14   
 15  "Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73 
 16  (July 2008) 
 17   
 18  """ 
 19   
 20  from . import Residues 
 21   
 22   
class Record(object):
    """One SCOP domain, as described by a single line of a CLA file.

    Attributes:

     - sid - SCOP identifier. e.g. d1danl2
     - residues - The domain definition as a Residues object
     - sccs - SCOP concise classification strings. e.g. b.1.2.1
     - sunid - SCOP unique identifier for this domain
     - hierarchy - A dictionary, keys are nodetype, values are sunid,
       describing the location of this domain in the SCOP hierarchy. See
       the Scop module for a description of nodetypes. This used to be a
       list of (key,value) tuples in older versions of Biopython (see
       Bug 3109).
    """

    def __init__(self, line=None):
        """Create an empty record, or parse ``line`` when one is given.

        Arguments:

         - line - optional tab-separated CLA line; a false value (None or
           an empty string) leaves every attribute at its blank default.
        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = {}
        if line:
            self._process(line)

    def _process(self, line):
        """Fill this record's attributes from one CLA line.

        The line must hold exactly six tab-separated columns: sid, PDB
        identifier, residue definition, sccs, sunid and a comma-separated
        list of ``nodetype=sunid`` pairs.

        Raises ValueError if the column count is not six.
        """
        line = line.rstrip()  # drop the newline and any trailing whitespace
        fields = line.split('\t')
        if len(fields) != 6:
            raise ValueError("I don't understand the format of %s" % line)

        # The plain string columns are stored first, then the parsed ones.
        self.sid = fields[0]
        self.sccs = fields[3]
        self.sunid = fields[4]

        self.residues = Residues.Residues(fields[2])
        self.residues.pdbid = fields[1]
        self.sunid = int(self.sunid)

        for pair in fields[5].split(","):
            nodetype, node_sunid = pair.split('=')
            self.hierarchy[nodetype] = int(node_sunid)

    def __str__(self):
        """Return the record as a tab-separated, newline-terminated line."""
        fields = [self.sid]
        # str(residues) is split on spaces so each piece gets its own column.
        fields.extend(str(self.residues).split(" "))
        fields.extend([self.sccs, self.sunid])
        fields.append(','.join('='.join([nodetype, str(node_sunid)])
                               for nodetype, node_sunid
                               in self.hierarchy.items()))
        return "\t".join(str(field) for field in fields) + "\n"
73 74
def parse(handle):
    """Yield one Cla Record per data line of a CLA file.

    Lines starting with '#' are treated as comments and skipped.

    Arguments:

     - handle - file-like object (any iterable of lines).
    """
    for line in handle:
        if not line.startswith('#'):
            yield Record(line)
86 87
class Index(dict):
    """A CLA file indexed by SCOP identifiers for rapid random access.

    The underlying dictionary maps each sid to the file offset at which its
    line starts. Looking a key up with ``index[key]`` re-opens the file,
    seeks to that offset and returns the parsed Record.
    """

    def __init__(self, filename):
        """Create CLA index.

        Arguments:

         - filename - The file to index

        Raises ValueError (from Record) if a non-comment line is malformed.
        """
        dict.__init__(self)
        self.filename = filename
        # Plain text mode: the "U" flag was removed in Python 3.11, and
        # universal newlines are already the default in Python 3.
        with open(self.filename) as f:
            while True:
                # Take the offset right before each read so it always marks
                # the start of the line just read. Updating it only after
                # record lines left the first record following a comment
                # indexed at the comment's offset.
                position = f.tell()
                line = f.readline()
                if not line:
                    break
                if line.startswith('#'):
                    continue
                record = Record(line)
                key = record.sid
                if key is not None:
                    self[key] = position

    def __getitem__(self, key):
        """Return an item from the indexed file.

        Raises KeyError if ``key`` was not found when the file was indexed.
        """
        position = dict.__getitem__(self, key)

        with open(self.filename) as f:
            f.seek(position)
            line = f.readline()
        return Record(line)
122