Package Bio :: Package SCOP :: Module Cla
[hide private]
[frames] | [no frames]

Source Code for Module Bio.SCOP.Cla

  1  # Copyright 2001 by Gavin E. Crooks.  All rights reserved. 
  2  # Modifications Copyright 2010 Jeffrey Finkelstein. All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Handle the SCOP CLAssification file, which describes SCOP domains. 
  9   
 10  The file format is described in the scop 
 11  "release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html 
 12  The latest CLA file can be found 
 13  "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ 
 14   
 15  "Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73 
 16  (July 2008) 
 17   
 18  """ 
 19   
 20  from . import Residues 
 21   
 22   
class Record(object):
    """Holds information for one SCOP domain (one line of a CLA file).

    Attributes:
     - sid - SCOP identifier. e.g. d1danl2
     - residues - The domain definition as a Residues object
     - sccs - SCOP concise classification strings. e.g. b.1.2.1
     - sunid - SCOP unique identifier for this domain
     - hierarchy - A dictionary, keys are nodetype, values are sunid,
       describing the location of this domain in the SCOP hierarchy. See
       the Scop module for a description of nodetypes. This used to be a
       list of (key,value) tuples in older versions of Biopython (see
       Bug 3109).

    """

    def __init__(self, line=None):
        """Create a blank record, filling it from ``line`` when one is given.

        Arguments:
         - line - optional CLA line to parse; a false value (None or an
           empty string) leaves every attribute at its default.

        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = {}
        if not line:
            return
        self._process(line)

    def _process(self, line):
        """Populate this record from one tab-separated CLA line.

        Raises ValueError when the line does not have exactly six columns.
        """
        stripped = line.rstrip()  # drop the newline / trailing whitespace
        fields = stripped.split('\t')
        if len(fields) != 6:
            raise ValueError("I don't understand the format of %s" % stripped)

        sid, pdbid, residue_spec, sccs, sunid, nodes = fields
        self.sid, self.sccs, self.sunid = sid, sccs, sunid
        self.residues = Residues.Residues(residue_spec)
        self.residues.pdbid = pdbid
        self.sunid = int(self.sunid)

        # The last column is a comma-separated list of nodetype=sunid pairs.
        for pair in nodes.split(","):
            nodetype, node_sunid = pair.split('=')
            self.hierarchy[nodetype] = int(node_sunid)

    def __str__(self):
        """Return the record as a tab-separated line ending in a newline."""
        fields = [self.sid]
        # str(residues) is split on spaces, each piece becoming its own column.
        fields.extend(str(self.residues).split(" "))
        fields.extend([self.sccs, self.sunid])

        nodes = ['='.join([nodetype, str(node_sunid)])
                 for nodetype, node_sunid in self.hierarchy.items()]
        fields.append(','.join(nodes))

        return "\t".join(str(field) for field in fields) + "\n"
74 75
def parse(handle):
    """Iterate over a CLA file, yielding one Record per data line.

    Lines starting with '#' are treated as comments and skipped.

    Arguments:
     - handle - file-like object.

    """
    data_lines = (entry for entry in handle if not entry.startswith('#'))
    for entry in data_lines:
        yield Record(entry)
87 88
class Index(dict):
    """A CLA file indexed by SCOP identifiers for rapid random access.

    The underlying dict maps each SCOP identifier (sid) to the offset of
    its line in the file. Looking a key up with ``index[sid]`` reopens the
    file, reads that one line and returns it parsed as a Record.
    """

    def __init__(self, filename):
        """Create CLA index.

        Arguments:
         - filename - The file to index

        Raises ValueError (from Record) if a non-comment line is malformed.

        """
        dict.__init__(self)
        self.filename = filename
        # Plain text mode: universal newlines are already the default, and
        # the old "U" mode flag is rejected by Python 3.11 and later.
        with open(self.filename) as f:
            while True:
                # Take the offset *before* reading, so that it is always
                # the start of the line just read. Updating it only after
                # data lines left the first record following a comment
                # block pointing at the comment instead of at itself.
                position = f.tell()
                line = f.readline()
                if not line:
                    break
                if line.startswith('#'):
                    continue
                record = Record(line)
                key = record.sid
                if key is not None:
                    self[key] = position

    def __getitem__(self, key):
        """Return an item from the indexed file.

        Raises KeyError if ``key`` was not found when the file was indexed.
        """
        position = dict.__getitem__(self, key)

        with open(self.filename) as f:
            f.seek(position)
            line = f.readline()
        return Record(line)
124