Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17   
 18  import os 
 19  import array 
 20  import shelve 
 21   
 22   
 23  try: 
 24      import cPickle as pickle  # Only available under Python 2 
 25  except ImportError: 
 26      import pickle  # Python 3 
 27   
 28   
class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The shelf is kept on the ``data`` attribute.  An existing index is
    opened read-only; if it cannot be opened (or ``truncate`` is true) a
    new, empty shelf is created and stamped with the format version.

    """

    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    # NOTE(review): this class derives from dict but stores the shelf in
    # self.data and overrides no mapping methods, so index[key] and
    # friends act on the (empty) in-memory dict, not on the shelf.
    # Confirm whether callers are expected to go through ``.data``.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf called *indexname*, creating it if necessary.

        Arguments:
         - indexname - file name (without extension) handed to shelve.open.
         - truncate  - if true, discard any existing index and start a
           new, empty one.

        Raises IOError if an existing index carries no version entry or a
        version different from the one this class writes.
        """
        dict.__init__(self)

        if truncate:
            self._remove_dumbdbm_files(indexname)
            self._create_shelf(indexname)
            return

        try:
            self.data = shelve.open(indexname, flag='r')
        except Exception:  # TODO: Which exception?
            # No database exists.
            self._create_shelf(indexname)
            return

        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    @staticmethod
    def _remove_dumbdbm_files(indexname):
        """Delete the files dumbdbm may have left behind for *indexname*."""
        # In python 1.52 and before, dumbdbm (under shelve)
        # doesn't clear the old database.
        for suffix in ('.dir', '.dat', '.bak'):
            try:
                os.unlink(indexname + suffix)
            except OSError:
                # Missing or unremovable file: removal is best effort,
                # the shelf is re-created with flag='n' afterwards anyway.
                pass

    def _create_shelf(self, indexname):
        """Create a new, empty shelf and record the format version in it."""
        self.data = shelve.open(indexname, flag='n')
        self.data[self.__version_key] = self.__version

    def __del__(self):
        """Close the shelf, if one was ever opened."""
        shelf = self.__dict__.get('data')
        if shelf is not None:
            shelf.close()
class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The whole index lives in this dict.  It is read from ``indexname``
    when the object is created and written back when the object is
    deleted, but only if it was modified in between.

    """

    # File Format:
    # version
    # key value
    # [...]

    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Load the index stored in *indexname*, if that file exists.

        Arguments:
         - indexname - path of the index file.
         - truncate  - if true, delete an existing index file first and
           start with an empty index.

        Raises IOError if the file was written with a different version.
        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0     # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    key, value = self._toobj(key), self._toobj(value)
                    # Bypass our own __setitem__ so that loading never
                    # marks the index as changed: if a later line is
                    # corrupt, __del__ must not overwrite the file with
                    # the partially loaded contents.
                    dict.__setitem__(self, key, value)
            self.__changed = 0

    def update(self, dict):
        """Merge *dict* into the index and mark the index as changed."""
        self.__changed = 1
        # The parameter is called ``dict`` and hides the builtin inside
        # this method, so the base implementation is reached via super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store *value* under *key* and mark the index as changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove *key* and mark the index as changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index as changed."""
        self.__changed = 1
        dict.clear(self)

    def pop(self, key, *default):
        """Remove *key* and return its value, marking the index as changed."""
        result = dict.pop(self, key, *default)
        self.__changed = 1
        return result

    def popitem(self):
        """Remove and return some (key, value) pair, marking the index as changed."""
        item = dict.popitem(self)
        self.__changed = 1
        return item

    def setdefault(self, key, default=None):
        """Return self[key], inserting *default* first if the key is missing."""
        if key not in self:
            self.__changed = 1
        return dict.setdefault(self, key, default)

    def __del__(self):
        """Write the index back to its file if it was modified."""
        if self.__changed:
            with open(self._indexname, 'w') as handle:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))
            # The file now matches memory; avoid writing it a second time.
            self.__changed = 0

    def _tostr(self, obj):
        """Return *obj* encoded as a whitespace-free string."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its (signed) integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things, but
        # it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(str(i) for i in intlist)

    def _toobj(self, str):
        """Return the object encoded in *str* by _tostr."""
        intlist = array.array('b', [int(i) for i in str.split(',')])
        # Rebuild the raw pickle bytes.  Going through chr() would yield
        # text instead of bytes and fails for the negative values that
        # signed bytes above 127 are stored as.
        try:
            raw = intlist.tobytes()
        except AttributeError:  # Python 2 arrays only offer tostring()
            raw = intlist.tostring()
        # NOTE: unpickling executes whatever the index file describes;
        # only open index files from a trusted source.
        return pickle.loads(raw)


Index = _InMemoryIndex