Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17   
 18  import os 
 19  import array 
 20  import shelve 
 21   
 22   
 23  try: 
 24      import cPickle as pickle  # Only available under Python 2 
 25  except ImportError: 
 26      import pickle  # Python 3 
 27   
 28   
class _ShelveIndex(dict):
    """An index file wrapped around shelve (PRIVATE).

    The open shelf is exposed as ``self.data``.  The reserved key
    ``'__version'`` inside the shelf records the format version that
    wrote it, and is checked whenever an existing shelf is opened.
    """

    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf stored under indexname, creating it if needed.

        Arguments:
         - indexname - base filename handed to ``shelve.open``.
         - truncate  - if true, discard any existing index and start
           again from an empty shelf.

        Raises IOError if an existing shelf has no version entry or was
        written with a different version.
        """
        dict.__init__(self)

        if truncate:
            self._remove_dumbdbm_files(indexname)
            self.data = self._create_shelf(indexname)
            return

        try:
            self.data = shelve.open(indexname, flag='r')
        except Exception:  # TODO: Which exception?
            # No (readable) database exists, so start a new one.  Note
            # that flag='n' always creates a new, empty database.
            self.data = self._create_shelf(indexname)
            return

        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key)
        if version is None:
            raise IOError("Unrecognized index format")
        if version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    @staticmethod
    def _remove_dumbdbm_files(indexname):
        """Delete the '.dir', '.dat' and '.bak' files of an old index."""
        # In python 1.52 and before, dumbdbm (under shelve)
        # doesn't clear the old database.
        try:
            for suffix in ('.dir', '.dat', '.bak'):
                path = indexname + suffix
                if os.path.exists(path):
                    os.unlink(path)
        except OSError:
            # Best effort only: the shelf is reopened with flag='n'
            # straight afterwards, which starts from an empty database.
            pass

    def _create_shelf(self, indexname):
        """Return a brand new shelf stamped with the current version."""
        shelf = shelve.open(indexname, flag='n')
        shelf[self.__version_key] = self.__version
        return shelf

    def __del__(self):
        """Close the shelf, if one was ever opened."""
        # 'data' is missing when __init__ failed before opening a shelf.
        if 'data' in self.__dict__:
            self.data.close()
class _InMemoryIndex(dict):
    """Creates an in-memory index file (PRIVATE).

    The index behaves like a dictionary.  Entries are loaded from the
    file ``indexname`` when the object is created, and the whole index
    is written back to that file from ``__del__`` if it was modified.

    NOTE: keys and values are stored as pickles, so loading an index
    file unpickles its contents.  Only open index files you trust.
    """

    # File Format:
    # version
    # key value
    # [...]

    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Load the index stored in the file indexname, if it exists.

        Arguments:
         - indexname - path of the index file.
         - truncate  - if true, delete any existing index file and
           start with an empty index.

        Raises IOError if the file was written with another version.
        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0  # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    key, value = self._toobj(key), self._toobj(value)
                    self[key] = value
            # Loading went through __setitem__, which marked the index
            # as changed; it now matches the file again.
            self.__changed = 0

    def update(self, dict):
        """Add the entries of dict to the index and mark it as changed."""
        self.__changed = 1
        # The parameter shadows the builtin ``dict``, so the base class
        # implementation has to be reached through super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store value under key and mark the index as changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove key and mark the index as changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index as changed."""
        self.__changed = 1
        dict.clear(self)

    def __del__(self):
        """Write the index back to its file if it has been modified."""
        if self.__changed:
            with open(self._indexname, 'w') as handle:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))
            # The file is up to date now; do not write it a second time
            # if __del__ happens to be invoked again.
            self.__changed = 0

    def _tostr(self, obj):
        """Return obj encoded as a whitespace-free string."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its (signed) integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things,
        # but it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(str(i) for i in intlist)

    def _toobj(self, str):
        """Return the object encoded in a string made by _tostr."""
        # _tostr writes signed byte values (-128..127); mask each one
        # back into 0..255 to rebuild the original pickle bytes.
        raw = bytes(bytearray(int(i) & 0xFF for i in str.split(',')))
        # SECURITY: unpickling runs whatever the pickle describes, so
        # the index file must come from a trusted source.
        return pickle.loads(raw)


Index = _InMemoryIndex