Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17  import os 
 18  import array 
 19  import shelve 
 20   
 21  try: 
 22      import cPickle as pickle  # Only available under Python 2 
 23  except ImportError: 
 24      import pickle  # Python 3 
 25   
 26   
class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The opened shelf is stored in the ``data`` attribute and is closed when
    the object is finalized.  A format version number is kept in the shelf
    under the ``'__version'`` key and is verified whenever an existing shelf
    is reopened.

    This class defines no item-access methods of its own, so the storage
    inherited from ``dict`` is separate from the shelf held in ``data``.
    """

    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf called indexname, creating it when necessary.

        Arguments:
         - indexname - base filename handed to shelve.open.
         - truncate - if true, discard any existing shelf and start
           with a new, empty one.

        Raises IOError if an existing shelf carries no version entry or a
        version different from the one this class writes.
        """
        dict.__init__(self)

        shelf = None
        if truncate:
            self._remove_stale_files(indexname)
        else:
            try:
                shelf = shelve.open(indexname, flag='r')
            except Exception:
                # The error type raised for a missing or unreadable
                # database depends on the dbm backend, so any ordinary
                # failure is treated as "no database exists".
                # KeyboardInterrupt and SystemExit still propagate.
                shelf = None

        if shelf is None:
            # No usable database: create an empty one and stamp it.
            self.data = shelve.open(indexname, flag='n')
            self.data[self.__version_key] = self.__version
            return

        # Check to make sure the database is the correct version.
        self.data = shelf
        version = self.data.get(self.__version_key, None)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    @staticmethod
    def _remove_stale_files(indexname):
        """Delete the .dir/.dat/.bak files left behind by dumbdbm."""
        # In python 1.52 and before, dumbdbm (under shelve)
        # doesn't clear the old database.
        for suffix in ('.dir', '.dat', '.bak'):
            path = indexname + suffix
            if os.path.exists(path):
                try:
                    os.unlink(path)
                except OSError:
                    # Best effort only: the shelf is reopened with
                    # flag='n' afterwards, which requests a new, empty
                    # database regardless.
                    pass

    def __del__(self):
        """Close the shelf, if __init__ got far enough to open one."""
        if 'data' in self.__dict__:
            self.data.close()
70 71
class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The object is a dict whose contents are read from the file indexname
    on construction (if that file exists) and written back to it when the
    object is finalized, provided the contents were modified through
    update, item assignment, item deletion or clear.
    """

    # File Format:
    #     version
    #     key value
    #     [...]
    # Every field is encoded with _tostr, so it contains no whitespace.

    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Load the index stored in indexname, if there is one.

        Arguments:
         - indexname - path of the index file.
         - truncate - if true, delete an existing index file first so
           the index starts out empty.

        Raises IOError if the version recorded in the file differs from
        the version this class writes.
        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0  # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    # Store through dict directly so that loading does not
                    # mark the index as changed; otherwise a failure on a
                    # later line would make __del__ overwrite the file
                    # with the partially loaded contents.
                    dict.__setitem__(self, self._toobj(key),
                                     self._toobj(value))
            self.__changed = 0

    def update(self, dict):
        """Merge the entries of dict into the index and mark it changed."""
        self.__changed = 1
        # The parameter shadows the builtin dict inside this method, so the
        # base implementation has to be reached through super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store value under key and mark the index changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove key and mark the index changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index changed."""
        self.__changed = 1
        dict.clear(self)

    def _save(self):
        """Write the version line and every entry to the index file."""
        with open(self._indexname, 'w') as handle:
            handle.write("%s\n" % self._tostr(self.__version))
            for key, value in self.items():
                handle.write("%s %s\n" %
                             (self._tostr(key), self._tostr(value)))
        # What is on disk now matches memory, so a later finalization
        # does not need to write again.
        self.__changed = 0

    def __del__(self):
        """Write the index back to its file if it was modified."""
        if self.__changed:
            self._save()

    def _tostr(self, obj):
        """Encode obj as comma-separated integers free of whitespace."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its (signed) integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things, but
        # it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(str(i) for i in intlist)

    def _toobj(self, str):
        """Decode a string produced by _tostr back into the object."""
        intlist = [int(i) for i in str.split(',')]
        # _tostr writes signed byte values, so bytes >= 128 appear as
        # negative numbers; masking maps them back to 0..255 before the
        # byte string is rebuilt.
        raw = bytes(bytearray(i & 0xFF for i in intlist))
        # NOTE: unpickling executes whatever the pickle stream asks for.
        # Only load index files that come from a trusted source.
        return pickle.loads(raw)


Index = _InMemoryIndex