Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17  import os 
 18  import array 
 19  import shelve 
 20   
 21  try: 
 22      import cPickle as pickle # Only available under Python 2 
 23  except ImportError: 
 24      import pickle # Python 3 
 25   
class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The underlying shelf is stored on the instance as ``self.data`` and is
    closed when the instance is finalized.  A version number is stored in
    the shelf under the key ``'__version'`` so that indexes written in an
    incompatible format are rejected when they are reopened.
    """
    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf called indexname, creating it if necessary.

        Arguments:
         - indexname - base filename handed to shelve.open.
         - truncate  - if true, delete any existing '.dir', '.dat' and
           '.bak' files for indexname and start with a new, empty shelf.

        If the shelf cannot be opened read-only, a new one is created and
        stamped with the current version.  Raises IOError if an existing
        shelf has no version entry or a version other than this class's.
        """
        dict.__init__(self)
        try:
            if truncate:
                # In python 1.52 and before, dumbdbm (under shelve)
                # doesn't clear the old database.
                for suffix in ('.dir', '.dat', '.bak'):
                    filename = indexname + suffix
                    if os.path.exists(filename):
                        os.unlink(filename)
                # Deliberately jump to the "create a new shelf" branch.
                raise Exception("open a new shelf")
            self.data = shelve.open(indexname, flag='r')
        except Exception:
            # No usable database exists (or truncation was requested).
            # Only Exception is caught, so KeyboardInterrupt and SystemExit
            # propagate instead of silently recreating the shelf.
            self.data = shelve.open(indexname, flag='n')
            self.data[self.__version_key] = self.__version
        else:
            # Check to make sure the database is the correct version.
            version = self.data.get(self.__version_key, None)
            if version is None:
                raise IOError("Unrecognized index format")
            elif version != self.__version:
                raise IOError("Version %s doesn't match my version %s"
                              % (version, self.__version))

    def __del__(self):
        """Close the shelf, if one was successfully opened."""
        # 'data' is missing when __init__ failed before opening a shelf.
        if 'data' in self.__dict__:
            self.data.close()
class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The index lives in memory as a dictionary.  It is loaded from the file
    named indexname when the object is created (if that file exists) and
    written back to the same file when the object is finalized, but only
    if it was modified in the meantime.
    """
    # File Format:
    # version
    # key value
    # [...]
    #
    # Every field is an object encoded by _tostr, so a field never
    # contains whitespace.

    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Load the index stored in indexname, if that file exists.

        Arguments:
         - indexname - path of the file holding the index.
         - truncate  - if true, delete an existing index file and start
           with an empty index.

        Raises IOError if the file was written with a different version.

        NOTE: the file contents are unpickled, so only open index files
        that come from a trusted source.
        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0     # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    key, value = self._toobj(key), self._toobj(value)
                    self[key] = value
            # Loading went through __setitem__, which flags a change;
            # the in-memory copy still matches the file, so reset the flag.
            self.__changed = 0

    def update(self, dict):
        """Merge the entries of dict into the index and mark it changed."""
        self.__changed = 1
        # The parameter shadows the builtin dict type, so the base class
        # implementation has to be reached through super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store value under key and mark the index changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove key and mark the index changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index changed."""
        self.__changed = 1
        dict.clear(self)

    def __del__(self):
        """Write the index back to its file if it was modified."""
        if self.__changed:
            with open(self._indexname, 'w') as handle:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))

    def _tostr(self, obj):
        """Encode obj as a whitespace-free string of comma-separated ints."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its signed integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things,
        # but it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(str(i) for i in intlist)

    def _toobj(self, str):
        """Decode a string produced by _tostr back into the original object."""
        # _tostr writes signed byte values (-128..127); mask each one back
        # to an unsigned byte and rebuild the exact pickle byte string.
        # Going through bytearray yields real bytes on Python 3 and a
        # plain str on Python 2, which is what pickle.loads needs on each.
        raw = bytearray(int(i) & 0xFF for i in str.split(','))
        return pickle.loads(bytes(raw))


Index = _InMemoryIndex