Package Bio :: Module File
[hide private]
[frames] | [no frames]

Source Code for Module Bio.File

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # Copyright 2009-2013 by Peter Cock. All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Code for more fancy file handles. 
  9   
 10   
 11  Classes: 
 12   
 13      - UndoHandle     File object decorator with support for undo-like operations. 
 14   
 15  Additional private classes used in Bio.SeqIO and Bio.SearchIO for indexing 
 16  files are also defined under Bio.File but these are not intended for direct 
 17  use. 
 18  """ 
 19   
 20  from __future__ import print_function 
 21   
 22  import codecs 
 23  import os 
 24  import sys 
 25  import contextlib 
 26  import itertools 
 27   
 28  from Bio._py3k import basestring 
 29   
 30  try: 
 31      from collections import UserDict as _dict_base 
 32  except ImportError: 
 33      from UserDict import DictMixin as _dict_base 
 34   
 35  try: 
 36      from sqlite3 import dbapi2 as _sqlite 
 37      from sqlite3 import IntegrityError as _IntegrityError 
 38      from sqlite3 import OperationalError as _OperationalError 
 39  except ImportError: 
 40      # Not present on Jython, but should be included in Python 2.5 
 41      # or later (unless compiled from source without its dependencies) 
 42      # Still want to offer in-memory indexing. 
 43      _sqlite = None 
 44      pass 
 45   
 46  __docformat__ = "restructuredtext en" 
@contextlib.contextmanager
def as_handle(handleish, mode='r', **kwargs):
    r"""Context manager to ensure we are using a handle.

    Context manager for arguments that can be passed to
    SeqIO and AlignIO read, write, and parse methods: either file objects or strings.

    When given a string, returns a file handle open to handleish with provided
    mode which will be closed when the manager exits.

    All other inputs are returned, and are *not* closed.

    - handleish - Either a string or file handle
    - mode - Mode to open handleish (used only if handleish is a string)
    - kwargs - Further arguments to pass to open(...)

    Example:

    >>> with as_handle('seqs.fasta', 'w') as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    True

    >>> handle = open('seqs.fasta', 'w')
    >>> with as_handle(handle) as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    False
    >>> fp.close()

    Note that if the mode argument includes U (for universal new lines)
    this will be removed under Python 3 where it is redundant and has
    been deprecated (this happens automatically in text mode).
    """
    if isinstance(handleish, basestring):
        if sys.version_info[0] >= 3 and "U" in mode:
            # Python 3 text mode already handles universal newlines and
            # the "U" flag is deprecated there, so strip it.  If "U" was
            # the entire mode string (legal on Python 2, meaning "rU"),
            # fall back to plain read mode rather than passing an empty
            # mode string to open(), which would raise ValueError.
            mode = mode.replace("U", "") or "r"
        if 'encoding' in kwargs:
            # codecs.open is needed for an explicit encoding on Python 2;
            # on Python 3 the built-in open would accept encoding directly.
            with codecs.open(handleish, mode, **kwargs) as fp:
                yield fp
        else:
            with open(handleish, mode, **kwargs) as fp:
                yield fp
    else:
        # Not a filename - assume it is already a handle and pass it
        # through unchanged (and deliberately do not close it here).
        yield handleish
94
def _open_for_random_access(filename):
    """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE).

    This functionality is used by the Bio.SeqIO and Bio.SearchIO index
    and index_db functions.

    Returns a BgzfReader when the file is BGZF compressed, otherwise the
    plain binary file handle (rewound to the start of the file).
    """
    handle = open(filename, "rb")
    # Imported here rather than at the top of the module - presumably to
    # avoid a circular import; confirm before moving it to module level.
    from . import bgzf
    try:
        return bgzf.BgzfReader(mode="rb", fileobj=handle)
    except ValueError as e:
        # BgzfReader signals a non-BGZF file with a ValueError whose
        # message mentions BGZF; anything else is unexpected.
        assert "BGZF" in str(e)
        # Not a BGZF file after all, rewind to start:
        handle.seek(0)
        return handle
111
class UndoHandle(object):
    """File handle wrapper that lets lines be pushed back for re-reading.

    Pushed-back lines are stored LIFO and served up again before any
    further data is taken from the wrapped handle.

    Added methods:

    - saveline    Save a line to be returned next time.
    - peekline    Peek at the next line without consuming it.

    """
    def __init__(self, handle):
        self._handle = handle
        self._saved = []

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    if sys.version_info[0] < 3:
        def next(self):
            """Python 2 style alias for Python 3 style __next__ method."""
            return self.__next__()

    def readlines(self, *args, **keywds):
        pending = self._saved
        self._saved = []
        return pending + self._handle.readlines(*args, **keywds)

    def readline(self, *args, **keywds):
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        if size == -1:
            # Everything: the pushed-back text, then the rest of the file.
            prefix = "".join(self._saved)
            del self._saved[:]
        else:
            chunks = []
            while size > 0 and self._saved:
                head = self._saved[0]
                if len(head) <= size:
                    # Whole saved line fits in the request.
                    size -= len(head)
                    chunks.append(self._saved.pop(0))
                else:
                    # Split the first saved line, keeping the tail for later.
                    chunks.append(head[:size])
                    self._saved[0] = head[size:]
                    size = 0
            prefix = "".join(chunks)
        return prefix + self._handle.read(size)

    def saveline(self, line):
        # Falsy lines (e.g. "" at EOF) are deliberately ignored.
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        self.saveline(line)
        return line

    def tell(self):
        # The wrapped handle is ahead by the pushed-back text, so subtract it.
        return self._handle.tell() - sum(len(line) for line in self._saved)

    def seek(self, *args):
        # An explicit seek invalidates anything that was pushed back.
        del self._saved[:]
        self._handle.seek(*args)

    def __getattr__(self, attr):
        # Delegate all other attributes/methods to the wrapped handle.
        return getattr(self._handle, attr)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._handle.close()
197
# The rest of this file defines code used in Bio.SeqIO and Bio.SearchIO
# for indexing

class _IndexedSeqFileProxy(object):
    """Abstract base for file format specific random access (PRIVATE).

    Subclassed in Bio.SeqIO (indexing records as SeqRecord objects)
    and in Bio.SearchIO (indexing records as QueryResult objects).

    Each file format subclass must provide '__iter__' and 'get', and
    may optionally provide 'get_raw'.
    """

    def __iter__(self):
        """Yield (identifier, offset, length in bytes) tuples.

        A length of zero is allowed where the record length is not
        implemented or not possible for a particular file format.
        """
        raise NotImplementedError("Subclass should implement this")

    def get(self, offset):
        """Return the parsed object for the record at the given offset."""
        # Most formats with self contained records can implement this by
        # parsing StringIO(_bytes_to_string(self.get_raw(offset)))
        raise NotImplementedError("Subclass should implement this")

    def get_raw(self, offset):
        """Return a bytes string (if implemented for this file format)."""
        # Optional - only provided by sub-classes where feasible.
        raise NotImplementedError("Not available for this file format.")
230
231 232 -class _IndexedSeqFileDict(_dict_base):
233 """Read only dictionary interface to a sequential record file. 234 235 This code is used in both Bio.SeqIO for indexing as SeqRecord 236 objects, and in Bio.SearchIO for indexing QueryResult objects. 237 238 Keeps the keys and associated file offsets in memory, reads the file 239 to access entries as objects parsing them on demand. This approach 240 is memory limited, but will work even with millions of records. 241 242 Note duplicate keys are not allowed. If this happens, a ValueError 243 exception is raised. 244 245 As used in Bio.SeqIO, by default the SeqRecord's id string is used 246 as the dictionary key. In Bio.SearchIO, the query's id string is 247 used. This can be changed by suppling an optional key_function, 248 a callback function which will be given the record id and must 249 return the desired key. For example, this allows you to parse 250 NCBI style FASTA identifiers, and extract the GI number to use 251 as the dictionary key. 252 253 Note that this dictionary is essentially read only. You cannot 254 add or change values, pop values, nor clear the dictionary. 255 """
256 - def __init__(self, random_access_proxy, key_function, 257 repr, obj_repr):
258 # Use key_function=None for default value 259 self._proxy = random_access_proxy 260 self._key_function = key_function 261 self._repr = repr 262 self._obj_repr = obj_repr 263 if key_function: 264 offset_iter = ( 265 (key_function(k), o, l) for (k, o, l) in random_access_proxy) 266 else: 267 offset_iter = random_access_proxy 268 offsets = {} 269 for key, offset, length in offset_iter: 270 # Note - we don't store the length because I want to minimise the 271 # memory requirements. With the SQLite backend the length is kept 272 # and is used to speed up the get_raw method (by about 3 times). 273 # The length should be provided by all the current backends except 274 # SFF where there is an existing Roche index we can reuse (very fast 275 # but lacks the record lengths) 276 # assert length or format in ["sff", "sff-trim"], \ 277 # "%s at offset %i given length %r (%s format %s)" \ 278 # % (key, offset, length, filename, format) 279 if key in offsets: 280 self._proxy._handle.close() 281 raise ValueError("Duplicate key '%s'" % key) 282 else: 283 offsets[key] = offset 284 self._offsets = offsets
285
286 - def __repr__(self):
287 return self._repr
288
289 - def __str__(self):
290 # TODO - How best to handle the __str__ for SeqIO and SearchIO? 291 if self: 292 return "{%r : %s(...), ...}" % (list(self.keys())[0], self._obj_repr) 293 else: 294 return "{}"
295
296 - def __contains__(self, key):
297 return key in self._offsets
298
299 - def __len__(self):
300 """How many records are there?""" 301 return len(self._offsets)
302
303 - def items(self):
304 """Iterate over the (key, SeqRecord) items. 305 306 This tries to act like a Python 3 dictionary, and does not return 307 a list of (key, value) pairs due to memory concerns. 308 """ 309 for key in self.__iter__(): 310 yield key, self.__getitem__(key)
311
312 - def values(self):
313 """Iterate over the SeqRecord items. 314 315 This tries to act like a Python 3 dictionary, and does not return 316 a list of value due to memory concerns. 317 """ 318 for key in self.__iter__(): 319 yield self.__getitem__(key)
320
321 - def keys(self):
322 """Iterate over the keys. 323 324 This tries to act like a Python 3 dictionary, and does not return 325 a list of keys due to memory concerns. 326 """ 327 return self.__iter__()
328 329 if hasattr(dict, "iteritems"): 330 # Python 2, also define iteritems etc
331 - def itervalues(self):
332 """Iterate over the SeqRecord) items.""" 333 for key in self.__iter__(): 334 yield self.__getitem__(key)
335
336 - def iteritems(self):
337 """Iterate over the (key, SeqRecord) items.""" 338 for key in self.__iter__(): 339 yield key, self.__getitem__(key)
340
341 - def iterkeys(self):
342 """Iterate over the keys.""" 343 return self.__iter__()
344
345 - def __iter__(self):
346 """Iterate over the keys.""" 347 return iter(self._offsets)
348
349 - def __getitem__(self, key):
350 """x.__getitem__(y) <==> x[y]""" 351 # Pass the offset to the proxy 352 record = self._proxy.get(self._offsets[key]) 353 if self._key_function: 354 key2 = self._key_function(record.id) 355 else: 356 key2 = record.id 357 if key != key2: 358 raise ValueError("Key did not match (%s vs %s)" % (key, key2)) 359 return record
360
361 - def get(self, k, d=None):
362 """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" 363 try: 364 return self.__getitem__(k) 365 except KeyError: 366 return d
367
368 - def get_raw(self, key):
369 """Similar to the get method, but returns the record as a raw string. 370 371 If the key is not found, a KeyError exception is raised. 372 373 Note that on Python 3 a bytes string is returned, not a typical 374 unicode string. 375 376 NOTE - This functionality is not supported for every file format. 377 """ 378 # Pass the offset to the proxy 379 return self._proxy.get_raw(self._offsets[key])
380
381 - def __setitem__(self, key, value):
382 """Would allow setting or replacing records, but not implemented.""" 383 raise NotImplementedError("An indexed a sequence file is read only.")
384
385 - def update(self, *args, **kwargs):
386 """Would allow adding more values, but not implemented.""" 387 raise NotImplementedError("An indexed a sequence file is read only.")
388
389 - def pop(self, key, default=None):
390 """Would remove specified record, but not implemented.""" 391 raise NotImplementedError("An indexed a sequence file is read only.")
392
393 - def popitem(self):
394 """Would remove and return a SeqRecord, but not implemented.""" 395 raise NotImplementedError("An indexed a sequence file is read only.")
396
397 - def clear(self):
398 """Would clear dictionary, but not implemented.""" 399 raise NotImplementedError("An indexed a sequence file is read only.")
400
401 - def fromkeys(self, keys, value=None):
402 """A dictionary method which we don't implement.""" 403 raise NotImplementedError("An indexed a sequence file doesn't " 404 "support this.")
405
406 - def copy(self):
407 """A dictionary method which we don't implement.""" 408 raise NotImplementedError("An indexed a sequence file doesn't " 409 "support this.")
410
411 - def close(self):
412 """Close the file handle being used to read the data. 413 414 Once called, further use of the index won't work. The sole purpose 415 of this method is to allow explicit handle closure - for example 416 if you wish to delete the file, on Windows you must first close 417 all open handles to that file. 418 """ 419 self._proxy._handle.close()
420
class _SQLiteManySeqFilesDict(_IndexedSeqFileDict):
    """Read only dictionary interface to many sequential record files.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys, file-numbers and offsets in an SQLite database. To access
    a record by key, reads from the offset in the appropriate file and then
    parses the record into an object.

    There are OS limits on the number of files that can be open at once,
    so a pool is kept. If a record is required from a closed file, then
    one of the open handles is closed first.
    """
    def __init__(self, index_filename, filenames,
                 proxy_factory, format,
                 key_function, repr, max_open=10):
        """Loads or creates an SQLite based index.

        - index_filename - SQLite database file; loaded if it already
          exists on disk, otherwise built from scratch.
        - filenames - iterable of sequence filenames to index (may be
          None when reloading an existing index).
        - proxy_factory - callable returning a format specific random
          access proxy given (format, filename); called with just the
          format to check the format is supported.
        - format - lower case file format string, or None/"" to take the
          format recorded in an existing index.
        - key_function - optional callable mapping record ids to keys.
        - repr - string returned by __repr__.
        - max_open - maximum number of file handles kept in the pool.
        """
        # TODO? - Don't keep filename list in memory (just in DB)?
        # Should save a chunk of memory if dealing with 1000s of files.
        # Furthermore could compare a generator to the DB on reloading
        # (no need to turn it into a list)

        if not _sqlite:
            # Hack for Jython (or if Python is compiled without it)
            from Bio import MissingPythonDependencyError
            raise MissingPythonDependencyError("Requires sqlite3, which is "
                                               "included Python 2.5+")
        if filenames is not None:
            filenames = list(filenames)  # In case it was a generator

        # Cache the arguments as private variables
        self._index_filename = index_filename
        self._filenames = filenames
        self._format = format
        self._key_function = key_function
        self._proxy_factory = proxy_factory
        self._repr = repr
        self._max_open = max_open
        # Pool of currently open proxies, keyed by file number.
        self._proxies = {}

        # Note if using SQLite :memory: trick index filename, this will
        # give $PWD as the relative path (which is fine).
        self._relative_path = os.path.abspath(os.path.dirname(index_filename))

        if os.path.isfile(index_filename):
            self._load_index()
        else:
            self._build_index()

    def _load_index(self):
        """Called from __init__ to re-use an existing index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        proxy_factory = self._proxy_factory

        con = _sqlite.connect(index_filename)
        self._con = con
        # Check the count...
        try:
            count, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("count",)).fetchone()
            self._length = int(count)
            # _build_index stores -1 until the build completes, so -1
            # here means the index was never finished being written.
            if self._length == -1:
                con.close()
                raise ValueError("Unfinished/partial database")
            count, = con.execute(
                "SELECT COUNT(key) FROM offset_data;").fetchone()
            if self._length != int(count):
                con.close()
                raise ValueError("Corrupt database? %i entries not %i"
                                 % (int(count), self._length))
            self._format, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("format",)).fetchone()
            if format and format != self._format:
                con.close()
                raise ValueError("Index file says format %s, not %s"
                                 % (self._format, format))
            try:
                # fetchone() gives None if this meta_data key is absent,
                # and unpacking None raises the TypeError handled below.
                filenames_relative_to_index, = con.execute(
                    "SELECT value FROM meta_data WHERE key=?;",
                    ("filenames_relative_to_index",)).fetchone()
                filenames_relative_to_index = (filenames_relative_to_index.upper() == "TRUE")
            except TypeError:
                # Original behaviour, assume if meta_data missing
                filenames_relative_to_index = False
            self._filenames = [row[0] for row in
                               con.execute("SELECT name FROM file_data "
                                           "ORDER BY file_number;").fetchall()]
            if filenames_relative_to_index:
                # Not implicitly relative to $PWD, explicitly relative to index file
                relative_path = os.path.abspath(os.path.dirname(index_filename))
                tmp = []
                for f in self._filenames:
                    if os.path.isabs(f):
                        tmp.append(f)
                    else:
                        # Would be stored with Unix / path separator, so convert
                        # it to the local OS path separator here:
                        tmp.append(os.path.join(relative_path, f.replace("/", os.path.sep)))
                self._filenames = tmp
                del tmp
            if filenames and len(filenames) != len(self._filenames):
                con.close()
                raise ValueError("Index file says %i files, not %i"
                                 % (len(self._filenames), len(filenames)))
            if filenames and filenames != self._filenames:
                for old, new in zip(self._filenames, filenames):
                    # Want exact match (after making relative to the index above)
                    if os.path.abspath(old) != os.path.abspath(new):
                        con.close()
                        if filenames_relative_to_index:
                            raise ValueError("Index file has different filenames, e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                        else:
                            raise ValueError("Index file has different filenames "
                                             "[This is an old index where any relative paths "
                                             "were relative to the original working directory]. "
                                             "e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                # Filenames are equal (after imposing abspath)
        except _OperationalError as err:
            # e.g. the expected tables/columns are missing entirely.
            con.close()
            raise ValueError("Not a Biopython index database? %s" % err)
        # Now we have the format (from the DB if not given to us),
        if not proxy_factory(self._format):
            con.close()
            raise ValueError("Unsupported format '%s'" % self._format)

    def _build_index(self):
        """Called from __init__ to create a new index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        key_function = self._key_function
        proxy_factory = self._proxy_factory
        max_open = self._max_open
        random_access_proxies = self._proxies

        if not format or not filenames:
            raise ValueError("Filenames to index and format required to build %r" % index_filename)
        if not proxy_factory(format):
            raise ValueError("Unsupported format '%s'" % format)
        # Create the index
        con = _sqlite.connect(index_filename)
        self._con = con
        # print("Creating index")
        # Sqlite PRAGMA settings for speed
        con.execute("PRAGMA synchronous=OFF")
        con.execute("PRAGMA locking_mode=EXCLUSIVE")
        # Don't index the key column until the end (faster)
        # con.execute("CREATE TABLE offset_data (key TEXT PRIMARY KEY, "
        # "offset INTEGER);")
        con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);")
        # count starts at -1 as a marker of an unfinished build; it is
        # updated to the real total at the very end (see _load_index).
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("count", -1))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("format", format))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("filenames_relative_to_index", "True"))
        # TODO - Record the alphabet?
        # TODO - Record the file size and modified date?
        con.execute(
            "CREATE TABLE file_data (file_number INTEGER, name TEXT);")
        con.execute("CREATE TABLE offset_data (key TEXT, file_number INTEGER, offset INTEGER, length INTEGER);")
        count = 0
        for i, filename in enumerate(filenames):
            # Default to storing as an absolute path,
            f = os.path.abspath(filename)
            if not os.path.isabs(filename) and not os.path.isabs(index_filename):
                # Since user gave BOTH filename & index as relative paths,
                # we will store this relative to the index file even though
                # if it may now start ../ (meaning up a level)
                # Note for cross platfrom use (e.g. shared data drive over SAMBA),
                # convert any Windows slash into Unix style / for relative paths.
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
            elif (os.path.dirname(os.path.abspath(filename)) + os.path.sep).startswith(relative_path + os.path.sep):
                # Since sequence file is in same directory or sub directory,
                # might as well make this into a relative path:
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
                assert not f.startswith("../"), f
            # print("DEBUG - storing %r as [%r] %r" % (filename, relative_path, f))
            con.execute(
                "INSERT INTO file_data (file_number, name) VALUES (?,?);",
                (i, f))
            random_access_proxy = proxy_factory(format, filename)
            if key_function:
                offset_iter = ((key_function(k), i, o, l)
                               for (k, o, l) in random_access_proxy)
            else:
                offset_iter = ((k, i, o, l)
                               for (k, o, l) in random_access_proxy)
            # Insert the offsets in batches of 100 to bound memory use.
            while True:
                batch = list(itertools.islice(offset_iter, 100))
                if not batch:
                    break
                # print("Inserting batch of %i offsets, %s ... %s"
                # % (len(batch), batch[0][0], batch[-1][0]))
                con.executemany(
                    "INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);",
                    batch)
                con.commit()
                count += len(batch)
            # Keep the proxy open if the pool has room, otherwise close it.
            if len(random_access_proxies) < max_open:
                random_access_proxies[i] = random_access_proxy
            else:
                random_access_proxy._handle.close()
        self._length = count
        # print("About to index %i entries" % count)
        try:
            # The UNIQUE index doubles as the duplicate key check.
            con.execute("CREATE UNIQUE INDEX IF NOT EXISTS "
                        "key_index ON offset_data(key);")
        except _IntegrityError as err:
            self._proxies = random_access_proxies
            self.close()
            con.close()
            raise ValueError("Duplicate key? %s" % err)
        con.execute("PRAGMA locking_mode=NORMAL")
        con.execute("UPDATE meta_data SET value = ? WHERE key = ?;",
                    (count, "count"))
        con.commit()
        # print("Index created")

    def __repr__(self):
        return self._repr

    def __contains__(self, key):
        return bool(
            self._con.execute("SELECT key FROM offset_data WHERE key=?;",
                              (key,)).fetchone())

    def __len__(self):
        """How many records are there?"""
        return self._length
        # return self._con.execute("SELECT COUNT(key) FROM offset_data;").fetchone()[0]

    def __iter__(self):
        """Iterate over the keys."""
        for row in self._con.execute("SELECT key FROM offset_data;"):
            yield str(row[0])

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems but not items etc
        # Just need to override this...
        def keys(self):
            """Return a list of all the keys (SeqRecord identifiers)."""
            return [str(row[0]) for row in
                    self._con.execute("SELECT key FROM offset_data;").fetchall()]

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset = row
        proxies = self._proxies
        if file_number in proxies:
            record = proxies[file_number].get(offset)
        else:
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            record = proxy.get(offset)
            proxies[file_number] = proxy
        # Sanity check the parsed record really has the expected key.
        if self._key_function:
            key2 = self._key_function(record.id)
        else:
            key2 = record.id
        if key != key2:
            raise ValueError("Key did not match (%s vs %s)" % (key, key2))
        return record

    def get(self, k, d=None):
        """D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return d

    def get_raw(self, key):
        """Similar to the get method, but returns the record as a raw string.

        If the key is not found, a KeyError exception is raised.

        Note that on Python 3 a bytes string is returned, not a typical
        unicode string.

        **NOTE** - This functionality is not supported for every file format.
        """
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset, length FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset, length = row
        proxies = self._proxies
        if file_number in proxies:
            if length:
                # Shortcut if we have the length
                h = proxies[file_number]._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxies[file_number].get_raw(offset)
        else:
            # This code is duplicated from __getitem__ to avoid a function call
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            proxies[file_number] = proxy
            if length:
                # Shortcut if we have the length
                h = proxy._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxy.get_raw(offset)

    def close(self):
        """Close any open file handles."""
        proxies = self._proxies
        while proxies:
            proxies.popitem()[1]._handle.close()
758