
Source Code for Module Bio.File

# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# Copyright 2009-2015 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

  8  """Code for more fancy file handles. 
  9   
 10   
 11  Classes: 
 12   
 13      - UndoHandle     File object decorator with support for undo-like operations. 
 14   
 15  Additional private classes used in Bio.SeqIO and Bio.SearchIO for indexing 
 16  files are also defined under Bio.File but these are not intended for direct 
 17  use. 
 18  """ 
 19   
from __future__ import print_function

import codecs
import os
import sys
import contextlib
import itertools

from Bio._py3k import basestring

try:
    from collections import UserDict as _dict_base
except ImportError:
    from UserDict import DictMixin as _dict_base

try:
    from sqlite3 import dbapi2 as _sqlite
    from sqlite3 import IntegrityError as _IntegrityError
    from sqlite3 import OperationalError as _OperationalError
except ImportError:
    # Not present on Jython, but should be included in Python 2.5
    # or later (unless compiled from source without its dependencies).
    # Still want to offer in-memory indexing.
    _sqlite = None


@contextlib.contextmanager
def as_handle(handleish, mode='r', **kwargs):
    r"""Context manager to ensure we are using a handle.

    Context manager for arguments that can be passed to the SeqIO and
    AlignIO read, write, and parse methods: either file objects or strings.

    When given a string, returns a file handle open to handleish with the
    provided mode which will be closed when the manager exits.

    All other inputs are returned, and are *not* closed.

    - handleish - Either a string or file handle
    - mode - Mode to open handleish (used only if handleish is a string)
    - kwargs - Further arguments to pass to open(...)

    Example:

    >>> with as_handle('seqs.fasta', 'w') as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    True

    >>> handle = open('seqs.fasta', 'w')
    >>> with as_handle(handle) as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    False
    >>> fp.close()

    Note that if the mode argument includes U (for universal new lines)
    this will be removed under Python 3, where it is redundant and has
    been deprecated (this happens automatically in text mode).
    """
    if isinstance(handleish, basestring):
        if sys.version_info[0] >= 3 and "U" in mode:
            mode = mode.replace("U", "")
        if 'encoding' in kwargs:
            with codecs.open(handleish, mode, **kwargs) as fp:
                yield fp
        else:
            with open(handleish, mode, **kwargs) as fp:
                yield fp
    else:
        yield handleish
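

# A minimal usage sketch of the encoding branch above (the file name and
# the demo helper itself are hypothetical, not part of the original module):
# passing an 'encoding' keyword argument makes as_handle delegate to
# codecs.open instead of the plain built-in open.
def _demo_as_handle_encoding():
    with as_handle("seqs.fasta", "w", encoding="utf-8") as fp:
        fp.write(u">test\nACGT\n")  # handle is closed on exiting the block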


def _open_for_random_access(filename):
    """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE).

    This functionality is used by the Bio.SeqIO and Bio.SearchIO index
    and index_db functions.
    """
    handle = open(filename, "rb")
    from . import bgzf
    try:
        return bgzf.BgzfReader(mode="rb", fileobj=handle)
    except ValueError as e:
        assert "BGZF" in str(e)
        # Not a BGZF file after all, rewind to start:
        handle.seek(0)
        return handle
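

# A minimal sketch of how the private helper above behaves (the file name
# and this demo helper are hypothetical, not part of the original module).
# The caller gets either a BgzfReader or a plain binary handle, and can
# treat the two alike for seek/read:
def _demo_open_for_random_access():
    handle = _open_for_random_access("seqs.fasta")  # plain or BGZF file
    handle.seek(0)
    first_bytes = handle.read(10)  # bytes in both cases
    handle.close()
    return first_bytes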


class UndoHandle(object):
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:

    - saveline    Save a line to be returned next time.
    - peekline    Peek at the next line without consuming it.

    """

    def __init__(self, handle):
        self._handle = handle
        self._saved = []
        try:
            # If wrapping an online handle, this is nice to have:
            self.url = handle.url
        except AttributeError:
            pass

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    if sys.version_info[0] < 3:
        def next(self):
            """Python 2 style alias for the Python 3 style __next__ method."""
            return self.__next__()

    def readlines(self, *args, **keywds):
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        if self._saved:
            line = self._saved.pop(0)
        else:
            line = self._handle.readline(*args, **keywds)
        return line

    def read(self, size=-1):
        if size == -1:
            saved = "".join(self._saved)
            self._saved[:] = []
        else:
            saved = ''
            while size > 0 and self._saved:
                if len(self._saved[0]) <= size:
                    size = size - len(self._saved[0])
                    saved = saved + self._saved.pop(0)
                else:
                    saved = saved + self._saved[0][:size]
                    self._saved[0] = self._saved[0][size:]
                    size = 0
        return saved + self._handle.read(size)

    def saveline(self, line):
        if line:
            self._saved = [line] + self._saved

    def peekline(self):
        if self._saved:
            line = self._saved[0]
        else:
            line = self._handle.readline()
            self.saveline(line)
        return line

    def tell(self):
        return self._handle.tell() - sum(len(line) for line in self._saved)

    def seek(self, *args):
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        return getattr(self._handle, attr)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self._handle.close()
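

# A minimal usage sketch for UndoHandle (this demo helper and its toy FASTA
# data are hypothetical, not part of the original module). Peeking and
# pushing back lines is what lets a parser look ahead before deciding how
# to handle the next record:
def _demo_undo_handle():
    from io import StringIO
    handle = UndoHandle(StringIO(u">seq1\nACGT\n"))
    assert handle.peekline() == u">seq1\n"  # look ahead, nothing consumed
    assert handle.readline() == u">seq1\n"  # peeked line is returned first
    handle.saveline(u">seq1\n")             # push the line back (LIFO)
    assert handle.readline() == u">seq1\n"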


# The rest of this file defines code used in Bio.SeqIO and Bio.SearchIO
# for indexing.

class _IndexedSeqFileProxy(object):
    """Base class for file format specific random access (PRIVATE).

    This is subclassed in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Subclasses for each file format should define '__iter__', 'get'
    and optionally 'get_raw' methods.
    """

    def __iter__(self):
        """Return (identifier, offset, length in bytes) tuples.

        The length can be zero where it is not implemented or not
        possible for a particular file format.
        """
        raise NotImplementedError("Subclass should implement this")

    def get(self, offset):
        """Return the parsed object for this entry."""
        # Most file formats with self-contained records can be handled by
        # parsing StringIO(_bytes_to_string(self.get_raw(offset)))
        raise NotImplementedError("Subclass should implement this")

    def get_raw(self, offset):
        """Return the raw record from the file as a bytes string (if implemented).

        If the key is not found, a KeyError exception is raised.

        This may not have been implemented for all file formats.
        """
        # Should be done by each sub-class (if possible)
        raise NotImplementedError("Not available for this file format.")
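

# A minimal sketch of the subclassing contract (hypothetical, not part of
# the original module; the one-record-per-line "key value" format is a
# made-up toy, assumed to contain no blank lines). Real subclasses live in
# Bio.SeqIO and Bio.SearchIO; this only shows the required __iter__/get
# methods and the optional get_raw:
class _ToyLineProxy(_IndexedSeqFileProxy):
    def __init__(self, filename):
        self._handle = open(filename, "rb")

    def __iter__(self):
        # Yield (identifier, offset, length in bytes) tuples, as required.
        self._handle.seek(0)
        while True:
            offset = self._handle.tell()
            line = self._handle.readline()
            if not line:
                break
            yield line.split(None, 1)[0].decode("ascii"), offset, len(line)

    def get(self, offset):
        # Real subclasses parse the raw bytes into SeqRecord/QueryResult
        # objects; the toy just returns the decoded line.
        return self.get_raw(offset).decode("ascii").rstrip()

    def get_raw(self, offset):
        self._handle.seek(offset)
        return self._handle.readline()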


class _IndexedSeqFileDict(_dict_base):
    """Read only dictionary interface to a sequential record file.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys and associated file offsets in memory, and reads the
    file to access entries as objects, parsing them on demand. This
    approach is memory limited, but will work even with millions of
    records.

    Note duplicate keys are not allowed. If they occur, a ValueError
    exception is raised.

    As used in Bio.SeqIO, by default the SeqRecord's id string is used
    as the dictionary key. In Bio.SearchIO, the query's id string is
    used. This can be changed by supplying an optional key_function,
    a callback function which will be given the record id and must
    return the desired key. For example, this allows you to parse
    NCBI style FASTA identifiers and extract the GI number to use
    as the dictionary key.

    Note that this dictionary is essentially read only. You cannot
    add or change values, pop values, nor clear the dictionary.
    """

    def __init__(self, random_access_proxy, key_function,
                 repr, obj_repr):
        # Use key_function=None for default value
        self._proxy = random_access_proxy
        self._key_function = key_function
        self._repr = repr
        self._obj_repr = obj_repr
        if key_function:
            offset_iter = (
                (key_function(k), o, l) for (k, o, l) in random_access_proxy)
        else:
            offset_iter = random_access_proxy
        offsets = {}
        for key, offset, length in offset_iter:
            # Note - we don't store the length because we want to minimise
            # the memory requirements. With the SQLite backend the length
            # is kept and is used to speed up the get_raw method (by about
            # 3 times). The length should be provided by all the current
            # backends except SFF, where there is an existing Roche index
            # we can reuse (very fast but lacking the record lengths).
            # assert length or format in ["sff", "sff-trim"], \
            #     "%s at offset %i given length %r (%s format %s)" \
            #     % (key, offset, length, filename, format)
            if key in offsets:
                self._proxy._handle.close()
                raise ValueError("Duplicate key '%s'" % key)
            else:
                offsets[key] = offset
        self._offsets = offsets

    def __repr__(self):
        return self._repr

    def __str__(self):
        # TODO - How best to handle the __str__ for SeqIO and SearchIO?
        if self:
            return "{%r : %s(...), ...}" % (list(self.keys())[0], self._obj_repr)
        else:
            return "{}"

    def __contains__(self, key):
        return key in self._offsets

    def __len__(self):
        """How many records are there?"""
        return len(self._offsets)

    def items(self):
        """Iterate over the (key, SeqRecord) items.

        This tries to act like a Python 3 dictionary, and does not return
        a list of (key, value) pairs due to memory concerns.
        """
        for key in self.__iter__():
            yield key, self.__getitem__(key)

    def values(self):
        """Iterate over the SeqRecord items.

        This tries to act like a Python 3 dictionary, and does not return
        a list of values due to memory concerns.
        """
        for key in self.__iter__():
            yield self.__getitem__(key)

    def keys(self):
        """Iterate over the keys.

        This tries to act like a Python 3 dictionary, and does not return
        a list of keys due to memory concerns.
        """
        return self.__iter__()

    if hasattr(dict, "iteritems"):
        # Python 2, also define iteritems etc
        def itervalues(self):
            """Iterate over the SeqRecord items."""
            for key in self.__iter__():
                yield self.__getitem__(key)

        def iteritems(self):
            """Iterate over the (key, SeqRecord) items."""
            for key in self.__iter__():
                yield key, self.__getitem__(key)

        def iterkeys(self):
            """Iterate over the keys."""
            return self.__iter__()

    def __iter__(self):
        """Iterate over the keys."""
        return iter(self._offsets)

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        # Pass the offset to the proxy
        record = self._proxy.get(self._offsets[key])
        if self._key_function:
            key2 = self._key_function(record.id)
        else:
            key2 = record.id
        if key != key2:
            raise ValueError("Key did not match (%s vs %s)" % (key, key2))
        return record

    def get(self, k, d=None):
        """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return d

    def get_raw(self, key):
        """Return the raw record from the file as a bytes string.

        If the key is not found, a KeyError exception is raised.
        """
        # Pass the offset to the proxy
        return self._proxy.get_raw(self._offsets[key])

    def __setitem__(self, key, value):
        """Would allow setting or replacing records, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def update(self, *args, **kwargs):
        """Would allow adding more values, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def pop(self, key, default=None):
        """Would remove the specified record, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def popitem(self):
        """Would remove and return a SeqRecord, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def clear(self):
        """Would clear the dictionary, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def fromkeys(self, keys, value=None):
        """A dictionary method which we don't implement."""
        raise NotImplementedError("An indexed sequence file doesn't "
                                  "support this.")

    def copy(self):
        """A dictionary method which we don't implement."""
        raise NotImplementedError("An indexed sequence file doesn't "
                                  "support this.")

    def close(self):
        """Close the file handle being used to read the data.

        Once called, further use of the index won't work. The sole purpose
        of this method is to allow explicit handle closure - for example
        if you wish to delete the file, on Windows you must first close
        all open handles to that file.
        """
        self._proxy._handle.close()
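

# A minimal sketch of how this class is reached in practice (the file name,
# key, and demo helper are hypothetical, not part of the original module).
# Bio.SeqIO.index returns an _IndexedSeqFileDict, here with a key_function
# that extracts the GI number from NCBI-style "gi|12345|..." identifiers:
def _demo_seqio_index():
    from Bio import SeqIO
    index = SeqIO.index("ncbi_style.fasta", "fasta",
                        key_function=lambda name: name.split("|")[1])
    record = index["12345"]  # parsed on demand from the stored offset
    index.close()            # explicit handle closure (see close above)
    return record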


class _SQLiteManySeqFilesDict(_IndexedSeqFileDict):
    """Read only dictionary interface to many sequential record files.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys, file numbers and offsets in an SQLite database. To
    access a record by key, reads from the offset in the appropriate file
    and then parses the record into an object.

    There are OS limits on the number of files that can be open at once,
    so a pool of handles is kept. If a record is required from a closed
    file, then one of the open handles is closed first.
    """

    def __init__(self, index_filename, filenames,
                 proxy_factory, format,
                 key_function, repr, max_open=10):
        """Load or create an SQLite based index."""
        # TODO? - Don't keep filename list in memory (just in DB)?
        # Should save a chunk of memory if dealing with 1000s of files.
        # Furthermore could compare a generator to the DB on reloading
        # (no need to turn it into a list)

        if not _sqlite:
            # Hack for Jython (or if Python is compiled without sqlite3)
            from Bio import MissingPythonDependencyError
            raise MissingPythonDependencyError("Requires sqlite3, which is "
                                               "included with Python 2.5+")
        if filenames is not None:
            filenames = list(filenames)  # In case it was a generator

        # Cache the arguments as private variables
        self._index_filename = index_filename
        self._filenames = filenames
        self._format = format
        self._key_function = key_function
        self._proxy_factory = proxy_factory
        self._repr = repr
        self._max_open = max_open
        self._proxies = {}

        # Note if using the SQLite :memory: trick as the index filename,
        # this will give $PWD as the relative path (which is fine).
        self._relative_path = os.path.abspath(os.path.dirname(index_filename))

        if os.path.isfile(index_filename):
            self._load_index()
        else:
            self._build_index()

    def _load_index(self):
        """Called from __init__ to re-use an existing index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        proxy_factory = self._proxy_factory

        con = _sqlite.connect(index_filename)
        self._con = con
        # Check the count...
        try:
            count, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("count",)).fetchone()
            self._length = int(count)
            if self._length == -1:
                con.close()
                raise ValueError("Unfinished/partial database")
            count, = con.execute(
                "SELECT COUNT(key) FROM offset_data;").fetchone()
            if self._length != int(count):
                con.close()
                raise ValueError("Corrupt database? %i entries not %i"
                                 % (int(count), self._length))
            self._format, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("format",)).fetchone()
            if format and format != self._format:
                con.close()
                raise ValueError("Index file says format %s, not %s"
                                 % (self._format, format))
            try:
                filenames_relative_to_index, = con.execute(
                    "SELECT value FROM meta_data WHERE key=?;",
                    ("filenames_relative_to_index",)).fetchone()
                filenames_relative_to_index = (filenames_relative_to_index.upper() == "TRUE")
            except TypeError:
                # Original behaviour, assumed if the meta_data entry is missing
                filenames_relative_to_index = False
            self._filenames = [row[0] for row in
                               con.execute("SELECT name FROM file_data "
                                           "ORDER BY file_number;").fetchall()]
            if filenames_relative_to_index:
                # Not implicitly relative to $PWD, explicitly relative to the index file
                relative_path = os.path.abspath(os.path.dirname(index_filename))
                tmp = []
                for f in self._filenames:
                    if os.path.isabs(f):
                        tmp.append(f)
                    else:
                        # Would be stored with Unix / path separator, so convert
                        # it to the local OS path separator here:
                        tmp.append(os.path.join(relative_path, f.replace("/", os.path.sep)))
                self._filenames = tmp
                del tmp
            if filenames and len(filenames) != len(self._filenames):
                con.close()
                raise ValueError("Index file says %i files, not %i"
                                 % (len(self._filenames), len(filenames)))
            if filenames and filenames != self._filenames:
                for old, new in zip(self._filenames, filenames):
                    # Want exact match (after making relative to the index above)
                    if os.path.abspath(old) != os.path.abspath(new):
                        con.close()
                        if filenames_relative_to_index:
                            raise ValueError("Index file has different filenames, e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                        else:
                            raise ValueError("Index file has different filenames "
                                             "[This is an old index where any relative paths "
                                             "were relative to the original working directory]. "
                                             "e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                # Filenames are equal (after imposing abspath)
        except _OperationalError as err:
            con.close()
            raise ValueError("Not a Biopython index database? %s" % err)
        # Now we have the format (from the DB if not given to us)
        if not proxy_factory(self._format):
            con.close()
            raise ValueError("Unsupported format '%s'" % self._format)

    def _build_index(self):
        """Called from __init__ to create a new index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        key_function = self._key_function
        proxy_factory = self._proxy_factory
        max_open = self._max_open
        random_access_proxies = self._proxies

        if not format or not filenames:
            raise ValueError("Filenames to index and format required to build %r" % index_filename)
        if not proxy_factory(format):
            raise ValueError("Unsupported format '%s'" % format)
        # Create the index
        con = _sqlite.connect(index_filename)
        self._con = con
        # print("Creating index")
        # Sqlite PRAGMA settings for speed
        con.execute("PRAGMA synchronous=OFF")
        con.execute("PRAGMA locking_mode=EXCLUSIVE")
        # Don't index the key column until the end (faster)
        # con.execute("CREATE TABLE offset_data (key TEXT PRIMARY KEY, "
        #             "offset INTEGER);")
        con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);")
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("count", -1))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("format", format))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("filenames_relative_to_index", "True"))
        # TODO - Record the alphabet?
        # TODO - Record the file size and modified date?
        con.execute(
            "CREATE TABLE file_data (file_number INTEGER, name TEXT);")
        con.execute("CREATE TABLE offset_data (key TEXT, file_number INTEGER, offset INTEGER, length INTEGER);")
        count = 0
        for i, filename in enumerate(filenames):
            # Default to storing as an absolute path,
            f = os.path.abspath(filename)
            if not os.path.isabs(filename) and not os.path.isabs(index_filename):
                # Since the user gave BOTH filename & index as relative paths,
                # we will store this relative to the index file even though
                # it may now start ../ (meaning up a level).
                # Note for cross platform use (e.g. shared data drive over SAMBA),
                # convert any Windows slash into Unix style / for relative paths.
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
            elif (os.path.dirname(os.path.abspath(filename)) + os.path.sep).startswith(relative_path + os.path.sep):
                # Since the sequence file is in the same directory or a sub
                # directory, might as well make this into a relative path:
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
                assert not f.startswith("../"), f
            # print("DEBUG - storing %r as [%r] %r" % (filename, relative_path, f))
            con.execute(
                "INSERT INTO file_data (file_number, name) VALUES (?,?);",
                (i, f))
            random_access_proxy = proxy_factory(format, filename)
            if key_function:
                offset_iter = ((key_function(k), i, o, l)
                               for (k, o, l) in random_access_proxy)
            else:
                offset_iter = ((k, i, o, l)
                               for (k, o, l) in random_access_proxy)
            while True:
                batch = list(itertools.islice(offset_iter, 100))
                if not batch:
                    break
                # print("Inserting batch of %i offsets, %s ... %s"
                #       % (len(batch), batch[0][0], batch[-1][0]))
                con.executemany(
                    "INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);",
                    batch)
                con.commit()
                count += len(batch)
            if len(random_access_proxies) < max_open:
                random_access_proxies[i] = random_access_proxy
            else:
                random_access_proxy._handle.close()
        self._length = count
        # print("About to index %i entries" % count)
        try:
            con.execute("CREATE UNIQUE INDEX IF NOT EXISTS "
                        "key_index ON offset_data(key);")
        except _IntegrityError as err:
            self._proxies = random_access_proxies
            self.close()
            con.close()
            raise ValueError("Duplicate key? %s" % err)
        con.execute("PRAGMA locking_mode=NORMAL")
        con.execute("UPDATE meta_data SET value = ? WHERE key = ?;",
                    (count, "count"))
        con.commit()
        # print("Index created")

    def __repr__(self):
        return self._repr

    def __contains__(self, key):
        return bool(
            self._con.execute("SELECT key FROM offset_data WHERE key=?;",
                              (key,)).fetchone())

    def __len__(self):
        """How many records are there?"""
        return self._length
        # return self._con.execute("SELECT COUNT(key) FROM offset_data;").fetchone()[0]

    def __iter__(self):
        """Iterate over the keys."""
        for row in self._con.execute("SELECT key FROM offset_data;"):
            yield str(row[0])

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems but not items etc
        # Just need to override this...
        def keys(self):
            """Return a list of all the keys (SeqRecord identifiers)."""
            return [str(row[0]) for row in
                    self._con.execute("SELECT key FROM offset_data;").fetchall()]

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset = row
        proxies = self._proxies
        if file_number in proxies:
            record = proxies[file_number].get(offset)
        else:
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            record = proxy.get(offset)
            proxies[file_number] = proxy
        if self._key_function:
            key2 = self._key_function(record.id)
        else:
            key2 = record.id
        if key != key2:
            raise ValueError("Key did not match (%s vs %s)" % (key, key2))
        return record

    def get(self, k, d=None):
        """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return d

    def get_raw(self, key):
        """Return the raw record from the file as a bytes string.

        If the key is not found, a KeyError exception is raised.
        """
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset, length FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset, length = row
        proxies = self._proxies
        if file_number in proxies:
            if length:
                # Shortcut if we have the length
                h = proxies[file_number]._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxies[file_number].get_raw(offset)
        else:
            # This code is duplicated from __getitem__ to avoid a function call
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            proxies[file_number] = proxy
            if length:
                # Shortcut if we have the length
                h = proxy._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxy.get_raw(offset)

    def close(self):
        """Close any open file handles."""
        proxies = self._proxies
        while proxies:
            proxies.popitem()[1]._handle.close()
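

# A minimal sketch of the SQLite-backed variant in use (the file names, key,
# and demo helper are hypothetical, not part of the original module).
# Bio.SeqIO.index_db builds or reloads an index like the class above,
# covering several sequence files at once. The index itself is an ordinary
# SQLite database (tables meta_data, file_data and offset_data), so it can
# also be inspected with the standard sqlite3 module:
def _demo_seqio_index_db():
    from Bio import SeqIO
    index = SeqIO.index_db("seqs.idx", ["part1.fasta", "part2.fasta"],
                           "fasta")
    raw = index.get_raw("seq1")  # raw bytes, fast via the stored length
    index.close()                # close the pooled file handles
    return raw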