Package BioSQL :: Module BioSeqDatabase
[hide private]
[frames | no frames]

Source Code for Module BioSQL.BioSeqDatabase

  1  # Copyright 2002 by Andrew Dalke.  All rights reserved. 
  2  # Revisions 2007-2014 copyright by Peter Cock.  All rights reserved. 
  3  # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved. 
  4  # Revisions 2013-2014 copyright by Tiago Antao.  All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license.  Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8  # 
  9  # Note that BioSQL (including the database schema and scripts) is 
 10  # available and licensed separately.  Please consult www.biosql.org 
 11  """Connect with a BioSQL database and load Biopython like objects from it. 
 12   
 13  This provides interfaces for loading biological objects from a relational 
 14  database, and is compatible with the BioSQL standards. 
 15  """ 
 16  import os 
 17  import sys 
 18   
 19  from Bio._py3k import _universal_read_mode 
 20  from Bio._py3k import _bytes_bytearray_to_str as bytearray_to_str 
 21  from Bio import BiopythonDeprecationWarning 
 22   
 23  from . import BioSeq 
 24  from . import Loader 
 25  from . import DBUtils 
 26   
 27  _POSTGRES_RULES_PRESENT = False  # Hack for BioSQL Bug 2839 
 28   
 29   
def open_database(driver="MySQLdb", **kwargs):
    """Load an existing BioSQL-style database and return a DBServer for it.

    This function is the easiest way to retrieve a connection to a
    database, doing something like:

        >>> from BioSQL import BioSeqDatabase
        >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

    Arguments:
     - driver - The name of the database driver to use for connecting. The
       driver should implement the python DB API. By default, the MySQLdb
       driver is used.
     - user - the username to connect to the database with.
     - password, passwd - the password to connect with
     - host - the hostname of the database
     - database or db - the name of the database

    Raises ValueError if the obsolete "psycopg" (version one) driver is
    requested, or if running under Jython with a driver for which no JDBC
    mapping is defined here (only MySQLdb and psycopg2 are mapped).
    """
    global _POSTGRES_RULES_PRESENT

    if driver == "psycopg":
        raise ValueError("Using BioSQL with psycopg (version one) is no "
                         "longer supported. Use psycopg2 instead.")

    on_jython = os.name == "java"
    if on_jython:
        from com.ziclix.python.sql import zxJDBC
        module = zxJDBC
        if driver in ["MySQLdb"]:
            jdbc_driver = "com.mysql.jdbc.Driver"
            url_pref = "jdbc:mysql://" + kwargs["host"] + "/"
        elif driver in ["psycopg2"]:
            jdbc_driver = "org.postgresql.Driver"
            url_pref = "jdbc:postgresql://" + kwargs["host"] + "/"
        else:
            # Without this guard no connection would ever be created and the
            # function would fail further down with an UnboundLocalError.
            raise ValueError("Driver %r is not supported under Jython; use "
                             "'MySQLdb' or 'psycopg2'." % driver)
    else:
        module = __import__(driver, fromlist=["connect"])
    connect = module.connect

    # Different drivers use different keywords, so normalise the synonyms.
    kw = kwargs.copy()
    if driver in ["MySQLdb", "mysql.connector"] and not on_jython:
        renames = (("database", "db"), ("password", "passwd"))
    else:
        # DB-API recommendations
        renames = (("db", "database"), ("passwd", "password"))
    for old_key, new_key in renames:
        if old_key in kw:
            kw[new_key] = kw.pop(old_key)

    if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
        kw["database"] = "template1"

    if on_jython:
        if driver in ["MySQLdb"]:
            conn = connect(url_pref + kw.get("database", "mysql"),
                           kw["user"], kw["password"], jdbc_driver)
        else:
            # Only psycopg2 can reach here because of the guard above.
            conn = connect(url_pref + kw.get("database", "postgresql") +
                           "?stringtype=unspecified",
                           kw["user"], kw["password"], jdbc_driver)
    elif driver in ["sqlite3"]:
        # SQLite connect takes the database name as input
        conn = connect(kw["database"])
    else:
        conn = connect(**kw)

    if on_jython:
        # zxJDBC is the module in use, so pass the logical driver name too.
        server = DBServer(conn, module, driver)
    else:
        server = DBServer(conn, module)

    # TODO - Remove the following once BioSQL Bug 2839 is fixed.
    # Test for RULES in PostgreSQL schema, see also Bug 2833.
    if driver in ["psycopg2", "pgdb"]:
        sql = "SELECT ev_class FROM pg_rewrite WHERE " + \
              "rulename='rule_bioentry_i1' OR " + \
              "rulename='rule_bioentry_i2';"
        if server.adaptor.execute_and_fetchall(sql):
            import warnings
            from Bio import BiopythonWarning
            warnings.warn("Your BioSQL PostgreSQL schema includes some "
                          "rules currently required for bioperl-db but "
                          "which may cause problems loading data using "
                          "Biopython (see BioSQL Bug 2839). If you do not "
                          "use BioPerl, please remove these rules. "
                          "Biopython should cope with the rules present, "
                          "but with a performance penalty when loading "
                          "new records.", BiopythonWarning)
            _POSTGRES_RULES_PRESENT = True

    return server
class DBServer(object):
    """Represent a BioSQL database containing namespaces (sub-databases).

    This acts like a Python dictionary, giving access to each namespace
    (defined by a row in the biodatabase table) as a BioSeqDatabase object.
    """

    def __init__(self, conn, module, module_name=None):
        """Wrap an open connection in the adaptor matching its driver.

        Arguments:
         - conn - an open database connection.
         - module - the driver module the connection came from.
         - module_name - the driver name; defaults to module.__name__.
        """
        self.module = module
        if module_name is None:
            module_name = module.__name__
        # Only mysql.connector on Python 3 gets the cursor wrapper.
        wrap_cursor = (module_name == "mysql.connector" and
                       sys.version_info[0] == 3)
        # Get module specific Adaptor or the base (general) Adaptor
        Adapt = _interface_specific_adaptors.get(module_name, Adaptor)
        self.adaptor = Adapt(conn, DBUtils.get_dbutils(module_name),
                             wrap_cursor=wrap_cursor)
        self.module_name = module_name

    def __repr__(self):
        """Return the class name followed by the repr of the connection."""
        # A one-item tuple keeps the formatting safe whatever conn is.
        return "%s(%r)" % (self.__class__.__name__, self.adaptor.conn)

    def __getitem__(self, name):
        """Return the named namespace as a BioSeqDatabase object."""
        return BioSeqDatabase(self.adaptor, name)

    def __len__(self):
        """Return the number of namespaces (sub-databases) in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase;"
        return int(self.adaptor.execute_and_fetch_col0(sql)[0])

    def __contains__(self, value):
        """Check if a namespace (sub-database) is in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase WHERE name=%s;"
        return bool(self.adaptor.execute_and_fetch_col0(sql, (value,))[0])

    def __iter__(self):
        """Iterate over namespaces (sub-databases) in the database."""
        # TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_biodatabase_names())

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems etc
        def keys(self):
            """List of namespaces (sub-databases) in the database."""
            return self.adaptor.list_biodatabase_names()

        def values(self):
            """List of BioSeqDatabase objects in the database."""
            return [self[key] for key in self]

        def items(self):
            """List of (namespace, BioSeqDatabase) for entries in the database."""
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def itervalues(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]
    else:
        # Python 3, items etc are all iterators
        def keys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def values(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]

    def __delitem__(self, name):
        """Remove a namespace and all its entries.

        Raises KeyError if there is no namespace called name.
        """
        if name not in self:
            raise KeyError(name)
        db_id = self.adaptor.fetch_dbid_by_dbname(name)
        remover = Loader.DatabaseRemover(self.adaptor, db_id)
        remover.remove()

    def remove_database(self, db_name):
        """Remove a namespace and all its entries (OBSOLETE).

        Try to remove all references to items in a database.

            server.remove_database(name)

        In keeping with the dictionary interface, you can now do this:

            del server[name]
        """
        import warnings
        warnings.warn("This method is deprecated.  In keeping with the "
                      "dictionary interface, you can now use 'del "
                      "server[name]' instead", BiopythonDeprecationWarning)
        self.__delitem__(db_name)

    def new_database(self, db_name, authority=None, description=None):
        """Add a new namespace to the server and return it.

        Inserts a row into the biodatabase table and returns the matching
        BioSeqDatabase object.
        """
        sql = r"INSERT INTO biodatabase (name, authority, description)" \
              r" VALUES (%s, %s, %s)"
        self.adaptor.execute(sql, (db_name, authority, description))
        return BioSeqDatabase(self.adaptor, db_name)

    def load_database_sql(self, sql_file):
        """Load a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.

        Lines starting with "--" or "#" and blank lines are ignored.
        Raises ValueError if the driver is not one the loader knows about.
        """
        # Read the file with all comment and blank lines removed. Collect
        # the pieces in a list and join once instead of repeated "+=".
        pieces = []
        with open(sql_file, _universal_read_mode) as sql_handle:
            for line in sql_handle:
                if line.startswith(("--", "#")):
                    # SQL comment, or MySQL style comment
                    continue
                stripped = line.strip()
                if stripped:
                    pieces.append(stripped + " ")
        sql = "".join(pieces)

        # two ways to load the SQL
        # 1. PostgreSQL can load it all at once and actually needs to
        # due to FUNCTION defines at the end of the SQL which mess up
        # the splitting by semicolons
        if self.module_name in ["psycopg2", "pgdb"]:
            self.adaptor.cursor.execute(sql)
        # 2. MySQL needs the database loading split up into single lines of
        # SQL executed one at a time
        elif self.module_name in ["mysql.connector", "MySQLdb", "sqlite3"]:
            for sql_line in sql.split(";"):
                # Skip only genuinely blank chunks (such as the text after
                # the final semicolon). Blindly dropping the last chunk
                # would lose the final statement of a file that does not
                # end with a semicolon.
                if sql_line.strip():
                    self.adaptor.cursor.execute(sql_line)
        else:
            raise ValueError("Module %s not supported by the loader." %
                             (self.module_name))

    def commit(self):
        """Commit the current transaction to the database."""
        return self.adaptor.commit()

    def rollback(self):
        """Roll back the current transaction."""
        return self.adaptor.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.adaptor.close()
class _CursorWrapper(object):
    """A wrapper for mysql.connector resolving bytestring representations.

    Calls are forwarded to the wrapped cursor; any bytes value in a fetched
    row is decoded as UTF-8 text before being returned.
    """

    def __init__(self, real_cursor):
        """Store the cursor that calls are forwarded to."""
        self.real_cursor = real_cursor

    def execute(self, operation, params=None, multi=False):
        """Forward the statement to the wrapped cursor unchanged."""
        self.real_cursor.execute(operation, params, multi)

    def _convert_tuple(self, tuple_):
        """Return a copy of one row with bytes values decoded as UTF-8."""
        return tuple(elem.decode("utf-8") if isinstance(elem, bytes) else elem
                     for elem in tuple_)

    def _convert_list(self, lst):
        """Return a list of rows, each converted with _convert_tuple."""
        return [self._convert_tuple(tuple_) for tuple_ in lst]

    def fetchall(self):
        """Fetch all remaining rows, decoding bytes values."""
        return self._convert_list(self.real_cursor.fetchall())

    def fetchone(self):
        """Fetch the next row with bytes decoded, or None if there is none."""
        tuple_ = self.real_cursor.fetchone()
        if tuple_ is None:
            # DB-API cursors return None once the result set is exhausted;
            # pass that through rather than trying to iterate over it.
            return None
        return self._convert_tuple(tuple_)
class Adaptor(object):
    """High level wrapper for a database connection and cursor.

    Most database calls in BioSQL are done indirectly though this adaptor
    class. This provides helper methods for fetching data and executing
    sql.
    """

    def __init__(self, conn, dbutils, wrap_cursor=False):
        """Store the connection, open a cursor on it and keep the dbutils.

        If wrap_cursor is true the cursor is wrapped in a _CursorWrapper.
        """
        self.conn = conn
        if wrap_cursor:
            self.cursor = _CursorWrapper(conn.cursor())
        else:
            self.cursor = conn.cursor()
        self.dbutils = dbutils

    def last_id(self, table):
        """Return whatever dbutils reports as the last id for the table."""
        return self.dbutils.last_id(self.cursor, table)

    def autocommit(self, y=True):
        """Set the autocommit mode. True values enable; False value disable."""
        return self.dbutils.autocommit(self.conn, y)

    def commit(self):
        """Commit the current transaction."""
        return self.conn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        return self.conn.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.conn.close()

    @staticmethod
    def _restrict_to_namespace(sql, fields, dbid):
        """Return (sql, fields) limited to namespace dbid, if one is given."""
        if dbid:
            return sql + " and biodatabase_id = %s", fields + [dbid]
        return sql, fields

    def _fetch_bioentry_rows(self, sql, fields, dbid):
        """Execute a bioentry query for one namespace and return all rows."""
        sql, fields = self._restrict_to_namespace(sql, fields, dbid)
        self.execute(sql, fields)
        return self.cursor.fetchall()

    def fetch_dbid_by_dbname(self, dbname):
        """Return the biodatabase_id of the named namespace.

        Raises KeyError if no namespace has that name.
        """
        self.execute(
            r"select biodatabase_id from biodatabase where name = %s",
            (dbname,))
        rv = self.cursor.fetchall()
        if not rv:
            raise KeyError("Cannot find biodatabase with name %r" % dbname)
        return rv[0][0]

    def fetch_seqid_by_display_id(self, dbid, name):
        """Return the bioentry_id whose name column equals name.

        Raises IndexError if there is no match, or more than one.
        """
        rv = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where name = %s",
            [name], dbid)
        if not rv:
            raise IndexError("Cannot find display id %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with display id %r" % name)
        return rv[0][0]

    def fetch_seqid_by_accession(self, dbid, name):
        """Return the bioentry_id whose accession equals name.

        Raises IndexError if there is no match, or more than one.
        """
        rv = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        if not rv:
            raise IndexError("Cannot find accession %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with accession %r" % name)
        return rv[0][0]

    def fetch_seqids_by_accession(self, dbid, name):
        """Return a list of every bioentry_id whose accession equals name."""
        sql, fields = self._restrict_to_namespace(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        return self.execute_and_fetch_col0(sql, fields)

    def fetch_seqid_by_version(self, dbid, name):
        """Return the bioentry_id for an "accession.version" style name.

        A name without a "." is looked up with version "0". Raises
        IndexError if name contains more than one ".", if there is no
        match, or if there is more than one match.
        """
        acc_version = name.split(".")
        if len(acc_version) > 2:
            raise IndexError("Bad version %r" % name)
        acc = acc_version[0]
        version = acc_version[1] if len(acc_version) == 2 else "0"
        rv = self._fetch_bioentry_rows(
            r"SELECT bioentry_id FROM bioentry WHERE accession = %s"
            r" AND version = %s",
            [acc, version], dbid)
        if not rv:
            raise IndexError("Cannot find version %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with version %r" % name)
        return rv[0][0]

    def fetch_seqid_by_identifier(self, dbid, identifier):
        """Return the first bioentry_id whose identifier column matches.

        Raises IndexError if there is no match.
        """
        # YB: was fetch_seqid_by_seqid
        rv = self._fetch_bioentry_rows(
            "SELECT bioentry_id FROM bioentry WHERE identifier = %s",
            [identifier], dbid)
        if not rv:
            # This is an identifier lookup, so say so in the message.
            raise IndexError("Cannot find identifier %r" % identifier)
        return rv[0][0]

    def list_biodatabase_names(self):
        """Return a list of the names of all namespaces."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM biodatabase")

    def list_bioentry_ids(self, dbid):
        """Return a list of the bioentry_id values in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_bioentry_display_ids(self, dbid):
        """Return a list of the bioentry names in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_any_ids(self, sql, args):
        """Return ids given a SQL statement to select for them.

        This assumes that the given SQL does a SELECT statement that
        returns a list of items. This parses them out of the 2D list
        they come as and just returns them in a list.
        """
        return self.execute_and_fetch_col0(sql, args)

    def execute_one(self, sql, args=None):
        """Execute sql that returns 1 record, and return the record.

        Raises AssertionError if the query does not return exactly one row.
        """
        self.execute(sql, args or ())
        rv = self.cursor.fetchall()
        if len(rv) != 1:
            # Raised explicitly so the check still runs under "python -O",
            # where assert statements are stripped.
            raise AssertionError("Expected 1 response, got %d" % len(rv))
        return rv[0]

    def execute(self, sql, args=None):
        """Just execute an sql command."""
        if os.name == "java":
            # Under Jython the placeholders are rewritten from %s to ?.
            sql = sql.replace("%s", "?")
        self.dbutils.execute(self.cursor, sql, args)

    def get_subseq_as_string(self, seqid, start, end):
        """Return the sequence of bioentry seqid from start to end as a str.

        start is passed to SQL as start + 1, with a length of end - start.
        """
        length = end - start
        # XXX Check this on MySQL and PostgreSQL. substr should be general,
        # does it need dbutils?
        #
        # Convert to a string on returning for databases that give back
        # unicode. Shouldn't need unicode for sequences so this seems safe.
        return str(self.execute_one(
            """select SUBSTR(seq, %s, %s)
                     from biosequence where bioentry_id = %s""",
            (start + 1, length, seqid))[0])

    def execute_and_fetch_col0(self, sql, args=None):
        """Execute sql and return the first column of every row as a list."""
        self.execute(sql, args or ())
        return [field[0] for field in self.cursor.fetchall()]

    def execute_and_fetchall(self, sql, args=None):
        """Execute sql and return all the rows."""
        self.execute(sql, args or ())
        return self.cursor.fetchall()
class MysqlConnectorAdaptor(Adaptor):
    """A BioSQL Adaptor class with fixes for the MySQL interface.

    The mysql-connector-python database connector returns bytearray
    objects, which BioSQL could not cope with. This adaptor passes every
    fetched value through bytearray_to_str so that bytearrays and byte
    strings come back as string objects instead. It was written in
    response to backwards incompatible changes added to
    mysql-connector-python in release 2.0.0 of the package.
    """

    def execute_one(self, sql, args=None):
        """Return the single record from the query, converted to strings."""
        record = super(MysqlConnectorAdaptor, self).execute_one(sql, args)
        return tuple(map(bytearray_to_str, record))

    def execute_and_fetch_col0(self, sql, args=None):
        """Return the first column of every row, converted to strings."""
        values = super(MysqlConnectorAdaptor, self).execute_and_fetch_col0(
            sql, args)
        return list(map(bytearray_to_str, values))

    def execute_and_fetchall(self, sql, args=None):
        """Return every row as a tuple of values converted to strings."""
        rows = super(MysqlConnectorAdaptor, self).execute_and_fetchall(
            sql, args)
        return [tuple(map(bytearray_to_str, row)) for row in rows]
# Driver module names that need a specialised adaptor class. DBServer looks
# the driver up here and falls back to the general Adaptor when it is absent.
_interface_specific_adaptors = {
    "mysql.connector": MysqlConnectorAdaptor,
}

# Keyword accepted by BioSeqDatabase.lookup() mapped to the name of the
# Adaptor method which turns the given value into a bioentry_id.
_allowed_lookups = {
    "primary_id": "fetch_seqid_by_identifier",
    "gi": "fetch_seqid_by_identifier",
    "display_id": "fetch_seqid_by_display_id",
    "name": "fetch_seqid_by_display_id",
    "accession": "fetch_seqid_by_accession",
    "version": "fetch_seqid_by_version",
}
class BioSeqDatabase(object):
    """Represents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all rows in the bioentry
    table associated with it.
    """

    def __init__(self, adaptor, name):
        """Look up the namespace called name using the given adaptor.

        The adaptor's fetch_dbid_by_dbname raises KeyError for an unknown
        name.
        """
        self.adaptor = adaptor
        self.name = name
        self.dbid = self.adaptor.fetch_dbid_by_dbname(name)

    def __repr__(self):
        """Return a string showing the adaptor and namespace name."""
        return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)

    def get_Seq_by_id(self, name):
        """Get a DBSeqRecord object by its name.

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_acc(self, name):
        """Get a DBSeqRecord object by accession number.

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_ver(self, name):
        """Get a DBSeqRecord object by version number.

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seqs_by_acc(self, name):
        """Get a list of DBSeqRecord objects by accession number.

        Example: seq_recs = db.get_Seqs_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of DBSeq objects, and
        presumably was to mirror BioPerl.
        """
        seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
        return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]

    def get_all_primary_ids(self):
        """All the primary_ids of the sequences in the database (OBSOLETE).

        These maybe ids (display style) or accession numbers or
        something else completely different - they *are not*
        meaningful outside of this database implementation.

        Please use .keys() instead of .get_all_primary_ids()
        """
        import warnings
        warnings.warn("Use bio_seq_database.keys() instead of "
                      "bio_seq_database.get_all_primary_ids()",
                      BiopythonDeprecationWarning)
        return list(self.keys())

    def __getitem__(self, key):
        """Return a DBSeqRecord for the primary (internal) id key."""
        return BioSeq.DBSeqRecord(self.adaptor, key)

    def __delitem__(self, key):
        """Remove an entry and all its annotation.

        Raises KeyError if key is not an id in this namespace.
        """
        if key not in self:
            raise KeyError(key)
        # Assuming this will automatically cascade to the other tables...
        sql = "DELETE FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        self.adaptor.execute(sql, (self.dbid, key))

    def __len__(self):
        """Return the number of records in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s;"
        return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid, ))[0])

    def __contains__(self, value):
        """Check if a primary (internal) id is in this namespace."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        # The bioentry_id field is an integer in the schema.
        # PostgreSQL will throw an error if we use a non integer in the query.
        try:
            bioentry_id = int(value)
        except (TypeError, ValueError):
            # TypeError covers objects such as None which int() rejects
            # outright; they cannot be ids, so they are simply not present.
            return False
        return bool(self.adaptor.execute_and_fetch_col0(
            sql, (self.dbid, bioentry_id))[0])

    def __iter__(self):
        """Iterate over ids (which may not be meaningful outside this database)."""
        # TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_bioentry_ids(self.dbid))

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems etc
        def keys(self):
            """List of ids which may not be meaningful outside this database."""
            return self.adaptor.list_bioentry_ids(self.dbid)

        def values(self):
            """List of DBSeqRecord objects in the namespace (sub database)."""
            return [self[key] for key in self]

        def items(self):
            """List of (id, DBSeqRecord) for the namespace (sub database)."""
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def itervalues(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]
    else:
        # Python 3, items etc are all iterators
        def keys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def values(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]

    def lookup(self, **kwargs):
        """Return a DBSeqRecord found by a single keyword=value criterion.

        The keyword must be one of the keys of _allowed_lookups, which
        names the adaptor method used to find the record's id.

        Raises TypeError unless exactly one supported keyword is given.
        """
        if len(kwargs) != 1:
            raise TypeError("single key/value parameter expected")
        k, v = list(kwargs.items())[0]
        if k not in _allowed_lookups:
            raise TypeError("lookup() expects one of %r, not %r" %
                            (list(_allowed_lookups.keys()), k))
        lookup_name = _allowed_lookups[k]
        lookup_func = getattr(self.adaptor, lookup_name)
        seqid = lookup_func(self.dbid, v)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_primary_id(self, seqid):
        """Get a DBSeqRecord by the primary (internal) id (OBSOLETE).

        Rather than db.get_Seq_by_primary_id(my_id) use db[my_id]

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        import warnings
        warnings.warn("Use bio_seq_database[my_id] instead of "
                      "bio_seq_database.get_Seq_by_primary_id(my_id)",
                      BiopythonDeprecationWarning)
        return self[seqid]

    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
            from Bio import SeqIO
            count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.

        When the PostgreSQL rules workaround is active, the connection's
        IntegrityError is raised for a record that is already present.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                          fetch_NCBI_taxonomy)
        num_records = 0
        for cur_record in record_iterator:
            num_records += 1
            # Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            # the RULES are present check for a duplicate record before loading
            if _POSTGRES_RULES_PRESENT:
                # Recreate what the Loader's _load_bioentry_table will do:
                accession = cur_record.id
                version = 0
                if cur_record.id.count(".") == 1:
                    prefix, suffix = cur_record.id.split(".")
                    try:
                        version = int(suffix)
                    except ValueError:
                        # Not a numeric version; keep the whole id.
                        pass
                    else:
                        accession = prefix
                gi = cur_record.annotations.get("gi", None)
                # Let the driver bind the values instead of pasting them
                # into the SQL text, so a quote in an id cannot break or
                # alter the query. A missing gi is bound as NULL, which
                # matches no row.
                sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \
                      "= %s AND biodatabase_id = %s) OR (accession = " + \
                      "%s AND version = %s AND biodatabase_id = %s)"
                self.adaptor.execute(
                    sql, (gi, self.dbid, accession, version, self.dbid))
                if self.adaptor.cursor.fetchone():
                    raise self.adaptor.conn.IntegrityError(
                        "Duplicate record "
                        "detected: record has not been inserted")
            # End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records