Package BioSQL :: Module BioSeqDatabase
[hide private]
[frames | no frames]

Source Code for Module BioSQL.BioSeqDatabase

  1  # Copyright 2002 by Andrew Dalke.  All rights reserved. 
  2  # Revisions 2007-2014 copyright by Peter Cock.  All rights reserved. 
  3  # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved. 
  4  # Revisions 2013-2014 copyright by Tiago Antao.  All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license.  Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8  # 
  9  # Note that BioSQL (including the database schema and scripts) is 
 10  # available and licensed separately.  Please consult www.biosql.org 
 11  """Connect with a BioSQL database and load Biopython-like objects from it. 
 12   
 13  This provides interfaces for loading biological objects from a relational 
 14  database, and is compatible with the BioSQL standards. 
 15  """ 
 16  import os 
 17  import sys 
 18   
 19  from Bio._py3k import _universal_read_mode 
 20  from Bio._py3k import _bytes_bytearray_to_str as bytearray_to_str 
 21  from Bio import BiopythonDeprecationWarning 
 22   
 23  from . import BioSeq 
 24  from . import Loader 
 25  from . import DBUtils 
 26   
 27  _POSTGRES_RULES_PRESENT = False  # Hack for BioSQL Bug 2839: set to True by open_database() when the PostgreSQL bioentry rules are found; checked by BioSeqDatabase.load() before inserting each record 
 28   
 29   
def open_database(driver="MySQLdb", **kwargs):
    """Main interface for loading an existing BioSQL-style database.

    This function is the easiest way to retrieve a connection to a
    database, doing something like:

        >>> from BioSQL import BioSeqDatabase
        >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

    Arguments:
     - driver - The name of the database driver to use for connecting. The
       driver should implement the python DB API. By default, the MySQLdb
       driver is used.
     - user - the username to connect to the database with.
     - password, passwd - the password to connect with
     - host - the hostname of the database
     - database or db - the name of the database

    Returns a DBServer object wrapping the new connection.

    Raises ValueError if the driver is "psycopg" (version one), or if
    running under Jython with a driver other than MySQLdb or psycopg2.
    """
    global _POSTGRES_RULES_PRESENT

    if driver == "psycopg":
        raise ValueError("Using BioSQL with psycopg (version one) is no "
                         "longer supported. Use psycopg2 instead.")

    on_jython = os.name == "java"

    if on_jython:
        # Under Jython the connection is made through zxJDBC, so the
        # driver name only selects the JDBC driver class and URL prefix.
        from com.ziclix.python.sql import zxJDBC
        module = zxJDBC
        if driver in ["MySQLdb"]:
            jdbc_driver = "com.mysql.jdbc.Driver"
            url_pref = "jdbc:mysql://" + kwargs["host"] + "/"
        elif driver in ["psycopg2"]:
            jdbc_driver = "org.postgresql.Driver"
            url_pref = "jdbc:postgresql://" + kwargs["host"] + "/"
        else:
            # Without this the code below would fail later with an
            # unbound local variable instead of a meaningful error.
            raise ValueError("Driver %r is not supported under Jython; "
                             "use MySQLdb or psycopg2." % driver)
    else:
        module = __import__(driver, fromlist=["connect"])
    connect = module.connect

    # Different drivers use different keywords...
    kw = kwargs.copy()
    if driver in ["MySQLdb", "mysql.connector"] and not on_jython:
        if "database" in kw:
            kw["db"] = kw.pop("database")
        if "password" in kw:
            kw["passwd"] = kw.pop("password")
    else:
        # DB-API recommendations
        if "db" in kw:
            kw["database"] = kw.pop("db")
        if "passwd" in kw:
            kw["password"] = kw.pop("passwd")

    if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
        kw["database"] = "template1"

    if on_jython:
        if driver in ["MySQLdb"]:
            conn = connect(url_pref + kw.get("database", "mysql"),
                           kw["user"], kw["password"], jdbc_driver)
        else:
            # Only psycopg2 can reach this point (checked above).
            conn = connect(url_pref + kw.get("database", "postgresql") +
                           "?stringtype=unspecified",
                           kw["user"], kw["password"], jdbc_driver)
    elif driver in ["sqlite3"]:
        # SQLite connect takes the database name as input
        conn = connect(kw["database"])
    else:
        conn = connect(**kw)

    if on_jython:
        # zxJDBC's module name says nothing about the backend, so pass
        # the requested driver name explicitly.
        server = DBServer(conn, module, driver)
    else:
        server = DBServer(conn, module)

    # TODO - Remove the following once BioSQL Bug 2839 is fixed.
    # Test for RULES in PostgreSQL schema, see also Bug 2833.
    if driver in ["psycopg2", "pgdb"]:
        sql = "SELECT ev_class FROM pg_rewrite WHERE " + \
              "rulename='rule_bioentry_i1' OR " + \
              "rulename='rule_bioentry_i2';"
        if server.adaptor.execute_and_fetchall(sql):
            import warnings
            from Bio import BiopythonWarning
            warnings.warn("Your BioSQL PostgreSQL schema includes some "
                          "rules currently required for bioperl-db but "
                          "which may cause problems loading data using "
                          "Biopython (see BioSQL Bug 2839). If you do not "
                          "use BioPerl, please remove these rules. "
                          "Biopython should cope with the rules present, "
                          "but with a performance penalty when loading "
                          "new records.", BiopythonWarning)
            _POSTGRES_RULES_PRESENT = True

    return server
127 128
class DBServer:
    """Represents a BioSQL database containing namespaces (sub-databases).

    This acts like a Python dictionary, giving access to each namespace
    (defined by a row in the biodatabase table) as a BioSeqDatabase object.
    """

    def __init__(self, conn, module, module_name=None):
        """Wrap an open database connection.

        Arguments:
         - conn - the open connection object returned by the driver
         - module - the driver module object
         - module_name - name used to pick the adaptor and DBUtils helper;
           defaults to module.__name__
        """
        self.module = module
        if module_name is None:
            module_name = module.__name__
        # Only mysql.connector on Python 3 gets its cursor wrapped.
        wrap_cursor = (module_name == "mysql.connector" and
                       sys.version_info[0] == 3)
        # Get module specific Adaptor or the base (general) Adaptor
        Adapt = _interface_specific_adaptors.get(module_name, Adaptor)
        self.adaptor = Adapt(conn, DBUtils.get_dbutils(module_name),
                             wrap_cursor=wrap_cursor)
        self.module_name = module_name

    def __repr__(self):
        return self.__class__.__name__ + "(%r)" % self.adaptor.conn

    def __getitem__(self, name):
        """Return the namespace called name as a BioSeqDatabase."""
        return BioSeqDatabase(self.adaptor, name)

    def __len__(self):
        """Number of namespaces (sub-databases) in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase;"
        return int(self.adaptor.execute_and_fetch_col0(sql)[0])

    def __contains__(self, value):
        """Check if a namespace (sub-database) is in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase WHERE name=%s;"
        return bool(self.adaptor.execute_and_fetch_col0(sql, (value,))[0])

    def __iter__(self):
        """Iterate over namespaces (sub-databases) in the database."""
        # TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_biodatabase_names())

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems etc
        def keys(self):
            """List of namespaces (sub-databases) in the database."""
            return self.adaptor.list_biodatabase_names()

        def values(self):
            """List of BioSeqDatabase objects in the database."""
            return [self[key] for key in self]

        def items(self):
            """List of (namespace, BioSeqDatabase) pairs in the database."""
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def itervalues(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]
    else:
        # Python 3, items etc are all iterators
        def keys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def values(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]

    def __delitem__(self, name):
        """Remove a namespace and all its entries.

        Raises KeyError if there is no namespace called name.
        """
        if name not in self:
            raise KeyError(name)
        db_id = self.adaptor.fetch_dbid_by_dbname(name)
        remover = Loader.DatabaseRemover(self.adaptor, db_id)
        remover.remove()

    def remove_database(self, db_name):
        """Remove a namespace and all its entries (OBSOLETE).

        Try to remove all references to items in a database.

            server.remove_database(name)

        In keeping with the dictionary interface, you can now do this:

            del server[name]
        """
        import warnings
        warnings.warn("This method is deprecated.  In keeping with the "
                      "dictionary interface, you can now use 'del "
                      "server[name]' instead", BiopythonDeprecationWarning)
        self.__delitem__(db_name)

    def new_database(self, db_name, authority=None, description=None):
        """Add a new database to the server and return it."""
        # make the database
        sql = r"INSERT INTO biodatabase (name, authority, description)" \
              r" VALUES (%s, %s, %s)"
        self.adaptor.execute(sql, (db_name, authority, description))
        return BioSeqDatabase(self.adaptor, db_name)

    def load_database_sql(self, sql_file):
        """Load a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.

        Raises ValueError if the driver module is not one the loader
        knows how to feed.
        """
        # Read the file, dropping comment lines and blank lines, and
        # flatten it so that every kept line is followed by one space.
        kept_lines = []
        with open(sql_file, _universal_read_mode) as sql_handle:
            for line in sql_handle:
                # "--" is the standard SQL comment, "#" the MySQL one
                if line.startswith(("--", "#")):
                    continue
                stripped = line.strip()
                if stripped:
                    kept_lines.append(stripped)
        sql = "".join(text + " " for text in kept_lines)

        # two ways to load the SQL
        # 1. PostgreSQL can load it all at once and actually needs to
        # due to FUNCTION defines at the end of the SQL which mess up
        # the splitting by semicolons
        if self.module_name in ["psycopg2", "pgdb"]:
            self.adaptor.cursor.execute(sql)
        # 2. MySQL needs the database loading split up into single lines of
        # SQL executed one at a time
        elif self.module_name in ["mysql.connector", "MySQLdb", "sqlite3"]:
            for statement in sql.split(";"):
                # Skip blank fragments (such as the text after the final
                # semicolon) but do run a last statement that lacks a
                # terminating semicolon instead of silently dropping it.
                if statement.strip():
                    self.adaptor.cursor.execute(statement)
        else:
            raise ValueError("Module %s not supported by the loader." %
                             (self.module_name))

    def commit(self):
        """Commits the current transaction to the database."""
        return self.adaptor.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.adaptor.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.adaptor.close()
299 300
class _CursorWrapper:
    """A wrapper for mysql.connector resolving bytestring representations.

    Rows fetched through this wrapper have every bytes value decoded as
    UTF-8 text; all other values are passed through untouched.
    """

    def __init__(self, real_cursor):
        """Store the driver cursor that all calls are forwarded to."""
        self.real_cursor = real_cursor

    def execute(self, operation, params=None, multi=False):
        """Forward the statement to the wrapped cursor."""
        self.real_cursor.execute(operation, params, multi)

    def _convert_tuple(self, tuple_):
        """Return a copy of one row with bytes values decoded to str."""
        return tuple(elem.decode("utf-8") if isinstance(elem, bytes)
                     else elem
                     for elem in tuple_)

    def _convert_list(self, lst):
        """Return a list of rows, each converted by _convert_tuple."""
        return [self._convert_tuple(row) for row in lst]

    def fetchall(self):
        """Fetch all remaining rows, decoding bytes values."""
        return self._convert_list(self.real_cursor.fetchall())

    def fetchone(self):
        """Fetch the next row, decoding bytes values.

        Returns None when the wrapped cursor reports no more rows.
        """
        row = self.real_cursor.fetchone()
        if row is None:
            # No more rows: pass the None through rather than trying
            # to iterate over it.
            return None
        return self._convert_tuple(row)
332 333
class Adaptor(object):
    """High level wrapper for a database connection and cursor.

    Most database calls in BioSQL are done indirectly through this adaptor
    class. This provides helper methods for fetching data and executing
    sql.
    """

    def __init__(self, conn, dbutils, wrap_cursor=False):
        """Create the adaptor and open a cursor on conn.

        Arguments:
         - conn - open database connection
         - dbutils - driver specific helper used to run statements
         - wrap_cursor - if true, wrap the cursor in a _CursorWrapper
        """
        self.conn = conn
        if wrap_cursor:
            self.cursor = _CursorWrapper(conn.cursor())
        else:
            self.cursor = conn.cursor()
        self.dbutils = dbutils

    def last_id(self, table):
        """Return the last id for table, as reported by dbutils."""
        return self.dbutils.last_id(self.cursor, table)

    def autocommit(self, y=True):
        """Set the autocommit mode. True values enable; False value disable."""
        return self.dbutils.autocommit(self.conn, y)

    def commit(self):
        """Commits the current transaction."""
        return self.conn.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.conn.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.conn.close()

    def _fetch_bioentry_rows(self, sql, fields, dbid):
        """Run a bioentry SELECT, limited to namespace dbid when given.

        fields is extended in place with dbid when the filter is added.
        Returns all rows of the result.
        """
        if dbid:
            sql += " and biodatabase_id = %s"
            fields.append(dbid)
        self.execute(sql, fields)
        return self.cursor.fetchall()

    def fetch_dbid_by_dbname(self, dbname):
        """Return the biodatabase_id of the namespace called dbname.

        Raises KeyError if there is no such namespace.
        """
        self.execute(
            r"select biodatabase_id from biodatabase where name = %s",
            (dbname,))
        rv = self.cursor.fetchall()
        if not rv:
            raise KeyError("Cannot find biodatabase with name %r" % dbname)
        return rv[0][0]

    def fetch_seqid_by_display_id(self, dbid, name):
        """Return the bioentry_id of the entry whose name column is name.

        Raises IndexError if there is no match or more than one match.
        """
        rv = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where name = %s",
            [name], dbid)
        if not rv:
            raise IndexError("Cannot find display id %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with display id %r" % name)
        return rv[0][0]

    def fetch_seqid_by_accession(self, dbid, name):
        """Return the bioentry_id of the entry with accession name.

        Raises IndexError if there is no match or more than one match.
        """
        rv = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        if not rv:
            raise IndexError("Cannot find accession %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with accession %r" % name)
        return rv[0][0]

    def fetch_seqids_by_accession(self, dbid, name):
        """Return a list of the bioentry_ids with accession name."""
        sql = r"select bioentry_id from bioentry where accession = %s"
        fields = [name]
        if dbid:
            sql += " and biodatabase_id = %s"
            fields.append(dbid)
        return self.execute_and_fetch_col0(sql, fields)

    def fetch_seqid_by_version(self, dbid, name):
        """Return the bioentry_id for an "accession.version" string.

        A name without a dot is looked up with version "0".

        Raises IndexError if name has more than one dot, or if there is
        no match or more than one match.
        """
        acc_version = name.split(".")
        if len(acc_version) > 2:
            raise IndexError("Bad version %r" % name)
        acc = acc_version[0]
        version = acc_version[1] if len(acc_version) == 2 else "0"
        rv = self._fetch_bioentry_rows(
            r"SELECT bioentry_id FROM bioentry WHERE accession = %s"
            r" AND version = %s",
            [acc, version], dbid)
        if not rv:
            raise IndexError("Cannot find version %r" % name)
        if len(rv) > 1:
            raise IndexError("More than one entry with version %r" % name)
        return rv[0][0]

    def fetch_seqid_by_identifier(self, dbid, identifier):
        """Return the bioentry_id of the entry with the given identifier.

        If several entries match, the first row returned is used.

        Raises IndexError if there is no match.
        """
        # YB: was fetch_seqid_by_seqid
        rv = self._fetch_bioentry_rows(
            "SELECT bioentry_id FROM bioentry WHERE identifier = %s",
            [identifier], dbid)
        if not rv:
            raise IndexError("Cannot find identifier %r" % identifier)
        return rv[0][0]

    def list_biodatabase_names(self):
        """Return a list of the names of all namespaces."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM biodatabase")

    def list_bioentry_ids(self, dbid):
        """Return a list of the bioentry_ids in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_bioentry_display_ids(self, dbid):
        """Return a list of the entry names in namespace dbid."""
        return self.execute_and_fetch_col0(
            "SELECT name FROM bioentry WHERE biodatabase_id = %s",
            (dbid,))

    def list_any_ids(self, sql, args):
        """Return ids given a SQL statement to select for them.

        This assumes that the given SQL does a SELECT statement that
        returns a list of items. This parses them out of the 2D list
        they come as and just returns them in a list.
        """
        return self.execute_and_fetch_col0(sql, args)

    def execute_one(self, sql, args=None):
        """Execute sql that returns 1 record, and return the record.

        Raises AssertionError if the statement does not return exactly
        one row.
        """
        self.execute(sql, args or ())
        rv = self.cursor.fetchall()
        # Raised explicitly (rather than with an assert statement) so the
        # check still runs when Python is started with -O.
        if len(rv) != 1:
            raise AssertionError("Expected 1 response, got %d" % len(rv))
        return rv[0]

    def execute(self, sql, args=None):
        """Just execute an sql command."""
        if os.name == "java":
            # Under Jython the placeholders are rewritten from %s to ?
            sql = sql.replace("%s", "?")
        self.dbutils.execute(self.cursor, sql, args)

    def get_subseq_as_string(self, seqid, start, end):
        """Return the sequence of entry seqid from start to end as a str.

        start is zero based and end is exclusive, so end - start letters
        are requested.
        """
        length = end - start
        # XXX Check this on MySQL and PostgreSQL. substr should be general,
        # does it need dbutils?
        #
        # Convert to a string on returning for databases that give back
        # unicode. Shouldn't need unicode for sequences so this seems safe.
        return str(self.execute_one(
            """select SUBSTR(seq, %s, %s)
                     from biosequence where bioentry_id = %s""",
            (start + 1, length, seqid))[0])

    def execute_and_fetch_col0(self, sql, args=None):
        """Execute sql and return the first column of every row as a list."""
        self.execute(sql, args or ())
        return [field[0] for field in self.cursor.fetchall()]

    def execute_and_fetchall(self, sql, args=None):
        """Execute sql and return all the rows."""
        self.execute(sql, args or ())
        return self.cursor.fetchall()
510 511
class MysqlConnectorAdaptor(Adaptor):
    """A BioSQL Adaptor class with fixes for the MySQL interface.

    BioSQL was failing due to returns of bytearray objects from
    the mysql-connector-python database connector. This adaptor
    class scrubs returns of bytearrays and of byte strings converting
    them to string objects instead. This adaptor class was made in
    response to backwards incompatible changes added to
    mysql-connector-python in release 2.0.0 of the package.
    """

    def execute_one(self, sql, args=None):
        """Run the base execute_one and convert each value of the record."""
        record = super(MysqlConnectorAdaptor, self).execute_one(sql, args)
        return tuple(bytearray_to_str(value) for value in record)

    def execute_and_fetch_col0(self, sql, args=None):
        """Run the base execute_and_fetch_col0 and convert each value."""
        parent = super(MysqlConnectorAdaptor, self)
        first_column = parent.execute_and_fetch_col0(sql, args)
        return [bytearray_to_str(value) for value in first_column]

    def execute_and_fetchall(self, sql, args=None):
        """Run the base execute_and_fetchall and convert every value."""
        parent = super(MysqlConnectorAdaptor, self)
        converted = []
        for row in parent.execute_and_fetchall(sql, args):
            converted.append(tuple(bytearray_to_str(value) for value in row))
        return converted


# If SQL interfaces require a specific adaptor, use this to map the adaptor
_interface_specific_adaptors = {
    "mysql.connector": MysqlConnectorAdaptor,
}

# Maps each keyword accepted by BioSeqDatabase.lookup() to the name of the
# Adaptor method which turns that kind of value into a bioentry id.
_allowed_lookups = {
    'primary_id': "fetch_seqid_by_identifier",
    'gi': "fetch_seqid_by_identifier",
    'display_id': "fetch_seqid_by_display_id",
    'name': "fetch_seqid_by_display_id",
    'accession': "fetch_seqid_by_accession",
    'version': "fetch_seqid_by_version",
}
class BioSeqDatabase:
    """Represents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all rows in the bioentry
    table associated with it.
    """

    def __init__(self, adaptor, name):
        """Bind to the namespace called name.

        The adaptor's fetch_dbid_by_dbname(name) is called to look up
        the namespace's id, which is stored as self.dbid.
        """
        self.adaptor = adaptor
        self.name = name
        self.dbid = self.adaptor.fetch_dbid_by_dbname(name)

    def __repr__(self):
        return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)

    def get_Seq_by_id(self, name):
        """Gets a DBSeqRecord object by its name.

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_acc(self, name):
        """Gets a DBSeqRecord object by accession number.

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_ver(self, name):
        """Gets a DBSeqRecord object by version number.

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seqs_by_acc(self, name):
        """Gets a list of DBSeqRecord objects by accession number.

        Example: seq_recs = db.get_Seqs_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of DBSeq objects, and
        presumably was to mirror BioPerl.
        """
        seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
        return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]

    def get_all_primary_ids(self):
        """All the primary_ids of the sequences in the database (OBSOLETE).

        These maybe ids (display style) or accession numbers or
        something else completely different - they *are not*
        meaningful outside of this database implementation.

        Please use .keys() instead of .get_all_primary_ids()
        """
        import warnings
        warnings.warn("Use bio_seq_database.keys() instead of "
                      "bio_seq_database.get_all_primary_ids()",
                      BiopythonDeprecationWarning)
        return list(self.keys())

    def __getitem__(self, key):
        """Return a DBSeqRecord for the primary (internal) id key."""
        return BioSeq.DBSeqRecord(self.adaptor, key)

    def __delitem__(self, key):
        """Remove an entry and all its annotation.

        Raises KeyError if key is not an id in this namespace.
        """
        if key not in self:
            raise KeyError(key)
        # Assuming this will automatically cascade to the other tables...
        sql = "DELETE FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        self.adaptor.execute(sql, (self.dbid, key))

    def __len__(self):
        """Number of records in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s;"
        return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid, ))[0])

    def __contains__(self, value):
        """Check if a primary (internal) id is in this namespace.

        Values which cannot be converted to an integer (including None)
        are reported as absent without querying the database.
        """
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        # The bioentry_id field is an integer in the schema.
        # PostgreSQL will throw an error if we use a non integer in the query.
        try:
            bioentry_id = int(value)
        except (TypeError, ValueError):
            return False
        return bool(self.adaptor.execute_and_fetch_col0(
            sql, (self.dbid, bioentry_id))[0])

    def __iter__(self):
        """Iterate over ids (which may not be meaningful outside this database)."""
        # TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_bioentry_ids(self.dbid))

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems etc
        def keys(self):
            """List of ids which may not be meaningful outside this database."""
            return self.adaptor.list_bioentry_ids(self.dbid)

        def values(self):
            """List of DBSeqRecord objects in the namespace (sub database)."""
            return [self[key] for key in self]

        def items(self):
            """List of (id, DBSeqRecord) for the namespace (sub database)."""
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def itervalues(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]
    else:
        # Python 3, items etc are all iterators
        def keys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def values(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]

    def lookup(self, **kwargs):
        """Return a DBSeqRecord selected by a single keyword argument.

        Exactly one keyword must be given and it must be one of the keys
        of the module level _allowed_lookups table, for example
        db.lookup(accession='X77802').

        Raises TypeError if the number of keywords is not one, or the
        keyword is not recognised.
        """
        if len(kwargs) != 1:
            raise TypeError("single key/value parameter expected")
        k, v = list(kwargs.items())[0]
        if k not in _allowed_lookups:
            raise TypeError("lookup() expects one of %r, not %r" %
                            (list(_allowed_lookups.keys()), k))
        lookup_name = _allowed_lookups[k]
        lookup_func = getattr(self.adaptor, lookup_name)
        seqid = lookup_func(self.dbid, v)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_primary_id(self, seqid):
        """Get a DBSeqRecord by the primary (internal) id (OBSOLETE).

        Rather than db.get_Seq_by_primary_id(my_id) use db[my_id]

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        import warnings
        warnings.warn("Use bio_seq_database[my_id] instead of "
                      "bio_seq_database.get_Seq_by_primary_id(my_id)",
                      BiopythonDeprecationWarning)
        return self[seqid]

    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.

        When the PostgreSQL rules workaround is active, raises the
        connection's IntegrityError if a record is already present.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                          fetch_NCBI_taxonomy)
        num_records = 0
        for cur_record in record_iterator:
            num_records += 1
            # Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            # the RULES are present check for a duplicate record before loading
            if _POSTGRES_RULES_PRESENT:
                # Recreate what the Loader's _load_bioentry_table will do:
                accession = cur_record.id
                version = 0
                if cur_record.id.count(".") == 1:
                    acc_part, ver_part = cur_record.id.split('.')
                    try:
                        version = int(ver_part)
                    except ValueError:
                        # Not a numeric version, keep the whole id
                        pass
                    else:
                        accession = acc_part
                gi = cur_record.annotations.get("gi", None)
                # Compare the identifier as text; a missing gi is sent as
                # NULL, which matches nothing.
                identifier = None if gi is None else str(gi)
                # Bound parameters rather than string interpolation, so
                # that quote characters in a record id cannot break (or
                # alter) the statement.
                sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \
                      "= %s AND biodatabase_id = %s) OR (accession = " + \
                      "%s AND version = %s AND biodatabase_id = %s)"
                self.adaptor.execute(
                    sql, (identifier, self.dbid, accession, version,
                          self.dbid))
                if self.adaptor.cursor.fetchone():
                    raise self.adaptor.conn.IntegrityError("Duplicate record "
                                     "detected: record has not been inserted")
            # End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records
782