Package BioSQL :: Module BioSeqDatabase
[hide private]
[frames] | no frames]

Source Code for Module BioSQL.BioSeqDatabase

  1  # Copyright 2002 by Andrew Dalke.  All rights reserved. 
  2  # Revisions 2007-2014 copyright by Peter Cock.  All rights reserved. 
  3  # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved. 
  4  # Revisions 2013-2014 copyright by Tiago Antao.  All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license.  Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8  # 
  9  # Note that BioSQL (including the database schema and scripts) is 
 10  # available and licensed separately.  Please consult www.biosql.org 
 11  """Connect with a BioSQL database and load Biopython like objects from it. 
 12   
 13  This provides interfaces for loading biological objects from a relational 
 14  database, and is compatible with the BioSQL standards. 
 15  """ 
 16  import os 
 17  import sys 
 18   
 19  from Bio._py3k import _universal_read_mode 
 20  from Bio import BiopythonDeprecationWarning 
 21   
 22  from . import BioSeq 
 23  from . import Loader 
 24  from . import DBUtils 
 25   
 26  _POSTGRES_RULES_PRESENT = False  # Hack for BioSQL Bug 2839 
 27   
 28   
def open_database(driver="MySQLdb", **kwargs):
    """Open an existing BioSQL-style database and return a DBServer.

    This function is the easiest way to retrieve a connection to a
    database, doing something like:

        >>> from BioSQL import BioSeqDatabase
        >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

    The various options are:
    driver -> The name of the database driver to use for connecting. The
    driver should implement the python DB API. By default, the MySQLdb
    driver is used.
    user -> the username to connect to the database with.
    password, passwd -> the password to connect with
    host -> the hostname of the database
    database or db -> the name of the database

    Raises ValueError for the retired "psycopg" (version one) driver, and
    for a driver this function cannot connect with under Jython.
    """
    # A ``global`` statement applies to the whole function body, so declare
    # it up front rather than inside the branch that assigns the flag.
    global _POSTGRES_RULES_PRESENT

    if driver == "psycopg":
        raise ValueError("Using BioSQL with psycopg (version one) is no "
                         "longer supported. Use psycopg2 instead.")

    on_jython = os.name == "java"
    if on_jython:
        from com.ziclix.python.sql import zxJDBC
        module = zxJDBC
        if driver in ["MySQLdb"]:
            jdbc_driver = "com.mysql.jdbc.Driver"
            url_pref = "jdbc:mysql://" + kwargs["host"] + "/"
        elif driver in ["psycopg2"]:
            jdbc_driver = "org.postgresql.Driver"
            url_pref = "jdbc:postgresql://" + kwargs["host"] + "/"
        else:
            # Without a JDBC driver/URL no connection can be made below;
            # fail here instead of with an unbound-variable error later.
            raise ValueError("Driver %r is not supported by BioSQL under "
                             "Jython (use MySQLdb or psycopg2)." % driver)
    else:
        module = __import__(driver, fromlist=["connect"])
    connect = module.connect

    # Different drivers use different keywords...
    kw = kwargs.copy()
    if driver in ["MySQLdb", "mysql.connector"] and not on_jython:
        if "database" in kw:
            kw["db"] = kw.pop("database")
        if "password" in kw:
            kw["passwd"] = kw.pop("password")
    else:
        # DB-API recommendations
        if "db" in kw:
            kw["database"] = kw.pop("db")
        if "passwd" in kw:
            kw["password"] = kw.pop("passwd")
    if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
        kw["database"] = "template1"

    if on_jython:
        if driver in ["MySQLdb"]:
            conn = connect(url_pref + kw.get("database", "mysql"),
                           kw["user"], kw["password"], jdbc_driver)
        elif driver in ["psycopg2"]:
            conn = connect(url_pref + kw.get("database", "postgresql") +
                           "?stringtype=unspecified",
                           kw["user"], kw["password"], jdbc_driver)
    elif driver in ["sqlite3"]:
        # SQLite connect takes the database name as input
        conn = connect(kw["database"])
    else:
        conn = connect(**kw)

    if on_jython:
        # zxJDBC's module name says nothing about the backend, so pass the
        # driver name explicitly.
        server = DBServer(conn, module, driver)
    else:
        server = DBServer(conn, module)

    # TODO - Remove the following once BioSQL Bug 2839 is fixed.
    # Test for RULES in PostgreSQL schema, see also Bug 2833.
    if driver in ["psycopg2", "pgdb"]:
        sql = ("SELECT ev_class FROM pg_rewrite WHERE "
               "rulename='rule_bioentry_i1' OR "
               "rulename='rule_bioentry_i2';")
        if server.adaptor.execute_and_fetchall(sql):
            import warnings
            from Bio import BiopythonWarning
            warnings.warn("Your BioSQL PostgreSQL schema includes some "
                          "rules currently required for bioperl-db but "
                          "which may cause problems loading data using "
                          "Biopython (see BioSQL Bug 2839). If you do not "
                          "use BioPerl, please remove these rules. "
                          "Biopython should cope with the rules present, "
                          "but with a performance penalty when loading "
                          "new records.", BiopythonWarning)
            _POSTGRES_RULES_PRESENT = True

    return server
126 127
class DBServer:
    """A BioSQL database holding namespaces (sub-databases).

    Behaves like a Python dictionary: the keys are namespace names (one
    per row of the biodatabase table) and the values are BioSeqDatabase
    objects.
    """

    def __init__(self, conn, module, module_name=None):
        """Wrap a DB-API connection made with the given driver module.

        module_name defaults to ``module.__name__``; it selects the
        DBUtils helper and whether the cursor is wrapped.
        """
        self.module = module
        if module_name is None:
            module_name = module.__name__
        # Only mysql.connector under Python 3 gets the wrapping cursor.
        needs_wrapper = (module_name == "mysql.connector"
                         and sys.version_info[0] == 3)
        self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name),
                               wrap_cursor=needs_wrapper)
        self.module_name = module_name

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(%r)" % self.adaptor.conn

    def __getitem__(self, name):
        return BioSeqDatabase(self.adaptor, name)

    def __len__(self):
        """Number of namespaces (sub-databases) in this database."""
        counts = self.adaptor.execute_and_fetch_col0(
            "SELECT COUNT(name) FROM biodatabase;")
        return int(counts[0])

    def __contains__(self, value):
        """Check if a namespace (sub-database) is in this database."""
        counts = self.adaptor.execute_and_fetch_col0(
            "SELECT COUNT(name) FROM biodatabase WHERE name=%s;", (value,))
        return bool(counts[0])

    def __iter__(self):
        """Iterate over namespaces (sub-databases) in the database."""
        # TODO - Iterate over the cursor, much more efficient
        names = self.adaptor.list_biodatabase_names()
        return iter(names)

    if hasattr(dict, "iteritems"):
        # Python 2: list-returning methods plus the iter* variants.
        def keys(self):
            """List of namespaces (sub-databases) in the database."""
            return self.adaptor.list_biodatabase_names()

        def values(self):
            """List of BioSeqDatabase objects in the database."""
            return [self[name] for name in self]

        def items(self):
            """List of (namespace, BioSeqDatabase) for entries in the database."""
            return [(name, self[name]) for name in self]

        def iterkeys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def itervalues(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for name in self:
                yield self[name]

        def iteritems(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for name in self:
                yield name, self[name]
    else:
        # Python 3: keys, values and items are all iterators.
        def keys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def values(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for name in self:
                yield self[name]

        def items(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for name in self:
                yield name, self[name]

    def __delitem__(self, name):
        """Remove a namespace and all its entries."""
        if name not in self:
            raise KeyError(name)
        db_id = self.adaptor.fetch_dbid_by_dbname(name)
        Loader.DatabaseRemover(self.adaptor, db_id).remove()

    def remove_database(self, db_name):
        """Remove a namespace and all its entries (OBSOLETE).

        Try to remove all references to items in a database.

        server.remove_database(name)

        In keeping with the dictionary interface, you can now do this:

        del server[name]
        """
        import warnings
        warnings.warn("This method is deprecated.  In keeping with the "
                      "dictionary interface, you can now use 'del "
                      "server[name]' instead", BiopythonDeprecationWarning)
        del self[db_name]

    def new_database(self, db_name, authority=None, description=None):
        """Add a new database to the server and return it."""
        insert_sql = ("INSERT INTO biodatabase (name, authority, description)"
                      " VALUES (%s, %s, %s)")
        self.adaptor.execute(insert_sql, (db_name, authority, description))
        return BioSeqDatabase(self.adaptor, db_name)

    def load_database_sql(self, sql_file):
        """Load a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.

        Raises ValueError if the driver module is not one the loader knows.
        """
        # Gather the file as one string, dropping blank lines and lines
        # that start with a "--" or "#" comment marker.
        with open(sql_file, _universal_read_mode) as sql_handle:
            sql = "".join(line.strip() + " "
                          for line in sql_handle
                          if not line.startswith("--")
                          and not line.startswith("#")
                          and line.strip())

        driver_name = self.module_name
        if driver_name in ["psycopg2", "pgdb"]:
            # PostgreSQL can load it all at once and actually needs to,
            # due to FUNCTION defines at the end of the SQL which mess up
            # the splitting by semicolons.
            self.adaptor.cursor.execute(sql)
        elif driver_name in ["mysql.connector", "MySQLdb", "sqlite3"]:
            # These need the SQL split up and executed one statement at a
            # time; the piece after the final semicolon is not used.
            statements = sql.split(";")
            for statement in statements[:-1]:
                self.adaptor.cursor.execute(statement)
        else:
            raise ValueError("Module %s not supported by the loader." %
                             (self.module_name))

    def commit(self):
        """Commits the current transaction to the database."""
        return self.adaptor.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.adaptor.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.adaptor.close()

293 -class _CursorWrapper:
294 """A wraper for mysql.connector resolving bytestring representations."""
295 - def __init__(self, real_cursor):
296 self.real_cursor = real_cursor
297
298 - def execute(self, operation, params=None, multi=False):
299 self.real_cursor.execute(operation, params, multi)
300
301 - def _convert_tuple(self, tuple_):
302 tuple_list = list(tuple_) 303 for i, elem in enumerate(tuple_list): 304 if type(elem) is bytes: 305 tuple_list[i] = elem.decode("utf-8") 306 return tuple(tuple_list)
307
308 - def _convert_list(self, lst):
309 ret_lst = [] 310 for tuple_ in lst: 311 new_tuple = self._convert_tuple(tuple_) 312 ret_lst.append(new_tuple) 313 return ret_lst
314
315 - def fetchall(self):
316 rv = self.real_cursor.fetchall() 317 return self._convert_list(rv)
318
319 - def fetchone(self):
320 tuple_ = self.real_cursor.fetchone() 321 return self._convert_tuple(tuple_)
322 323
324 -class Adaptor:
325 - def __init__(self, conn, dbutils, wrap_cursor=False):
326 self.conn = conn 327 if wrap_cursor: 328 self.cursor = _CursorWrapper(conn.cursor()) 329 else: 330 self.cursor = conn.cursor() 331 self.dbutils = dbutils
332
333 - def last_id(self, table):
334 return self.dbutils.last_id(self.cursor, table)
335
336 - def autocommit(self, y=True):
337 """Set the autocommit mode. True values enable; False value disable.""" 338 return self.dbutils.autocommit(self.conn, y)
339
340 - def commit(self):
341 """Commits the current transaction.""" 342 return self.conn.commit()
343
344 - def rollback(self):
345 """Rolls backs the current transaction.""" 346 return self.conn.rollback()
347
348 - def close(self):
349 """Close the connection. No further activity possible.""" 350 return self.conn.close()
351
352 - def fetch_dbid_by_dbname(self, dbname):
353 self.execute( 354 r"select biodatabase_id from biodatabase where name = %s", 355 (dbname,)) 356 rv = self.cursor.fetchall() 357 if not rv: 358 raise KeyError("Cannot find biodatabase with name %r" % dbname) 359 # Cannot happen (UK) 360 ## assert len(rv) == 1, "More than one biodatabase with name %r" % dbname 361 return rv[0][0]
362
363 - def fetch_seqid_by_display_id(self, dbid, name):
364 sql = r"select bioentry_id from bioentry where name = %s" 365 fields = [name] 366 if dbid: 367 sql += " and biodatabase_id = %s" 368 fields.append(dbid) 369 self.execute(sql, fields) 370 rv = self.cursor.fetchall() 371 if not rv: 372 raise IndexError("Cannot find display id %r" % name) 373 if len(rv) > 1: 374 raise IndexError("More than one entry with display id %r" % name) 375 return rv[0][0]
376
377 - def fetch_seqid_by_accession(self, dbid, name):
378 sql = r"select bioentry_id from bioentry where accession = %s" 379 fields = [name] 380 if dbid: 381 sql += " and biodatabase_id = %s" 382 fields.append(dbid) 383 self.execute(sql, fields) 384 rv = self.cursor.fetchall() 385 if not rv: 386 raise IndexError("Cannot find accession %r" % name) 387 if len(rv) > 1: 388 raise IndexError("More than one entry with accession %r" % name) 389 return rv[0][0]
390
391 - def fetch_seqids_by_accession(self, dbid, name):
392 sql = r"select bioentry_id from bioentry where accession = %s" 393 fields = [name] 394 if dbid: 395 sql += " and biodatabase_id = %s" 396 fields.append(dbid) 397 return self.execute_and_fetch_col0(sql, fields)
398
399 - def fetch_seqid_by_version(self, dbid, name):
400 acc_version = name.split(".") 401 if len(acc_version) > 2: 402 raise IndexError("Bad version %r" % name) 403 acc = acc_version[0] 404 if len(acc_version) == 2: 405 version = acc_version[1] 406 else: 407 version = "0" 408 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \ 409 r" AND version = %s" 410 fields = [acc, version] 411 if dbid: 412 sql += " and biodatabase_id = %s" 413 fields.append(dbid) 414 self.execute(sql, fields) 415 rv = self.cursor.fetchall() 416 if not rv: 417 raise IndexError("Cannot find version %r" % name) 418 if len(rv) > 1: 419 raise IndexError("More than one entry with version %r" % name) 420 return rv[0][0]
421
422 - def fetch_seqid_by_identifier(self, dbid, identifier):
423 # YB: was fetch_seqid_by_seqid 424 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s" 425 fields = [identifier] 426 if dbid: 427 sql += " and biodatabase_id = %s" 428 fields.append(dbid) 429 self.execute(sql, fields) 430 rv = self.cursor.fetchall() 431 if not rv: 432 raise IndexError("Cannot find display id %r" % identifier) 433 return rv[0][0]
434
435 - def list_biodatabase_names(self):
436 return self.execute_and_fetch_col0( 437 "SELECT name FROM biodatabase")
438
439 - def list_bioentry_ids(self, dbid):
440 return self.execute_and_fetch_col0( 441 "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s", 442 (dbid,))
443
444 - def list_bioentry_display_ids(self, dbid):
445 return self.execute_and_fetch_col0( 446 "SELECT name FROM bioentry WHERE biodatabase_id = %s", 447 (dbid,))
448
449 - def list_any_ids(self, sql, args):
450 """Return ids given a SQL statement to select for them. 451 452 This assumes that the given SQL does a SELECT statement that 453 returns a list of items. This parses them out of the 2D list 454 they come as and just returns them in a list. 455 """ 456 return self.execute_and_fetch_col0(sql, args)
457
458 - def execute_one(self, sql, args=None):
459 self.execute(sql, args or ()) 460 rv = self.cursor.fetchall() 461 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv) 462 return rv[0]
463
464 - def execute(self, sql, args=None):
465 """Just execute an sql command. 466 """ 467 if os.name == "java": 468 sql = sql.replace("%s", "?") 469 self.dbutils.execute(self.cursor, sql, args)
470
471 - def get_subseq_as_string(self, seqid, start, end):
472 length = end - start 473 # XXX Check this on MySQL and PostgreSQL. substr should be general, 474 # does it need dbutils? 475 #return self.execute_one( 476 # """select SUBSTRING(seq FROM %s FOR %s) 477 # from biosequence where bioentry_id = %s""", 478 # (start+1, length, seqid))[0] 479 # 480 # Convert to a string on returning for databases that give back 481 # unicode. Shouldn't need unicode for sequences so this seems safe. 482 return str(self.execute_one( 483 """select SUBSTR(seq, %s, %s) 484 from biosequence where bioentry_id = %s""", 485 (start + 1, length, seqid))[0])
486
487 - def execute_and_fetch_col0(self, sql, args=None):
488 self.execute(sql, args or ()) 489 return [field[0] for field in self.cursor.fetchall()]
490
491 - def execute_and_fetchall(self, sql, args=None):
492 self.execute(sql, args or ()) 493 return self.cursor.fetchall()
494 495 _allowed_lookups = { 496 # Lookup name / function name to get id, function to list all ids 497 'primary_id': "fetch_seqid_by_identifier", 498 'gi': "fetch_seqid_by_identifier", 499 'display_id': "fetch_seqid_by_display_id", 500 'name': "fetch_seqid_by_display_id", 501 'accession': "fetch_seqid_by_accession", 502 'version': "fetch_seqid_by_version", 503 } 504 505
class BioSeqDatabase:
    """Represents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all rows in the bioentry
    table associated with it.
    """

    def __init__(self, adaptor, name):
        """Look up the namespace by name; KeyError if it does not exist."""
        self.adaptor = adaptor
        self.name = name
        self.dbid = self.adaptor.fetch_dbid_by_dbname(name)

    def __repr__(self):
        return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)

    def get_Seq_by_id(self, name):
        """Gets a DBSeqRecord object by its name

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_acc(self, name):
        """Gets a DBSeqRecord object by accession number

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_ver(self, name):
        """Gets a DBSeqRecord object by version number

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seqs_by_acc(self, name):
        """Gets a list of DBSeqRecord objects by accession number

        Example: seq_recs = db.get_Seqs_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of DBSeq objects, and
        presumably was to mirror BioPerl.
        """
        seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
        return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]

    def get_all_primary_ids(self):
        """All the primary_ids of the sequences in the database (OBSOLETE).

        These maybe ids (display style) or accession numbers or
        something else completely different - they *are not*
        meaningful outside of this database implementation.

        Please use .keys() instead of .get_all_primary_ids()
        """
        import warnings
        warnings.warn("Use bio_seq_database.keys() instead of "
                      "bio_seq_database.get_all_primary_ids()",
                      BiopythonDeprecationWarning)
        return list(self.keys())

    def __getitem__(self, key):
        return BioSeq.DBSeqRecord(self.adaptor, key)

    def __delitem__(self, key):
        """Remove an entry and all its annotation."""
        if key not in self:
            raise KeyError(key)
        # Assuming this will automatically cascade to the other tables...
        sql = "DELETE FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        self.adaptor.execute(sql, (self.dbid, key))

    def __len__(self):
        """Number of records in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s;"
        return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid, ))[0])

    def __contains__(self, value):
        """Check if a primary (internal) id is this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        # The bioentry_id field is an integer in the schema.
        # PostgreSQL will throw an error if we use a non integer in the query.
        try:
            bioentry_id = int(value)
        except (TypeError, ValueError):
            # Anything int() rejects (e.g. "abc" or None) cannot be an id.
            return False
        return bool(self.adaptor.execute_and_fetch_col0(sql,
                    (self.dbid, bioentry_id))[0])

    def __iter__(self):
        """Iterate over ids (which may not be meaningful outside this database)."""
        # TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_bioentry_ids(self.dbid))

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems etc
        def keys(self):
            """List of ids which may not be meaningful outside this database."""
            return self.adaptor.list_bioentry_ids(self.dbid)

        def values(self):
            """List of DBSeqRecord objects in the namespace (sub database)."""
            return [self[key] for key in self]

        def items(self):
            """List of (id, DBSeqRecord) for the namespace (sub database)."""
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def itervalues(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]
    else:
        # Python 3, items etc are all iterators
        def keys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def values(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]

    def lookup(self, **kwargs):
        """Return a DBSeqRecord found by exactly one keyword criterion.

        The keyword must be one of the keys of _allowed_lookups, e.g.
        db.lookup(accession='X77802'). Raises TypeError for any other
        keyword or when not exactly one keyword is given.
        """
        if len(kwargs) != 1:
            raise TypeError("single key/value parameter expected")
        k, v = list(kwargs.items())[0]
        if k not in _allowed_lookups:
            raise TypeError("lookup() expects one of %r, not %r" %
                            (list(_allowed_lookups.keys()), k))
        lookup_name = _allowed_lookups[k]
        lookup_func = getattr(self.adaptor, lookup_name)
        seqid = lookup_func(self.dbid, v)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_primary_id(self, seqid):
        """Get a DBSeqRecord by the primary (internal) id (OBSOLETE).

        Rather than db.get_Seq_by_primary_id(my_id) use db[my_id]

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        import warnings
        warnings.warn("Use bio_seq_database[my_id] instead of "
                      "bio_seq_database.get_Seq_by_primary_id(my_id)",
                      BiopythonDeprecationWarning)
        return self[seqid]

    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                          fetch_NCBI_taxonomy)
        num_records = 0
        global _POSTGRES_RULES_PRESENT
        for cur_record in record_iterator:
            num_records += 1
            # Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            # the RULES are present check for a duplicate record before loading
            if _POSTGRES_RULES_PRESENT:
                # Recreate what the Loader's _load_bioentry_table will do:
                if cur_record.id.count(".") == 1:
                    accession, version = cur_record.id.split('.')
                    try:
                        version = int(version)
                    except ValueError:
                        accession = cur_record.id
                        version = 0
                else:
                    accession = cur_record.id
                    version = 0
                gi = cur_record.annotations.get("gi", None)
                # Let the driver quote the values: ids come from parsed
                # files and may contain quote characters. Values are sent
                # as strings so the database coerces them to the column
                # types just as it did for the quoted literals used before.
                sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \
                      "= %s AND biodatabase_id = %s) OR (accession = " + \
                      "%s AND version = %s AND biodatabase_id = %s)"
                params = (None if gi is None else str(gi), str(self.dbid),
                          str(accession), str(version), str(self.dbid))
                self.adaptor.execute(sql, params)
                if self.adaptor.cursor.fetchone():
                    raise self.adaptor.conn.IntegrityError("Duplicate record "
                                     "detected: record has not been inserted")
            # End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records

735