Package Bio :: Package motifs :: Package jaspar :: Module db
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.jaspar.db

  1  # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Provides read access to a JASPAR5 formatted database. 
  6   
  7  This module requires MySQLdb to be installed. 
  8   
  9  Example, substitute your database credentials as 
 10  appropriate: 
 11   
 12      >>> from Bio.motifs.jaspar.db import JASPAR5 
 13      >>> 
 14      >>> JASPAR_DB_HOST = "hostname.example.org" 
 15      >>> JASPAR_DB_NAME = "JASPAR_2013" 
 16      >>> JASPAR_DB_USER = "guest" 
 17      >>> JASPAR_DB_PASS = "guest" 
 18      >>> 
 19      >>> DFLT_COLLECTION = 'CORE' 
 20      >>> jdb = JASPAR5( 
 21      ...     host=JASPAR_DB_HOST, 
 22      ...     name=JASPAR_DB_NAME, 
 23      ...     user=JASPAR_DB_USER, 
 24      ...     password=JASPAR_DB_PASS 
 25      ... ) 
 26      >>> 
 27      >>> 
 28      >>> ets1 = jdb.fetch_motif_by_id('MA0098') 
 29      >>> print(ets1) 
 30      TF name ETS1 
 31      Matrix ID   MA0098.1 
 32      Collection  CORE 
 33      TF class    Winged Helix-Turn-Helix 
 34      TF family   Ets 
 35      Species 9606 
 36      Taxonomic group vertebrates 
 37      Accession   ['CAG47050'] 
 38      Data type used  SELEX 
 39      Medline 1542566 
 40      PAZAR ID    TF0000070 
 41      Comments    - 
 42      Matrix: 
 43              0      1      2      3      4      5 
 44      A:   4.00  17.00   0.00   0.00   0.00   5.00 
 45      C:  16.00   0.00   1.00  39.00  39.00   3.00 
 46      G:   4.00   0.00   0.00   1.00   0.00  17.00 
 47      T:  16.00  23.00  39.00   0.00   1.00  15.00 
 48   
 49   
 50      >>> 
 51      >>> motifs = jdb.fetch_motifs( 
 52      ...     collection = 'CORE', 
 53      ...     tax_group = ['vertebrates', 'insects'], 
 54      ...     tf_class = 'Winged Helix-Turn-Helix', 
 55      ...     tf_family = ['Forkhead', 'Ets'], 
 56      ...     min_ic = 12 
 57      ... ) 
 58      >>> 
 59      >>> for motif in motifs: 
 60      ...     pass # do something with the motif 
 61   
 62  """ 
 63   
 64  from __future__ import print_function 
 65   
 66  import warnings 
 67  from Bio import BiopythonWarning 
 68  from Bio import MissingPythonDependencyError 
 69   
 70  try: 
 71      import MySQLdb as mdb 
 72  except: 
 73      raise MissingPythonDependencyError("Install MySQLdb if you want to use " 
 74                                         "Bio.motifs.jaspar.db") 
 75   
 76  from Bio.Alphabet.IUPAC import unambiguous_dna as dna 
 77  from Bio.motifs import jaspar, matrix 
 78   
 79   
 80  JASPAR_DFLT_COLLECTION = 'CORE' 
 81   
 82   
class JASPAR5(object):
    """Class representing a JASPAR5 database.

    The methods within are loosely based on the Perl TFBS::DB::JASPAR5
    module.

    Note: We will only implement reading of JASPAR motifs from the DB.
    Unlike the Perl module, we will not attempt to implement any methods to
    store JASPAR motifs or create a new DB at this time.

    """

94 - def __init__(self, host=None, name=None, user=None, password=None):
95 """ 96 Construct a JASPAR5 instance and connect to specified DB 97 98 Arguments: 99 host - host name of the the JASPAR DB server 100 name - name of the JASPAR database 101 user - user name to connect to the JASPAR DB 102 password - JASPAR DB password 103 104 """ 105 106 self.name = name 107 self.host = host 108 self.user = user 109 self.password = password 110 111 self.dbh = mdb.connect(host, user, password, name)
112
113 - def __str__(self):
114 """ 115 Return a string represention of the JASPAR5 DB connection. 116 117 """ 118 119 text = "%s\@%s:%s" % (self.user, self.host, self.name) 120 121 return text
122
123 - def fetch_motif_by_id(self, id):
124 """ 125 Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID 126 (e.g. 'MA0001.1'). 127 128 Arguments: 129 130 - id - JASPAR matrix ID. This may be a fully specified ID including 131 the version number (e.g. MA0049.2) or just the base ID (e.g. 132 MA0049). If only a base ID is provided, the latest version is 133 returned. 134 135 Returns: 136 137 - A Bio.motifs.jaspar.Motif object 138 139 **NOTE:** The perl TFBS module allows you to specify the type of matrix 140 to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as 141 PFMs so this does not really belong here. Once a PFM is fetched the 142 pwm() and pssm() methods can be called to return the normalized and 143 log-odds matrices. 144 145 """ 146 147 # separate stable ID and version number 148 (base_id, version) = jaspar.split_jaspar_id(id) 149 if not version: 150 # if ID contains no version portion, fetch the latest version 151 version = self._fetch_latest_version(base_id) 152 153 # fetch internal JASPAR matrix ID - also a check for validity 154 int_id = None 155 if version: 156 int_id = self._fetch_internal_id(base_id, version) 157 158 # fetch JASPAR motif using internal ID 159 motif = None 160 if int_id: 161 motif = self._fetch_motif_by_internal_id(int_id) 162 163 return motif
164
165 - def fetch_motifs_by_name(self, name):
166 """ 167 Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s). 168 169 Arguments: 170 name - a single name or list of names 171 Returns: 172 A list of Bio.motifs.Motif.japar objects 173 174 Notes: 175 Names are not guaranteed to be unique. There may be more than one 176 motif with the same name. Therefore even if name specifies a single 177 name, a list of motifs is returned. This just calls 178 self.fetch_motifs(collection = None, tf_name = name). 179 180 This behaviour is different from the TFBS perl module's 181 get_Matrix_by_name() method which always returns a single matrix, 182 issuing a warning message and returning the first matrix retrieved 183 in the case where multiple matrices have the same name. 184 185 """ 186 187 return self.fetch_motifs(collection=None, tf_name=name)
188
189 - def fetch_motifs( 190 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 191 tf_family=None, matrix_id=None, tax_group=None, species=None, 192 pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0, 193 min_sites=0, all=False, all_versions=False 194 ):
195 """ 196 Fetch a jaspar.Record (list) of motifs based on the provided selection 197 criteria. 198 199 Arguments:: 200 201 Except where obvious, all selection criteria arguments may be 202 specified as a single value or a list of values. Motifs must 203 meet ALL the specified selection criteria to be returned with 204 the precedent exceptions noted below. 205 206 all - Takes precedent of all other selection criteria. 207 Every motif is returned. If 'all_versions' is also 208 specified, all versions of every motif are returned, 209 otherwise just the latest version of every motif is 210 returned. 211 matrix_id - Takes precedence over all other selection criteria 212 except 'all'. Only motifs with the given JASPAR 213 matrix ID(s) are returned. A matrix ID may be 214 specified as just a base ID or full JASPAR IDs 215 including version number. If only a base ID is 216 provided for specific motif(s), then just the latest 217 version of those motif(s) are returned unless 218 'all_versions' is also specified. 219 collection - Only motifs from the specified JASPAR collection(s) 220 are returned. NOTE - if not specified, the collection 221 defaults to CORE for all other selection criteria 222 except 'all' and 'matrix_id'. To apply the other 223 selection criteria across all JASPAR collections, 224 explicitly set collection=None. 225 tf_name - Only motifs with the given name(s) are returned. 226 tf_class - Only motifs of the given TF class(es) are returned. 227 tf_family - Only motifs from the given TF families are returned. 228 tax_group - Only motifs belonging to the given taxonomic 229 supergroups are returned (e.g. 'vertebrates', 230 'insects', 'nematodes' etc.) 231 species - Only motifs derived from the given species are 232 returned. Species are specified as taxonomy IDs. 233 data_type - Only motifs generated with the given data type (e.g. 234 ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned. 235 NOTE - must match exactly as stored in the database. 
236 pazar_id - Only motifs with the given PAZAR TF ID are returned. 237 medline - Only motifs with the given medline (PubmMed IDs) are 238 returned. 239 min_ic - Only motifs whose profile matrices have at least this 240 information content (specificty) are returned. 241 min_length - Only motifs whose profiles are of at least this 242 length are returned. 243 min_sites - Only motifs compiled from at least these many binding 244 sites are returned. 245 all_versions- Unless specified, just the latest version of motifs 246 determined by the other selection criteria are 247 returned. Otherwise all versions of the selected 248 motifs are returned. 249 250 Returns: 251 252 - A Bio.motifs.jaspar.Record (list) of motifs. 253 254 """ 255 256 # Fetch the internal IDs of the motifs using the criteria provided 257 int_ids = self._fetch_internal_id_list( 258 collection=collection, 259 tf_name=tf_name, 260 tf_class=tf_class, 261 tf_family=tf_family, 262 matrix_id=matrix_id, 263 tax_group=tax_group, 264 species=species, 265 pazar_id=pazar_id, 266 data_type=data_type, 267 medline=medline, 268 all=all, 269 all_versions=all_versions 270 ) 271 272 record = jaspar.Record() 273 274 """ 275 Now further filter motifs returned above based on any specified 276 matrix specific criteria. 277 """ 278 for int_id in int_ids: 279 motif = self._fetch_motif_by_internal_id(int_id) 280 281 # Filter motifs to those with matrix IC greater than min_ic 282 if min_ic: 283 if motif.pssm.mean() < min_ic: 284 continue 285 286 # Filter motifs to those with minimum length of min_length 287 if min_length: 288 if motif.length < min_length: 289 continue 290 291 # XXX We could also supply a max_length filter. 292 293 """ 294 Filter motifs to those composed of at least this many sites. 295 The perl TFBS module assumes column sums may be different but 296 this should be strictly enforced here we will ignore this and 297 just use the first column sum. 
298 """ 299 if min_sites: 300 num_sites = sum( 301 [motif.counts[nt][0] for nt in motif.alphabet.letters] 302 ) 303 if num_sites < min_sites: 304 continue 305 306 record.append(motif) 307 308 return record
309
310 - def _fetch_latest_version(self, base_id):
311 """ 312 Get the latest version number for the given base_id, 313 314 """ 315 316 cur = self.dbh.cursor() 317 cur.execute("""select VERSION from MATRIX where BASE_id = %s 318 order by VERSION desc limit 1""", (base_id,)) 319 320 row = cur.fetchone() 321 322 latest = None 323 if row: 324 latest = row[0] 325 else: 326 warnings.warn("Failed to fetch latest version number for JASPAR motif with base ID '{0}'. No JASPAR motif with this base ID appears to exist in the database.".format(base_id), BiopythonWarning) 327 328 return latest
329
330 - def _fetch_internal_id(self, base_id, version):
331 """ 332 Fetch the internal id for a base id + version. Also checks if this 333 combo exists or not 334 335 """ 336 337 cur = self.dbh.cursor() 338 cur.execute("""select id from MATRIX where BASE_id = %s 339 and VERSION = %s""", (base_id, version)) 340 341 row = cur.fetchone() 342 343 int_id = None 344 if row: 345 int_id = row[0] 346 else: 347 warnings.warn("Failed to fetch internal database ID for JASPAR motif with matrix ID '{0}.{1}'. No JASPAR motif with this matrix ID appears to exist.".format(base_id, version), BiopythonWarning) 348 349 return int_id
350
351 - def _fetch_motif_by_internal_id(self, int_id):
352 # fetch basic motif information 353 cur = self.dbh.cursor() 354 cur.execute("""select BASE_ID, VERSION, COLLECTION, NAME from MATRIX 355 where id = %s""", (int_id,)) 356 357 row = cur.fetchone() 358 359 # This should never happen as it is an internal method. If it does 360 # we should probably raise an exception 361 if not row: 362 warnings.warn("Could not fetch JASPAR motif with internal ID = {0}".format(int_id), BiopythonWarning) 363 return None 364 365 base_id = row[0] 366 version = row[1] 367 collection = row[2] 368 name = row[3] 369 370 matrix_id = "".join([base_id, '.', str(version)]) 371 372 # fetch the counts matrix 373 counts = self._fetch_counts_matrix(int_id) 374 375 # Create new JASPAR motif 376 motif = jaspar.Motif( 377 matrix_id, name, collection=collection, counts=counts 378 ) 379 380 # fetch species 381 cur.execute("""select TAX_ID from MATRIX_SPECIES 382 where id = %s""", (int_id,)) 383 tax_ids = [] 384 rows = cur.fetchall() 385 for row in rows: 386 tax_ids.append(row[0]) 387 388 # Many JASPAR motifs (especially those not in the CORE collection) 389 # do not have taxonomy IDs. So this warning would get annoying. 390 # if not tax_ids: 391 # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif {0}".format(motif.matrix_id), BiopythonWarning) 392 393 motif.species = tax_ids 394 395 # fetch protein accession numbers 396 cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,)) 397 accs = [] 398 rows = cur.fetchall() 399 for row in rows: 400 accs.append(row[0]) 401 402 # Similarly as for taxonomy IDs, it would get annoying to print 403 # warnings for JASPAR motifs which do not have accession numbers. 
404 405 motif.acc = accs 406 407 # fetch remaining annotation as tags from the ANNOTATION table 408 cur.execute("""select TAG, VAL from MATRIX_ANNOTATION 409 where id = %s""", (int_id,)) 410 rows = cur.fetchall() 411 for row in rows: 412 attr = row[0] 413 val = row[1] 414 if attr == 'class': 415 motif.tf_class = val 416 elif attr == 'family': 417 motif.tf_family = val 418 elif attr == 'tax_group': 419 motif.tax_group = val 420 elif attr == 'type': 421 motif.data_type = val 422 elif attr == 'pazar_tf_id': 423 motif.pazar_id = val 424 elif attr == 'medline': 425 motif.medline = val 426 elif attr == 'comment': 427 motif.comment = val 428 else: 429 """ 430 TODO If we were to implement additional abitrary tags 431 motif.tag(attr, val) 432 """ 433 pass 434 435 return motif
436
437 - def _fetch_counts_matrix(self, int_id):
438 """ 439 Fetch the counts matrix from the JASPAR DB by the internal ID 440 441 Returns a Bio.motifs.matrix.GenericPositionMatrix 442 443 """ 444 counts = {} 445 cur = self.dbh.cursor() 446 447 for base in dna.letters: 448 base_counts = [] 449 450 cur.execute("""select val from MATRIX_DATA where ID = %s 451 and row = %s order by col""", (int_id, base)) 452 453 rows = cur.fetchall() 454 for row in rows: 455 base_counts.append(row[0]) 456 457 counts[base] = [float(x) for x in base_counts] 458 459 return matrix.GenericPositionMatrix(dna, counts)
460
461 - def _fetch_internal_id_list( 462 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 463 tf_family=None, matrix_id=None, tax_group=None, species=None, 464 pazar_id=None, data_type=None, medline=None, all=False, 465 all_versions=False 466 ):
467 """ 468 Fetch a list of internal JASPAR motif IDs based on various passed 469 parameters which may then be used to fetch the rest of the motif data. 470 471 Caller: 472 fetch_motifs() 473 474 Arguments: 475 See arguments sections of fetch_motifs() 476 477 Returns: 478 A list of internal JASPAR motif IDs which match the given 479 selection criteria arguments. 480 481 482 Build an SQL query based on the selection arguments provided. 483 484 1: First add table joins and sub-clauses for criteria corresponding to 485 named fields from the MATRIX and MATRIX_SPECIES tables such as 486 collection, matrix ID, name, species etc. 487 488 2: Then add joins/sub-clauses for tag/value parameters from the 489 MATRIX_ANNOTATION table. 490 491 For the surviving matrices, the responsibility to do matrix-based 492 feature filtering such as ic, number of sites etc, fall on the 493 calling fetch_motifs() method. 494 495 """ 496 497 int_ids = [] 498 499 cur = self.dbh.cursor() 500 501 """ 502 Special case 1: fetch ALL motifs. Highest priority. 503 Ignore all other selection arguments. 504 """ 505 if all: 506 cur.execute("select ID from MATRIX") 507 rows = cur.fetchall() 508 509 for row in rows: 510 int_ids.append(row[0]) 511 512 return int_ids 513 514 """ 515 Special case 2: fetch specific motifs by their JASPAR IDs. This 516 has higher priority than any other except the above 'all' case. 517 Ignore all other selection arguments. 518 """ 519 if matrix_id: 520 """ 521 These might be either stable IDs or stable_ID.version. 
522 If just stable ID and if all_versions == 1, return all versions, 523 otherwise just the latest 524 """ 525 if all_versions: 526 for id in matrix_id: 527 # ignore vesion here, this is a stupidity filter 528 (base_id, version) = jaspar.split_jaspar_id(id) 529 cur.execute( 530 "select ID from MATRIX where BASE_ID = %s", (base_id,) 531 ) 532 533 rows = cur.fetchall() 534 for row in rows: 535 int_ids.append(row[0]) 536 else: 537 # only the lastest version, or the requested version 538 for id in matrix_id: 539 (base_id, version) = jaspar.split_jaspar_id(id) 540 541 if not version: 542 version = self._fetch_latest_version(base_id) 543 544 int_id = None 545 if version: 546 int_id = self._fetch_internal_id(base_id, version) 547 548 if int_id: 549 int_ids.append(int_id) 550 551 return int_ids 552 553 tables = ["MATRIX m"] 554 where_clauses = [] 555 556 # Select by MATRIX.COLLECTION 557 if collection: 558 if isinstance(collection, list): 559 # Multiple collections passed in as a list 560 clause = "m.COLLECTION in ('" 561 clause = "".join([clause, "','".join(collection)]) 562 clause = "".join([clause, "')"]) 563 else: 564 # A single collection - typical usage 565 clause = "m.COLLECTION = '%s'" % collection 566 567 where_clauses.append(clause) 568 569 # Select by MATRIX.NAME 570 if tf_name: 571 if isinstance(tf_name, list): 572 # Multiple names passed in as a list 573 clause = "m.NAME in ('" 574 clause = "".join([clause, "','".join(tf_name)]) 575 clause = "".join([clause, "')"]) 576 else: 577 # A single name 578 clause = "m.NAME = '%s'" % tf_name 579 580 where_clauses.append(clause) 581 582 # Select by MATRIX_SPECIES.TAX_ID 583 if species: 584 tables.append("MATRIX_SPECIES ms") 585 where_clauses.append("m.ID = ms.ID") 586 587 """ 588 NOTE: species are numeric taxonomy IDs but stored as varchars 589 in the DB. 
590 """ 591 if isinstance(species, list): 592 # Multiple tax IDs passed in as a list 593 clause = "ms.TAX_ID in ('" 594 clause = "".join([clause, "','".join(str(s) for s in species)]) 595 clause = "".join([clause, "')"]) 596 else: 597 # A single tax ID 598 clause = "ms.TAX_ID = '%s'" % str(species) 599 600 where_clauses.append(clause) 601 602 """ 603 Tag based selection from MATRIX_ANNOTATION 604 Differs from perl TFBS module in that the matrix class explicitly 605 has a tag attribute corresponding to the tags in the database. This 606 provides tremendous flexibility in adding new tags to the DB and 607 being able to select based on those tags with out adding new code. 608 In the JASPAR Motif class we have elected to use specific attributes 609 for the most commonly used tags and here correspondingly only allow 610 selection on these attributes. 611 612 The attributes corresponding to the tags for which selection is 613 provided are: 614 615 Attribute Tag 616 tf_class class 617 tf_family family 618 pazar_id pazar_tf_id 619 medline medline 620 data_type type 621 tax_group tax_group 622 """ 623 624 # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class") 625 if tf_class: 626 tables.append("MATRIX_ANNOTATION ma1") 627 where_clauses.append("m.ID = ma1.ID") 628 629 clause = "ma1.TAG = 'class'" 630 if isinstance(tf_class, list): 631 # A list of TF classes 632 clause = "".join([clause, " and ma1.VAL in ('"]) 633 clause = "".join([clause, "','".join(tf_class)]) 634 clause = "".join([clause, "')"]) 635 else: 636 # A single TF class 637 clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class]) 638 639 where_clauses.append(clause) 640 641 # Select by TF families (MATRIX_ANNOTATION.TAG="family") 642 if tf_family: 643 tables.append("MATRIX_ANNOTATION ma2") 644 where_clauses.append("m.ID = ma2.ID") 645 646 clause = "ma2.TAG = 'family'" 647 if isinstance(tf_family, list): 648 # A list of TF families 649 clause = "".join([clause, " and ma2.VAL in ('"]) 650 clause = 
"".join([clause, "','".join(tf_family)]) 651 clause = "".join([clause, "')"]) 652 else: 653 # A single TF family 654 clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family]) 655 656 where_clauses.append(clause) 657 658 # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id") 659 if pazar_id: 660 tables.append("MATRIX_ANNOTATION ma3") 661 where_clauses.append("m.ID = ma3.ID") 662 663 clause = "ma3.TAG = 'pazar_tf_id'" 664 if isinstance(pazar_id, list): 665 # A list of PAZAR IDs 666 clause = "".join([clause, " and ma3.VAL in ('"]) 667 clause = "".join([clause, "','".join(pazar_id)]) 668 clause = "".join([clause, "')"]) 669 else: 670 # A single PAZAR ID 671 clause = "".join([" and ma3.VAL = '%s' " % pazar_id]) 672 673 where_clauses.append(clause) 674 675 # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline") 676 if medline: 677 tables.append("MATRIX_ANNOTATION ma4") 678 where_clauses.append("m.ID = ma4.ID") 679 680 clause = "ma4.TAG = 'medline'" 681 if isinstance(medline, list): 682 # A list of PubMed IDs 683 clause = "".join([clause, " and ma4.VAL in ('"]) 684 clause = "".join([clause, "','".join(medline)]) 685 clause = "".join([clause, "')"]) 686 else: 687 # A single PubMed ID 688 clause = "".join([" and ma4.VAL = '%s' " % medline]) 689 690 where_clauses.append(clause) 691 692 # Select by data type(s) used to compile the matrix 693 # (MATRIX_ANNOTATION.TAG="type") 694 if data_type: 695 tables.append("MATRIX_ANNOTATION ma5") 696 where_clauses.append("m.ID = ma5.ID") 697 698 clause = "ma5.TAG = 'type'" 699 if isinstance(data_type, list): 700 # A list of data types 701 clause = "".join([clause, " and ma5.VAL in ('"]) 702 clause = "".join([clause, "','".join(data_type)]) 703 clause = "".join([clause, "')"]) 704 else: 705 # A single data type 706 clause = "".join([" and ma5.VAL = '%s' " % data_type]) 707 708 where_clauses.append(clause) 709 710 # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group") 711 if tax_group: 712 
tables.append("MATRIX_ANNOTATION ma6") 713 where_clauses.append("m.ID = ma6.ID") 714 715 clause = "ma6.TAG = 'tax_group'" 716 if isinstance(tax_group, list): 717 # A list of tax IDs 718 clause = "".join([clause, " and ma6.VAL in ('"]) 719 clause = "".join([clause, "','".join(tax_group)]) 720 clause = "".join([clause, "')"]) 721 else: 722 # A single tax ID 723 clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group]) 724 725 where_clauses.append(clause) 726 727 sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)]) 728 729 if where_clauses: 730 sql = "".join([sql, " where ", " and ".join(where_clauses)]) 731 732 # print "sql = %s" % sql 733 734 cur.execute(sql) 735 rows = cur.fetchall() 736 737 for row in rows: 738 id = row[0] 739 if all_versions: 740 int_ids.append(id) 741 else: 742 # is the latest version? 743 if self._is_latest_version(id): 744 int_ids.append(id) 745 746 if len(int_ids) < 1: 747 warnings.warn("Zero motifs returned with current select critera", BiopythonWarning) 748 749 return int_ids
750
751 - def _is_latest_version(self, int_id):
752 """ 753 Does this internal ID represent the latest version of the JASPAR 754 matrix (collapse on base ids) 755 756 """ 757 cur = self.dbh.cursor() 758 759 cur.execute( 760 """select count(*) from MATRIX 761 where BASE_ID = (select BASE_ID from MATRIX where ID = %s) 762 and VERSION > (select VERSION from MATRIX where ID = %s)""", 763 (int_id, int_id) 764 ) 765 766 row = cur.fetchone() 767 768 count = row[0] 769 770 if count == 0: 771 # no matrices with higher version ID and same base id 772 return True 773 774 return False
775