Package Bio :: Package motifs :: Package jaspar :: Module db
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.jaspar.db

  1  # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Provides read access to a JASPAR5 formatted database. 
  6   
  7  This module requires MySQLdb to be installed. 
  8   
  9  Example, substituting your own database credentials as 
 10  appropriate: 
 11   
 12      >>> from Bio.motifs.jaspar.db import JASPAR5 
 13      >>> 
 14      >>> JASPAR_DB_HOST = "hostname.example.org" 
 15      >>> JASPAR_DB_NAME = "JASPAR_2013" 
 16      >>> JASPAR_DB_USER = "guest" 
 17      >>> JASPAR_DB_PASS = "guest" 
 18      >>> 
 19      >>> DFLT_COLLECTION = 'CORE' 
 20      >>> jdb = JASPAR5( 
 21      ...     host=JASPAR_DB_HOST, 
 22      ...     name=JASPAR_DB_NAME, 
 23      ...     user=JASPAR_DB_USER, 
 24      ...     password=JASPAR_DB_PASS 
 25      ... ) 
 26      >>> 
 27      >>> 
 28      >>> ets1 = jdb.fetch_motif_by_id('MA0098') 
 29      >>> print(ets1) 
 30      TF name ETS1 
 31      Matrix ID   MA0098.1 
 32      Collection  CORE 
 33      TF class    Winged Helix-Turn-Helix 
 34      TF family   Ets 
 35      Species 9606 
 36      Taxonomic group vertebrates 
 37      Accession   ['CAG47050'] 
 38      Data type used  SELEX 
 39      Medline 1542566 
 40      PAZAR ID    TF0000070 
 41      Comments    - 
 42      Matrix: 
 43              0      1      2      3      4      5 
 44      A:   4.00  17.00   0.00   0.00   0.00   5.00 
 45      C:  16.00   0.00   1.00  39.00  39.00   3.00 
 46      G:   4.00   0.00   0.00   1.00   0.00  17.00 
 47      T:  16.00  23.00  39.00   0.00   1.00  15.00 
 48   
 49   
 50      >>> 
 51      >>> motifs = jdb.fetch_motifs( 
 52      ...     collection = 'CORE', 
 53      ...     tax_group = ['vertebrates', 'insects'], 
 54      ...     tf_class = 'Winged Helix-Turn-Helix', 
 55      ...     tf_family = ['Forkhead', 'Ets'], 
 56      ...     min_ic = 12 
 57      ... ) 
 58      >>> 
 59      >>> for motif in motifs: 
 60      ...     pass # do something with the motif 
 61   
 62  """ 
 63   
 64  from __future__ import print_function 
 65   
 66  import warnings 
 67  from Bio import BiopythonWarning 
 68  from Bio import MissingPythonDependencyError 
 69   
 70  try: 
 71      import MySQLdb as mdb 
 72  except ImportError: 
 73      raise MissingPythonDependencyError("Install MySQLdb if you want to use " 
 74                                         "Bio.motifs.jaspar.db") 
 75   
 76  from Bio.Alphabet.IUPAC import unambiguous_dna as dna 
 77  from Bio.motifs import jaspar, matrix 
 78   
 79   
 80  JASPAR_DFLT_COLLECTION = 'CORE' 
 81   
 82   
83 -class JASPAR5(object):
84 """Class representing a JASPAR5 database. 85 86 Class representing a JASPAR5 DB. The methods within are loosely based 87 on the perl TFBS::DB::JASPAR5 module. 88 89 Note: We will only implement reading of JASPAR motifs from the DB. 90 Unlike the perl module, we will not attempt to implement any methods to 91 store JASPAR motifs or create a new DB at this time. 92 """ 93
94 - def __init__(self, host=None, name=None, user=None, password=None):
95 """Construct a JASPAR5 instance and connect to specified DB. 96 97 Arguments: 98 99 - host - host name of the the JASPAR DB server 100 - name - name of the JASPAR database 101 - user - user name to connect to the JASPAR DB 102 - password - JASPAR DB password 103 104 """ 105 self.name = name 106 self.host = host 107 self.user = user 108 self.password = password 109 110 self.dbh = mdb.connect(host, user, password, name)
111
112 - def __str__(self):
113 """Return a string represention of the JASPAR5 DB connection.""" 114 return "%s\@%s:%s" % (self.user, self.host, self.name)
115
116 - def fetch_motif_by_id(self, id):
117 """Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID 118 119 Example id 'MA0001.1'. 120 121 Arguments: 122 123 - id - JASPAR matrix ID. This may be a fully specified ID including 124 the version number (e.g. MA0049.2) or just the base ID (e.g. 125 MA0049). If only a base ID is provided, the latest version is 126 returned. 127 128 Returns: 129 - A Bio.motifs.jaspar.Motif object 130 131 **NOTE:** The perl TFBS module allows you to specify the type of matrix 132 to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as 133 PFMs so this does not really belong here. Once a PFM is fetched the 134 pwm() and pssm() methods can be called to return the normalized and 135 log-odds matrices. 136 137 """ 138 # separate stable ID and version number 139 (base_id, version) = jaspar.split_jaspar_id(id) 140 if not version: 141 # if ID contains no version portion, fetch the latest version 142 version = self._fetch_latest_version(base_id) 143 144 # fetch internal JASPAR matrix ID - also a check for validity 145 int_id = None 146 if version: 147 int_id = self._fetch_internal_id(base_id, version) 148 149 # fetch JASPAR motif using internal ID 150 motif = None 151 if int_id: 152 motif = self._fetch_motif_by_internal_id(int_id) 153 154 return motif
155
156 - def fetch_motifs_by_name(self, name):
157 """Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s). 158 159 Arguments: 160 name - a single name or list of names 161 Returns: 162 A list of Bio.motifs.Motif.japar objects 163 164 Notes: 165 Names are not guaranteed to be unique. There may be more than one 166 motif with the same name. Therefore even if name specifies a single 167 name, a list of motifs is returned. This just calls 168 self.fetch_motifs(collection = None, tf_name = name). 169 170 This behaviour is different from the TFBS perl module's 171 get_Matrix_by_name() method which always returns a single matrix, 172 issuing a warning message and returning the first matrix retrieved 173 in the case where multiple matrices have the same name. 174 175 """ 176 return self.fetch_motifs(collection=None, tf_name=name)
177
178 - def fetch_motifs( 179 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 180 tf_family=None, matrix_id=None, tax_group=None, species=None, 181 pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0, 182 min_sites=0, all=False, all_versions=False 183 ):
184 """Fetch jaspar.Record (list) of motifs using selection criteria. 185 186 Arguments:: 187 188 Except where obvious, all selection criteria arguments may be 189 specified as a single value or a list of values. Motifs must 190 meet ALL the specified selection criteria to be returned with 191 the precedent exceptions noted below. 192 193 all - Takes precedent of all other selection criteria. 194 Every motif is returned. If 'all_versions' is also 195 specified, all versions of every motif are returned, 196 otherwise just the latest version of every motif is 197 returned. 198 matrix_id - Takes precedence over all other selection criteria 199 except 'all'. Only motifs with the given JASPAR 200 matrix ID(s) are returned. A matrix ID may be 201 specified as just a base ID or full JASPAR IDs 202 including version number. If only a base ID is 203 provided for specific motif(s), then just the latest 204 version of those motif(s) are returned unless 205 'all_versions' is also specified. 206 collection - Only motifs from the specified JASPAR collection(s) 207 are returned. NOTE - if not specified, the collection 208 defaults to CORE for all other selection criteria 209 except 'all' and 'matrix_id'. To apply the other 210 selection criteria across all JASPAR collections, 211 explicitly set collection=None. 212 tf_name - Only motifs with the given name(s) are returned. 213 tf_class - Only motifs of the given TF class(es) are returned. 214 tf_family - Only motifs from the given TF families are returned. 215 tax_group - Only motifs belonging to the given taxonomic 216 supergroups are returned (e.g. 'vertebrates', 217 'insects', 'nematodes' etc.) 218 species - Only motifs derived from the given species are 219 returned. Species are specified as taxonomy IDs. 220 data_type - Only motifs generated with the given data type (e.g. 221 ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned. 222 NOTE - must match exactly as stored in the database. 
223 pazar_id - Only motifs with the given PAZAR TF ID are returned. 224 medline - Only motifs with the given medline (PubmMed IDs) are 225 returned. 226 min_ic - Only motifs whose profile matrices have at least this 227 information content (specificty) are returned. 228 min_length - Only motifs whose profiles are of at least this 229 length are returned. 230 min_sites - Only motifs compiled from at least these many binding 231 sites are returned. 232 all_versions- Unless specified, just the latest version of motifs 233 determined by the other selection criteria are 234 returned. Otherwise all versions of the selected 235 motifs are returned. 236 237 Returns: 238 - A Bio.motifs.jaspar.Record (list) of motifs. 239 240 """ 241 # Fetch the internal IDs of the motifs using the criteria provided 242 int_ids = self._fetch_internal_id_list( 243 collection=collection, 244 tf_name=tf_name, 245 tf_class=tf_class, 246 tf_family=tf_family, 247 matrix_id=matrix_id, 248 tax_group=tax_group, 249 species=species, 250 pazar_id=pazar_id, 251 data_type=data_type, 252 medline=medline, 253 all=all, 254 all_versions=all_versions 255 ) 256 257 record = jaspar.Record() 258 259 """ 260 Now further filter motifs returned above based on any specified 261 matrix specific criteria. 262 """ 263 for int_id in int_ids: 264 motif = self._fetch_motif_by_internal_id(int_id) 265 266 # Filter motifs to those with matrix IC greater than min_ic 267 if min_ic: 268 if motif.pssm.mean() < min_ic: 269 continue 270 271 # Filter motifs to those with minimum length of min_length 272 if min_length: 273 if motif.length < min_length: 274 continue 275 276 # XXX We could also supply a max_length filter. 277 278 """ 279 Filter motifs to those composed of at least this many sites. 280 The perl TFBS module assumes column sums may be different but 281 this should be strictly enforced here we will ignore this and 282 just use the first column sum. 
283 """ 284 if min_sites: 285 num_sites = sum( 286 [motif.counts[nt][0] for nt in motif.alphabet.letters] 287 ) 288 if num_sites < min_sites: 289 continue 290 291 record.append(motif) 292 293 return record
294
295 - def _fetch_latest_version(self, base_id):
296 """Get the latest version number for the given base_id.""" 297 cur = self.dbh.cursor() 298 cur.execute("""select VERSION from MATRIX where BASE_id = %s 299 order by VERSION desc limit 1""", (base_id,)) 300 301 row = cur.fetchone() 302 303 latest = None 304 if row: 305 latest = row[0] 306 else: 307 warnings.warn("Failed to fetch latest version number for JASPAR " 308 "motif with base ID '{0}'. " 309 "No JASPAR motif with this base ID appears to exist " 310 "in the database.".format(base_id), BiopythonWarning) 311 312 return latest
313
314 - def _fetch_internal_id(self, base_id, version):
315 """Fetch the internal id for a base id + version. 316 317 Also checks if this combo exists or not. 318 """ 319 cur = self.dbh.cursor() 320 cur.execute("""select id from MATRIX where BASE_id = %s 321 and VERSION = %s""", (base_id, version)) 322 323 row = cur.fetchone() 324 325 int_id = None 326 if row: 327 int_id = row[0] 328 else: 329 warnings.warn("Failed to fetch internal database ID for JASPAR " 330 "motif with matrix ID '{0}.{1}'. " 331 "No JASPAR motif with this matrix ID appears to " 332 "exist.".format(base_id, version), BiopythonWarning) 333 334 return int_id
335
336 - def _fetch_motif_by_internal_id(self, int_id):
337 # fetch basic motif information 338 cur = self.dbh.cursor() 339 cur.execute("""select BASE_ID, VERSION, COLLECTION, NAME from MATRIX 340 where id = %s""", (int_id,)) 341 342 row = cur.fetchone() 343 344 # This should never happen as it is an internal method. If it does 345 # we should probably raise an exception 346 if not row: 347 warnings.warn("Could not fetch JASPAR motif with internal " 348 "ID = {0}".format(int_id), BiopythonWarning) 349 return None 350 351 base_id = row[0] 352 version = row[1] 353 collection = row[2] 354 name = row[3] 355 356 matrix_id = "".join([base_id, '.', str(version)]) 357 358 # fetch the counts matrix 359 counts = self._fetch_counts_matrix(int_id) 360 361 # Create new JASPAR motif 362 motif = jaspar.Motif( 363 matrix_id, name, collection=collection, counts=counts 364 ) 365 366 # fetch species 367 cur.execute("""select TAX_ID from MATRIX_SPECIES 368 where id = %s""", (int_id,)) 369 tax_ids = [] 370 rows = cur.fetchall() 371 for row in rows: 372 tax_ids.append(row[0]) 373 374 # Many JASPAR motifs (especially those not in the CORE collection) 375 # do not have taxonomy IDs. So this warning would get annoying. 376 # if not tax_ids: 377 # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif" 378 # " {0}".format(motif.matrix_id), BiopythonWarning) 379 380 motif.species = tax_ids 381 382 # fetch protein accession numbers 383 cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,)) 384 accs = [] 385 rows = cur.fetchall() 386 for row in rows: 387 accs.append(row[0]) 388 389 # Similarly as for taxonomy IDs, it would get annoying to print 390 # warnings for JASPAR motifs which do not have accession numbers. 
391 392 motif.acc = accs 393 394 # fetch remaining annotation as tags from the ANNOTATION table 395 cur.execute("""select TAG, VAL from MATRIX_ANNOTATION 396 where id = %s""", (int_id,)) 397 rows = cur.fetchall() 398 for row in rows: 399 attr = row[0] 400 val = row[1] 401 if attr == 'class': 402 motif.tf_class = val 403 elif attr == 'family': 404 motif.tf_family = val 405 elif attr == 'tax_group': 406 motif.tax_group = val 407 elif attr == 'type': 408 motif.data_type = val 409 elif attr == 'pazar_tf_id': 410 motif.pazar_id = val 411 elif attr == 'medline': 412 motif.medline = val 413 elif attr == 'comment': 414 motif.comment = val 415 else: 416 """ 417 TODO If we were to implement additional abitrary tags 418 motif.tag(attr, val) 419 """ 420 pass 421 422 return motif
423
424 - def _fetch_counts_matrix(self, int_id):
425 """Fetch the counts matrix from the JASPAR DB by the internal ID 426 427 Returns a Bio.motifs.matrix.GenericPositionMatrix 428 """ 429 counts = {} 430 cur = self.dbh.cursor() 431 432 for base in dna.letters: 433 base_counts = [] 434 435 cur.execute("""select val from MATRIX_DATA where ID = %s 436 and row = %s order by col""", (int_id, base)) 437 438 rows = cur.fetchall() 439 for row in rows: 440 base_counts.append(row[0]) 441 442 counts[base] = [float(x) for x in base_counts] 443 444 return matrix.GenericPositionMatrix(dna, counts)
445
446 - def _fetch_internal_id_list( 447 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 448 tf_family=None, matrix_id=None, tax_group=None, species=None, 449 pazar_id=None, data_type=None, medline=None, all=False, 450 all_versions=False 451 ):
452 """Fetch list of internal JASPAR motif IDs. 453 454 Fetch a list of internal JASPAR motif IDs based on various passed 455 parameters which may then be used to fetch the rest of the motif data. 456 457 Caller: 458 fetch_motifs() 459 460 Arguments: 461 See arguments sections of fetch_motifs() 462 463 Returns: 464 A list of internal JASPAR motif IDs which match the given 465 selection criteria arguments. 466 467 468 Build an SQL query based on the selection arguments provided. 469 470 1: First add table joins and sub-clauses for criteria corresponding to 471 named fields from the MATRIX and MATRIX_SPECIES tables such as 472 collection, matrix ID, name, species etc. 473 474 2: Then add joins/sub-clauses for tag/value parameters from the 475 MATRIX_ANNOTATION table. 476 477 For the surviving matrices, the responsibility to do matrix-based 478 feature filtering such as ic, number of sites etc, fall on the 479 calling fetch_motifs() method. 480 481 """ 482 int_ids = [] 483 484 cur = self.dbh.cursor() 485 486 """ 487 Special case 1: fetch ALL motifs. Highest priority. 488 Ignore all other selection arguments. 489 """ 490 if all: 491 cur.execute("select ID from MATRIX") 492 rows = cur.fetchall() 493 494 for row in rows: 495 int_ids.append(row[0]) 496 497 return int_ids 498 499 """ 500 Special case 2: fetch specific motifs by their JASPAR IDs. This 501 has higher priority than any other except the above 'all' case. 502 Ignore all other selection arguments. 503 """ 504 if matrix_id: 505 """ 506 These might be either stable IDs or stable_ID.version. 
507 If just stable ID and if all_versions == 1, return all versions, 508 otherwise just the latest 509 """ 510 if all_versions: 511 for id in matrix_id: 512 # ignore vesion here, this is a stupidity filter 513 (base_id, version) = jaspar.split_jaspar_id(id) 514 cur.execute( 515 "select ID from MATRIX where BASE_ID = %s", (base_id,) 516 ) 517 518 rows = cur.fetchall() 519 for row in rows: 520 int_ids.append(row[0]) 521 else: 522 # only the lastest version, or the requested version 523 for id in matrix_id: 524 (base_id, version) = jaspar.split_jaspar_id(id) 525 526 if not version: 527 version = self._fetch_latest_version(base_id) 528 529 int_id = None 530 if version: 531 int_id = self._fetch_internal_id(base_id, version) 532 533 if int_id: 534 int_ids.append(int_id) 535 536 return int_ids 537 538 tables = ["MATRIX m"] 539 where_clauses = [] 540 541 # Select by MATRIX.COLLECTION 542 if collection: 543 if isinstance(collection, list): 544 # Multiple collections passed in as a list 545 clause = "m.COLLECTION in ('" 546 clause = "".join([clause, "','".join(collection)]) 547 clause = "".join([clause, "')"]) 548 else: 549 # A single collection - typical usage 550 clause = "m.COLLECTION = '%s'" % collection 551 552 where_clauses.append(clause) 553 554 # Select by MATRIX.NAME 555 if tf_name: 556 if isinstance(tf_name, list): 557 # Multiple names passed in as a list 558 clause = "m.NAME in ('" 559 clause = "".join([clause, "','".join(tf_name)]) 560 clause = "".join([clause, "')"]) 561 else: 562 # A single name 563 clause = "m.NAME = '%s'" % tf_name 564 565 where_clauses.append(clause) 566 567 # Select by MATRIX_SPECIES.TAX_ID 568 if species: 569 tables.append("MATRIX_SPECIES ms") 570 where_clauses.append("m.ID = ms.ID") 571 572 """ 573 NOTE: species are numeric taxonomy IDs but stored as varchars 574 in the DB. 
575 """ 576 if isinstance(species, list): 577 # Multiple tax IDs passed in as a list 578 clause = "ms.TAX_ID in ('" 579 clause = "".join([clause, "','".join(str(s) for s in species)]) 580 clause = "".join([clause, "')"]) 581 else: 582 # A single tax ID 583 clause = "ms.TAX_ID = '%s'" % str(species) 584 585 where_clauses.append(clause) 586 587 """ 588 Tag based selection from MATRIX_ANNOTATION 589 Differs from perl TFBS module in that the matrix class explicitly 590 has a tag attribute corresponding to the tags in the database. This 591 provides tremendous flexibility in adding new tags to the DB and 592 being able to select based on those tags with out adding new code. 593 In the JASPAR Motif class we have elected to use specific attributes 594 for the most commonly used tags and here correspondingly only allow 595 selection on these attributes. 596 597 The attributes corresponding to the tags for which selection is 598 provided are: 599 600 Attribute Tag 601 tf_class class 602 tf_family family 603 pazar_id pazar_tf_id 604 medline medline 605 data_type type 606 tax_group tax_group 607 """ 608 609 # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class") 610 if tf_class: 611 tables.append("MATRIX_ANNOTATION ma1") 612 where_clauses.append("m.ID = ma1.ID") 613 614 clause = "ma1.TAG = 'class'" 615 if isinstance(tf_class, list): 616 # A list of TF classes 617 clause = "".join([clause, " and ma1.VAL in ('"]) 618 clause = "".join([clause, "','".join(tf_class)]) 619 clause = "".join([clause, "')"]) 620 else: 621 # A single TF class 622 clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class]) 623 624 where_clauses.append(clause) 625 626 # Select by TF families (MATRIX_ANNOTATION.TAG="family") 627 if tf_family: 628 tables.append("MATRIX_ANNOTATION ma2") 629 where_clauses.append("m.ID = ma2.ID") 630 631 clause = "ma2.TAG = 'family'" 632 if isinstance(tf_family, list): 633 # A list of TF families 634 clause = "".join([clause, " and ma2.VAL in ('"]) 635 clause = 
"".join([clause, "','".join(tf_family)]) 636 clause = "".join([clause, "')"]) 637 else: 638 # A single TF family 639 clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family]) 640 641 where_clauses.append(clause) 642 643 # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id") 644 if pazar_id: 645 tables.append("MATRIX_ANNOTATION ma3") 646 where_clauses.append("m.ID = ma3.ID") 647 648 clause = "ma3.TAG = 'pazar_tf_id'" 649 if isinstance(pazar_id, list): 650 # A list of PAZAR IDs 651 clause = "".join([clause, " and ma3.VAL in ('"]) 652 clause = "".join([clause, "','".join(pazar_id)]) 653 clause = "".join([clause, "')"]) 654 else: 655 # A single PAZAR ID 656 clause = "".join([" and ma3.VAL = '%s' " % pazar_id]) 657 658 where_clauses.append(clause) 659 660 # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline") 661 if medline: 662 tables.append("MATRIX_ANNOTATION ma4") 663 where_clauses.append("m.ID = ma4.ID") 664 665 clause = "ma4.TAG = 'medline'" 666 if isinstance(medline, list): 667 # A list of PubMed IDs 668 clause = "".join([clause, " and ma4.VAL in ('"]) 669 clause = "".join([clause, "','".join(medline)]) 670 clause = "".join([clause, "')"]) 671 else: 672 # A single PubMed ID 673 clause = "".join([" and ma4.VAL = '%s' " % medline]) 674 675 where_clauses.append(clause) 676 677 # Select by data type(s) used to compile the matrix 678 # (MATRIX_ANNOTATION.TAG="type") 679 if data_type: 680 tables.append("MATRIX_ANNOTATION ma5") 681 where_clauses.append("m.ID = ma5.ID") 682 683 clause = "ma5.TAG = 'type'" 684 if isinstance(data_type, list): 685 # A list of data types 686 clause = "".join([clause, " and ma5.VAL in ('"]) 687 clause = "".join([clause, "','".join(data_type)]) 688 clause = "".join([clause, "')"]) 689 else: 690 # A single data type 691 clause = "".join([" and ma5.VAL = '%s' " % data_type]) 692 693 where_clauses.append(clause) 694 695 # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group") 696 if tax_group: 697 
tables.append("MATRIX_ANNOTATION ma6") 698 where_clauses.append("m.ID = ma6.ID") 699 700 clause = "ma6.TAG = 'tax_group'" 701 if isinstance(tax_group, list): 702 # A list of tax IDs 703 clause = "".join([clause, " and ma6.VAL in ('"]) 704 clause = "".join([clause, "','".join(tax_group)]) 705 clause = "".join([clause, "')"]) 706 else: 707 # A single tax ID 708 clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group]) 709 710 where_clauses.append(clause) 711 712 sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)]) 713 714 if where_clauses: 715 sql = "".join([sql, " where ", " and ".join(where_clauses)]) 716 717 # print "sql = %s" % sql 718 719 cur.execute(sql) 720 rows = cur.fetchall() 721 722 for row in rows: 723 id = row[0] 724 if all_versions: 725 int_ids.append(id) 726 else: 727 # is the latest version? 728 if self._is_latest_version(id): 729 int_ids.append(id) 730 731 if len(int_ids) < 1: 732 warnings.warn("Zero motifs returned with current select critera", 733 BiopythonWarning) 734 735 return int_ids
736
737 - def _is_latest_version(self, int_id):
738 """Check if the internal ID represents the latest JASPAR matrix. 739 740 Does this internal ID represent the latest version of the JASPAR 741 matrix (collapse on base ids) 742 """ 743 cur = self.dbh.cursor() 744 745 cur.execute("select count(*) from MATRIX where " 746 "BASE_ID = (select BASE_ID from MATRIX where ID = %s) " 747 "and VERSION > (select VERSION from MATRIX where ID = %s)", 748 (int_id, int_id)) 749 750 row = cur.fetchone() 751 752 count = row[0] 753 754 if count == 0: 755 # no matrices with higher version ID and same base id 756 return True 757 758 return False
759