Package Bio :: Package motifs :: Package jaspar :: Module db
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.jaspar.db

  1  # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Provides read access to a JASPAR5 formatted database. 
  6   
  7  This module requires MySQLdb to be installed. 
  8   
  9  Example; substitute your own database credentials as 
 10  appropriate: 
 11   
 12      >>> from Bio.motifs.jaspar.db import JASPAR5 
 13      >>> 
 14      >>> JASPAR_DB_HOST = "hostname.example.org" 
 15      >>> JASPAR_DB_NAME = "JASPAR_2013" 
 16      >>> JASPAR_DB_USER = "guest" 
 17      >>> JASPAR_DB_PASS = "guest" 
 18      >>> 
 19      >>> DFLT_COLLECTION = 'CORE' 
 20      >>> jdb = JASPAR5( 
 21      ...     host=JASPAR_DB_HOST, 
 22      ...     name=JASPAR_DB_NAME, 
 23      ...     user=JASPAR_DB_USER, 
 24      ...     password=JASPAR_DB_PASS 
 25      ... ) 
 26      >>> 
 27      >>> 
 28      >>> ets1 = jdb.fetch_motif_by_id('MA0098') 
 29      >>> print(ets1) 
 30      TF name ETS1 
 31      Matrix ID   MA0098.1 
 32      Collection  CORE 
 33      TF class    Winged Helix-Turn-Helix 
 34      TF family   Ets 
 35      Species 9606 
 36      Taxonomic group vertebrates 
 37      Accession   ['CAG47050'] 
 38      Data type used  SELEX 
 39      Medline 1542566 
 40      PAZAR ID    TF0000070 
 41      Comments    - 
 42      Matrix: 
 43              0      1      2      3      4      5 
 44      A:   4.00  17.00   0.00   0.00   0.00   5.00 
 45      C:  16.00   0.00   1.00  39.00  39.00   3.00 
 46      G:   4.00   0.00   0.00   1.00   0.00  17.00 
 47      T:  16.00  23.00  39.00   0.00   1.00  15.00 
 48   
 49   
 50      >>> 
 51      >>> motifs = jdb.fetch_motifs( 
 52      ...     collection = 'CORE', 
 53      ...     tax_group = ['vertebrates', 'insects'], 
 54      ...     tf_class = 'Winged Helix-Turn-Helix', 
 55      ...     tf_family = ['Forkhead', 'Ets'], 
 56      ...     min_ic = 12 
 57      ... ) 
 58      >>> 
 59      >>> for motif in motifs: 
 60      ...     pass # do something with the motif 
 61   
 62  """ 
 63   
 64  from __future__ import print_function 
 65   
 66  import warnings 
 67  from Bio import BiopythonWarning 
 68  from Bio import MissingPythonDependencyError 
 69   
 70  try: 
 71      import MySQLdb as mdb 
 72  except: 
 73      raise MissingPythonDependencyError("Install MySQLdb if you want to use " 
 74                                         "Bio.motifs.jaspar.db") 
 75   
 76  from Bio.Alphabet.IUPAC import unambiguous_dna as dna 
 77  from Bio.motifs import jaspar, matrix 
 78   
 79   
 80  JASPAR_DFLT_COLLECTION = 'CORE' 
 81   
 82   
83 -class JASPAR5(object):
84 """ 85 Class representing a JASPAR5 DB. The methods within are loosely based 86 on the perl TFBS::DB::JASPAR5 module. 87 88 Note: We will only implement reading of JASPAR motifs from the DB. 89 Unlike the perl module, we will not attempt to implement any methods to 90 store JASPAR motifs or create a new DB at this time. 91 92 """ 93
94 - def __init__(self, host=None, name=None, user=None, password=None):
95 """ 96 Construct a JASPAR5 instance and connect to specified DB 97 98 Arguments: 99 host - host name of the the JASPAR DB server 100 name - name of the JASPAR database 101 user - user name to connect to the JASPAR DB 102 password - JASPAR DB password 103 """ 104 self.name = name 105 self.host = host 106 self.user = user 107 self.password = password 108 109 self.dbh = mdb.connect(host, user, password, name)
110
111 - def __str__(self):
112 """Return a string represention of the JASPAR5 DB connection.""" 113 return "%s\@%s:%s" % (self.user, self.host, self.name)
114
115 - def fetch_motif_by_id(self, id):
116 """Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID 117 118 Example id 'MA0001.1'. 119 120 Arguments: 121 122 - id - JASPAR matrix ID. This may be a fully specified ID including 123 the version number (e.g. MA0049.2) or just the base ID (e.g. 124 MA0049). If only a base ID is provided, the latest version is 125 returned. 126 127 Returns: 128 129 - A Bio.motifs.jaspar.Motif object 130 131 **NOTE:** The perl TFBS module allows you to specify the type of matrix 132 to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as 133 PFMs so this does not really belong here. Once a PFM is fetched the 134 pwm() and pssm() methods can be called to return the normalized and 135 log-odds matrices. 136 """ 137 # separate stable ID and version number 138 (base_id, version) = jaspar.split_jaspar_id(id) 139 if not version: 140 # if ID contains no version portion, fetch the latest version 141 version = self._fetch_latest_version(base_id) 142 143 # fetch internal JASPAR matrix ID - also a check for validity 144 int_id = None 145 if version: 146 int_id = self._fetch_internal_id(base_id, version) 147 148 # fetch JASPAR motif using internal ID 149 motif = None 150 if int_id: 151 motif = self._fetch_motif_by_internal_id(int_id) 152 153 return motif
154
155 - def fetch_motifs_by_name(self, name):
156 """Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s). 157 158 Arguments: 159 name - a single name or list of names 160 Returns: 161 A list of Bio.motifs.Motif.japar objects 162 163 Notes: 164 Names are not guaranteed to be unique. There may be more than one 165 motif with the same name. Therefore even if name specifies a single 166 name, a list of motifs is returned. This just calls 167 self.fetch_motifs(collection = None, tf_name = name). 168 169 This behaviour is different from the TFBS perl module's 170 get_Matrix_by_name() method which always returns a single matrix, 171 issuing a warning message and returning the first matrix retrieved 172 in the case where multiple matrices have the same name. 173 """ 174 return self.fetch_motifs(collection=None, tf_name=name)
175
176 - def fetch_motifs( 177 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 178 tf_family=None, matrix_id=None, tax_group=None, species=None, 179 pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0, 180 min_sites=0, all=False, all_versions=False 181 ):
182 """Fetch jaspar.Record (list) of motifs using selection criteria. 183 184 Arguments:: 185 186 Except where obvious, all selection criteria arguments may be 187 specified as a single value or a list of values. Motifs must 188 meet ALL the specified selection criteria to be returned with 189 the precedent exceptions noted below. 190 191 all - Takes precedent of all other selection criteria. 192 Every motif is returned. If 'all_versions' is also 193 specified, all versions of every motif are returned, 194 otherwise just the latest version of every motif is 195 returned. 196 matrix_id - Takes precedence over all other selection criteria 197 except 'all'. Only motifs with the given JASPAR 198 matrix ID(s) are returned. A matrix ID may be 199 specified as just a base ID or full JASPAR IDs 200 including version number. If only a base ID is 201 provided for specific motif(s), then just the latest 202 version of those motif(s) are returned unless 203 'all_versions' is also specified. 204 collection - Only motifs from the specified JASPAR collection(s) 205 are returned. NOTE - if not specified, the collection 206 defaults to CORE for all other selection criteria 207 except 'all' and 'matrix_id'. To apply the other 208 selection criteria across all JASPAR collections, 209 explicitly set collection=None. 210 tf_name - Only motifs with the given name(s) are returned. 211 tf_class - Only motifs of the given TF class(es) are returned. 212 tf_family - Only motifs from the given TF families are returned. 213 tax_group - Only motifs belonging to the given taxonomic 214 supergroups are returned (e.g. 'vertebrates', 215 'insects', 'nematodes' etc.) 216 species - Only motifs derived from the given species are 217 returned. Species are specified as taxonomy IDs. 218 data_type - Only motifs generated with the given data type (e.g. 219 ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned. 220 NOTE - must match exactly as stored in the database. 
221 pazar_id - Only motifs with the given PAZAR TF ID are returned. 222 medline - Only motifs with the given medline (PubmMed IDs) are 223 returned. 224 min_ic - Only motifs whose profile matrices have at least this 225 information content (specificty) are returned. 226 min_length - Only motifs whose profiles are of at least this 227 length are returned. 228 min_sites - Only motifs compiled from at least these many binding 229 sites are returned. 230 all_versions- Unless specified, just the latest version of motifs 231 determined by the other selection criteria are 232 returned. Otherwise all versions of the selected 233 motifs are returned. 234 235 Returns: 236 237 - A Bio.motifs.jaspar.Record (list) of motifs. 238 """ 239 # Fetch the internal IDs of the motifs using the criteria provided 240 int_ids = self._fetch_internal_id_list( 241 collection=collection, 242 tf_name=tf_name, 243 tf_class=tf_class, 244 tf_family=tf_family, 245 matrix_id=matrix_id, 246 tax_group=tax_group, 247 species=species, 248 pazar_id=pazar_id, 249 data_type=data_type, 250 medline=medline, 251 all=all, 252 all_versions=all_versions 253 ) 254 255 record = jaspar.Record() 256 257 """ 258 Now further filter motifs returned above based on any specified 259 matrix specific criteria. 260 """ 261 for int_id in int_ids: 262 motif = self._fetch_motif_by_internal_id(int_id) 263 264 # Filter motifs to those with matrix IC greater than min_ic 265 if min_ic: 266 if motif.pssm.mean() < min_ic: 267 continue 268 269 # Filter motifs to those with minimum length of min_length 270 if min_length: 271 if motif.length < min_length: 272 continue 273 274 # XXX We could also supply a max_length filter. 275 276 """ 277 Filter motifs to those composed of at least this many sites. 278 The perl TFBS module assumes column sums may be different but 279 this should be strictly enforced here we will ignore this and 280 just use the first column sum. 
281 """ 282 if min_sites: 283 num_sites = sum( 284 [motif.counts[nt][0] for nt in motif.alphabet.letters] 285 ) 286 if num_sites < min_sites: 287 continue 288 289 record.append(motif) 290 291 return record
292
293 - def _fetch_latest_version(self, base_id):
294 """Get the latest version number for the given base_id.""" 295 cur = self.dbh.cursor() 296 cur.execute("""select VERSION from MATRIX where BASE_id = %s 297 order by VERSION desc limit 1""", (base_id,)) 298 299 row = cur.fetchone() 300 301 latest = None 302 if row: 303 latest = row[0] 304 else: 305 warnings.warn("Failed to fetch latest version number for JASPAR " 306 "motif with base ID '{0}'. " 307 "No JASPAR motif with this base ID appears to exist " 308 "in the database.".format(base_id), BiopythonWarning) 309 310 return latest
311
312 - def _fetch_internal_id(self, base_id, version):
313 """Fetch the internal id for a base id + version. 314 315 Also checks if this combo exists or not. 316 """ 317 cur = self.dbh.cursor() 318 cur.execute("""select id from MATRIX where BASE_id = %s 319 and VERSION = %s""", (base_id, version)) 320 321 row = cur.fetchone() 322 323 int_id = None 324 if row: 325 int_id = row[0] 326 else: 327 warnings.warn("Failed to fetch internal database ID for JASPAR " 328 "motif with matrix ID '{0}.{1}'. " 329 "No JASPAR motif with this matrix ID appears to " 330 "exist.".format(base_id, version), BiopythonWarning) 331 332 return int_id
333
334 - def _fetch_motif_by_internal_id(self, int_id):
335 # fetch basic motif information 336 cur = self.dbh.cursor() 337 cur.execute("""select BASE_ID, VERSION, COLLECTION, NAME from MATRIX 338 where id = %s""", (int_id,)) 339 340 row = cur.fetchone() 341 342 # This should never happen as it is an internal method. If it does 343 # we should probably raise an exception 344 if not row: 345 warnings.warn("Could not fetch JASPAR motif with internal " 346 "ID = {0}".format(int_id), BiopythonWarning) 347 return None 348 349 base_id = row[0] 350 version = row[1] 351 collection = row[2] 352 name = row[3] 353 354 matrix_id = "".join([base_id, '.', str(version)]) 355 356 # fetch the counts matrix 357 counts = self._fetch_counts_matrix(int_id) 358 359 # Create new JASPAR motif 360 motif = jaspar.Motif( 361 matrix_id, name, collection=collection, counts=counts 362 ) 363 364 # fetch species 365 cur.execute("""select TAX_ID from MATRIX_SPECIES 366 where id = %s""", (int_id,)) 367 tax_ids = [] 368 rows = cur.fetchall() 369 for row in rows: 370 tax_ids.append(row[0]) 371 372 # Many JASPAR motifs (especially those not in the CORE collection) 373 # do not have taxonomy IDs. So this warning would get annoying. 374 # if not tax_ids: 375 # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif" 376 # " {0}".format(motif.matrix_id), BiopythonWarning) 377 378 motif.species = tax_ids 379 380 # fetch protein accession numbers 381 cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,)) 382 accs = [] 383 rows = cur.fetchall() 384 for row in rows: 385 accs.append(row[0]) 386 387 # Similarly as for taxonomy IDs, it would get annoying to print 388 # warnings for JASPAR motifs which do not have accession numbers. 
389 390 motif.acc = accs 391 392 # fetch remaining annotation as tags from the ANNOTATION table 393 cur.execute("""select TAG, VAL from MATRIX_ANNOTATION 394 where id = %s""", (int_id,)) 395 rows = cur.fetchall() 396 for row in rows: 397 attr = row[0] 398 val = row[1] 399 if attr == 'class': 400 motif.tf_class = val 401 elif attr == 'family': 402 motif.tf_family = val 403 elif attr == 'tax_group': 404 motif.tax_group = val 405 elif attr == 'type': 406 motif.data_type = val 407 elif attr == 'pazar_tf_id': 408 motif.pazar_id = val 409 elif attr == 'medline': 410 motif.medline = val 411 elif attr == 'comment': 412 motif.comment = val 413 else: 414 """ 415 TODO If we were to implement additional abitrary tags 416 motif.tag(attr, val) 417 """ 418 pass 419 420 return motif
421
422 - def _fetch_counts_matrix(self, int_id):
423 """Fetch the counts matrix from the JASPAR DB by the internal ID 424 425 Returns a Bio.motifs.matrix.GenericPositionMatrix 426 """ 427 counts = {} 428 cur = self.dbh.cursor() 429 430 for base in dna.letters: 431 base_counts = [] 432 433 cur.execute("""select val from MATRIX_DATA where ID = %s 434 and row = %s order by col""", (int_id, base)) 435 436 rows = cur.fetchall() 437 for row in rows: 438 base_counts.append(row[0]) 439 440 counts[base] = [float(x) for x in base_counts] 441 442 return matrix.GenericPositionMatrix(dna, counts)
443
444 - def _fetch_internal_id_list( 445 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 446 tf_family=None, matrix_id=None, tax_group=None, species=None, 447 pazar_id=None, data_type=None, medline=None, all=False, 448 all_versions=False 449 ):
450 """Fetch list of internal JASPAR motif IDs. 451 452 Fetch a list of internal JASPAR motif IDs based on various passed 453 parameters which may then be used to fetch the rest of the motif data. 454 455 Caller: 456 fetch_motifs() 457 458 Arguments: 459 See arguments sections of fetch_motifs() 460 461 Returns: 462 A list of internal JASPAR motif IDs which match the given 463 selection criteria arguments. 464 465 466 Build an SQL query based on the selection arguments provided. 467 468 1: First add table joins and sub-clauses for criteria corresponding to 469 named fields from the MATRIX and MATRIX_SPECIES tables such as 470 collection, matrix ID, name, species etc. 471 472 2: Then add joins/sub-clauses for tag/value parameters from the 473 MATRIX_ANNOTATION table. 474 475 For the surviving matrices, the responsibility to do matrix-based 476 feature filtering such as ic, number of sites etc, fall on the 477 calling fetch_motifs() method. 478 """ 479 int_ids = [] 480 481 cur = self.dbh.cursor() 482 483 """ 484 Special case 1: fetch ALL motifs. Highest priority. 485 Ignore all other selection arguments. 486 """ 487 if all: 488 cur.execute("select ID from MATRIX") 489 rows = cur.fetchall() 490 491 for row in rows: 492 int_ids.append(row[0]) 493 494 return int_ids 495 496 """ 497 Special case 2: fetch specific motifs by their JASPAR IDs. This 498 has higher priority than any other except the above 'all' case. 499 Ignore all other selection arguments. 500 """ 501 if matrix_id: 502 """ 503 These might be either stable IDs or stable_ID.version. 
504 If just stable ID and if all_versions == 1, return all versions, 505 otherwise just the latest 506 """ 507 if all_versions: 508 for id in matrix_id: 509 # ignore vesion here, this is a stupidity filter 510 (base_id, version) = jaspar.split_jaspar_id(id) 511 cur.execute( 512 "select ID from MATRIX where BASE_ID = %s", (base_id,) 513 ) 514 515 rows = cur.fetchall() 516 for row in rows: 517 int_ids.append(row[0]) 518 else: 519 # only the lastest version, or the requested version 520 for id in matrix_id: 521 (base_id, version) = jaspar.split_jaspar_id(id) 522 523 if not version: 524 version = self._fetch_latest_version(base_id) 525 526 int_id = None 527 if version: 528 int_id = self._fetch_internal_id(base_id, version) 529 530 if int_id: 531 int_ids.append(int_id) 532 533 return int_ids 534 535 tables = ["MATRIX m"] 536 where_clauses = [] 537 538 # Select by MATRIX.COLLECTION 539 if collection: 540 if isinstance(collection, list): 541 # Multiple collections passed in as a list 542 clause = "m.COLLECTION in ('" 543 clause = "".join([clause, "','".join(collection)]) 544 clause = "".join([clause, "')"]) 545 else: 546 # A single collection - typical usage 547 clause = "m.COLLECTION = '%s'" % collection 548 549 where_clauses.append(clause) 550 551 # Select by MATRIX.NAME 552 if tf_name: 553 if isinstance(tf_name, list): 554 # Multiple names passed in as a list 555 clause = "m.NAME in ('" 556 clause = "".join([clause, "','".join(tf_name)]) 557 clause = "".join([clause, "')"]) 558 else: 559 # A single name 560 clause = "m.NAME = '%s'" % tf_name 561 562 where_clauses.append(clause) 563 564 # Select by MATRIX_SPECIES.TAX_ID 565 if species: 566 tables.append("MATRIX_SPECIES ms") 567 where_clauses.append("m.ID = ms.ID") 568 569 """ 570 NOTE: species are numeric taxonomy IDs but stored as varchars 571 in the DB. 
572 """ 573 if isinstance(species, list): 574 # Multiple tax IDs passed in as a list 575 clause = "ms.TAX_ID in ('" 576 clause = "".join([clause, "','".join(str(s) for s in species)]) 577 clause = "".join([clause, "')"]) 578 else: 579 # A single tax ID 580 clause = "ms.TAX_ID = '%s'" % str(species) 581 582 where_clauses.append(clause) 583 584 """ 585 Tag based selection from MATRIX_ANNOTATION 586 Differs from perl TFBS module in that the matrix class explicitly 587 has a tag attribute corresponding to the tags in the database. This 588 provides tremendous flexibility in adding new tags to the DB and 589 being able to select based on those tags with out adding new code. 590 In the JASPAR Motif class we have elected to use specific attributes 591 for the most commonly used tags and here correspondingly only allow 592 selection on these attributes. 593 594 The attributes corresponding to the tags for which selection is 595 provided are: 596 597 Attribute Tag 598 tf_class class 599 tf_family family 600 pazar_id pazar_tf_id 601 medline medline 602 data_type type 603 tax_group tax_group 604 """ 605 606 # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class") 607 if tf_class: 608 tables.append("MATRIX_ANNOTATION ma1") 609 where_clauses.append("m.ID = ma1.ID") 610 611 clause = "ma1.TAG = 'class'" 612 if isinstance(tf_class, list): 613 # A list of TF classes 614 clause = "".join([clause, " and ma1.VAL in ('"]) 615 clause = "".join([clause, "','".join(tf_class)]) 616 clause = "".join([clause, "')"]) 617 else: 618 # A single TF class 619 clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class]) 620 621 where_clauses.append(clause) 622 623 # Select by TF families (MATRIX_ANNOTATION.TAG="family") 624 if tf_family: 625 tables.append("MATRIX_ANNOTATION ma2") 626 where_clauses.append("m.ID = ma2.ID") 627 628 clause = "ma2.TAG = 'family'" 629 if isinstance(tf_family, list): 630 # A list of TF families 631 clause = "".join([clause, " and ma2.VAL in ('"]) 632 clause = 
"".join([clause, "','".join(tf_family)]) 633 clause = "".join([clause, "')"]) 634 else: 635 # A single TF family 636 clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family]) 637 638 where_clauses.append(clause) 639 640 # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id") 641 if pazar_id: 642 tables.append("MATRIX_ANNOTATION ma3") 643 where_clauses.append("m.ID = ma3.ID") 644 645 clause = "ma3.TAG = 'pazar_tf_id'" 646 if isinstance(pazar_id, list): 647 # A list of PAZAR IDs 648 clause = "".join([clause, " and ma3.VAL in ('"]) 649 clause = "".join([clause, "','".join(pazar_id)]) 650 clause = "".join([clause, "')"]) 651 else: 652 # A single PAZAR ID 653 clause = "".join([" and ma3.VAL = '%s' " % pazar_id]) 654 655 where_clauses.append(clause) 656 657 # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline") 658 if medline: 659 tables.append("MATRIX_ANNOTATION ma4") 660 where_clauses.append("m.ID = ma4.ID") 661 662 clause = "ma4.TAG = 'medline'" 663 if isinstance(medline, list): 664 # A list of PubMed IDs 665 clause = "".join([clause, " and ma4.VAL in ('"]) 666 clause = "".join([clause, "','".join(medline)]) 667 clause = "".join([clause, "')"]) 668 else: 669 # A single PubMed ID 670 clause = "".join([" and ma4.VAL = '%s' " % medline]) 671 672 where_clauses.append(clause) 673 674 # Select by data type(s) used to compile the matrix 675 # (MATRIX_ANNOTATION.TAG="type") 676 if data_type: 677 tables.append("MATRIX_ANNOTATION ma5") 678 where_clauses.append("m.ID = ma5.ID") 679 680 clause = "ma5.TAG = 'type'" 681 if isinstance(data_type, list): 682 # A list of data types 683 clause = "".join([clause, " and ma5.VAL in ('"]) 684 clause = "".join([clause, "','".join(data_type)]) 685 clause = "".join([clause, "')"]) 686 else: 687 # A single data type 688 clause = "".join([" and ma5.VAL = '%s' " % data_type]) 689 690 where_clauses.append(clause) 691 692 # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group") 693 if tax_group: 694 
tables.append("MATRIX_ANNOTATION ma6") 695 where_clauses.append("m.ID = ma6.ID") 696 697 clause = "ma6.TAG = 'tax_group'" 698 if isinstance(tax_group, list): 699 # A list of tax IDs 700 clause = "".join([clause, " and ma6.VAL in ('"]) 701 clause = "".join([clause, "','".join(tax_group)]) 702 clause = "".join([clause, "')"]) 703 else: 704 # A single tax ID 705 clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group]) 706 707 where_clauses.append(clause) 708 709 sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)]) 710 711 if where_clauses: 712 sql = "".join([sql, " where ", " and ".join(where_clauses)]) 713 714 # print "sql = %s" % sql 715 716 cur.execute(sql) 717 rows = cur.fetchall() 718 719 for row in rows: 720 id = row[0] 721 if all_versions: 722 int_ids.append(id) 723 else: 724 # is the latest version? 725 if self._is_latest_version(id): 726 int_ids.append(id) 727 728 if len(int_ids) < 1: 729 warnings.warn("Zero motifs returned with current select critera", 730 BiopythonWarning) 731 732 return int_ids
733
734 - def _is_latest_version(self, int_id):
735 """Check if the internal ID represents the latest JASPAR matrix. 736 737 Does this internal ID represent the latest version of the JASPAR 738 matrix (collapse on base ids) 739 """ 740 cur = self.dbh.cursor() 741 742 cur.execute("select count(*) from MATRIX where " 743 "BASE_ID = (select BASE_ID from MATRIX where ID = %s) " 744 "and VERSION > (select VERSION from MATRIX where ID = %s)", 745 (int_id, int_id)) 746 747 row = cur.fetchone() 748 749 count = row[0] 750 751 if count == 0: 752 # no matrices with higher version ID and same base id 753 return True 754 755 return False
756