Package Bio :: Package motifs :: Package jaspar :: Module db
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.jaspar.db

  1  # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Provides read access to a JASPAR5 formatted database. 
  6   
  7  This module requires MySQLdb to be installed. 
  8   
  9  Example, substitute your database credentials as 
 10  appropriate: 
 11   
 12      >>> from Bio.motifs.jaspar.db import JASPAR5 
 13      >>> 
 14      >>> JASPAR_DB_HOST = "hostname.example.org" 
 15      >>> JASPAR_DB_NAME = "JASPAR_2013" 
 16      >>> JASPAR_DB_USER = "guest" 
 17      >>> JASPAR_DB_PASS = "guest" 
 18      >>> 
 19      >>> DFLT_COLLECTION = 'CORE' 
 20      >>> jdb = JASPAR5( 
 21      ...     host=JASPAR_DB_HOST, 
 22      ...     name=JASPAR_DB_NAME, 
 23      ...     user=JASPAR_DB_USER, 
 24      ...     password=JASPAR_DB_PASS 
 25      ... ) 
 26      >>> 
 27      >>> 
 28      >>> ets1 = jdb.fetch_motif_by_id('MA0098') 
 29      >>> print(ets1) 
 30      TF name ETS1 
 31      Matrix ID   MA0098.1 
 32      Collection  CORE 
 33      TF class    Winged Helix-Turn-Helix 
 34      TF family   Ets 
 35      Species 9606 
 36      Taxonomic group vertebrates 
 37      Accession   ['CAG47050'] 
 38      Data type used  SELEX 
 39      Medline 1542566 
 40      PAZAR ID    TF0000070 
 41      Comments    - 
 42      Matrix: 
 43              0      1      2      3      4      5 
 44      A:   4.00  17.00   0.00   0.00   0.00   5.00 
 45      C:  16.00   0.00   1.00  39.00  39.00   3.00 
 46      G:   4.00   0.00   0.00   1.00   0.00  17.00 
 47      T:  16.00  23.00  39.00   0.00   1.00  15.00 
 48   
 49   
 50      >>> 
 51      >>> motifs = jdb.fetch_motifs( 
 52      ...     collection = 'CORE', 
 53      ...     tax_group = ['vertebrates', 'insects'], 
 54      ...     tf_class = 'Winged Helix-Turn-Helix', 
 55      ...     tf_family = ['Forkhead', 'Ets'], 
 56      ...     min_ic = 12 
 57      ... ) 
 58      >>> 
 59      >>> for motif in motifs: 
 60      ...     pass # do something with the motif 
 61   
 62  """ 
 63   
 64  from __future__ import print_function 
 65   
 66  import warnings 
 67  from Bio import BiopythonWarning 
 68  from Bio import MissingPythonDependencyError 
 69   
 70  try: 
 71      import MySQLdb as mdb 
 72  except: 
 73      raise MissingPythonDependencyError("Install MySQLdb if you want to use " 
 74                                         "Bio.motifs.jaspar.db") 
 75   
 76  from Bio.Alphabet.IUPAC import unambiguous_dna as dna 
 77  from Bio.motifs import jaspar, matrix 
 78   
 79   
 80  JASPAR_DFLT_COLLECTION = 'CORE' 
 81   
 82   
class JASPAR5(object):
    """Class representing a JASPAR5 DB.

    The methods within are loosely based on the perl TFBS::DB::JASPAR5
    module.

    Note: We will only implement reading of JASPAR motifs from the DB.
    Unlike the perl module, we will not attempt to implement any methods to
    store JASPAR motifs or create a new DB at this time.

    """

94 - def __init__(self, host=None, name=None, user=None, password=None):
95 """ 96 Construct a JASPAR5 instance and connect to specified DB 97 98 Arguments: 99 host - host name of the the JASPAR DB server 100 name - name of the JASPAR database 101 user - user name to connect to the JASPAR DB 102 password - JASPAR DB password 103 104 """ 105 106 self.name = name 107 self.host = host 108 self.user = user 109 self.password = password 110 111 self.dbh = mdb.connect(host, user, password, name)
112
113 - def __str__(self):
114 """ 115 Return a string represention of the JASPAR5 DB connection. 116 117 """ 118 119 text = "%s\@%s:%s" % (self.user, self.host, self.name) 120 121 return text
122
123 - def fetch_motif_by_id(self, id):
124 """ 125 Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID 126 (e.g. 'MA0001.1'). 127 128 Arguments: 129 130 - id - JASPAR matrix ID. This may be a fully specified ID including 131 the version number (e.g. MA0049.2) or just the base ID (e.g. 132 MA0049). If only a base ID is provided, the latest version is 133 returned. 134 135 Returns: 136 137 - A Bio.motifs.jaspar.Motif object 138 139 **NOTE:** The perl TFBS module allows you to specify the type of matrix 140 to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as 141 PFMs so this does not really belong here. Once a PFM is fetched the 142 pwm() and pssm() methods can be called to return the normalized and 143 log-odds matrices. 144 145 """ 146 147 # separate stable ID and version number 148 (base_id, version) = jaspar.split_jaspar_id(id) 149 if not version: 150 # if ID contains no version portion, fetch the latest version 151 version = self._fetch_latest_version(base_id) 152 153 # fetch internal JASPAR matrix ID - also a check for validity 154 int_id = None 155 if version: 156 int_id = self._fetch_internal_id(base_id, version) 157 158 # fetch JASPAR motif using internal ID 159 motif = None 160 if int_id: 161 motif = self._fetch_motif_by_internal_id(int_id) 162 163 return motif
164
165 - def fetch_motifs_by_name(self, name):
166 """ 167 Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s). 168 169 Arguments: 170 name - a single name or list of names 171 Returns: 172 A list of Bio.motifs.Motif.japar objects 173 174 Notes: 175 Names are not guaranteed to be unique. There may be more than one 176 motif with the same name. Therefore even if name specifies a single 177 name, a list of motifs is returned. This just calls 178 self.fetch_motifs(collection = None, tf_name = name). 179 180 This behaviour is different from the TFBS perl module's 181 get_Matrix_by_name() method which always returns a single matrix, 182 issuing a warning message and returning the first matrix retrieved 183 in the case where multiple matrices have the same name. 184 185 """ 186 187 return self.fetch_motifs(collection=None, tf_name=name)
188
189 - def fetch_motifs( 190 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 191 tf_family=None, matrix_id=None, tax_group=None, species=None, 192 pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0, 193 min_sites=0, all=False, all_versions=False 194 ):
195 """ 196 Fetch a jaspar.Record (list) of motifs based on the provided selection 197 criteria. 198 199 Arguments:: 200 201 Except where obvious, all selection criteria arguments may be 202 specified as a single value or a list of values. Motifs must 203 meet ALL the specified selection criteria to be returned with 204 the precedent exceptions noted below. 205 206 all - Takes precedent of all other selection criteria. 207 Every motif is returned. If 'all_versions' is also 208 specified, all versions of every motif are returned, 209 otherwise just the latest version of every motif is 210 returned. 211 matrix_id - Takes precedence over all other selection criteria 212 except 'all'. Only motifs with the given JASPAR 213 matrix ID(s) are returned. A matrix ID may be 214 specified as just a base ID or full JASPAR IDs 215 including version number. If only a base ID is 216 provided for specific motif(s), then just the latest 217 version of those motif(s) are returned unless 218 'all_versions' is also specified. 219 collection - Only motifs from the specified JASPAR collection(s) 220 are returned. NOTE - if not specified, the collection 221 defaults to CORE for all other selection criteria 222 except 'all' and 'matrix_id'. To apply the other 223 selection criteria across all JASPAR collections, 224 explicitly set collection=None. 225 tf_name - Only motifs with the given name(s) are returned. 226 tf_class - Only motifs of the given TF class(es) are returned. 227 tf_family - Only motifs from the given TF families are returned. 228 tax_group - Only motifs belonging to the given taxonomic 229 supergroups are returned (e.g. 'vertebrates', 230 'insects', 'nematodes' etc.) 231 species - Only motifs derived from the given species are 232 returned. Species are specified as taxonomy IDs. 233 data_type - Only motifs generated with the given data type (e.g. 234 ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned. 235 NOTE - must match exactly as stored in the database. 
236 pazar_id - Only motifs with the given PAZAR TF ID are returned. 237 medline - Only motifs with the given medline (PubmMed IDs) are 238 returned. 239 min_ic - Only motifs whose profile matrices have at least this 240 information content (specificty) are returned. 241 min_length - Only motifs whose profiles are of at least this 242 length are returned. 243 min_sites - Only motifs compiled from at least these many binding 244 sites are returned. 245 all_versions- Unless specified, just the latest version of motifs 246 determined by the other selection criteria are 247 returned. Otherwise all versions of the selected 248 motifs are returned. 249 250 Returns: 251 252 - A Bio.motifs.jaspar.Record (list) of motifs. 253 254 """ 255 256 # Fetch the internal IDs of the motifs using the criteria provided 257 int_ids = self._fetch_internal_id_list( 258 collection=collection, 259 tf_name=tf_name, 260 tf_class=tf_class, 261 tf_family=tf_family, 262 matrix_id=matrix_id, 263 tax_group=tax_group, 264 species=species, 265 pazar_id=pazar_id, 266 data_type=data_type, 267 medline=medline, 268 all=all, 269 all_versions=all_versions 270 ) 271 272 record = jaspar.Record() 273 274 """ 275 Now further filter motifs returned above based on any specified 276 matrix specific criteria. 277 """ 278 for int_id in int_ids: 279 motif = self._fetch_motif_by_internal_id(int_id) 280 281 # Filter motifs to those with matrix IC greater than min_ic 282 if min_ic: 283 if motif.pssm.mean() < min_ic: 284 continue 285 286 # Filter motifs to those with minimum length of min_length 287 if min_length: 288 if motif.length < min_length: 289 continue 290 291 # XXX We could also supply a max_length filter. 292 293 """ 294 Filter motifs to those composed of at least this many sites. 295 The perl TFBS module assumes column sums may be different but 296 this should be strictly enforced here we will ignore this and 297 just use the first column sum. 
298 """ 299 if min_sites: 300 num_sites = sum( 301 [motif.counts[nt][0] for nt in motif.alphabet.letters] 302 ) 303 if num_sites < min_sites: 304 continue 305 306 record.append(motif) 307 308 return record
309
310 - def _fetch_latest_version(self, base_id):
311 """ 312 Get the latest version number for the given base_id, 313 314 """ 315 316 cur = self.dbh.cursor() 317 cur.execute("""select VERSION from MATRIX where BASE_id = %s 318 order by VERSION desc limit 1""", (base_id,)) 319 320 row = cur.fetchone() 321 322 latest = None 323 if row: 324 latest = row[0] 325 else: 326 warnings.warn("Failed to fetch latest version number for JASPAR " 327 "motif with base ID '{0}'. " 328 "No JASPAR motif with this base ID appears to exist " 329 "in the database.".format(base_id), BiopythonWarning) 330 331 return latest
332
333 - def _fetch_internal_id(self, base_id, version):
334 """ 335 Fetch the internal id for a base id + version. Also checks if this 336 combo exists or not 337 338 """ 339 340 cur = self.dbh.cursor() 341 cur.execute("""select id from MATRIX where BASE_id = %s 342 and VERSION = %s""", (base_id, version)) 343 344 row = cur.fetchone() 345 346 int_id = None 347 if row: 348 int_id = row[0] 349 else: 350 warnings.warn("Failed to fetch internal database ID for JASPAR " 351 "motif with matrix ID '{0}.{1}'. " 352 "No JASPAR motif with this matrix ID appears to " 353 "exist.".format(base_id, version), BiopythonWarning) 354 355 return int_id
356
357 - def _fetch_motif_by_internal_id(self, int_id):
358 # fetch basic motif information 359 cur = self.dbh.cursor() 360 cur.execute("""select BASE_ID, VERSION, COLLECTION, NAME from MATRIX 361 where id = %s""", (int_id,)) 362 363 row = cur.fetchone() 364 365 # This should never happen as it is an internal method. If it does 366 # we should probably raise an exception 367 if not row: 368 warnings.warn("Could not fetch JASPAR motif with internal " 369 "ID = {0}".format(int_id), BiopythonWarning) 370 return None 371 372 base_id = row[0] 373 version = row[1] 374 collection = row[2] 375 name = row[3] 376 377 matrix_id = "".join([base_id, '.', str(version)]) 378 379 # fetch the counts matrix 380 counts = self._fetch_counts_matrix(int_id) 381 382 # Create new JASPAR motif 383 motif = jaspar.Motif( 384 matrix_id, name, collection=collection, counts=counts 385 ) 386 387 # fetch species 388 cur.execute("""select TAX_ID from MATRIX_SPECIES 389 where id = %s""", (int_id,)) 390 tax_ids = [] 391 rows = cur.fetchall() 392 for row in rows: 393 tax_ids.append(row[0]) 394 395 # Many JASPAR motifs (especially those not in the CORE collection) 396 # do not have taxonomy IDs. So this warning would get annoying. 397 # if not tax_ids: 398 # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif" 399 # " {0}".format(motif.matrix_id), BiopythonWarning) 400 401 motif.species = tax_ids 402 403 # fetch protein accession numbers 404 cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,)) 405 accs = [] 406 rows = cur.fetchall() 407 for row in rows: 408 accs.append(row[0]) 409 410 # Similarly as for taxonomy IDs, it would get annoying to print 411 # warnings for JASPAR motifs which do not have accession numbers. 
412 413 motif.acc = accs 414 415 # fetch remaining annotation as tags from the ANNOTATION table 416 cur.execute("""select TAG, VAL from MATRIX_ANNOTATION 417 where id = %s""", (int_id,)) 418 rows = cur.fetchall() 419 for row in rows: 420 attr = row[0] 421 val = row[1] 422 if attr == 'class': 423 motif.tf_class = val 424 elif attr == 'family': 425 motif.tf_family = val 426 elif attr == 'tax_group': 427 motif.tax_group = val 428 elif attr == 'type': 429 motif.data_type = val 430 elif attr == 'pazar_tf_id': 431 motif.pazar_id = val 432 elif attr == 'medline': 433 motif.medline = val 434 elif attr == 'comment': 435 motif.comment = val 436 else: 437 """ 438 TODO If we were to implement additional abitrary tags 439 motif.tag(attr, val) 440 """ 441 pass 442 443 return motif
444
445 - def _fetch_counts_matrix(self, int_id):
446 """ 447 Fetch the counts matrix from the JASPAR DB by the internal ID 448 449 Returns a Bio.motifs.matrix.GenericPositionMatrix 450 451 """ 452 counts = {} 453 cur = self.dbh.cursor() 454 455 for base in dna.letters: 456 base_counts = [] 457 458 cur.execute("""select val from MATRIX_DATA where ID = %s 459 and row = %s order by col""", (int_id, base)) 460 461 rows = cur.fetchall() 462 for row in rows: 463 base_counts.append(row[0]) 464 465 counts[base] = [float(x) for x in base_counts] 466 467 return matrix.GenericPositionMatrix(dna, counts)
468
469 - def _fetch_internal_id_list( 470 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 471 tf_family=None, matrix_id=None, tax_group=None, species=None, 472 pazar_id=None, data_type=None, medline=None, all=False, 473 all_versions=False 474 ):
475 """ 476 Fetch a list of internal JASPAR motif IDs based on various passed 477 parameters which may then be used to fetch the rest of the motif data. 478 479 Caller: 480 fetch_motifs() 481 482 Arguments: 483 See arguments sections of fetch_motifs() 484 485 Returns: 486 A list of internal JASPAR motif IDs which match the given 487 selection criteria arguments. 488 489 490 Build an SQL query based on the selection arguments provided. 491 492 1: First add table joins and sub-clauses for criteria corresponding to 493 named fields from the MATRIX and MATRIX_SPECIES tables such as 494 collection, matrix ID, name, species etc. 495 496 2: Then add joins/sub-clauses for tag/value parameters from the 497 MATRIX_ANNOTATION table. 498 499 For the surviving matrices, the responsibility to do matrix-based 500 feature filtering such as ic, number of sites etc, fall on the 501 calling fetch_motifs() method. 502 503 """ 504 505 int_ids = [] 506 507 cur = self.dbh.cursor() 508 509 """ 510 Special case 1: fetch ALL motifs. Highest priority. 511 Ignore all other selection arguments. 512 """ 513 if all: 514 cur.execute("select ID from MATRIX") 515 rows = cur.fetchall() 516 517 for row in rows: 518 int_ids.append(row[0]) 519 520 return int_ids 521 522 """ 523 Special case 2: fetch specific motifs by their JASPAR IDs. This 524 has higher priority than any other except the above 'all' case. 525 Ignore all other selection arguments. 526 """ 527 if matrix_id: 528 """ 529 These might be either stable IDs or stable_ID.version. 
530 If just stable ID and if all_versions == 1, return all versions, 531 otherwise just the latest 532 """ 533 if all_versions: 534 for id in matrix_id: 535 # ignore vesion here, this is a stupidity filter 536 (base_id, version) = jaspar.split_jaspar_id(id) 537 cur.execute( 538 "select ID from MATRIX where BASE_ID = %s", (base_id,) 539 ) 540 541 rows = cur.fetchall() 542 for row in rows: 543 int_ids.append(row[0]) 544 else: 545 # only the lastest version, or the requested version 546 for id in matrix_id: 547 (base_id, version) = jaspar.split_jaspar_id(id) 548 549 if not version: 550 version = self._fetch_latest_version(base_id) 551 552 int_id = None 553 if version: 554 int_id = self._fetch_internal_id(base_id, version) 555 556 if int_id: 557 int_ids.append(int_id) 558 559 return int_ids 560 561 tables = ["MATRIX m"] 562 where_clauses = [] 563 564 # Select by MATRIX.COLLECTION 565 if collection: 566 if isinstance(collection, list): 567 # Multiple collections passed in as a list 568 clause = "m.COLLECTION in ('" 569 clause = "".join([clause, "','".join(collection)]) 570 clause = "".join([clause, "')"]) 571 else: 572 # A single collection - typical usage 573 clause = "m.COLLECTION = '%s'" % collection 574 575 where_clauses.append(clause) 576 577 # Select by MATRIX.NAME 578 if tf_name: 579 if isinstance(tf_name, list): 580 # Multiple names passed in as a list 581 clause = "m.NAME in ('" 582 clause = "".join([clause, "','".join(tf_name)]) 583 clause = "".join([clause, "')"]) 584 else: 585 # A single name 586 clause = "m.NAME = '%s'" % tf_name 587 588 where_clauses.append(clause) 589 590 # Select by MATRIX_SPECIES.TAX_ID 591 if species: 592 tables.append("MATRIX_SPECIES ms") 593 where_clauses.append("m.ID = ms.ID") 594 595 """ 596 NOTE: species are numeric taxonomy IDs but stored as varchars 597 in the DB. 
598 """ 599 if isinstance(species, list): 600 # Multiple tax IDs passed in as a list 601 clause = "ms.TAX_ID in ('" 602 clause = "".join([clause, "','".join(str(s) for s in species)]) 603 clause = "".join([clause, "')"]) 604 else: 605 # A single tax ID 606 clause = "ms.TAX_ID = '%s'" % str(species) 607 608 where_clauses.append(clause) 609 610 """ 611 Tag based selection from MATRIX_ANNOTATION 612 Differs from perl TFBS module in that the matrix class explicitly 613 has a tag attribute corresponding to the tags in the database. This 614 provides tremendous flexibility in adding new tags to the DB and 615 being able to select based on those tags with out adding new code. 616 In the JASPAR Motif class we have elected to use specific attributes 617 for the most commonly used tags and here correspondingly only allow 618 selection on these attributes. 619 620 The attributes corresponding to the tags for which selection is 621 provided are: 622 623 Attribute Tag 624 tf_class class 625 tf_family family 626 pazar_id pazar_tf_id 627 medline medline 628 data_type type 629 tax_group tax_group 630 """ 631 632 # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class") 633 if tf_class: 634 tables.append("MATRIX_ANNOTATION ma1") 635 where_clauses.append("m.ID = ma1.ID") 636 637 clause = "ma1.TAG = 'class'" 638 if isinstance(tf_class, list): 639 # A list of TF classes 640 clause = "".join([clause, " and ma1.VAL in ('"]) 641 clause = "".join([clause, "','".join(tf_class)]) 642 clause = "".join([clause, "')"]) 643 else: 644 # A single TF class 645 clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class]) 646 647 where_clauses.append(clause) 648 649 # Select by TF families (MATRIX_ANNOTATION.TAG="family") 650 if tf_family: 651 tables.append("MATRIX_ANNOTATION ma2") 652 where_clauses.append("m.ID = ma2.ID") 653 654 clause = "ma2.TAG = 'family'" 655 if isinstance(tf_family, list): 656 # A list of TF families 657 clause = "".join([clause, " and ma2.VAL in ('"]) 658 clause = 
"".join([clause, "','".join(tf_family)]) 659 clause = "".join([clause, "')"]) 660 else: 661 # A single TF family 662 clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family]) 663 664 where_clauses.append(clause) 665 666 # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id") 667 if pazar_id: 668 tables.append("MATRIX_ANNOTATION ma3") 669 where_clauses.append("m.ID = ma3.ID") 670 671 clause = "ma3.TAG = 'pazar_tf_id'" 672 if isinstance(pazar_id, list): 673 # A list of PAZAR IDs 674 clause = "".join([clause, " and ma3.VAL in ('"]) 675 clause = "".join([clause, "','".join(pazar_id)]) 676 clause = "".join([clause, "')"]) 677 else: 678 # A single PAZAR ID 679 clause = "".join([" and ma3.VAL = '%s' " % pazar_id]) 680 681 where_clauses.append(clause) 682 683 # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline") 684 if medline: 685 tables.append("MATRIX_ANNOTATION ma4") 686 where_clauses.append("m.ID = ma4.ID") 687 688 clause = "ma4.TAG = 'medline'" 689 if isinstance(medline, list): 690 # A list of PubMed IDs 691 clause = "".join([clause, " and ma4.VAL in ('"]) 692 clause = "".join([clause, "','".join(medline)]) 693 clause = "".join([clause, "')"]) 694 else: 695 # A single PubMed ID 696 clause = "".join([" and ma4.VAL = '%s' " % medline]) 697 698 where_clauses.append(clause) 699 700 # Select by data type(s) used to compile the matrix 701 # (MATRIX_ANNOTATION.TAG="type") 702 if data_type: 703 tables.append("MATRIX_ANNOTATION ma5") 704 where_clauses.append("m.ID = ma5.ID") 705 706 clause = "ma5.TAG = 'type'" 707 if isinstance(data_type, list): 708 # A list of data types 709 clause = "".join([clause, " and ma5.VAL in ('"]) 710 clause = "".join([clause, "','".join(data_type)]) 711 clause = "".join([clause, "')"]) 712 else: 713 # A single data type 714 clause = "".join([" and ma5.VAL = '%s' " % data_type]) 715 716 where_clauses.append(clause) 717 718 # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group") 719 if tax_group: 720 
tables.append("MATRIX_ANNOTATION ma6") 721 where_clauses.append("m.ID = ma6.ID") 722 723 clause = "ma6.TAG = 'tax_group'" 724 if isinstance(tax_group, list): 725 # A list of tax IDs 726 clause = "".join([clause, " and ma6.VAL in ('"]) 727 clause = "".join([clause, "','".join(tax_group)]) 728 clause = "".join([clause, "')"]) 729 else: 730 # A single tax ID 731 clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group]) 732 733 where_clauses.append(clause) 734 735 sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)]) 736 737 if where_clauses: 738 sql = "".join([sql, " where ", " and ".join(where_clauses)]) 739 740 # print "sql = %s" % sql 741 742 cur.execute(sql) 743 rows = cur.fetchall() 744 745 for row in rows: 746 id = row[0] 747 if all_versions: 748 int_ids.append(id) 749 else: 750 # is the latest version? 751 if self._is_latest_version(id): 752 int_ids.append(id) 753 754 if len(int_ids) < 1: 755 warnings.warn("Zero motifs returned with current select critera", 756 BiopythonWarning) 757 758 return int_ids
759
760 - def _is_latest_version(self, int_id):
761 """ 762 Does this internal ID represent the latest version of the JASPAR 763 matrix (collapse on base ids) 764 765 """ 766 cur = self.dbh.cursor() 767 768 cur.execute("select count(*) from MATRIX where " 769 "BASE_ID = (select BASE_ID from MATRIX where ID = %s) " 770 "and VERSION > (select VERSION from MATRIX where ID = %s)", 771 (int_id, int_id)) 772 773 row = cur.fetchone() 774 775 count = row[0] 776 777 if count == 0: 778 # no matrices with higher version ID and same base id 779 return True 780 781 return False
782