Package Bio :: Package Motif :: Module _Motif
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif._Motif

  1  # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Implementation of sequence motifs (PRIVATE). 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  from Bio._py3k import range 
 11   
 12  from Bio.Seq import Seq 
 13  from Bio.SubsMat import FreqTable 
 14  from Bio.Alphabet import IUPAC 
 15  import math, random 
 16   
17 -class Motif(object):
18 """ 19 A class representing sequence motifs. 20 """
21 - def __init__(self,alphabet=IUPAC.unambiguous_dna):
22 self.instances = [] 23 self.has_instances=False 24 self.counts = {} 25 self.has_counts=False 26 self.mask = [] 27 self._pwm_is_current = False 28 self._pwm = [] 29 self._log_odds_is_current = False 30 self._log_odds = [] 31 self.alphabet=alphabet 32 self.length=None 33 self.background=dict((n, 1.0/len(self.alphabet.letters)) \ 34 for n in self.alphabet.letters) 35 self.beta=1.0 36 self.info=None 37 self.name=""
38
39 - def _check_length(self, len):
40 # TODO - Change parameter name (len clashes with built in function)? 41 if self.length is None: 42 self.length = len 43 elif self.length != len: 44 raise ValueError("You can't change the length of the motif " 45 "%r %r %r" % (self.length, self.instances, len))
46
47 - def _check_alphabet(self, alphabet):
48 if self.alphabet is None: 49 self.alphabet=alphabet 50 elif self.alphabet != alphabet: 51 raise ValueError("Wrong Alphabet")
52
53 - def add_instance(self, instance):
54 """ 55 adds new instance to the motif 56 """ 57 self._check_alphabet(instance.alphabet) 58 self._check_length(len(instance)) 59 if self.has_counts: 60 for i in range(self.length): 61 let=instance[i] 62 self.counts[let][i]+=1 63 64 if self.has_instances or not self.has_counts: 65 self.instances.append(instance) 66 self.has_instances=True 67 68 self._pwm_is_current = False 69 self._log_odds_is_current = False
70 71
72 - def set_mask(self, mask):
73 """ 74 sets the mask for the motif 75 76 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns 77 """ 78 self._check_length(len(mask)) 79 self.mask=[] 80 for char in mask: 81 if char=="*": 82 self.mask.append(1) 83 elif char==" ": 84 self.mask.append(0) 85 else: 86 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
87
88 - def pwm(self,laplace=True):
89 """ 90 returns the PWM computed for the set of instances 91 92 if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions. 93 """ 94 95 if self._pwm_is_current: 96 return self._pwm 97 #we need to compute new pwm 98 self._pwm = [] 99 for i in range(self.length): 100 dict = {} 101 #filling the dict with 0's 102 for letter in self.alphabet.letters: 103 if laplace: 104 dict[letter]=self.beta*self.background[letter] 105 else: 106 dict[letter]=0.0 107 if self.has_counts: 108 #taking the raw counts 109 for letter in self.alphabet.letters: 110 dict[letter]+=self.counts[letter][i] 111 elif self.has_instances: 112 #counting the occurences of letters in instances 113 for seq in self.instances: 114 #dict[seq[i]]=dict[seq[i]]+1 115 try: 116 dict[seq[i]]+=1 117 except KeyError: #we need to ignore non-alphabet letters 118 pass 119 self._pwm.append(FreqTable.FreqTable(dict, FreqTable.COUNT, self.alphabet)) 120 self._pwm_is_current=1 121 return self._pwm
122
123 - def log_odds(self,laplace=True):
124 """ 125 returns the logg odds matrix computed for the set of instances 126 """ 127 128 if self._log_odds_is_current: 129 return self._log_odds 130 #we need to compute new pwm 131 self._log_odds = [] 132 pwm=self.pwm(laplace) 133 for i in range(self.length): 134 d = {} 135 for a in self.alphabet.letters: 136 d[a]=math.log(pwm[i][a]/self.background[a], 2) 137 self._log_odds.append(d) 138 self._log_odds_is_current=1 139 return self._log_odds
140
141 - def ic(self):
142 """Method returning the information content of a motif. 143 """ 144 res=0 145 pwm=self.pwm() 146 for i in range(self.length): 147 res+=2 148 for a in self.alphabet.letters: 149 if pwm[i][a]!=0: 150 res+=pwm[i][a]*math.log(pwm[i][a], 2) 151 return res
152
153 - def exp_score(self,st_dev=False):
154 """ 155 Computes expected score of motif's instance and its standard deviation 156 """ 157 exs=0.0 158 var=0.0 159 pwm=self.pwm() 160 for i in range(self.length): 161 ex1=0.0 162 ex2=0.0 163 for a in self.alphabet.letters: 164 if pwm[i][a]!=0: 165 ex1+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2)) 166 ex2+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2))**2 167 exs+=ex1 168 var+=ex2-ex1**2 169 if st_dev: 170 return exs, math.sqrt(var) 171 else: 172 return exs
173
174 - def search_instances(self, sequence):
175 """ 176 a generator function, returning found positions of instances of the motif in a given sequence 177 """ 178 if not self.has_instances: 179 raise ValueError ("This motif has no instances") 180 for pos in range(0, len(sequence) - self.length + 1): 181 for instance in self.instances: 182 if str(instance) == str(sequence[pos:pos + self.length]): 183 yield (pos, instance) 184 break # no other instance will fit (we don't want to return multiple hits)
185
186 - def score_hit(self,sequence,position,normalized=0,masked=0):
187 """ 188 give the pwm score for a given position 189 """ 190 lo=self.log_odds() 191 score = 0.0 192 for pos in range(self.length): 193 a = sequence[position+pos] 194 if not masked or self.mask[pos]: 195 try: 196 score += lo[pos][a] 197 except: 198 pass 199 if normalized: 200 if not masked: 201 score/=self.length 202 else: 203 score/=len([x for x in self.mask if x]) 204 return score
205
206 - def search_pwm(self,sequence,normalized=0,masked=0,threshold=0.0,both=True):
207 """ 208 a generator function, returning found hits in a given sequence with the pwm score higher than the threshold 209 """ 210 if both: 211 rc = self.reverse_complement() 212 213 sequence = str(sequence).upper() 214 for pos in range(0, len(sequence) - self.length + 1): 215 score = self.score_hit(sequence, pos, normalized, masked) 216 if score > threshold: 217 yield (pos, score) 218 if both: 219 rev_score = rc.score_hit(sequence, pos, normalized, masked) 220 if rev_score > threshold: 221 yield (-pos, rev_score)
222
223 - def dist_pearson(self, motif, masked = 0):
224 """ 225 return the similarity score based on pearson correlation for the given motif against self. 226 227 We use the Pearson's correlation of the respective probabilities. 228 """ 229 230 if self.alphabet != motif.alphabet: 231 raise ValueError("Cannot compare motifs with different alphabets") 232 233 max_p=-2 234 for offset in range(-self.length+1, motif.length): 235 if offset<0: 236 p = self.dist_pearson_at(motif, -offset) 237 else: #offset>=0 238 p = motif.dist_pearson_at(self, offset) 239 240 if max_p<p: 241 max_p=p 242 max_o=-offset 243 return 1-max_p, max_o
244
245 - def dist_pearson_at(self, motif, offset):
246 sxx = 0 # \sum x^2 247 sxy = 0 # \sum x \cdot y 248 sx = 0 # \sum x 249 sy = 0 # \sum y 250 syy = 0 # \sum x^2 251 norm=max(self.length, offset+motif.length) 252 253 for pos in range(max(self.length, offset+motif.length)): 254 for l in self.alphabet.letters: 255 xi = self[pos][l] 256 yi = motif[pos-offset][l] 257 sx = sx + xi 258 sy = sy + yi 259 sxx = sxx + xi * xi 260 syy = syy + yi * yi 261 sxy = sxy + xi * yi 262 263 norm *= len(self.alphabet.letters) 264 s1 = (sxy - sx*sy*1.0/norm) 265 s2 = (norm*sxx - sx*sx*1.0)*(norm*syy- sy*sy*1.0) 266 return s1/math.sqrt(s2)
267
268 - def dist_product(self, other):
269 """ 270 A similarity measure taking into account a product probability of generating overlaping instances of two motifs 271 """ 272 max_p=0.0 273 for offset in range(-self.length+1, other.length): 274 if offset<0: 275 p = self.dist_product_at(other, -offset) 276 else: #offset>=0 277 p = other.dist_product_at(self, offset) 278 if max_p<p: 279 max_p=p 280 max_o=-offset 281 return 1-max_p/self.dist_product_at(self, 0), max_o
282
283 - def dist_product_at(self, other, offset):
284 s=0 285 for i in range(max(self.length, offset+other.length)): 286 f1=self[i] 287 f2=other[i-offset] 288 for n, b in self.background.items(): 289 s+=b*f1[n]*f2[n] 290 return s/i
291
292 - def dist_dpq(self, other):
293 r"""Calculates the DPQ distance measure between motifs. 294 295 It is calculated as a maximal value of DPQ formula (shown using LaTeX 296 markup, familiar to mathematicians): 297 298 \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \ 299 \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) + 300 m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k])) 301 } 302 303 over possible non-spaced alignemts of two motifs. See this reference: 304 305 D. M Endres and J. E Schindelin, "A new metric for probability 306 distributions", IEEE transactions on Information Theory 49, no. 7 307 (July 2003): 1858-1860. 308 """ 309 310 min_d=float("inf") 311 min_o=-1 312 d_s=[] 313 for offset in range(-self.length+1, other.length): 314 #print("%2.3d"%offset) 315 if offset<0: 316 d = self.dist_dpq_at(other, -offset) 317 overlap = self.length+offset 318 else: #offset>=0 319 d = other.dist_dpq_at(self, offset) 320 overlap = other.length-offset 321 overlap = min(self.length, other.length, overlap) 322 out = self.length+other.length - 2*overlap 323 #print("%f %f %f" % (d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap)) 324 #d = d/(2*overlap) 325 d = (d/(out+overlap))*(2*overlap+out)/(2*overlap) 326 #print(d) 327 d_s.append((offset, d)) 328 if min_d > d: 329 min_d = d 330 min_o = -offset 331 return min_d, min_o #,d_s
332
333 - def dist_dpq_at(self, other, offset):
334 """ 335 calculates the dist_dpq measure with a given offset. 336 337 offset should satisfy 0<=offset<=len(self) 338 """ 339 def dpq (f1, f2, alpha): 340 s=0 341 for n in alpha.letters: 342 avg=(f1[n]+f2[n])/2 343 s+=f1[n]*math.log(f1[n]/avg, 2)+f2[n]*math.log(f2[n]/avg, 2) 344 return math.sqrt(s)
345 346 s=0 347 for i in range(max(self.length, offset+other.length)): 348 f1=self[i] 349 f2=other[i-offset] 350 s+=dpq(f1, f2, self.alphabet) 351 return s
352
353 - def _read(self, stream):
354 """Reads the motif from the stream (in AlignAce format). 355 356 the self.alphabet variable must be set beforehand. 357 If the last line contains asterisks it is used for setting mask 358 """ 359 360 while True: 361 ln = stream.readline() 362 if "*" in ln: 363 self.set_mask(ln.strip("\n\c")) 364 break 365 self.add_instance(Seq(ln.strip(), self.alphabet))
366
367 - def __str__(self,masked=False):
368 """ string representation of a motif. 369 """ 370 str = "".join(str(inst) + "\n" for inst in self.instances) 371 372 if masked: 373 for i in range(self.length): 374 if self.mask[i]: 375 str += "*" 376 else: 377 str += " " 378 str += "\n" 379 return str
380
381 - def __len__(self):
382 """return the length of a motif 383 384 Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly. 385 """ 386 if self.length is None: 387 return 0 388 else: 389 return self.length
390
391 - def _write(self, stream):
392 """ 393 writes the motif to the stream 394 """ 395 396 stream.write(self.__str__())
397 398 399
400 - def _to_fasta(self):
401 """ 402 FASTA representation of motif 403 """ 404 if not self.has_instances: 405 self.make_instances_from_counts() 406 return "".join(">instance%d\n%s\n" % (i, inst) for i, inst in enumerate(self.instances))
407
408 - def reverse_complement(self):
409 """ 410 Gives the reverse complement of the motif 411 """ 412 res = Motif() 413 if self.has_instances: 414 for i in self.instances: 415 res.add_instance(i.reverse_complement()) 416 else: # has counts 417 res.has_counts=True 418 res.counts["A"]=self.counts["T"][:] 419 res.counts["T"]=self.counts["A"][:] 420 res.counts["G"]=self.counts["C"][:] 421 res.counts["C"]=self.counts["G"][:] 422 res.counts["A"].reverse() 423 res.counts["C"].reverse() 424 res.counts["G"].reverse() 425 res.counts["T"].reverse() 426 res.length=self.length 427 res.mask = self.mask 428 return res
429 430
431 - def _from_jaspar_pfm(self,stream,make_instances=False):
432 """ 433 reads the motif from Jaspar .pfm file 434 435 The instances are fake, but the pwm is accurate. 436 """ 437 return self._from_horiz_matrix(stream, letters="ACGT", make_instances=make_instances)
438
439 - def _from_vert_matrix(self,stream,letters=None,make_instances=False):
440 """reads a vertical count matrix from stream and fill in the counts. 441 """ 442 443 self.counts = {} 444 self.has_counts = True 445 if letters is None: 446 letters = self.alphabet.letters 447 self.length = 0 448 for i in letters: 449 self.counts[i] = [] 450 for ln in stream.readlines(): 451 rec = [float(x) for x in ln.strip().split()] 452 for k, v in zip(letters, rec): 453 self.counts[k].append(v) 454 self.length += 1 455 self.set_mask("*"*self.length) 456 if make_instances is True: 457 self.make_instances_from_counts() 458 return self
459
460 - def _from_horiz_matrix(self,stream,letters=None,make_instances=False):
461 """reads a horizontal count matrix from stream and fill in the counts. 462 """ 463 if letters is None: 464 letters=self.alphabet.letters 465 self.counts = {} 466 self.has_counts=True 467 468 for i in letters: 469 ln = stream.readline().strip().split() 470 #if there is a letter in the beginning, ignore it 471 if ln[0] == i: 472 ln = ln[1:] 473 #print(ln) 474 try: 475 self.counts[i] = [int(x) for x in ln] 476 except ValueError: #not integers 477 self.counts[i] = [float(x) for x in ln] 478 #print(counts[i]) 479 480 s = sum(self.counts[nuc][0] for nuc in letters) 481 l = len(self.counts[letters[0]]) 482 self.length=l 483 self.set_mask("*"*l) 484 if make_instances is True: 485 self.make_instances_from_counts() 486 return self
487 488
489 - def make_instances_from_counts(self):
490 """Creates "fake" instances for a motif created from a count matrix. 491 492 In case the sums of counts are different for different columnes, the 493 shorter columns are padded with background. 494 """ 495 alpha = "".join(self.alphabet.letters) 496 #col[i] is a column taken from aligned motif instances 497 col = [] 498 self.has_instances = True 499 self.instances = [] 500 s = sum(self.counts[nuc][0] for nuc in self.alphabet.letters) 501 for i in range(self.length): 502 col.append("") 503 for n in self.alphabet.letters: 504 col[i] = col[i] + n*(self.counts[n][i]) 505 if len(col[i]) < s: 506 print("WARNING, column too short %i %i" % (len(col[i]), s)) 507 col[i] += (alpha*s)[:(s-len(col[i]))] 508 #print("column %i, %s" % (i, col[i])) 509 #iterate over instances 510 for i in range(s): 511 inst = "" #start with empty seq 512 for j in range(self.length): #iterate over positions 513 inst += col[j][i] 514 #print("%i %s" % (i,inst) 515 inst = Seq(inst, self.alphabet) 516 self.add_instance(inst) 517 return self.instances
518
519 - def make_counts_from_instances(self):
520 """Creates the count matrix for a motif with instances. 521 522 """ 523 #make strings for "columns" of motifs 524 #col[i] is a column taken from aligned motif instances 525 counts={} 526 for a in self.alphabet.letters: 527 counts[a]=[] 528 self.has_counts=True 529 s = len(self.instances) 530 for i in range(self.length): 531 ci = dict((a, 0) for a in self.alphabet.letters) 532 for inst in self.instances: 533 ci[inst[i]]+=1 534 for a in self.alphabet.letters: 535 counts[a].append(ci[a]) 536 self.counts=counts 537 return counts
538
539 - def _from_jaspar_sites(self, stream):
540 """ 541 reads the motif from Jaspar .sites file 542 543 The instances and pwm are OK. 544 """ 545 546 while True: 547 ln = stream.readline()# read the header "$>...." 548 if ln=="" or ln[0]!=">": 549 break 550 551 ln=stream.readline().strip()#read the actual sequence 552 i=0 553 while ln[i]==ln[i].lower(): 554 i+=1 555 inst="" 556 while i<len(ln) and ln[i]==ln[i].upper(): 557 inst+=ln[i] 558 i+=1 559 inst=Seq(inst, self.alphabet) 560 self.add_instance(inst) 561 562 self.set_mask("*"*len(inst)) 563 return self
564 565
566 - def __getitem__(self, index):
567 """Returns the probability distribution over symbols at a given position, padding with background. 568 569 If the requested index is out of bounds, the returned distribution comes from background. 570 """ 571 if index in range(self.length): 572 return self.pwm()[index] 573 else: 574 return self.background
575
576 - def consensus(self):
577 """Returns the consensus sequence of a motif. 578 """ 579 res="" 580 for i in range(self.length): 581 max_f=0 582 max_n="X" 583 for n in sorted(self[i]): 584 if self[i][n]>max_f: 585 max_f=self[i][n] 586 max_n=n 587 res+=max_n 588 return Seq(res, self.alphabet)
589
590 - def anticonsensus(self):
591 """returns the least probable pattern to be generated from this motif. 592 """ 593 res="" 594 for i in range(self.length): 595 min_f=10.0 596 min_n="X" 597 for n in sorted(self[i]): 598 if self[i][n]<min_f: 599 min_f=self[i][n] 600 min_n=n 601 res+=min_n 602 return Seq(res, self.alphabet)
603
604 - def max_score(self):
605 """Maximal possible score for this motif. 606 607 returns the score computed for the consensus sequence. 608 """ 609 return self.score_hit(self.consensus(), 0)
610
611 - def min_score(self):
612 """Minimal possible score for this motif. 613 614 returns the score computed for the anticonsensus sequence. 615 """ 616 return self.score_hit(self.anticonsensus(), 0)
617
618 - def weblogo(self,fname,format="PNG",**kwds):
619 """ 620 uses the Berkeley weblogo service to download and save a weblogo of itself 621 622 requires an internet connection. 623 The parameters from **kwds are passed directly to the weblogo server. 624 """ 625 from Bio._py3k import urlopen, urlencode, Request 626 627 al= self._to_fasta() 628 url = 'http://weblogo.berkeley.edu/logo.cgi' 629 values = {'sequence': al, 630 'format': format, 631 'logowidth': '18', 632 'logoheight': '5', 633 'logounits': 'cm', 634 'kind': 'AUTO', 635 'firstnum': "1", 636 'command': 'Create Logo', 637 'smallsamplecorrection': "on", 638 'symbolsperline': 32, 639 'res': '96', 640 'res_units': 'ppi', 641 'antialias': 'on', 642 'title': '', 643 'barbits': '', 644 'xaxis': 'on', 645 'xaxis_label': '', 646 'yaxis': 'on', 647 'yaxis_label': '', 648 'showends': 'on', 649 'shrink': '0.5', 650 'fineprint': 'on', 651 'ticbits': '1', 652 'colorscheme': 'DEFAULT', 653 'color1': 'green', 654 'color2': 'blue', 655 'color3': 'red', 656 'color4': 'black', 657 'color5': 'purple', 658 'color6': 'orange', 659 'color1': 'black', 660 } 661 for k, v in kwds.items(): 662 values[k]=str(v) 663 664 data = urlencode(values) 665 req = Request(url, data) 666 response = urlopen(req) 667 with open(fname,"w") as f: 668 im=response.read() 669 f.write(im)
670 671
672 - def _to_transfac(self):
673 """Write the representation of a motif in TRANSFAC format 674 """ 675 res="XX\nTY Motif\n" #header 676 try: 677 res+="ID %s\n"%self.name 678 except: 679 pass 680 res+="BF undef\nP0" 681 for a in self.alphabet.letters: 682 res+=" %s"%a 683 res+="\n" 684 if not self.has_counts: 685 self.make_counts_from_instances() 686 for i in range(self.length): 687 if i<9: 688 res+="0%d"%(i+1) 689 else: 690 res+="%d"%(i+1) 691 for a in self.alphabet.letters: 692 res+=" %d"%self.counts[a][i] 693 res+="\n" 694 res+="XX\n" 695 return res
696
697 - def _to_vertical_matrix(self,letters=None):
698 """Return string representation of the motif as a matrix. 699 700 """ 701 if letters is None: 702 letters = self.alphabet.letters 703 self._pwm_is_current=False 704 pwm = self.pwm(laplace=False) 705 res = "" 706 for i in range(self.length): 707 res += "\t".join(str(pwm[i][a]) for a in letters) 708 res += "\n" 709 return res
710
711 - def _to_horizontal_matrix(self,letters=None,normalized=True):
712 """Return string representation of the motif as a matrix. 713 714 """ 715 if letters is None: 716 letters = self.alphabet.letters 717 res = "" 718 if normalized: #output PWM 719 self._pwm_is_current=False 720 mat=self.pwm(laplace=False) 721 for a in letters: 722 res += "\t".join(str(mat[i][a]) for i in range(self.length)) 723 res += "\n" 724 else: #output counts 725 if not self.has_counts: 726 self.make_counts_from_instances() 727 mat = self.counts 728 for a in letters: 729 res += "\t".join(str(mat[a][i]) for i in range(self.length)) 730 res += "\n" 731 return res
732
733 - def _to_jaspar_pfm(self):
734 """Returns the pfm representation of the motif 735 """ 736 return self._to_horizontal_matrix(normalized=False, letters="ACGT")
737
738 - def format(self, format):
739 """Returns a string representation of the Motif in a given format 740 741 Currently supported fromats: 742 - jaspar-pfm : JASPAR Position Frequency Matrix 743 - transfac : TRANSFAC like files 744 - fasta : FASTA file with instances 745 """ 746 747 formatters={ 748 "jaspar-pfm": self._to_jaspar_pfm, 749 "transfac": self._to_transfac, 750 "fasta": self._to_fasta, 751 } 752 753 try: 754 return formatters[format]() 755 except KeyError: 756 raise ValueError("Wrong format type")
757
758 - def scanPWM(self, seq):
759 """Matrix of log-odds scores for a nucleotide sequence. 760 761 scans a nucleotide sequence and returns the matrix of log-odds 762 scores for all positions. 763 764 - the result is a one-dimensional list or numpy array 765 - the sequence can only be a DNA sequence 766 - the search is performed only on one strand 767 """ 768 #TODO - Code itself tolerates ambiguous bases (as NaN). 769 if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA): 770 raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs" \ 771 % self.alphabet) 772 if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA): 773 raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences" \ 774 % sequence.alphabet) 775 776 seq = str(seq) 777 778 # check if the fast C code can be used 779 try: 780 import _pwm 781 except ImportError: 782 # use the slower Python code otherwise 783 return self._pwm_calculate(seq) 784 785 # get the log-odds matrix into a proper shape 786 # (each row contains sorted (ACGT) log-odds values) 787 logodds=[[y[1] for y in sorted(x.items())] for x in self.log_odds()] 788 return _pwm.calculate(seq, logodds)
789
790 - def _pwm_calculate(self, sequence):
791 logodds = self.log_odds() 792 m = len(logodds) 793 s = len(sequence) 794 n = s - m + 1 795 result = [None] * n 796 for i in range(n): 797 score = 0.0 798 for j in range(m): 799 c = sequence[i+j] 800 temp = logodds[j].get(c) 801 if temp is None: 802 break 803 score += temp 804 else: 805 result[i] = score 806 return result
807