Package Bio :: Package Motif :: Module _Motif
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif._Motif

  1  # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Implementation of sequence motifs (PRIVATE). 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  from Bio._py3k import range 
 11   
 12  from Bio.Seq import Seq 
 13  from Bio.SubsMat import FreqTable 
 14  from Bio.Alphabet import IUPAC 
 15  import math, random 
 16   
17 -class Motif(object):
18 """ 19 A class representing sequence motifs. 20 """
21 - def __init__(self,alphabet=IUPAC.unambiguous_dna):
22 self.instances = [] 23 self.has_instances=False 24 self.counts = {} 25 self.has_counts=False 26 self.mask = [] 27 self._pwm_is_current = False 28 self._pwm = [] 29 self._log_odds_is_current = False 30 self._log_odds = [] 31 self.alphabet=alphabet 32 self.length=None 33 self.background=dict((n, 1.0/len(self.alphabet.letters)) \ 34 for n in self.alphabet.letters) 35 self.beta=1.0 36 self.info=None 37 self.name=""
38
39 - def _check_length(self, len):
40 # TODO - Change parameter name (len clashes with built in function)? 41 if self.length is None: 42 self.length = len 43 elif self.length != len: 44 raise ValueError("You can't change the length of the motif " 45 "%r %r %r" % (self.length, self.instances, len))
46
47 - def _check_alphabet(self, alphabet):
48 if self.alphabet is None: 49 self.alphabet=alphabet 50 elif self.alphabet != alphabet: 51 raise ValueError("Wrong Alphabet")
52
53 - def add_instance(self, instance):
54 """ 55 adds new instance to the motif 56 """ 57 self._check_alphabet(instance.alphabet) 58 self._check_length(len(instance)) 59 if self.has_counts: 60 for i in range(self.length): 61 let=instance[i] 62 self.counts[let][i]+=1 63 64 if self.has_instances or not self.has_counts: 65 self.instances.append(instance) 66 self.has_instances=True 67 68 self._pwm_is_current = False 69 self._log_odds_is_current = False
70 71
72 - def set_mask(self, mask):
73 """ 74 sets the mask for the motif 75 76 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns 77 """ 78 self._check_length(len(mask)) 79 self.mask=[] 80 for char in mask: 81 if char=="*": 82 self.mask.append(1) 83 elif char==" ": 84 self.mask.append(0) 85 else: 86 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
87
88 - def pwm(self,laplace=True):
89 """ 90 returns the PWM computed for the set of instances 91 92 if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions. 93 """ 94 95 if self._pwm_is_current: 96 return self._pwm 97 #we need to compute new pwm 98 self._pwm = [] 99 for i in range(self.length): 100 dict = {} 101 #filling the dict with 0's 102 for letter in self.alphabet.letters: 103 if laplace: 104 dict[letter]=self.beta*self.background[letter] 105 else: 106 dict[letter]=0.0 107 if self.has_counts: 108 #taking the raw counts 109 for letter in self.alphabet.letters: 110 dict[letter]+=self.counts[letter][i] 111 elif self.has_instances: 112 #counting the occurences of letters in instances 113 for seq in self.instances: 114 #dict[seq[i]]=dict[seq[i]]+1 115 try: 116 dict[seq[i]]+=1 117 except KeyError: #we need to ignore non-alphabet letters 118 pass 119 self._pwm.append(FreqTable.FreqTable(dict, FreqTable.COUNT, self.alphabet)) 120 self._pwm_is_current=1 121 return self._pwm
122
123 - def log_odds(self,laplace=True):
124 """ 125 returns the logg odds matrix computed for the set of instances 126 """ 127 128 if self._log_odds_is_current: 129 return self._log_odds 130 #we need to compute new pwm 131 self._log_odds = [] 132 pwm=self.pwm(laplace) 133 for i in range(self.length): 134 d = {} 135 for a in self.alphabet.letters: 136 d[a]=math.log(pwm[i][a]/self.background[a], 2) 137 self._log_odds.append(d) 138 self._log_odds_is_current=1 139 return self._log_odds
140
141 - def ic(self):
142 """Method returning the information content of a motif. 143 """ 144 res=0 145 pwm=self.pwm() 146 for i in range(self.length): 147 res+=2 148 for a in self.alphabet.letters: 149 if pwm[i][a]!=0: 150 res+=pwm[i][a]*math.log(pwm[i][a], 2) 151 return res
152
153 - def exp_score(self,st_dev=False):
154 """ 155 Computes expected score of motif's instance and its standard deviation 156 """ 157 exs=0.0 158 var=0.0 159 pwm=self.pwm() 160 for i in range(self.length): 161 ex1=0.0 162 ex2=0.0 163 for a in self.alphabet.letters: 164 if pwm[i][a]!=0: 165 ex1+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2)) 166 ex2+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2))**2 167 exs+=ex1 168 var+=ex2-ex1**2 169 if st_dev: 170 return exs, math.sqrt(var) 171 else: 172 return exs
173
174 - def search_instances(self, sequence):
175 """ 176 a generator function, returning found positions of instances of the motif in a given sequence 177 """ 178 if not self.has_instances: 179 raise ValueError ("This motif has no instances") 180 for pos in range(0, len(sequence)-self.length+1): 181 for instance in self.instances: 182 if instance.tostring()==sequence[pos:pos+self.length].tostring(): 183 yield(pos, instance) 184 break # no other instance will fit (we don't want to return multiple hits)
185
186 - def score_hit(self,sequence,position,normalized=0,masked=0):
187 """ 188 give the pwm score for a given position 189 """ 190 lo=self.log_odds() 191 score = 0.0 192 for pos in range(self.length): 193 a = sequence[position+pos] 194 if not masked or self.mask[pos]: 195 try: 196 score += lo[pos][a] 197 except: 198 pass 199 if normalized: 200 if not masked: 201 score/=self.length 202 else: 203 score/=len([x for x in self.mask if x]) 204 return score
205
206 - def search_pwm(self,sequence,normalized=0,masked=0,threshold=0.0,both=True):
207 """ 208 a generator function, returning found hits in a given sequence with the pwm score higher than the threshold 209 """ 210 if both: 211 rc = self.reverse_complement() 212 213 sequence=sequence.tostring().upper() 214 for pos in range(0, len(sequence)-self.length+1): 215 score = self.score_hit(sequence, pos, normalized, masked) 216 if score > threshold: 217 yield (pos, score) 218 if both: 219 rev_score = rc.score_hit(sequence, pos, normalized, masked) 220 if rev_score > threshold: 221 yield (-pos, rev_score)
222
223 - def dist_pearson(self, motif, masked = 0):
224 """ 225 return the similarity score based on pearson correlation for the given motif against self. 226 227 We use the Pearson's correlation of the respective probabilities. 228 """ 229 230 if self.alphabet != motif.alphabet: 231 raise ValueError("Cannot compare motifs with different alphabets") 232 233 max_p=-2 234 for offset in range(-self.length+1, motif.length): 235 if offset<0: 236 p = self.dist_pearson_at(motif, -offset) 237 else: #offset>=0 238 p = motif.dist_pearson_at(self, offset) 239 240 if max_p<p: 241 max_p=p 242 max_o=-offset 243 return 1-max_p, max_o
244
245 - def dist_pearson_at(self, motif, offset):
246 sxx = 0 # \sum x^2 247 sxy = 0 # \sum x \cdot y 248 sx = 0 # \sum x 249 sy = 0 # \sum y 250 syy = 0 # \sum x^2 251 norm=max(self.length, offset+motif.length) 252 253 for pos in range(max(self.length, offset+motif.length)): 254 for l in self.alphabet.letters: 255 xi = self[pos][l] 256 yi = motif[pos-offset][l] 257 sx = sx + xi 258 sy = sy + yi 259 sxx = sxx + xi * xi 260 syy = syy + yi * yi 261 sxy = sxy + xi * yi 262 263 norm *= len(self.alphabet.letters) 264 s1 = (sxy - sx*sy*1.0/norm) 265 s2 = (norm*sxx - sx*sx*1.0)*(norm*syy- sy*sy*1.0) 266 return s1/math.sqrt(s2)
267
268 - def dist_product(self, other):
269 """ 270 A similarity measure taking into account a product probability of generating overlaping instances of two motifs 271 """ 272 max_p=0.0 273 for offset in range(-self.length+1, other.length): 274 if offset<0: 275 p = self.dist_product_at(other, -offset) 276 else: #offset>=0 277 p = other.dist_product_at(self, offset) 278 if max_p<p: 279 max_p=p 280 max_o=-offset 281 return 1-max_p/self.dist_product_at(self, 0), max_o
282
283 - def dist_product_at(self, other, offset):
284 s=0 285 for i in range(max(self.length, offset+other.length)): 286 f1=self[i] 287 f2=other[i-offset] 288 for n, b in self.background.items(): 289 s+=b*f1[n]*f2[n] 290 return s/i
291
292 - def dist_dpq(self, other):
293 r"""Calculates the DPQ distance measure between motifs. 294 295 It is calculated as a maximal value of DPQ formula (shown using LaTeX 296 markup, familiar to mathematicians): 297 298 \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \ 299 \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) + 300 m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k])) 301 } 302 303 over possible non-spaced alignemts of two motifs. See this reference: 304 305 D. M Endres and J. E Schindelin, "A new metric for probability 306 distributions", IEEE transactions on Information Theory 49, no. 7 307 (July 2003): 1858-1860. 308 """ 309 310 min_d=float("inf") 311 min_o=-1 312 d_s=[] 313 for offset in range(-self.length+1, other.length): 314 #print("%2.3d"%offset) 315 if offset<0: 316 d = self.dist_dpq_at(other, -offset) 317 overlap = self.length+offset 318 else: #offset>=0 319 d = other.dist_dpq_at(self, offset) 320 overlap = other.length-offset 321 overlap = min(self.length, other.length, overlap) 322 out = self.length+other.length - 2*overlap 323 #print("%f %f %f" % (d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap)) 324 #d = d/(2*overlap) 325 d = (d/(out+overlap))*(2*overlap+out)/(2*overlap) 326 #print(d) 327 d_s.append((offset, d)) 328 if min_d > d: 329 min_d = d 330 min_o = -offset 331 return min_d, min_o #,d_s
332
333 - def dist_dpq_at(self, other, offset):
334 """ 335 calculates the dist_dpq measure with a given offset. 336 337 offset should satisfy 0<=offset<=len(self) 338 """ 339 def dpq (f1, f2, alpha): 340 s=0 341 for n in alpha.letters: 342 avg=(f1[n]+f2[n])/2 343 s+=f1[n]*math.log(f1[n]/avg, 2)+f2[n]*math.log(f2[n]/avg, 2) 344 return math.sqrt(s)
345 346 s=0 347 for i in range(max(self.length, offset+other.length)): 348 f1=self[i] 349 f2=other[i-offset] 350 s+=dpq(f1, f2, self.alphabet) 351 return s
352
353 - def _read(self, stream):
354 """Reads the motif from the stream (in AlignAce format). 355 356 the self.alphabet variable must be set beforehand. 357 If the last line contains asterisks it is used for setting mask 358 """ 359 360 while True: 361 ln = stream.readline() 362 if "*" in ln: 363 self.set_mask(ln.strip("\n\c")) 364 break 365 self.add_instance(Seq(ln.strip(), self.alphabet))
366
367 - def __str__(self,masked=False):
368 """ string representation of a motif. 369 """ 370 str = "" 371 for inst in self.instances: 372 str = str + inst.tostring() + "\n" 373 374 if masked: 375 for i in range(self.length): 376 if self.mask[i]: 377 str = str + "*" 378 else: 379 str = str + " " 380 str = str + "\n" 381 return str
382
383 - def __len__(self):
384 """return the length of a motif 385 386 Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly. 387 """ 388 if self.length is None: 389 return 0 390 else: 391 return self.length
392
393 - def _write(self, stream):
394 """ 395 writes the motif to the stream 396 """ 397 398 stream.write(self.__str__())
399 400 401
402 - def _to_fasta(self):
403 """ 404 FASTA representation of motif 405 """ 406 if not self.has_instances: 407 self.make_instances_from_counts() 408 str = "" 409 for i, inst in enumerate(self.instances): 410 str = str + ">instance%d\n"%i + inst.tostring() + "\n" 411 412 return str
413
414 - def reverse_complement(self):
415 """ 416 Gives the reverse complement of the motif 417 """ 418 res = Motif() 419 if self.has_instances: 420 for i in self.instances: 421 res.add_instance(i.reverse_complement()) 422 else: # has counts 423 res.has_counts=True 424 res.counts["A"]=self.counts["T"][:] 425 res.counts["T"]=self.counts["A"][:] 426 res.counts["G"]=self.counts["C"][:] 427 res.counts["C"]=self.counts["G"][:] 428 res.counts["A"].reverse() 429 res.counts["C"].reverse() 430 res.counts["G"].reverse() 431 res.counts["T"].reverse() 432 res.length=self.length 433 res.mask = self.mask 434 return res
435 436
437 - def _from_jaspar_pfm(self,stream,make_instances=False):
438 """ 439 reads the motif from Jaspar .pfm file 440 441 The instances are fake, but the pwm is accurate. 442 """ 443 return self._from_horiz_matrix(stream, letters="ACGT", make_instances=make_instances)
444
445 - def _from_vert_matrix(self,stream,letters=None,make_instances=False):
446 """reads a vertical count matrix from stream and fill in the counts. 447 """ 448 449 self.counts = {} 450 self.has_counts = True 451 if letters is None: 452 letters = self.alphabet.letters 453 self.length = 0 454 for i in letters: 455 self.counts[i] = [] 456 for ln in stream.readlines(): 457 rec = [float(x) for x in ln.strip().split()] 458 for k, v in zip(letters, rec): 459 self.counts[k].append(v) 460 self.length += 1 461 self.set_mask("*"*self.length) 462 if make_instances is True: 463 self.make_instances_from_counts() 464 return self
465
466 - def _from_horiz_matrix(self,stream,letters=None,make_instances=False):
467 """reads a horizontal count matrix from stream and fill in the counts. 468 """ 469 if letters is None: 470 letters=self.alphabet.letters 471 self.counts = {} 472 self.has_counts=True 473 474 for i in letters: 475 ln = stream.readline().strip().split() 476 #if there is a letter in the beginning, ignore it 477 if ln[0] == i: 478 ln = ln[1:] 479 #print(ln) 480 try: 481 self.counts[i] = [int(x) for x in ln] 482 except ValueError: #not integers 483 self.counts[i] = [float(x) for x in ln] 484 #print(counts[i]) 485 486 s = sum(self.counts[nuc][0] for nuc in letters) 487 l = len(self.counts[letters[0]]) 488 self.length=l 489 self.set_mask("*"*l) 490 if make_instances is True: 491 self.make_instances_from_counts() 492 return self
493 494
495 - def make_instances_from_counts(self):
496 """Creates "fake" instances for a motif created from a count matrix. 497 498 In case the sums of counts are different for different columnes, the 499 shorter columns are padded with background. 500 """ 501 alpha = "".join(self.alphabet.letters) 502 #col[i] is a column taken from aligned motif instances 503 col = [] 504 self.has_instances = True 505 self.instances = [] 506 s = sum(self.counts[nuc][0] for nuc in self.alphabet.letters) 507 for i in range(self.length): 508 col.append("") 509 for n in self.alphabet.letters: 510 col[i] = col[i] + n*(self.counts[n][i]) 511 if len(col[i]) < s: 512 print("WARNING, column too short %i %i" % (len(col[i]), s)) 513 col[i] += (alpha*s)[:(s-len(col[i]))] 514 #print("column %i, %s" % (i, col[i])) 515 #iterate over instances 516 for i in range(s): 517 inst = "" #start with empty seq 518 for j in range(self.length): #iterate over positions 519 inst += col[j][i] 520 #print("%i %s" % (i,inst) 521 inst = Seq(inst, self.alphabet) 522 self.add_instance(inst) 523 return self.instances
524
525 - def make_counts_from_instances(self):
526 """Creates the count matrix for a motif with instances. 527 528 """ 529 #make strings for "columns" of motifs 530 #col[i] is a column taken from aligned motif instances 531 counts={} 532 for a in self.alphabet.letters: 533 counts[a]=[] 534 self.has_counts=True 535 s = len(self.instances) 536 for i in range(self.length): 537 ci = dict((a, 0) for a in self.alphabet.letters) 538 for inst in self.instances: 539 ci[inst[i]]+=1 540 for a in self.alphabet.letters: 541 counts[a].append(ci[a]) 542 self.counts=counts 543 return counts
544
545 - def _from_jaspar_sites(self, stream):
546 """ 547 reads the motif from Jaspar .sites file 548 549 The instances and pwm are OK. 550 """ 551 552 while True: 553 ln = stream.readline()# read the header "$>...." 554 if ln=="" or ln[0]!=">": 555 break 556 557 ln=stream.readline().strip()#read the actual sequence 558 i=0 559 while ln[i]==ln[i].lower(): 560 i+=1 561 inst="" 562 while i<len(ln) and ln[i]==ln[i].upper(): 563 inst+=ln[i] 564 i+=1 565 inst=Seq(inst, self.alphabet) 566 self.add_instance(inst) 567 568 self.set_mask("*"*len(inst)) 569 return self
570 571
572 - def __getitem__(self, index):
573 """Returns the probability distribution over symbols at a given position, padding with background. 574 575 If the requested index is out of bounds, the returned distribution comes from background. 576 """ 577 if index in range(self.length): 578 return self.pwm()[index] 579 else: 580 return self.background
581
582 - def consensus(self):
583 """Returns the consensus sequence of a motif. 584 """ 585 res="" 586 for i in range(self.length): 587 max_f=0 588 max_n="X" 589 for n in sorted(self[i]): 590 if self[i][n]>max_f: 591 max_f=self[i][n] 592 max_n=n 593 res+=max_n 594 return Seq(res, self.alphabet)
595
596 - def anticonsensus(self):
597 """returns the least probable pattern to be generated from this motif. 598 """ 599 res="" 600 for i in range(self.length): 601 min_f=10.0 602 min_n="X" 603 for n in sorted(self[i]): 604 if self[i][n]<min_f: 605 min_f=self[i][n] 606 min_n=n 607 res+=min_n 608 return Seq(res, self.alphabet)
609
610 - def max_score(self):
611 """Maximal possible score for this motif. 612 613 returns the score computed for the consensus sequence. 614 """ 615 return self.score_hit(self.consensus(), 0)
616
617 - def min_score(self):
618 """Minimal possible score for this motif. 619 620 returns the score computed for the anticonsensus sequence. 621 """ 622 return self.score_hit(self.anticonsensus(), 0)
623
624 - def weblogo(self,fname,format="PNG",**kwds):
625 """ 626 uses the Berkeley weblogo service to download and save a weblogo of itself 627 628 requires an internet connection. 629 The parameters from **kwds are passed directly to the weblogo server. 630 """ 631 from Bio._py3k import urlopen, urlencode, Request 632 633 al= self._to_fasta() 634 url = 'http://weblogo.berkeley.edu/logo.cgi' 635 values = {'sequence': al, 636 'format': format, 637 'logowidth': '18', 638 'logoheight': '5', 639 'logounits': 'cm', 640 'kind': 'AUTO', 641 'firstnum': "1", 642 'command': 'Create Logo', 643 'smallsamplecorrection': "on", 644 'symbolsperline': 32, 645 'res': '96', 646 'res_units': 'ppi', 647 'antialias': 'on', 648 'title': '', 649 'barbits': '', 650 'xaxis': 'on', 651 'xaxis_label': '', 652 'yaxis': 'on', 653 'yaxis_label': '', 654 'showends': 'on', 655 'shrink': '0.5', 656 'fineprint': 'on', 657 'ticbits': '1', 658 'colorscheme': 'DEFAULT', 659 'color1': 'green', 660 'color2': 'blue', 661 'color3': 'red', 662 'color4': 'black', 663 'color5': 'purple', 664 'color6': 'orange', 665 'color1': 'black', 666 } 667 for k, v in kwds.items(): 668 values[k]=str(v) 669 670 data = urlencode(values) 671 req = Request(url, data) 672 response = urlopen(req) 673 with open(fname,"w") as f: 674 im=response.read() 675 f.write(im)
676 677
678 - def _to_transfac(self):
679 """Write the representation of a motif in TRANSFAC format 680 """ 681 res="XX\nTY Motif\n" #header 682 try: 683 res+="ID %s\n"%self.name 684 except: 685 pass 686 res+="BF undef\nP0" 687 for a in self.alphabet.letters: 688 res+=" %s"%a 689 res+="\n" 690 if not self.has_counts: 691 self.make_counts_from_instances() 692 for i in range(self.length): 693 if i<9: 694 res+="0%d"%(i+1) 695 else: 696 res+="%d"%(i+1) 697 for a in self.alphabet.letters: 698 res+=" %d"%self.counts[a][i] 699 res+="\n" 700 res+="XX\n" 701 return res
702
703 - def _to_vertical_matrix(self,letters=None):
704 """Return string representation of the motif as a matrix. 705 706 """ 707 if letters is None: 708 letters = self.alphabet.letters 709 self._pwm_is_current=False 710 pwm = self.pwm(laplace=False) 711 res = "" 712 for i in range(self.length): 713 res += "\t".join(str(pwm[i][a]) for a in letters) 714 res += "\n" 715 return res
716
717 - def _to_horizontal_matrix(self,letters=None,normalized=True):
718 """Return string representation of the motif as a matrix. 719 720 """ 721 if letters is None: 722 letters = self.alphabet.letters 723 res = "" 724 if normalized: #output PWM 725 self._pwm_is_current=False 726 mat=self.pwm(laplace=False) 727 for a in letters: 728 res += "\t".join(str(mat[i][a]) for i in range(self.length)) 729 res += "\n" 730 else: #output counts 731 if not self.has_counts: 732 self.make_counts_from_instances() 733 mat = self.counts 734 for a in letters: 735 res += "\t".join(str(mat[a][i]) for i in range(self.length)) 736 res += "\n" 737 return res
738
739 - def _to_jaspar_pfm(self):
740 """Returns the pfm representation of the motif 741 """ 742 return self._to_horizontal_matrix(normalized=False, letters="ACGT")
743
744 - def format(self, format):
745 """Returns a string representation of the Motif in a given format 746 747 Currently supported fromats: 748 - jaspar-pfm : JASPAR Position Frequency Matrix 749 - transfac : TRANSFAC like files 750 - fasta : FASTA file with instances 751 """ 752 753 formatters={ 754 "jaspar-pfm": self._to_jaspar_pfm, 755 "transfac": self._to_transfac, 756 "fasta": self._to_fasta, 757 } 758 759 try: 760 return formatters[format]() 761 except KeyError: 762 raise ValueError("Wrong format type")
763
764 - def scanPWM(self, seq):
765 """Matrix of log-odds scores for a nucleotide sequence. 766 767 scans a nucleotide sequence and returns the matrix of log-odds 768 scores for all positions. 769 770 - the result is a one-dimensional list or numpy array 771 - the sequence can only be a DNA sequence 772 - the search is performed only on one strand 773 """ 774 #TODO - Code itself tolerates ambiguous bases (as NaN). 775 if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA): 776 raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs" \ 777 % self.alphabet) 778 if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA): 779 raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences" \ 780 % sequence.alphabet) 781 782 seq = seq.tostring() 783 784 # check if the fast C code can be used 785 try: 786 import _pwm 787 except ImportError: 788 # use the slower Python code otherwise 789 return self._pwm_calculate(seq) 790 791 # get the log-odds matrix into a proper shape 792 # (each row contains sorted (ACGT) log-odds values) 793 logodds=[[y[1] for y in sorted(x.items())] for x in self.log_odds()] 794 return _pwm.calculate(seq, logodds)
795
796 - def _pwm_calculate(self, sequence):
797 logodds = self.log_odds() 798 m = len(logodds) 799 s = len(sequence) 800 n = s - m + 1 801 result = [None] * n 802 for i in range(n): 803 score = 0.0 804 for j in range(m): 805 c = sequence[i+j] 806 temp = logodds[j].get(c) 807 if temp is None: 808 break 809 score += temp 810 else: 811 result[i] = score 812 return result
813