Package Bio :: Package motifs :: Module jaspar
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.jaspar

 1  from Bio.motifs import Motif, Instances 
 2  from Bio.Alphabet import IUPAC 
 3  from Bio.Seq import Seq 
 4   
 5   
def read(handle, format):
    """Read a single motif from a JASPAR file.

    Arguments:
     - handle - an iterable of text lines (for example an open file).
     - format - "pfm" to read a JASPAR count matrix, or "sites" to read
       a JASPAR list of binding sites.

    For "pfm", up to four lines are read and assigned to A, C, G and T
    in that order; if a row starts with its own letter, that label is
    dropped before the numbers are converted to floats.

    For "sites", records are read as a header line starting with ">"
    followed by one sequence line.  From the sequence line only the
    characters that equal their own upper-case form are kept.  Reading
    stops at the first line in header position that does not start
    with ">".

    Returns the Motif, with its mask set to "*" repeated motif.length
    times.

    Raises ValueError if format is not "pfm" or "sites", or if a "sites"
    header is not followed by a sequence line.
    """
    # Reject unsupported formats before consuming anything from handle.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)
    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # Reads the motif from a JASPAR .pfm file.
        counts = {}
        # zip() stops after four rows, one per nucleotide.
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # If there is a letter in the beginning, ignore it.
            if words[0] == letter:
                words = words[1:]
            # Store a real list: on Python 3 map() would give a one-shot
            # iterator that is exhausted after a single pass.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        # Reads the motif from a JASPAR .sites file.
        instances = []
        # Use one explicit iterator for both the loop and the look-ahead,
        # so this works for any iterable of lines on Python 2 and 3.
        lines = iter(handle)
        for line in lines:
            if not line.startswith(">"):
                break
            # line contains the header ">...."; now read the actual sequence.
            sequence_line = next(lines, None)
            if sequence_line is None:
                # Truncated input: report it instead of leaking StopIteration.
                raise ValueError("Missing sequence line after header %r"
                                 % line.strip())
            # Keep only characters that are unchanged by upper().
            instance = "".join(c for c in sequence_line.strip()
                               if c == c.upper())
            instances.append(Seq(instance, alphabet))
        instances = Instances(instances, alphabet)
        motif = Motif(alphabet, instances=instances)
    motif.mask = "*" * motif.length
    return motif
40
def write(motif):
    """Return the pfm representation of the motif as a string.

    The text has one line per nucleotide, in the order A, C, G, T.  Each
    line holds the str() of every entry in motif.counts for that
    nucleotide, separated by tabs and terminated by a newline.
    """
    matrix = motif.counts
    rows = [
        "\t".join(str(value) for value in matrix[base]) + "\n"
        for base in "ACGT"
    ]
    # Finished; glue the rows together.
    return "".join(rows)
54