Package Bio :: Package motifs :: Module transfac
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.transfac

  1  # Copyright 2003 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parsing TRANSFAC files 
  7  """ 
  8   
  9  from Bio import motifs 
 10  from Bio.Alphabet import IUPAC 
 11   
 12   
class Motif(motifs.Motif, dict):
    """Store the information of a single TRANSFAC motif.

    The class derives from both the Bio.motifs.Motif base class and the
    built-in dict.  Whatever the parser can express as an attribute of the
    base class is stored there (see Bio.motifs.Motif for those attributes).
    Everything else is kept as a (key, value) pair in the dictionary part,
    the key being the two-letter field code used in the TRANSFAC file.
    References are the exception: they live in the .references attribute.

    Fields commonly found in TRANSFAC files:
        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
                sequence; length of sequence used; number of gaps inserted;
                strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    The .references attribute is a list of dictionaries, one per
    reference, using these keys:
        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    """

    # Field codes that may appear on several lines within one motif entry.
    multiple_value_keys = {'BF', 'OV', 'HP', 'BS', 'HC', 'DT', 'DR'}

    # Field codes that belong to a literature reference.
    reference_keys = {'RX', 'RA', 'RT', 'RL'}
class Record(list):
    """Hold the motifs of one TRANSFAC matrix table.

    A Record is a list whose items are the individual motifs.

    Attributes:
     o version:   The version number, corresponding to the 'VV' field
                  in the TRANSFAC file; None until one is assigned.
    """

    def __init__(self):
        # Start out as an empty list with no version assigned yet.
        list.__init__(self)
        self.version = None

    def __str__(self):
        # The text form of a record is its TRANSFAC representation.
        text = write(self)
        return text
81 82
def _read_counts(handle, header):
    """Read the rows of a frequency matrix that follow a P0/PO line.

    Arguments:
     - handle: the line iterator being parsed; matrix rows are consumed
       from it.
     - header: the value part of the P0/PO line; its first four columns
       must be A, C, G, T.

    Returns a tuple (counts, line).  counts maps each of 'A', 'C', 'G' and
    'T' to a list of floats, one per matrix row.  line is the stripped
    line that ended the matrix (the first line whose key is not an
    integer), or None if the input ended inside the matrix.

    Raises ValueError for a bad header, a row number that is out of
    sequence, or a row with fewer than four counts.
    """
    if header.split()[:4] != ['A', 'C', 'G', 'T']:
        raise ValueError(
            "Expected matrix columns A C G T, found %r" % header)
    counts = dict((letter, []) for letter in "ACGT")
    length = 0
    for line in handle:
        # Strip here as well, so that the line ending the matrix compares
        # equal to 'XX' in the caller.
        line = line.strip()
        key, value = line[:2], line[4:]
        try:
            i = int(key)
        except ValueError:
            # Not a row number: the matrix is complete.
            return counts, line
        length += 1
        if i != length:
            raise ValueError(
                "Expected matrix row %d, found row %d" % (length, i))
        values = value.split()
        if len(values) < 4:
            raise ValueError(
                "Matrix row %d has fewer than four counts: %r" % (i, line))
        # Anything after the four counts (the consensus letter) is ignored.
        for letter, v in zip("ACGT", values):
            counts[letter].append(float(v))
    return counts, None


def read(handle):
    """Parse a TRANSFAC matrix table and return it as a Record.

    Arguments:
     - handle: an iterable yielding the lines of the file, for example
       an open file handle.

    Every line is split into a two-letter key (columns 0-1) and a value
    (column 4 onwards).  'VV' sets record.version.  'P0' (or 'PO' in old
    files) starts the frequency matrix.  'RN' starts a new reference and
    the keys in Motif.reference_keys are added to it.  Keys in
    Motif.multiple_value_keys are collected in lists; every other key is
    stored as a single value.  'XX' lines and blank lines are skipped.
    A '//' line ends the entry; a Motif is appended to the record only if
    the entry contained a matrix.

    Raises ValueError if the input is malformed.  Explicit exceptions are
    used instead of assert statements because asserts are skipped when
    Python runs with -O.
    """
    record = Record()
    annotations = {}
    references = []
    reference = None  # the reference currently being filled in, if any
    counts = None
    for line in handle:
        line = line.strip()
        if not line:
            continue
        key, value = line[:2], line[4:]
        if key == 'VV':
            record.version = value
            # The version belongs to the record, not to a motif.
            continue
        if key in ('P0', 'PO'):  # Old TRANSFAC files use PO instead of P0
            counts, line = _read_counts(handle, value)
            if line is None:
                # The input ended inside the matrix; nothing left to parse.
                break
            if not line:
                continue
            # The line that ended the matrix still has to be processed.
            key, value = line[:2], line[4:]
        if line == 'XX':
            continue
        if key == 'RN':
            index = value.partition(";")[0]
            if not (index.startswith('[') and index.endswith(']')):
                raise ValueError(
                    "Expected a reference number in brackets: %r" % line)
            number = int(index[1:-1])
            if number != len(references) + 1:
                raise ValueError(
                    "Expected reference number %d, found %d"
                    % (len(references) + 1, number))
            reference = {key: value}
            references.append(reference)
        elif key == '//':
            if counts is not None:
                motif = Motif(alphabet=IUPAC.unambiguous_dna, counts=counts)
                motif.update(annotations)
                motif.references = references
                record.append(motif)
            # Start the next entry from scratch, so that neither the matrix
            # nor the last reference leaks into it.
            annotations = {}
            references = []
            reference = None
            counts = None
        elif key in Motif.reference_keys:
            if reference is None:
                raise ValueError(
                    "Reference field found before any RN line: %r" % line)
            reference[key] = value
        elif key in Motif.multiple_value_keys:
            annotations.setdefault(key, []).append(value)
        else:
            annotations[key] = value
    return record
138
def _matrix_lines(motif):
    """Return the P0 header and one row per position of motif.counts.

    Returns an empty list when motif.length is 0.
    """
    length = motif.length
    if length == 0:
        return []
    consensus = motif.degenerate_consensus
    # Header, counts and consensus letter share one 7-character column
    # grid, so the first count starts after column 4.
    lines = ["P0      A      C      G      T"]
    for i in range(length):
        lines.append("%02.d %6.20g %6.20g %6.20g %6.20g      %s" % (
            i + 1,
            motif.counts['A'][i],
            motif.counts['C'][i],
            motif.counts['G'][i],
            motif.counts['T'][i],
            consensus[i],
        ))
    return lines


def _annotation_lines(motif, key, multiple_value_keys):
    """Return the output lines for one annotation key of a motif."""
    try:
        value = motif.get(key)
    except AttributeError:
        # Motif objects without a .get method carry no annotations.
        return []
    if value is None:
        return []
    if key in multiple_value_keys:
        return ["%s  %s" % (key, v) for v in value]
    return ["%s  %s" % (key, value)]


def _reference_lines(motif):
    """Return the output lines for the references of a motif, if any."""
    try:
        references = motif.references
    except AttributeError:
        return []
    lines = []
    for reference in references:
        for field in ("RN", "RX", "RA", "RT", "RL"):
            value = reference.get(field)
            if value is not None:
                lines.append("%s  %s" % (field, value))
    return lines


def write(motifs):
    """Return the motifs as a string in TRANSFAC format.

    Arguments:
     - motifs: an iterable of motifs.  If it has a .version attribute
       that is not None (as a Record may have), a 'VV' block is written
       first.

    Each line consists of the two-letter key, two spaces and the value,
    so that the value starts at column 4, where read() expects it.
    Fields are written section by section, with an 'XX' line after every
    section that produced output.  References are written after the
    OV/PV fields.  Each motif ends with a '//' line.
    """
    blocks = []
    version = getattr(motifs, 'version', None)
    if version is not None:
        blocks.append("VV  %s\nXX\n//\n" % version)
    multiple_value_keys = Motif.multiple_value_keys
    sections = (('AC', 'AS',),   # Accession
                ('ID',),         # ID
                ('DT', 'CO'),    # Date, copyright
                ('NA',),         # Name
                ('DE',),         # Short factor description
                ('TY',),         # Type
                ('OS', 'OC'),    # Organism
                ('HP', 'HC'),    # Superfamilies, subfamilies
                ('BF',),         # Binding factors
                ('P0',),         # Frequency matrix
                ('BA',),         # Statistical basis
                ('BS',),         # Factor binding sites
                ('CC',),         # Comments
                ('DR',),         # External databases
                ('OV', 'PV',),   # Versions
                )
    for motif in motifs:
        lines = []
        for section in sections:
            section_lines = []
            for key in section:
                if key == 'P0':
                    section_lines.extend(_matrix_lines(motif))
                else:
                    section_lines.extend(
                        _annotation_lines(motif, key, multiple_value_keys))
                    if key == 'PV':
                        # References share the section of the versions.
                        section_lines.extend(_reference_lines(motif))
            if section_lines:
                lines.extend(section_lines)
                lines.append('XX')
        # Finished this motif; glue the lines together
        lines.append("//")
        blocks.append("\n".join(lines) + "\n")
    # Finished all motifs; glue the blocks together
    return "".join(blocks)
237