Package Bio :: Package motifs :: Module transfac
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.transfac

  1  # Copyright 2003 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Parsing TRANSFAC files 
  7  """ 
  8   
  9  from Bio import motifs 
 10  from Bio.Alphabet import IUPAC 
 11   
 12   
class Motif(motifs.Motif, dict):
    """Hold the data of a single TRANSFAC motif.

    The class derives both from the Bio.motifs.Motif base class and from
    the built-in dictionary. Whatever the parser can express through the
    base class is kept in base-class attributes (see Bio.motifs.Motif for
    their description). Everything else is kept as (key, value) pairs in
    the dictionary, keyed by the two-letter field code used in the
    TRANSFAC file. References are the exception: they are kept in the
    .references attribute.

    Fields commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
               sequence; length of sequence used; number of gaps inserted;
               strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    The .references attribute is a list of dictionaries that use these
    keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    Consult the TRANSFAC documentation for further details.
    """

    # Field codes that may be given several times for the same motif
    multiple_value_keys = set(('BF', 'OV', 'HP', 'BS', 'HC', 'DT', 'DR'))

    # Field codes that describe a literature reference
    reference_keys = set(('RX', 'RA', 'RT', 'RL'))
class Record(list):
    """Container for the contents of one TRANSFAC matrix table.

    A Record is a list whose items are the individual motifs.

    Attributes:
        o version:   The version number taken from the 'VV' field of the
                     TRANSFAC file; None until it has been set.
    """

    def __init__(self):
        # Start out as an empty list with no version information.
        list.__init__(self)
        self.version = None

    def __str__(self):
        # The string form of a record is its TRANSFAC-format text.
        text = write(self)
        return text
83 84
def read(handle):
    """Parse TRANSFAC-format lines and return them as a Record.

    Arguments:
        o handle:   An iterable of text lines, typically an open file.
                    Each line starts with a two-character key and the
                    value is taken from the fifth column onwards.

    Lines are interpreted by their key:

        VV          stored as record.version (not as an annotation)
        P0 / PO     header of the frequency matrix; must name the columns
                    A, C, G and T. The numbered rows that follow are read
                    as counts.
        RN          starts a new reference; the number between square
                    brackets must be one more than the number of
                    references seen so far in the current entry.
        RX RA RT RL added to the most recent reference
        //          ends the current entry. A Motif is appended to the
                    record only if the entry contained a matrix.
        XX, blank   ignored
        other       stored as annotation; keys listed in
                    Motif.multiple_value_keys are collected in a list,
                    any other key keeps only its last value.

    Returns the Record holding one Motif per entry that had a matrix.

    Raises AssertionError if the matrix header, the matrix row numbering
    or a reference number is malformed, and ValueError if a count is not
    a number or a reference field appears before any RN line.
    """
    # Use a single iterator so that the matrix reader continues where this
    # loop stopped, also when the caller passes a list of lines.
    lines = iter(handle)
    annotations = {}
    references = []
    reference = None
    counts = None
    record = Record()
    for line in lines:
        line = line.strip()
        if not line:
            continue
        key, value = line[:2], line[4:]
        if key == 'VV':
            # Record-level information; keep it out of the annotations.
            record.version = value
            continue
        if key in ('P0', 'PO'):  # Old TRANSFAC files use PO instead of P0
            counts, line = _read_counts(lines, value)
            if not line:
                continue
            # The line that ended the matrix still has to be interpreted.
            key, value = line[:2], line[4:]
        if line == 'XX':
            continue
        if key == 'RN':
            index = value.partition(";")[0].strip()
            assert index.startswith('[') and index.endswith(']'), \
                "Malformed reference number: %r" % value
            index = int(index[1:-1])
            assert len(references) == index - 1, \
                "Unexpected reference number %d" % index
            reference = {key: value}
            references.append(reference)
        elif key == '//':
            if counts is not None:
                motif = Motif(alphabet=IUPAC.unambiguous_dna, counts=counts)
                motif.update(annotations)
                motif.references = references
                record.append(motif)
            # Start the next entry from scratch, so that nothing (in
            # particular the matrix) leaks from this entry into the next.
            annotations = {}
            references = []
            reference = None
            counts = None
        elif key in Motif.reference_keys:
            if reference is None:
                raise ValueError(
                    "Reference field %r found before any RN line" % key)
            reference[key] = value
        elif key in Motif.multiple_value_keys:
            if key not in annotations:
                annotations[key] = []
            annotations[key].append(value)
        else:
            annotations[key] = value
    return record


def _read_counts(lines, header):
    """Read the rows of a frequency matrix from the iterator lines.

    header is the value part of the P0/PO line. Rows are consumed until a
    line is found whose first field is not an integer. Returns the tuple
    (counts, line), in which counts maps each of 'A', 'C', 'G', 'T' to a
    list of floats and line is the stripped line that ended the matrix
    (the empty string if it was blank or the input was exhausted).
    """
    assert header.split()[:4] == ['A', 'C', 'G', 'T'], \
        "Unexpected matrix header: %r" % header
    counts = dict((letter, []) for letter in "ACGT")
    length = 0
    for line in lines:
        # Split on whitespace rather than on fixed columns, so that row
        # numbers of three digits and wide counts are read correctly.
        fields = line.split()
        try:
            position = int(fields[0])
        except (IndexError, ValueError):
            return counts, line.strip()
        length += 1
        assert position == length, \
            "Expected matrix row %d, found %d" % (length, position)
        for letter, field in zip("ACGT", fields[1:5]):
            counts[letter].append(float(field))
    return counts, ''
140 141
def write(motifs):
    """Return the TRANSFAC-format text for a collection of motifs.

    Arguments:
        o motifs:   An iterable of motifs. If it has a .version attribute
                    that is not None, a leading VV entry is written.

    Every line consists of a two-character key, two spaces, and the value,
    so that the value starts in the fifth column. For each motif the
    fields are written in a fixed order, grouped in sections; a section
    that produced at least one line is followed by an 'XX' line. The
    references are written in the last section, and each motif is closed
    by a '//' line.

    Returns the text as a single string ('' if there is nothing to write).
    """
    blocks = []
    version = getattr(motifs, 'version', None)
    if version is not None:
        blocks.append("VV  %s\nXX\n//\n" % version)
    sections = (('AC', 'AS',),  # Accession
                ('ID',),        # ID
                ('DT', 'CO'),   # Date, copyright
                ('NA',),        # Name
                ('DE',),        # Short factor description
                ('TY',),        # Type
                ('OS', 'OC'),   # Organism
                ('HP', 'HC'),   # Superfamilies, subfamilies
                ('BF',),        # Binding factors
                ('P0',),        # Frequency matrix
                ('BA',),        # Statistical basis
                ('BS',),        # Factor binding sites
                ('CC',),        # Comments
                ('DR',),        # External databases
                ('OV', 'PV',),  # Versions
                )
    for motif in motifs:
        lines = []
        for section in sections:
            section_lines = []
            for key in section:
                if key == 'P0':
                    section_lines.extend(_matrix_lines(motif))
                else:
                    section_lines.extend(_annotation_lines(motif, key))
                if key == 'PV':
                    # The references go right after the version fields.
                    section_lines.extend(_reference_lines(motif))
            if section_lines:
                lines.extend(section_lines)
                lines.append('XX')
        # Finished this motif; glue the lines together
        lines.append("//")
        blocks.append("\n".join(lines) + "\n")
    # Finished all motifs; glue the blocks together
    return "".join(blocks)


def _matrix_lines(motif):
    """Return the P0 header and the numbered count rows of one motif.

    Reads motif.length, motif.degenerate_consensus, motif.alphabet.letters
    and motif.counts (presumably provided by the Bio.motifs.Motif base
    class). Returns an empty list if the motif has length 0.
    """
    length = motif.length
    if length == 0:
        return []
    consensus = motif.degenerate_consensus
    letters = sorted(motif.alphabet.letters)
    # Counts are right-aligned in fields of width 6 that are separated by
    # one space; six spaces between the header items keep each letter
    # above the last digit of its column.
    lines = ["      ".join(["P0"] + letters)]
    row_format = " ".join(["%02.d"] + ["%6.20g"] * len(letters)) + "      %s"
    for i in range(length):
        row = [i + 1]
        row.extend(motif.counts[letter][i] for letter in letters)
        row.append(consensus[i])
        lines.append(row_format % tuple(row))
    return lines


def _annotation_lines(motif, key):
    """Return the lines for the annotation stored under key, if any.

    Motifs that have no .get method, or no value for key, yield no lines.
    """
    try:
        value = motif.get(key)
    except AttributeError:
        # Not a dictionary-like motif; it carries no annotations.
        return []
    if value is None:
        return []
    if key in Motif.multiple_value_keys:
        return ["%s  %s" % (key, item) for item in value]
    return ["%s  %s" % (key, value)]


def _reference_lines(motif):
    """Return the lines for the references of a motif, if it has any."""
    try:
        references = motif.references
    except AttributeError:
        return []
    lines = []
    for reference in references:
        for key in ("RN", "RX", "RA", "RT", "RL"):
            value = reference.get(key)
            if value is not None:
                lines.append("%s  %s" % (key, value))
    return lines
243