Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames | no frames]

Source Code for Module Bio.NMR.xpktools

  1  # Copyright 2004 by Bob Bussell.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Tools to manipulate data from nmrview .xpk peaklist files. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  import sys 
 11   
 12  __docformat__ = "restructuredtext en" 
 13   
 14  HEADERLEN = 6 
 15   
 16   
class XpkEntry(object):
    """Provide dictionary access to a single entry from an nmrview .xpk file.

    This class is suited for handling single lines of non-header data
    from an nmrview .xpk file. It allows data to be looked up by the
    field name which is listed in the last line of the peaklist header.

    Parameters
    ----------
    entry : str
        The line from an nmrview .xpk file.
    headline : str
        The line from the header file that gives the names of the entries.
        This is typically the sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Dictionary of fields where key is in header line, value is an entry.
        Values are accessed by their name in the header line, for example
        self.fields["H1.P"] will return the H1.P entry.
        self.fields["entrynum"] returns the line number (1st field of line).
        An empty `entry` yields an empty dictionary.

    Raises
    ------
    IndexError
        If `entry` holds more data columns than `headline` has labels.

    """

    def __init__(self, entry, headline):
        """Split `entry` and `headline` on whitespace and pair them up."""
        # Holds all fields from the input line in a dictionary;
        # keys are data labels from the .xpk header.
        self.fields = {}

        values = entry.split()
        labels = headline.split()

        # The first token of a data line is the entry number and has no
        # header label, so data column k + 1 pairs with header label k.
        for position, value in enumerate(values[1:]):
            self.fields[labels[position]] = value

        # Stored last so it wins over a header label that happens to be
        # called "entrynum"; skipped entirely for a blank line.
        if values:
            self.fields["entrynum"] = values[0]
59 60
class Peaklist(object):
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.

    data : list
        File data after header lines.

    Examples
    --------

    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the six header lines and all data lines from `infn`."""
        with open(infn, "r") as infile:

            # Read in the header lines, dropping the line terminator
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")

            # Read in the data lines to a list
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in `data` indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Mappings of index nucleus to data line. Each residue number
            (as a string) maps to the list of data lines for that residue.
            The extra keys "maxres" and "minres" hold the largest and
            smallest residue number as integers, or -1 when there are no
            data lines. The same dictionary is also stored as `self.dict`.

        Examples
        --------

        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        label = index + ".L"

        # None (rather than -1) marks "nothing seen yet", so a genuine
        # residue number of -1 cannot reset the running extremes.
        lowest = None
        highest = None

        # Cast the data lines into the XpkEntry class
        self.dict = {}
        for line in self.data:
            assignment = XpkEntry(line, self.datalabels).fields[label]
            # Labels look like "10.hn"; the part before the dot is the residue
            res = int(assignment.split(".")[0])

            if lowest is None:
                lowest = highest = res
            else:
                lowest = min(lowest, res)
                highest = max(highest, res)

            # Start a new list for a new residue, otherwise append to it
            self.dict.setdefault(str(res), []).append(line)

        # -1 is kept as the reported value for an empty peaklist
        self.dict["maxres"] = -1 if highest is None else highest
        self.dict["minres"] = -1 if lowest is None else lowest

        return self.dict

    def write_header(self, outfn):
        """Write header lines from input file to the file named `outfn`.

        The file is opened in text mode because the header attributes are
        strings; writing them to a binary-mode file fails on Python 3.
        """
        header_lines = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with open(outfn, "w") as outfile:
            for header_line in header_lines:
                outfile.write(header_line)
                outfile.write("\n")
198 199
def replace_entry(line, fieldn, newentry):
    """Return a copy of `line` with one field swapped for `newentry`.

    The field is selected by its number `fieldn`, which is passed straight
    to `_find_start_entry` to locate where the field begins. `newentry` is
    converted with `str()` before insertion.

    No padding is implemented currently. Spacing will change if
    the original field entry and the new field entry are of
    different lengths.
    """
    # This function depends on xpktools._find_start_entry
    begin = _find_start_entry(line, fieldn)

    # The old entry is the first whitespace-delimited token at `begin`
    old_entry = line[begin:].split()[0]
    end = begin + len(old_entry)

    return line[:begin] + str(newentry) + line[end:]
213 214
215 -def _find_start_entry(line, n):
216 """Find the starting character for entry `n` in a space delimited `line` (PRIVATE). 217 218 n is counted starting with 1. 219 The n=1 field by definition begins at the first character. 220 221 Returns 222 ------- 223 starting character : str 224 The starting character for entry `n`. 225 """ 226 # This function is used by replace_entry 227 228 infield = 0 # A flag that indicates that the counter is in a field 229 230 if (n == 1): 231 return 0 # Special case 232 233 # Count the number of fields by counting spaces 234 c = 1 235 leng = len(line) 236 237 # Initialize variables according to whether the first character 238 # is a space or a character 239 if (line[0] == " "): 240 infield = 0 241 field = 0 242 else: 243 infield = 1 244 field = 1 245 246 while (c < leng and field < n): 247 if (infield): 248 if (line[c] == " " and not (line[c-1] == " ")): 249 infield = 0 250 else: 251 if (not line[c] == " "): 252 infield = 1 253 field = field + 1 254 255 c = c + 1 256 257 return c - 1
258 259
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    One row is produced for every residue number between the smallest
    "minres" and the largest "maxres" found across the files. Each row is
    the residue number followed by one tab-separated column per file and
    a trailing newline; a file with no entry for that residue contributes
    "*". Only the first data line stored for a residue is used.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by `keyatom`.

    """
    # TODO - Clarify this docstring, add an example?
    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = dict_list[0]["minres"]
    maxr = dict_list[0]["maxres"]
    for resdict in dict_list:
        maxr = max(maxr, resdict["maxres"])
        minr = min(minr, resdict["minres"])

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        for resdict, label in zip(dict_list, label_line_list):  # s.t. dictionaries
            if key in resdict:
                cells.append(XpkEntry(resdict[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
309 310
def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries (PRIVATE).

    Each file name in `fn_list` is loaded as a Peaklist. Returns a
    two-item list: the residue dictionaries (indexed by `keyatom`) and
    the matching data label lines, both in the order of `fn_list`.
    """
    residue_dicts = []
    label_lines = []
    for filename in fn_list:
        peaks = Peaklist(filename)
        residue_dicts.append(peaks.residue_dict(keyatom))
        label_lines.append(peaks.datalabels)

    return [residue_dicts, label_lines]


if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()