Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames | no frames]

Source Code for Module Bio.NMR.xpktools

  1  # Copyright 2004 by Bob Bussell.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Tools to manipulate data from nmrview .xpk peaklist files. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10  import sys 
 11   
 12   
# Length of the .xpk header in lines. Peaklist.__init__ reads six header lines
# before the data; this constant is not referenced by the code shown here, so
# presumably it is kept for external callers -- TODO confirm.
HEADERLEN = 6
 14   
 15   
class XpkEntry(object):
    """Provide dictionary access to a single entry from an nmrview .xpk file.

    This class is suited for handling single lines of non-header data
    from an nmrview .xpk file. It provides access to the data by the
    field names which are listed in the last line of the peaklist header.

    Parameters
    ----------
    entry : str
        The data line from an nmrview .xpk file.
    headline : str
        The line from the header that gives the names of the entries.
        This is typically the sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Dictionary of fields where key is in header line, value is an entry.
        Variables are accessed by their name in the header line, as in
        self.fields["H1.P"] which returns the H1.P entry for example.
        self.fields["entrynum"] returns the line number (1st field of line).
        The "entrynum" key is absent when `entry` is blank.

    Examples
    --------
    >>> entry = XpkEntry("8 10.hn 7.663", " H1.L H1.P ")
    >>> entry.fields["H1.P"]
    '7.663'
    >>> entry.fields["entrynum"]
    '8'

    """

    def __init__(self, entry, headline):
        """Split `entry` and pair its columns with the labels in `headline`."""
        datlist = entry.split()
        headlist = headline.split()

        # The first column is the entry number and has no label of its own,
        # so the header labels line up with the columns from the second one
        # onwards.  zip() stops at the shorter sequence, which means a line
        # with more columns than labels is truncated instead of raising
        # IndexError half-way through building the dictionary.
        self.fields = dict(zip(headlist, datlist[1:]))

        # A blank line has no entry number; the key is simply left out.
        if datlist:
            self.fields["entrynum"] = datlist[0]
58 59
class Peaklist(object):
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.

    data : list
        File data after header lines.

    Examples
    --------

    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the six header lines and the remaining data lines of `infn`."""
        with open(infn, "r") as infile:
            # Read in the header lines, dropping the trailing newline
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")

            # Read in the data lines to a list
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in `data` indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Blank data lines are skipped.  The result is also stored on the
        instance as ``self.dict``.

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Mappings of index nucleus to data line.  Each residue number
            (as a string) maps to the list of data lines for that residue.
            The extra keys "maxres" and "minres" hold the largest and
            smallest residue number as integers, or -1 when there is no data.

        Examples
        --------

        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        # None marks "nothing seen yet"; a numeric sentinel such as -1 would
        # be confused with a genuine residue number of -1.
        maxres = None
        minres = None

        # Cast the data lines into the XpkEntry class
        self.dict = {}
        for line in self.data:
            if not line.strip():
                continue  # a blank line carries no fields to index

            ind = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(ind.split(".")[0])

            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res

            # Lines sharing a residue number accumulate under the same key
            self.dict.setdefault(str(res), []).append(line)

        # -1 is what callers have always received for an empty peaklist
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres

        return self.dict

    def write_header(self, outfn):
        """Write header lines from input file to the file named `outfn`."""
        header_lines = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        # The header lines are text, so the file must be opened in text mode;
        # writing str to a binary-mode file raises TypeError on Python 3.
        with open(outfn, "w") as outfile:
            for header_line in header_lines:
                outfile.write(header_line)
                outfile.write("\n")
197 198
def replace_entry(line, fieldn, newentry):
    """Replace an entry in a string by the field number (helper function).

    No padding is implemented currently. Spacing will change if
    the original field entry and the new field entry are of
    different lengths.

    Parameters
    ----------
    line : str
        Whitespace delimited line of fields.
    fieldn : int
        Number of the field to replace, counted starting with 1.
    newentry : object
        The replacement value; it is converted with ``str()``.

    Returns
    -------
    newline : str
        Copy of `line` with the requested field replaced.

    Raises
    ------
    IndexError
        If there is no field at or after the located start position.
    """
    # This function depends on xpktools._find_start_entry

    start = _find_start_entry(line, fieldn)

    # _find_start_entry reports 0 for the first field even when the line is
    # indented, so step over any whitespace to land on the field itself.
    # Otherwise the indent and part of the field would be overwritten.
    remainder = line[start:]
    field_text = remainder.lstrip()
    if not field_text:
        raise IndexError("line has no field number %r" % (fieldn,))
    start += len(remainder) - len(field_text)

    leng = len(field_text.split()[0])
    return line[:start] + str(newentry) + line[start + leng:]
212 213
def _find_start_entry(line, n):
    """Find the starting character for entry `n` in a whitespace delimited `line` (PRIVATE).

    n is counted starting with 1.
    The n=1 field by definition begins at the first character.

    Fields are separated by any whitespace, matching the ``str.split()``
    calls used by replace_entry and XpkEntry.

    Returns
    -------
    start : int
        Index in `line` of the first character of entry `n`.

    Raises
    ------
    IndexError
        If `line` holds fewer than `n` entries (for n greater than 1).
    """
    # This function is used by replace_entry

    if n <= 1:
        return 0  # Special case

    # Walk the line and count each transition from whitespace into a field
    field = 0
    in_field = False
    for position, char in enumerate(line):
        if char.isspace():
            in_field = False
        elif not in_field:
            in_field = True
            field += 1
            if field == n:
                return position

    # Running off the end used to yield the index of the last character,
    # which made replace_entry overwrite the tail of the wrong field.
    raise IndexError("line has %d fields, cannot locate field %r" % (field, n))
257 258
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by `keyatom`.  Each row is a string made
        of the residue number followed by one tab-separated column per
        input file ("*" where the file has no line for that residue) and a
        trailing newline.  The list is empty when `fn_list` is empty.

    """
    # TODO - Clarify this docstring, add an example?
    fn_list = list(fn_list)
    if not fn_list:
        # No files means no residue range to tabulate
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):
        key = str(res)
        columns = [key]
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                # Only the first line stored for a residue is reported
                entry = XpkEntry(dictionary[key][0], label)
                columns.append(entry.fields[datalabel])
            else:
                columns.append("*")
        outlist.append("\t".join(columns) + "\n")

    return outlist
308 309
def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries (PRIVATE).

    Returns a two-item list ``[dict_list, datalabel_list]``: the residue
    dictionary of each file indexed by `keyatom`, and the data label line
    of each file, both in the order of `fn_list`.
    """
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        # Appended directly; a local called `dict` would shadow the builtin
        dict_list.append(peaklist.residue_dict(keyatom))
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from Bio._utils import run_doctest

    run_doctest()