Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames | no frames]

Source Code for Module Bio.NMR.xpktools

  1  # Copyright 2004 by Bob Bussell.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Tools to manipulate data from nmrview .xpk peaklist files. 
  6  """ 
  7   
  8  from __future__ import print_function 
  9   
 10   
# Presumably the number of header lines at the top of an nmrview .xpk file:
# Peaklist.__init__ consumes six header lines before reading the data.
HEADERLEN = 6
 12   
 13   
class XpkEntry(object):
    """Provide dictionary access to a single entry from an nmrview .xpk file.

    This class is suited for handling single lines of non-header data
    from an nmrview .xpk file. Values are looked up in the ``fields``
    dictionary by the field name which is listed in the last line of the
    peaklist header.

    Parameters
    ----------
    entry : str
        The line from an nmrview .xpk file.
    headline : str
        The line from the header file that gives the names of the entries.
        This is typically the sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Dictionary of fields where key is in header line, value is an entry.
        Variables are accessed by their name in the header line, e.g.
        self.fields["H1.P"] returns the H1.P entry.
        self.fields["entrynum"] returns the line number (1st field of line);
        it is absent when the entry line is empty.

    Raises
    ------
    IndexError
        If the entry has more data columns than the header line has labels.

    """

    def __init__(self, entry, headline):
        """Split the entry and pair each value with its header label."""
        datlist = entry.split()
        headlist = headline.split()

        # The first token is the entry number; the rest line up with the
        # header labels in order.
        values = datlist[1:]
        if len(values) > len(headlist):
            # Keep the historical exception type, but say what went wrong.
            raise IndexError(
                "xpk entry has %d data columns but the header names only %d"
                % (len(values), len(headlist))
            )

        # Holds all fields from the input line; keys are the data labels
        # from the .xpk header.  A short entry fills only the leading labels.
        self.fields = dict(zip(headlist, values))

        if datlist:
            self.fields["entrynum"] = datlist[0]
56 57
class Peaklist(object):
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.

    data : list
        File data after header lines.

    Examples
    --------

    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels  # doctest: +NORMALIZE_WHITESPACE
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the six header lines and all data lines from ``infn``."""
        with open(infn, "r") as infile:
            # Read in the header lines; the order of these reads defines
            # which header line ends up in which attribute.
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")

            # Read in the data lines to a list, without their newlines
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in `data` indexed by residue number.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example).  The residue
        number is taken from the part of the ``<index>.L`` field that
        precedes the first ".".

        The returned dictionary is also stored on the instance as
        ``self.dict``.

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Maps each residue number (as a string) to the list of data lines
            for that residue.  The extra keys "maxres" and "minres" hold the
            largest and smallest residue numbers as integers, or -1 when
            there are no data lines.

        Examples
        --------

        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']  # doctest: +NORMALIZE_WHITESPACE
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        label_key = index + ".L"
        residues = []

        self.dict = {}
        for line in self.data:
            # Cast the data line into the XpkEntry class to look up the label
            label = XpkEntry(line, self.datalabels).fields[label_key]
            res = int(label.split(".")[0])
            residues.append(res)

            # Lines sharing a residue number accumulate under the same key
            self.dict.setdefault(str(res), []).append(line)

        # -1 is kept as the "no data" value for backward compatibility; it is
        # no longer used as an in-loop sentinel, so a genuine residue number
        # of -1 is handled correctly.
        self.dict["maxres"] = max(residues) if residues else -1
        self.dict["minres"] = min(residues) if residues else -1

        return self.dict

    def write_header(self, outfn):
        """Write header lines from input file to the file named `outfn`."""
        header = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        # Text mode: the header lines are str, and writing str to a file
        # opened in binary mode raises TypeError on Python 3.
        with open(outfn, "w") as outfile:
            outfile.writelines(line + "\n" for line in header)
194 195
def replace_entry(line, fieldn, newentry):
    """Helper function replace an entry in a string by the field number.

    No padding is implemented currently.  Spacing will change if
    the original field entry and the new field entry are of
    different lengths.

    Parameters
    ----------
    line : str
        The whitespace delimited line to edit.
    fieldn : int
        The number of the field to replace, counted starting with 1.
    newentry : object
        The replacement; it is converted with ``str()``.

    Returns
    -------
    newline : str
        A copy of `line` with the selected entry replaced.

    Raises
    ------
    IndexError
        If there is no entry at or after the located start position.
    """
    # This method depends on xpktools._find_start_entry

    start = _find_start_entry(line, fieldn)

    # _find_start_entry reports 0 for field 1 even when the line begins with
    # whitespace, so step over any whitespace to land on the entry itself.
    # Without this the replacement was inserted in front of the old entry
    # and the text after it was cut at the wrong offset.
    remainder = line[start:]
    start += len(remainder) - len(remainder.lstrip())

    leng = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[(start + leng):]
209 210
def _find_start_entry(line, n):
    """Find the starting character for entry `n` in a space delimited `line` (PRIVATE).

    n is counted starting with 1.
    The n=1 field by definition begins at the first character, so 0 is
    returned for it even if the line starts with spaces.

    Only the space character is treated as a delimiter.

    Returns
    -------
    starting character : int
        The index in `line` at which entry `n` starts.  If the line has
        fewer than `n` entries, the index of the last character is returned
        (0 for an empty line).
    """
    # This function is used by replace_entry

    if n <= 1:
        return 0  # Special case

    # A field starts wherever a non-space character follows a space; the
    # start of the line counts as following a space.
    field = 0
    previous = " "
    for position, char in enumerate(line):
        if char != " " and previous == " ":
            field += 1
            if field == n:
                return position
        previous = char

    # Ran off the end without reaching field n.  max() keeps the result at 0
    # for an empty line, which previously raised IndexError.
    return max(len(line) - 1, 0)
254 255
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Each row covers one residue number between the smallest "minres" and the
    largest "maxres" found across the files.  A row starts with the residue
    number, followed by one tab separated column per input file and a
    trailing newline.  A column holds the `datalabel` value of the first
    data line for that residue, or "*" if the file has no such residue.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by `keyatom`; empty if `fn_list` is empty.

    """
    # TODO - Clarify this docstring, add an example?
    fn_list = list(fn_list)
    if not fn_list:
        # Nothing to tabulate; previously this failed with IndexError.
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        for dictionary, label in zip(dict_list, label_line_list):  # s.t. dictionaries
            if key in dictionary:
                # Only the first data line stored for this residue is reported
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
305 306
def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries (PRIVATE).

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    keyatom : str
        The nucleus passed to ``Peaklist.residue_dict`` for each file.

    Returns
    -------
    list
        A two item list ``[dict_list, datalabel_list]``: the residue
        dictionary and the data label header line of each file, both in the
        order of `fn_list`.
    """
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        # Appended directly; the old temporary was named ``dict`` and
        # shadowed the builtin.
        dict_list.append(peaklist.residue_dict(keyatom))
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]
if __name__ == "__main__":
    # Only when executed as a script: call Biopython's run_doctest helper,
    # which presumably runs the doctest examples in this module's docstrings.
    from Bio._utils import run_doctest
    run_doctest()