Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames] | [no frames]

Source Code for Module Bio.NMR.xpktools

  1  # Copyright 2004 by Bob Bussell.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Tools to manipulate data from nmrview .xpk peaklist files.""" 
  6   
  7  from __future__ import print_function 
  8   
  9   
# Presumably the number of header lines that precede the data in an nmrview
# .xpk file (Peaklist reads exactly six header lines before the data).
# NOTE(review): not referenced by the code visible in this module - confirm
# whether external callers rely on it.
HEADERLEN = 6
 11   
 12   
class XpkEntry(object):
    """Provide dictionary access to a single entry from an nmrview .xpk file.

    This class is suited for handling single lines of non-header data
    from an nmrview .xpk file. The data are retrieved by the field name
    which is listed in the last line of the peaklist header.

    Parameters
    ----------
    entry : str
        The line from an nmrview .xpk file.
    headline : str
        The line from the header file that gives the names of the entries.
        This is typically the sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Dictionary of fields where key is in header line, value is an entry.
        Variables are accessed by their name in the header line, for example
        self.fields["H1.P"] will return the H1.P entry.
        self.fields["entrynum"] returns the line number (1st field of line).
        For an empty (or whitespace-only) `entry` the dictionary is empty.

    Raises
    ------
    IndexError
        If `headline` names fewer fields than `entry` has data columns.

    """

    def __init__(self, entry, headline):
        """Pair the columns of `entry` with the labels found in `headline`."""
        datlist = entry.split()
        headlist = headline.split()

        # The first column is the entry number and has no label of its own,
        # so the header labels line up with the columns from the second on.
        values = datlist[1:]
        if len(headlist) < len(values):
            raise IndexError(
                "header line names %d fields but the entry has %d data columns"
                % (len(headlist), len(values))
            )

        # Surplus header labels (more labels than columns) are ignored.
        self.fields = dict(zip(headlist, values))

        # Stored last so that it wins over a header label of the same name.
        if datlist:
            self.fields["entrynum"] = datlist[0]
56 57
class Peaklist(object):
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.

    data : list
        File data after header lines.

    Examples
    --------
    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline  # doctest: +NORMALIZE_WHITESPACE
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf  # doctest: +NORMALIZE_WHITESPACE
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels  # doctest: +NORMALIZE_WHITESPACE
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the six header lines and all following data lines of `infn`."""
        with open(infn, "r") as infile:

            # Read in the header lines (the trailing newline is dropped)
            self.firstline = infile.readline().split("\n")[0]
            self.axislabels = infile.readline().split("\n")[0]
            self.dataset = infile.readline().split("\n")[0]
            self.sw = infile.readline().split("\n")[0]
            self.sf = infile.readline().split("\n")[0]
            self.datalabels = infile.readline().split("\n")[0]

            # Read in the data lines to a list
            self.data = [line.split("\n")[0] for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in `data` indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Mappings of index nucleus to data line. Each residue number (as a
            string) maps to the list of data lines for that residue. The extra
            keys "maxres" and "minres" hold the largest and smallest residue
            number as integers, or -1 when there are no data lines. The same
            dictionary is also stored on the instance as ``self.dict``.

        Examples
        --------
        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']  # doctest: +NORMALIZE_WHITESPACE
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        # None marks "nothing seen yet"; using -1 for that purpose would
        # mistake a genuine residue number of -1 for the unset state.
        maxres = None
        minres = None

        # Cast the data lines into the XpkEntry class
        self.dict = {}
        for line in self.data:
            ind = XpkEntry(line, self.datalabels).fields[index + ".L"]
            # The label looks like "<residue>.<atom>"; keep the residue part
            res = int(ind.split(".")[0])

            maxres = res if maxres is None else max(maxres, res)
            minres = res if minres is None else min(minres, res)

            # Start a new list for a new residue, else extend the existing one
            self.dict.setdefault(str(res), []).append(line)

        # -1 is the historical value reported when there is no data at all
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres

        return self.dict

    def write_header(self, outfn):
        """Write header lines from input file to the file named `outfn`.

        The file is opened in text mode because the header lines are stored
        as str; writing str to a binary-mode file raises TypeError on
        Python 3.
        """
        header = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with open(outfn, "w") as outfile:
            for line in header:
                outfile.write(line)
                outfile.write("\n")
193 194
def replace_entry(line, fieldn, newentry):
    """Helper function replace an entry in a string by the field number.

    No padding is implemented currently.  Spacing will change if
    the original field entry and the new field entry are of
    different lengths.

    Parameters
    ----------
    line : str
        The whitespace delimited line to edit.
    fieldn : int
        The number of the field to replace, counted starting with 1.
    newentry : object
        The replacement value; it is converted with ``str()``.

    Returns
    -------
    newline : str
        A copy of `line` with the selected field replaced.

    Raises
    ------
    IndexError
        If no field is found at or after the located start position
        (for example when the line is empty or only trailing blanks remain).

    """
    # This method depends on xpktools._find_start_entry

    start = _find_start_entry(line, fieldn)

    # _find_start_entry reports position 0 for the first field even when the
    # line begins with padding.  Step over any whitespace so that the field
    # itself is replaced rather than the padding in front of it.
    tail = line[start:]
    start += len(tail) - len(tail.lstrip())

    leng = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[(start + leng):]
208 209
210 -def _find_start_entry(line, n):
211 """Find the starting character for entry `n` in a space delimited `line` (PRIVATE). 212 213 n is counted starting with 1. 214 The n=1 field by definition begins at the first character. 215 216 Returns 217 ------- 218 starting character : str 219 The starting character for entry `n`. 220 221 """ 222 # This function is used by replace_entry 223 224 infield = 0 # A flag that indicates that the counter is in a field 225 226 if (n == 1): 227 return 0 # Special case 228 229 # Count the number of fields by counting spaces 230 c = 1 231 leng = len(line) 232 233 # Initialize variables according to whether the first character 234 # is a space or a character 235 if (line[0] == " "): 236 infield = 0 237 field = 0 238 else: 239 infield = 1 240 field = 1 241 242 while (c < leng and field < n): 243 if (infield): 244 if (line[c] == " " and not (line[c - 1] == " ")): 245 infield = 0 246 else: 247 if (not line[c] == " "): 248 infield = 1 249 field = field + 1 250 251 c = c + 1 252 253 return c - 1
254 255
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    One row is produced for every residue number between the smallest and
    the largest residue number found in any of the files.  A row is a tab
    separated string ending in a newline: the residue number followed by
    one column per input file.  A file without that residue contributes
    "*"; otherwise the `datalabel` field of the first line stored for the
    residue is used.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by `keyatom`.

    """
    # TODO - Clarify this docstring, add an example?
    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers, seeded from the first file
    first = dict_list[0]
    minr = first["minres"]
    maxr = first["maxres"]
    for resdict in dict_list:
        maxr = max(maxr, resdict["maxres"])
        minr = min(minr, resdict["minres"])

    outlist = []
    for res in range(minr, maxr + 1):  # one row per residue number
        key = str(res)
        cells = [key]
        for resdict, label in zip(dict_list, label_line_list):  # one column per file
            if key in resdict:
                cells.append(XpkEntry(resdict[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
305 306
def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries (PRIVATE).

    Returns a two element list: the residue dictionaries (one per file, as
    built by Peaklist.residue_dict for `keyatom`) and the matching data
    label lines, both in the order of `fn_list`.
    """
    residue_dicts = []
    label_lines = []
    for filename in fn_list:
        peaks = Peaklist(filename)
        residue_dicts.append(peaks.residue_dict(keyatom))
        label_lines.append(peaks.datalabels)

    return [residue_dicts, label_lines]
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in the docstrings.
    from Bio._utils import run_doctest

    run_doctest()