Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames | no frames]

Source Code for Module Bio.NMR.xpktools

  1  # Copyright 2004 by Bob Bussell.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.  
  5  """For manipulating data from nmrview .xpk peaklist files. 
  6   
  7  XpkEntry class: A class suited for handling single lines of 
  8  non-header data from an nmrview .xpk file.  This class 
  9  provides methods for extracting data by the field name 
 10  which is listed in the last line of the peaklist header. 
 11  """ 
 12   
 13  from __future__ import print_function 
 14   
 15  import sys 
 16   
 17  HEADERLEN = 6 
 18   
 19   
class XpkEntry(object):
    """Entry from a .xpk file.

    Usage: XpkEntry(xpkentry, xpkheadline) where xpkentry is a data line
    from an nmrview .xpk file and xpkheadline is the header line that
    gives the names of the entries, which is typically the sixth line
    of the header (counting from 1).

    Values are accessed by their name in the header line, e.g.
    self.fields["H1.P"] returns the H1.P entry.
    self.fields["entrynum"] returns the line number (1st field of line).
    """

    def __init__(self, entry, headline):
        """Split a data line and pair its columns with the header labels.

        Arguments:
         - entry - one whitespace-delimited data line of the peaklist.
         - headline - the whitespace-delimited line of column labels.

        The first token of ``entry`` is stored under the key "entrynum";
        the remaining tokens are paired, in order, with the labels in
        ``headline``.  All values are kept as strings.
        """
        datlist = entry.split()
        headlist = headline.split()

        # Holds all fields from the input line in a dictionary whose keys
        # are the data labels from the .xpk header.  zip() stops at the
        # shorter sequence, so a line with more columns than the header
        # has labels no longer raises IndexError.
        self.fields = dict(zip(headlist, datlist[1:]))

        # A blank line has no entry number; leave the key out in that case.
        if datlist:
            self.fields["entrynum"] = datlist[0]
47 48
class Peaklist(object):
    """For loading an entire .xpk file.

    This class reads in an entire .xpk file.  The six header lines are
    available as the attributes ``firstline``, ``axislabels``,
    ``dataset``, ``sw``, ``sf`` and ``datalabels`` (in file order), and
    the remaining data lines are available as the list ``data``.
    All lines are stored without their trailing newline.
    """

    def __init__(self, infn):
        """Read the peaklist file named ``infn`` into memory."""
        with open(infn, "r") as infile:
            # Read in the header lines, dropping the line terminator
            self.firstline = infile.readline().split("\n")[0]
            self.axislabels = infile.readline().split("\n")[0]
            self.dataset = infile.readline().split("\n")[0]
            self.sw = infile.readline().split("\n")[0]
            self.sf = infile.readline().split("\n")[0]
            self.datalabels = infile.readline().split("\n")[0]

            # Read in the data lines to a list
            self.data = [line.split("\n")[0] for line in infile]

    def residue_dict(self, index):
        """Generate a dictionary indexed by residue number for a nucleus.

        The nucleus should be given as the input argument in the
        same form as it appears in the xpk label line (H1, 15N for example).

        The returned dictionary (also stored as ``self.dict``) maps the
        residue number, as a string, to the list of raw data lines whose
        ``<index>.L`` label starts with that residue number.  Two extra
        keys, "maxres" and "minres", hold the largest and smallest residue
        numbers as integers; both are -1 when there are no data lines.

        Raises KeyError if the label line has no ``<index>.L`` column and
        ValueError if a label does not start with an integer.
        """
        self.dict = {}
        residues = []

        for line in self.data:
            # Cast the data line into the XpkEntry class to look up the
            # assignment label, e.g. "3.hn" -> residue 3
            label = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(label.split(".")[0])
            residues.append(res)

            # Start a new list for a new residue, otherwise extend the
            # list already stored under the same key
            self.dict.setdefault(str(res), []).append(line)

        # Compute the extremes from the collected numbers rather than
        # using -1 as a "not yet set" marker, which gave wrong results
        # when a residue was itself numbered -1.
        self.dict["maxres"] = max(residues) if residues else -1
        self.dict["minres"] = min(residues) if residues else -1

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines to the file named ``outfn``.

        Any existing file of that name is overwritten.
        """
        header = (self.firstline, self.axislabels, self.dataset,
                  self.sw, self.sf, self.datalabels)
        # Text mode: the header lines are str, which cannot be written
        # to a file opened in binary mode on Python 3.
        with open(outfn, "w") as outfile:
            for entry in header:
                outfile.write(entry)
                outfile.write("\n")
126 127
def replace_entry(line, fieldn, newentry):
    """Return a copy of ``line`` with field number ``fieldn`` replaced.

    Fields are counted from 1.  ``newentry`` is converted with str()
    before being inserted.  No padding is applied, so the spacing of
    the line changes when the old and new entries differ in length.
    """
    # Relies on xpktools._find_start_entry to locate the field
    begin = _find_start_entry(line, fieldn)
    old_token = line[begin:].split()[0]
    end = begin + len(old_token)
    return "".join([line[:begin], str(newentry), line[end:]])
141 142
143 -def _find_start_entry(line, n):
144 # find the starting point character for the n'th entry in 145 # a space delimited line. n is counted starting with 1 146 # The n=1 field by definition begins at the first character 147 # This function is used by replace_entry 148 149 infield = 0 # A flag that indicates that the counter is in a field 150 151 if (n == 1): 152 return 0 # Special case 153 154 # Count the number of fields by counting spaces 155 c = 1 156 leng = len(line) 157 158 # Initialize variables according to whether the first character 159 # is a space or a character 160 if (line[0] == " "): 161 infield = 0 162 field = 0 163 else: 164 infield = 1 165 field = 1 166 167 while (c < leng and field < n): 168 if (infield): 169 if (line[c] == " " and not (line[c-1] == " ")): 170 infield = 0 171 else: 172 if (not line[c] == " "): 173 infield = 1 174 field = field + 1 175 176 c = c + 1 177 178 return c - 1
179 180
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Arguments:
     - fn_list - the names of the .xpk files to read.
     - datalabel - the column label (as in the .xpk label line) of the
       data element to report.
     - keyatom - the nucleus whose residue number indexes the table.

    Returns a list of strings, one per residue number from the smallest
    to the largest found in any of the files.  Each string is the
    residue number followed by one tab-separated value per input file
    and a trailing newline.  A file with no peak for that residue
    contributes "*"; when a file has several peaks for a residue only
    the first one is reported.  An empty ``fn_list`` gives an empty list.
    """
    fn_list = list(fn_list)
    if not fn_list:
        # Nothing to tabulate; avoids indexing into an empty list below
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):
        key = str(res)
        cells = [key]
        # One column per input file, each parsed with its own label line
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
219 220
221 -def _read_dicts(fn_list, keyatom):
222 # Read multiple files into a list of residue dictionaries 223 dict_list = [] 224 datalabel_list = [] 225 for fn in fn_list: 226 peaklist = Peaklist(fn) 227 dict = peaklist.residue_dict(keyatom) 228 dict_list.append(dict) 229 datalabel_list.append(peaklist.datalabels) 230 231 return [dict_list, datalabel_list]
232