Package Bio :: Package NMR :: Module xpktools
[hide private]
[frames | no frames]

Source Code for Module Bio.NMR.xpktools

  1  # xpktools.py: A python module containing function definitions and classes 
  2  #          useful for manipulating data from nmrview .xpk peaklist files. 
  3  # 
  4  # ********** INDEX of functions and classes ********** 
  5  # 
  6  #    XpkEntry class: A class suited for handling single lines of 
  7  #        non-header data from an nmrview .xpk file.  This class 
  8  #        provides methods for extracting data by the field name 
  9  #        which is listed in the last line of the peaklist header. 
 10   
 11  import sys 
 12   
 13  # * * * * * INITIALIZATIONS * * * * * 
 14  HEADERLEN = 6 
 15  # * * * * * _______________ * * * * * 
 16   
 17   
class XpkEntry(object):
    """Hold one non-header line of an nmrview .xpk peaklist file.

    Usage: ``XpkEntry(xpkentry, xpkheadline)`` where ``xpkentry`` is a data
    line from an nmrview .xpk file and ``xpkheadline`` is the header line
    that names the data columns (typically the sixth header line, counting
    from 1).

    Values are accessed through the ``fields`` dictionary by their column
    label, e.g. ``self.fields["H1.P"]`` returns the H1.P entry, and
    ``self.fields["entrynum"]`` returns the first field of the line.
    All values are stored as the strings found in the line.
    """

    def __init__(self, entry, headline):
        """Split ``entry`` and label its fields using ``headline``.

        The first field of ``entry`` has no label in the header line; it is
        stored under the key ``"entrynum"``.  The remaining fields are
        paired, in order, with the whitespace-separated labels of
        ``headline``.  Surplus labels or surplus data fields are ignored.
        An empty ``entry`` produces an empty ``fields`` dictionary.
        """
        datlist = entry.split()
        headlist = headline.split()

        # datlist[0] is the entry number, so the labels line up with
        # datlist[1:].  zip() stops at the shorter sequence, which avoids an
        # IndexError when a data line has more columns than the header.
        self.fields = dict(zip(headlist, datlist[1:]))

        if datlist:
            self.fields["entrynum"] = datlist[0]
class Peaklist(object):
    """Read an entire nmrview .xpk peaklist file.

    The first six lines of the file are treated as the header and are
    available as the attributes ``firstline``, ``axislabels``, ``dataset``,
    ``sw``, ``sf`` and ``datalabels``.  All following lines are stored,
    without their trailing newline, in the list ``data``.
    """

    def __init__(self, infn):
        """Load the header and data lines from the file named ``infn``."""
        # The context manager guarantees the file is closed, even when
        # reading fails part-way through.
        with open(infn, "r") as infile:
            # Read in the header lines
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")

            # Read in the data lines to a list
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dictionary of data lines indexed by residue number.

        ``index`` is the nucleus to index by, given in the same form as it
        appears in the xpk label line (H1, 15N for example).  The residue
        number is the integer before the first "." of that nucleus' ".L"
        column.

        Each residue key is the residue number as a string and maps to the
        list of data lines for that residue.  The keys ``"maxres"`` and
        ``"minres"`` hold the largest and smallest residue number as
        integers; both are -1 when there are no data lines.  The dictionary
        is also stored as ``self.dict``.
        """
        # None (rather than -1) marks "nothing seen yet", so that a residue
        # numbered -1 is not mistaken for the unset state.
        maxres = None
        minres = None

        self.dict = {}
        for line in self.data:
            label = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(label.split(".")[0])

            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res

            # Lines sharing a residue number accumulate under one key
            self.dict.setdefault(str(res), []).append(line)

        # -1 is kept as the value reported for an empty peaklist
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines to the file named ``outfn``.

        The file is opened with the module's ``_try_open_write`` helper and
        is closed again even if a write fails.
        """
        header = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with _try_open_write(outfn) as outfile:
            for item in header:
                outfile.write(item)
                outfile.write("\n")
125 -def _try_open_read(fn):
126 # Try to open a file for reading. Exit on IOError 127 try: 128 infile = open(fn, 'r') 129 except IOError, e: 130 print "file", fn, "could not be opened for reading - quitting." 131 sys.exit(0) 132 return infile
133 134
135 -def _try_open_write(fn):
136 # Try to open a file for writing. Exit on IOError 137 try: 138 infile = open(fn, 'w') 139 except IOError, e: 140 print "file", fn, "could not be opened for writing - quitting." 141 sys.exit(0) 142 return infile
143 144
def replace_entry(line, fieldn, newentry):
    """Return ``line`` with field number ``fieldn`` replaced by ``newentry``.

    The position of the field is found with xpktools._find_start_entry,
    which counts fields starting from 1.  ``newentry`` is converted with
    ``str``.  No padding is implemented: the spacing of the line changes
    when the old and the new entry differ in length.
    """
    begin = _find_start_entry(line, fieldn)
    remainder = line[begin:]
    # Width of the entry currently occupying the field
    old_width = len(remainder.split()[0])
    return "".join([line[:begin], str(newentry), remainder[old_width:]])
158 -def _find_start_entry(line, n):
159 # find the starting point character for the n'th entry in 160 # a space delimited line. n is counted starting with 1 161 # The n=1 field by definition begins at the first character 162 # This function is used by replace_entry 163 164 infield = 0 # A flag that indicates that the counter is in a field 165 166 if (n == 1): 167 return 0 # Special case 168 169 # Count the number of fields by counting spaces 170 c = 1 171 leng = len(line) 172 173 # Initialize variables according to whether the first character 174 # is a space or a character 175 if (line[0] == " "): 176 infield = 0 177 field = 0 178 else: 179 infield = 1 180 field = 1 181 182 while (c < leng and field < n): 183 if (infield): 184 if (line[c] == " " and not (line[c-1] == " ")): 185 infield = 0 186 else: 187 if (not line[c] == " "): 188 infield = 1 189 field = field + 1 190 191 c = c + 1 192 193 return c - 1
194 195
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    ``fn_list`` is the list of xpk file names, ``datalabel`` is the column
    label of the data element to report and ``keyatom`` is the nucleus by
    which the table is indexed.

    Returns a list of tab-separated, newline-terminated strings, one per
    residue number from the smallest to the largest residue found in any
    file.  Each line starts with the residue number, followed by one value
    per input file taken from the first data line of that residue, or "*"
    when the file has no entry for it.  An empty ``fn_list`` gives an empty
    list.
    """
    if not fn_list:
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)
    if not dict_list:
        # e.g. fn_list was an exhausted iterator
        return []

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):
        key = str(res)
        cells = [key]
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                # Only the first data line of a residue is reported
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist
234 -def _sort_keys(dictionary):
235 keys = dictionary.keys() 236 sorted_keys = keys.sort() 237 return sorted_keys
238 239
240 -def _read_dicts(fn_list, keyatom):
241 # Read multiple files into a list of residue dictionaries 242 dict_list = [] 243 datalabel_list = [] 244 for fn in fn_list: 245 peaklist = Peaklist(fn) 246 dict = peaklist.residue_dict(keyatom) 247 dict_list.append(dict) 248 datalabel_list.append(peaklist.datalabels) 249 250 return [dict_list, datalabel_list]
251