Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """This module allows you to control fdist (DEPRECATED). 
  7   
  8  This will allow you to call fdist and associated programs (cplot, 
  9  datacal, pv) by Mark Beaumont. 
 10   
 11  http://www.rubic.rdg.ac.uk/~mab/software.html (old) 
 12  http://www.maths.bris.ac.uk/~mamab/ (new) 
 13  """ 
 14   
 15  import os 
 16  import subprocess 
 17  import sys 
 18  from random import randint 
 19  from time import strftime, clock 
 20  # from logging import debug 
 21   
 22   
23 -def my_float(f):
24 # Because of Jython, mostly 25 if f == "-nan": 26 f = "nan" 27 return float(f)
28 29
30 -class FDistController(object):
31 - def __init__(self, fdist_dir='', ext=None):
32 """Initializes the controller. 33 34 fdist_dir is the directory where fdist2 is. 35 ext is the extension of binaries (.exe on windows, 36 none on Unix) 37 """ 38 self.tmp_idx = 0 39 self.fdist_dir = fdist_dir 40 self.os_name = os.name 41 if sys.platform == 'win32': 42 py_ext = '.exe' 43 else: 44 py_ext = '' 45 if ext is None: 46 self.ext = py_ext 47 else: 48 self.ext = ext
49
50 - def _get_path(self, app):
51 """Returns the path to an fdist application. 52 53 Includes Path where fdist can be found plus executable extension. 54 """ 55 if self.fdist_dir == '': 56 return app + self.ext 57 else: 58 return os.sep.join([self.fdist_dir, app]) + self.ext
59
60 - def _get_temp_file(self):
61 """Gets a temporary file name. 62 63 Returns a temporary file name, if executing inside jython 64 tries to replace unexisting tempfile.mkstemp(). 65 """ 66 self.tmp_idx += 1 67 return strftime("%H%M%S") + str(int(clock() * 100)) + str(randint(0, 1000)) + str(self.tmp_idx)
68
69 - def run_datacal(self, data_dir='.', version=1, 70 crit_freq=0.99, p=0.5, beta=(0.25, 0.25)):
71 """Executes datacal. 72 73 data_dir - Where the data is found. 74 """ 75 if version == 1: 76 datacal_name = "datacal" 77 else: 78 datacal_name = "Ddatacal" 79 proc = subprocess.Popen([self._get_path(datacal_name)], 80 universal_newlines=True, 81 stdin=subprocess.PIPE, 82 shell=(sys.platform != "win32"), 83 stdout=subprocess.PIPE, cwd=data_dir) 84 if version == 1: 85 out, err = proc.communicate('a\n') 86 lines = out.split("\n") 87 fst_line = lines[0].rstrip().split(' ') 88 fst = my_float(fst_line[4]) 89 sample_line = lines[1].rstrip().split(' ') 90 sample = int(sample_line[9]) 91 else: 92 out, err = proc.communicate('%f\n%f\n%f %f\na\n' % ( 93 crit_freq, p, beta[0], beta[1])) 94 lines = out.split("\n") 95 l = lines[0].rstrip().split(" ") 96 loci, pops = int(l[-5]), int(l[-2]) 97 fst_line = lines[1].rstrip().split(' ') 98 fst = my_float(fst_line[4]) 99 sample_line = lines[2].rstrip().split(' ') 100 sample = int(sample_line[9]) 101 F_line = lines[3].rstrip().split(' ') 102 F, obs = my_float(F_line[5]), int(F_line[8]) 103 if version == 1: 104 return fst, sample 105 else: 106 return fst, sample, loci, pops, F, obs
107
108 - def _generate_intfile(self, data_dir):
109 """Generates an INTFILE. 110 111 Parameter: 112 - data_dir - data directory 113 """ 114 inf = open(data_dir + os.sep + 'INTFILE', 'w') 115 for i in range(98): 116 inf.write(str(randint(-2 ** 31 + 1, 2 ** 31 - 1)) + '\n') 117 inf.write('8\n') 118 inf.close()
119
120 - def run_fdist(self, npops, nsamples, fst, sample_size, 121 mut=0, num_sims=50000, data_dir='.', 122 is_dominant=False, theta=0.06, beta=(0.25, 0.25), 123 max_freq=0.99):
124 """Executes (d)fdist. 125 126 Parameters: 127 128 - npops - Number of populations 129 - nsamples - Number of populations sampled 130 - fst - expected Fst 131 - sample_size - Sample size per population 132 For dfdist: if zero a sample size file has to be provided 133 - mut - 1=Stepwise, 0=Infinite allele 134 - num_sims - number of simulations 135 - data_dir - Where the data is found 136 - is_dominant - If true executes dfdist 137 - theta - Theta (=2Nmu) 138 - beta - Parameters for the beta prior 139 - max_freq - Maximum allowed frequency of the commonest allele 140 141 Returns: 142 143 - fst - Average Fst 144 145 Important Note: This can take quite a while to run! 146 """ 147 if fst >= 0.9: 148 # Lets not joke 149 fst = 0.899 150 if fst <= 0.0: 151 # 0 will make fdist run forever 152 fst = 0.001 153 if is_dominant: 154 config_name = "Dfdist_params" 155 else: 156 config_name = "fdist_params2.dat" 157 158 f = open(data_dir + os.sep + config_name, 'w') 159 f.write(str(npops) + '\n') 160 f.write(str(nsamples) + '\n') 161 f.write(str(fst) + '\n') 162 f.write(str(sample_size) + '\n') 163 if is_dominant: 164 f.write(str(theta) + '\n') 165 else: 166 f.write(str(mut) + '\n') 167 f.write(str(num_sims) + '\n') 168 if is_dominant: 169 f.write("%f %f\n" % beta) 170 f.write("%f\n" % max_freq) 171 f.close() 172 173 self._generate_intfile(data_dir) 174 175 if is_dominant: 176 bin_name = "Dfdist" 177 else: 178 bin_name = "fdist2" 179 proc = subprocess.Popen([self._get_path(bin_name)], cwd=data_dir, 180 universal_newlines=True, 181 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 182 shell=(sys.platform != "win32")) 183 out, err = proc.communicate('y\n\n') 184 lines = out.split("\n") 185 for line in lines: 186 if line.startswith('average Fst'): 187 fst = my_float(line.rstrip().split(' ')[-1]) 188 return fst
189
190 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 191 mut=0, num_sims=50000, data_dir='.', 192 try_runs=5000, limit=0.001, is_dominant=False, 193 theta=0.06, beta=(0.25, 0.25), 194 max_freq=0.99):
195 """Executes fdist trying to force Fst. 196 197 Parameters: 198 199 - try_runs - Number of simulations on the part trying to get 200 Fst correct 201 - limit - Interval limit 202 203 Other parameters can be seen on run_fdist. 204 """ 205 max_run_fst = 1 206 min_run_fst = 0 207 current_run_fst = fst 208 while True: 209 real_fst = self.run_fdist(npops, nsamples, current_run_fst, 210 sample_size, mut, try_runs, data_dir, 211 is_dominant, theta, beta, max_freq) 212 if abs(real_fst - fst) < limit: 213 return self.run_fdist(npops, nsamples, current_run_fst, 214 sample_size, mut, num_sims, data_dir, 215 is_dominant, theta, beta, max_freq) 216 if real_fst > fst: 217 max_run_fst = current_run_fst 218 if current_run_fst < min_run_fst + limit: 219 # we can do no better 220 # debug('Lower limit is ' + str(min_run_fst)) 221 return self.run_fdist(npops, nsamples, current_run_fst, 222 sample_size, mut, num_sims, 223 data_dir) 224 current_run_fst = (min_run_fst + current_run_fst) / 2 225 else: 226 min_run_fst = current_run_fst 227 if current_run_fst > max_run_fst - limit: 228 return self.run_fdist(npops, nsamples, current_run_fst, 229 sample_size, mut, num_sims, 230 data_dir, is_dominant, theta, 231 beta, max_freq) 232 current_run_fst = (max_run_fst + current_run_fst) / 2
233
234 - def run_cplot(self, ci=0.95, data_dir='.', version=1, smooth=0.04):
235 """Executes cplot. 236 237 ci - Confidence interval. 238 data_dir - Where the data is found. 239 """ 240 self._generate_intfile(data_dir) 241 if version == 1: 242 cplot_name = "cplot" 243 else: 244 cplot_name = "cplot2" 245 proc = subprocess.Popen([self._get_path(cplot_name)], cwd=data_dir, 246 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 247 shell=(sys.platform != "win32"), 248 universal_newlines=True) 249 if version == 1: 250 proc.communicate('out.dat out.cpl\n' + str(ci) + '\n') 251 else: 252 proc.communicate("\n".join([ 253 "data_fst_outfile out.cpl out.dat", 254 str(ci), str(smooth)])) 255 256 f = open(data_dir + os.sep + 'out.cpl') 257 conf_lines = [] 258 l = f.readline() 259 try: 260 while l != '': 261 conf_lines.append( 262 tuple(my_float(x) for x in l.rstrip().split(' '))) 263 l = f.readline() 264 except ValueError: 265 f.close() 266 return [] 267 f.close() 268 return conf_lines
269
270 - def run_pv(self, out_file='probs.dat', data_dir='.', 271 version=1, smooth=0.04):
272 """Executes pv. 273 274 out_file - Name of output file. 275 data_dir - Where the data is found. 276 """ 277 self._generate_intfile(data_dir) 278 279 if version == 1: 280 pv_name = "pv" 281 else: 282 pv_name = "pv2" 283 284 proc = subprocess.Popen([self._get_path(pv_name)], cwd=data_dir, 285 shell=(sys.platform != "win32"), 286 stdin=subprocess.PIPE, 287 stdout=subprocess.PIPE, 288 universal_newlines=True) 289 proc.communicate('data_fst_outfile ' + out_file + 290 ' out.dat\n' + str(smooth) + '\n') 291 pvf = open(data_dir + os.sep + out_file, 'r') 292 result = [tuple(my_float(y) for y in x.rstrip().split(' ')) for x in pvf.readlines()] 293 pvf.close() 294 return result
295