Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """This module allows you to control fdist. 
  7   
  8  This will allow you to call fdist and associated programs (cplot, 
  9  datacal, pv) by Mark Beaumont. 
 10   
 11  http://www.rubic.rdg.ac.uk/~mab/software.html (old) 
 12  http://www.maths.bris.ac.uk/~mamab/ (new) 
 13  """ 
 14   
 15  import os 
 16  import subprocess 
 17  import sys 
 18  from random import randint 
 19  from time import strftime, clock 
 20  #from logging import debug 
 21   
 22   
23 -def my_float(f):
24 #Because of Jython, mostly 25 if f == "-nan": 26 f = "nan" 27 return float(f)
28 29
30 -class FDistController(object):
31 - def __init__(self, fdist_dir='', ext=None):
32 """Initializes the controller. 33 34 fdist_dir is the directory where fdist2 is. 35 ext is the extension of binaries (.exe on windows, 36 none on Unix) 37 38 """ 39 self.tmp_idx = 0 40 self.fdist_dir = fdist_dir 41 self.os_name = os.name 42 if sys.platform == 'win32': 43 py_ext = '.exe' 44 else: 45 py_ext = '' 46 if ext is None: 47 self.ext = py_ext 48 else: 49 self.ext = ext
50
51 - def _get_path(self, app):
52 """Returns the path to an fdist application. 53 54 Includes Path where fdist can be found plus executable extension. 55 """ 56 if self.fdist_dir == '': 57 return app + self.ext 58 else: 59 return os.sep.join([self.fdist_dir, app]) + self.ext
60
61 - def _get_temp_file(self):
62 """Gets a temporary file name. 63 64 Returns a temporary file name, if executing inside jython 65 tries to replace unexisting tempfile.mkstemp(). 66 """ 67 self.tmp_idx += 1 68 return strftime("%H%M%S") + str(int(clock() * 100)) + str(randint(0, 1000)) + str(self.tmp_idx)
69
70 - def run_datacal(self, data_dir='.', version=1, 71 crit_freq=0.99, p=0.5, beta=(0.25, 0.25)):
72 """Executes datacal. 73 74 data_dir - Where the data is found. 75 """ 76 if version == 1: 77 datacal_name = "datacal" 78 else: 79 datacal_name = "Ddatacal" 80 proc = subprocess.Popen([self._get_path(datacal_name)], 81 universal_newlines=True, 82 stdin=subprocess.PIPE, 83 shell=(sys.platform != "win32"), 84 stdout=subprocess.PIPE, cwd=data_dir) 85 if version == 1: 86 out, err = proc.communicate('a\n') 87 lines = out.split("\n") 88 fst_line = lines[0].rstrip().split(' ') 89 fst = my_float(fst_line[4]) 90 sample_line = lines[1].rstrip().split(' ') 91 sample = int(sample_line[9]) 92 else: 93 out, err = proc.communicate('%f\n%f\n%f %f\na\n' % ( 94 crit_freq, p, beta[0], beta[1])) 95 lines = out.split("\n") 96 l = lines[0].rstrip().split(" ") 97 loci, pops = int(l[-5]), int(l[-2]) 98 fst_line = lines[1].rstrip().split(' ') 99 fst = my_float(fst_line[4]) 100 sample_line = lines[2].rstrip().split(' ') 101 sample = int(sample_line[9]) 102 F_line = lines[3].rstrip().split(' ') 103 F, obs = my_float(F_line[5]), int(F_line[8]) 104 if version == 1: 105 return fst, sample 106 else: 107 return fst, sample, loci, pops, F, obs
108
109 - def _generate_intfile(self, data_dir):
110 """Generates an INTFILE. 111 112 Parameter: 113 data_dir - data directory 114 """ 115 with open(data_dir + os.sep + 'INTFILE', 'w') as inf: 116 for i in range(98): 117 inf.write(str(randint(-sys.maxsize + 1, sys.maxsize - 1)) + '\n') 118 inf.write('8\n')
119
120 - def run_fdist(self, npops, nsamples, fst, sample_size, 121 mut=0, num_sims=50000, data_dir='.', 122 is_dominant=False, theta=0.06, beta=(0.25, 0.25), 123 max_freq=0.99):
124 """Executes (d)fdist. 125 126 Parameters: 127 npops - Number of populations 128 nsamples - Number of populations sampled 129 fst - expected Fst 130 sample_size - Sample size per population 131 For dfdist: if zero a sample size file has to be provided 132 mut - 1=Stepwise, 0=Infinite allele 133 num_sims - number of simulations 134 data_dir - Where the data is found 135 is_dominant - If true executes dfdist 136 theta - Theta (=2Nmu) 137 beta - Parameters for the beta prior 138 max_freq - Maximum allowed frequency of the commonest allele 139 140 Returns: 141 fst - Average Fst 142 143 Important Note: This can take quite a while to run! 144 """ 145 if fst >= 0.9: 146 #Lets not joke 147 fst = 0.899 148 if fst <= 0.0: 149 #0 will make fdist run forever 150 fst = 0.001 151 if is_dominant: 152 config_name = "Dfdist_params" 153 else: 154 config_name = "fdist_params2.dat" 155 156 with open(data_dir + os.sep + config_name, 'w') as f: 157 f.write(str(npops) + '\n') 158 f.write(str(nsamples) + '\n') 159 f.write(str(fst) + '\n') 160 f.write(str(sample_size) + '\n') 161 if is_dominant: 162 f.write(str(theta) + '\n') 163 else: 164 f.write(str(mut) + '\n') 165 f.write(str(num_sims) + '\n') 166 if is_dominant: 167 f.write("%f %f\n" % beta) 168 f.write("%f\n" % max_freq) 169 170 self._generate_intfile(data_dir) 171 172 if is_dominant: 173 bin_name = "Dfdist" 174 else: 175 bin_name = "fdist2" 176 proc = subprocess.Popen([self._get_path(bin_name)], cwd=data_dir, 177 universal_newlines=True, 178 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 179 shell=(sys.platform != "win32")) 180 out, err = proc.communicate('y\n\n') 181 lines = out.split("\n") 182 for line in lines: 183 if line.startswith('average Fst'): 184 fst = my_float(line.rstrip().split(' ')[-1]) 185 return fst
186
187 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 188 mut=0, num_sims=50000, data_dir='.', 189 try_runs=5000, limit=0.001, is_dominant=False, 190 theta=0.06, beta=(0.25, 0.25), 191 max_freq=0.99):
192 """Executes fdist trying to force Fst. 193 194 Parameters: 195 try_runs - Number of simulations on the part trying to get 196 Fst correct 197 limit - Interval limit 198 Other parameters can be seen on run_fdist. 199 """ 200 max_run_fst = 1 201 min_run_fst = 0 202 current_run_fst = fst 203 while True: 204 real_fst = self.run_fdist(npops, nsamples, current_run_fst, 205 sample_size, mut, try_runs, data_dir, 206 is_dominant, theta, beta, max_freq) 207 if abs(real_fst - fst) < limit: 208 return self.run_fdist(npops, nsamples, current_run_fst, 209 sample_size, mut, num_sims, data_dir, 210 is_dominant, theta, beta, max_freq) 211 if real_fst > fst: 212 max_run_fst = current_run_fst 213 if current_run_fst < min_run_fst + limit: 214 #we can do no better 215 #debug('Lower limit is ' + str(min_run_fst)) 216 return self.run_fdist(npops, nsamples, current_run_fst, 217 sample_size, mut, num_sims, 218 data_dir) 219 current_run_fst = (min_run_fst + current_run_fst) / 2 220 else: 221 min_run_fst = current_run_fst 222 if current_run_fst > max_run_fst - limit: 223 return self.run_fdist(npops, nsamples, current_run_fst, 224 sample_size, mut, num_sims, 225 data_dir, is_dominant, theta, 226 beta, max_freq) 227 current_run_fst = (max_run_fst + current_run_fst) / 2
228
229 - def run_cplot(self, ci=0.95, data_dir='.', version=1, smooth=0.04):
230 """Executes cplot. 231 232 ci - Confidence interval. 233 data_dir - Where the data is found. 234 """ 235 236 self._generate_intfile(data_dir) 237 if version == 1: 238 cplot_name = "cplot" 239 else: 240 cplot_name = "cplot2" 241 proc = subprocess.Popen([self._get_path(cplot_name)], cwd=data_dir, 242 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 243 shell=(sys.platform != "win32"), 244 universal_newlines=True) 245 if version == 1: 246 proc.communicate('out.dat out.cpl\n' + str(ci) + '\n') 247 else: 248 proc.communicate("\n".join([ 249 "data_fst_outfile out.cpl out.dat", 250 str(ci), str(smooth)])) 251 252 with open(data_dir + os.sep + 'out.cpl') as f: 253 conf_lines = [] 254 l = f.readline() 255 try: 256 while l != '': 257 conf_lines.append( 258 tuple(my_float(x) for x in l.rstrip().split(' '))) 259 l = f.readline() 260 except ValueError: 261 return [] 262 return conf_lines
263
264 - def run_pv(self, out_file='probs.dat', data_dir='.', 265 version=1, smooth=0.04):
266 """Executes pv. 267 268 out_file - Name of output file. 269 data_dir - Where the data is found. 270 """ 271 272 self._generate_intfile(data_dir) 273 274 if version == 1: 275 pv_name = "pv" 276 else: 277 pv_name = "pv2" 278 279 proc = subprocess.Popen([self._get_path(pv_name)], cwd=data_dir, 280 shell=(sys.platform != "win32"), 281 stdin=subprocess.PIPE, 282 stdout=subprocess.PIPE, 283 universal_newlines=True) 284 proc.communicate('data_fst_outfile ' + out_file + 285 ' out.dat\n' + str(smooth) + '\n') 286 with open(data_dir + os.sep + out_file, 'r') as pvf: 287 result = [tuple(my_float(y) for y in x.rstrip().split(' ')) for x in pvf.readlines()] 288 return result
289