Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Module to control GenePop.""" 
  7   
  8  import os 
  9  import re 
 10  import shutil 
 11  import sys 
 12  import tempfile 
 13   
 14  from Bio.Application import AbstractCommandline, _Argument 
 15   
 16   
17 -def _gp_float(tok):
18 """Gets a float from a token, if it fails, returns the string (PRIVATE).""" 19 try: 20 return float(tok) 21 except ValueError: 22 return str(tok)
23 24
25 -def _gp_int(tok):
26 """Gets a int from a token, if it fails, returns the string (PRIVATE).""" 27 try: 28 return int(tok) 29 except ValueError: 30 return str(tok)
31 32
33 -def _read_allele_freq_table(f):
34 l = f.readline() 35 while ' --' not in l: 36 if l == "": 37 raise StopIteration 38 if 'No data' in l: 39 return None, None 40 l = f.readline() 41 alleles = [x for x in f.readline().rstrip().split(" ") if x != ''] 42 alleles = [_gp_int(x) for x in alleles] 43 l = f.readline().rstrip() 44 table = [] 45 while l != "": 46 line = [x for x in l.split(" ") if x != ''] 47 try: 48 table.append( 49 (line[0], [_gp_float(x) for x in line[1: -1]], 50 _gp_int(line[-1]))) 51 except ValueError: 52 table.append( 53 (line[0], [None] * len(alleles), 0)) 54 l = f.readline().rstrip() 55 return alleles, table
56 57
58 -def _read_table(f, funs):
59 table = [] 60 l = f.readline().rstrip() 61 while '---' not in l: 62 l = f.readline().rstrip() 63 l = f.readline().rstrip() 64 while '===' not in l and '---' not in l and l != "": 65 toks = [x for x in l.split(" ") if x != ""] 66 line = [] 67 for i in range(len(toks)): 68 try: 69 line.append(funs[i](toks[i])) 70 except ValueError: 71 line.append(toks[i]) # Could not cast 72 table.append(tuple(line)) 73 l = f.readline().rstrip() 74 return table
75 76
77 -def _read_triangle_matrix(f):
78 matrix = [] 79 l = f.readline().rstrip() 80 while l != "": 81 matrix.append( 82 [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]]) 83 l = f.readline().rstrip() 84 return matrix
85 86
87 -def _read_headed_triangle_matrix(f):
88 matrix = {} 89 header = f.readline().rstrip() 90 if '---' in header or '===' in header: 91 header = f.readline().rstrip() 92 nlines = len([x for x in header.split(' ') if x != '']) - 1 93 for line_pop in range(nlines): 94 l = f.readline().rstrip() 95 vals = [x for x in l.split(' ')[1:] if x != ''] 96 clean_vals = [] 97 for val in vals: 98 try: 99 clean_vals.append(_gp_float(val)) 100 except ValueError: 101 clean_vals.append(None) 102 for col_pop in range(len(clean_vals)): 103 matrix[(line_pop + 1, col_pop)] = clean_vals[col_pop] 104 return matrix
105 106
107 -def _hw_func(stream, is_locus, has_fisher=False):
108 l = stream.readline() 109 if is_locus: 110 hook = "Locus " 111 else: 112 hook = " Pop : " 113 while l != "": 114 if l.startswith(hook): 115 stream.readline() 116 stream.readline() 117 stream.readline() 118 table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str]) 119 # loci might mean pop if hook="Locus " 120 loci = {} 121 for entry in table: 122 if len(entry) < 4: 123 loci[entry[0]] = None 124 else: 125 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 126 if se == "-": 127 se = None 128 loci[locus] = p, se, fis_wc, fis_rh, steps 129 return loci 130 l = stream.readline() 131 # self.done = True 132 raise StopIteration
133 134
135 -class _FileIterator(object):
136 """Iterator which crawls over a stream of lines with a function (PRIVATE). 137 138 The generator function is expected to yield a tuple, while 139 consuming input 140 """ 141
142 - def __init__(self, func, fname, handle=None):
143 self.func = func 144 if handle is None: 145 self.stream = open(fname) 146 else: 147 # For special cases where calling code wants to 148 # seek into the file before starting: 149 self.stream = handle 150 self.fname = fname 151 self.done = False
152
153 - def __iter__(self):
154 if self.done: 155 self.done = True 156 raise StopIteration 157 return self
158
159 - def __next__(self):
160 return self.func(self)
161 162 if sys.version_info[0] < 3:
163 - def next(self):
164 """Python 2 style alias for Python 3 style __next__ method.""" 165 return self.__next__()
166
167 - def __del__(self):
168 self.stream.close() 169 try: 170 os.remove(self.fname) 171 except OSError: 172 # Jython seems to call the iterator twice 173 pass
174 175
176 -class _GenePopCommandline(AbstractCommandline):
177 """Command Line Wrapper for GenePop (PRIVATE).""" 178
179 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
180 self.parameters = [ 181 _Argument(["command"], "GenePop option to be called", 182 is_required=True), 183 _Argument(["mode"], "Should allways be batch", is_required=True), 184 _Argument(["input"], "Input file", is_required=True), 185 _Argument(["Dememorization"], "Dememorization step"), 186 _Argument(["BatchNumber"], "Number of MCMC batches"), 187 _Argument(["BatchLength"], "Length of MCMC chains"), 188 _Argument(["HWtests"], "Enumeration or MCMC"), 189 _Argument(["IsolBDstatistic"], "IBD statistic (a or e)"), 190 _Argument(["MinimalDistance"], "Minimal IBD distance"), 191 _Argument(["GeographicScale"], "Log or Linear"), 192 ] 193 AbstractCommandline.__init__(self, cmd, **kwargs) 194 self.set_parameter("mode", "Mode=Batch")
195
196 - def set_menu(self, option_list):
197 """Sets the menu option. 198 199 Example set_menu([6,1]) = get all F statistics (menu 6.1) 200 """ 201 self.set_parameter("command", "MenuOptions=" + 202 ".".join(str(x) for x in option_list))
203
204 - def set_input(self, fname):
205 """Sets the input file name.""" 206 self.set_parameter("input", "InputFile=" + fname)
207 208
209 -class GenePopController(object):
210 - def __init__(self, genepop_dir=None):
211 """Initializes the controller. 212 213 genepop_dir is the directory where GenePop is. 214 215 The binary should be called Genepop (capital G) 216 """ 217 self.controller = _GenePopCommandline(genepop_dir)
218
219 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
220 opts = {} 221 opts["Dememorization"] = dememorization 222 opts["BatchNumber"] = batches 223 opts["BatchLength"] = iterations 224 if enum_test is not None: 225 if enum_test is True: 226 opts["HWtests"] = "Enumeration" 227 else: 228 opts["HWtests"] = "MCMC" 229 return opts
230
231 - def _run_genepop(self, extensions, option, fname, opts=None):
232 if opts is None: 233 opts = {} 234 cwd = os.getcwd() 235 temp_dir = tempfile.mkdtemp() 236 os.chdir(temp_dir) 237 self.controller.set_menu(option) 238 if os.path.isabs(fname): 239 self.controller.set_input(fname) 240 else: 241 self.controller.set_input(cwd + os.sep + fname) 242 for opt in opts: 243 self.controller.set_parameter(opt, opt + "=" + str(opts[opt])) 244 self.controller() # checks error level is zero 245 os.chdir(cwd) 246 shutil.rmtree(temp_dir) 247 return
248
249 - def _test_pop_hz_both(self, fname, type, ext, enum_test=True, 250 dememorization=10000, batches=20, 251 iterations=5000):
252 """Hardy-Weinberg test for heterozygote deficiency/excess. 253 254 Returns a population iterator containing a dictionary where 255 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 256 257 Some loci have a None if the info is not available. 258 SE might be none (for enumerations). 259 """ 260 opts = self._get_opts(dememorization, batches, iterations, enum_test) 261 self._run_genepop([ext], [1, type], fname, opts) 262 263 def hw_func(self): 264 return _hw_func(self.stream, False)
265 266 return _FileIterator(hw_func, fname + ext)
267
268 - def _test_global_hz_both(self, fname, type, ext, enum_test=True, 269 dememorization=10000, batches=20, 270 iterations=5000):
271 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 272 273 Returns a triple with: 274 - A list per population containing (pop_name, P-val, SE, switches). 275 Some pops have a None if the info is not available. 276 SE might be none (for enumerations). 277 - A list per loci containing (locus_name, P-val, SE, switches). 278 Some loci have a None if the info is not available. 279 SE might be none (for enumerations). 280 - Overall results (P-val, SE, switches). 281 282 """ 283 opts = self._get_opts(dememorization, batches, iterations, enum_test) 284 self._run_genepop([ext], [1, type], fname, opts) 285 286 def hw_pop_func(self): 287 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
288 289 with open(fname + ext) as f1: 290 l = f1.readline() 291 while "by population" not in l: 292 l = f1.readline() 293 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 294 with open(fname + ext) as f2: 295 l = f2.readline() 296 while "by locus" not in l: 297 l = f2.readline() 298 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 299 with open(fname + ext) as f: 300 l = f.readline() 301 while "all locus" not in l: 302 l = f.readline() 303 f.readline() 304 f.readline() 305 f.readline() 306 f.readline() 307 l = f.readline().rstrip() 308 p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""]) 309 return pop_p, loc_p, (p, se, switches) 310 311 # 1.1
312 - def test_pop_hz_deficiency(self, fname, enum_test=True, 313 dememorization=10000, batches=20, 314 iterations=5000):
315 """Hardy-Weinberg test for heterozygote deficiency. 316 317 Returns a population iterator containing a dictionary wehre 318 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 319 320 Some loci have a None if the info is not available. 321 SE might be none (for enumerations). 322 """ 323 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 324 dememorization, batches, iterations)
325 326 # 1.2
327 - def test_pop_hz_excess(self, fname, enum_test=True, 328 dememorization=10000, batches=20, 329 iterations=5000):
330 """Hardy-Weinberg test for heterozygote deficiency. 331 332 Returns a population iterator containing a dictionary where 333 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 334 335 Some loci have a None if the info is not available. 336 SE might be none (for enumerations). 337 """ 338 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 339 dememorization, batches, iterations)
340 341 # 1.3 P file
342 - def test_pop_hz_prob(self, fname, ext, enum_test=False, 343 dememorization=10000, batches=20, 344 iterations=5000):
345 """Hardy-Weinberg test based on probability. 346 347 Returns 2 iterators and a final tuple: 348 349 1. Returns a loci iterator containing: 350 - A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps). 351 Some pops have a None if the info is not available. 352 SE might be none (for enumerations). 353 - Result of Fisher's test (Chi2, deg freedom, prob). 354 2. Returns a population iterator containing: 355 - A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 356 Some loci have a None if the info is not available. 357 SE might be none (for enumerations). 358 - Result of Fisher's test (Chi2, deg freedom, prob). 359 3. Final tuple (Chi2, deg freedom, prob). 360 361 """ 362 opts = self._get_opts(dememorization, batches, iterations, enum_test) 363 self._run_genepop([ext], [1, 3], fname, opts) 364 365 def hw_prob_loci_func(self): 366 return _hw_func(self.stream, True, True)
367 368 def hw_prob_pop_func(self): 369 return _hw_func(self.stream, False, True) 370 371 shutil.copyfile(fname + ".P", fname + ".P2") 372 373 return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2") 374 375 # 1.4
376 - def test_global_hz_deficiency(self, fname, enum_test=True, 377 dememorization=10000, batches=20, 378 iterations=5000):
379 """Global Hardy-Weinberg test for heterozygote deficiency. 380 381 Returns a triple with: 382 - An list per population containing (pop_name, P-val, SE, switches). 383 Some pops have a None if the info is not available. 384 SE might be none (for enumerations). 385 - An list per loci containing (locus_name, P-val, SE, switches). 386 Some loci have a None if the info is not available. 387 SE might be none (for enumerations). 388 - Overall results (P-val, SE, switches). 389 390 """ 391 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 392 dememorization, batches, iterations)
393 394 # 1.5
395 - def test_global_hz_excess(self, fname, enum_test=True, 396 dememorization=10000, batches=20, 397 iterations=5000):
398 """Global Hardy-Weinberg test for heterozygote excess. 399 400 Returns a triple with: 401 - A list per population containing (pop_name, P-val, SE, switches). 402 Some pops have a None if the info is not available. 403 SE might be none (for enumerations). 404 - A list per loci containing (locus_name, P-val, SE, switches). 405 Some loci have a None if the info is not available. 406 SE might be none (for enumerations). 407 - Overall results (P-val, SE, switches) 408 409 """ 410 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 411 dememorization, batches, iterations)
412 413 # 2.1
414 - def test_ld(self, fname, dememorization=10000, 415 batches=20, iterations=5000):
416 opts = self._get_opts(dememorization, batches, iterations) 417 self._run_genepop([".DIS"], [2, 1], fname, opts) 418 419 def ld_pop_func(self): 420 current_pop = None 421 l = self.stream.readline().rstrip() 422 if l == "": 423 self.done = True 424 raise StopIteration 425 toks = [x for x in l.split(" ") if x != ""] 426 pop, locus1, locus2 = toks[0], toks[1], toks[2] 427 if not hasattr(self, "start_locus1"): 428 start_locus1, start_locus2 = locus1, locus2 429 current_pop = -1 430 if locus1 == start_locus1 and locus2 == start_locus2: 431 current_pop += 1 432 if toks[3] == "No": 433 return current_pop, pop, (locus1, locus2), None 434 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 435 return current_pop, pop, (locus1, locus2), (p, se, switches)
436 437 def ld_func(self): 438 l = self.stream.readline().rstrip() 439 if l == "": 440 self.done = True 441 raise StopIteration 442 toks = [x for x in l.split(" ") if x != ""] 443 locus1, locus2 = toks[0], toks[2] 444 try: 445 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 446 except ValueError: 447 return (locus1, locus2), None 448 return (locus1, locus2), (chi2, df, p) 449 450 f1 = open(fname + ".DIS") 451 l = f1.readline() 452 while "----" not in l: 453 l = f1.readline() 454 shutil.copyfile(fname + ".DIS", fname + ".DI2") 455 f2 = open(fname + ".DI2") 456 l = f2.readline() 457 while "Locus pair" not in l: 458 l = f2.readline() 459 while "----" not in l: 460 l = f2.readline() 461 return (_FileIterator(ld_pop_func, fname + ".DIS", f1), 462 _FileIterator(ld_func, fname + ".DI2", f2)) 463 464 # 2.2
465 - def create_contingency_tables(self, fname):
466 raise NotImplementedError
467 468 # 3.1 PR/GE files
469 - def test_genic_diff_all(self, fname, dememorization=10000, 470 batches=20, iterations=5000):
471 raise NotImplementedError
472 473 # 3.2 PR2/GE2 files
474 - def test_genic_diff_pair(self, fname, dememorization=10000, 475 batches=20, iterations=5000):
476 raise NotImplementedError
477 478 # 3.3 G files
479 - def test_genotypic_diff_all(self, fname, dememorization=10000, 480 batches=20, iterations=5000):
481 raise NotImplementedError
482 483 # 3.4 2G2 files
484 - def test_genotypic_diff_pair(self, fname, dememorization=10000, 485 batches=20, iterations=5000):
486 raise NotImplementedError
487 488 # 4
489 - def estimate_nm(self, fname):
490 self._run_genepop(["PRI"], [4], fname) 491 with open(fname + ".PRI") as f: 492 lines = f.readlines() # Small file, it is ok 493 for line in lines: 494 m = re.search("Mean sample size: ([.0-9]+)", line) 495 if m is not None: 496 mean_sample_size = _gp_float(m.group(1)) 497 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 498 if m is not None: 499 mean_priv_alleles = _gp_float(m.group(1)) 500 m = re.search("N=10: ([.0-9]+)", line) 501 if m is not None: 502 mig10 = _gp_float(m.group(1)) 503 m = re.search("N=25: ([.0-9]+)", line) 504 if m is not None: 505 mig25 = _gp_float(m.group(1)) 506 m = re.search("N=50: ([.0-9]+)", line) 507 if m is not None: 508 mig50 = _gp_float(m.group(1)) 509 m = re.search("for size= ([.0-9]+)", line) 510 if m is not None: 511 mig_corrected = _gp_float(m.group(1)) 512 os.remove(fname + ".PRI") 513 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
514 515 # 5.1
516 - def calc_allele_genotype_freqs(self, fname):
517 """Calculates allele and genotype frequencies per locus and per sample. 518 519 Parameters: 520 - fname - file name 521 522 Returns tuple with 2 elements: 523 - Population iterator with 524 525 - population name 526 - Locus dictionary with key = locus name and content tuple as 527 Genotype List with 528 (Allele1, Allele2, observed, expected) 529 (expected homozygotes, observed hm, 530 expected heterozygotes, observed ht) 531 Allele frequency/Fis dictionary with allele as key and 532 (count, frequency, Fis Weir & Cockerham) 533 - Totals as a pair 534 - count 535 - Fis Weir & Cockerham, 536 - Fis Robertson & Hill 537 538 - Locus iterator with 539 540 - Locus name 541 - allele list 542 - Population list with a triple 543 544 - population name 545 - list of allele frequencies in the same order as allele list above 546 - number of genes 547 548 Will create a file called fname.INF 549 550 """ 551 self._run_genepop(["INF"], [5, 1], fname) 552 # First pass, general information 553 # num_loci = None 554 # num_pops = None 555 # with open(fname + ".INF") as f: 556 # l = f.readline() 557 # while (num_loci is None or num_pops is None) and l != '': 558 # m = re.search("Number of populations detected : ([0-9+])", l) 559 # if m is not None: 560 # num_pops = _gp_int(m.group(1)) 561 # m = re.search("Number of loci detected : ([0-9+])", l) 562 # if m is not None: 563 # num_loci = _gp_int(m.group(1)) 564 # l = f.readline() 565 566 def pop_parser(self): 567 if hasattr(self, "old_line"): 568 l = self.old_line 569 del self.old_line 570 else: 571 l = self.stream.readline() 572 loci_content = {} 573 while l != '': 574 l = l.rstrip() 575 if "Tables of allelic frequencies for each locus" in l: 576 return self.curr_pop, loci_content 577 match = re.match(".*Pop: (.+) Locus: (.+)", l) 578 if match is not None: 579 pop = match.group(1).rstrip() 580 locus = match.group(2) 581 if not hasattr(self, "first_locus"): 582 self.first_locus = locus 583 if hasattr(self, "curr_pop"): 584 if self.first_locus == locus: 585 old_pop = self.curr_pop 586 # self.curr_pop = pop 587 self.old_line = l 588 del self.first_locus 589 del self.curr_pop 590 return old_pop, loci_content 591 self.curr_pop = pop 592 else: 593 l = self.stream.readline() 594 continue 595 geno_list = [] 596 l = self.stream.readline() 597 if "No data" in l: 598 continue 599 600 while "Genotypes Obs." not in l: 601 l = self.stream.readline() 602 603 while l != "\n": 604 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l) 605 if m2 is not None: 606 geno_list.append((_gp_int(m2.group(1)), 607 _gp_int(m2.group(2)), 608 _gp_int(m2.group(3)), 609 _gp_float(m2.group(4)))) 610 else: 611 l = self.stream.readline() 612 continue 613 l = self.stream.readline() 614 615 while "Expected number of ho" not in l: 616 l = self.stream.readline() 617 expHo = _gp_float(l[38:]) 618 l = self.stream.readline() 619 obsHo = _gp_int(l[38:]) 620 l = self.stream.readline() 621 expHe = _gp_float(l[38:]) 622 l = self.stream.readline() 623 obsHe = _gp_int(l[38:]) 624 l = self.stream.readline() 625 626 while "Sample count" not in l: 627 l = self.stream.readline() 628 l = self.stream.readline() 629 freq_fis = {} 630 overall_fis = None 631 while "----" not in l: 632 vals = [x for x in l.rstrip().split(' ') if x != ''] 633 if vals[0] == "Tot": 634 overall_fis = (_gp_int(vals[1]), 635 _gp_float(vals[2]), 636 _gp_float(vals[3])) 637 else: 638 freq_fis[_gp_int(vals[0])] = (_gp_int(vals[1]), 639 _gp_float(vals[2]), 640 _gp_float(vals[3])) 641 l = self.stream.readline() 642 loci_content[locus] = (geno_list, 643 (expHo, obsHo, expHe, obsHe), 644 freq_fis, overall_fis) 645 self.done = True 646 raise StopIteration
647 648 def locus_parser(self): 649 l = self.stream.readline() 650 while l != "": 651 l = l.rstrip() 652 match = re.match(" Locus: (.+)", l) 653 if match is not None: 654 locus = match.group(1) 655 alleles, table = _read_allele_freq_table(self.stream) 656 return locus, alleles, table 657 l = self.stream.readline() 658 self.done = True 659 raise StopIteration 660 661 shutil.copyfile(fname + ".INF", fname + ".IN2") 662 pop_iter = _FileIterator(pop_parser, fname + ".INF") 663 locus_iter = _FileIterator(locus_parser, fname + ".IN2") 664 return (pop_iter, locus_iter) 665
666 - def _calc_diversities_fis(self, fname, ext):
667 self._run_genepop([ext], [5, 2], fname) 668 with open(fname + ext) as f: 669 l = f.readline() 670 while l != "": 671 l = l.rstrip() 672 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 673 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 674 avg_Qintra = _read_table(f, [str, _gp_float]) 675 l = f.readline() 676 677 def fis_func(self): 678 l = self.stream.readline() 679 while l != "": 680 l = l.rstrip() 681 m = re.search("Locus: (.+)", l) 682 if m is not None: 683 locus = m.group(1) 684 self.stream.readline() 685 if "No complete" in self.stream.readline(): 686 return locus, None 687 self.stream.readline() 688 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 689 self.stream.readline() 690 avg_qinter, avg_fis = tuple(_gp_float(x) for x in 691 [y for y in self.stream.readline().split(" ") if y != ""]) 692 return locus, fis_table, avg_qinter, avg_fis 693 l = self.stream.readline() 694 self.done = True 695 raise StopIteration
696 697 return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra 698 699 # 5.2
700 - def calc_diversities_fis_with_identity(self, fname):
701 return self._calc_diversities_fis(fname, ".DIV")
702 703 # 5.3
704 - def calc_diversities_fis_with_size(self, fname):
705 raise NotImplementedError
706 707 # 6.1 Less genotype frequencies
708 - def calc_fst_all(self, fname):
709 """Executes GenePop and gets Fst/Fis/Fit (all populations) 710 711 Parameters: 712 - fname - file name 713 714 Returns: 715 - (multiLocusFis, multiLocusFst, multiLocus Fit), 716 - Iterator of tuples 717 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 718 719 Will create a file called `fname.FST`. 720 721 This does not return the genotype frequencies. 722 723 """ 724 self._run_genepop([".FST"], [6, 1], fname) 725 with open(fname + ".FST") as f: 726 l = f.readline() 727 while l != '': 728 if l.startswith(' All:'): 729 toks = [x for x in l.rstrip().split(' ') if x != ""] 730 try: 731 allFis = _gp_float(toks[1]) 732 except ValueError: 733 allFis = None 734 try: 735 allFst = _gp_float(toks[2]) 736 except ValueError: 737 allFst = None 738 try: 739 allFit = _gp_float(toks[3]) 740 except ValueError: 741 allFit = None 742 l = f.readline() 743 744 def proc(self): 745 if hasattr(self, "last_line"): 746 l = self.last_line 747 del self.last_line 748 else: 749 l = self.stream.readline() 750 locus = None 751 fis = None 752 fst = None 753 fit = None 754 qintra = None 755 qinter = None 756 while l != '': 757 l = l.rstrip() 758 if l.startswith(' Locus:'): 759 if locus is not None: 760 self.last_line = l 761 return locus, fis, fst, fit, qintra, qinter 762 else: 763 locus = l.split(':')[1].lstrip() 764 elif l.startswith('Fis^='): 765 fis = _gp_float(l.split(' ')[1]) 766 elif l.startswith('Fst^='): 767 fst = _gp_float(l.split(' ')[1]) 768 elif l.startswith('Fit^='): 769 fit = _gp_float(l.split(' ')[1]) 770 elif l.startswith('1-Qintra^='): 771 qintra = _gp_float(l.split(' ')[1]) 772 elif l.startswith('1-Qinter^='): 773 qinter = _gp_float(l.split(' ')[1]) 774 return locus, fis, fst, fit, qintra, qinter 775 l = self.stream.readline() 776 if locus is not None: 777 return locus, fis, fst, fit, qintra, qinter 778 self.stream.close() 779 self.done = True 780 raise StopIteration
781 return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST") 782 783 # 6.2
784 - def calc_fst_pair(self, fname):
785 self._run_genepop([".ST2", ".MIG"], [6, 2], fname) 786 with open(fname + ".ST2") as f: 787 l = f.readline() 788 while l != "": 789 l = l.rstrip() 790 if l.startswith("Estimates for all loci"): 791 avg_fst = _read_headed_triangle_matrix(f) 792 l = f.readline() 793 794 def loci_func(self): 795 l = self.stream.readline() 796 while l != "": 797 l = l.rstrip() 798 m = re.search(" Locus: (.+)", l) 799 if m is not None: 800 locus = m.group(1) 801 matrix = _read_headed_triangle_matrix(self.stream) 802 return locus, matrix 803 l = self.stream.readline() 804 self.done = True 805 raise StopIteration
806 807 os.remove(fname + ".MIG") 808 return _FileIterator(loci_func, fname + ".ST2"), avg_fst 809 810 # 6.3
811 - def calc_rho_all(self, fname):
812 raise NotImplementedError
813 814 # 6.4
815 - def calc_rho_pair(self, fname):
816 raise NotImplementedError
817
818 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
819 """Calculates isolation by distance statistics.""" 820 self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub], 821 fname, opts={ 822 "MinimalDistance": min_dist, 823 "GeographicScale": scale, 824 "IsolBDstatistic": stat}) 825 with open(fname + ".ISO") as f: 826 f.readline() 827 f.readline() 828 f.readline() 829 f.readline() 830 estimate = _read_triangle_matrix(f) 831 f.readline() 832 f.readline() 833 distance = _read_triangle_matrix(f) 834 f.readline() 835 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 836 a = _gp_float(match.group(1)) 837 b = _gp_float(match.group(2)) 838 f.readline() 839 f.readline() 840 match = re.match(" b=(.+)", f.readline().rstrip()) 841 bb = _gp_float(match.group(1)) 842 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 843 bblow = _gp_float(match.group(1)) 844 bbhigh = _gp_float(match.group(2)) 845 os.remove(fname + ".MIG") 846 os.remove(fname + ".GRA") 847 os.remove(fname + ".ISO") 848 return estimate, distance, (a, b), (bb, bblow, bbhigh)
849 850 # 6.5
851 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
852 """Calculates isolation by distance statistics for diploid data. 853 854 See _calc_ibd for parameter details. 855 856 Note that each pop can only have a single individual and 857 the individual name has to be the sample coordinates. 858 """ 859 return self._calc_ibd(fname, 5, stat, scale, min_dist)
860 861 # 6.6
862 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
863 """Calculates isolation by distance statistics for haploid data. 864 865 See _calc_ibd for parameter details. 866 867 Note that each pop can only have a single individual and 868 the individual name has to be the sample coordinates. 869 """ 870 return self._calc_ibd(fname, 6, stat, scale, min_dist)
871