Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module allows to control GenePop. 
  8   
  9  """ 
 10   
 11  import os 
 12  import re 
 13  import shutil 
 14  import sys 
 15  import tempfile 
 16   
 17  from Bio.Application import AbstractCommandline, _Argument 
 18   
 19  __docformat__ = "restructuredtext en" 
 20   
 21   
22 -def _gp_float(tok):
23 """Gets a float from a token, if it fails, returns the string (PRIVATE).""" 24 try: 25 return float(tok) 26 except ValueError: 27 return str(tok)
28 29
30 -def _gp_int(tok):
31 """Gets a int from a token, if it fails, returns the string (PRIVATE).""" 32 try: 33 return int(tok) 34 except ValueError: 35 return str(tok)
36 37
38 -def _read_allele_freq_table(f):
39 l = f.readline() 40 while ' --' not in l: 41 if l == "": 42 raise StopIteration 43 if 'No data' in l: 44 return None, None 45 l = f.readline() 46 alleles = [x for x in f.readline().rstrip().split(" ") if x != ''] 47 alleles = [_gp_int(x) for x in alleles] 48 l = f.readline().rstrip() 49 table = [] 50 while l != "": 51 line = [x for x in l.split(" ") if x != ''] 52 try: 53 table.append( 54 (line[0], [_gp_float(x) for x in line[1: -1]], 55 _gp_int(line[-1]))) 56 except ValueError: 57 table.append( 58 (line[0], [None] * len(alleles), 0)) 59 l = f.readline().rstrip() 60 return alleles, table
61 62
63 -def _read_table(f, funs):
64 table = [] 65 l = f.readline().rstrip() 66 while '---' not in l: 67 l = f.readline().rstrip() 68 l = f.readline().rstrip() 69 while '===' not in l and '---' not in l and l != "": 70 toks = [x for x in l.split(" ") if x != ""] 71 line = [] 72 for i in range(len(toks)): 73 try: 74 line.append(funs[i](toks[i])) 75 except ValueError: 76 line.append(toks[i]) # Could not cast 77 table.append(tuple(line)) 78 l = f.readline().rstrip() 79 return table
80 81
82 -def _read_triangle_matrix(f):
83 matrix = [] 84 l = f.readline().rstrip() 85 while l != "": 86 matrix.append( 87 [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]]) 88 l = f.readline().rstrip() 89 return matrix
90 91
92 -def _read_headed_triangle_matrix(f):
93 matrix = {} 94 header = f.readline().rstrip() 95 if '---' in header or '===' in header: 96 header = f.readline().rstrip() 97 nlines = len([x for x in header.split(' ') if x != '']) - 1 98 for line_pop in range(nlines): 99 l = f.readline().rstrip() 100 vals = [x for x in l.split(' ')[1:] if x != ''] 101 clean_vals = [] 102 for val in vals: 103 try: 104 clean_vals.append(_gp_float(val)) 105 except ValueError: 106 clean_vals.append(None) 107 for col_pop in range(len(clean_vals)): 108 matrix[(line_pop + 1, col_pop)] = clean_vals[col_pop] 109 return matrix
110 111
112 -def _hw_func(stream, is_locus, has_fisher=False):
113 l = stream.readline() 114 if is_locus: 115 hook = "Locus " 116 else: 117 hook = " Pop : " 118 while l != "": 119 if l.startswith(hook): 120 stream.readline() 121 stream.readline() 122 stream.readline() 123 table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str]) 124 # loci might mean pop if hook="Locus " 125 loci = {} 126 for entry in table: 127 if len(entry) < 3: 128 loci[entry[0]] = None 129 else: 130 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 131 if se == "-": 132 se = None 133 loci[locus] = p, se, fis_wc, fis_rh, steps 134 return loci 135 l = stream.readline() 136 # self.done = True 137 raise StopIteration
138 139
140 -class _FileIterator(object):
141 """Iterator which crawls over a stream of lines with a function (PRIVATE). 142 143 The generator function is expected to yield a tuple, while 144 consuming input 145 """
146 - def __init__(self, func, fname, handle=None):
147 self.func = func 148 if handle is None: 149 self.stream = open(fname) 150 else: 151 # For special cases where calling code wants to 152 # seek into the file before starting: 153 self.stream = handle 154 self.fname = fname 155 self.done = False
156
157 - def __iter__(self):
158 if self.done: 159 self.done = True 160 raise StopIteration 161 return self
162
163 - def __next__(self):
164 return self.func(self)
165 166 if sys.version_info[0] < 3:
167 - def next(self):
168 """Python 2 style alias for Python 3 style __next__ method.""" 169 return self.__next__()
170
171 - def __del__(self):
172 self.stream.close() 173 try: 174 os.remove(self.fname) 175 except OSError: 176 # Jython seems to call the iterator twice 177 pass
178 179
180 -class _GenePopCommandline(AbstractCommandline):
181 """Command Line Wrapper for GenePop (PRIVATE).""" 182
183 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
184 self.parameters = [ 185 _Argument(["command"], "GenePop option to be called", 186 is_required=True), 187 _Argument(["mode"], "Should allways be batch", is_required=True), 188 _Argument(["input"], "Input file", is_required=True), 189 _Argument(["Dememorization"], "Dememorization step"), 190 _Argument(["BatchNumber"], "Number of MCMC batches"), 191 _Argument(["BatchLength"], "Length of MCMC chains"), 192 _Argument(["HWtests"], "Enumeration or MCMC"), 193 _Argument(["IsolBDstatistic"], "IBD statistic (a or e)"), 194 _Argument(["MinimalDistance"], "Minimal IBD distance"), 195 _Argument(["GeographicScale"], "Log or Linear"), 196 ] 197 AbstractCommandline.__init__(self, cmd, **kwargs) 198 self.set_parameter("mode", "Mode=Batch")
199
200 - def set_menu(self, option_list):
201 """Sets the menu option. 202 203 Example set_menu([6,1]) = get all F statistics (menu 6.1) 204 """ 205 self.set_parameter("command", "MenuOptions=" + 206 ".".join(str(x) for x in option_list))
207
208 - def set_input(self, fname):
209 """Sets the input file name.""" 210 self.set_parameter("input", "InputFile=" + fname)
211 212
213 -class GenePopController(object):
214 - def __init__(self, genepop_dir=None):
215 """Initializes the controller. 216 217 genepop_dir is the directory where GenePop is. 218 219 The binary should be called Genepop (capital G) 220 """ 221 self.controller = _GenePopCommandline(genepop_dir)
222
223 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
224 opts = {} 225 opts["Dememorization"] = dememorization 226 opts["BatchNumber"] = batches 227 opts["BatchLength"] = iterations 228 if enum_test is not None: 229 if enum_test is True: 230 opts["HWtests"] = "Enumeration" 231 else: 232 opts["HWtests"] = "MCMC" 233 return opts
234
235 - def _run_genepop(self, extensions, option, fname, opts={}):
236 cwd = os.getcwd() 237 temp_dir = tempfile.mkdtemp() 238 os.chdir(temp_dir) 239 self.controller.set_menu(option) 240 if os.path.isabs(fname): 241 self.controller.set_input(fname) 242 else: 243 self.controller.set_input(cwd + os.sep + fname) 244 for opt in opts: 245 self.controller.set_parameter(opt, opt + "=" + str(opts[opt])) 246 self.controller() # checks error level is zero 247 os.chdir(cwd) 248 shutil.rmtree(temp_dir) 249 return
250
251 - def _test_pop_hz_both(self, fname, type, ext, enum_test=True, 252 dememorization=10000, batches=20, 253 iterations=5000):
254 """Hardy-Weinberg test for heterozygote deficiency/excess. 255 256 Returns a population iterator containing a dictionary where 257 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 258 259 Some loci have a None if the info is not available. 260 SE might be none (for enumerations). 261 """ 262 opts = self._get_opts(dememorization, batches, iterations, enum_test) 263 self._run_genepop([ext], [1, type], fname, opts) 264 265 def hw_func(self): 266 return _hw_func(self.stream, False)
267 268 return _FileIterator(hw_func, fname + ext)
269
270 - def _test_global_hz_both(self, fname, type, ext, enum_test=True, 271 dememorization=10000, batches=20, 272 iterations=5000):
273 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 274 275 Returns a triple with: 276 277 - A list per population containing (pop_name, P-val, SE, switches). 278 Some pops have a None if the info is not available. 279 SE might be none (for enumerations). 280 - A list per loci containing (locus_name, P-val, SE, switches). 281 Some loci have a None if the info is not available. 282 SE might be none (for enumerations). 283 - Overall results (P-val, SE, switches). 284 """ 285 opts = self._get_opts(dememorization, batches, iterations, enum_test) 286 self._run_genepop([ext], [1, type], fname, opts) 287 288 def hw_pop_func(self): 289 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
290 291 with open(fname + ext) as f1: 292 l = f1.readline() 293 while "by population" not in l: 294 l = f1.readline() 295 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 296 with open(fname + ext) as f2: 297 l = f2.readline() 298 while "by locus" not in l: 299 l = f2.readline() 300 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 301 with open(fname + ext) as f: 302 l = f.readline() 303 while "all locus" not in l: 304 l = f.readline() 305 f.readline() 306 f.readline() 307 f.readline() 308 f.readline() 309 l = f.readline().rstrip() 310 p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""]) 311 return pop_p, loc_p, (p, se, switches) 312 313 # 1.1
314 - def test_pop_hz_deficiency(self, fname, enum_test=True, 315 dememorization=10000, batches=20, 316 iterations=5000):
317 """Hardy-Weinberg test for heterozygote deficiency. 318 319 Returns a population iterator containing a dictionary wehre 320 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 321 322 Some loci have a None if the info is not available. 323 SE might be none (for enumerations). 324 """ 325 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 326 dememorization, batches, iterations)
327 328 # 1.2
329 - def test_pop_hz_excess(self, fname, enum_test=True, 330 dememorization=10000, batches=20, 331 iterations=5000):
332 """Hardy-Weinberg test for heterozygote deficiency. 333 334 Returns a population iterator containing a dictionary where 335 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 336 337 Some loci have a None if the info is not available. 338 SE might be none (for enumerations). 339 """ 340 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 341 dememorization, batches, iterations)
342 343 # 1.3 P file
344 - def test_pop_hz_prob(self, fname, ext, enum_test=False, 345 dememorization=10000, batches=20, 346 iterations=5000):
347 """Hardy-Weinberg test based on probability. 348 349 Returns 2 iterators and a final tuple: 350 351 1. Returns a loci iterator containing: 352 - A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps). 353 Some pops have a None if the info is not available. 354 SE might be none (for enumerations). 355 - Result of Fisher's test (Chi2, deg freedom, prob). 356 2. Returns a population iterator containing: 357 - A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 358 Some loci have a None if the info is not available. 359 SE might be none (for enumerations). 360 - Result of Fisher's test (Chi2, deg freedom, prob). 361 3. Final tuple (Chi2, deg freedom, prob). 362 """ 363 opts = self._get_opts(dememorization, batches, iterations, enum_test) 364 self._run_genepop([ext], [1, 3], fname, opts) 365 366 def hw_prob_loci_func(self): 367 return _hw_func(self.stream, True, True)
368 369 def hw_prob_pop_func(self): 370 return _hw_func(self.stream, False, True) 371 372 shutil.copyfile(fname + ".P", fname + ".P2") 373 374 return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2") 375 376 # 1.4
377 - def test_global_hz_deficiency(self, fname, enum_test=True, 378 dememorization=10000, batches=20, 379 iterations=5000):
380 """Global Hardy-Weinberg test for heterozygote deficiency. 381 382 Returns a triple with: 383 384 - An list per population containing (pop_name, P-val, SE, switches). 385 Some pops have a None if the info is not available. 386 SE might be none (for enumerations). 387 - An list per loci containing (locus_name, P-val, SE, switches). 388 Some loci have a None if the info is not available. 389 SE might be none (for enumerations). 390 - Overall results (P-val, SE, switches). 391 """ 392 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 393 dememorization, batches, iterations)
394 395 # 1.5
396 - def test_global_hz_excess(self, fname, enum_test=True, 397 dememorization=10000, batches=20, 398 iterations=5000):
399 """Global Hardy-Weinberg test for heterozygote excess. 400 401 Returns a triple with: 402 403 - A list per population containing (pop_name, P-val, SE, switches). 404 Some pops have a None if the info is not available. 405 SE might be none (for enumerations). 406 - A list per loci containing (locus_name, P-val, SE, switches). 407 Some loci have a None if the info is not available. 408 SE might be none (for enumerations). 409 - Overall results (P-val, SE, switches) 410 """ 411 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 412 dememorization, batches, iterations)
413 414 # 2.1
415 - def test_ld(self, fname, dememorization=10000, 416 batches=20, iterations=5000):
417 opts = self._get_opts(dememorization, batches, iterations) 418 self._run_genepop([".DIS"], [2, 1], fname, opts) 419 420 def ld_pop_func(self): 421 current_pop = None 422 l = self.stream.readline().rstrip() 423 if l == "": 424 self.done = True 425 raise StopIteration 426 toks = [x for x in l.split(" ") if x != ""] 427 pop, locus1, locus2 = toks[0], toks[1], toks[2] 428 if not hasattr(self, "start_locus1"): 429 start_locus1, start_locus2 = locus1, locus2 430 current_pop = -1 431 if locus1 == start_locus1 and locus2 == start_locus2: 432 current_pop += 1 433 if toks[3] == "No": 434 return current_pop, pop, (locus1, locus2), None 435 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 436 return current_pop, pop, (locus1, locus2), (p, se, switches)
437 438 def ld_func(self): 439 l = self.stream.readline().rstrip() 440 if l == "": 441 self.done = True 442 raise StopIteration 443 toks = [x for x in l.split(" ") if x != ""] 444 locus1, locus2 = toks[0], toks[2] 445 try: 446 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 447 except ValueError: 448 return (locus1, locus2), None 449 return (locus1, locus2), (chi2, df, p) 450 451 f1 = open(fname + ".DIS") 452 l = f1.readline() 453 while "----" not in l: 454 l = f1.readline() 455 shutil.copyfile(fname + ".DIS", fname + ".DI2") 456 f2 = open(fname + ".DI2") 457 l = f2.readline() 458 while "Locus pair" not in l: 459 l = f2.readline() 460 while "----" not in l: 461 l = f2.readline() 462 return (_FileIterator(ld_pop_func, fname + ".DIS", f1), 463 _FileIterator(ld_func, fname + ".DI2", f2)) 464 465 # 2.2
466 - def create_contingency_tables(self, fname):
467 raise NotImplementedError
468 469 # 3.1 PR/GE files
470 - def test_genic_diff_all(self, fname, dememorization=10000, 471 batches=20, iterations=5000):
472 raise NotImplementedError
473 474 # 3.2 PR2/GE2 files
475 - def test_genic_diff_pair(self, fname, dememorization=10000, 476 batches=20, iterations=5000):
477 raise NotImplementedError
478 479 # 3.3 G files
480 - def test_genotypic_diff_all(self, fname, dememorization=10000, 481 batches=20, iterations=5000):
482 raise NotImplementedError
483 484 # 3.4 2G2 files
485 - def test_genotypic_diff_pair(self, fname, dememorization=10000, 486 batches=20, iterations=5000):
487 raise NotImplementedError
488 489 # 4
490 - def estimate_nm(self, fname):
491 self._run_genepop(["PRI"], [4], fname) 492 with open(fname + ".PRI") as f: 493 lines = f.readlines() # Small file, it is ok 494 for line in lines: 495 m = re.search("Mean sample size: ([.0-9]+)", line) 496 if m is not None: 497 mean_sample_size = _gp_float(m.group(1)) 498 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 499 if m is not None: 500 mean_priv_alleles = _gp_float(m.group(1)) 501 m = re.search("N=10: ([.0-9]+)", line) 502 if m is not None: 503 mig10 = _gp_float(m.group(1)) 504 m = re.search("N=25: ([.0-9]+)", line) 505 if m is not None: 506 mig25 = _gp_float(m.group(1)) 507 m = re.search("N=50: ([.0-9]+)", line) 508 if m is not None: 509 mig50 = _gp_float(m.group(1)) 510 m = re.search("for size= ([.0-9]+)", line) 511 if m is not None: 512 mig_corrected = _gp_float(m.group(1)) 513 os.remove(fname + ".PRI") 514 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
515 516 # 5.1
517 - def calc_allele_genotype_freqs(self, fname):
518 """Calculates allele and genotype frequencies per locus and per sample. 519 520 Parameters: 521 522 - fname - file name 523 524 Returns tuple with 2 elements: 525 526 - Population iterator with 527 528 - population name 529 - Locus dictionary with key = locus name and content tuple as 530 Genotype List with 531 (Allele1, Allele2, observed, expected) 532 (expected homozygotes, observed hm, 533 expected heterozygotes, observed ht) 534 Allele frequency/Fis dictionary with allele as key and 535 (count, frequency, Fis Weir & Cockerham) 536 - Totals as a pair 537 - count 538 - Fis Weir & Cockerham, 539 - Fis Robertson & Hill 540 541 - Locus iterator with 542 543 - Locus name 544 - allele list 545 - Population list with a triple 546 547 - population name 548 - list of allele frequencies in the same order as allele list above 549 - number of genes 550 551 Will create a file called fname.INF 552 """ 553 self._run_genepop(["INF"], [5, 1], fname) 554 # First pass, general information 555 # num_loci = None 556 # num_pops = None 557 # with open(fname + ".INF") as f: 558 # l = f.readline() 559 # while (num_loci is None or num_pops is None) and l != '': 560 # m = re.search("Number of populations detected : ([0-9+])", l) 561 # if m is not None: 562 # num_pops = _gp_int(m.group(1)) 563 # m = re.search("Number of loci detected : ([0-9+])", l) 564 # if m is not None: 565 # num_loci = _gp_int(m.group(1)) 566 # l = f.readline() 567 568 def pop_parser(self): 569 if hasattr(self, "old_line"): 570 l = self.old_line 571 del self.old_line 572 else: 573 l = self.stream.readline() 574 loci_content = {} 575 while l != '': 576 l = l.rstrip() 577 if "Tables of allelic frequencies for each locus" in l: 578 return self.curr_pop, loci_content 579 match = re.match(".*Pop: (.+) Locus: (.+)", l) 580 if match is not None: 581 pop = match.group(1).rstrip() 582 locus = match.group(2) 583 if not hasattr(self, "first_locus"): 584 self.first_locus = locus 585 if hasattr(self, "curr_pop"): 586 if self.first_locus == locus: 587 old_pop = self.curr_pop 588 # self.curr_pop = pop 589 self.old_line = l 590 del self.first_locus 591 del self.curr_pop 592 return old_pop, loci_content 593 self.curr_pop = pop 594 else: 595 l = self.stream.readline() 596 continue 597 geno_list = [] 598 l = self.stream.readline() 599 if "No data" in l: 600 continue 601 602 while "Genotypes Obs." not in l: 603 l = self.stream.readline() 604 605 while l != "\n": 606 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l) 607 if m2 is not None: 608 geno_list.append((_gp_int(m2.group(1)), 609 _gp_int(m2.group(2)), 610 _gp_int(m2.group(3)), 611 _gp_float(m2.group(4)))) 612 else: 613 l = self.stream.readline() 614 continue 615 l = self.stream.readline() 616 617 while "Expected number of ho" not in l: 618 l = self.stream.readline() 619 expHo = _gp_float(l[38:]) 620 l = self.stream.readline() 621 obsHo = _gp_int(l[38:]) 622 l = self.stream.readline() 623 expHe = _gp_float(l[38:]) 624 l = self.stream.readline() 625 obsHe = _gp_int(l[38:]) 626 l = self.stream.readline() 627 628 while "Sample count" not in l: 629 l = self.stream.readline() 630 l = self.stream.readline() 631 freq_fis = {} 632 overall_fis = None 633 while "----" not in l: 634 vals = [x for x in l.rstrip().split(' ') if x != ''] 635 if vals[0] == "Tot": 636 overall_fis = (_gp_int(vals[1]), 637 _gp_float(vals[2]), 638 _gp_float(vals[3])) 639 else: 640 freq_fis[_gp_int(vals[0])] = (_gp_int(vals[1]), 641 _gp_float(vals[2]), 642 _gp_float(vals[3])) 643 l = self.stream.readline() 644 loci_content[locus] = (geno_list, 645 (expHo, obsHo, expHe, obsHe), 646 freq_fis, overall_fis) 647 self.done = True 648 raise StopIteration
649 650 def locus_parser(self): 651 l = self.stream.readline() 652 while l != "": 653 l = l.rstrip() 654 match = re.match(" Locus: (.+)", l) 655 if match is not None: 656 locus = match.group(1) 657 alleles, table = _read_allele_freq_table(self.stream) 658 return locus, alleles, table 659 l = self.stream.readline() 660 self.done = True 661 raise StopIteration 662 663 shutil.copyfile(fname + ".INF", fname + ".IN2") 664 pop_iter = _FileIterator(pop_parser, fname + ".INF") 665 locus_iter = _FileIterator(locus_parser, fname + ".IN2") 666 return (pop_iter, locus_iter) 667
668 - def _calc_diversities_fis(self, fname, ext):
669 self._run_genepop([ext], [5, 2], fname) 670 with open(fname + ext) as f: 671 l = f.readline() 672 while l != "": 673 l = l.rstrip() 674 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 675 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 676 avg_Qintra = _read_table(f, [str, _gp_float]) 677 l = f.readline() 678 679 def fis_func(self): 680 l = self.stream.readline() 681 while l != "": 682 l = l.rstrip() 683 m = re.search("Locus: (.+)", l) 684 if m is not None: 685 locus = m.group(1) 686 self.stream.readline() 687 if "No complete" in self.stream.readline(): 688 return locus, None 689 self.stream.readline() 690 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 691 self.stream.readline() 692 avg_qinter, avg_fis = tuple(_gp_float(x) for x in 693 [y for y in self.stream.readline().split(" ") if y != ""]) 694 return locus, fis_table, avg_qinter, avg_fis 695 l = self.stream.readline() 696 self.done = True 697 raise StopIteration
698 699 return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra 700 701 # 5.2
702 - def calc_diversities_fis_with_identity(self, fname):
703 return self._calc_diversities_fis(fname, ".DIV")
704 705 # 5.3
706 - def calc_diversities_fis_with_size(self, fname):
707 raise NotImplementedError
708 709 # 6.1 Less genotype frequencies
710 - def calc_fst_all(self, fname):
711 """Executes GenePop and gets Fst/Fis/Fit (all populations) 712 713 Parameters: 714 715 - fname - file name 716 717 Returns: 718 719 - (multiLocusFis, multiLocusFst, multiLocus Fit), 720 - Iterator of tuples 721 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 722 723 Will create a file called fname.FST . 724 725 This does not return the genotype frequencies. 726 """ 727 self._run_genepop([".FST"], [6, 1], fname) 728 with open(fname + ".FST") as f: 729 l = f.readline() 730 while l != '': 731 if l.startswith(' All:'): 732 toks = [x for x in l.rstrip().split(' ') if x != ""] 733 try: 734 allFis = _gp_float(toks[1]) 735 except ValueError: 736 allFis = None 737 try: 738 allFst = _gp_float(toks[2]) 739 except ValueError: 740 allFst = None 741 try: 742 allFit = _gp_float(toks[3]) 743 except ValueError: 744 allFit = None 745 l = f.readline() 746 747 def proc(self): 748 if hasattr(self, "last_line"): 749 l = self.last_line 750 del self.last_line 751 else: 752 l = self.stream.readline() 753 locus = None 754 fis = None 755 fst = None 756 fit = None 757 qintra = None 758 qinter = None 759 while l != '': 760 l = l.rstrip() 761 if l.startswith(' Locus:'): 762 if locus is not None: 763 self.last_line = l 764 return locus, fis, fst, fit, qintra, qinter 765 else: 766 locus = l.split(':')[1].lstrip() 767 elif l.startswith('Fis^='): 768 fis = _gp_float(l.split(' ')[1]) 769 elif l.startswith('Fst^='): 770 fst = _gp_float(l.split(' ')[1]) 771 elif l.startswith('Fit^='): 772 fit = _gp_float(l.split(' ')[1]) 773 elif l.startswith('1-Qintra^='): 774 qintra = _gp_float(l.split(' ')[1]) 775 elif l.startswith('1-Qinter^='): 776 qinter = _gp_float(l.split(' ')[1]) 777 return locus, fis, fst, fit, qintra, qinter 778 l = self.stream.readline() 779 if locus is not None: 780 return locus, fis, fst, fit, qintra, qinter 781 self.stream.close() 782 self.done = True 783 raise StopIteration
784 return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST") 785 786 # 6.2
787 - def calc_fst_pair(self, fname):
788 self._run_genepop([".ST2", ".MIG"], [6, 2], fname) 789 with open(fname + ".ST2") as f: 790 l = f.readline() 791 while l != "": 792 l = l.rstrip() 793 if l.startswith("Estimates for all loci"): 794 avg_fst = _read_headed_triangle_matrix(f) 795 l = f.readline() 796 797 def loci_func(self): 798 l = self.stream.readline() 799 while l != "": 800 l = l.rstrip() 801 m = re.search(" Locus: (.+)", l) 802 if m is not None: 803 locus = m.group(1) 804 matrix = _read_headed_triangle_matrix(self.stream) 805 return locus, matrix 806 l = self.stream.readline() 807 self.done = True 808 raise StopIteration
809 810 os.remove(fname + ".MIG") 811 return _FileIterator(loci_func, fname + ".ST2"), avg_fst 812 813 # 6.3
814 - def calc_rho_all(self, fname):
815 raise NotImplementedError
816 817 # 6.4
818 - def calc_rho_pair(self, fname):
819 raise NotImplementedError
820
821 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
822 """Calculates isolation by distance statistics 823 """ 824 self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub], 825 fname, opts={ 826 "MinimalDistance": min_dist, 827 "GeographicScale": scale, 828 "IsolBDstatistic": stat}) 829 with open(fname + ".ISO") as f: 830 f.readline() 831 f.readline() 832 f.readline() 833 f.readline() 834 estimate = _read_triangle_matrix(f) 835 f.readline() 836 f.readline() 837 distance = _read_triangle_matrix(f) 838 f.readline() 839 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 840 a = _gp_float(match.group(1)) 841 b = _gp_float(match.group(2)) 842 f.readline() 843 f.readline() 844 match = re.match(" b=(.+)", f.readline().rstrip()) 845 bb = _gp_float(match.group(1)) 846 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 847 bblow = _gp_float(match.group(1)) 848 bbhigh = _gp_float(match.group(2)) 849 os.remove(fname + ".MIG") 850 os.remove(fname + ".GRA") 851 os.remove(fname + ".ISO") 852 return estimate, distance, (a, b), (bb, bblow, bbhigh)
853 854 # 6.5
855 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
856 """Calculates isolation by distance statistics for diploid data. 857 858 See _calc_ibd for parameter details. 859 Note that each pop can only have a single individual and 860 the individual name has to be the sample coordinates. 861 """ 862 return self._calc_ibd(fname, 5, stat, scale, min_dist)
863 864 # 6.6
865 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
866 """Calculates isolation by distance statistics for haploid data. 867 868 See _calc_ibd for parameter details. 869 Note that each pop can only have a single individual and 870 the individual name has to be the sample coordinates. 871 """ 872 return self._calc_ibd(fname, 6, stat, scale, min_dist)
873