Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module allows to control GenePop. 
  8   
  9  """ 
 10   
 11  import os 
 12  import re 
 13  import shutil 
 14  import sys 
 15  import tempfile 
 16   
 17  from Bio.Application import AbstractCommandline, _Argument 
 18   
 19  __docformat__ = "restructuredtext en" 
 20   
21 -def _gp_float(tok):
22 """Gets a float from a token, if it fails, returns the string (PRIVATE).""" 23 try: 24 return float(tok) 25 except ValueError: 26 return str(tok)
27 28
29 -def _gp_int(tok):
30 """Gets a int from a token, if it fails, returns the string (PRIVATE).""" 31 try: 32 return int(tok) 33 except ValueError: 34 return str(tok)
35 36
37 -def _read_allele_freq_table(f):
38 l = f.readline() 39 while ' --' not in l: 40 if l == "": 41 raise StopIteration 42 if 'No data' in l: 43 return None, None 44 l = f.readline() 45 alleles = [x for x in f.readline().rstrip().split(" ") if x != ''] 46 alleles = [_gp_int(x) for x in alleles] 47 l = f.readline().rstrip() 48 table = [] 49 while l != "": 50 line = [x for x in l.split(" ") if x != ''] 51 try: 52 table.append( 53 (line[0], [_gp_float(x) for x in line[1: -1]], 54 _gp_int(line[-1]))) 55 except ValueError: 56 table.append( 57 (line[0], [None] * len(alleles), 0)) 58 l = f.readline().rstrip() 59 return alleles, table
60 61
62 -def _read_table(f, funs):
63 table = [] 64 l = f.readline().rstrip() 65 while '---' not in l: 66 l = f.readline().rstrip() 67 l = f.readline().rstrip() 68 while '===' not in l and '---' not in l and l != "": 69 toks = [x for x in l.split(" ") if x != ""] 70 line = [] 71 for i in range(len(toks)): 72 try: 73 line.append(funs[i](toks[i])) 74 except ValueError: 75 line.append(toks[i]) # Could not cast 76 table.append(tuple(line)) 77 l = f.readline().rstrip() 78 return table
79 80
81 -def _read_triangle_matrix(f):
82 matrix = [] 83 l = f.readline().rstrip() 84 while l != "": 85 matrix.append( 86 [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]]) 87 l = f.readline().rstrip() 88 return matrix
89 90
91 -def _read_headed_triangle_matrix(f):
92 matrix = {} 93 header = f.readline().rstrip() 94 if '---' in header or '===' in header: 95 header = f.readline().rstrip() 96 nlines = len([x for x in header.split(' ') if x != '']) - 1 97 for line_pop in range(nlines): 98 l = f.readline().rstrip() 99 vals = [x for x in l.split(' ')[1:] if x != ''] 100 clean_vals = [] 101 for val in vals: 102 try: 103 clean_vals.append(_gp_float(val)) 104 except ValueError: 105 clean_vals.append(None) 106 for col_pop in range(len(clean_vals)): 107 matrix[(line_pop + 1, col_pop)] = clean_vals[col_pop] 108 return matrix
109 110
111 -def _hw_func(stream, is_locus, has_fisher=False):
112 l = stream.readline() 113 if is_locus: 114 hook = "Locus " 115 else: 116 hook = " Pop : " 117 while l != "": 118 if l.startswith(hook): 119 stream.readline() 120 stream.readline() 121 stream.readline() 122 table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str]) 123 # loci might mean pop if hook="Locus " 124 loci = {} 125 for entry in table: 126 if len(entry) < 3: 127 loci[entry[0]] = None 128 else: 129 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 130 if se == "-": 131 se = None 132 loci[locus] = p, se, fis_wc, fis_rh, steps 133 return loci 134 l = stream.readline() 135 # self.done = True 136 raise StopIteration
137 138
139 -class _FileIterator:
140 """Iterator which crawls over a stream of lines with a function (PRIVATE). 141 142 The generator function is expected to yield a tuple, while 143 consuming input 144 """
145 - def __init__(self, func, fname, handle=None):
146 self.func = func 147 if handle is None: 148 self.stream = open(fname) 149 else: 150 # For special cases where calling code wants to 151 # seek into the file before starting: 152 self.stream = handle 153 self.fname = fname 154 self.done = False
155
156 - def __iter__(self):
157 if self.done: 158 self.done = True 159 raise StopIteration 160 return self
161
162 - def __next__(self):
163 return self.func(self)
164 165 if sys.version_info[0] < 3:
166 - def next(self):
167 """Python 2 style alias for Python 3 style __next__ method.""" 168 return self.__next__()
169
170 - def __del__(self):
171 self.stream.close() 172 try: 173 os.remove(self.fname) 174 except OSError: 175 # Jython seems to call the iterator twice 176 pass
177 178
179 -class _GenePopCommandline(AbstractCommandline):
180 """Command Line Wrapper for GenePop (PRIVATE).""" 181
182 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
183 self.parameters = [ 184 _Argument(["command"], "GenePop option to be called", 185 is_required=True), 186 _Argument(["mode"], "Should allways be batch", is_required=True), 187 _Argument(["input"], "Input file", is_required=True), 188 _Argument(["Dememorization"], "Dememorization step"), 189 _Argument(["BatchNumber"], "Number of MCMC batches"), 190 _Argument(["BatchLength"], "Length of MCMC chains"), 191 _Argument(["HWtests"], "Enumeration or MCMC"), 192 _Argument(["IsolBDstatistic"], "IBD statistic (a or e)"), 193 _Argument(["MinimalDistance"], "Minimal IBD distance"), 194 _Argument(["GeographicScale"], "Log or Linear"), 195 ] 196 AbstractCommandline.__init__(self, cmd, **kwargs) 197 self.set_parameter("mode", "Mode=Batch")
198
199 - def set_menu(self, option_list):
200 """Sets the menu option. 201 202 Example set_menu([6,1]) = get all F statistics (menu 6.1) 203 """ 204 self.set_parameter("command", "MenuOptions=" + 205 ".".join(str(x) for x in option_list))
206
207 - def set_input(self, fname):
208 """Sets the input file name.""" 209 self.set_parameter("input", "InputFile=" + fname)
210 211
212 -class GenePopController(object):
213 - def __init__(self, genepop_dir=None):
214 """Initializes the controller. 215 216 genepop_dir is the directory where GenePop is. 217 218 The binary should be called Genepop (capital G) 219 """ 220 self.controller = _GenePopCommandline(genepop_dir)
221
222 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
223 opts = {} 224 opts["Dememorization"] = dememorization 225 opts["BatchNumber"] = batches 226 opts["BatchLength"] = iterations 227 if enum_test is not None: 228 if enum_test is True: 229 opts["HWtests"] = "Enumeration" 230 else: 231 opts["HWtests"] = "MCMC" 232 return opts
233
234 - def _run_genepop(self, extensions, option, fname, opts={}):
235 cwd = os.getcwd() 236 temp_dir = tempfile.mkdtemp() 237 os.chdir(temp_dir) 238 self.controller.set_menu(option) 239 if os.path.isabs(fname): 240 self.controller.set_input(fname) 241 else: 242 self.controller.set_input(cwd + os.sep + fname) 243 for opt in opts: 244 self.controller.set_parameter(opt, opt + "=" + str(opts[opt])) 245 self.controller() # checks error level is zero 246 os.chdir(cwd) 247 shutil.rmtree(temp_dir) 248 return
249
250 - def _test_pop_hz_both(self, fname, type, ext, enum_test=True, 251 dememorization=10000, batches=20, 252 iterations=5000):
253 """Hardy-Weinberg test for heterozygote deficiency/excess. 254 255 Returns a population iterator containing a dictionary where 256 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 257 258 Some loci have a None if the info is not available. 259 SE might be none (for enumerations). 260 """ 261 opts = self._get_opts(dememorization, batches, iterations, enum_test) 262 self._run_genepop([ext], [1, type], fname, opts) 263 264 def hw_func(self): 265 return _hw_func(self.stream, False)
266 267 return _FileIterator(hw_func, fname + ext)
268
269 - def _test_global_hz_both(self, fname, type, ext, enum_test=True, 270 dememorization=10000, batches=20, 271 iterations=5000):
272 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 273 274 Returns a triple with: 275 276 - A list per population containing (pop_name, P-val, SE, switches). 277 Some pops have a None if the info is not available. 278 SE might be none (for enumerations). 279 - A list per loci containing (locus_name, P-val, SE, switches). 280 Some loci have a None if the info is not available. 281 SE might be none (for enumerations). 282 - Overall results (P-val, SE, switches). 283 """ 284 opts = self._get_opts(dememorization, batches, iterations, enum_test) 285 self._run_genepop([ext], [1, type], fname, opts) 286 287 def hw_pop_func(self): 288 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
289 290 with open(fname + ext) as f1: 291 l = f1.readline() 292 while "by population" not in l: 293 l = f1.readline() 294 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 295 with open(fname + ext) as f2: 296 l = f2.readline() 297 while "by locus" not in l: 298 l = f2.readline() 299 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 300 with open(fname + ext) as f: 301 l = f.readline() 302 while "all locus" not in l: 303 l = f.readline() 304 f.readline() 305 f.readline() 306 f.readline() 307 f.readline() 308 l = f.readline().rstrip() 309 p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""]) 310 return pop_p, loc_p, (p, se, switches) 311 312 # 1.1
313 - def test_pop_hz_deficiency(self, fname, enum_test=True, 314 dememorization=10000, batches=20, 315 iterations=5000):
316 """Hardy-Weinberg test for heterozygote deficiency. 317 318 Returns a population iterator containing a dictionary wehre 319 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 320 321 Some loci have a None if the info is not available. 322 SE might be none (for enumerations). 323 """ 324 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 325 dememorization, batches, iterations)
326 327 # 1.2
328 - def test_pop_hz_excess(self, fname, enum_test=True, 329 dememorization=10000, batches=20, 330 iterations=5000):
331 """Hardy-Weinberg test for heterozygote deficiency. 332 333 Returns a population iterator containing a dictionary where 334 dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 335 336 Some loci have a None if the info is not available. 337 SE might be none (for enumerations). 338 """ 339 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 340 dememorization, batches, iterations)
341 342 # 1.3 P file
343 - def test_pop_hz_prob(self, fname, ext, enum_test=False, 344 dememorization=10000, batches=20, 345 iterations=5000):
346 """Hardy-Weinberg test based on probability. 347 348 Returns 2 iterators and a final tuple: 349 350 1. Returns a loci iterator containing: 351 - A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps). 352 Some pops have a None if the info is not available. 353 SE might be none (for enumerations). 354 - Result of Fisher's test (Chi2, deg freedom, prob). 355 2. Returns a population iterator containing: 356 - A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps). 357 Some loci have a None if the info is not available. 358 SE might be none (for enumerations). 359 - Result of Fisher's test (Chi2, deg freedom, prob). 360 3. Final tuple (Chi2, deg freedom, prob). 361 """ 362 opts = self._get_opts(dememorization, batches, iterations, enum_test) 363 self._run_genepop([ext], [1, 3], fname, opts) 364 365 def hw_prob_loci_func(self): 366 return _hw_func(self.stream, True, True)
367 368 def hw_prob_pop_func(self): 369 return _hw_func(self.stream, False, True) 370 371 shutil.copyfile(fname + ".P", fname + ".P2") 372 373 return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2") 374 375 # 1.4
376 - def test_global_hz_deficiency(self, fname, enum_test=True, 377 dememorization=10000, batches=20, 378 iterations=5000):
379 """Global Hardy-Weinberg test for heterozygote deficiency. 380 381 Returns a triple with: 382 383 - An list per population containing (pop_name, P-val, SE, switches). 384 Some pops have a None if the info is not available. 385 SE might be none (for enumerations). 386 - An list per loci containing (locus_name, P-val, SE, switches). 387 Some loci have a None if the info is not available. 388 SE might be none (for enumerations). 389 - Overall results (P-val, SE, switches). 390 """ 391 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 392 dememorization, batches, iterations)
393 394 # 1.5
395 - def test_global_hz_excess(self, fname, enum_test=True, 396 dememorization=10000, batches=20, 397 iterations=5000):
398 """Global Hardy-Weinberg test for heterozygote excess. 399 400 Returns a triple with: 401 402 - A list per population containing (pop_name, P-val, SE, switches). 403 Some pops have a None if the info is not available. 404 SE might be none (for enumerations). 405 - A list per loci containing (locus_name, P-val, SE, switches). 406 Some loci have a None if the info is not available. 407 SE might be none (for enumerations). 408 - Overall results (P-val, SE, switches) 409 """ 410 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 411 dememorization, batches, iterations)
412 413 # 2.1
414 - def test_ld(self, fname, dememorization=10000, 415 batches=20, iterations=5000):
416 opts = self._get_opts(dememorization, batches, iterations) 417 self._run_genepop([".DIS"], [2, 1], fname, opts) 418 419 def ld_pop_func(self): 420 current_pop = None 421 l = self.stream.readline().rstrip() 422 if l == "": 423 self.done = True 424 raise StopIteration 425 toks = [x for x in l.split(" ") if x != ""] 426 pop, locus1, locus2 = toks[0], toks[1], toks[2] 427 if not hasattr(self, "start_locus1"): 428 start_locus1, start_locus2 = locus1, locus2 429 current_pop = -1 430 if locus1 == start_locus1 and locus2 == start_locus2: 431 current_pop += 1 432 if toks[3] == "No": 433 return current_pop, pop, (locus1, locus2), None 434 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 435 return current_pop, pop, (locus1, locus2), (p, se, switches)
436 437 def ld_func(self): 438 l = self.stream.readline().rstrip() 439 if l == "": 440 self.done = True 441 raise StopIteration 442 toks = [x for x in l.split(" ") if x != ""] 443 locus1, locus2 = toks[0], toks[2] 444 try: 445 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 446 except ValueError: 447 return (locus1, locus2), None 448 return (locus1, locus2), (chi2, df, p) 449 450 f1 = open(fname + ".DIS") 451 l = f1.readline() 452 while "----" not in l: 453 l = f1.readline() 454 shutil.copyfile(fname + ".DIS", fname + ".DI2") 455 f2 = open(fname + ".DI2") 456 l = f2.readline() 457 while "Locus pair" not in l: 458 l = f2.readline() 459 while "----" not in l: 460 l = f2.readline() 461 return (_FileIterator(ld_pop_func, fname + ".DIS", f1), 462 _FileIterator(ld_func, fname + ".DI2", f2)) 463 464 # 2.2
465 - def create_contingency_tables(self, fname):
466 raise NotImplementedError
467 468 # 3.1 PR/GE files
469 - def test_genic_diff_all(self, fname, dememorization=10000, 470 batches=20, iterations=5000):
471 raise NotImplementedError
472 473 # 3.2 PR2/GE2 files
474 - def test_genic_diff_pair(self, fname, dememorization=10000, 475 batches=20, iterations=5000):
476 raise NotImplementedError
477 478 # 3.3 G files
479 - def test_genotypic_diff_all(self, fname, dememorization=10000, 480 batches=20, iterations=5000):
481 raise NotImplementedError
482 483 # 3.4 2G2 files
484 - def test_genotypic_diff_pair(self, fname, dememorization=10000, 485 batches=20, iterations=5000):
486 raise NotImplementedError
487 488 # 4
489 - def estimate_nm(self, fname):
490 self._run_genepop(["PRI"], [4], fname) 491 with open(fname + ".PRI") as f: 492 lines = f.readlines() # Small file, it is ok 493 for line in lines: 494 m = re.search("Mean sample size: ([.0-9]+)", line) 495 if m is not None: 496 mean_sample_size = _gp_float(m.group(1)) 497 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 498 if m is not None: 499 mean_priv_alleles = _gp_float(m.group(1)) 500 m = re.search("N=10: ([.0-9]+)", line) 501 if m is not None: 502 mig10 = _gp_float(m.group(1)) 503 m = re.search("N=25: ([.0-9]+)", line) 504 if m is not None: 505 mig25 = _gp_float(m.group(1)) 506 m = re.search("N=50: ([.0-9]+)", line) 507 if m is not None: 508 mig50 = _gp_float(m.group(1)) 509 m = re.search("for size= ([.0-9]+)", line) 510 if m is not None: 511 mig_corrected = _gp_float(m.group(1)) 512 os.remove(fname + ".PRI") 513 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
514 515 # 5.1
516 - def calc_allele_genotype_freqs(self, fname):
517 """Calculates allele and genotype frequencies per locus and per sample. 518 519 Parameters: 520 521 - fname - file name 522 523 Returns tuple with 2 elements: 524 525 - Population iterator with 526 527 - population name 528 - Locus dictionary with key = locus name and content tuple as 529 Genotype List with 530 (Allele1, Allele2, observed, expected) 531 (expected homozygotes, observed hm, 532 expected heterozygotes, observed ht) 533 - Allele frequency/Fis dictionary with allele as key and 534 (count, frequency, Fis Weir & Cockerham) 535 - Totals as a pair 536 - count 537 - Fis Weir & Cockerham, 538 - Fis Robertson & Hill 539 540 - Locus iterator with 541 542 - Locus name 543 - allele list 544 - Population list with a triple 545 546 - population name 547 - list of allele frequencies in the same order as allele list above 548 - number of genes 549 550 Will create a file called fname.INF 551 """ 552 self._run_genepop(["INF"], [5, 1], fname) 553 # First pass, general information 554 # num_loci = None 555 # num_pops = None 556 # with open(fname + ".INF") as f: 557 # l = f.readline() 558 # while (num_loci is None or num_pops is None) and l != '': 559 # m = re.search("Number of populations detected : ([0-9+])", l) 560 # if m is not None: 561 # num_pops = _gp_int(m.group(1)) 562 # m = re.search("Number of loci detected : ([0-9+])", l) 563 # if m is not None: 564 # num_loci = _gp_int(m.group(1)) 565 # l = f.readline() 566 567 def pop_parser(self): 568 if hasattr(self, "old_line"): 569 l = self.old_line 570 del self.old_line 571 else: 572 l = self.stream.readline() 573 loci_content = {} 574 while l != '': 575 l = l.rstrip() 576 if "Tables of allelic frequencies for each locus" in l: 577 return self.curr_pop, loci_content 578 match = re.match(".*Pop: (.+) Locus: (.+)", l) 579 if match is not None: 580 pop = match.group(1) 581 locus = match.group(2) 582 if not hasattr(self, "first_locus"): 583 self.first_locus = locus 584 if hasattr(self, "curr_pop"): 585 if self.first_locus == locus: 586 old_pop = self.curr_pop 587 # self.curr_pop = pop 588 self.old_line = l 589 del self.first_locus 590 del self.curr_pop 591 return old_pop, loci_content 592 self.curr_pop = pop 593 else: 594 l = self.stream.readline() 595 continue 596 geno_list = [] 597 l = self.stream.readline() 598 if "No data" in l: 599 continue 600 601 while "Genotypes Obs." not in l: 602 l = self.stream.readline() 603 604 while l != "\n": 605 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l) 606 if m2 is not None: 607 geno_list.append((_gp_int(m2.group(1)), 608 _gp_int(m2.group(2)), 609 _gp_int(m2.group(3)), 610 _gp_float(m2.group(4)))) 611 else: 612 l = self.stream.readline() 613 continue 614 l = self.stream.readline() 615 616 while "Expected number of ho" not in l: 617 l = self.stream.readline() 618 expHo = _gp_float(l[38:]) 619 l = self.stream.readline() 620 obsHo = _gp_int(l[38:]) 621 l = self.stream.readline() 622 expHe = _gp_float(l[38:]) 623 l = self.stream.readline() 624 obsHe = _gp_int(l[38:]) 625 l = self.stream.readline() 626 627 while "Sample count" not in l: 628 l = self.stream.readline() 629 l = self.stream.readline() 630 freq_fis = {} 631 overall_fis = None 632 while "----" not in l: 633 vals = [x for x in l.rstrip().split(' ') if x != ''] 634 if vals[0] == "Tot": 635 overall_fis = (_gp_int(vals[1]), 636 _gp_float(vals[2]), 637 _gp_float(vals[3])) 638 else: 639 freq_fis[_gp_int(vals[0])] = (_gp_int(vals[1]), 640 _gp_float(vals[2]), 641 _gp_float(vals[3])) 642 l = self.stream.readline() 643 loci_content[locus] = (geno_list, 644 (expHo, obsHo, expHe, obsHe), 645 freq_fis, overall_fis) 646 self.done = True 647 raise StopIteration
648 649 def locus_parser(self): 650 l = self.stream.readline() 651 while l != "": 652 l = l.rstrip() 653 match = re.match(" Locus: (.+)", l) 654 if match is not None: 655 locus = match.group(1) 656 alleles, table = _read_allele_freq_table(self.stream) 657 return locus, alleles, table 658 l = self.stream.readline() 659 self.done = True 660 raise StopIteration 661 662 shutil.copyfile(fname + ".INF", fname + ".IN2") 663 pop_iter = _FileIterator(pop_parser, fname + ".INF") 664 locus_iter = _FileIterator(locus_parser, fname + ".IN2") 665 return (pop_iter, locus_iter) 666
667 - def _calc_diversities_fis(self, fname, ext):
668 self._run_genepop([ext], [5, 2], fname) 669 with open(fname + ext) as f: 670 l = f.readline() 671 while l != "": 672 l = l.rstrip() 673 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 674 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 675 avg_Qintra = _read_table(f, [str, _gp_float]) 676 l = f.readline() 677 678 def fis_func(self): 679 l = self.stream.readline() 680 while l != "": 681 l = l.rstrip() 682 m = re.search("Locus: (.+)", l) 683 if m is not None: 684 locus = m.group(1) 685 self.stream.readline() 686 if "No complete" in self.stream.readline(): 687 return locus, None 688 self.stream.readline() 689 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 690 self.stream.readline() 691 avg_qinter, avg_fis = tuple(_gp_float(x) for x in 692 [y for y in self.stream.readline().split(" ") if y != ""]) 693 return locus, fis_table, avg_qinter, avg_fis 694 l = self.stream.readline() 695 self.done = True 696 raise StopIteration
697 698 return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra 699 700 # 5.2
701 - def calc_diversities_fis_with_identity(self, fname):
702 return self._calc_diversities_fis(fname, ".DIV")
703 704 # 5.3
705 - def calc_diversities_fis_with_size(self, fname):
706 raise NotImplementedError
707 708 # 6.1 Less genotype frequencies
709 - def calc_fst_all(self, fname):
710 """Executes GenePop and gets Fst/Fis/Fit (all populations) 711 712 Parameters: 713 714 - fname - file name 715 716 Returns: 717 718 - (multiLocusFis, multiLocusFst, multiLocus Fit), 719 - Iterator of tuples 720 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 721 722 Will create a file called fname.FST . 723 724 This does not return the genotype frequencies. 725 """ 726 self._run_genepop([".FST"], [6, 1], fname) 727 with open(fname + ".FST") as f: 728 l = f.readline() 729 while l != '': 730 if l.startswith(' All:'): 731 toks = [x for x in l.rstrip().split(' ') if x != ""] 732 try: 733 allFis = _gp_float(toks[1]) 734 except ValueError: 735 allFis = None 736 try: 737 allFst = _gp_float(toks[2]) 738 except ValueError: 739 allFst = None 740 try: 741 allFit = _gp_float(toks[3]) 742 except ValueError: 743 allFit = None 744 l = f.readline() 745 746 def proc(self): 747 if hasattr(self, "last_line"): 748 l = self.last_line 749 del self.last_line 750 else: 751 l = self.stream.readline() 752 locus = None 753 fis = None 754 fst = None 755 fit = None 756 qintra = None 757 qinter = None 758 while l != '': 759 l = l.rstrip() 760 if l.startswith(' Locus:'): 761 if locus is not None: 762 self.last_line = l 763 return locus, fis, fst, fit, qintra, qinter 764 else: 765 locus = l.split(':')[1].lstrip() 766 elif l.startswith('Fis^='): 767 fis = _gp_float(l.split(' ')[1]) 768 elif l.startswith('Fst^='): 769 fst = _gp_float(l.split(' ')[1]) 770 elif l.startswith('Fit^='): 771 fit = _gp_float(l.split(' ')[1]) 772 elif l.startswith('1-Qintra^='): 773 qintra = _gp_float(l.split(' ')[1]) 774 elif l.startswith('1-Qinter^='): 775 qinter = _gp_float(l.split(' ')[1]) 776 return locus, fis, fst, fit, qintra, qinter 777 l = self.stream.readline() 778 if locus is not None: 779 return locus, fis, fst, fit, qintra, qinter 780 self.stream.close() 781 self.done = True 782 raise StopIteration
783 return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST") 784 785 # 6.2
786 - def calc_fst_pair(self, fname):
787 self._run_genepop([".ST2", ".MIG"], [6, 2], fname) 788 with open(fname + ".ST2") as f: 789 l = f.readline() 790 while l != "": 791 l = l.rstrip() 792 if l.startswith("Estimates for all loci"): 793 avg_fst = _read_headed_triangle_matrix(f) 794 l = f.readline() 795 796 def loci_func(self): 797 l = self.stream.readline() 798 while l != "": 799 l = l.rstrip() 800 m = re.search(" Locus: (.+)", l) 801 if m is not None: 802 locus = m.group(1) 803 matrix = _read_headed_triangle_matrix(self.stream) 804 return locus, matrix 805 l = self.stream.readline() 806 self.done = True 807 raise StopIteration
808 809 os.remove(fname + ".MIG") 810 return _FileIterator(loci_func, fname + ".ST2"), avg_fst 811 812 # 6.3
813 - def calc_rho_all(self, fname):
814 raise NotImplementedError
815 816 # 6.4
817 - def calc_rho_pair(self, fname):
818 raise NotImplementedError
819
820 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
821 """Calculates isolation by distance statistics 822 """ 823 self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub], 824 fname, opts={ 825 "MinimalDistance": min_dist, 826 "GeographicScale": scale, 827 "IsolBDstatistic": stat}) 828 with open(fname + ".ISO") as f: 829 f.readline() 830 f.readline() 831 f.readline() 832 f.readline() 833 estimate = _read_triangle_matrix(f) 834 f.readline() 835 f.readline() 836 distance = _read_triangle_matrix(f) 837 f.readline() 838 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 839 a = _gp_float(match.group(1)) 840 b = _gp_float(match.group(2)) 841 f.readline() 842 f.readline() 843 match = re.match(" b=(.+)", f.readline().rstrip()) 844 bb = _gp_float(match.group(1)) 845 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 846 bblow = _gp_float(match.group(1)) 847 bbhigh = _gp_float(match.group(2)) 848 os.remove(fname + ".MIG") 849 os.remove(fname + ".GRA") 850 os.remove(fname + ".ISO") 851 return estimate, distance, (a, b), (bb, bblow, bbhigh)
852 853 # 6.5
854 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
855 """Calculates isolation by distance statistics for diploid data. 856 857 See _calc_ibd for parameter details. 858 Note that each pop can only have a single individual and 859 the individual name has to be the sample coordinates. 860 """ 861 return self._calc_ibd(fname, 5, stat, scale, min_dist)
862 863 # 6.6
864 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
865 """Calculates isolation by distance statistics for haploid data. 866 867 See _calc_ibd for parameter details. 868 Note that each pop can only have a single individual and 869 the individual name has to be the sample coordinates. 870 """ 871 return self._calc_ibd(fname, 6, stat, scale, min_dist)
872