Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module allows to control GenePop. 
  8   
  9  """ 
 10   
 11  import os 
 12  import re 
 13  import shutil 
 14   
 15  import sys # for checking if under Python 2 
 16   
 17  from Bio.Application import AbstractCommandline, _Argument 
 18   
 19   
20 -def _gp_float(tok):
21 """Gets a float from a token, if it fails, returns the string. 22 """ 23 try: 24 return float(tok) 25 except ValueError: 26 return str(tok)
27 28
29 -def _gp_int(tok):
30 """Gets a int from a token, if it fails, returns the string. 31 """ 32 try: 33 return int(tok) 34 except ValueError: 35 return str(tok)
36 37
38 -def _read_allele_freq_table(f):
39 l = f.readline() 40 while ' --' not in l: 41 if l == "": 42 raise StopIteration 43 if 'No data' in l: 44 return None, None 45 l = f.readline() 46 alleles = [x for x in f.readline().rstrip().split(" ") if x != ''] 47 alleles = [_gp_int(x) for x in alleles] 48 l = f.readline().rstrip() 49 table = [] 50 while l != "": 51 line = [x for x in l.split(" ") if x != ''] 52 try: 53 table.append( 54 (line[0], 55 [_gp_float(x) for x in line[1:-1]], 56 _gp_int(line[-1]))) 57 except ValueError: 58 table.append( 59 (line[0], 60 [None] * len(alleles), 61 0)) 62 l = f.readline().rstrip() 63 return alleles, table
64 65
66 -def _read_table(f, funs):
67 table = [] 68 l = f.readline().rstrip() 69 while '---' not in l: 70 l = f.readline().rstrip() 71 l = f.readline().rstrip() 72 while '===' not in l and '---' not in l and l != "": 73 toks = [x for x in l.split(" ") if x != ""] 74 line = [] 75 for i in range(len(toks)): 76 try: 77 line.append(funs[i](toks[i])) 78 except ValueError: 79 line.append(toks[i]) # Could not cast 80 table.append(tuple(line)) 81 l = f.readline().rstrip() 82 return table
83 84
85 -def _read_triangle_matrix(f):
86 matrix = [] 87 l = f.readline().rstrip() 88 while l != "": 89 matrix.append( 90 [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]]) 91 l = f.readline().rstrip() 92 return matrix
93 94
95 -def _read_headed_triangle_matrix(f):
96 matrix = {} 97 header = f.readline().rstrip() 98 if '---' in header or '===' in header: 99 header = f.readline().rstrip() 100 nlines = len([x for x in header.split(' ') if x != '']) - 1 101 for line_pop in range(nlines): 102 l = f.readline().rstrip() 103 vals = [x for x in l.split(' ')[1:] if x != ''] 104 clean_vals = [] 105 for val in vals: 106 try: 107 clean_vals.append(_gp_float(val)) 108 except ValueError: 109 clean_vals.append(None) 110 for col_pop in range(len(clean_vals)): 111 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 112 return matrix
113 114
115 -def _hw_func(stream, is_locus, has_fisher = False):
116 l = stream.readline() 117 if is_locus: 118 hook = "Locus " 119 else: 120 hook = " Pop : " 121 while l != "": 122 if l.startswith(hook): 123 stream.readline() 124 stream.readline() 125 stream.readline() 126 table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str]) 127 #loci might mean pop if hook="Locus " 128 loci = {} 129 for entry in table: 130 if len(entry) < 3: 131 loci[entry[0]] = None 132 else: 133 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 134 if se == "-": 135 se = None 136 loci[locus] = p, se, fis_wc, fis_rh, steps 137 return loci 138 l = stream.readline() 139 #self.done = True 140 raise StopIteration
141 142
143 -class _FileIterator:
144 """Iterator which crawls over a stream of lines with a function. 145 146 The generator function is expected to yield a tuple, while 147 consuming input 148 """
149 - def __init__(self, func, fname, handle=None):
150 self.func = func 151 if handle is None: 152 self.stream = open(fname) 153 else: 154 # For special cases where calling code wants to 155 # seek into the file before starting: 156 self.stream = handle 157 self.fname = fname 158 self.done = False
159
160 - def __iter__(self):
161 if self.done: 162 self.done = True 163 raise StopIteration 164 return self
165
166 - def __next__(self):
167 return self.func(self)
168 169 if sys.version_info[0] < 3:
170 - def next(self):
171 """Deprecated Python 2 style alias for Python 3 style __next__ method.""" 172 import warnings 173 from Bio import BiopythonDeprecationWarning 174 warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), " 175 "the .next() method is deprecated and will be removed in a " 176 "future release of Biopython.", BiopythonDeprecationWarning) 177 return self.__next__()
178
179 - def __del__(self):
180 self.stream.close() 181 try: 182 os.remove(self.fname) 183 except OSError: 184 #Jython seems to call the iterator twice 185 pass
186 187
188 -class _GenePopCommandline(AbstractCommandline):
189 """ Command Line Wrapper for GenePop. 190 """
191 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
192 self.parameters = [ 193 _Argument(["command"], 194 "GenePop option to be called", 195 is_required=True), 196 _Argument(["mode"], 197 "Should allways be batch", 198 is_required=True), 199 _Argument(["input"], 200 "Input file", 201 is_required=True), 202 _Argument(["Dememorization"], 203 "Dememorization step"), 204 _Argument(["BatchNumber"], 205 "Number of MCMC batches"), 206 _Argument(["BatchLength"], 207 "Length of MCMC chains"), 208 _Argument(["HWtests"], 209 "Enumeration or MCMC"), 210 _Argument(["IsolBDstatistic"], 211 "IBD statistic (a or e)"), 212 _Argument(["MinimalDistance"], 213 "Minimal IBD distance"), 214 _Argument(["GeographicScale"], 215 "Log or Linear"), 216 ] 217 AbstractCommandline.__init__(self, cmd, **kwargs) 218 self.set_parameter("mode", "Mode=Batch")
219
220 - def set_menu(self, option_list):
221 """Sets the menu option. 222 223 Example set_menu([6,1]) = get all F statistics (menu 6.1) 224 """ 225 self.set_parameter("command", "MenuOptions=" + 226 ".".join(str(x) for x in option_list))
227
228 - def set_input(self, fname):
229 """Sets the input file name. 230 """ 231 self.set_parameter("input", "InputFile="+fname)
232 233
234 -class GenePopController(object):
235 - def __init__(self, genepop_dir = None):
236 """Initializes the controller. 237 238 genepop_dir is the directory where GenePop is. 239 240 The binary should be called Genepop (capital G) 241 """ 242 self.controller = _GenePopCommandline(genepop_dir)
243
244 - def _remove_garbage(self, fname_out):
245 try: 246 if fname_out is not None: 247 os.remove(fname_out) 248 except OSError: 249 pass # safe 250 try: 251 os.remove("genepop.txt") 252 except OSError: 253 pass # safe 254 try: 255 os.remove("fichier.in") 256 except OSError: 257 pass # safe 258 try: 259 os.remove("cmdline.txt") 260 except OSError: 261 pass # safe
262
263 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
264 opts = {} 265 opts["Dememorization"]=dememorization 266 opts["BatchNumber"]=batches 267 opts["BatchLength"]=iterations 268 if enum_test is not None: 269 if enum_test is True: 270 opts["HWtests"]="Enumeration" 271 else: 272 opts["HWtests"]="MCMC" 273 return opts
274
275 - def _run_genepop(self, extensions, option, fname, opts={}):
276 for extension in extensions: 277 self._remove_garbage(fname + extension) 278 self.controller.set_menu(option) 279 self.controller.set_input(fname) 280 for opt in opts: 281 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 282 self.controller() # checks error level is zero 283 self._remove_garbage(None) 284 return
285
286 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 287 dememorization = 10000, batches = 20, 288 iterations = 5000):
289 """Hardy-Weinberg test for heterozygote deficiency/excess. 290 291 Returns a population iterator containg 292 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 293 Some loci have a None if the info is not available 294 SE might be none (for enumerations) 295 """ 296 opts = self._get_opts(dememorization, batches, iterations, enum_test) 297 self._run_genepop([ext], [1, type], fname, opts) 298 299 def hw_func(self): 300 return _hw_func(self.stream, False)
301 302 return _FileIterator(hw_func, fname + ext)
303
304 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 305 dememorization = 10000, batches = 20, 306 iterations = 5000):
307 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 308 309 Returns a triple with: 310 A list per population containg 311 (pop_name, P-val, SE, switches) 312 Some pops have a None if the info is not available 313 SE might be none (for enumerations) 314 A list per loci containg 315 (locus_name, P-val, SE, switches) 316 Some loci have a None if the info is not available 317 SE might be none (for enumerations) 318 Overall results (P-val, SE, switches) 319 320 """ 321 opts = self._get_opts(dememorization, batches, iterations, enum_test) 322 self._run_genepop([ext], [1, type], fname, opts) 323 324 def hw_pop_func(self): 325 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
326 327 with open(fname + ext) as f1: 328 l = f1.readline() 329 while "by population" not in l: 330 l = f1.readline() 331 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 332 with open(fname + ext) as f2: 333 l = f2.readline() 334 while "by locus" not in l: 335 l = f2.readline() 336 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 337 with open(fname + ext) as f: 338 l = f.readline() 339 while "all locus" not in l: 340 l = f.readline() 341 f.readline() 342 f.readline() 343 f.readline() 344 f.readline() 345 l = f.readline().rstrip() 346 p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""]) 347 return pop_p, loc_p, (p, se, switches) 348 349 #1.1
350 - def test_pop_hz_deficiency(self, fname, enum_test = True, 351 dememorization = 10000, batches = 20, 352 iterations = 5000):
353 """Hardy-Weinberg test for heterozygote deficiency. 354 355 Returns a population iterator containg 356 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 357 Some loci have a None if the info is not available 358 SE might be none (for enumerations) 359 """ 360 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 361 dememorization, batches, iterations)
362 363 #1.2
364 - def test_pop_hz_excess(self, fname, enum_test = True, 365 dememorization = 10000, batches = 20, 366 iterations = 5000):
367 """Hardy-Weinberg test for heterozygote deficiency. 368 369 Returns a population iterator containg 370 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 371 Some loci have a None if the info is not available 372 SE might be none (for enumerations) 373 """ 374 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 375 dememorization, batches, iterations)
376 377 #1.3 P file
378 - def test_pop_hz_prob(self, fname, ext, enum_test = False, 379 dememorization = 10000, batches = 20, 380 iterations = 5000):
381 """Hardy-Weinberg test based on probability. 382 383 Returns 2 iterators and a final tuple: 384 385 1. Returns a loci iterator containing 386 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 387 Some pops have a None if the info is not available 388 SE might be none (for enumerations) 389 c. Result of Fisher's test (Chi2, deg freedom, prob) 390 2. Returns a population iterator containg 391 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 392 Some loci have a None if the info is not available 393 SE might be none (for enumerations) 394 b. Result of Fisher's test (Chi2, deg freedom, prob) 395 3. (Chi2, deg freedom, prob) 396 """ 397 opts = self._get_opts(dememorization, batches, iterations, enum_test) 398 self._run_genepop([ext], [1, 3], fname, opts) 399 400 def hw_prob_loci_func(self): 401 return _hw_func(self.stream, True, True)
402 403 def hw_prob_pop_func(self): 404 return _hw_func(self.stream, False, True) 405 406 shutil.copyfile(fname+".P", fname+".P2") 407 408 return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2") 409 410 #1.4
411 - def test_global_hz_deficiency(self, fname, enum_test = True, 412 dememorization = 10000, batches = 20, 413 iterations = 5000):
414 """Global Hardy-Weinberg test for heterozygote deficiency. 415 416 Returns a triple with: 417 An list per population containg 418 (pop_name, P-val, SE, switches) 419 Some pops have a None if the info is not available 420 SE might be none (for enumerations) 421 An list per loci containg 422 (locus_name, P-val, SE, switches) 423 Some loci have a None if the info is not available 424 SE might be none (for enumerations) 425 Overall results (P-val, SE, switches) 426 """ 427 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 428 dememorization, batches, iterations)
429 430 #1.5
431 - def test_global_hz_excess(self, fname, enum_test = True, 432 dememorization = 10000, batches = 20, 433 iterations = 5000):
434 """Global Hardy-Weinberg test for heterozygote excess. 435 436 Returns a triple with: 437 An list per population containg 438 (pop_name, P-val, SE, switches) 439 Some pops have a None if the info is not available 440 SE might be none (for enumerations) 441 An list per loci containg 442 (locus_name, P-val, SE, switches) 443 Some loci have a None if the info is not available 444 SE might be none (for enumerations) 445 Overall results (P-val, SE, switches) 446 """ 447 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 448 dememorization, batches, iterations)
449 450 #2.1
451 - def test_ld(self, fname, 452 dememorization = 10000, batches = 20, iterations = 5000):
453 opts = self._get_opts(dememorization, batches, iterations) 454 self._run_genepop([".DIS"], [2, 1], fname, opts) 455 456 def ld_pop_func(self): 457 current_pop = None 458 l = self.stream.readline().rstrip() 459 if l == "": 460 self.done = True 461 raise StopIteration 462 toks = [x for x in l.split(" ") if x != ""] 463 pop, locus1, locus2 = toks[0], toks[1], toks[2] 464 if not hasattr(self, "start_locus1"): 465 start_locus1, start_locus2 = locus1, locus2 466 current_pop = -1 467 if locus1 == start_locus1 and locus2 == start_locus2: 468 current_pop += 1 469 if toks[3] == "No": 470 return current_pop, pop, (locus1, locus2), None 471 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 472 return current_pop, pop, (locus1, locus2), (p, se, switches)
473 474 def ld_func(self): 475 l = self.stream.readline().rstrip() 476 if l == "": 477 self.done = True 478 raise StopIteration 479 toks = [x for x in l.split(" ") if x != ""] 480 locus1, locus2 = toks[0], toks[2] 481 try: 482 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 483 except ValueError: 484 return (locus1, locus2), None 485 return (locus1, locus2), (chi2, df, p) 486 487 f1 = open(fname + ".DIS") 488 l = f1.readline() 489 while "----" not in l: 490 l = f1.readline() 491 shutil.copyfile(fname + ".DIS", fname + ".DI2") 492 f2 = open(fname + ".DI2") 493 l = f2.readline() 494 while "Locus pair" not in l: 495 l = f2.readline() 496 while "----" not in l: 497 l = f2.readline() 498 return _FileIterator(ld_pop_func, fname+".DIS", f1), _FileIterator(ld_func, fname + ".DI2", f2) 499 500 #2.2
501 - def create_contingency_tables(self, fname):
502 raise NotImplementedError
503 504 #3.1 PR/GE files
505 - def test_genic_diff_all(self, fname, 506 dememorization = 10000, batches = 20, iterations = 5000):
507 raise NotImplementedError
508 509 #3.2 PR2/GE2 files
510 - def test_genic_diff_pair(self, fname, 511 dememorization = 10000, batches = 20, iterations = 5000):
512 raise NotImplementedError
513 514 #3.3 G files
515 - def test_genotypic_diff_all(self, fname, 516 dememorization = 10000, batches = 20, iterations = 5000):
517 raise NotImplementedError
518 519 #3.4 2G2 files
520 - def test_genotypic_diff_pair(self, fname, 521 dememorization = 10000, batches = 20, iterations = 5000):
522 raise NotImplementedError
523 524 #4
525 - def estimate_nm(self, fname):
526 self._run_genepop(["PRI"], [4], fname) 527 with open(fname + ".PRI") as f: 528 lines = f.readlines() # Small file, it is ok 529 for line in lines: 530 m = re.search("Mean sample size: ([.0-9]+)", line) 531 if m is not None: 532 mean_sample_size = _gp_float(m.group(1)) 533 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 534 if m is not None: 535 mean_priv_alleles = _gp_float(m.group(1)) 536 m = re.search("N=10: ([.0-9]+)", line) 537 if m is not None: 538 mig10 = _gp_float(m.group(1)) 539 m = re.search("N=25: ([.0-9]+)", line) 540 if m is not None: 541 mig25 = _gp_float(m.group(1)) 542 m = re.search("N=50: ([.0-9]+)", line) 543 if m is not None: 544 mig50 = _gp_float(m.group(1)) 545 m = re.search("for size= ([.0-9]+)", line) 546 if m is not None: 547 mig_corrected = _gp_float(m.group(1)) 548 os.remove(fname + ".PRI") 549 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
550 551 #5.1
552 - def calc_allele_genotype_freqs(self, fname):
553 """Calculates allele and genotype frequencies per locus and per sample. 554 555 Parameters: 556 fname - file name 557 558 Returns tuple with 2 elements: 559 Population iterator with 560 population name 561 Locus dictionary with key = locus name and content tuple as 562 Genotype List with 563 (Allele1, Allele2, observed, expected) 564 (expected homozygotes, observed hm, 565 expected heterozygotes, observed ht) 566 Allele frequency/Fis dictionary with allele as key and 567 (count, frequency, Fis Weir & Cockerham) 568 Totals as a pair 569 count 570 Fis Weir & Cockerham, 571 Fis Robertson & Hill 572 Locus iterator with 573 Locus name 574 allele list 575 Population list with a triple 576 population name 577 list of allele frequencies in the same order as allele list above 578 number of genes 579 580 Will create a file called fname.INF 581 """ 582 self._run_genepop(["INF"], [5, 1], fname) 583 #First pass, general information 584 #num_loci = None 585 #num_pops = None 586 #with open(fname + ".INF") as f: 587 #l = f.readline() 588 #while (num_loci is None or num_pops is None) and l != '': 589 #m = re.search("Number of populations detected : ([0-9+])", l) 590 #if m is not None: 591 #num_pops = _gp_int(m.group(1)) 592 #m = re.search("Number of loci detected : ([0-9+])", l) 593 #if m is not None: 594 #num_loci = _gp_int(m.group(1)) 595 #l = f.readline() 596 597 def pop_parser(self): 598 if hasattr(self, "old_line"): 599 l = self.old_line 600 del self.old_line 601 else: 602 l = self.stream.readline() 603 loci_content = {} 604 while l != '': 605 l = l.rstrip() 606 if "Tables of allelic frequencies for each locus" in l: 607 return self.curr_pop, loci_content 608 match = re.match(".*Pop: (.+) Locus: (.+)", l) 609 if match is not None: 610 pop = match.group(1) 611 locus = match.group(2) 612 if not hasattr(self, "first_locus"): 613 self.first_locus = locus 614 if hasattr(self, "curr_pop"): 615 if self.first_locus == locus: 616 old_pop = self.curr_pop 617 #self.curr_pop = pop 618 self.old_line = l 619 del self.first_locus 620 del self.curr_pop 621 return old_pop, loci_content 622 self.curr_pop = pop 623 else: 624 l = self.stream.readline() 625 continue 626 geno_list = [] 627 l = self.stream.readline() 628 if "No data" in l: 629 continue 630 631 while "Genotypes Obs." not in l: 632 l = self.stream.readline() 633 634 while l != "\n": 635 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l) 636 if m2 is not None: 637 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 638 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 639 else: 640 l = self.stream.readline() 641 continue 642 l = self.stream.readline() 643 644 while "Expected number of ho" not in l: 645 l = self.stream.readline() 646 expHo = _gp_float(l[38:]) 647 l = self.stream.readline() 648 obsHo = _gp_int(l[38:]) 649 l = self.stream.readline() 650 expHe = _gp_float(l[38:]) 651 l = self.stream.readline() 652 obsHe = _gp_int(l[38:]) 653 l = self.stream.readline() 654 655 while "Sample count" not in l: 656 l = self.stream.readline() 657 l = self.stream.readline() 658 freq_fis={} 659 overall_fis = None 660 while "----" not in l: 661 vals = [x for x in l.rstrip().split(' ') if x!=''] 662 if vals[0]=="Tot": 663 overall_fis = _gp_int(vals[1]), \ 664 _gp_float(vals[2]), _gp_float(vals[3]) 665 else: 666 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 667 _gp_float(vals[2]), _gp_float(vals[3]) 668 l = self.stream.readline() 669 loci_content[locus] = geno_list, \ 670 (expHo, obsHo, expHe, obsHe), \ 671 freq_fis, overall_fis 672 self.done = True 673 raise StopIteration
674 675 def locus_parser(self): 676 l = self.stream.readline() 677 while l != "": 678 l = l.rstrip() 679 match = re.match(" Locus: (.+)", l) 680 if match is not None: 681 locus = match.group(1) 682 alleles, table = _read_allele_freq_table(self.stream) 683 return locus, alleles, table 684 l = self.stream.readline() 685 self.done = True 686 raise StopIteration 687 688 shutil.copyfile(fname + ".INF", fname + ".IN2") 689 pop_iter = _FileIterator(pop_parser, fname + ".INF") 690 locus_iter = _FileIterator(locus_parser, fname + ".IN2") 691 return (pop_iter, locus_iter) 692
693 - def _calc_diversities_fis(self, fname, ext):
694 self._run_genepop([ext], [5, 2], fname) 695 with open(fname + ext) as f: 696 l = f.readline() 697 while l != "": 698 l = l.rstrip() 699 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 700 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 701 avg_Qintra = _read_table(f, [str, _gp_float]) 702 l = f.readline() 703 704 def fis_func(self): 705 l = self.stream.readline() 706 while l != "": 707 l = l.rstrip() 708 m = re.search("Locus: (.+)", l) 709 if m is not None: 710 locus = m.group(1) 711 self.stream.readline() 712 if "No complete" in self.stream.readline(): 713 return locus, None 714 self.stream.readline() 715 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 716 self.stream.readline() 717 avg_qinter, avg_fis = tuple(_gp_float(x) for x in 718 [y for y in self.stream.readline().split(" ") if y != ""]) 719 return locus, fis_table, avg_qinter, avg_fis 720 l = self.stream.readline() 721 self.done = True 722 raise StopIteration
723 724 return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra 725 726 #5.2
727 - def calc_diversities_fis_with_identity(self, fname):
728 return self._calc_diversities_fis(fname, ".DIV")
729 730 #5.3
731 - def calc_diversities_fis_with_size(self, fname):
732 raise NotImplementedError
733 734 #6.1 Less genotype frequencies
735 - def calc_fst_all(self, fname):
736 """Executes GenePop and gets Fst/Fis/Fit (all populations) 737 738 Parameters: 739 fname - file name 740 741 Returns: 742 (multiLocusFis, multiLocusFst, multiLocus Fit), 743 Iterator of tuples 744 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 745 746 Will create a file called fname.FST . 747 748 This does not return the genotype frequencies. 749 """ 750 self._run_genepop([".FST"], [6, 1], fname) 751 with open(fname + ".FST") as f: 752 l = f.readline() 753 while l != '': 754 if l.startswith(' All:'): 755 toks = [x for x in l.rstrip().split(' ') if x != ""] 756 try: 757 allFis = _gp_float(toks[1]) 758 except ValueError: 759 allFis = None 760 try: 761 allFst = _gp_float(toks[2]) 762 except ValueError: 763 allFst = None 764 try: 765 allFit = _gp_float(toks[3]) 766 except ValueError: 767 allFit = None 768 l = f.readline() 769 770 def proc(self): 771 if hasattr(self, "last_line"): 772 l = self.last_line 773 del self.last_line 774 else: 775 l = self.stream.readline() 776 locus = None 777 fis = None 778 fst = None 779 fit = None 780 qintra = None 781 qinter = None 782 while l != '': 783 l = l.rstrip() 784 if l.startswith(' Locus:'): 785 if locus is not None: 786 self.last_line = l 787 return locus, fis, fst, fit, qintra, qinter 788 else: 789 locus = l.split(':')[1].lstrip() 790 elif l.startswith('Fis^='): 791 fis = _gp_float(l.split(' ')[1]) 792 elif l.startswith('Fst^='): 793 fst = _gp_float(l.split(' ')[1]) 794 elif l.startswith('Fit^='): 795 fit = _gp_float(l.split(' ')[1]) 796 elif l.startswith('1-Qintra^='): 797 qintra = _gp_float(l.split(' ')[1]) 798 elif l.startswith('1-Qinter^='): 799 qinter = _gp_float(l.split(' ')[1]) 800 return locus, fis, fst, fit, qintra, qinter 801 l = self.stream.readline() 802 if locus is not None: 803 return locus, fis, fst, fit, qintra, qinter 804 self.stream.close() 805 self.done = True 806 raise StopIteration
807 return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST") 808 809 #6.2
810 - def calc_fst_pair(self, fname):
811 self._run_genepop([".ST2", ".MIG"], [6, 2], fname) 812 with open(fname + ".ST2") as f: 813 l = f.readline() 814 while l != "": 815 l = l.rstrip() 816 if l.startswith("Estimates for all loci"): 817 avg_fst = _read_headed_triangle_matrix(f) 818 l = f.readline() 819 820 def loci_func(self): 821 l = self.stream.readline() 822 while l != "": 823 l = l.rstrip() 824 m = re.search(" Locus: (.+)", l) 825 if m is not None: 826 locus = m.group(1) 827 matrix = _read_headed_triangle_matrix(self.stream) 828 return locus, matrix 829 l = self.stream.readline() 830 self.done = True 831 raise StopIteration
832 833 os.remove(fname + ".MIG") 834 return _FileIterator(loci_func, fname + ".ST2"), avg_fst 835 836 #6.3
837 - def calc_rho_all(self, fname):
838 raise NotImplementedError
839 840 #6.4
841 - def calc_rho_pair(self, fname):
842 raise NotImplementedError
843
844 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
845 """Calculates isolation by distance statistics 846 """ 847 self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub], 848 fname, opts = { 849 "MinimalDistance": min_dist, 850 "GeographicScale": scale, 851 "IsolBDstatistic": stat, 852 }) 853 with open(fname + ".ISO") as f: 854 f.readline() 855 f.readline() 856 f.readline() 857 f.readline() 858 estimate = _read_triangle_matrix(f) 859 f.readline() 860 f.readline() 861 distance = _read_triangle_matrix(f) 862 f.readline() 863 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 864 a = _gp_float(match.group(1)) 865 b = _gp_float(match.group(2)) 866 f.readline() 867 f.readline() 868 match = re.match(" b=(.+)", f.readline().rstrip()) 869 bb = _gp_float(match.group(1)) 870 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 871 bblow = _gp_float(match.group(1)) 872 bbhigh = _gp_float(match.group(2)) 873 os.remove(fname + ".MIG") 874 os.remove(fname + ".GRA") 875 os.remove(fname + ".ISO") 876 return estimate, distance, (a, b), (bb, bblow, bbhigh)
877 878 #6.5
879 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
880 """Calculates isolation by distance statistics for diploid data. 881 882 See _calc_ibd for parameter details. 883 Note that each pop can only have a single individual and 884 the individual name has to be the sample coordinates. 885 """ 886 return self._calc_ibd(fname, 5, stat, scale, min_dist)
887 888 #6.6
889 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
890 """Calculates isolation by distance statistics for haploid data. 891 892 See _calc_ibd for parameter details. 893 Note that each pop can only have a single individual and 894 the individual name has to be the sample coordinates. 895 """ 896 return self._calc_ibd(fname, 6, stat, scale, min_dist)
897