Package Bio :: Package Phylo :: Package PAML :: Module baseml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PAML.baseml

  1  # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  import os 
  7  import os.path 
  8  from ._paml import Paml 
  9  from . import _parse_baseml 
 10   
 11   
class BasemlError(EnvironmentError):
    """Signal that the BASEML program failed.

    Run again with verbose=True to view the error message emitted by
    BASEML itself.
    """


class Baseml(Paml):
    """An interface to BASEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None,
                 out_file=None):
        """Initialize the Baseml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file.

        Raises IOError if a tree path is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None and not os.path.exists(tree):
            raise IOError("The specified tree file does not exist.")
        # Always defined (possibly None): run() and _set_rel_paths() both
        # test ``self.tree is None``.
        self.tree = tree
        self.ctl_file = "baseml.ctl"
        # Every recognised control-file option starts out as None, which
        # means "leave it out of the control file".
        self._options = dict.fromkeys((
            "noisy",
            "verbose",
            "runmode",
            "model",
            "model_options",
            "Mgene",
            "ndata",
            "clock",
            "fix_kappa",
            "kappa",
            "fix_alpha",
            "alpha",
            "Malpha",
            "ncatG",
            "fix_rho",
            "rho",
            "nparK",
            "nhomo",
            "getSE",
            "RateAncestor",
            "Small_Diff",
            "cleandata",
            "icode",
            "fix_blength",
            "method",
        ))

    def write_ctl_file(self):
        """Dynamically build a BASEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the baseml class. The seqfile, outfile and
        treefile entries are written first, followed by one line for each
        option whose value is not None.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        with open(self.ctl_file, "w") as ctl_handle:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            for name, value in self._options.items():
                # If an option has a value of None, there's no need
                # to write it in the control file; it's normally just
                # commented out. "model_options" is never written on its
                # own; it is appended to the "model" line below.
                if value is None or name == "model_options":
                    continue
                # If "model" is 9 or 10, it may be followed in the control
                # file by further options such as
                # [1 (TC CT AG GA)]
                # or
                # [5 (AC CA) (AG GA) (AT TA) (CG GC) (CT TC)]
                # which are to be stored in "model_options" as a string.
                if name == "model" and value in (9, 10):
                    model_options = self._options["model_options"]
                    if model_options is not None:
                        # The trailing newline is required; without it the
                        # next option would be glued onto the model line.
                        ctl_handle.write("model = %s %s\n"
                                         % (value, model_options))
                        continue
                ctl_handle.write("%s = %s\n" % (name, value))

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Baseml instance.

        Text after a "*" on a line is treated as a comment. The seqfile,
        treefile and outfile entries are stored on the alignment, tree and
        out_file attributes; every other entry must be a known option.
        Options absent from the file are reset to None.

        Raises IOError if ctl_file is not a file, AttributeError for a
        non-empty line without "=", and KeyError for an unknown option.
        """
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        temp_options = {}
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                # Split on the first "=" only, so a value that itself
                # contains "=" does not break the unpacking.
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                elif option == "model":
                    if len(value) <= 2 and value.isdigit():
                        temp_options["model"] = int(value)
                        temp_options["model_options"] = None
                    else:
                        # "<number> <extra model options>"
                        model_num, _, model_opt = value.partition(" ")
                        temp_options["model"] = int(model_num)
                        temp_options["model_options"] = model_opt.strip()
                else:
                    # Try a numeric conversion; fall back to the raw string.
                    if "." in value or "e-" in value:
                        convert = float
                    else:
                        convert = int
                    try:
                        temp_options[option] = convert(value)
                    except ValueError:
                        temp_options[option] = value
        # Commit only after the whole file parsed cleanly; options that the
        # file did not mention become None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def _set_rel_paths(self):
        """Convert all file/directory locations to paths relative to the current
        working directory.

        BASEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = os.path.relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="baseml",
            parse=True):
        """Run baseml using the current configuration and then parse the results.

        Execution is delegated to Paml.run. If parse is true, return
        whatever the module-level read() produces for self.out_file;
        otherwise return None. The arguments may be passed as either
        absolute or relative paths, despite the fact that BASEML
        requires relative paths.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None


def read(results_file):
    """Parse a BASEML results file.

    The file is read in full and its lines are handed to
    _parse_baseml.parse_basics and then _parse_baseml.parse_parameters.

    Raises IOError if results_file does not exist and ValueError if the
    parsed results carry no "version" entry.
    """
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    with open(results_file) as handle:
        lines = list(handle)
    parsed, num_params = _parse_baseml.parse_basics(lines, {})
    parsed = _parse_baseml.parse_parameters(lines, parsed, num_params)
    # A results file without a version entry is treated as invalid.
    if parsed.get("version") is None:
        raise ValueError("Invalid results file")
    return parsed