Package Bio :: Package Wise
[hide private]
[frames] | no frames]

Source Code for Package Bio.Wise

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  """Run and process output from the Wise2 package tools. 
  7   
  8  Bio.Wise contains modules for running and processing the output of 
  9  some of the models in the Wise2 package by Ewan Birney available from: 
 10  ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
 11  http://www.ebi.ac.uk/Wise2/ 
 12   
 13  Bio.Wise.psw is for protein Smith-Waterman alignments 
 14  Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 15  """ 
 16   
 17  from __future__ import print_function 
 18   
 19  import os 
 20  import sys 
 21  import tempfile 
 22   
 23  from Bio import SeqIO 
 24   
 25   
26 -def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
27 """Helper function to build a command line string (PRIVATE). 28 29 >>> os.environ["WISE_KBYTE"]="300000" 30 >>> if os.isatty(sys.stderr.fileno()): 31 ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), 32 ... "/tmp/output", kbyte=100000) 33 ... assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c 34 ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), 35 ... "/tmp/output_aa") 36 ... assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c 37 ... else: 38 ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), 39 ... "/tmp/output", kbyte=100000) 40 ... assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c 41 ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), 42 ... "/tmp/output_aa") 43 ... assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c 44 45 """ 46 cmdline = cmdline[:] 47 48 # XXX: force_type ignored 49 50 if kbyte is None: 51 try: 52 cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"])) 53 except KeyError: 54 pass 55 else: 56 cmdline.extend(("-kbyte", str(kbyte))) 57 58 if not os.isatty(sys.stderr.fileno()): 59 cmdline.append("-quiet") 60 61 cmdline.extend(pair) 62 cmdline.extend((">", output_filename)) 63 if quiet: 64 cmdline.extend(("2>", "/dev/null")) 65 cmdline_str = ' '.join(cmdline) 66 67 return cmdline_str
68 69
70 -def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
71 """Run an alignment. Returns a filehandle.""" 72 if not pair or len(pair) != 2: 73 raise ValueError("Expected pair of filename, not %s" % repr(pair)) 74 75 output_file = tempfile.NamedTemporaryFile(mode='r') 76 input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w") 77 78 if dry_run: 79 print(_build_align_cmdline(cmdline, 80 pair, 81 output_file.name, 82 kbyte, 83 force_type, 84 quiet)) 85 return 86 87 for filename, input_file in zip(pair, input_files): 88 # Pipe the file through Biopython's Fasta parser/writer 89 # to make sure it conforms to the Fasta standard (in particular, 90 # Wise2 may choke on long lines in the Fasta file) 91 records = SeqIO.parse(open(filename), 'fasta') 92 SeqIO.write(records, input_file, 'fasta') 93 input_file.flush() 94 95 input_file_names = [input_file.name for input_file in input_files] 96 97 cmdline_str = _build_align_cmdline(cmdline, 98 input_file_names, 99 output_file.name, 100 kbyte, 101 force_type, 102 quiet) 103 104 if debug: 105 sys.stderr.write("%s\n" % cmdline_str) 106 107 status = os.system(cmdline_str) >> 8 108 109 if status > 1: 110 if kbyte != 0: # possible memory problem; could be None 111 sys.stderr.write("INFO trying again with the linear model\n") 112 return align(cmdline, pair, 0, force_type, dry_run, quiet, debug) 113 else: 114 raise OSError("%s returned %s" % (" ".join(cmdline), status)) 115 116 return output_file
117 118
119 -def all_pairs(singles):
120 """Generate pairs list for all-against-all alignments. 121 122 >>> all_pairs(range(4)) 123 [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)] 124 """ 125 pairs = [] 126 127 singles = list(singles) 128 while singles: 129 suitor = singles.pop(0) # if sorted, stay sorted 130 pairs.extend((suitor, single) for single in singles) 131 132 return pairs
133 134
135 -def main():
136 pass
137 138
139 -def _test(*args, **keywds):
140 import doctest 141 doctest.testmod(sys.modules[__name__], *args, **keywds)
142 143 144 if __name__ == "__main__": 145 if __debug__: 146 _test() 147 main() 148