Package Bio :: Package Wise
[hide private]
[frames] | no frames]

Source Code for Package Bio.Wise

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  """ 
  7  Bio.Wise contains modules for running and processing the output of 
  8  some of the models in the Wise2 package by Ewan Birney available from: 
  9  ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
 10  http://www.ebi.ac.uk/Wise2/ 
 11   
 12  Bio.Wise.psw is for protein Smith-Waterman alignments 
 13  Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 14  """ 
 15   
 16  from __future__ import print_function 
 17   
 18  import os 
 19  import sys 
 20  import tempfile 
 21   
 22  from Bio import SeqIO 
 23   
 24   
25 -def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
26 """Helper function to build a command line string (PRIVATE). 27 28 >>> os.environ["WISE_KBYTE"]="300000" 29 >>> if os.isatty(sys.stderr.fileno()): 30 ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), 31 ... "/tmp/output", kbyte=100000) 32 ... assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c 33 ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), 34 ... "/tmp/output_aa") 35 ... assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c 36 ... else: 37 ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), 38 ... "/tmp/output", kbyte=100000) 39 ... assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c 40 ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), 41 ... "/tmp/output_aa") 42 ... assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c 43 44 """ 45 cmdline = cmdline[:] 46 47 # XXX: force_type ignored 48 49 if kbyte is None: 50 try: 51 cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"])) 52 except KeyError: 53 pass 54 else: 55 cmdline.extend(("-kbyte", str(kbyte))) 56 57 if not os.isatty(sys.stderr.fileno()): 58 cmdline.append("-quiet") 59 60 cmdline.extend(pair) 61 cmdline.extend((">", output_filename)) 62 if quiet: 63 cmdline.extend(("2>", "/dev/null")) 64 cmdline_str = ' '.join(cmdline) 65 66 return cmdline_str
67 68
69 -def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
70 """ 71 Returns a filehandle 72 """ 73 if not pair or len(pair) != 2: 74 raise ValueError("Expected pair of filename, not %s" % repr(pair)) 75 76 output_file = tempfile.NamedTemporaryFile(mode='r') 77 input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w") 78 79 if dry_run: 80 print(_build_align_cmdline(cmdline, 81 pair, 82 output_file.name, 83 kbyte, 84 force_type, 85 quiet)) 86 return 87 88 for filename, input_file in zip(pair, input_files): 89 # Pipe the file through Biopython's Fasta parser/writer 90 # to make sure it conforms to the Fasta standard (in particular, 91 # Wise2 may choke on long lines in the Fasta file) 92 records = SeqIO.parse(open(filename), 'fasta') 93 SeqIO.write(records, input_file, 'fasta') 94 input_file.flush() 95 96 input_file_names = [input_file.name for input_file in input_files] 97 98 cmdline_str = _build_align_cmdline(cmdline, 99 input_file_names, 100 output_file.name, 101 kbyte, 102 force_type, 103 quiet) 104 105 if debug: 106 sys.stderr.write("%s\n" % cmdline_str) 107 108 status = os.system(cmdline_str) >> 8 109 110 if status > 1: 111 if kbyte != 0: # possible memory problem; could be None 112 sys.stderr.write("INFO trying again with the linear model\n") 113 return align(cmdline, pair, 0, force_type, dry_run, quiet, debug) 114 else: 115 raise OSError("%s returned %s" % (" ".join(cmdline), status)) 116 117 return output_file
118 119
120 -def all_pairs(singles):
121 """ 122 Generate pairs list for all-against-all alignments 123 124 >>> all_pairs(range(4)) 125 [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)] 126 """ 127 pairs = [] 128 129 singles = list(singles) 130 while singles: 131 suitor = singles.pop(0) # if sorted, stay sorted 132 pairs.extend((suitor, single) for single in singles) 133 134 return pairs
135 136
137 -def main():
138 pass
139 140
141 -def _test(*args, **keywds):
142 import doctest 143 doctest.testmod(sys.modules[__name__], *args, **keywds)
144 145 if __name__ == "__main__": 146 if __debug__: 147 _test() 148 main() 149