1
2
3
4
5
6
7 """General mechanisms to access applications in Biopython.
8
9 This module is not intended for direct use. It provides the basic objects which
10 are subclassed by our command line wrappers, such as:
11
12 - Bio.Align.Applications
13 - Bio.Blast.Applications
14 - Bio.Emboss.Applications
15 - Bio.Sequencing.Applications
16
17 These modules provide wrapper classes for command line tools to help you
18 construct command line strings by setting the values of each parameter.
19 The finished command line strings are then normally invoked via the built-in
20 Python module subprocess.
21
22 This module also includes some deprecated functionality (function generic_run
23 and class ApplicationResult) which should not be used anymore.
24 """
25 import os, sys
26 import StringIO
27 import subprocess
28 import re
29
30 from Bio import File
31
32
33
# The final name of every parameter is exposed as a Python property and as a
# keyword argument, so it must look like an identifier which starts with a
# letter.  The pattern is anchored at the end of the string (\Z) because it is
# applied with .match(), which only anchors at the start - without the anchor
# a name such as "test-name" would be accepted on the strength of its "test"
# prefix.
_re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*\Z")
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None
assert _re_prop_name.match("-test") is None
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
assert _re_prop_name.match("test-name") is None
assert _re_prop_name.match("test name") is None

# Python keywords, which can never be used as a property or argument name.
_reserved_names = ["and", "del", "from", "not", "while", "as", "elif",
                   "global", "or", "with", "assert", "else", "if", "pass",
                   "yield", "break", "except", "import", "print", "class",
                   "exec", "in", "raise", "continue", "finally", "is",
                   "return", "def", "for", "lambda", "try"]

# Names already used by the command line wrapper objects themselves.
_local_reserved_names = ["set_parameter"]
49
51 """Run an application with the given commandline (DEPRECATED).
52
53 This expects a pre-built commandline that derives from
54 AbstractCommandline, and returns a ApplicationResult object
55 to get results from a program, along with handles of the
56 standard output and standard error.
57
58 WARNING - This will read in the full program output into memory!
59 This may be an issue when the program writes a large amount of
60 data to standard output.
61
62 NOTE - This function is considered to be obsolete, and we intend to
63 deprecate it and then remove it in future releases of Biopython.
64 We now recommend you invoke subprocess directly, using str(commandline)
65 to turn an AbstractCommandline wrapper into a command line string. This
66 will give you full control of the tool's input and output as well.
67 """
68 import warnings
69 warnings.warn("Bio.Application.generic_run and the associated "
70 "Bio.Application.ApplicationResult are deprecated. "
71 "Please use the Bio.Application based wrappers with "
72 "the built in Python module subprocess instead, as "
73 "described in the Biopython Tutorial.",
74 DeprecationWarning)
75
76
77
78
79 child = subprocess.Popen(str(commandline),
80 stdin=subprocess.PIPE,
81 stdout=subprocess.PIPE,
82 stderr=subprocess.PIPE,
83 universal_newlines=True,
84 shell=(sys.platform!="win32"))
85
86 r_out, e_out = child.communicate()
87
88 error_code = child.returncode
89 return ApplicationResult(commandline, error_code), \
90 File.UndoHandle(StringIO.StringIO(r_out)), \
91 File.UndoHandle(StringIO.StringIO(e_out))
92
94 """Make results of a program available through a standard interface (DEPRECATED).
95
96 This tries to pick up output information available from the program
97 and make it available programmatically.
98
99 NOTE - This class has been deprecated and we intend to remove it in
100 a future release of Biopython.
101 """
def __init__(self, application_cl, return_code):
    """Initialize with the command line object and the tool's return code.

    Issues a DeprecationWarning, keeps a reference to the command line
    object and the return code, then records the value of every parameter
    which is flagged as both a "file" and an "output" and which has been
    set.  The values are stored under the final (human readable) name of
    the parameter.
    """
    import warnings
    warnings.warn("Bio.Application.ApplicationResult and the "
                  "associated function Bio.Application.generic_run "
                  "are deprecated. Please use the Bio.Application "
                  "based wrappers with the built in Python module "
                  "subprocess instead, as described in the Biopython "
                  "Tutorial.", DeprecationWarning)
    self._cl = application_cl
    self.return_code = return_code

    # Maps the final name of each output file parameter to its value.
    self._results = {}
    for param in self._cl.parameters:
        kinds = param.param_types
        if not ("file" in kinds and "output" in kinds):
            # Only output files are reported as results.
            continue
        if param.is_set:
            self._results[param.names[-1]] = param.value
126
128 """Retrieve result information for the given output.
129
130 Supports any of the defined parameters aliases (assuming the
131 parameter is defined as an output).
132 """
133 try:
134 return self._results[output_name]
135 except KeyError, err:
136
137 for parameter in self._cl.parameters:
138 if output_name in parameter.names:
139 return self._results[parameter.names[-1]]
140
141 raise err
142
144 """Retrieve a list of all available results.
145 """
146 result_names = self._results.keys()
147 result_names.sort()
148 return result_names
149
151 """Generic interface for constructing command line strings.
152
153 This class shouldn't be called directly; it should be subclassed to
154 provide an implementation for a specific application.
155
156 For a usage example we'll show one of the EMBOSS wrappers. You can set
157 options when creating the wrapper object using keyword arguments - or
158 later using their corresponding properties:
159
160 >>> from Bio.Emboss.Applications import WaterCommandline
161 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
162 >>> cline
163 WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)
164
165 You can instead manipulate the parameters via their properties, e.g.
166
167 >>> cline.gapopen
168 10
169 >>> cline.gapopen = 20
170 >>> cline
171 WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)
172
173 You can clear a parameter you have already added by 'deleting' the
174 corresponding property:
175
176 >>> del cline.gapopen
177 >>> cline.gapopen
178 >>> cline
179 WaterCommandline(cmd='water', gapextend=0.5)
180
181 Once you have set the parameters you need, turn the object into a string:
182
183 >>> str(cline)
184 Traceback (most recent call last):
185 ...
186 ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).
187
188 In this case the wrapper knows certain arguments are required to construct
189 a valid command line for the tool. For a complete example,
190
191 >>> from Bio.Emboss.Applications import WaterCommandline
192 >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
193 >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
194 >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
195 >>> water_cmd.outfile = "temp_water.txt"
196 >>> print water_cmd
197 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
198 >>> water_cmd
199 WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
200
201 You would typically run the command line via a standard Python operating
202 system call using the subprocess module for full control. For the simple
203 case where you just want to run the command and get the output:
204
205 stdout, stderr = water_cmd()
206 """
207
208
210 """Create a new instance of a command line wrapper object."""
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227 self.program_name = cmd
228 try:
229 parameters = self.parameters
230 except AttributeError:
231 raise AttributeError("Subclass should have defined self.parameters")
232
233 aliases = set()
234 for p in parameters:
235 for name in p.names:
236 if name in aliases:
237 raise ValueError("Parameter alias %s multiply defined" \
238 % name)
239 aliases.add(name)
240 name = p.names[-1]
241 if _re_prop_name.match(name) is None:
242 raise ValueError("Final parameter name %s cannot be used as "
243 "an argument or property name in python"
244 % repr(name))
245 if name in _reserved_names:
246 raise ValueError("Final parameter name %s cannot be used as "
247 "an argument or property name because it is "
248 "a reserved word in python" % repr(name))
249 if name in _local_reserved_names:
250 raise ValueError("Final parameter name %s cannot be used as "
251 "an argument or property name due to the "
252 "way the AbstractCommandline class works"
253 % repr(name))
254
255 def getter(name):
256 return lambda x : x._get_parameter(name)
257 def setter(name):
258 return lambda x, value : x.set_parameter(name, value)
259 def deleter(name):
260 return lambda x : x._clear_parameter(name)
261 doc = p.description
262 if isinstance(p, _Switch):
263 doc += "\n\nThis property controls the addition of the %s " \
264 "switch, treat this property as a boolean." % p.names[0]
265 else:
266 doc += "\n\nThis controls the addition of the %s parameter " \
267 "and its associated value. Set this property to the " \
268 "argument value required." % p.names[0]
269 prop = property(getter(name), setter(name), deleter(name), doc)
270 setattr(self.__class__, name, prop)
271 for key, value in kwargs.iteritems():
272 self.set_parameter(key, value)
273
275 """Make sure the required parameters have been set (PRIVATE).
276
277 No return value - it either works or raises a ValueError.
278
279 This is a separate method (called from __str__) so that subclasses may
280 override it.
281 """
282 for p in self.parameters:
283
284 if p.is_required and not(p.is_set):
285 raise ValueError("Parameter %s is not set." \
286 % p.names[-1])
287
288
290 """Make the commandline string with the currently set options.
291
292 e.g.
293 >>> from Bio.Emboss.Applications import WaterCommandline
294 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
295 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
296 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
297 >>> cline.outfile = "temp_water.txt"
298 >>> print cline
299 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
300 >>> str(cline)
301 'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
302 """
303 self._validate()
304 commandline = "%s " % self.program_name
305 for parameter in self.parameters:
306 if parameter.is_set:
307
308 commandline += str(parameter)
309 return commandline.strip()
310
312 """Return a representation of the command line object for debugging.
313
314 e.g.
315 >>> from Bio.Emboss.Applications import WaterCommandline
316 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
317 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
318 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
319 >>> cline.outfile = "temp_water.txt"
320 >>> print cline
321 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
322 >>> cline
323 WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
324 """
325 answer = "%s(cmd=%s" % (self.__class__.__name__, repr(self.program_name))
326 for parameter in self.parameters:
327 if parameter.is_set:
328 if isinstance(parameter, _Switch):
329 answer += ", %s=True" % parameter.names[-1]
330 else:
331 answer += ", %s=%s" \
332 % (parameter.names[-1], repr(parameter.value))
333 answer += ")"
334 return answer
335
345
356
379
381 """Check whether the given value is valid.
382
383 No return value - it either works or raises a ValueError.
384
385 This uses the passed function 'check_function', which can either
386 return a [0, 1] (bad, good) value or raise an error. Either way
387 this function will raise an error if the value is not valid, or
388 finish silently otherwise.
389 """
390 if check_function is not None:
391 is_good = check_function(value)
392 assert is_good in [0,1,True,False]
393 if not is_good:
394 raise ValueError("Invalid parameter value %r for parameter %s" \
395 % (value, name))
396
398 """Set attribute name to value (PRIVATE).
399
400 This code implements a workaround for a user interface issue.
401 Without this __setattr__ attribute-based assignment of parameters
402 will silently accept invalid parameters, leading to known instances
403 of the user assuming that parameters for the application are set,
404 when they are not.
405
406 >>> from Bio.Emboss.Applications import WaterCommandline
407 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
408 >>> cline.asequence = "a.fasta"
409 >>> cline.bsequence = "b.fasta"
410 >>> cline.csequence = "c.fasta"
411 Traceback (most recent call last):
412 ...
413 ValueError: Option name csequence was not found.
414 >>> print cline
415 water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5
416
417 This workaround uses a whitelist of object attributes, and sets the
418 object attribute list as normal, for these. Other attributes are
419 assumed to be parameters, and passed to the self.set_parameter method
420 for validation and assignment.
421 """
422 if name in ['parameters', 'program_name']:
423 self.__dict__[name] = value
424 else:
425 self.set_parameter(name, value)
426
def __call__(self, stdin=None, stdout=True, stderr=True):
    """Execute the command, wait for it to finish, and return its output.

    Runs the command line tool (the string given by str(self)) and waits
    for it to finish. If it returns a non-zero error level, an exception is
    raised. Otherwise two values are returned, holding the tool's stdout
    and stderr.

    The optional stdin argument should be a string of data which will be
    passed to the tool as standard input.

    The optional stdout and stderr arguments are treated as booleans, and
    control if the output should be captured as a string (True, default),
    or ignored by sending it to the null device to avoid wasting memory
    (False). In the latter case an empty value is returned in place of the
    string (subprocess reports None for a stream which was not piped).

    Raises subprocess.CalledProcessError if the tool returns a non-zero
    exit status (or RuntimeError on very old versions of Python where
    subprocess does not provide that exception).

    Default example usage:

    from Bio.Emboss.Applications import WaterCommandline
    water_cmd = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True,
                                 asequence="a.fasta", bsequence="b.fasta")
    print "About to run:\n%s" % water_cmd
    std_output, err_output = water_cmd()

    This functionality is similar to subprocess.check_output() added in
    Python 2.7. In general if you require more control over running the
    command, use subprocess directly.
    """
    # Handles on the null device which we opened and so must close again.
    devnull_handles = []
    try:
        if stdout:
            stdout_arg = subprocess.PIPE
        else:
            # The child writes to this handle, so open it for writing.
            stdout_arg = open(os.devnull, "w")
            devnull_handles.append(stdout_arg)
        if stderr:
            stderr_arg = subprocess.PIPE
        else:
            stderr_arg = open(os.devnull, "w")
            devnull_handles.append(stderr_arg)

        # The command is a single string, so outside of Windows it has to
        # be interpreted by the shell.
        child_process = subprocess.Popen(str(self), stdin=subprocess.PIPE,
                                         stdout=stdout_arg,
                                         stderr=stderr_arg,
                                         universal_newlines=True,
                                         shell=(sys.platform != "win32"))
        # communicate() feeds stdin, reads any pipes to the end (avoiding
        # deadlocks on large outputs) and waits for the tool to exit.
        stdout_str, stderr_str = child_process.communicate(stdin)
    finally:
        for handle in devnull_handles:
            handle.close()

    # Sanity check - nothing can have been captured from a discarded stream.
    if not stdout:
        assert not stdout_str
    if not stderr:
        assert not stderr_str

    return_code = child_process.returncode
    if return_code:
        command = str(self)
        # subprocess.CalledProcessError was only added in Python 2.5
        error_class = getattr(subprocess, "CalledProcessError", None)
        if error_class is None:
            raise RuntimeError(
                "Command %s returned non-zero exit status %i"
                % (command, return_code))
        raise error_class(return_code, command)
    return stdout_str, stderr_str
489
490
492 """A class to hold information about a parameter for a commandline.
493
494 Do not use this directly, instead use one of the subclasses.
495 """
497 raise NotImplementedError
498
500 raise NotImplementedError
501
503 """Represent an option that can be set for a program.
504
505 This holds UNIXish options like --append=yes and -a yes,
506 where a value (here "yes") is generally expected.
507
508 For UNIXish options like -kimura in clustalw which don't
509 take a value, use the _Switch object instead.
510
511 Attributes:
512
513 o names -- a list of string names by which the parameter can be
514 referenced (ie. ["-a", "--append", "append"]). The first name in
515 the list is considered to be the one that goes on the commandline,
516 for those parameters that print the option. The last name in the list
517 is assumed to be a "human readable" name describing the option in one
518 word.
519
520 o param_types -- a list of strings describing the type of parameter,
521 which can help let programs know how to use it. Example descriptions
522 include 'input', 'output', 'file'. Note that if 'file' is included,
523 these argument values will automatically be escaped if the filename
524 contains spaces.
525
526 o checker_function -- a reference to a function that will determine
527 if a given value is valid for this parameter. This function can either
528 raise an error when given a bad value, or return a [0, 1] decision on
529 whether the value is correct.
530
531 o equate -- should an equals sign be inserted if a value is used?
532
533 o description -- a description of the option.
534
535 o is_required -- a flag to indicate if the parameter must be set for
536 the program to be run.
537
538 o is_set -- if the parameter has been set
539
540 o value -- the value of a parameter
541 """
def __init__(self, names=None, types=None, checker_function=None,
             is_required=False, description="", equate=True):
    """Record the definition of an option, which starts out unset.

    Arguments:

     o names -- list of names for the option, stored as self.names.

     o types -- list of strings describing the option, stored as
     self.param_types.

     o checker_function -- optional function used to validate values.

     o is_required -- flag, must the option be set?

     o description -- a description of the option.

     o equate -- flag, should an equals sign join the name and value?

    If names or types is omitted a new empty list is used.  (A list
    literal as the default value would be created only once and then be
    shared, and mutated, by every option defined without that argument.)
    """
    if names is None:
        names = []
    if types is None:
        types = []
    self.names = names
    self.param_types = types
    self.checker_function = checker_function
    self.description = description
    self.equate = equate
    self.is_required = is_required

    # Nothing has been assigned to the option yet.
    self.is_set = False
    self.value = None
553
555 """Return the value of this option for the commandline.
556
557 Includes a trailing space.
558 """
559
560
561
562
563 if self.value is None:
564 return "%s " % self.names[0]
565 if "file" in self.param_types:
566 v = _escape_filename(self.value)
567 else:
568 v = str(self.value)
569 if self.equate:
570 return "%s=%s " % (self.names[0], v)
571 else:
572 return "%s %s " % (self.names[0], v)
573
575 """Represent an optional argument switch for a program.
576
577 This holds UNIXish options like -kimura in clustalw which don't
578 take a value, they are either included in the command string
579 or omitted.
580
581 o names -- a list of string names by which the parameter can be
582 referenced (ie. ["-a", "--append", "append"]). The first name in
583 the list is considered to be the one that goes on the commandline,
584 for those parameters that print the option. The last name in the list
585 is assumed to be a "human readable" name describing the option in one
586 word.
587
588 o param_types -- a list of strings describing the type of parameter,
589 which can help let programs know how to use it. Example descriptions
590 include 'input', 'output', 'file'. Note that if 'file' is included,
591 these argument values will automatically be escaped if the filename
592 contains spaces.
593
594 o description -- a description of the option.
595
596 o is_set -- if the parameter has been set
597
598 NOTE - There is no value attribute, see is_set instead.
599 """
600 - def __init__(self, names = [], types = [], description = ""):
606
608 """Return the value of this option for the commandline.
609
610 Includes a trailing space.
611 """
612 assert not hasattr(self, "value")
613 if self.is_set:
614 return "%s " % self.names[0]
615 else:
616 return ""
617
619 """Represent an argument on a commandline.
620 """
def __init__(self, names=None, types=None, checker_function=None,
             is_required=False, description=""):
    """Record the definition of an argument, which starts out unset.

    Arguments:

     o names -- list of names for the argument, stored as self.names.

     o types -- list of strings describing the argument, stored as
     self.param_types.

     o checker_function -- optional function used to validate values.

     o is_required -- flag, must the argument be set?

     o description -- a description of the argument.

    If names or types is omitted a new empty list is used.  (A list
    literal as the default value would be created only once and then be
    shared, and mutated, by every argument defined without it.)
    """
    if names is None:
        names = []
    if types is None:
        types = []
    self.names = names
    self.param_types = types
    self.checker_function = checker_function
    self.description = description
    self.is_required = is_required

    # Nothing has been assigned to the argument yet.
    self.is_set = False
    self.value = None
630
632 if self.value is None:
633 return " "
634 else:
635 return "%s " % self.value
636
638 """Escape filenames with spaces by adding quotes (PRIVATE).
639
640 Note this will not add quotes if they are already included:
641
642 >>> print _escape_filename('example with spaces')
643 "example with spaces"
644 >>> print _escape_filename('"example with spaces"')
645 "example with spaces"
646 """
647
648
649
650
651
652
653
654
655
656
657
658
659
660 if " " not in filename:
661 return filename
662
663 if filename.startswith('"') and filename.endswith('"'):
664
665 return filename
666 else:
667 return '"%s"' % filename
668
670 """Run the Bio.Application module's doctests."""
671 import doctest
672 doctest.testmod(verbose=1)
673
674 if __name__ == "__main__":
675
676 _test()
677