Package Bio :: Package GenBank :: Module utils
[hide private]
[frames] | [no frames]

Source Code for Module Bio.GenBank.utils

 1  # This code is part of the Biopython distribution and governed by its 
 2  # license.  Please see the LICENSE file that should have been included 
 3  # as part of this package. 
 4  # 
 5   
 6  """Useful utilities for helping in parsing GenBank files. 
 7  """ 
 8   
 9  __docformat__ = "restructuredtext en" 
10   
11   
class FeatureValueCleaner(object):
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

        /translation="MED
        YDPWNLRFQSKYKSRDA"

    you'll end up with a value with \012s and spaces in it like::

        "MED\012  YDPWNL..."

    which you probably don't want.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    A key listed for processing is handled by the method named
    ``_clean_<key>``; supporting a new key means adding such a method
    (for instance in a subclass) and including the key in ``to_process``.
    """

    # Keys cleaned by default when no explicit list is given to __init__.
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - container of feature key names whose values should
           be cleaned. Defaults to the class attribute ``keys_to_process``.
        """
        # Stored by reference, not copied: with the default argument this
        # is the class-level keys_to_process list itself.
        self._to_process = to_process

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the key is not one of those registered for processing, the
        original value is returned unchanged.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Raises AssertionError if the key is registered for processing but
        no matching ``_clean_<key_name>`` method exists. Exceptions raised
        by the cleaning method itself propagate unchanged.
        """
        if key_name not in self._to_process:
            return value

        # Only the lookup is guarded: an AttributeError raised inside the
        # cleaner itself must not be misreported as a missing cleaner.
        try:
            cleaner = getattr(self, "_clean_%s" % key_name)
        except AttributeError:
            raise AssertionError("No function to clean key: %s"
                                 % key_name)
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string.

        All whitespace (spaces, newlines, tabs) is removed from the value.
        """
        # split() with no argument splits on any run of whitespace.
        translation_parts = value.split()
        return "".join(translation_parts)
60