Package Bio :: Package Blast :: Module ParseBlastTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.ParseBlastTable

  1  # Copyright 2003 Iddo Friedberg. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
"""A parser for the NCBI blastpgp version 2.2.5 output format.

Currently only the '-m 9' option (table with annotations) is supported.
Returns a BlastTableRec instance.
"""
 10   
 11  import sys 
 12   
13 -class BlastTableEntry(object):
14 - def __init__(self, in_rec):
15 bt_fields = in_rec.split() 16 self.qid = bt_fields[0].split('|') 17 self.sid = bt_fields[1].split('|') 18 self.pid = float(bt_fields[2]) 19 self.ali_len = int(bt_fields[3]) 20 self.mis = int(bt_fields[4]) 21 self.gaps = int(bt_fields[5]) 22 self.q_bounds = (int(bt_fields[6]), int(bt_fields[7])) 23 self.s_bounds = (int(bt_fields[8]), int(bt_fields[9])) 24 self.e_value = float(bt_fields[10]) 25 self.bit_score = float(bt_fields[11])
26 27
class BlastTableRec(object):
    """Container for one parsed table record: header values plus hit entries.

    All header attributes start as None and are filled in by whoever parses
    the header; ``entries`` starts as an empty list owned by this instance.
    """

    def __init__(self):
        """Create an empty record."""
        self.program = None
        self.version = None
        self.date = None
        self.iteration = None
        self.query = None
        self.database = None
        self.entries = []

    def add_entry(self, entry):
        """Append ``entry`` to this record's ``entries`` list."""
        self.entries.append(entry)
# Maps the tag that opens a '#' header line (the first token after the '#',
# without its trailing ':') to the suffix of the BlastTableReader
# ``_parse_<suffix>`` method that handles that line.
reader_keywords = {'BLASTP': 'version',
                   'Iteration': 'iteration',
                   'Query': 'query',
                   'Database': 'database',
                   'Fields': 'fields'}


class BlastTableReader(object):
    """Read BlastTableRec records one at a time from an open handle.

    A record is a run of '#' header lines followed by its hit rows. Each
    call to ``__next__`` returns the next record, or None once the handle
    is exhausted. Iterating over the reader yields the same records and
    stops at the end of input.
    """

    def __init__(self, handle):
        """Wrap ``handle`` (anything with ``readline``) and find the first record."""
        self.handle = handle
        inline = self.handle.readline()
        # zip forward to start of record
        while inline and 'BLASTP' not in inline:
            inline = self.handle.readline()
        # First line of the next record, or '' when there is none.
        self._lookahead = inline
        # Number of times __next__ has been called.
        self._n = 0
        self._in_header = 1

    def __iter__(self):
        """Iterate over the remaining records, stopping at end of input."""
        # __next__ signals exhaustion by returning None, not by raising
        # StopIteration, so wrap it in a sentinel-terminated iterator.
        return iter(self.__next__, None)

    def __next__(self):
        """Return the next BlastTableRec, or None when no input is left."""
        self.table_record = BlastTableRec()
        self._n += 1
        inline = self._lookahead
        if not inline:
            return None
        while inline:
            if not inline.strip():
                # Blank lines carry no data; parsing one as a hit row
                # would fail, so skip them.
                pass
            elif inline[0] == '#':
                if self._in_header:
                    self._in_header = self._consume_header(inline)
                else:
                    # A '#' line after hit rows opens the next record.
                    break
            else:
                self._consume_entry(inline)
                self._in_header = 0

            inline = self.handle.readline()
        self._lookahead = inline
        self._in_header = 1
        return self.table_record

    if sys.version_info[0] < 3:
        def next(self):
            """Deprecated Python 2 style alias for Python 3 style __next__ method."""
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
                          "the .next() method is deprecated and will be removed in a "
                          "future release of Biopython.", BiopythonDeprecationWarning)
            return self.__next__()

    def _consume_entry(self, inline):
        """Parse one hit row and add it to the current record."""
        current_entry = BlastTableEntry(inline)
        self.table_record.add_entry(current_entry)

    def _consume_header(self, inline):
        """Dispatch one '#' header line; return 1 to stay in the header, 0 to leave it.

        The line is identified by its leading tag only. Matching a keyword
        anywhere in the line would misroute the "# Fields: Query id, ..."
        line to the query parser. Lines with an unknown tag are ignored and
        keep the reader in the header.
        """
        tokens = inline.lstrip('#').split()
        tag = tokens[0].rstrip(':') if tokens else ''
        suffix = reader_keywords.get(tag)
        if suffix is None:
            return 1
        return self._Parse('_parse_%s' % suffix, inline)

    def _parse_version(self, inline):
        """Store program, version and date from a '# BLASTP <version> <date>' line."""
        program, version, date = inline.split()[1:]
        self.table_record.program = program
        self.table_record.version = version
        self.table_record.date = date
        return 1

    def _parse_iteration(self, inline):
        """Store the iteration number (third token of the line) as an int."""
        self.table_record.iteration = int(inline.split()[2])
        return 1

    def _parse_query(self, inline):
        """Store the query as the list of tokens following '# Query:'."""
        self.table_record.query = inline.split()[2:]
        return 1

    def _parse_database(self, inline):
        """Store the database name (third token of the line)."""
        self.table_record.database = inline.split()[2]
        return 1

    def _parse_fields(self, inline):
        """Ignore the column-name line; it is the last header line, so return 0."""
        return 0

    def _Parse(self, method_name, inline):
        """Call the method named ``method_name`` on this reader with ``inline``."""
        return getattr(self, method_name)(inline)
125