Package Bio :: Package SearchIO :: Package BlastIO :: Module blast_text
[hide private]
[frames] | no frames]

Source Code for Module Bio.SearchIO.BlastIO.blast_text

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO parser for BLAST+ plain text output formats. 
  7   
  8  At the moment this is a wrapper around Biopython's NCBIStandalone text 
  9  parser (which is now deprecated). 
 10   
 11  """ 
 12   
 13  from Bio.Alphabet import generic_dna, generic_protein 
 14  from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment 
 15   
 16  import warnings 
 17  from Bio import BiopythonDeprecationWarning 
 18  with warnings.catch_warnings(): 
 19      warnings.simplefilter('ignore', BiopythonDeprecationWarning) 
 20      from Bio.Blast import NCBIStandalone 
 21   
 22   
 23  __all__ = ('BlastTextParser', ) 
 24   
 25   
26 -class BlastTextParser(object):
27 """Parser for the BLAST text format.""" 28
29 - def __init__(self, handle):
30 self.handle = handle 31 blast_parser = NCBIStandalone.BlastParser() 32 self.blast_iter = NCBIStandalone.Iterator(handle, blast_parser)
33
34 - def __iter__(self):
35 for rec in self.blast_iter: 36 # set attributes to SearchIO's 37 # get id and desc 38 if rec.query.startswith('>'): 39 rec.query = rec.query[1:] 40 try: 41 qid, qdesc = rec.query.split(' ', 1) 42 except ValueError: 43 qid, qdesc = rec.query, '' 44 qdesc = qdesc.replace('\n', '').replace('\r', '') 45 46 qresult = QueryResult(id=qid) 47 qresult.program = rec.application.lower() 48 qresult.target = rec.database 49 qresult.seq_len = rec.query_letters 50 qresult.version = rec.version 51 52 # determine alphabet based on program 53 if qresult.program == 'blastn': 54 alphabet = generic_dna 55 elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']: 56 alphabet = generic_protein 57 58 # iterate over the 'alignments' (hits) and the hit table 59 for idx, aln in enumerate(rec.alignments): 60 # get id and desc 61 if aln.title.startswith('> '): 62 aln.title = aln.title[2:] 63 elif aln.title.startswith('>'): 64 aln.title = aln.title[1:] 65 try: 66 hid, hdesc = aln.title.split(' ', 1) 67 except ValueError: 68 hid, hdesc = aln.title, '' 69 hdesc = hdesc.replace('\n', '').replace('\r', '') 70 71 # iterate over the hsps and group them in a list 72 hsp_list = [] 73 for bhsp in aln.hsps: 74 frag = HSPFragment(hid, qid) 75 frag.alphabet = alphabet 76 # set alignment length 77 frag.aln_span = bhsp.identities[1] 78 # set frames 79 try: 80 frag.query_frame = int(bhsp.frame[0]) 81 except IndexError: 82 if qresult.program in ('blastp', 'tblastn'): 83 frag.query_frame = 0 84 else: 85 frag.query_frame = 1 86 try: 87 frag.hit_frame = int(bhsp.frame[1]) 88 except IndexError: 89 if qresult.program in ('blastp', 'tblastn'): 90 frag.hit_frame = 0 91 else: 92 frag.hit_frame = 1 93 # set query coordinates 94 frag.query_start = min(bhsp.query_start, 95 bhsp.query_end) - 1 96 frag.query_end = max(bhsp.query_start, bhsp.query_end) 97 # set hit coordinates 98 frag.hit_start = min(bhsp.sbjct_start, 99 bhsp.sbjct_end) - 1 100 frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end) 101 # set query, hit sequences and its annotation 102 qseq = '' 103 hseq = '' 104 midline = '' 105 for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match): 106 qchar, hchar, mchar = seqtrio 107 if qchar == ' ' or hchar == ' ': 108 assert all(' ' == x for x in seqtrio) 109 else: 110 qseq += qchar 111 hseq += hchar 112 midline += mchar 113 frag.query, frag.hit = qseq, hseq 114 frag.aln_annotation['similarity'] = midline 115 116 # create HSP object with the fragment 117 hsp = HSP([frag]) 118 hsp.evalue = bhsp.expect 119 hsp.bitscore = bhsp.bits 120 hsp.bitscore_raw = bhsp.score 121 # set gap 122 try: 123 hsp.gap_num = bhsp.gaps[0] 124 except IndexError: 125 hsp.gap_num = 0 126 # set identity 127 hsp.ident_num = bhsp.identities[0] 128 hsp.pos_num = bhsp.positives[0] 129 if hsp.pos_num is None: 130 hsp.pos_num = hsp[0].aln_span 131 132 hsp_list.append(hsp) 133 134 hit = Hit(hsp_list) 135 hit.seq_len = aln.length 136 hit.description = hdesc 137 qresult.append(hit) 138 139 qresult.description = qdesc 140 yield qresult
141 142 143 # if not used as a module, run the doctest 144 if __name__ == "__main__": 145 from Bio._utils import run_doctest 146 run_doctest() 147