Package Bio :: Package Compass
[hide private]
[frames] | no frames]

Source Code for Package Bio.Compass

  1  # Copyright 2004 by James Casbon.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code to deal with COMPASS output, a program for profile/profile comparison. 
  7   
  8  Compass is described in: 
  9   
 10  Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 
 11  alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 
 12  7;326(1):317-36. 
 13   
 14  Tested with COMPASS 1.24. 
 15  """ 
 16   
 17  import re 
 18   
 19  __docformat__ = "restructuredtext en" 
 20   
 21   
22 -def read(handle):
23 """Reads a COMPASS file containing one COMPASS record.""" 24 record = None 25 try: 26 line = next(handle) 27 record = Record() 28 __read_names(record, line) 29 line = next(handle) 30 __read_threshold(record, line) 31 line = next(handle) 32 __read_lengths(record, line) 33 line = next(handle) 34 __read_profilewidth(record, line) 35 line = next(handle) 36 __read_scores(record, line) 37 except StopIteration: 38 if not record: 39 raise ValueError("No record found in handle") 40 else: 41 raise ValueError("Unexpected end of stream.") 42 for line in handle: 43 if not line.strip(): # skip empty lines 44 continue 45 __read_query_alignment(record, line) 46 try: 47 line = next(handle) 48 __read_positive_alignment(record, line) 49 line = next(handle) 50 __read_hit_alignment(record, line) 51 except StopIteration: 52 raise ValueError("Unexpected end of stream.") 53 return record
54 55
56 -def parse(handle):
57 """Iterates over records in a COMPASS file.""" 58 record = None 59 try: 60 line = next(handle) 61 except StopIteration: 62 return 63 while True: 64 try: 65 record = Record() 66 __read_names(record, line) 67 line = next(handle) 68 __read_threshold(record, line) 69 line = next(handle) 70 __read_lengths(record, line) 71 line = next(handle) 72 __read_profilewidth(record, line) 73 line = next(handle) 74 __read_scores(record, line) 75 except StopIteration: 76 raise ValueError("Unexpected end of stream.") 77 for line in handle: 78 if not line.strip(): 79 continue 80 if "Ali1:" in line: 81 yield record 82 break 83 __read_query_alignment(record, line) 84 try: 85 line = next(handle) 86 __read_positive_alignment(record, line) 87 line = next(handle) 88 __read_hit_alignment(record, line) 89 except StopIteration: 90 raise ValueError("Unexpected end of stream.") 91 else: 92 yield record 93 break
94 95
96 -class Record(object):
97 """Hold information from one compass hit. 98 99 Ali1 is the query, Ali2 the hit. 100 """ 101
102 - def __init__(self):
103 self.query = '' 104 self.hit = '' 105 self.gap_threshold = 0 106 self.query_length = 0 107 self.query_filtered_length = 0 108 self.query_nseqs = 0 109 self.query_neffseqs = 0 110 self.hit_length = 0 111 self.hit_filtered_length = 0 112 self.hit_nseqs = 0 113 self.hit_neffseqs = 0 114 self.sw_score = 0 115 self.evalue = -1 116 self.query_start = -1 117 self.hit_start = -1 118 self.query_aln = '' 119 self.hit_aln = '' 120 self.positives = ''
121
122 - def query_coverage(self):
123 """Return the length of the query covered in the alignment.""" 124 s = self.query_aln.replace("=", "") 125 return len(s)
126
127 - def hit_coverage(self):
128 """Return the length of the hit covered in the alignment.""" 129 s = self.hit_aln.replace("=", "") 130 return len(s)
131 132 # Everything below is private 133 134 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 135 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 136 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 137 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 138 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 139 "start": re.compile("(\d+)"), 140 "align": re.compile("^.{15}(\S+)"), 141 "positive_alignment": re.compile("^.{15}(.+)"), 142 } 143 144
145 -def __read_names(record, line):
146 # Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 147 # ------query----- -------hit------------- 148 if "Ali1:" not in line: 149 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 150 m = __regex["names"].search(line) 151 record.query = m.group(1) 152 record.hit = m.group(2)
153 154
155 -def __read_threshold(record, line):
156 if not line.startswith("Threshold"): 157 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 158 m = __regex["threshold"].search(line) 159 record.gap_threshold = float(m.group(1))
160 161
162 -def __read_lengths(record, line):
163 if not line.startswith("length1="): 164 raise ValueError("Line does not start with 'length1=':\n%s" % line) 165 m = __regex["lengths"].search(line) 166 record.query_length = int(m.group(1)) 167 record.query_filtered_length = float(m.group(2)) 168 record.hit_length = int(m.group(3)) 169 record.hit_filtered_length = float(m.group(4))
170 171
172 -def __read_profilewidth(record, line):
173 if "Nseqs1" not in line: 174 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 175 m = __regex["profilewidth"].search(line) 176 record.query_nseqs = int(m.group(1)) 177 record.query_neffseqs = float(m.group(2)) 178 record.hit_nseqs = int(m.group(3)) 179 record.hit_neffseqs = float(m.group(4))
180 181
182 -def __read_scores(record, line):
183 if not line.startswith("Smith-Waterman"): 184 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 185 m = __regex["scores"].search(line) 186 if m: 187 record.sw_score = int(m.group(1)) 188 record.evalue = float(m.group(2)) 189 else: 190 record.sw_score = 0 191 record.evalue = -1.0
192 193
194 -def __read_query_alignment(record, line):
195 m = __regex["start"].search(line) 196 if m: 197 record.query_start = int(m.group(1)) 198 m = __regex["align"].match(line) 199 assert m is not None, "invalid match" 200 record.query_aln += m.group(1)
201 202
203 -def __read_positive_alignment(record, line):
204 m = __regex["positive_alignment"].match(line) 205 assert m is not None, "invalid match" 206 record.positives += m.group(1)
207 208
209 -def __read_hit_alignment(record, line):
210 m = __regex["start"].search(line) 211 if m: 212 record.hit_start = int(m.group(1)) 213 m = __regex["align"].match(line) 214 assert m is not None, "invalid match" 215 record.hit_aln += m.group(1)
216