Package Bio :: Package Compass
[hide private]
[frames] | no frames]

Source Code for Package Bio.Compass

  1  # Copyright 2004 by James Casbon.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code to deal with COMPASS output, a program for profile/profile comparison. 
  7   
  8  Compass is described in: 
  9   
 10  Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 
 11  alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 
 12  7;326(1):317-36. 
 13   
 14  Tested with COMPASS 1.24. 
 15  """ 
 16   
 17  import re 
 18   
 19   
20 -def read(handle):
21 """Reads a COMPASS file containing one COMPASS record.""" 22 record = None 23 try: 24 line = next(handle) 25 record = Record() 26 __read_names(record, line) 27 line = next(handle) 28 __read_threshold(record, line) 29 line = next(handle) 30 __read_lengths(record, line) 31 line = next(handle) 32 __read_profilewidth(record, line) 33 line = next(handle) 34 __read_scores(record, line) 35 except StopIteration: 36 if not record: 37 raise ValueError("No record found in handle") 38 else: 39 raise ValueError("Unexpected end of stream.") 40 for line in handle: 41 if not line.strip(): # skip empty lines 42 continue 43 __read_query_alignment(record, line) 44 try: 45 line = next(handle) 46 __read_positive_alignment(record, line) 47 line = next(handle) 48 __read_hit_alignment(record, line) 49 except StopIteration: 50 raise ValueError("Unexpected end of stream.") 51 return record
52 53
54 -def parse(handle):
55 """Iterates over records in a COMPASS file.""" 56 record = None 57 try: 58 line = next(handle) 59 except StopIteration: 60 return 61 while True: 62 try: 63 record = Record() 64 __read_names(record, line) 65 line = next(handle) 66 __read_threshold(record, line) 67 line = next(handle) 68 __read_lengths(record, line) 69 line = next(handle) 70 __read_profilewidth(record, line) 71 line = next(handle) 72 __read_scores(record, line) 73 except StopIteration: 74 raise ValueError("Unexpected end of stream.") 75 for line in handle: 76 if not line.strip(): 77 continue 78 if "Ali1:" in line: 79 yield record 80 break 81 __read_query_alignment(record, line) 82 try: 83 line = next(handle) 84 __read_positive_alignment(record, line) 85 line = next(handle) 86 __read_hit_alignment(record, line) 87 except StopIteration: 88 raise ValueError("Unexpected end of stream.") 89 else: 90 yield record 91 break
92 93
94 -class Record(object):
95 """Hold information from one compass hit. 96 97 Ali1 is the query, Ali2 the hit. 98 """ 99
100 - def __init__(self):
101 self.query = '' 102 self.hit = '' 103 self.gap_threshold = 0 104 self.query_length = 0 105 self.query_filtered_length = 0 106 self.query_nseqs = 0 107 self.query_neffseqs = 0 108 self.hit_length = 0 109 self.hit_filtered_length = 0 110 self.hit_nseqs = 0 111 self.hit_neffseqs = 0 112 self.sw_score = 0 113 self.evalue = -1 114 self.query_start = -1 115 self.hit_start = -1 116 self.query_aln = '' 117 self.hit_aln = '' 118 self.positives = ''
119
120 - def query_coverage(self):
121 """Return the length of the query covered in the alignment.""" 122 s = self.query_aln.replace("=", "") 123 return len(s)
124
125 - def hit_coverage(self):
126 """Return the length of the hit covered in the alignment.""" 127 s = self.hit_aln.replace("=", "") 128 return len(s)
129 130 # Everything below is private 131 132 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 133 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 134 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 135 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 136 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 137 "start": re.compile("(\d+)"), 138 "align": re.compile("^.{15}(\S+)"), 139 "positive_alignment": re.compile("^.{15}(.+)"), 140 } 141 142
143 -def __read_names(record, line):
144 # Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 145 # ------query----- -------hit------------- 146 if "Ali1:" not in line: 147 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 148 m = __regex["names"].search(line) 149 record.query = m.group(1) 150 record.hit = m.group(2)
151 152
153 -def __read_threshold(record, line):
154 if not line.startswith("Threshold"): 155 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 156 m = __regex["threshold"].search(line) 157 record.gap_threshold = float(m.group(1))
158 159
160 -def __read_lengths(record, line):
161 if not line.startswith("length1="): 162 raise ValueError("Line does not start with 'length1=':\n%s" % line) 163 m = __regex["lengths"].search(line) 164 record.query_length = int(m.group(1)) 165 record.query_filtered_length = float(m.group(2)) 166 record.hit_length = int(m.group(3)) 167 record.hit_filtered_length = float(m.group(4))
168 169
170 -def __read_profilewidth(record, line):
171 if "Nseqs1" not in line: 172 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 173 m = __regex["profilewidth"].search(line) 174 record.query_nseqs = int(m.group(1)) 175 record.query_neffseqs = float(m.group(2)) 176 record.hit_nseqs = int(m.group(3)) 177 record.hit_neffseqs = float(m.group(4))
178 179
180 -def __read_scores(record, line):
181 if not line.startswith("Smith-Waterman"): 182 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 183 m = __regex["scores"].search(line) 184 if m: 185 record.sw_score = int(m.group(1)) 186 record.evalue = float(m.group(2)) 187 else: 188 record.sw_score = 0 189 record.evalue = -1.0
190 191
192 -def __read_query_alignment(record, line):
193 m = __regex["start"].search(line) 194 if m: 195 record.query_start = int(m.group(1)) 196 m = __regex["align"].match(line) 197 assert m is not None, "invalid match" 198 record.query_aln += m.group(1)
199 200
201 -def __read_positive_alignment(record, line):
202 m = __regex["positive_alignment"].match(line) 203 assert m is not None, "invalid match" 204 record.positives += m.group(1)
205 206
207 -def __read_hit_alignment(record, line):
208 m = __regex["start"].search(line) 209 if m: 210 record.hit_start = int(m.group(1)) 211 m = __regex["align"].match(line) 212 assert m is not None, "invalid match" 213 record.hit_aln += m.group(1)
214