Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2010 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Seq import Seq 
 17  from Bio.SeqRecord import SeqRecord 
 18  from Bio.Align import MultipleSeqAlignment 
 19  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 20   
 21   
22 -class EmbossWriter(SequentialAlignmentWriter):
23 """Emboss alignment writer (WORK IN PROGRESS). 24 25 Writes a simplfied version of the EMBOSS pairs/simple file format. 26 A lot of the information their tools record in their headers is not 27 available and is omitted. 28 """ 29
30 - def write_header(self):
31 handle = self.handle 32 handle.write("########################################\n") 33 handle.write("# Program: Biopython\n") 34 try: 35 handle.write("# Report_file: %s\n" % handle.name) 36 except AttributeError: 37 pass 38 handle.write("########################################\n")
39 44
45 - def write_alignment(self, alignment):
46 """Use this to write (another) single alignment to an open file.""" 47 handle = self.handle 48 handle.write("#=======================================\n") 49 handle.write("#\n") 50 handle.write("# Aligned_sequences: %i\n" % len(alignment)) 51 for i, record in enumerate(alignment): 52 handle.write("# %i: %s\n" % (i+1, record.id)) 53 handle.write("#\n") 54 handle.write("# Length: %i\n" % alignment.get_alignment_length()) 55 handle.write("#\n") 56 handle.write("#=======================================\n") 57 handle.write("\n") 58 #... 59 assert False
60 61
62 -class EmbossIterator(AlignmentIterator):
63 """Emboss alignment iterator. 64 65 For reading the (pairwise) alignments from EMBOSS tools in what they 66 call the "pairs" and "simple" formats. 67 """ 68
69 - def next(self):
70 71 handle = self.handle 72 73 try: 74 #Header we saved from when we were parsing 75 #the previous alignment. 76 line = self._header 77 del self._header 78 except AttributeError: 79 line = handle.readline() 80 if not line: 81 raise StopIteration 82 83 while line.rstrip() != "#=======================================": 84 line = handle.readline() 85 if not line: 86 raise StopIteration 87 88 length_of_seqs = None 89 number_of_seqs = None 90 ids = [] 91 seqs = [] 92 93 while line[0] == "#": 94 #Read in the rest of this alignment header, 95 #try and discover the number of records expected 96 #and their length 97 parts = line[1:].split(":", 1) 98 key = parts[0].lower().strip() 99 if key == "aligned_sequences": 100 number_of_seqs = int(parts[1].strip()) 101 assert len(ids) == 0 102 # Should now expect the record identifiers... 103 for i in range(number_of_seqs): 104 line = handle.readline() 105 parts = line[1:].strip().split(":", 1) 106 assert i+1 == int(parts[0].strip()) 107 ids.append(parts[1].strip()) 108 assert len(ids) == number_of_seqs 109 if key == "length": 110 length_of_seqs = int(parts[1].strip()) 111 112 #And read in another line... 113 line = handle.readline() 114 115 if number_of_seqs is None: 116 raise ValueError("Number of sequences missing!") 117 if length_of_seqs is None: 118 raise ValueError("Length of sequences missing!") 119 120 if self.records_per_alignment is not None \ 121 and self.records_per_alignment != number_of_seqs: 122 raise ValueError("Found %i records in this alignment, told to expect %i" 123 % (number_of_seqs, self.records_per_alignment)) 124 125 seqs = ["" for id in ids] 126 seq_starts = [] 127 index = 0 128 129 #Parse the seqs 130 while line: 131 if len(line) > 21: 132 id_start = line[:21].strip().split(None, 1) 133 seq_end = line[21:].strip().split(None, 1) 134 if len(id_start) == 2 and len(seq_end) == 2: 135 #identifier, seq start position, seq, seq end position 136 #(an aligned seq is broken up into multiple lines) 137 id, start = id_start 138 seq, end = seq_end 139 if start == end: 140 #Special case, either a single letter is present, 141 #or no letters at all. 142 if seq.replace("-", "") == "": 143 start = int(start) 144 end = int(end) 145 else: 146 start = int(start) - 1 147 end = int(end) 148 else: 149 assert seq.replace("-", "") != "" 150 start = int(start) - 1 # python counting 151 end = int(end) 152 153 #The identifier is truncated... 154 assert 0 <= index and index < number_of_seqs, \ 155 "Expected index %i in range [0,%i)" \ 156 % (index, number_of_seqs) 157 assert id == ids[index] or id == ids[index][:len(id)] 158 159 if len(seq_starts) == index: 160 #Record the start 161 seq_starts.append(start) 162 163 #Check the start... 164 if start == end: 165 assert seq.replace("-", "") == "", line 166 else: 167 assert start - seq_starts[index] == len(seqs[index].replace("-","")), \ 168 "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \ 169 % (len(seqs[index].replace("-","")), index, id, repr(seqs[index]), 170 start, line) 171 172 seqs[index] += seq 173 174 #Check the end ... 175 assert end == seq_starts[index] + len(seqs[index].replace("-", "")), \ 176 "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \ 177 % (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]), 178 seq_starts[index], end, line) 179 180 index += 1 181 if index >= number_of_seqs: 182 index = 0 183 else: 184 #just a start value, this is just alignment annotation (?) 185 #print "Skipping: " + line.rstrip() 186 pass 187 elif line.strip() == "": 188 #Just a spacer? 189 pass 190 else: 191 print line 192 assert False 193 194 line = handle.readline() 195 if line.rstrip() == "#---------------------------------------" \ 196 or line.rstrip() == "#=======================================": 197 #End of alignment 198 self._header = line 199 break 200 201 assert index == 0 202 203 if self.records_per_alignment is not None \ 204 and self.records_per_alignment != len(ids): 205 raise ValueError("Found %i records in this alignment, told to expect %i" 206 % (len(ids), self.records_per_alignment)) 207 208 records = [] 209 for id, seq in zip(ids, seqs): 210 if len(seq) != length_of_seqs: 211 #EMBOSS 2.9.0 is known to use spaces instead of minus signs 212 #for leading gaps, and thus fails to parse. This old version 213 #is still used as of Dec 2008 behind the EBI SOAP webservice: 214 #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl 215 raise ValueError("Error parsing alignment - sequences of " 216 "different length? You could be using an " 217 "old version of EMBOSS.") 218 records.append(SeqRecord(Seq(seq, self.alphabet), 219 id=id, description=id)) 220 return MultipleSeqAlignment(records, self.alphabet)
221 222 223 if __name__ == "__main__": 224 print "Running a quick self-test" 225 226 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 227 simple_example = \ 228 """######################################## 229 # Program: alignret 230 # Rundate: Wed Jan 16 17:16:13 2002 231 # Report_file: stdout 232 ######################################## 233 #======================================= 234 # 235 # Aligned_sequences: 4 236 # 1: IXI_234 237 # 2: IXI_235 238 # 3: IXI_236 239 # 4: IXI_237 240 # Matrix: EBLOSUM62 241 # Gap_penalty: 10.0 242 # Extend_penalty: 0.5 243 # 244 # Length: 131 245 # Identity: 95/131 (72.5%) 246 # Similarity: 127/131 (96.9%) 247 # Gaps: 25/131 (19.1%) 248 # Score: 100.0 249 # 250 # 251 #======================================= 252 253 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 254 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 255 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 256 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 257 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 258 259 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 260 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 261 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 262 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 263 ||:||||||||||||||||||||:::::::::::||||||||||||| | 264 265 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 266 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 267 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 268 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 269 |||:||||:|||||:|||||||::||||||| 270 271 272 #--------------------------------------- 273 #--------------------------------------- 274 275 """ 276 277 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 278 pair_example = \ 279 """######################################## 280 # Program: water 281 # Rundate: Wed Jan 16 17:23:19 2002 282 # Report_file: stdout 283 ######################################## 284 #======================================= 285 # 286 # Aligned_sequences: 2 287 # 1: IXI_234 288 # 2: IXI_235 289 # Matrix: EBLOSUM62 290 # Gap_penalty: 10.0 291 # Extend_penalty: 0.5 292 # 293 # Length: 131 294 # Identity: 112/131 (85.5%) 295 # Similarity: 112/131 (85.5%) 296 # Gaps: 19/131 (14.5%) 297 # Score: 591.5 298 # 299 # 300 #======================================= 301 302 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 303 ||||||||||||||| |||||||||||||||||||||||||| 304 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 305 306 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 307 |||||||||||||||||||||||| |||||||||||||||| 308 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 309 310 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 311 ||||||||||||||||||||||||||||||| 312 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 313 314 315 #--------------------------------------- 316 #--------------------------------------- 317 318 319 """ 320 321 pair_example2 = \ 322 """######################################## 323 # Program: needle 324 # Rundate: Sun 27 Apr 2007 17:20:35 325 # Commandline: needle 326 # [-asequence] Spo0F.faa 327 # [-bsequence] paired_r.faa 328 # -sformat2 pearson 329 # Align_format: srspair 330 # Report_file: ref_rec .needle 331 ######################################## 332 333 #======================================= 334 # 335 # Aligned_sequences: 2 336 # 1: ref_rec 337 # 2: gi|94968718|receiver 338 # Matrix: EBLOSUM62 339 # Gap_penalty: 10.0 340 # Extend_penalty: 0.5 341 # 342 # Length: 124 343 # Identity: 32/124 (25.8%) 344 # Similarity: 64/124 (51.6%) 345 # Gaps: 17/124 (13.7%) 346 # Score: 112.0 347 # 348 # 349 #======================================= 350 351 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 352 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 353 gi|94968718|r 1 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 354 355 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 356 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 357 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 358 359 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 360 :..| ..|:|.|: || 361 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 362 363 364 #======================================= 365 # 366 # Aligned_sequences: 2 367 # 1: ref_rec 368 # 2: gi|94968761|receiver 369 # Matrix: EBLOSUM62 370 # Gap_penalty: 10.0 371 # Extend_penalty: 0.5 372 # 373 # Length: 119 374 # Identity: 34/119 (28.6%) 375 # Similarity: 58/119 (48.7%) 376 # Gaps: 9/119 ( 7.6%) 377 # Score: 154.0 378 # 379 # 380 #======================================= 381 382 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 383 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 384 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 385 386 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 387 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 388 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 389 390 ref_rec 101 PFDIDEIRDAV-------- 111 391 |...|::...| 392 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 393 394 395 #======================================= 396 # 397 # Aligned_sequences: 2 398 # 1: ref_rec 399 # 2: gi|94967506|receiver 400 # Matrix: EBLOSUM62 401 # Gap_penalty: 10.0 402 # Extend_penalty: 0.5 403 # 404 # Length: 120 405 # Identity: 29/120 (24.2%) 406 # Similarity: 53/120 (44.2%) 407 # Gaps: 9/120 ( 7.5%) 408 # Score: 121.0 409 # 410 # 411 #======================================= 412 413 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 414 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 415 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 416 417 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 418 |:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 419 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 420 421 ref_rec 100 KPFDIDEIRDAV-------- 111 422 ||.|||.:.:.. 423 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 424 425 426 #======================================= 427 # 428 # Aligned_sequences: 2 429 # 1: ref_rec 430 # 2: gi|94970045|receiver 431 # Matrix: EBLOSUM62 432 # Gap_penalty: 10.0 433 # Extend_penalty: 0.5 434 # 435 # Length: 118 436 # Identity: 30/118 (25.4%) 437 # Similarity: 64/118 (54.2%) 438 # Gaps: 9/118 ( 7.6%) 439 # Score: 126.0 440 # 441 # 442 #======================================= 443 444 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 445 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 446 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 447 448 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 449 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 450 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 451 452 ref_rec 99 -AKPFDID----EIRDAV 111 453 .|||.:| :||:.: 454 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 455 456 457 #======================================= 458 # 459 # Aligned_sequences: 2 460 # 1: ref_rec 461 # 2: gi|94970041|receiver 462 # Matrix: EBLOSUM62 463 # Gap_penalty: 10.0 464 # Extend_penalty: 0.5 465 # 466 # Length: 125 467 # Identity: 35/125 (28.0%) 468 # Similarity: 70/125 (56.0%) 469 # Gaps: 18/125 (14.4%) 470 # Score: 156.5 471 # 472 # 473 #======================================= 474 475 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 476 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 477 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 478 479 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 480 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 481 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 482 483 ref_rec 99 A----KPFDIDEIRDAV-------- 111 484 | |||..|.:...| 485 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 486 487 488 #--------------------------------------- 489 #--------------------------------------- 490 491 """ 492 493 pair_example3 = """######################################## 494 # Program: needle 495 # Rundate: Mon 14 Jul 2008 11:45:42 496 # Commandline: needle 497 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 498 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 499 # -filter 500 # Align_format: srspair 501 # Report_file: stdout 502 ######################################## 503 504 #======================================= 505 # 506 # Aligned_sequences: 2 507 # 1: asis 508 # 2: asis 509 # Matrix: EDNAFULL 510 # Gap_penalty: 10.0 511 # Extend_penalty: 0.5 512 # 513 # Length: 667 514 # Identity: 210/667 (31.5%) 515 # Similarity: 210/667 (31.5%) 516 # Gaps: 408/667 (61.2%) 517 # Score: 561.0 518 # 519 # 520 #======================================= 521 522 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 523 524 asis 0 -------------------------------------------------- 0 525 526 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 527 528 asis 0 -------------------------------------------------- 0 529 530 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 531 532 asis 0 -------------------------------------------------- 0 533 534 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 535 .|||||| 536 asis 1 ------------TTATTAA------------------------------- 7 537 538 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 539 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 540 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 541 542 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 543 ||.|.|||||.|||.||||.|||| | ||||||||| 544 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 545 546 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 547 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 548 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 549 550 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 551 ||| |||||| .||||...||||..|||||. 552 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 553 554 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 555 .| ||||||||||| || ||.||.||.|| 556 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 557 558 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 559 ||..||.||.|.|||..||||.|| ||||| | 560 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 561 562 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 563 ||| |.||| |||||||||.||| .||||||...||||||||| 564 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 565 566 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 567 ||||||||..| |||||||| 568 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 569 570 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 571 572 asis 311 -------------------------------------------------- 311 573 574 asis 599 GAAAGGGGGTTAATAGC 615 575 576 asis 311 ----------------- 311 577 578 579 #--------------------------------------- 580 #---------------------------------------""" 581 582 from StringIO import StringIO 583 584 alignments = list(EmbossIterator(StringIO(pair_example))) 585 assert len(alignments) == 1 586 assert len(alignments[0]) == 2 587 assert [r.id for r in alignments[0]] \ 588 == ["IXI_234", "IXI_235"] 589 590 alignments = list(EmbossIterator(StringIO(simple_example))) 591 assert len(alignments) == 1 592 assert len(alignments[0]) == 4 593 assert [r.id for r in alignments[0]] \ 594 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 595 596 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 597 assert len(alignments) == 2 598 assert len(alignments[0]) == 2 599 assert len(alignments[1]) == 4 600 assert [r.id for r in alignments[0]] \ 601 == ["IXI_234", "IXI_235"] 602 assert [r.id for r in alignments[1]] \ 603 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 604 605 alignments = list(EmbossIterator(StringIO(pair_example2))) 606 assert len(alignments) == 5 607 assert len(alignments[0]) == 2 608 assert [r.id for r in alignments[0]] \ 609 == ["ref_rec", "gi|94968718|receiver"] 610 assert [r.id for r in alignments[4]] \ 611 == ["ref_rec", "gi|94970041|receiver"] 612 613 alignments = list(EmbossIterator(StringIO(pair_example3))) 614 assert len(alignments) == 1 615 assert len(alignments[0]) == 2 616 assert [r.id for r in alignments[0]] \ 617 == ["asis", "asis"] 618 619 print "Done" 620