Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2013 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Bio.AlignIO support for "emboss" alignment output from EMBOSS tools. 
  7   
  8  You are expected to use this module via the Bio.AlignIO functions (or the 
  9  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 10   
 11  This module contains a parser for the EMBOSS pairs/simple file format, for 
 12  example from the alignret, water and needle tools. 
 13  """ 
 14   
 15  from __future__ import print_function 
 16   
 17  from Bio.Seq import Seq 
 18  from Bio.SeqRecord import SeqRecord 
 19  from Bio.Align import MultipleSeqAlignment 
 20  from .Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 21   
 22   
23 -class EmbossWriter(SequentialAlignmentWriter):
24 """Emboss alignment writer (WORK IN PROGRESS). 25 26 Writes a simplfied version of the EMBOSS pairs/simple file format. 27 A lot of the information their tools record in their headers is not 28 available and is omitted. 29 """ 30
31 - def write_header(self):
32 handle = self.handle 33 handle.write("########################################\n") 34 handle.write("# Program: Biopython\n") 35 try: 36 handle.write("# Report_file: %s\n" % handle.name) 37 except AttributeError: 38 pass 39 handle.write("########################################\n")
40 45
46 - def write_alignment(self, alignment):
47 """Use this to write (another) single alignment to an open file.""" 48 handle = self.handle 49 handle.write("#=======================================\n") 50 handle.write("#\n") 51 handle.write("# Aligned_sequences: %i\n" % len(alignment)) 52 for i, record in enumerate(alignment): 53 handle.write("# %i: %s\n" % (i+1, record.id)) 54 handle.write("#\n") 55 handle.write("# Length: %i\n" % alignment.get_alignment_length()) 56 handle.write("#\n") 57 handle.write("#=======================================\n") 58 handle.write("\n") 59 #... 60 assert False
61 62
63 -class EmbossIterator(AlignmentIterator):
64 """Emboss alignment iterator. 65 66 For reading the (pairwise) alignments from EMBOSS tools in what they 67 call the "pairs" and "simple" formats. 68 """ 69
70 - def __next__(self):
71 72 handle = self.handle 73 74 try: 75 #Header we saved from when we were parsing 76 #the previous alignment. 77 line = self._header 78 del self._header 79 except AttributeError: 80 line = handle.readline() 81 if not line: 82 raise StopIteration 83 84 while line.rstrip() != "#=======================================": 85 line = handle.readline() 86 if not line: 87 raise StopIteration 88 89 length_of_seqs = None 90 number_of_seqs = None 91 ids = [] 92 seqs = [] 93 94 while line[0] == "#": 95 #Read in the rest of this alignment header, 96 #try and discover the number of records expected 97 #and their length 98 parts = line[1:].split(":", 1) 99 key = parts[0].lower().strip() 100 if key == "aligned_sequences": 101 number_of_seqs = int(parts[1].strip()) 102 assert len(ids) == 0 103 # Should now expect the record identifiers... 104 for i in range(number_of_seqs): 105 line = handle.readline() 106 parts = line[1:].strip().split(":", 1) 107 assert i+1 == int(parts[0].strip()) 108 ids.append(parts[1].strip()) 109 assert len(ids) == number_of_seqs 110 if key == "length": 111 length_of_seqs = int(parts[1].strip()) 112 113 #And read in another line... 114 line = handle.readline() 115 116 if number_of_seqs is None: 117 raise ValueError("Number of sequences missing!") 118 if length_of_seqs is None: 119 raise ValueError("Length of sequences missing!") 120 121 if self.records_per_alignment is not None \ 122 and self.records_per_alignment != number_of_seqs: 123 raise ValueError("Found %i records in this alignment, told to expect %i" 124 % (number_of_seqs, self.records_per_alignment)) 125 126 seqs = ["" for id in ids] 127 seq_starts = [] 128 index = 0 129 130 #Parse the seqs 131 while line: 132 if len(line) > 21: 133 id_start = line[:21].strip().split(None, 1) 134 seq_end = line[21:].strip().split(None, 1) 135 if len(id_start) == 2 and len(seq_end) == 2: 136 #identifier, seq start position, seq, seq end position 137 #(an aligned seq is broken up into multiple lines) 138 id, start = id_start 139 seq, end = seq_end 140 if start == end: 141 #Special case, either a single letter is present, 142 #or no letters at all. 143 if seq.replace("-", "") == "": 144 start = int(start) 145 end = int(end) 146 else: 147 start = int(start) - 1 148 end = int(end) 149 else: 150 assert seq.replace("-", "") != "", repr(line) 151 start = int(start) - 1 # python counting 152 end = int(end) 153 154 #The identifier is truncated... 155 assert 0 <= index and index < number_of_seqs, \ 156 "Expected index %i in range [0,%i)" \ 157 % (index, number_of_seqs) 158 assert id == ids[index] or id == ids[index][:len(id)] 159 160 if len(seq_starts) == index: 161 #Record the start 162 seq_starts.append(start) 163 164 #Check the start... 165 if start == end: 166 assert seq.replace("-", "") == "", line 167 else: 168 assert start - seq_starts[index] == len(seqs[index].replace("-", "")), \ 169 "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \ 170 % (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]), 171 start, line) 172 173 seqs[index] += seq 174 175 #Check the end ... 176 assert end == seq_starts[index] + len(seqs[index].replace("-", "")), \ 177 "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \ 178 % (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]), 179 seq_starts[index], end, line) 180 181 index += 1 182 if index >= number_of_seqs: 183 index = 0 184 else: 185 #just a start value, this is just alignment annotation (?) 186 #print "Skipping: " + line.rstrip() 187 pass 188 elif line.strip() == "": 189 #Just a spacer? 190 pass 191 else: 192 print(line) 193 assert False 194 195 line = handle.readline() 196 if line.rstrip() == "#---------------------------------------" \ 197 or line.rstrip() == "#=======================================": 198 #End of alignment 199 self._header = line 200 break 201 202 assert index == 0 203 204 if self.records_per_alignment is not None \ 205 and self.records_per_alignment != len(ids): 206 raise ValueError("Found %i records in this alignment, told to expect %i" 207 % (len(ids), self.records_per_alignment)) 208 209 records = [] 210 for id, seq in zip(ids, seqs): 211 if len(seq) != length_of_seqs: 212 #EMBOSS 2.9.0 is known to use spaces instead of minus signs 213 #for leading gaps, and thus fails to parse. This old version 214 #is still used as of Dec 2008 behind the EBI SOAP webservice: 215 #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl 216 raise ValueError("Error parsing alignment - sequences of " 217 "different length? You could be using an " 218 "old version of EMBOSS.") 219 records.append(SeqRecord(Seq(seq, self.alphabet), 220 id=id, description=id)) 221 return MultipleSeqAlignment(records, self.alphabet)
222 223 224 if __name__ == "__main__": 225 print("Running a quick self-test") 226 227 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 228 simple_example = \ 229 """######################################## 230 # Program: alignret 231 # Rundate: Wed Jan 16 17:16:13 2002 232 # Report_file: stdout 233 ######################################## 234 #======================================= 235 # 236 # Aligned_sequences: 4 237 # 1: IXI_234 238 # 2: IXI_235 239 # 3: IXI_236 240 # 4: IXI_237 241 # Matrix: EBLOSUM62 242 # Gap_penalty: 10.0 243 # Extend_penalty: 0.5 244 # 245 # Length: 131 246 # Identity: 95/131 (72.5%) 247 # Similarity: 127/131 (96.9%) 248 # Gaps: 25/131 (19.1%) 249 # Score: 100.0 250 # 251 # 252 #======================================= 253 254 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 255 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 256 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 257 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 258 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 259 260 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 261 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 262 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 263 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 264 ||:||||||||||||||||||||:::::::::::||||||||||||| | 265 266 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 267 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 268 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 269 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 270 |||:||||:|||||:|||||||::||||||| 271 272 273 #--------------------------------------- 274 #--------------------------------------- 275 276 """ 277 278 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 279 pair_example = \ 280 """######################################## 281 # Program: water 282 # Rundate: Wed Jan 16 17:23:19 2002 283 # Report_file: stdout 284 ######################################## 285 #======================================= 286 # 287 # Aligned_sequences: 2 288 # 1: IXI_234 289 # 2: IXI_235 290 # Matrix: EBLOSUM62 291 # Gap_penalty: 10.0 292 # Extend_penalty: 0.5 293 # 294 # Length: 131 295 # Identity: 112/131 (85.5%) 296 # Similarity: 112/131 (85.5%) 297 # Gaps: 19/131 (14.5%) 298 # Score: 591.5 299 # 300 # 301 #======================================= 302 303 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 304 ||||||||||||||| |||||||||||||||||||||||||| 305 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 306 307 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 308 |||||||||||||||||||||||| |||||||||||||||| 309 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 310 311 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 312 ||||||||||||||||||||||||||||||| 313 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 314 315 316 #--------------------------------------- 317 #--------------------------------------- 318 319 320 """ 321 322 pair_example2 = \ 323 """######################################## 324 # Program: needle 325 # Rundate: Sun 27 Apr 2007 17:20:35 326 # Commandline: needle 327 # [-asequence] Spo0F.faa 328 # [-bsequence] paired_r.faa 329 # -sformat2 pearson 330 # Align_format: srspair 331 # Report_file: ref_rec .needle 332 ######################################## 333 334 #======================================= 335 # 336 # Aligned_sequences: 2 337 # 1: ref_rec 338 # 2: gi|94968718|receiver 339 # Matrix: EBLOSUM62 340 # Gap_penalty: 10.0 341 # Extend_penalty: 0.5 342 # 343 # Length: 124 344 # Identity: 32/124 (25.8%) 345 # Similarity: 64/124 (51.6%) 346 # Gaps: 17/124 (13.7%) 347 # Score: 112.0 348 # 349 # 350 #======================================= 351 352 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 353 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 354 gi|94968718|r 1 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 355 356 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 357 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 358 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 359 360 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 361 :..| ..|:|.|: || 362 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 363 364 365 #======================================= 366 # 367 # Aligned_sequences: 2 368 # 1: ref_rec 369 # 2: gi|94968761|receiver 370 # Matrix: EBLOSUM62 371 # Gap_penalty: 10.0 372 # Extend_penalty: 0.5 373 # 374 # Length: 119 375 # Identity: 34/119 (28.6%) 376 # Similarity: 58/119 (48.7%) 377 # Gaps: 9/119 ( 7.6%) 378 # Score: 154.0 379 # 380 # 381 #======================================= 382 383 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 384 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 385 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 386 387 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 388 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 389 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 390 391 ref_rec 101 PFDIDEIRDAV-------- 111 392 |...|::...| 393 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 394 395 396 #======================================= 397 # 398 # Aligned_sequences: 2 399 # 1: ref_rec 400 # 2: gi|94967506|receiver 401 # Matrix: EBLOSUM62 402 # Gap_penalty: 10.0 403 # Extend_penalty: 0.5 404 # 405 # Length: 120 406 # Identity: 29/120 (24.2%) 407 # Similarity: 53/120 (44.2%) 408 # Gaps: 9/120 ( 7.5%) 409 # Score: 121.0 410 # 411 # 412 #======================================= 413 414 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 415 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 416 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 417 418 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 419 |:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 420 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 421 422 ref_rec 100 KPFDIDEIRDAV-------- 111 423 ||.|||.:.:.. 424 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 425 426 427 #======================================= 428 # 429 # Aligned_sequences: 2 430 # 1: ref_rec 431 # 2: gi|94970045|receiver 432 # Matrix: EBLOSUM62 433 # Gap_penalty: 10.0 434 # Extend_penalty: 0.5 435 # 436 # Length: 118 437 # Identity: 30/118 (25.4%) 438 # Similarity: 64/118 (54.2%) 439 # Gaps: 9/118 ( 7.6%) 440 # Score: 126.0 441 # 442 # 443 #======================================= 444 445 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 446 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 447 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 448 449 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 450 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 451 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 452 453 ref_rec 99 -AKPFDID----EIRDAV 111 454 .|||.:| :||:.: 455 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 456 457 458 #======================================= 459 # 460 # Aligned_sequences: 2 461 # 1: ref_rec 462 # 2: gi|94970041|receiver 463 # Matrix: EBLOSUM62 464 # Gap_penalty: 10.0 465 # Extend_penalty: 0.5 466 # 467 # Length: 125 468 # Identity: 35/125 (28.0%) 469 # Similarity: 70/125 (56.0%) 470 # Gaps: 18/125 (14.4%) 471 # Score: 156.5 472 # 473 # 474 #======================================= 475 476 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 477 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 478 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 479 480 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 481 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 482 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 483 484 ref_rec 99 A----KPFDIDEIRDAV-------- 111 485 | |||..|.:...| 486 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 487 488 489 #--------------------------------------- 490 #--------------------------------------- 491 492 """ 493 494 pair_example3 = """######################################## 495 # Program: needle 496 # Rundate: Mon 14 Jul 2008 11:45:42 497 # Commandline: needle 498 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 499 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 500 # -filter 501 # Align_format: srspair 502 # Report_file: stdout 503 ######################################## 504 505 #======================================= 506 # 507 # Aligned_sequences: 2 508 # 1: asis 509 # 2: asis 510 # Matrix: EDNAFULL 511 # Gap_penalty: 10.0 512 # Extend_penalty: 0.5 513 # 514 # Length: 667 515 # Identity: 210/667 (31.5%) 516 # Similarity: 210/667 (31.5%) 517 # Gaps: 408/667 (61.2%) 518 # Score: 561.0 519 # 520 # 521 #======================================= 522 523 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 524 525 asis 0 -------------------------------------------------- 0 526 527 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 528 529 asis 0 -------------------------------------------------- 0 530 531 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 532 533 asis 0 -------------------------------------------------- 0 534 535 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 536 .|||||| 537 asis 1 ------------TTATTAA------------------------------- 7 538 539 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 540 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 541 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 542 543 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 544 ||.|.|||||.|||.||||.|||| | ||||||||| 545 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 546 547 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 548 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 549 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 550 551 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 552 ||| |||||| .||||...||||..|||||. 553 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 554 555 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 556 .| ||||||||||| || ||.||.||.|| 557 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 558 559 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 560 ||..||.||.|.|||..||||.|| ||||| | 561 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 562 563 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 564 ||| |.||| |||||||||.||| .||||||...||||||||| 565 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 566 567 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 568 ||||||||..| |||||||| 569 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 570 571 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 572 573 asis 311 -------------------------------------------------- 311 574 575 asis 599 GAAAGGGGGTTAATAGC 615 576 577 asis 311 ----------------- 311 578 579 580 #--------------------------------------- 581 #---------------------------------------""" 582 583 from Bio._py3k import StringIO 584 585 alignments = list(EmbossIterator(StringIO(pair_example))) 586 assert len(alignments) == 1 587 assert len(alignments[0]) == 2 588 assert [r.id for r in alignments[0]] \ 589 == ["IXI_234", "IXI_235"] 590 591 alignments = list(EmbossIterator(StringIO(simple_example))) 592 assert len(alignments) == 1 593 assert len(alignments[0]) == 4 594 assert [r.id for r in alignments[0]] \ 595 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 596 597 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 598 assert len(alignments) == 2 599 assert len(alignments[0]) == 2 600 assert len(alignments[1]) == 4 601 assert [r.id for r in alignments[0]] \ 602 == ["IXI_234", "IXI_235"] 603 assert [r.id for r in alignments[1]] \ 604 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 605 606 alignments = list(EmbossIterator(StringIO(pair_example2))) 607 assert len(alignments) == 5 608 assert len(alignments[0]) == 2 609 assert [r.id for r in alignments[0]] \ 610 == ["ref_rec", "gi|94968718|receiver"] 611 assert [r.id for r in alignments[4]] \ 612 == ["ref_rec", "gi|94970041|receiver"] 613 614 alignments = list(EmbossIterator(StringIO(pair_example3))) 615 assert len(alignments) == 1 616 assert len(alignments[0]) == 2 617 assert [r.id for r in alignments[0]] \ 618 == ["asis", "asis"] 619 620 print("Done") 621