Package Bio :: Package AlignIO :: Module FastaIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.FastaIO

  1  # Copyright 2008-2016 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools. 
  7   
  8  You are expected to use this module via the Bio.AlignIO functions (or the 
  9  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 10   
 11  This module contains a parser for the pairwise alignments produced by Bill 
 12  Pearson's FASTA tools, for use from the Bio.AlignIO interface where it is 
 13  referred to as the "fasta-m10" file format (as we only support the machine 
 14  readable output format selected with the -m 10 command line option). 
 15   
 16  This module does NOT cover the generic "fasta" file format originally 
 17  developed as an input format to the FASTA tools.  The Bio.AlignIO and 
 18  Bio.SeqIO both use the Bio.SeqIO.FastaIO module to deal with these files, 
 19  which can also be used to store a multiple sequence alignments. 
 20  """ 
 21   
 22  from __future__ import print_function 
 23   
 24  from Bio.Seq import Seq 
 25  from Bio.SeqRecord import SeqRecord 
 26  from Bio.Align import MultipleSeqAlignment 
 27  from Bio.Alphabet import single_letter_alphabet, generic_dna, generic_protein 
 28  from Bio.Alphabet import Gapped 
 29   
 30   
31 -def _extract_alignment_region(alignment_seq_with_flanking, annotation):
32 """Extract alignment region (PRIVATE). 33 34 Helper function for the main parsing code. 35 36 To get the actual pairwise alignment sequences, we must first 37 translate the un-gapped sequence based coordinates into positions 38 in the gapped sequence (which may have a flanking region shown 39 using leading - characters). To date, I have never seen any 40 trailing flanking region shown in the m10 file, but the 41 following code should also cope with that. 42 43 Note that this code seems to work fine even when the "sq_offset" 44 entries are prsent as a result of using the -X command line option. 45 """ 46 align_stripped = alignment_seq_with_flanking.strip("-") 47 display_start = int(annotation['al_display_start']) 48 if int(annotation['al_start']) <= int(annotation['al_stop']): 49 start = int(annotation['al_start']) \ 50 - display_start 51 end = int(annotation['al_stop']) \ 52 - display_start + 1 53 else: 54 # FASTA has flipped this sequence... 55 start = display_start \ 56 - int(annotation['al_start']) 57 end = display_start \ 58 - int(annotation['al_stop']) + 1 59 end += align_stripped.count("-") 60 assert 0 <= start and start < end and end <= len(align_stripped), \ 61 "Problem with sequence start/stop,\n%s[%i:%i]\n%s" \ 62 % (alignment_seq_with_flanking, start, end, annotation) 63 return align_stripped[start:end]
64 65
66 -def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
67 """Alignment iterator for the FASTA tool's pairwise alignment output. 68 69 This is for reading the pairwise alignments output by Bill Pearson's 70 FASTA program when called with the -m 10 command line option for machine 71 readable output. For more details about the FASTA tools, see the website 72 http://fasta.bioch.virginia.edu/ and the paper: 73 74 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448 75 76 This class is intended to be used via the Bio.AlignIO.parse() function 77 by specifying the format as "fasta-m10" as shown in the following code:: 78 79 from Bio import AlignIO 80 handle = ... 81 for a in AlignIO.parse(handle, "fasta-m10"): 82 assert len(a) == 2, "Should be pairwise!" 83 print("Alignment length %i" % a.get_alignment_length()) 84 for record in a: 85 print("%s %s %s" % (record.seq, record.name, record.id)) 86 87 Note that this is not a full blown parser for all the information 88 in the FASTA output - for example, most of the header and all of the 89 footer is ignored. Also, the alignments are not batched according to 90 the input queries. 91 92 Also note that there can be up to about 30 letters of flanking region 93 included in the raw FASTA output as contextual information. This is NOT 94 part of the alignment itself, and is not included in the resulting 95 MultipleSeqAlignment objects returned. 96 """ 97 if alphabet is None: 98 alphabet = single_letter_alphabet 99 100 state_PREAMBLE = -1 101 state_NONE = 0 102 state_QUERY_HEADER = 1 103 state_ALIGN_HEADER = 2 104 state_ALIGN_QUERY = 3 105 state_ALIGN_MATCH = 4 106 state_ALIGN_CONS = 5 107 108 def build_hsp(): 109 if not query_tags and not match_tags: 110 raise ValueError("No data for query %r, match %r" 111 % (query_id, match_id)) 112 assert query_tags, query_tags 113 assert match_tags, match_tags 114 evalue = align_tags.get("fa_expect") 115 q = "?" # Just for printing len(q) in debug below 116 m = "?" # Just for printing len(m) in debug below 117 tool = global_tags.get("tool", "").upper() 118 try: 119 q = _extract_alignment_region(query_seq, query_tags) 120 if tool in ["TFASTX"] and len(match_seq) == len(q): 121 m = match_seq 122 # Quick hack until I can work out how -, * and / characters 123 # and the apparent mix of aa and bp coordinates works. 124 else: 125 m = _extract_alignment_region(match_seq, match_tags) 126 assert len(q) == len(m) 127 except AssertionError as err: 128 print("Darn... amino acids vs nucleotide coordinates?") 129 print(tool) 130 print(query_seq) 131 print(query_tags) 132 print("%s %i" % (q, len(q))) 133 print(match_seq) 134 print(match_tags) 135 print("%s %i" % (m, len(m))) 136 print(handle.name) 137 raise err 138 139 assert alphabet is not None 140 alignment = MultipleSeqAlignment([], alphabet) 141 142 # TODO - Introduce an annotated alignment class? 143 # See also Bio/AlignIO/MafIO.py for same requirement. 144 # For now, store the annotation a new private property: 145 alignment._annotations = {} 146 147 # Want to record both the query header tags, and the alignment tags. 148 for key, value in header_tags.items(): 149 alignment._annotations[key] = value 150 for key, value in align_tags.items(): 151 alignment._annotations[key] = value 152 153 # Query 154 # ===== 155 record = SeqRecord(Seq(q, alphabet), 156 id=query_id, 157 name="query", 158 description=query_descr, 159 annotations={"original_length": int(query_tags["sq_len"])}) 160 # TODO - handle start/end coordinates properly. Short term hack for now: 161 record._al_start = int(query_tags["al_start"]) 162 record._al_stop = int(query_tags["al_stop"]) 163 alignment.append(record) 164 165 # TODO - What if a specific alphabet has been requested? 166 # TODO - Use an IUPAC alphabet? 167 # TODO - Can FASTA output RNA? 168 if alphabet == single_letter_alphabet and "sq_type" in query_tags: 169 if query_tags["sq_type"] == "D": 170 record.seq.alphabet = generic_dna 171 elif query_tags["sq_type"] == "p": 172 record.seq.alphabet = generic_protein 173 if "-" in q: 174 if not hasattr(record.seq.alphabet, "gap_char"): 175 record.seq.alphabet = Gapped(record.seq.alphabet, "-") 176 177 # Match 178 # ===== 179 record = SeqRecord(Seq(m, alphabet), 180 id=match_id, 181 name="match", 182 description=match_descr, 183 annotations={"original_length": int(match_tags["sq_len"])}) 184 # TODO - handle start/end coordinates properly. Short term hack for now: 185 record._al_start = int(match_tags["al_start"]) 186 record._al_stop = int(match_tags["al_stop"]) 187 alignment.append(record) 188 189 # This is still a very crude way of dealing with the alphabet: 190 if alphabet == single_letter_alphabet and "sq_type" in match_tags: 191 if match_tags["sq_type"] == "D": 192 record.seq.alphabet = generic_dna 193 elif match_tags["sq_type"] == "p": 194 record.seq.alphabet = generic_protein 195 if "-" in m: 196 if not hasattr(record.seq.alphabet, "gap_char"): 197 record.seq.alphabet = Gapped(record.seq.alphabet, "-") 198 199 return alignment
200 201 state = state_PREAMBLE 202 query_id = None 203 match_id = None 204 query_descr = "" 205 match_descr = "" 206 global_tags = {} 207 header_tags = {} 208 align_tags = {} 209 query_tags = {} 210 match_tags = {} 211 query_seq = "" 212 match_seq = "" 213 cons_seq = "" 214 for line in handle: 215 if ">>>" in line and not line.startswith(">>>"): 216 if query_id and match_id: 217 # This happens on old FASTA output which lacked an end of 218 # query >>><<< marker line. 219 yield build_hsp() 220 state = state_NONE 221 query_descr = line[line.find(">>>") + 3:].strip() 222 query_id = query_descr.split(None, 1)[0] 223 match_id = None 224 header_tags = {} 225 align_tags = {} 226 query_tags = {} 227 match_tags = {} 228 query_seq = "" 229 match_seq = "" 230 cons_seq = "" 231 elif line.startswith("!! No "): 232 # e.g. 233 # !! No library sequences with E() < 0.5 234 # or on more recent versions, 235 # No sequences with E() < 0.05 236 assert state == state_NONE 237 assert not header_tags 238 assert not align_tags 239 assert not match_tags 240 assert not query_tags 241 assert match_id is None 242 assert not query_seq 243 assert not match_seq 244 assert not cons_seq 245 query_id = None 246 elif line.strip() in [">>><<<", ">>>///"]: 247 # End of query, possible end of all queries 248 if query_id and match_id: 249 yield build_hsp() 250 state = state_NONE 251 query_id = None 252 match_id = None 253 header_tags = {} 254 align_tags = {} 255 query_tags = {} 256 match_tags = {} 257 query_seq = "" 258 match_seq = "" 259 cons_seq = "" 260 elif line.startswith(">>>"): 261 # Should be start of a match! 262 assert query_id is not None 263 assert line[3:].split(", ", 1)[0] == query_id, line 264 assert match_id is None 265 assert not header_tags 266 assert not align_tags 267 assert not query_tags 268 assert not match_tags 269 assert not match_seq 270 assert not query_seq 271 assert not cons_seq 272 state = state_QUERY_HEADER 273 elif line.startswith(">>"): 274 # Should now be at start of a match alignment! 275 if query_id and match_id: 276 yield build_hsp() 277 align_tags = {} 278 query_tags = {} 279 match_tags = {} 280 query_seq = "" 281 match_seq = "" 282 cons_seq = "" 283 match_descr = line[2:].strip() 284 match_id = match_descr.split(None, 1)[0] 285 state = state_ALIGN_HEADER 286 elif line.startswith(">--"): 287 # End of one HSP 288 assert query_id and match_id, line 289 yield build_hsp() 290 # Clean up read for next HSP 291 # but reuse header_tags 292 align_tags = {} 293 query_tags = {} 294 match_tags = {} 295 query_seq = "" 296 match_seq = "" 297 cons_seq = "" 298 state = state_ALIGN_HEADER 299 elif line.startswith(">"): 300 if state == state_ALIGN_HEADER: 301 # Should be start of query alignment seq... 302 assert query_id is not None, line 303 assert match_id is not None, line 304 assert query_id.startswith(line[1:].split(None, 1)[0]), line 305 state = state_ALIGN_QUERY 306 elif state == state_ALIGN_QUERY: 307 # Should be start of match alignment seq 308 assert query_id is not None, line 309 assert match_id is not None, line 310 assert match_id.startswith(line[1:].split(None, 1)[0]), line 311 state = state_ALIGN_MATCH 312 elif state == state_NONE: 313 # Can get > as the last line of a histogram 314 pass 315 else: 316 assert False, "state %i got %r" % (state, line) 317 elif line.startswith("; al_cons"): 318 assert state == state_ALIGN_MATCH, line 319 state = state_ALIGN_CONS 320 # Next line(s) should be consensus seq... 321 elif line.startswith("; "): 322 if ": " in line: 323 key, value = [s.strip() for s in line[2:].split(": ", 1)] 324 else: 325 import warnings 326 # Seen in lalign36, specifically version 36.3.4 Apr, 2011 327 # Fixed in version 36.3.5b Oct, 2011(preload8) 328 warnings.warn("Missing colon in line: %r" % line) 329 try: 330 key, value = [s.strip() for s in line[2:].split(" ", 1)] 331 except ValueError: 332 raise ValueError("Bad line: %r" % line) 333 if state == state_QUERY_HEADER: 334 header_tags[key] = value 335 elif state == state_ALIGN_HEADER: 336 align_tags[key] = value 337 elif state == state_ALIGN_QUERY: 338 query_tags[key] = value 339 elif state == state_ALIGN_MATCH: 340 match_tags[key] = value 341 else: 342 assert False, "Unexpected state %r, %r" % (state, line) 343 elif state == state_ALIGN_QUERY: 344 query_seq += line.strip() 345 elif state == state_ALIGN_MATCH: 346 match_seq += line.strip() 347 elif state == state_ALIGN_CONS: 348 cons_seq += line.strip("\n") 349 elif state == state_PREAMBLE: 350 if line.startswith("#"): 351 global_tags["command"] = line[1:].strip() 352 elif line.startswith(" version "): 353 global_tags["version"] = line[9:].strip() 354 elif " compares a " in line: 355 global_tags["tool"] = line[:line.find(" compares a ")].strip() 356 elif " searches a " in line: 357 global_tags["tool"] = line[:line.find(" searches a ")].strip() 358 else: 359 pass 360 361 362 if __name__ == "__main__": 363 print("Running a quick self-test") 364 365 # http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 366 simple_example = \ 367 """# /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa 368 FASTA searches a protein or DNA sequence data bank 369 version 34.26 January 12, 2007 370 Please cite: 371 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448 372 373 Query library NC_002127.faa vs NC_009649.faa library 374 searching NC_009649.faa library 375 376 1>>>gi|10955263|ref|NP_052604.1| plasmid mobilization [Escherichia coli O157:H7 s 107 aa - 107 aa 377 vs NC_009649.faa library 378 379 45119 residues in 180 sequences 380 Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273 381 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25 382 Lambda= 0.175043 383 384 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2 385 join: 36, opt: 24, open/ext: -10/-2, width: 16 386 Scan time: 0.000 387 The best scores are: opt bits E(180) 388 gi|152973457|ref|YP_001338508.1| ATPase with chape ( 931) 71 24.9 0.58 389 gi|152973588|ref|YP_001338639.1| F pilus assembly ( 459) 63 23.1 0.99 390 391 >>>gi|10955263|ref|NP_052604.1|, 107 aa vs NC_009649.faa library 392 ; pg_name: /opt/fasta/fasta34 393 ; pg_ver: 34.26 394 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa 395 ; pg_name: FASTA 396 ; pg_ver: 3.5 Sept 2006 397 ; pg_matrix: BL50 (15:-5) 398 ; pg_open-ext: -10 -2 399 ; pg_ktup: 2 400 ; pg_optcut: 24 401 ; pg_cgap: 36 402 ; mp_extrap: 60000 180 403 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25 Lambda= 0.175043 404 ; mp_KS: -0.0000 (N=0) at 8159228 405 >>gi|152973457|ref|YP_001338508.1| ATPase with chaperone activity, ATP-binding subunit [Klebsiella pneumoniae subsp. pneumoniae MGH 78578] 406 ; fa_frame: f 407 ; fa_initn: 65 408 ; fa_init1: 43 409 ; fa_opt: 71 410 ; fa_z-score: 90.3 411 ; fa_bits: 24.9 412 ; fa_expect: 0.58 413 ; sw_score: 71 414 ; sw_ident: 0.250 415 ; sw_sim: 0.574 416 ; sw_overlap: 108 417 >gi|10955263| .. 418 ; sq_len: 107 419 ; sq_offset: 1 420 ; sq_type: p 421 ; al_start: 5 422 ; al_stop: 103 423 ; al_display_start: 1 424 --------------------------MTKRSGSNT-RRRAISRPVRLTAE 425 ED---QEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVM----- 426 RLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD 427 >gi|152973457|ref|YP_001338508.1| .. 428 ; sq_len: 931 429 ; sq_type: p 430 ; al_start: 96 431 ; al_stop: 195 432 ; al_display_start: 66 433 SDFFRIGDDATPVAADTDDVVDASFGEPAAAGSGAPRRRGSGLASRISEQ 434 SEALLQEAAKHAAEFGRS------EVDTEHLLLALADSDVVKTILGQFKI 435 KVDDLKRQIESEAKR-GDKPF-EGEIGVSPRVKDALSRAFVASNELGHSY 436 VGPEHFLIGLAEEGEGLAANLLRRYGLTPQ 437 >>gi|152973588|ref|YP_001338639.1| F pilus assembly protein [Klebsiella pneumoniae subsp. pneumoniae MGH 78578] 438 ; fa_frame: f 439 ; fa_initn: 33 440 ; fa_init1: 33 441 ; fa_opt: 63 442 ; fa_z-score: 86.1 443 ; fa_bits: 23.1 444 ; fa_expect: 0.99 445 ; sw_score: 63 446 ; sw_ident: 0.266 447 ; sw_sim: 0.656 448 ; sw_overlap: 64 449 >gi|10955263| .. 450 ; sq_len: 107 451 ; sq_offset: 1 452 ; sq_type: p 453 ; al_start: 32 454 ; al_stop: 94 455 ; al_display_start: 2 456 TKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKK 457 VNSLTDDRVLKEV-MRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRAL 458 LSRLMAD 459 >gi|152973588|ref|YP_001338639.1| .. 460 ; sq_len: 459 461 ; sq_type: p 462 ; al_start: 191 463 ; al_stop: 248 464 ; al_display_start: 161 465 VGGLFPRTQVAQQKVCQDIAGESNIFSDWAASRQGCTVGG--KMDSVQDK 466 ASDKDKERVMKNINIMWNALSKNRLFDG----NKELKEFIMTLTGTLIFG 467 ENSEITPLPARTTDQDLIRAMMEGGTAKIYHCNDSDKCLKVVADATVTIT 468 SNKALKSQISALLSSIQNKAVADEKLTDQE 469 2>>>gi|10955264|ref|NP_052605.1| hypothetical protein pOSAK1_02 [Escherichia coli O157:H7 s 126 aa - 126 aa 470 vs NC_009649.faa library 471 472 45119 residues in 180 sequences 473 Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313 474 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25 475 Lambda= 0.179384 476 477 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2 478 join: 36, opt: 24, open/ext: -10/-2, width: 16 479 Scan time: 0.000 480 The best scores are: opt bits E(180) 481 gi|152973462|ref|YP_001338513.1| hypothetical prot ( 101) 58 22.9 0.29 482 483 >>>gi|10955264|ref|NP_052605.1|, 126 aa vs NC_009649.faa library 484 ; pg_name: /opt/fasta/fasta34 485 ; pg_ver: 34.26 486 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa 487 ; pg_name: FASTA 488 ; pg_ver: 3.5 Sept 2006 489 ; pg_matrix: BL50 (15:-5) 490 ; pg_open-ext: -10 -2 491 ; pg_ktup: 2 492 ; pg_optcut: 24 493 ; pg_cgap: 36 494 ; mp_extrap: 60000 180 495 ; mp_stats: Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25 Lambda= 0.179384 496 ; mp_KS: -0.0000 (N=0) at 8159228 497 >>gi|152973462|ref|YP_001338513.1| hypothetical protein KPN_pKPN3p05904 [Klebsiella pneumoniae subsp. pneumoniae MGH 78578] 498 ; fa_frame: f 499 ; fa_initn: 50 500 ; fa_init1: 50 501 ; fa_opt: 58 502 ; fa_z-score: 95.8 503 ; fa_bits: 22.9 504 ; fa_expect: 0.29 505 ; sw_score: 58 506 ; sw_ident: 0.289 507 ; sw_sim: 0.632 508 ; sw_overlap: 38 509 >gi|10955264| .. 510 ; sq_len: 126 511 ; sq_offset: 1 512 ; sq_type: p 513 ; al_start: 1 514 ; al_stop: 38 515 ; al_display_start: 1 516 ------------------------------MKKDKKYQIEAIKNKDKTLF 517 IVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSF 518 NGEKFSSYTLNKVTKTDEYN 519 >gi|152973462|ref|YP_001338513.1| .. 520 ; sq_len: 101 521 ; sq_type: p 522 ; al_start: 44 523 ; al_stop: 81 524 ; al_display_start: 14 525 DALLGEIQRLRKQVHQLQLERDILTKANELIKKDLGVSFLKLKNREKTLI 526 VDALKKKYPVAELLSVLQLARSCYFYQNVCTISMRKYA 527 3>>>gi|10955265|ref|NP_052606.1| hypothetical protein pOSAK1_03 [Escherichia coli O157:H7 s 346 aa - 346 aa 528 vs NC_009649.faa library 529 530 45119 residues in 180 sequences 531 Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461 532 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25 533 Lambda= 0.210386 534 535 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2 536 join: 37, opt: 25, open/ext: -10/-2, width: 16 537 Scan time: 0.020 538 The best scores are: opt bits E(180) 539 gi|152973545|ref|YP_001338596.1| putative plasmid ( 242) 70 27.5 0.082 540 541 >>>gi|10955265|ref|NP_052606.1|, 346 aa vs NC_009649.faa library 542 ; pg_name: /opt/fasta/fasta34 543 ; pg_ver: 34.26 544 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa 545 ; pg_name: FASTA 546 ; pg_ver: 3.5 Sept 2006 547 ; pg_matrix: BL50 (15:-5) 548 ; pg_open-ext: -10 -2 549 ; pg_ktup: 2 550 ; pg_optcut: 25 551 ; pg_cgap: 37 552 ; mp_extrap: 60000 180 553 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25 Lambda= 0.210386 554 ; mp_KS: -0.0000 (N=0) at 8159228 555 >>gi|152973545|ref|YP_001338596.1| putative plasmid SOS inhibition protein A [Klebsiella pneumoniae subsp. pneumoniae MGH 78578] 556 ; fa_frame: f 557 ; fa_initn: 52 558 ; fa_init1: 52 559 ; fa_opt: 70 560 ; fa_z-score: 105.5 561 ; fa_bits: 27.5 562 ; fa_expect: 0.082 563 ; sw_score: 70 564 ; sw_ident: 0.279 565 ; sw_sim: 0.651 566 ; sw_overlap: 43 567 >gi|10955265| .. 568 ; sq_len: 346 569 ; sq_offset: 1 570 ; sq_type: p 571 ; al_start: 197 572 ; al_stop: 238 573 ; al_display_start: 167 574 DFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKK 575 QLSC-SLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNL 576 GEYFTENKPKYIIREIHQET 577 >gi|152973545|ref|YP_001338596.1| .. 578 ; sq_len: 242 579 ; sq_type: p 580 ; al_start: 52 581 ; al_stop: 94 582 ; al_display_start: 22 583 IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD 584 RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR 585 QDFAFTRKMRREARQVEQSW 586 >>><<< 587 588 589 579 residues in 3 query sequences 590 45119 residues in 180 library sequences 591 Scomplib [34.26] 592 start: Tue May 20 16:38:45 2008 done: Tue May 20 16:38:45 2008 593 Total Scan time: 0.020 Total Display time: 0.010 594 595 Function used was FASTA [version 34.26 January 12, 2007] 596 597 """ 598 599 from Bio._py3k import StringIO 600 601 alignments = list(FastaM10Iterator(StringIO(simple_example))) 602 assert len(alignments) == 4, len(alignments) 603 assert len(alignments[0]) == 2 604 for a in alignments: 605 print("Alignment %i sequences of length %i" 606 % (len(a), a.get_alignment_length())) 607 for r in a: 608 print("%s %s %i" % (r.seq, r.id, r.annotations["original_length"])) 609 # print(a.annotations) 610 print("Done") 611