From 426e9f82d1935d376f58449b389035e53f573f09 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Fri, 16 Feb 2024 12:36:22 +0000 Subject: [PATCH 1/2] Improve mpileup overlap removal for deletions The code previously just skipped deletions, meaning templates with a deletion in one copy and not in the other could still be reported twice in the mpileup. Note this depended on what came first due to the order of iref updates. The new code moves the two iref updates to after the two cigar_iref2iseq_next so we're always consistently getting the next reference base regardless. We then detect deletions and catch the lagging sequence up, applying the same Q*=0.8 or Q=0 logic that the mismatching base code does. It's tricky to know if this has a beneficial impact on variant calling or not! If there is debate over whether a deletion exists between the read-pairs, then it is probably not something we wish to take into account, however it's currently random which read we select. Previously we'd have had both deletions and not-deletion present in the results, over-inflating depth but maybe bringing less confidence to such deletion calls. However it is at least consistent with the stated intention of overlap removal. One possible further mitigation could be to reduce quality more around such positions (so not just Q*0.8, and also adjusting the before/after base too for the deletion read). That hasn't been done here though. Fixes samtools/samtools#1992 --- sam.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/sam.c b/sam.c index 82b3b9340..af3e7a7ad 100644 --- a/sam.c +++ b/sam.c @@ -5706,8 +5706,7 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b) // Loop over the overlapping region nulling qualities in either // seq a or b. int err = 0; - while ( 1 ) - { + while ( 1 ) { // Step to next matching reference position in a and b while ( a_ret >= 0 && a_iref>=0 && a_iref < iref - a->core.pos ) a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, @@ -5716,8 +5715,6 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b) err = a_ret<-1?-1:0; break; } - if ( iref < a_iref + a->core.pos ) - iref = a_iref + a->core.pos; while ( b_ret >= 0 && b_iref>=0 && b_iref < iref - b->core.pos ) b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, &b_icig, @@ -5726,14 +5723,55 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b) err = b_ret<-1?-1:0; break; } + + if ( iref < a_iref + a->core.pos ) + iref = a_iref + a->core.pos; + if ( iref < b_iref + b->core.pos ) iref = b_iref + b->core.pos; iref++; - if ( a_iref+a->core.pos != b_iref+b->core.pos ) - // only CMATCH positions, don't know what to do with indels - continue; + // If A or B has a deletion then we catch up the other to this point. + // We also amend quality values using the same rules for mismatch. + if (a_iref+a->core.pos != b_iref+b->core.pos) { + if (a_iref+a->core.pos < b_iref+b->core.pos + && b_cigar > bam_get_cigar(b) + && bam_cigar_op(b_cigar[-1]) == BAM_CDEL) { + // Del in B means it's moved on further than A + do { + a_qual[a_iseq] = amul + ? a_qual[a_iseq]*0.8 + : 0; + a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max, + &a_icig, &a_iseq, &a_iref); + if (a_ret < 0) + return -(a_ret<-1); // 0 or -1 + } while (a_iref + a->core.pos < b_iref+b->core.pos); + } else if (a_cigar > bam_get_cigar(a) + && bam_cigar_op(a_cigar[-1]) == BAM_CDEL) { + // Del in A means it's moved on further than B + do { + b_qual[b_iseq] = bmul + ? b_qual[b_iseq]*0.8 + : 0; + b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, + &b_icig, &b_iseq, &b_iref); + if (b_ret < 0) + return -(b_ret<-1); // 0 or -1 + } while (b_iref + b->core.pos < a_iref+a->core.pos); + } else { + // Anything else, eg ref-skip, we don't support here + continue; + } + } + + // fprintf(stderr, "a_cig=%ld,%ld b_cig=%ld,%ld iref=%ld " + // "a_iref=%ld b_iref=%ld a_iseq=%ld b_iseq=%ld\n", + // a_cigar-bam_get_cigar(a), a_icig, + // b_cigar-bam_get_cigar(b), b_icig, + // iref, a_iref+a->core.pos+1, b_iref+b->core.pos+1, + // a_iseq, b_iseq); if (a_iseq > a->core.l_qseq || b_iseq > b->core.l_qseq) // Fell off end of sequence, bad CIGAR? From 62b74797463064c487bf57eb1c9b677a5d8e3252 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Fri, 16 Feb 2024 14:04:36 +0000 Subject: [PATCH 2/2] Extend pileup test to include quality values. Without this we cannot test that the overlap removal code is working, which operates by zeroing quality values. --- test/mpileup/c1#pad1.out | 20 +- test/mpileup/c1#pad2.out | 20 +- test/mpileup/c1#pad3.out | 10 +- test/mpileup/mp_D.out | 22 +- test/mpileup/mp_DI.out | 24 +- test/mpileup/mp_I.out | 22 +- test/mpileup/mp_ID.out | 24 +- test/mpileup/mp_N.out | 22 +- test/mpileup/mp_N2.out | 26 +- test/mpileup/mp_P.out | 20 +- test/mpileup/mp_overlap1.out | 12 + test/mpileup/mp_overlap1.sam | 4 + test/mpileup/mp_overlap2.out | 12 + test/mpileup/mp_overlap2.sam | 4 + test/mpileup/mpileup.tst | 5 + test/mpileup/small.out | 644 +++++++++++++++++------------------ test/pileup.c | 19 ++ 17 files changed, 483 insertions(+), 427 deletions(-) create mode 100644 test/mpileup/mp_overlap1.out create mode 100644 test/mpileup/mp_overlap1.sam create mode 100644 test/mpileup/mp_overlap2.out create mode 100644 test/mpileup/mp_overlap2.sam diff --git a/test/mpileup/c1#pad1.out b/test/mpileup/c1#pad1.out index cbac51ad8..eda860011 100644 --- a/test/mpileup/c1#pad1.out +++ b/test/mpileup/c1#pad1.out @@ -1,10 +1,10 @@ -c1 1 9 ^!A^!A^!A^!A^!A^!A^!A^!A^!A -c1 2 9 AAAAAAAAA-3() -c1 3 9 CCCCCCCC* -c1 4 9 CCCCCCCC-1()* -c1 5 9 GGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3() -c1 6 9 CCCCCCC** -c1 7 9 GGGGGGGG* -c1 8 9 GGGGGGGG* -c1 9 9 TTTTTTTTT -c1 10 9 T$T$T$T$T$T$T$T$T$ +c1 1 9 ^!A^!A^!A^!A^!A^!A^!A^!A^!A ~~~~~~~~~ +c1 2 9 AAAAAAAAA-3() ~~~~~~~~~ +c1 3 9 CCCCCCCC* ~~~~~~~~~ +c1 4 9 CCCCCCCC-1()* ~~~~~~~~~ +c1 5 9 GGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3() ~~~~~~~~~ +c1 6 9 CCCCCCC** ~~~~~~~~~ +c1 7 9 GGGGGGGG* ~~~~~~~~~ +c1 8 9 GGGGGGGG* ~~~~~~~~~ +c1 9 9 TTTTTTTTT ~~~~~~~~~ +c1 10 9 T$T$T$T$T$T$T$T$T$ ~~~~~~~~~ diff --git a/test/mpileup/c1#pad2.out b/test/mpileup/c1#pad2.out index 9cab78a87..7bab80ec2 100644 --- a/test/mpileup/c1#pad2.out +++ b/test/mpileup/c1#pad2.out @@ -1,10 +1,10 @@ -c1 1 12 ^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!* -c1 2 12 AAAAAAAAAA-3()A* -c1 3 12 CCCCCCCCC*C* -c1 4 12 CCCCCCCCC-1()*C-2()* -c1 5 13 GGGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()**+6(**TA**)-5()^!G+6(**TA**)$ -c1 6 12 CCCCCCCC**** -c1 7 12 GGGGGGGGG*G* -c1 8 12 GGGGGGGGG*G* -c1 9 12 TTTTTTTTTTT* -c1 10 12 T$T$T$T$T$T$T$T$T$T$T$*$ +c1 1 12 ^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!* ~~~~~~~~~~~~ +c1 2 12 AAAAAAAAAA-3()A* ~~~~~~~~~~~~ +c1 3 12 CCCCCCCCC*C* ~~~~~~~~~~~~ +c1 4 12 CCCCCCCCC-1()*C-2()* ~~~~~~~~~~~~ +c1 5 13 GGGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()**+6(**TA**)-5()^!G+6(**TA**)$ ~~~~~~~~~~~~~ +c1 6 12 CCCCCCCC**** ~~~~~~~~~~~~ +c1 7 12 GGGGGGGGG*G* ~~~~~~~~~~~~ +c1 8 12 GGGGGGGGG*G* ~~~~~~~~~~~~ +c1 9 12 TTTTTTTTTTT* ~~~~~~~~~~~~ +c1 10 12 T$T$T$T$T$T$T$T$T$T$T$*$ ~~~~~~~~~~~~ diff --git a/test/mpileup/c1#pad3.out b/test/mpileup/c1#pad3.out index d56eae596..e0ce41871 100644 --- a/test/mpileup/c1#pad3.out +++ b/test/mpileup/c1#pad3.out @@ -1,5 +1,5 @@ -c1 6 11 ^!C^!C^!C^!C^!C^!C^!C^!C^!*^!*^!* -c1 7 11 GGGGGGGGG*G -c1 8 11 GGGGGGGGG*G -c1 9 11 TTTTTTTTTTT -c1 10 11 T$T$T$T$T$T$T$T$T$T$T$ +c1 6 11 ^!C^!C^!C^!C^!C^!C^!C^!C^!*^!*^!* ~~~~~~~~~~~ +c1 7 11 GGGGGGGGG*G ~~~~~~~~~~~ +c1 8 11 GGGGGGGGG*G ~~~~~~~~~~~ +c1 9 11 TTTTTTTTTTT ~~~~~~~~~~~ +c1 10 11 T$T$T$T$T$T$T$T$T$T$T$ ~~~~~~~~~~~ diff --git a/test/mpileup/mp_D.out b/test/mpileup/mp_D.out index 656cedb5d..72f1c433e 100644 --- a/test/mpileup/mp_D.out +++ b/test/mpileup/mp_D.out @@ -1,11 +1,11 @@ -z 2 3 ^!A^!A^!* -z 3 3 GG* -z 4 3 CCC -z 5 3 TT-3()T -z 6 3 T*T -z 7 3 A*A -z 8 3 G*G -z 9 3 CCC -z 10 3 AAA-2() -z 11 3 GG* -z 12 3 G$G$*$ +z 2 3 ^!A^!A^!* 002 +z 3 3 GG* 112 +z 4 3 CCC 222 +z 5 3 TT-3()T 333 +z 6 3 T*T 474 +z 7 3 A*A 575 +z 8 3 G*G 676 +z 9 3 CCC 777 +z 10 3 AAA-2() 888 +z 11 3 GG* 99~ +z 12 3 G$G$*$ 00~ diff --git a/test/mpileup/mp_DI.out b/test/mpileup/mp_DI.out index 8a2ff2e2a..5818c16ff 100644 --- a/test/mpileup/mp_DI.out +++ b/test/mpileup/mp_DI.out @@ -1,12 +1,12 @@ -z 2 5 ^!A^!A^!A^!*^!* -z 3 5 GGG*+2(AA)*+2(*A) -z 4 5 CCCCC -z 5 5 TTTTT -z 6 5 TTTTT -z 7 5 AAAAA -z 8 5 G-2()G-2()G-2()GG -z 9 5 ***CC -z 10 5 *+2(TT)*+2(TT)$*+2(*T)$AA -z 11 3 GGG -z 12 3 GG$G$ -z 13 1 C$ +z 2 5 ^!A^!A^!A^!*^!* 000AB +z 3 5 GGG*+2(AA)*+2(*A) 111AB +z 4 5 CCCCC 22222 +z 5 5 TTTTT 33333 +z 6 5 TTTTT 44444 +z 7 5 AAAAA 55555 +z 8 5 G-2()G-2()G-2()GG 66666 +z 9 5 ***CC AAB77 +z 10 5 *+2(TT)*+2(TT)$*+2(*T)$AA AAB88 +z 11 3 GGG 999 +z 12 3 GG$G$ 000 +z 13 1 C$ 1 diff --git a/test/mpileup/mp_I.out b/test/mpileup/mp_I.out index c437a49d9..d62e9928c 100644 --- a/test/mpileup/mp_I.out +++ b/test/mpileup/mp_I.out @@ -1,11 +1,11 @@ -z 2 3 ^!A^!A^!A -z 3 3 GGG -z 4 3 CCC -z 5 3 TT+3(CCC)T -z 6 3 TTT -z 7 3 AAA -z 8 3 GGG -z 9 3 CCC -z 10 3 AAA -z 11 3 GGG -z 12 3 G$G$G+2(=A)$ +z 2 3 ^!A^!A^!A 000 +z 3 3 GGG 111 +z 4 3 CCC 222 +z 5 3 TT+3(CCC)T 333 +z 6 3 TTT 444 +z 7 3 AAA 555 +z 8 3 GGG 666 +z 9 3 CCC 777 +z 10 3 AAA 888 +z 11 3 GGG 999 +z 12 3 G$G$G+2(=A)$ 000 diff --git a/test/mpileup/mp_ID.out b/test/mpileup/mp_ID.out index 4f88f51e0..4f83ef4b7 100644 --- a/test/mpileup/mp_ID.out +++ b/test/mpileup/mp_ID.out @@ -1,12 +1,12 @@ -z 2 3 ^!A^!A^!A -z 3 3 GGG -z 4 5 CCC^!*^!* -z 5 5 TTT** -z 6 5 TTTTT -z 7 5 AAAAA -z 8 5 G+2(TT)-2()G+2(TT)-2()G+2(T*)-2()GG -z 9 5 ***CC -z 10 5 **$*$AA -z 11 3 GGG -z 12 3 GG$G$ -z 13 1 C$ +z 2 3 ^!A^!A^!A 000 +z 3 3 GGG 111 +z 4 5 CCC^!*^!* 22244 +z 5 5 TTT** 33344 +z 6 5 TTTTT 44444 +z 7 5 AAAAA 55555 +z 8 5 G+2(TT)-2()G+2(TT)-2()G+2(T*)-2()GG 66666 +z 9 5 ***CC 9~~77 +z 10 5 **$*$AA 9~~88 +z 11 3 GGG 999 +z 12 3 GG$G$ 000 +z 13 1 C$ 1 diff --git a/test/mpileup/mp_N.out b/test/mpileup/mp_N.out index 695e4634c..0b8dede38 100644 --- a/test/mpileup/mp_N.out +++ b/test/mpileup/mp_N.out @@ -1,11 +1,11 @@ -z 2 3 ^!A^!A^!> -z 3 3 GG> -z 4 3 CCC -z 5 3 TTT -z 6 3 T>T -z 7 3 A>A -z 8 3 G>G -z 9 3 CCC -z 10 3 AAA -z 11 3 GG> -z 12 3 G$G$>$ +z 2 3 ^!A^!A^!> 002 +z 3 3 GG> 112 +z 4 3 CCC 222 +z 5 3 TTT 333 +z 6 3 T>T 474 +z 7 3 A>A 575 +z 8 3 G>G 676 +z 9 3 CCC 777 +z 10 3 AAA 888 +z 11 3 GG> 99~ +z 12 3 G$G$>$ 00~ diff --git a/test/mpileup/mp_N2.out b/test/mpileup/mp_N2.out index baf168364..5ade66a5e 100644 --- a/test/mpileup/mp_N2.out +++ b/test/mpileup/mp_N2.out @@ -1,13 +1,13 @@ -z 1 6 ^!T^!T^!T^!T^!T^!T -z 2 6 AAAAAA -z 3 6 GGGGGG -z 4 6 C+2(AA)-5()C+2(A*)-5()C+2(*A)-5()C+2(AA)C+2(A*)C+2(*A) -z 5 6 ***>>> -z 6 6 ***>>> -z 7 6 ***>>> -z 8 6 ***>>> -z 9 6 *+2(TT)*+2(*T)*+2(T*)>+2(TT)>+2(*T)>+2(T*) -z 10 6 AAAAAA -z 11 6 GGGGGG -z 12 6 GGGGGG -z 13 6 T$T$T$T$T$T$ +z 1 6 ^!T^!T^!T^!T^!T^!T AAAAAA +z 2 6 AAAAAA BBBBBB +z 3 6 GGGGGG CCCCCC +z 4 6 C+2(AA)-5()C+2(A*)-5()C+2(*A)-5()C+2(AA)C+2(A*)C+2(*A) DDDDDD +z 5 6 ***>>> GHGGHG +z 6 6 ***>>> GHGGHG +z 7 6 ***>>> GHGGHG +z 8 6 ***>>> GHGGHG +z 9 6 *+2(TT)*+2(*T)*+2(T*)>+2(TT)>+2(*T)>+2(T*) GHGGHG +z 10 6 AAAAAA IIIIII +z 11 6 GGGGGG JJJJJJ +z 12 6 GGGGGG KKKKKK +z 13 6 T$T$T$T$T$T$ LLLLLL diff --git a/test/mpileup/mp_P.out b/test/mpileup/mp_P.out index 471b7d7fc..003bed0d2 100644 --- a/test/mpileup/mp_P.out +++ b/test/mpileup/mp_P.out @@ -1,10 +1,10 @@ -z 2 5 ^!A^!A^!A^!A^!A -z 3 5 GGGGG -z 4 5 CCCCC -z 5 5 TTTTT -z 6 5 TT+4(GGCC)T+4(GG**)T+4(*GC*)T+4(**CC) -z 7 5 AAAAA -z 8 5 GGGGG -z 9 5 CCCCC -z 10 5 AAAAA -z 11 5 G$G$G$G$G$ +z 2 5 ^!A^!A^!A^!A^!A 00000 +z 3 5 GGGGG 11111 +z 4 5 CCCCC 22222 +z 5 5 TTTTT 33333 +z 6 5 TT+4(GGCC)T+4(GG**)T+4(*GC*)T+4(**CC) 44444 +z 7 5 AAAAA 55555 +z 8 5 GGGGG 66666 +z 9 5 CCCCC 77777 +z 10 5 AAAAA 88888 +z 11 5 G$G$G$G$G$ 99999 diff --git a/test/mpileup/mp_overlap1.out b/test/mpileup/mp_overlap1.out new file mode 100644 index 000000000..56d70b0dd --- /dev/null +++ b/test/mpileup/mp_overlap1.out @@ -0,0 +1,12 @@ +1 100003 2 ^St^+T {! +1 100004 2 aA-5() {! +1 100005 2 a* N! +1 100006 2 g* N! +1 100007 2 c* N! +1 100008 2 a* N! +1 100009 2 c* N! +1 100010 2 aA {! +1 100011 2 cC {! +1 100012 2 aA {! +1 100013 2 gG {! +1 100014 2 a$A$ {! diff --git a/test/mpileup/mp_overlap1.sam b/test/mpileup/mp_overlap1.sam new file mode 100644 index 000000000..0e3d14bf3 --- /dev/null +++ b/test/mpileup/mp_overlap1.sam @@ -0,0 +1,4 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:1 LN:249250621 +r1 147 1 100003 50 12M * 0 0 TAAGCACACAGA ZZZZZZZZZZZZ +r1 99 1 100003 10 2M5D5M * 0 0 TAACAGA BBBBBBB diff --git a/test/mpileup/mp_overlap2.out b/test/mpileup/mp_overlap2.out new file mode 100644 index 000000000..7e5af6dbc --- /dev/null +++ b/test/mpileup/mp_overlap2.out @@ -0,0 +1,12 @@ +1 100003 2 ^+T^St {! +1 100004 2 A-5()a {! +1 100005 2 *a {! +1 100006 2 *g {! +1 100007 2 *c {! +1 100008 2 *a {! +1 100009 2 *c {! +1 100010 2 Aa {! +1 100011 2 Cc {! +1 100012 2 Aa {! +1 100013 2 Gg {! +1 100014 2 A$a$ {! diff --git a/test/mpileup/mp_overlap2.sam b/test/mpileup/mp_overlap2.sam new file mode 100644 index 000000000..ba9b517c5 --- /dev/null +++ b/test/mpileup/mp_overlap2.sam @@ -0,0 +1,4 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:1 LN:249250621 +r1 99 1 100003 10 2M5D5M * 0 0 TAACAGA BBBBBBB +r1 147 1 100003 50 12M * 0 0 TAAGCACACAGA ZZZZZZZZZZZZ diff --git a/test/mpileup/mpileup.tst b/test/mpileup/mpileup.tst index 4ffbd3481..534383ea0 100644 --- a/test/mpileup/mpileup.tst +++ b/test/mpileup/mpileup.tst @@ -71,3 +71,8 @@ P c1#pad3.out $pileup -m c1#pad3.sam # Issue #852. Problem caused by alignments with entirely S/I ops in CIGAR. P small.out $pileup -m small.bam + +# Overlap removal and the effect on quality values +P mp_overlap1.out $pileup -m mp_overlap1.sam +P mp_overlap2.out $pileup -m mp_overlap2.sam + diff --git a/test/mpileup/small.out b/test/mpileup/small.out index b5c161024..13f943a53 100644 --- a/test/mpileup/small.out +++ b/test/mpileup/small.out @@ -1,322 +1,322 @@ -2 1 1 ^]T -2 2 1 G -2 3 1 G -2 4 1 A -2 5 1 G -2 6 1 A -2 7 1 G -2 8 1 C -2 9 1 A -2 10 1 C -2 11 1 A -2 12 1 T -2 13 1 A -2 14 1 A -2 15 1 C -2 16 1 T -2 17 1 T -2 18 1 G -2 19 1 G -2 20 1 G -2 21 1 T -2 22 1 G -2 23 1 A -2 24 1 G -2 25 1 A -2 26 1 T -2 27 1 G -2 28 1 A -2 29 1 T -2 30 1 G -2 31 2 A^]A -2 32 2 AA -2 33 2 AA -2 34 2 TT -2 35 2 GG -2 36 2 AA -2 37 2 GG -2 38 2 CC -2 39 2 AA -2 40 2 CC -2 41 2 TT -2 42 2 GG -2 43 2 GG -2 44 2 CC -2 45 2 TT -2 46 2 TT -2 47 2 TT -2 48 2 GG -2 49 2 GG -2 50 2 AA -2 51 2 GG -2 52 2 TT -2 53 2 CC -2 54 2 AA -2 55 2 CC -2 56 2 AA -2 57 2 CC -2 58 2 AA -2 59 2 GG -2 60 2 AA -2 61 2 CC -2 62 2 CC -2 63 2 AA -2 64 3 GG^]g -2 65 3 GGg -2 66 3 GGg -2 67 4 TTt^]t -2 68 4 CCcc -2 69 4 CCcc -2 70 4 AAaa -2 71 4 GGgg -2 72 4 GGgg -2 73 4 CCcc -2 74 4 GGgg -2 75 4 C$Ccc -2 76 3 Ccc -2 77 3 Ttt -2 78 3 Agg -2 79 3 Ttt -2 80 3 Aat -2 81 3 Ccc -2 82 3 Ccc -2 83 3 Aaa -2 84 3 Ttt -2 85 3 Aaa -2 86 3 Aaa -2 87 3 Ccc -2 88 3 Acc -2 89 3 Ccc -2 90 3 Ttt -2 91 3 Ccc -2 92 3 Tgt -2 93 3 Aaa -2 94 3 Ggg -2 95 3 Ttt -2 96 3 Ggg -2 97 3 Ggg -2 98 3 Ttt -2 99 3 Ggg -2 100 3 Ttt -2 101 3 Ggg -2 102 3 Ggg -2 103 3 Ccc -2 104 3 Ggg -2 105 3 G$gg -2 106 2 aa -2 107 2 aa -2 108 2 cc -2 109 2 cc -2 110 2 tt -2 111 2 cc -2 112 2 tt -2 113 2 cc -2 114 2 aa -2 115 2 gg -2 116 2 aa -2 117 2 cc -2 118 2 cc -2 119 2 tt -2 120 2 cc -2 121 2 cc -2 122 2 cc -2 123 2 aa -2 124 2 gg -2 125 2 cc -2 126 2 cc -2 127 2 aa -2 128 2 gg -2 129 2 aa -2 130 2 aa -2 131 2 aa -2 132 2 gg -2 133 2 gg -2 134 2 ag -2 135 2 aa -2 136 2 tt -2 137 2 cc -2 138 2 t$t$ -2 495 1 ^Ft -2 496 1 t -2 497 1 t -2 498 1 g -2 499 1 g -2 500 1 c -2 501 1 a -2 502 1 a -2 503 1 t -2 504 1 t -2 505 1 t -2 506 1 a -2 507 1 c -2 508 1 a -2 509 1 c -2 510 1 t -2 511 1 g -2 512 1 t -2 513 1 g -2 514 1 t -2 515 1 t -2 516 1 a -2 517 1 t -2 518 1 a -2 519 1 g -2 520 1 c -2 521 1 a -2 522 1 a -2 523 1 t -2 524 1 a -2 525 1 t -2 526 1 a -2 527 1 g -2 528 1 t -2 529 1 g -2 530 1 a -2 531 1 a -2 532 1 a -2 533 1 a -2 534 1 g -2 535 1 g -2 536 1 g -2 537 1 t -2 538 1 g -2 539 1 a -2 540 1 t -2 541 1 c -2 542 1 a -2 543 1 t -2 544 1 t -2 545 1 a -2 546 1 c -2 547 1 c -2 548 1 t -2 549 1 c -2 550 1 a -2 551 1 a -2 552 1 g -2 553 1 a -2 554 1 c -2 555 1 t -2 556 1 g -2 557 1 t -2 558 1 t -2 559 1 c -2 560 1 a -2 561 1 c -2 562 1 a -2 563 1 a -2 564 1 a -2 565 1 c -2 566 1 a -2 567 1 c -2 568 1 a -2 569 1 t$ -2 648 1 ^gA -2 649 1 C -2 650 1 G -2 651 1 C -2 652 1 A -2 653 1 C -2 654 1 C -2 655 1 C -2 656 1 T -2 657 1 C -2 658 1 T -2 659 1 A -2 660 1 T -2 661 1 C -2 662 1 C -2 663 1 C -2 664 1 C -2 665 1 A -2 666 1 C -2 667 1 A -2 668 1 T -2 669 1 A -2 670 1 A -2 671 1 A -2 672 1 T -2 673 1 C -2 674 1 T -2 675 1 A -2 676 1 T -2 677 1 A -2 678 1 C -2 679 1 A -2 680 1 A -2 681 1 C -2 682 2 A^>a -2 683 2 Cc -2 684 2 Tt -2 685 2 Cc -2 686 2 Aa -2 687 2 Cc -2 688 2 Cc -2 689 2 Cc -2 690 2 Tt -2 691 2 Cc -2 692 2 Tt -2 693 2 Aa -2 694 2 Cc -2 695 2 Aa -2 696 2 Cc -2 697 2 Cc -2 698 2 Cc -2 699 2 Aa -2 700 2 Cc -2 701 2 Aa -2 702 2 Tt -2 703 2 Aa -2 704 2 Cc -2 705 2 Aa -2 706 2 Tt -2 707 2 Cc -2 708 2 Tt -2 709 2 Aa -2 710 2 Tt -2 711 2 Aa -2 712 2 Cc -2 713 2 Aa -2 714 2 Aa -2 715 2 Cc -2 716 2 Aa -2 717 2 Cc -2 718 2 Gg -2 719 2 C$c -2 720 1 a -2 721 1 c -2 722 1 c -2 723 1 c -2 724 1 t -2 725 1 c -2 726 1 t -2 727 1 a -2 728 1 c -2 729 1 c -2 730 1 c -2 731 1 c -2 732 1 a -2 733 1 c -2 734 1 a -2 735 1 t -2 736 1 a -2 737 1 c -2 738 1 g -2 739 1 t -2 740 1 c -2 741 1 t -2 742 1 a -2 743 1 c -2 744 1 a -2 745 1 c -2 746 1 a -2 747 1 a -2 748 1 c -2 749 1 a -2 750 1 t -2 751 1 g -2 752 1 c -2 753 1 a -2 754 1 c -2 755 1 g -2 756 1 c$ +2 1 1 ^]T A +2 2 1 G E +2 3 1 G D +2 4 1 A @ +2 5 1 G ? +2 6 1 A ? +2 7 1 G E +2 8 1 C C +2 9 1 A B +2 10 1 C J +2 11 1 A B +2 12 1 T C +2 13 1 A A +2 14 1 A D +2 15 1 C J +2 16 1 T A +2 17 1 T A +2 18 1 G J +2 19 1 G I +2 20 1 G D +2 21 1 T @ +2 22 1 G I +2 23 1 A < +2 24 1 G J +2 25 1 A A +2 26 1 T < +2 27 1 G I +2 28 1 A ? +2 29 1 T @ +2 30 1 G K +2 31 2 A^]A BA +2 32 2 AA E< +2 33 2 AA E@ +2 34 2 TT A@ +2 35 2 GG KF +2 36 2 AA C; +2 37 2 GG K? +2 38 2 CC HF +2 39 2 AA DA +2 40 2 CC J= +2 41 2 TT FE +2 42 2 GG II +2 43 2 GG K; +2 44 2 CC I= +2 45 2 TT GE +2 46 2 TT CA +2 47 2 TT AB +2 48 2 GG 1H +2 49 2 GG :9 +2 50 2 AA =9 +2 51 2 GG FJ +2 52 2 TT AA +2 53 2 CC HH +2 54 2 AA CC +2 55 2 CC HG +2 56 2 AA C@ +2 57 2 CC HE +2 58 2 AA D> +2 59 2 GG AI +2 60 2 AA A@ +2 61 2 CC IG +2 62 2 CC 7( +2 63 2 AA +2 80 3 Aat !3) +2 81 3 Ccc !HU +2 82 3 Ccc !JP +2 83 3 Aaa !@Z +2 84 3 Ttt !3S +2 85 3 Aaa !@T +2 86 3 Aaa !=S +2 87 3 Ccc !HI +2 88 3 Acc !F9 +2 89 3 Ccc !EY +2 90 3 Ttt !<] +2 91 3 Ccc !5N +2 92 3 Tgt !-N +2 93 3 Aaa !C_ +2 94 3 Ggg !CT +2 95 3 Ttt ! +2 110 2 tt B> +2 111 2 cc JD +2 112 2 tt B@ +2 113 2 cc K; +2 114 2 aa ?C +2 115 2 gg B= +2 116 2 aa ?8 +2 117 2 cc J& +2 118 2 cc AF +2 119 2 tt B@ +2 120 2 cc J: +2 121 2 cc J> +2 122 2 cc IF +2 123 2 aa DC +2 124 2 gg H@ +2 125 2 cc I7 +2 126 2 cc IG +2 127 2 aa C7 +2 128 2 gg BB +2 129 2 aa >7 +2 130 2 aa <> +2 131 2 aa C> +2 132 2 gg F6 +2 133 2 gg F, +2 134 2 ag >0 +2 135 2 aa ?? +2 136 2 tt ?? +2 137 2 cc EB +2 138 2 t$t$ ?> +2 495 1 ^Ft E +2 496 1 t E +2 497 1 t D +2 498 1 g J +2 499 1 g L +2 500 1 c N +2 501 1 a D +2 502 1 a D +2 503 1 t F +2 504 1 t F +2 505 1 t C +2 506 1 a B +2 507 1 c N +2 508 1 a B +2 509 1 c N +2 510 1 t D +2 511 1 g L +2 512 1 t D +2 513 1 g L +2 514 1 t F +2 515 1 t B +2 516 1 a D +2 517 1 t C +2 518 1 a G +2 519 1 g J +2 520 1 c M +2 521 1 a C +2 522 1 a C +2 523 1 t B +2 524 1 a C +2 525 1 t B +2 526 1 a H +2 527 1 g L +2 528 1 t D +2 529 1 g M +2 530 1 a D +2 531 1 a D +2 532 1 a D +2 533 1 a H +2 534 1 g J +2 535 1 g J +2 536 1 g L +2 537 1 t D +2 538 1 g M +2 539 1 a C +2 540 1 t C +2 541 1 c M +2 542 1 a C +2 543 1 t D +2 544 1 t A +2 545 1 a @ +2 546 1 c L +2 547 1 c M +2 548 1 t C +2 549 1 c M +2 550 1 a C +2 551 1 a G +2 552 1 g K +2 553 1 a @ +2 554 1 c M +2 555 1 t C +2 556 1 g K +2 557 1 t E +2 558 1 t B +2 559 1 c K +2 560 1 a @ +2 561 1 c K +2 562 1 a A +2 563 1 a A +2 564 1 a @ +2 565 1 c I +2 566 1 a ? +2 567 1 c I +2 568 1 a > +2 569 1 t$ @ +2 648 1 ^gA ? +2 649 1 C F +2 650 1 G 0 +2 651 1 C D +2 652 1 A < +2 653 1 C < +2 654 1 C G +2 655 1 C > +2 656 1 T D +2 657 1 C H +2 658 1 T @ +2 659 1 A A +2 660 1 T > +2 661 1 C E +2 662 1 C : +2 663 1 C = +2 664 1 C H +2 665 1 A A +2 666 1 C F +2 667 1 A C +2 668 1 T < +2 669 1 A ? +2 670 1 A @ +2 671 1 A ? +2 672 1 T / +2 673 1 C : +2 674 1 T @ +2 675 1 A + +2 676 1 T > +2 677 1 A ? +2 678 1 C D +2 679 1 A B +2 680 1 A @ +2 681 1 C 5 +2 682 2 A^>a +2 687 2 Cc GH +2 688 2 Cc 2H +2 689 2 Cc :F +2 690 2 Tt E? +2 691 2 Cc GG +2 692 2 Tt C@ +2 693 2 Aa +? +2 694 2 Cc =G +2 695 2 Aa B@ +2 696 2 Cc FH +2 697 2 Cc J +2 701 2 Aa AA +2 702 2 Tt ?? +2 703 2 Aa 3A +2 704 2 Cc CM +2 705 2 Aa CC +2 706 2 Tt BB +2 707 2 Cc HL +2 708 2 Tt EB +2 709 2 Aa =C +2 710 2 Tt ?? +2 711 2 Aa @A +2 712 2 Cc CK +2 713 2 Aa =C +2 714 2 Aa ;A +2 715 2 Cc >K +2 716 2 Aa @@ +2 717 2 Cc 5B +2 718 2 Gg 8I +2 719 2 C$c (G +2 720 1 a = +2 721 1 c J +2 722 1 c K +2 723 1 c J +2 724 1 t A +2 725 1 c H +2 726 1 t > +2 727 1 a ? +2 728 1 c J +2 729 1 c L +2 730 1 c J +2 731 1 c L +2 732 1 a @ +2 733 1 c L +2 734 1 a B +2 735 1 t A +2 736 1 a ? +2 737 1 c C +2 738 1 g J +2 739 1 t B +2 740 1 c I +2 741 1 t @ +2 742 1 a @ +2 743 1 c J +2 744 1 a @ +2 745 1 c I +2 746 1 a B +2 747 1 a @ +2 748 1 c K +2 749 1 a @ +2 750 1 t B +2 751 1 g H +2 752 1 c H +2 753 1 a > +2 754 1 c B +2 755 1 g F +2 756 1 c$ ? diff --git a/test/pileup.c b/test/pileup.c index e36f34d23..e31c6f1d3 100644 --- a/test/pileup.c +++ b/test/pileup.c @@ -119,6 +119,19 @@ static int print_pileup_seq(const bam_pileup1_t *p, int n) { return -1; } +static void print_pileup_qual(const bam_pileup1_t *p, int n) { + int i; + + for (i = 0; i < n; i++, p++) { + uint8_t *qual = bam_get_qual(p->b); + uint8_t q = '~'; + if (p->qpos < p->b->core.l_qseq && + qual[p->qpos]+33 < '~') + q = qual[p->qpos]+33; + putchar(q); + } +} + static int test_pileup(ptest_t *input) { bam_plp_t plp = NULL; const bam_pileup1_t *p; @@ -143,6 +156,9 @@ static int test_pileup(ptest_t *input) { if (print_pileup_seq(p, n) < 0) goto fail; + putchar('\t'); + print_pileup_qual(p, n); + putchar('\n'); } if (n < 0) { @@ -188,6 +204,9 @@ static int test_mpileup(ptest_t *input) { if (print_pileup_seq(pileups[0], n_plp[0]) < 0) goto fail; + putchar('\t'); + print_pileup_qual(pileups[0], n_plp[0]); + putchar('\n'); } if (n < 0) {