Skip to content

Commit

Permalink
Rework / add new faidx tests
Browse files Browse the repository at this point in the history
The faidx interfaces were only being tested as side-effects of
other tests.  This adds dedicated tests for them.
  • Loading branch information
daviesrob committed Nov 9, 2022
1 parent 2172aae commit 7c83166
Show file tree
Hide file tree
Showing 15 changed files with 792 additions and 3 deletions.
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ README.md export-ignore
# Remove the text attribute from index_dos.sam, so that the line separators
# for the test file don't get converted into Unix format.
test/index_dos.sam -text

# Remove the text attribute from various faidx test files
test/faidx/faidx*.fa* -text
test/faidx/fastqs*.fq* -text
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ shlib-exports-*.txt
/bgzip
/htsfile
/tabix
/test/faidx/*.tmp*
/test/faidx/FAIL*
/test/fieldarith
/test/hfile
/test/hts_endian
Expand All @@ -59,6 +61,7 @@ shlib-exports-*.txt
/test/test-bcf_set_variant_type
/test/test_bgzf
/test/test_expr
/test/test_faidx
/test/test_index
/test/test_introspection
/test/test_kfunc
Expand Down
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ BUILT_TEST_PROGRAMS = \
test/sam \
test/test_bgzf \
test/test_expr \
test/test_faidx \
test/test_kfunc \
test/test_kstring \
test/test_mod \
Expand Down Expand Up @@ -583,12 +584,13 @@ check test: all $(HTSCODECS_TEST_TARGETS)
fi
test/test_bgzf test/bgziptest.txt
test/test-parse-reg -t test/colons.bam
cd test/faidx && ./test-faidx.sh faidx.tst
cd test/sam_filter && ./filter.sh filter.tst
cd test/tabix && ./test-tabix.sh tabix.tst
cd test/mpileup && ./test-pileup.sh mpileup.tst
cd test/fastq && ./test-fastq.sh
cd test/base_mods && ./base-mods.sh base-mods.tst
REF_PATH=: test/sam test/ce.fa test/faidx.fa test/fastqs.fq
REF_PATH=: test/sam test/ce.fa test/faidx/faidx.fa test/faidx/fastqs.fq
test/test-regidx
cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-}

Expand Down Expand Up @@ -622,6 +624,9 @@ test/test_bgzf: test/test_bgzf.o libhts.a
test/test_expr: test/test_expr.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_expr.o libhts.a -lz $(LIBS) -lpthread

test/test_faidx: test/test_faidx.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_faidx.o libhts.a -lz $(LIBS) -lpthread

test/test_kfunc: test/test_kfunc.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_kfunc.o libhts.a -lz $(LIBS) -lpthread

Expand Down Expand Up @@ -739,6 +744,7 @@ test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_reg
test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h)
test/test_time_funcs.o: test/test_time_funcs.c config.h $(hts_time_funcs_h)
test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h)
test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h)
test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h)
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_kseq_h)
test/test-vcf-sweep.o: test/test-vcf-sweep.c config.h $(htslib_vcf_sweep_h)
Expand Down Expand Up @@ -845,7 +851,9 @@ htslib-uninstalled.pc: htslib.pc.tmp


testclean:
-rm -f test/*.tmp test/*.tmp.* test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt
-rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* test/faidx/FAIL* \
test/longrefs/*.tmp.* test/tabix/*.tmp.* test/tabix/FAIL* \
header-exports.txt shlib-exports-$(SHLIB_FLAVOUR).txt
-rm -rf htscodecs/tests/test.out

# Only remove this in git checkouts
Expand Down
8 changes: 8 additions & 0 deletions test/faidx/ce.1.expected.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>CHROMOSOME_I:5001-5125 length: 125
AACTGGTTCAAAAACAAAAATTTTTTAAACTGTACAAACTGTCCAAAAAT
TCGTCGTAAATCGACACACCCTTCTCATTTTTTCAAAATTTTAATTGTTT
TCGAATGTTTTTTTTGCAGAATAAT
>CHROMOSOME_X:101-225 length: 125
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT
AAGCCTAAGCCTAAGCCTAAGCCTA
6 changes: 6 additions & 0 deletions test/faidx/faidx.1.expected.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>trailingblank2:28-33 length: 6
GGGCCC
>trailingblank3:4-5 length: 2
TA
>bar:4-5 length: 2
TA
File renamed without changes.
6 changes: 6 additions & 0 deletions test/faidx/faidx.fa.expected.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
4 2 4 5
trailingblank1 33 23 12 13
trailingblank2 72 111 24 25
trailingblank3 5 234 4 6
foo 8 252 6 7
bar 8 280 8 9
74 changes: 74 additions & 0 deletions test/faidx/faidx.tst
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (C) 2022 Genome Research Ltd.
#
# Author: Robert Davies <rmd@sanger.ac.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# First field:
# INIT = initialisation, not counted in testing
# P = expected to pass (zero return; expected output matches, if present)
# N = expected to return non-zero
# F = expected to fail
#
# Second field (P/N/F only):
# Filename of expected output. If '.', output is not checked
#
# Rest:
# Command to execute. $bgzip and $test_faidx are replaced with the path to
# bgzip and test_faidx.

# Index fasta
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -e faidx.fa.expected.fai

# Test various functions on the fasta index
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_line_length -e 24 trailingblank2
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_has_seq -e 1 foo
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_has_seq -e 0 absent
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_iseq -e trailingblank3 3
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_seq_len -e 33 trailingblank1
P . $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_seq_len64 -e 72 trailingblank2

# Index fastq
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -e fastqs.fq.expected.fai

# Test various functions on the fastq index
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_line_length -e 63 FAKE0005_3
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_line_length -e 144 SRR014849.203935_3
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_has_seq -e 1 SRR014849.203935_3
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_has_seq -e 0 absent
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_iseq -e FAKE0005_1 0
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_seq_len -e 453 FSRRS4401CM938_1
P . $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -t faidx_seq_len64 -e 309 FSRRS4401AOV6A_4

# Fasta retrieval tests
P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai trailingblank2:28-33 trailingblank3:4-5 bar:4-5
P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_fetch trailingblank2:28-33 trailingblank3:4-5 bar:4-5
P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t faidx_fetch_seq64 trailingblank2:28-33 trailingblank3:4-5 bar:4-5
P faidx.1.expected.fa $test_faidx -i faidx.fa -f faidx.fa.tmp.fai -t fai_adjust_region trailingblank2:28-33 trailingblank3:4-5 bar:4-5

# Fastq retrieval tests
P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90
P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t fai_fetch FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90
P fastqs.1.expected.fq $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai -Q -t faidx_fetch_seq64 FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90
P fastqs.2.expected.fa $test_faidx -i fastqs.fq -f fastqs.fq.tmp.fai FAKE0006_1:4-12 FSRRS4401BE7HA_1:81-120 FAKE0010_2 SRR014849.50939_3:71-90

# Indexing and retrieval on bgzip compressed fasta
INIT $bgzip -c < ../ce.fa > ce.fa.tmp.gz
P . $test_faidx -i ce.fa.tmp.gz -f ce.fa.tmp.gz.fai -g ce.fa.tmp.gz.gzi -e ../ce.fa.fai
P ce.1.expected.fa $test_faidx -i ce.fa.tmp.gz -f ce.fa.tmp.gz.fai -g ce.fa.tmp.gz.gzi CHROMOSOME_I:5001-5125 CHROMOSOME_X:101-225
16 changes: 16 additions & 0 deletions test/faidx/fastqs.1.expected.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@FAKE0006_1:4-12 length: 9
TGCATGCAT
+
{zyxwvuts
@FSRRS4401BE7HA_1:81-120 length: 40
GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca
+
c^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\
@FAKE0010_2 length: 30
gatcrywsmkhbvdnGATCRYWSMKHBVDN
+
I?5+I?5+I?5+I?5+I?5+I?5+I?5+I?
@SRR014849.50939_3:71-90 length: 20
CAATAAATCAATACATAAAA
+
\aZ\d`OY[aY[[\[[e`WP
8 changes: 8 additions & 0 deletions test/faidx/fastqs.2.expected.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>FAKE0006_1:4-12 length: 9
TGCATGCAT
>FSRRS4401BE7HA_1:81-120 length: 40
GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcaca
>FAKE0010_2 length: 30
gatcrywsmkhbvdnGATCRYWSMKHBVDN
>SRR014849.50939_3:71-90 length: 20
CAATAAATCAATACATAAAA
File renamed without changes.
105 changes: 105 additions & 0 deletions test/faidx/fastqs.fq.expected.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
FAKE0005_1 63 85 63 64 151
FAKE0006_1 63 300 63 64 366
FAKE0005_2 63 515 63 64 581
FAKE0006_2 63 730 63 64 796
FAKE0005_3 63 945 63 64 1011
FAKE0006_3 63 1160 63 64 1226
FAKE0005_4 63 1375 63 64 1441
FAKE0006_4 63 1590 63 64 1656
FSRRS4401BE7HA_1 395 1823 395 396 2221
FSRRS4401BRRTC_1 145 2720 145 146 2868
FSRRS4401B64ST_1 382 3118 382 383 3503
FSRRS4401EJ0YH_1 381 3990 381 382 4374
FSRRS4401BK0IB_1 507 4860 507 508 5370
FSRRS4401ARCCB_1 258 5982 258 259 6243
FSRRS4401CM938_1 453 6606 453 454 7062
FSRRS4401EQLIK_1 411 7620 411 412 8034
FSRRS4401AOV6A_1 309 8550 309 310 8862
FSRRS4401EG0ZW_1 424 9276 424 425 9703
FSRRS4401BE7HA_2 395 10231 395 396 10629
FSRRS4401BRRTC_2 145 11128 145 146 11276
FSRRS4401B64ST_2 382 11526 382 383 11911
FSRRS4401EJ0YH_2 381 12398 381 382 12782
FSRRS4401BK0IB_2 507 13268 507 508 13778
FSRRS4401ARCCB_2 258 14390 258 259 14651
FSRRS4401CM938_2 453 15014 453 454 15470
FSRRS4401EQLIK_2 411 16028 411 412 16442
FSRRS4401AOV6A_2 309 16958 309 310 17270
FSRRS4401EG0ZW_2 424 17684 424 425 18111
FSRRS4401BE7HA_3 395 18639 395 396 19037
FSRRS4401BRRTC_3 145 19536 145 146 19684
FSRRS4401B64ST_3 382 19934 382 383 20319
FSRRS4401EJ0YH_3 381 20806 381 382 21190
FSRRS4401BK0IB_3 507 21676 507 508 22186
FSRRS4401ARCCB_3 258 22798 258 259 23059
FSRRS4401CM938_3 453 23422 453 454 23878
FSRRS4401EQLIK_3 411 24436 411 412 24850
FSRRS4401AOV6A_3 309 25366 309 310 25678
FSRRS4401EG0ZW_3 424 26092 424 425 26519
FSRRS4401BE7HA_4 395 27047 80 81 27449
FSRRS4401BRRTC_4 145 27952 80 81 28101
FSRRS4401B64ST_4 382 28352 80 81 28741
FSRRS4401EJ0YH_4 381 29232 80 81 29620
FSRRS4401BK0IB_4 507 30110 80 81 30626
FSRRS4401ARCCB_4 258 31244 80 81 31508
FSRRS4401CM938_4 453 31874 80 81 32335
FSRRS4401EQLIK_4 411 32898 80 81 33317
FSRRS4401AOV6A_4 309 33838 80 81 34153
FSRRS4401EG0ZW_4 424 34570 80 81 35002
FAKE0007_1 41 35549 41 42 35593
FAKE0008_1 41 35752 41 42 35796
FAKE0009_1 41 35955 41 42 35999
FAKE0010_1 30 36143 30 31 36176
FAKE0007_2 41 36324 41 42 36368
FAKE0008_2 41 36527 41 42 36571
FAKE0009_2 41 36730 41 42 36774
FAKE0010_2 30 36918 30 31 36951
FAKE0007_3 41 37099 41 42 37143
FAKE0008_3 41 37302 41 42 37346
FAKE0009_3 41 37505 41 42 37549
FAKE0010_3 30 37693 30 31 37726
FAKE0007_4 41 37874 41 42 37918
FAKE0008_4 41 38077 41 42 38121
FAKE0009_4 41 38280 41 42 38324
FAKE0010_4 30 38468 30 31 38501
FAKE0011_1 41 38649 41 42 38693
FAKE0012_1 41 38852 41 42 38896
FAKE0013_1 41 39055 41 42 39099
FAKE0014_1 30 39250 30 31 39283
FAKE0011_2 41 39431 41 42 39475
FAKE0012_2 41 39634 41 42 39678
FAKE0013_2 41 39837 41 42 39881
FAKE0014_2 30 40032 30 31 40065
FAKE0011_3 41 40213 41 42 40257
FAKE0012_3 41 40416 41 42 40460
FAKE0013_3 41 40619 41 42 40663
FAKE0014_3 30 40814 30 31 40847
FAKE0011_4 41 40995 41 42 41039
FAKE0012_4 41 41198 41 42 41242
FAKE0013_4 41 41401 41 42 41445
FAKE0014_4 30 41596 30 31 41629
FAKE0001_1 94 41745 94 95 41842
FAKE0002_1 94 42022 94 95 42119
FAKE0001_2 94 42299 94 95 42396
FAKE0002_2 94 42576 94 95 42673
FAKE0001_3 94 42853 94 95 42950
FAKE0002_3 94 43130 94 95 43227
FAKE0001_4 94 43407 94 95 43504
FAKE0002_4 94 43684 94 95 43781
FAKE0003_1 68 43963 68 69 44034
FAKE0004_1 68 44190 68 69 44261
FAKE0003_2 68 44417 68 69 44488
FAKE0004_2 68 44644 68 69 44715
FAKE0003_3 68 44871 68 69 44942
FAKE0004_3 68 45098 68 69 45169
FAKE0003_4 68 45325 68 69 45396
FAKE0004_4 68 45552 68 69 45623
SRR014849.50939_1 135 45737 135 136 45875
SRR014849.110027_1 131 46057 131 132 46191
SRR014849.203935_1 144 46369 144 145 46516
SRR014849.50939_2 135 46706 135 136 46844
SRR014849.110027_2 131 47026 131 132 47160
SRR014849.203935_2 144 47338 144 145 47485
SRR014849.50939_3 135 47675 135 136 47813
SRR014849.110027_3 131 47995 131 132 48129
SRR014849.203935_3 144 48307 144 145 48454
35 changes: 35 additions & 0 deletions test/faidx/test-faidx.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/sh
#
# Copyright (C) 2022 Genome Research Ltd.
#
# Author: Robert Davies <rmd@sanger.ac.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Load in the test driver.  It is expected to define the test_driver
# function that is called below.
. ../simple_test_driver.sh

echo "Testing faidx..."

# Locations of the programs under test, relative to this directory.
# Commands in the .tst file refer to them as $bgzip and $test_faidx.
bgzip="../../bgzip"
test_faidx="../test_faidx"

# Pass the script's arguments through quoted, so that each one reaches
# test_driver exactly as given (no word splitting or glob expansion).
test_driver "$@"

exit $?
2 changes: 1 addition & 1 deletion test/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -2263,7 +2263,7 @@ int main(int argc, char **argv)
test_text_file("test/emptyfile", 0);
test_text_file("test/xx#pair.sam", 7);
test_text_file("test/xx.fa", 7);
test_text_file("test/fastqs.fq", 500);
test_text_file("test/faidx/fastqs.fq", 500);
check_enum1();
check_cigar_tab();
check_big_ref(0);
Expand Down
Loading

0 comments on commit 7c83166

Please sign in to comment.