Skip to content

Commit

Permalink
Add a --seed option to mpileup.
Browse files Browse the repository at this point in the history
Random numbers are used by ks_shuffle, which is called in htslib's
errmod code for sub-sampling the data in regions of > 255 depth.

This corrects the OS specific randomness seen in samtools#1459.
  • Loading branch information
jkbonfield committed May 25, 2021
1 parent 2aa127e commit ee5993a
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ dist.o: dist.c dist.h
cols.o: cols.c cols.h
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
Expand Down
2 changes: 2 additions & 0 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,8 @@ those scenarios.
*-x, --ignore-overlaps*::
Disable read-pair overlap detection.

*--seed* 'INT'::
Set the random number seed used when sub-sampling deep regions [0].

==== Output options

Expand Down
6 changes: 6 additions & 0 deletions mpileup.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ DEALINGS IN THE SOFTWARE. */
#include <htslib/faidx.h>
#include <htslib/kstring.h>
#include <htslib/khash_str2int.h>
#include <htslib/hts_os.h>
#include <assert.h>
#include "regidx.h"
#include "bcftools.h"
Expand Down Expand Up @@ -1126,6 +1127,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
" -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
" -x, --ignore-overlaps disable read-pair overlap detection\n"
" --seed INT random number seed used for sampling deep regions [0]\n"
"\n"
"Output options:\n"
" -a, --annotate LIST optional tags to output; '?' to list available tags []\n"
Expand Down Expand Up @@ -1206,6 +1208,8 @@ int main_mpileup(int argc, char *argv[])
mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB|B2B_INFO_SCB|B2B_INFO_ZSCORE;
mplp.max_read_len = 500;

hts_srand48(0);

static const struct option lopts[] =
{
{"rf", required_argument, NULL, 1}, // require flag
Expand Down Expand Up @@ -1264,6 +1268,7 @@ int main_mpileup(int argc, char *argv[])
{"max-read-len", required_argument, NULL, 'M'},
{"config", required_argument, NULL, 'X'},
{"mwu-u", no_argument, NULL, 'U'},
{"seed", required_argument, NULL, 13},
{NULL, 0, NULL, 0}
};
while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:BDd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:M:X:U",lopts,NULL)) >= 0) {
Expand Down Expand Up @@ -1403,6 +1408,7 @@ int main_mpileup(int argc, char *argv[])
return 1;
}
break;
case 13: hts_srand48(atoi(optarg)); break;
default:
fprintf(stderr,"Invalid option: '%c'\n", c);
return 1;
Expand Down

0 comments on commit ee5993a

Please sign in to comment.