Skip to content

Commit

Permalink
Add a --seed option to mpileup.
Browse files Browse the repository at this point in the history
Random numbers are used by ks_shuffle, which is called in htslib's
errmod code for sub-sampling the data in regions of > 255 depth.

This corrects the OS specific randomness seen in samtools#1459.
  • Loading branch information
jkbonfield committed Apr 21, 2021
1 parent 9aeb1aa commit 525b843
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ dist.o: dist.c dist.h
cols.o: cols.c cols.h
regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h
consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h)
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
Expand Down
2 changes: 2 additions & 0 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,8 @@ multiple regions and many alignment files are processed.
*-x, --ignore-overlaps*::
Disable read-pair overlap detection.

*--seed* 'INT'::
Set the random number seed used when sub-sampling deep regions [0].

==== Output options

Expand Down
6 changes: 6 additions & 0 deletions mpileup.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ DEALINGS IN THE SOFTWARE. */
#include <htslib/faidx.h>
#include <htslib/kstring.h>
#include <htslib/khash_str2int.h>
#include <htslib/hts_os.h>
#include <assert.h>
#include "regidx.h"
#include "bcftools.h"
Expand Down Expand Up @@ -887,6 +888,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
" -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
" -x, --ignore-overlaps disable read-pair overlap detection\n"
" --seed INT random number seed used for sampling deep regions [0]\n"
"\n"
"Output options:\n"
" -a, --annotate LIST optional tags to output; '?' to list available tags []\n"
Expand Down Expand Up @@ -949,6 +951,8 @@ int main_mpileup(int argc, char *argv[])
mplp.bsmpl = bam_smpl_init();
mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB; // the default to be changed in future, see also parse_format_flag()

hts_srand48(0);

static const struct option lopts[] =
{
{"rf", required_argument, NULL, 1}, // require flag
Expand Down Expand Up @@ -998,6 +1002,7 @@ int main_mpileup(int argc, char *argv[])
{"per-sample-mF", no_argument, NULL, 'p'},
{"per-sample-mf", no_argument, NULL, 'p'},
{"platforms", required_argument, NULL, 'P'},
{"seed", required_argument, NULL, 13},
{NULL, 0, NULL, 0}
};
while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
Expand Down Expand Up @@ -1092,6 +1097,7 @@ int main_mpileup(int argc, char *argv[])
}
mplp.fmt_flag |= parse_format_flag(optarg);
break;
case 13: hts_srand48(atoi(optarg)); break;
default:
fprintf(stderr,"Invalid option: '%c'\n", c);
return 1;
Expand Down

0 comments on commit 525b843

Please sign in to comment.