From 3be7f86895cd263472864a52c076aa1f60eb97ab Mon Sep 17 00:00:00 2001 From: Joshua Zhuang <71105179+mencian@users.noreply.github.com> Date: Tue, 15 Oct 2024 23:26:49 -0500 Subject: [PATCH] lofreq: add osx-arm64 build (#51417) * lofreq: add osx-arm64 build * add gsl * add m4 build_prefix * remove CPP variable * add setuptools to host --- recipes/lofreq/0001-patches | 260 +++++++++++++++++++++++ recipes/lofreq/build.sh | 20 +- recipes/lofreq/build_failure.osx-64.yaml | 105 --------- recipes/lofreq/meta.yaml | 27 ++- 4 files changed, 299 insertions(+), 113 deletions(-) create mode 100644 recipes/lofreq/0001-patches delete mode 100644 recipes/lofreq/build_failure.osx-64.yaml diff --git a/recipes/lofreq/0001-patches b/recipes/lofreq/0001-patches new file mode 100644 index 0000000000000..c711d24f51e11 --- /dev/null +++ b/recipes/lofreq/0001-patches @@ -0,0 +1,260 @@ +diff --git a/configure.ac b/configure.ac +index ec939f5..13494a5 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -146,11 +146,12 @@ AX_PTHREAD([ + AC_MSG_ERROR([No pthread support on this machine])) + #AX_PTHREAD() + +- +-# explicit libm check ++# if any of these sit in unusual places use ++# export LDFLAGS="-L$path" before calling configure + AC_CHECK_LIB(m, log,, AC_MSG_ERROR([Could not find libm])) + AC_CHECK_LIB(z, gzread,, AC_MSG_ERROR([Could not find libz])) +- ++AC_CHECK_LIB([gslcblas],[cblas_dgemm]) ++AC_CHECK_LIB(gsl, gsl_cdf_poisson_P,, AC_MSG_WARN([libgsl not found. Not using fast approximation])) + + # http://www.gnu.org/software/automake/manual/html_node/Python.html + AM_PATH_PYTHON([2.7]) +diff --git a/src/lofreq/Makefile.am b/src/lofreq/Makefile.am +index 102076b..b7c71cf 100644 +--- a/src/lofreq/Makefile.am ++++ b/src/lofreq/Makefile.am +@@ -1,4 +1,4 @@ +-AM_CFLAGS = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall -I../cdflib90/ -I../uthash $(HTSLIB_CPPFLAGS) @AM_CFLAGS@ ++AM_CFLAGS = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall -O3 -I../cdflib90/ -I../uthash $(HTSLIB_CPPFLAGS) @AM_CFLAGS@ + AM_LDFLAGS = $(LDFLAGS_for_htslib) @AM_LDFLAGS@ + bin_PROGRAMS = lofreq + lofreq_SOURCES = bam_md_ext.c bam_md_ext.h \ +@@ -42,3 +42,4 @@ endif + + # note: order matters + lofreq_LDADD = $(LIBS_for_htslib) ../cdflib90/libcdf.a ++# -l:libgsl.a -lm +diff --git a/src/lofreq/lofreq_call.c b/src/lofreq/lofreq_call.c +index 6537a11..81c0c7b 100644 +--- a/src/lofreq/lofreq_call.c ++++ b/src/lofreq/lofreq_call.c +@@ -315,7 +315,7 @@ call_alt_ins(const plp_col_t *p, double *bi_err_probs, int bi_num_err_probs, + bi_num_err_probs, ins_counts[0], ins_counts[1], ins_counts[2]); + // compute p-value for insertion + if (snpcaller(bi_pvalues, bi_err_probs, bi_num_err_probs, ins_counts, +- conf->bonf_indel, conf->sig)) { ++ conf->bonf_indel, conf->sig, conf->approx_threshold_n)) { + fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n", + __FILE__, __FUNCTION__, __LINE__); + return 1; +@@ -380,7 +380,7 @@ int call_alt_del(const plp_col_t *p, double *bd_err_probs, int bd_num_err_probs, + + /* snpcaller for deletion */ + if (snpcaller(bd_pvalues, bd_err_probs, bd_num_err_probs, del_counts, +- conf->bonf_indel, conf->sig)) { ++ conf->bonf_indel, conf->sig, conf->approx_threshold_n)) { + fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n", + __FILE__, __FUNCTION__, __LINE__); + return 1; +@@ -803,7 +803,7 @@ call_snvs(const plp_col_t *p, varcall_conf_t *conf) + bc_num_err_probs, alt_counts[0], alt_counts[1], alt_counts[2], num_snv_tests, conf->bonf_subst, conf->sig); + + if (snpcaller(pvalues, bc_err_probs, bc_num_err_probs, +- alt_counts, conf->bonf_subst, conf->sig)) { ++ alt_counts, conf->bonf_subst, conf->sig, conf->approx_threshold_n)) { + fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n", + __FILE__, __FUNCTION__, __LINE__); + free(bc_err_probs); +@@ -986,6 +986,7 @@ usage(const mplp_conf_t *mplp_conf, const varcall_conf_t *varcall_conf) + fprintf(stderr, " -C | --min-cov INT Test only positions having at least this coverage [%d]\n", varcall_conf->min_cov); + fprintf(stderr, " (note: without --no-default-filter default filters (incl. coverage) kick in after predictions are done)\n"); + fprintf(stderr, " -d | --max-depth INT Cap coverage at this depth [%d]\n", mplp_conf->max_depth); ++ fprintf(stderr, " -t | --approx-threshold INT Use fast approximation at this depth (might decrease number of calls; off if <= 0) [%d]\n", varcall_conf->approx_threshold_n); + fprintf(stderr, " --illumina-1.3 Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded\n"); + fprintf(stderr, " --use-orphan Count anomalous read pairs (i.e. where mate is not aligned properly)\n"); + fprintf(stderr, " --plp-summary-only No variant calling. Just output pileup summary per column\n"); +@@ -1094,6 +1095,7 @@ for cov in coverage_range: + + {"min-cov", required_argument, NULL, 'C'}, + {"max-depth", required_argument, NULL, 'd'}, ++ {"approx-threshold", required_argument, NULL, 't'}, + + {"illumina-1.3", no_argument, &illumina_1_3, 1}, + {"use-orphan", no_argument, &use_orphan, 1}, +@@ -1272,6 +1274,10 @@ for cov in coverage_range: + mplp_conf.max_depth = atoi(optarg); + break; + ++ case 't': ++ varcall_conf.approx_threshold_n = atoi(optarg); ++ break; ++ + case 'h': + usage(& mplp_conf, & varcall_conf); + return 0; /* WARN: not printing defaults if some args where parsed */ +diff --git a/src/lofreq/lofreq_uniq.c b/src/lofreq/lofreq_uniq.c +index 53ad137..af03fe4 100644 +--- a/src/lofreq/lofreq_uniq.c ++++ b/src/lofreq/lofreq_uniq.c +@@ -309,7 +309,7 @@ uniq_snv(const plp_col_t *p, void *confp) + alt_counts[1] = alt_counts[2] = 0; + + if (snpcaller(pvalues, err_probs, num_err_probs, +- alt_counts, bonf, alpha)) { ++ alt_counts, bonf, alpha, -1)) { + fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n", + __FILE__, __FUNCTION__, __LINE__); + free(err_probs); +diff --git a/src/lofreq/plp.c b/src/lofreq/plp.c +index 808d791..2336445 100644 +--- a/src/lofreq/plp.c ++++ b/src/lofreq/plp.c +@@ -816,7 +816,11 @@ void compile_plp_col(plp_col_t *plp_col, + * n_plp[i] - m + */ + ref_base = (ref && pos < ref_len)? ref[pos] : 'N'; +- ++ /* Added by Ryan Morin in an attempt to mitigate issues with non-ACTG characters in the reference ++ An example position (hg38) affected by this is chr17 83129591 (the reference base is W) */ ++ if (! (ref_base == 'A' || ref_base == 'C' || ref_base == 'T' || ref_base == 'G' || ref_base == 'N')){ ++ ref_base = 'N'; ++ } + plp_col_init(plp_col); + plp_col->target = strdup(target_name); + plp_col->pos = pos; +@@ -1276,8 +1280,8 @@ check_indel: + + for (i = 0; i < NUM_NT4; ++i) { + assert(plp_col->fw_counts[i] + plp_col->rv_counts[i] == plp_col->base_quals[i].n); +- assert(plp_col->base_quals[i].n == plp_col->baq_quals[i].n); +- assert(plp_col->base_quals[i].n == plp_col->map_quals[i].n); ++ /* FIXME only makes sense if BAQ is on assert(plp_col->base_quals[i].n == plp_col->baq_quals[i].n); */ ++ /* FIXME only makes sense if MQ is on assert(plp_col->base_quals[i].n == plp_col->map_quals[i].n); */ + /* assert(plp_col->map_quals[i].n == plp_col->source_quals[i].n);*/ + } + } +diff --git a/src/lofreq/snpcaller.c b/src/lofreq/snpcaller.c +index 253430c..f3a8e44 100644 +--- a/src/lofreq/snpcaller.c ++++ b/src/lofreq/snpcaller.c +@@ -43,6 +43,8 @@ + #include "fet.h" + #include "utils.h" + #include "log.h" ++#include "gsl/gsl_randist.h" ++#include "gsl/gsl_cdf.h" + + #include "snpcaller.h" + #if TIMING +@@ -635,6 +637,7 @@ init_varcall_conf(varcall_conf_t *c) + c->flag |= VARCALL_USE_IDAQ; + c->only_indels = 0; + c->no_indels = 0; ++ c->approx_threshold_n = -1; + } + + +@@ -1062,7 +1065,8 @@ int + snpcaller(long double *snp_pvalues, + const double *err_probs, const int num_err_probs, + const int *noncons_counts, +- const long long int bonf_factor, const double sig_level) ++ const long long int bonf_factor, const double sig_level, ++ const int approx_threshold_n) + { + double *probvec = NULL; + int i; +@@ -1100,6 +1104,33 @@ snpcaller(long double *snp_pvalues, + goto free_and_exit; + } + ++/* how to combine ifndef? */ ++#ifndef HAVE_LIBGSL ++#ifndef HAVE_LIBGSLCBLAS ++ if (approx_threshold_n>0) { ++ LOG_FATAL("%s\n", "Can't use approximation. It was disabled during compile time"); ++ exit(1); ++ } ++#endif ++#endif ++ ++/* how to combine ifdef? */ ++#ifdef HAVE_LIBGSL ++ #ifdef HAVE_LIBGSLCBLAS ++ /* Only approximate if sufficient data available */ ++ if (approx_threshold_n > 0 && num_err_probs > approx_threshold_n) { ++ long double mu = 0; ++ for (int i = 0; i < num_err_probs; ++i) { ++ mu += err_probs[i]; ++ } ++ const long double poibin_approximation = 1 - gsl_cdf_poisson_P(max_noncons_count - 1, mu); ++ if (poibin_approximation * (double)bonf_factor > sig_level) { ++ goto free_and_exit; ++ } ++ } ++ #endif ++#endif ++ + probvec = poissbin(&pvalue, err_probs, num_err_probs, + max_noncons_count, bonf_factor, sig_level); + +@@ -1120,7 +1151,6 @@ snpcaller(long double *snp_pvalues, + goto free_and_exit; + } + +- + /* report p-value for each non-consensus base + */ + for (i=0; i /opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl/bin/samtools - Reason: tried: '/opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl/bin/../lib/libcrypto.1.0.0.dylib' (no such file), '/opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl/bin/../lib/libcrypto.1.0.0.dylib' (no such file), '/usr/local/lib/libcrypto.1.0.0.dylib' (no such file), '/usr/lib/libcrypto.1.0.0.dylib' (no such file, not in dyld cache) - /opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/test_tmp/run_test.sh: line 8: 51805 Abort trap: 6 samtools --help - WARNING: Tests failed for lofreq-2.1.5-py39h8037bd6_12.tar.bz2 - moving package to /opt/mambaforge/envs/bioconda/conda-bld/broken - TESTS FAILED: lofreq-2.1.5-py39h8037bd6_12.tar.bz2 - export PREFIX=/opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl - export SRC_DIR=/opt/mambaforge/envs/bioconda/conda-bld/lofreq_1718124617511/test_tmp - version: 2.1.5 - commit: unknown - build-date: Jun 11 2024 -# Last 100 lines of the build log. diff --git a/recipes/lofreq/meta.yaml b/recipes/lofreq/meta.yaml index 4dc0e346faf57..0d1ccfe2d7006 100644 --- a/recipes/lofreq/meta.yaml +++ b/recipes/lofreq/meta.yaml @@ -1,15 +1,17 @@ -{% set version="2.1.5" %} +{% set version = "2.1.5" %} package: name: lofreq version: {{ version }} source: - url: https://raw.githubusercontent.com/CSB5/lofreq/v{{ version }}/dist/lofreq_star-{{ version }}.tar.gz - sha256: 43028af07faa23c7ec0e167855492ae7bd31c4e7f8158114e51ec12aba5fd184 + url: https://github.com/CSB5/lofreq/archive/refs/tags/v{{ version }}.tar.gz + sha256: da85ec4baca21e20a55b5f9ee491cdda2986d0dc672177007a2c70ca1d804fe7 + patches: + - 0001-patches build: - number: 12 + number: 13 run_exports: - {{ pin_subpackage('lofreq', max_pin='x') }} @@ -17,13 +19,20 @@ requirements: build: - make - {{ compiler('c') }} + - autoconf + - automake + - libtool host: - python + - setuptools # [py >= 3.12] - htslib + - samtools + - gsl - zlib run: - python - samtools + - gsl test: commands: @@ -31,12 +40,18 @@ test: - samtools --help about: - home: http://csb5.github.io/lofreq/ + home: "https://csb5.github.io/lofreq" license: MIT - summary: A fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data + license_family: MIT + license_file: LICENSE + summary: "A fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data." + dev_url: "https://github.com/CSB5/lofreq" + doc_url: "https://csb5.github.io/lofreq" extra: additional-platforms: - linux-aarch64 + - osx-arm64 identifiers: - usegalaxy-eu:lofreq_call + - biotools:lofreq