diff --git a/bntseq.c b/bntseq.c index 465e3832..65f7e93a 100644 --- a/bntseq.c +++ b/bntseq.c @@ -299,9 +299,9 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only) // read sequences while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q); if (!for_only) { // add the reverse complemented sequence - m_pac = (bns->l_pac * 2 + 3) / 4 * 4; - pac = realloc(pac, m_pac/4); - memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4); + int64_t ll_pac = (bns->l_pac * 2 + 3) / 4 * 4; + if (ll_pac > m_pac) pac = realloc(pac, ll_pac/4); + memset(pac + (bns->l_pac+3)/4, 0, (ll_pac - (bns->l_pac+3)/4*4) / 4); for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac) _set_pac(pac, bns->l_pac, 3-_get_pac(pac, l)); } diff --git a/bwa.1 b/bwa.1 index b9a65f16..4df58800 100644 --- a/bwa.1 +++ b/bwa.1 @@ -277,6 +277,14 @@ If ARG starts with @, it is interpreted as a string and gets inserted into the output SAM header; otherwise, ARG is interpreted as a file with all lines starting with @ in the file inserted into the SAM header. [null] .TP +.BI -o \ FILE +Write the output SAM file to +.IR FILE . +For compatibility with other BWA commands, this option may also be given as +.B -f +.IR FILE . +[standard ouptut] +.TP .BI -T \ INT Don't output alignment with score lower than .IR INT . diff --git a/bwa.c b/bwa.c index c78ebb67..bc9d3732 100644 --- a/bwa.c +++ b/bwa.c @@ -30,13 +30,23 @@ static inline void trim_readno(kstring_t *s) s->l -= 2, s->s[s->l] = 0; } +static inline char *dupkstring(const kstring_t *str, int dupempty) +{ + char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL; + if (!s) return NULL; + + memcpy(s, str->s, str->l); + s[str->l] = '\0'; + return s; +} + static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) { // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice - s->name = strdup(ks->name.s); - s->comment = ks->comment.l? strdup(ks->comment.s) : 0; - s->seq = strdup(ks->seq.s); - s->qual = ks->qual.l? strdup(ks->qual.s) : 0; - s->l_seq = strlen(s->seq); + s->name = dupkstring(&ks->name, 1); + s->comment = dupkstring(&ks->comment, 0); + s->seq = dupkstring(&ks->seq, 1); + s->qual = dupkstring(&ks->qual, 0); + s->l_seq = ks->seq.l; } bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) @@ -144,9 +154,9 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.); max_gap = max_ins > max_del? max_ins : max_del; max_gap = max_gap > 1? max_gap : 1; - w = (max_gap + abs(rlen - l_query) + 1) >> 1; + w = (max_gap + abs((int)rlen - l_query) + 1) >> 1; w = w < w_? w : w_; - min_w = abs(rlen - l_query) + 3; + min_w = abs((int)rlen - l_query) + 3; w = w > min_w? w : min_w; // NW alignment if (bwa_verbose >= 4) { diff --git a/bwape.c b/bwape.c index f2d3d8e0..fa4f7f7c 100644 --- a/bwape.c +++ b/bwape.c @@ -624,7 +624,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt, const char *rg_line) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, j, n_seqs, tot_seqs = 0; + int i, j, n_seqs; + long long tot_seqs = 0; bwa_seq_t *seqs[2]; bwa_seqio_t *ks[2]; clock_t t; @@ -711,7 +712,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f for (j = 0; j < 2; ++j) bwa_free_read_seq(n_seqs, seqs[j]); - fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_sai2sam_pe_core] %lld sequences have been processed.\n", tot_seqs); last_ii = ii; } diff --git a/bwase.c b/bwase.c index 77c50db9..18e86718 100644 --- a/bwase.c +++ b/bwase.c @@ -173,13 +173,15 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l ubyte_t *rseq; int64_t k, rb, re, rlen; int8_t mat[25]; + int w; bwa_fill_scmat(1, 3, mat); rb = *_rb; re = rb + len + ref_shift; assert(re <= l_pac); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); assert(re - rb == rlen); - ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > abs(rlen - len) * 1.5? SW_BW : abs(rlen - len) * 1.5, n_cigar, &cigar32); + w = abs((int)rlen - len) * 1.5; + ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > w? SW_BW : w, n_cigar, &cigar32); assert(*n_cigar > 0); if ((cigar32[*n_cigar - 1]&0xf) == 1) cigar32[*n_cigar - 1] = (cigar32[*n_cigar - 1]>>4<<4) | 3; // change endding ins to soft clipping if ((cigar32[0]&0xf) == 1) cigar32[0] = (cigar32[0]>>4<<4) | 3; // change beginning ins to soft clipping @@ -505,7 +507,8 @@ void bwase_initialize() void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ, const char *rg_line) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, n_seqs, tot_seqs = 0, m_aln; + int i, n_seqs, m_aln; + long long tot_seqs = 0; bwt_aln1_t *aln = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; @@ -563,7 +566,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); - fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs); } // destroy diff --git a/bwt_gen.c b/bwt_gen.c index f06c1a3e..acea99b6 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -338,6 +338,7 @@ BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable) bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int)); GenerateDNAOccCountTable(bwt->decodeTable); } else { + // FIXME Prevent BWTFree() from freeing decodeTable in this case bwt->decodeTable = decodeTable; } @@ -1538,6 +1539,8 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB (long)bwtInc->numberOfIterationDone, (long)processedTextLength); } } + + fclose(packedFile); return bwtInc; } @@ -1545,8 +1548,6 @@ void BWTFree(BWT *bwt) { if (bwt == 0) return; free(bwt->cumulativeFreq); - free(bwt->bwtCode); - free(bwt->occValue); free(bwt->occValueMajor); free(bwt->decodeTable); free(bwt); @@ -1555,8 +1556,10 @@ void BWTFree(BWT *bwt) void BWTIncFree(BWTInc *bwtInc) { if (bwtInc == 0) return; - free(bwtInc->bwt); + BWTFree(bwtInc->bwt); free(bwtInc->workingMemory); + free(bwtInc->cumulativeCountInCurrentBuild); + free(bwtInc->packedShift); free(bwtInc); } diff --git a/bwt_lite.c b/bwt_lite.c index f7946f54..eb16da64 100644 --- a/bwt_lite.c +++ b/bwt_lite.c @@ -48,7 +48,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq) } { // generate cnt_table for (i = 0; i != 256; ++i) { - u_int32_t j, x = 0; + uint32_t j, x = 0; for (j = 0; j != 4; ++j) x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3); b->cnt_table[i] = x; diff --git a/bwtaln.c b/bwtaln.c index 20b01cd3..a348fdfa 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -158,7 +158,8 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa) void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { - int i, n_seqs, tot_seqs = 0; + int i, n_seqs; + long long tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; @@ -218,7 +219,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); - fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs); } // destroy diff --git a/bwtgap.c b/bwtgap.c index 08bc1f48..7dde443d 100644 --- a/bwtgap.c +++ b/bwtgap.c @@ -58,7 +58,7 @@ static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, i q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries); } p = q->stack + q->n_entries; - p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l; + p->info = (uint32_t)score<<21 | i; p->k = k; p->l = l; p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->n_ins = n_ins; p->n_del = n_del; p->state = state; diff --git a/bwtgap.h b/bwtgap.h index 7dd61659..9085b626 100644 --- a/bwtgap.h +++ b/bwtgap.h @@ -5,9 +5,9 @@ #include "bwtaln.h" typedef struct { // recursion stack - u_int32_t info; // score<<21 | i - u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6; - u_int32_t n_ins:16, n_del:16; + uint32_t info; // score<<21 | i + uint32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6; + uint32_t n_ins:16, n_del:16; int last_diff_pos; bwtint_t k, l; // (k,l) is the SA region of [i,n-1] } gap_entry_t; diff --git a/bwtindex.c b/bwtindex.c index fb322313..e1322f8e 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -119,7 +119,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) } rope_destroy(r); } - bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1); free(buf); @@ -175,7 +175,7 @@ void bwt_bwtupdate_core(bwt_t *bwt) int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command { bwt_t *bwt; - if (argc < 2) { + if (argc != 2) { fprintf(stderr, "Usage: bwa bwtupdate \n"); return 1; } diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index fe8489cb..f50b25bd 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -242,7 +242,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b double diff; G[0] = hits[i]->hits[0].G + a[1].G; G[1] = hits[i+1]->hits[0].G + a[0].G; - diff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.); + diff = fabs((double)(G[0] - G[1])) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.); if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0; } if (a[0].G == 0 || a[1].G == 0) { // one proper pair only diff --git a/fastmap.c b/fastmap.c index 7b7145d9..06199991 100644 --- a/fastmap.c +++ b/fastmap.c @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -158,6 +158,7 @@ int main_mem(int argc, char *argv[]) else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1; else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; + else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout); else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; else if (c == 'C') aux.copy_comment = 1; @@ -257,7 +258,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins); fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired); - fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); + fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overridden [null]\n"); fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n"); fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n"); fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n"); @@ -265,6 +266,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n"); fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); fprintf(stderr, " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]\n"); + fprintf(stderr, " -o FILE sam file to output results to [stdout]\n"); fprintf(stderr, " -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file)\n"); fprintf(stderr, " -5 always take the leftmost alignment on a read as primary\n"); fprintf(stderr, "\n"); diff --git a/kthread.c b/kthread.c index 1b13494f..780de19c 100644 --- a/kthread.c +++ b/kthread.c @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/malloc_wrap.c b/malloc_wrap.c index 100b8cb6..6165e043 100644 --- a/malloc_wrap.c +++ b/malloc_wrap.c @@ -13,7 +13,7 @@ void *wrap_calloc(size_t nmemb, size_t size, void *p = calloc(nmemb, size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, nmemb * size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -25,7 +25,7 @@ void *wrap_malloc(size_t size, void *p = malloc(size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -37,7 +37,7 @@ void *wrap_realloc(void *ptr, size_t size, void *p = realloc(ptr, size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -49,7 +49,7 @@ char *wrap_strdup(const char *s, char *p = strdup(s); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, strlen(s), file, line, strerror(errno)); exit(EXIT_FAILURE); }