This repository has been archived by the owner on Dec 13, 2017. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 29
/
main.c
145 lines (137 loc) · 5.89 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "minimap.h"
#define MM_VERSION "0.2-r124-dirty"
void liftrlimit()
{
#ifdef __linux__
struct rlimit r;
getrlimit(RLIMIT_AS, &r);
r.rlim_cur = r.rlim_max;
setrlimit(RLIMIT_AS, &r);
#endif
}
int main(int argc, char *argv[])
{
mm_mapopt_t opt;
int i, c, k = 15, w = -1, b = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx = 0;
int tbatch_size = 100000000;
uint64_t ibatch_size = 4000000000ULL;
float f = 0.001;
bseq_file_t *fp = 0;
char *fnw = 0;
FILE *fpr = 0, *fpw = 0;
liftrlimit();
mm_realtime0 = realtime();
mm_mapopt_init(&opt);
while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lRPST:m:L:Dx:")) >= 0) {
if (c == 'w') w = atoi(optarg);
else if (c == 'k') k = atoi(optarg);
else if (c == 'b') b = atoi(optarg);
else if (c == 'r') opt.radius = atoi(optarg);
else if (c == 'c') opt.min_cnt = atoi(optarg);
else if (c == 'm') opt.merge_frac = atof(optarg);
else if (c == 'f') f = atof(optarg);
else if (c == 't') n_threads = atoi(optarg);
else if (c == 'v') mm_verbose = atoi(optarg);
else if (c == 'g') opt.max_gap = atoi(optarg);
else if (c == 'N') keep_name = 0;
else if (c == 'd') fnw = optarg;
else if (c == 'l') is_idx = 1;
else if (c == 'R') opt.flag |= MM_F_WITH_REP;
else if (c == 'P') opt.flag &= ~MM_F_WITH_REP;
else if (c == 'D') opt.flag |= MM_F_NO_SELF;
else if (c == 'O') opt.flag |= MM_F_NO_ISO;
else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
else if (c == 'T') opt.sdust_thres = atoi(optarg);
else if (c == 'L') opt.min_match = atoi(optarg);
else if (c == 'V') {
puts(MM_VERSION);
return 0;
} else if (c == 'B' || c == 'I') {
double x;
char *p;
x = strtod(optarg, &p);
if (*p == 'G' || *p == 'g') x *= 1e9;
else if (*p == 'M' || *p == 'm') x *= 1e6;
else if (*p == 'K' || *p == 'k') x *= 1e3;
if (c == 'B') tbatch_size = (uint64_t)(x + .499);
else ibatch_size = (uint64_t)(x + .499);
} else if (c == 'x') {
if (strcmp(optarg, "ava10k") == 0) {
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
opt.min_match = 100;
opt.merge_frac = 0.0;
w = 5;
}
}
}
if (w < 0) w = (int)(.6666667 * k + .499);
if (argc == optind) {
fprintf(stderr, "Usage: minimap [options] <target.fa> [query.fa] [...]\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " Indexing:\n");
fprintf(stderr, " -k INT k-mer size [%d]\n", k);
fprintf(stderr, " -w INT minizer window size [{-k}*2/3]\n");
fprintf(stderr, " -I NUM split index for every ~NUM input bases [4G]\n");
fprintf(stderr, " -d FILE dump index to FILE []\n");
fprintf(stderr, " -l the 1st argument is a index file (overriding -k, -w and -I)\n");
// fprintf(stderr, " -b INT bucket bits [%d]\n", b); // most users would care about this
fprintf(stderr, " Mapping:\n");
fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%.3f]\n", f);
fprintf(stderr, " -r INT bandwidth [%d]\n", opt.radius);
fprintf(stderr, " -m FLOAT merge two chains if FLOAT fraction of minimizers are shared [%.2f]\n", opt.merge_frac);
fprintf(stderr, " -c INT retain a mapping if it consists of >=INT minimizers [%d]\n", opt.min_cnt);
fprintf(stderr, " -L INT min matching length [%d]\n", opt.min_match);
fprintf(stderr, " -g INT split a mapping if there is a gap longer than INT [%d]\n", opt.max_gap);
fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres);
// fprintf(stderr, " -D skip self mappings but keep dual mappings\n"); // too confusing to expose to end users
fprintf(stderr, " -S skip self and dual mappings\n");
fprintf(stderr, " -O drop isolated hits before chaining (EXPERIMENTAL)\n");
fprintf(stderr, " -P filtering potential repeats after mapping (EXPERIMENTAL)\n");
// fprintf(stderr, " -R skip post-mapping repeat filtering\n"); // deprecated option for backward compatibility
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " Input/Output:\n");
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
// fprintf(stderr, " -B NUM process ~NUM bp in each batch [100M]\n");
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
// fprintf(stderr, " -N use integer as target names\n");
fprintf(stderr, " -V show version number\n");
fprintf(stderr, "\nSee minimap.1 for detailed description of the command-line options.\n");
return 1;
}
if (is_idx) fpr = fopen(argv[optind], "rb");
else fp = bseq_open(argv[optind]);
if (fnw) fpw = fopen(fnw, "wb");
for (;;) {
mm_idx_t *mi = 0;
if (fpr) mi = mm_idx_load(fpr);
else if (!bseq_eof(fp))
mi = mm_idx_gen(fp, w, k, b, tbatch_size, n_threads, ibatch_size, keep_name);
if (mi == 0) break;
if (mm_verbose >= 3)
fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n);
mm_idx_set_max_occ(mi, f);
if (mm_verbose >= 3)
fprintf(stderr, "[M::%s] max occurrences of a minimizer to consider: %d\n", __func__, mi->max_occ);
if (fpw) mm_idx_dump(fpw, mi);
for (i = optind + 1; i < argc; ++i)
mm_map_file(mi, argv[i], &opt, n_threads, tbatch_size);
mm_idx_destroy(mi);
}
if (fpw) fclose(fpw);
if (fpr) fclose(fpr);
if (fp) bseq_close(fp);
fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
fprintf(stderr, "[M::%s] CMD:", __func__);
for (i = 0; i < argc; ++i)
fprintf(stderr, " %s", argv[i]);
fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - mm_realtime0, cputime());
return 0;
}