From bf2d4f7aecb30a5e35a640633e3f1fcce12e01fe Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 7 Oct 2017 18:53:25 -0400 Subject: [PATCH] r486: treat "U" as "T" for RNA reads (#33) --- bseq.c | 4 ++++ main.c | 2 +- sketch.c | 28 ++++++++++++++-------------- tex/hs38-simu.sh | 5 ++++- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/bseq.c b/bseq.c index 81feb133..9b4800f4 100644 --- a/bseq.c +++ b/bseq.c @@ -55,8 +55,12 @@ void mm_bseq_close(mm_bseq_file_t *fp) static inline void kseq2bseq(kseq_t *ks, mm_bseq1_t *s, int with_qual) { + int i; s->name = strdup(ks->name.s); s->seq = strdup(ks->seq.s); + for (i = 0; i < ks->seq.l; ++i) // convert U to T + if (s->seq[i] == 'u' || s->seq[i] == 'U') + --s->seq[i]; s->qual = with_qual && ks->qual.l? strdup(ks->qual.s) : 0; s->l_seq = ks->seq.l; } diff --git a/main.c b/main.c index 38ea3d82..c1cedf2a 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r482-dirty" +#define MM_VERSION "2.2-r486-dirty" #ifdef __linux__ #include diff --git a/sketch.c b/sketch.c index db359eb6..d98fc709 100644 --- a/sketch.c +++ b/sketch.c @@ -6,21 +6,21 @@ #include "minimap.h" unsigned char seq_nt4_table[256] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; diff --git a/tex/hs38-simu.sh b/tex/hs38-simu.sh index dba0c94e..0fe08aaf 100644 --- a/tex/hs38-simu.sh +++ b/tex/hs38-simu.sh @@ -4,4 +4,7 @@ bin/mason_variator -ir hs38.fa -s 1 -ov hs38-s1.vcf --snp-rate 1e-3 --small-inde bin/mason_simulator -ir hs38.fa -iv hs38-s1.vcf -n 1000000 --seed 1 -o s1_1.fq -or s1_2.fq -oa s1.sam --illumina-prob-mismatch-scale 2.5 bin/mason_variator -ir hs38.fa -s 2 -ov hs38-s2.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10 -bin/mason_simulator -ir hs38.fa -iv hs38-s2.vcf -n 1000000 --seed 1 -o mason-s2_1.fq -or mason-s2_2.fq -oa mason-s2.sam --illumina-prob-mismatch-scale 2.5 --illumina-read-length 150 +bin/mason_simulator -ir hs38.fa -iv hs38-s2.vcf -n 1000000 --seed 2 -o mason-s2_1.fq -or mason-s2_2.fq -oa mason-s2.sam --illumina-prob-mismatch-scale 2.5 --illumina-read-length 150 + +bin/mason_variator -ir hs38.fa -s 3 -ov hs38-s3.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10 +bin/mason_simulator -ir hs38.fa -iv hs38-s3.vcf -n 10000000 --seed 3 -o mason-s3_1.fq -or mason-s3_2.fq -oa mason-s3.sam --illumina-prob-mismatch-scale 2.5 --illumina-read-length 150