Skip to content

Commit

Permalink
Merge pull request #71 from smithlabcode/self-overlapping-adapter-contaminant-fix
Browse files Browse the repository at this point in the history

Changing adapter matching so that self-overlapping patterns are matched only once per read
  • Loading branch information
andrewdavidsmith authored Sep 18, 2024
2 parents cb6a839 + 165486d commit 99ea94f
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
4 changes: 3 additions & 1 deletion src/StreamReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,10 @@ StreamReader::process_sequence_base_from_buffer(FastqStats &stats) {
if (do_adapter_optimized && (num_bases_after_n == adapter_size)) {
cur_kmer &= adapter_mask;
for (size_t i = 0; i != num_adapters; ++i) {
if (cur_kmer == adapters[i]) {
if (cur_kmer == adapters[i] && !adapters_found[i]) {
++stats.pos_adapter_count[
(read_pos << Constants::bit_shift_adapter) | i];
adapters_found[i] = true;
}
}
}
Expand Down Expand Up @@ -436,6 +437,7 @@ StreamReader::read_sequence_line(FastqStats &stats) {
still_in_buffer = true;
next_truncation = 100;
do_kmer_read = (stats.num_reads == next_kmer_read);
adapters_found.reset();

if (do_adapters_slow) {
const string seq_line_str = cur_char;
Expand Down
4 changes: 3 additions & 1 deletion src/StreamReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <string>
#include <cmath>
#include <bitset>

// Optional zlib usage
#include <zlib.h>
Expand Down Expand Up @@ -81,6 +82,7 @@ class StreamReader{
const size_t adapter_size;
const size_t adapter_mask;
const std::array<size_t, Constants::max_adapters> adapters;
std::bitset<Constants::max_adapters> adapters_found{};

const std::string filename;

Expand Down Expand Up @@ -250,7 +252,7 @@ class BamReader : public StreamReader {
bool read_entry(FastqStats &stats, size_t &num_bytes_read);

// Specially made for BamReader to work directly with bam1_t
inline void read_sequence_line(FastqStats &stats);
inline void read_sequence_line(FastqStats &stats);
inline void read_quality_line(FastqStats &stats); // parse quality
inline void put_base_in_buffer(const size_t pos); // puts base in buffer or leftover
~BamReader();
Expand Down
38 changes: 19 additions & 19 deletions test/md5sum.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
9641ae08f984bde102d4292bcad56484 test_output/SRR1853178_1/fastqc_data.txt
5c4e6118d438b1f01cf620120f26c622 test_output/SRR1853178_1/fastqc_data.txt
36df1dcab539ba4ef885239fc8524636 test_output/SRR1853178_1/summary.txt
c5b1d892705ee40353a5b88df5fc3a74 test_output/SRR3897196_2/fastqc_data.txt
8dfecfc49e5b450152be12c63177054f test_output/SRR3897196_2/fastqc_data.txt
80cd130958bcb2219f1e5a07d06a3b6e test_output/SRR3897196_2/summary.txt
d60663e26511968865b3ed92a864fda8 test_output/SRR9624732_1/fastqc_data.txt
5df2ae98f4389616df1fa90fe46ce463 test_output/SRR9624732_1/fastqc_data.txt
a525d455b17eb6ba84cd2a60d281a2b0 test_output/SRR9624732_1/summary.txt
053bb7a28541ac110116086ac6ea0cac test_output/SRR10143153_2/fastqc_data.txt
6e5eaadf209c3f77ab61ed61a034f49e test_output/SRR10143153_2/fastqc_data.txt
19f1811f324e4c44154f659bb6e22806 test_output/SRR10143153_2/summary.txt
71e3964a9a610ebcb3207ea2a391b4be test_output/SRR1772703.lite.1/fastqc_data.txt
6403659cb0295ec05db9891e3e5e4f76 test_output/SRR1772703.lite.1/summary.txt
8d28c215efc4f2930a002c240fb14448 test_output/SRR9624732_2/fastqc_data.txt
427099afa91d877f078f1e7989fe4c39 test_output/SRR1772703.lite.1/fastqc_data.txt
ad5727295e7c8de6eb6874837bf1518f test_output/SRR1772703.lite.1/summary.txt
04f7bb98120971c0ba1d648fda893a7c test_output/SRR9624732_2/fastqc_data.txt
fefc5d746f853c14b5e00421ad1ec260 test_output/SRR9624732_2/summary.txt
3942d27ab09f2db7f58a2ed4c904ac8a test_output/SRR10124060/fastqc_data.txt
1e228b1bb498eec2ca81ec2cc657a02d test_output/SRR10124060/summary.txt
7c5f40be6a37ac4d99c39400b1f7ddb6 test_output/SRR891268_2/fastqc_data.txt
a7827fe2115ef6ac48cd61f2065e58e5 test_output/SRR10124060/fastqc_data.txt
776f7d1b53bbed8683de9ca1d2529f1e test_output/SRR10124060/summary.txt
7c05da833c8806ea8d5093ddab337f1c test_output/SRR891268_2/fastqc_data.txt
20a8e50baace4c672622793874a3d7de test_output/SRR891268_2/summary.txt
e02e6043667560aacb39b4e956edc146 test_output/SRR9878537.lite.1/fastqc_data.txt
9bae57f4fa64d9fca4b11f0e0c107327 test_output/SRR9878537.lite.1/fastqc_data.txt
e5c40997d4993c07e164ee5598c39cf9 test_output/SRR9878537.lite.1/summary.txt
7807c0aeca3bf4fdb18309ce993f6e35 test_output/SRR891268_1/fastqc_data.txt
6778518c16860c4300ed575d1dfdd43e test_output/SRR891268_1/fastqc_data.txt
69e7d0c53cd2e67117637c408b65333a test_output/SRR891268_1/summary.txt
54a383a9dd615f4130cbdab202829173 test_output/SRR6059706.lite.1/fastqc_data.txt
a5545633e81fc57fd03ff6ff7ba4fb8f test_output/SRR6059706.lite.1/summary.txt
21b11f43971f424267f6e32a880aaea9 test_output/SRR6387347/fastqc_data.txt
f05047b2a21949570ca2ae45b73b8da0 test_output/SRR6059706.lite.1/fastqc_data.txt
e348e4bcc7fc6f05e989ac7858d2b287 test_output/SRR6059706.lite.1/summary.txt
db82cb634e7e9b2f30472a069ba082b0 test_output/SRR6387347/fastqc_data.txt
a61f65047e76f93300967cf399d044de test_output/SRR6387347/summary.txt
65519f6bcfd726b3219d7f646ad1d3b5 test_output/SRR3897196_1/fastqc_data.txt
905dcc1d5d135e18df379cc1edf778b6 test_output/SRR3897196_1/fastqc_data.txt
b736ee95d5c450ef5c0dda31957b6818 test_output/SRR3897196_1/summary.txt
109cf880b376fcfba44b9637dcc045a6 test_output/SRR10143153_1/fastqc_data.txt
f7eb7940ad9836764b695d2abb0426da test_output/SRR10143153_1/fastqc_data.txt
9ad191925d47a57d4f8b12f21ba0a7c3 test_output/SRR10143153_1/summary.txt
dfebd8cc85b17f954fb14287a2dc8169 test_output/SRR1853178_2/fastqc_data.txt
7645f17be12d149347dab6033aabdeb3 test_output/SRR1853178_2/summary.txt
508cdda67a1ea7ddc756aa3656d1079b test_output/SRR1853178_2/fastqc_data.txt
c331d0f7a6aa9d72be41ac531f9ba269 test_output/SRR1853178_2/summary.txt

0 comments on commit 99ea94f

Please sign in to comment.