Skip to content

Commit

Permalink
Updated misplaced/wrong input handling
Browse files: browse the repository at this point in the history
  • Loading branch information
rvaser committed Mar 7, 2018
1 parent 0dd90d2 commit a6ddd48
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 68 deletions.
73 changes: 35 additions & 38 deletions src/overlap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Overlap::Overlap(uint64_t a_id, uint64_t b_id, double, uint32_t,
: q_name_(), q_id_(a_id - 1), q_begin_(a_begin), q_end_(a_end),
q_length_(a_length), t_name_(), t_id_(b_id - 1), t_begin_(b_begin),
t_end_(b_end), t_length_(b_length), strand_(a_rc ^ b_rc), length_(),
error_(), cigar_(), is_valid_(true), is_transmuted_(true),
error_(), cigar_(), is_valid_(true), is_transmuted_(false),
breaking_points_(), dual_breaking_points_() {

length_ = std::max(q_end_ - q_begin_, t_end_ - t_begin_);
Expand Down Expand Up @@ -127,46 +127,53 @@ bool transmuteId(const std::unordered_map<T, uint64_t>& t_to_id, const T& t,
return true;
}

void Overlap::transmute(const std::unordered_map<std::string, uint64_t>& name_to_id,
void Overlap::transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
const std::unordered_map<std::string, uint64_t>& name_to_id,
const std::unordered_map<uint64_t, uint64_t>& id_to_id) {

if (!is_valid_) {
fprintf(stderr, "[racon::Overlap::transmute] error: "
"overlap is not valid!\n");
exit(1);
}

if (is_transmuted_) {
return;
if (!is_valid_ || is_transmuted_) {
return;
}

if (!q_name_.empty()) {
if (!transmuteId(name_to_id, q_name_ + "q", q_id_)) {
fprintf(stderr, "[racon::Overlap::transmute] error: "
"missing sequence with name %s!\n", q_name_.c_str());
exit(1);
}
} else {
if (!transmuteId(id_to_id, q_id_ << 1 | 0, q_id_)) {
fprintf(stderr, "[racon::Overlap::transmute] error: "
"missing sequence with id %zu!\n", q_id_);
exit(1);
is_valid_ = false;
return;
}
std::string().swap(q_name_);
} else if (!transmuteId(id_to_id, q_id_ << 1 | 0, q_id_)) {
is_valid_ = false;
return;
}

if (q_length_ != sequences[q_id_]->data().size()) {
fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
"unequal lengths in sequence and overlap file for sequence %s!\n",
sequences[q_id_]->name().c_str());
exit(1);
}

if (!t_name_.empty()) {
if (!transmuteId(name_to_id, t_name_ + "t", t_id_)) {
fprintf(stderr, "[racon::Overlap::transmute] error: "
"missing target sequence with name %s!\n", t_name_.c_str());
exit(1);
}
} else {
if (!transmuteId(id_to_id, t_id_ << 1 | 1, t_id_)) {
fprintf(stderr, "[racon::Overlap::transmute] error: "
"missing sequence with id %zu!\n", t_id_);
exit(1);
is_valid_ = false;
return;
}
std::string().swap(t_name_);
} else if (!transmuteId(id_to_id, t_id_ << 1 | 1, t_id_)) {
is_valid_ = false;
return;
}

if (t_length_ != 0 && t_length_ != sequences[t_id_]->data().size()) {
fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
"unequal lengths in target and overlap file for target %s!\n",
sequences[t_id_]->name().c_str());
exit(1);
}

// for SAM input
t_length_ = sequences[t_id_]->data().size();

is_transmuted_ = true;
}

Expand All @@ -183,16 +190,6 @@ void Overlap::find_breaking_points(const std::vector<std::unique_ptr<Sequence>>&
return;
}

if (q_length_ != sequences[q_id_]->data().size() &&
q_length_ != sequences[q_id_]->reverse_complement().size()) {

fprintf(stderr, "[racon::overlap::find_breaking_points] error: "
"mismatched sequence lengths in sequence and overlap file!\n");
exit(1);
}

t_length_ = sequences[t_id_]->data().size();

if (cigar_.empty()) {
// align overlaps with edlib
const char* q = !strand_ ? &(sequences[q_id_]->data()[q_begin_]) :
Expand Down
3 changes: 2 additions & 1 deletion src/overlap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ class Overlap {
return is_valid_;
}

void transmute(const std::unordered_map<std::string, uint64_t>& name_to_id,
void transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
const std::unordered_map<std::string, uint64_t>& name_to_id,
const std::unordered_map<uint64_t, uint64_t>& id_to_id);

uint32_t length() const {
Expand Down
23 changes: 8 additions & 15 deletions src/polisher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,26 +202,19 @@ void Polisher::initialize() {

auto it = name_to_id.find(sequences_[i]->name() + "t");
if (it != name_to_id.end()) {
uint64_t j = it->second;
if (j >= targets_size) {
fprintf(stderr, "[racon::Polisher::initialize] error: "
"duplicate sequence %s in file\n",
sequences_[i]->name().c_str());
exit(1);
}
if (sequences_[i]->data().size() != sequences_[j]->data().size() ||
sequences_[i]->quality().size() != sequences_[j]->quality().size()) {
if (sequences_[i]->data().size() != sequences_[it->second]->data().size() ||
sequences_[i]->quality().size() != sequences_[it->second]->quality().size()) {

fprintf(stderr, "[racon::Polisher::initialize] error: "
"duplicate sequence %s with unequal data\n",
sequences_[i]->name().c_str());
exit(1);
}

name_to_id[sequences_[i]->name() + "q"] = j;
id_to_id[sequences_size << 1 | 0] = j;
name_to_id[sequences_[i]->name() + "q"] = it->second;
id_to_id[sequences_size << 1 | 0] = it->second;

duplicate_sequences.insert(j);
duplicate_sequences.insert(it->second);
sequences_[i].reset();
++n;
} else {
Expand Down Expand Up @@ -286,11 +279,12 @@ void Polisher::initialize() {

uint64_t c = l;
for (uint64_t i = l; i < overlaps.size(); ++i) {
overlaps[i]->transmute(sequences_, name_to_id, id_to_id);

if (!overlaps[i]->is_valid()) {
overlaps[i].reset();
continue;
}
overlaps[i]->transmute(name_to_id, id_to_id);

while (overlaps[c] == nullptr) {
++c;
Expand Down Expand Up @@ -323,8 +317,6 @@ void Polisher::initialize() {
}
}

fprintf(stderr, "[racon::Polisher::initialize] loaded batch of overlaps\n");

uint64_t n = shrinkToFit(overlaps, l);
l = c - n;

Expand All @@ -341,6 +333,7 @@ void Polisher::initialize() {
"empty overlap set!\n");
exit(1);
}
fprintf(stderr, "[racon::Polisher::initialize] loaded overlaps\n");

std::vector<std::future<void>> thread_futures;
for (uint64_t i = 0; i < sequences_.size(); ++i) {
Expand Down
28 changes: 14 additions & 14 deletions test/racon_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class RaconPolishingTest: public ::testing::Test {
uint32_t window_length, double quality_threshold, double error_threshold,
int8_t match,int8_t mismatch, int8_t gap) {

polisher = createPolisher(sequences_path, overlaps_path, target_path,
polisher = racon::createPolisher(sequences_path, overlaps_path, target_path,
type, window_length, quality_threshold, error_threshold, match,
mismatch, gap, 4);
}
Expand All @@ -51,33 +51,33 @@ class RaconPolishingTest: public ::testing::Test {
std::unique_ptr<racon::Polisher> polisher;
};

TEST(RaconTest, PolisherTypeError) {
EXPECT_DEATH((createPolisher("", "", "", static_cast<racon::PolisherType>(3),
TEST(RaconInitializeTest, PolisherTypeError) {
EXPECT_DEATH((racon::createPolisher("", "", "", static_cast<racon::PolisherType>(3),
0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid polisher"
" type!");
}

TEST(RaconTest, WindowLengthError) {
EXPECT_DEATH((createPolisher("", "", "", racon::PolisherType::kC, 0, 0, 0, 0,
0, 0, 0)), ".racon::createPolisher. error: invalid window length!");
TEST(RaconInitializeTest, WindowLengthError) {
EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 0,
0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid window length!");
}

TEST(RaconTest, SequencesPathExtensionError) {
EXPECT_DEATH((createPolisher("", "", "", racon::PolisherType::kC, 500, 0,
0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has unsupported "
TEST(RaconInitializeTest, SequencesPathExtensionError) {
EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 500,
0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has unsupported "
"format extension .valid extensions: .fasta, .fasta.gz, .fa, .fa.gz, "
".fastq, .fastq.gz, .fq, .fq.gz.!");
}

TEST(RaconTest, OverlapsPathExtensionError) {
EXPECT_DEATH((createPolisher(racon_test_data_path + "sample_reads.fastq.gz", "",
"", racon::PolisherType::kC, 500, 0, 0, 0, 0, 0, 0)),
TEST(RaconInitializeTest, OverlapsPathExtensionError) {
EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
"", "", racon::PolisherType::kC, 500, 0, 0, 0, 0, 0, 0)),
".racon::createPolisher. error: file has unsupported format extension "
".valid extensions: .mhap, .mhap.gz, .paf, .paf.gz, .sam, .sam.gz.!");
}

TEST(RaconTest, TargetPathExtensionError) {
EXPECT_DEATH((createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
TEST(RaconInitializeTest, TargetPathExtensionError) {
EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
racon_test_data_path + "sample_overlaps.paf.gz", "", racon::PolisherType::kC,
500, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file has "
"unsupported format extension .valid extensions: .fasta, .fasta.gz, .fa,"
Expand Down

0 comments on commit a6ddd48

Please sign in to comment.