Skip to content

Commit

Permalink
Fix node ID overflow and add some progress logging
Browse files Browse the repository at this point in the history
jmonlong committed Mar 29, 2023
1 parent 6828f9e commit e9c431f
Showing 1 changed file with 25 additions and 10 deletions.
35 changes: 25 additions & 10 deletions src/subcommand/gamsort_main.cpp
Original file line number Diff line number Diff line change
@@ -33,10 +33,10 @@ void help_gamsort(char **argv)
/// Comparator used to sort GAF records.
///
/// Orders two records by the integer value stored in their "rk1" optional
/// field, breaking ties with the integer value stored in "rk2".
/// NOTE(review): "rk1"/"rk2" look like the sort keys derived from node IDs
/// set by the caller before sorting -- confirm where the tags are written.
///
/// Preconditions: both records must have "rk1" and "rk2" entries in
/// opt_fields; the lookup result is dereferenced without an end() check.
/// std::stoll throws if a stored value is not a parsable integer or does not
/// fit in a long long.
struct compare_gaf {
    /// @return true when gaf1 must be ordered strictly before gaf2.
    bool operator()(const gafkluge::GafRecord& gaf1, const gafkluge::GafRecord& gaf2) const {
        // TODO find a way to not have to convert the node ids to string before and then back to int here?
        // Parse as long long (not int): the stored keys can exceed the int
        // range, which made the previous std::stoi-based parsing overflow.
        const long long rk11 = std::stoll(gaf1.opt_fields.find("rk1")->second.second);
        const long long rk12 = std::stoll(gaf2.opt_fields.find("rk1")->second.second);
        // Primary key decides whenever it differs; this also avoids parsing
        // the secondary key when it is not needed.
        if (rk11 != rk12) {
            return rk11 < rk12;
        }
        const long long rk21 = std::stoll(gaf1.opt_fields.find("rk2")->second.second);
        const long long rk22 = std::stoll(gaf2.opt_fields.find("rk2")->second.second);
        return rk21 < rk22;
    }
};
@@ -155,6 +155,11 @@ int main_gamsort(int argc, char **argv)
kstring_t s_buffer = KS_INITIALIZE;
gafkluge::GafRecord gaf;

std::string chunk_outf = "temp_gafsort_" + std::to_string(chunk_id) + ".gaf";
if(show_progress){
cerr << "Preparing temporary chunk " << chunk_outf << "..." << endl;
}

while (vg::io::get_next_record_from_gaf(nullptr, nullptr, in, s_buffer, gaf) == true) {
// find the minimum and maximum node IDs
nid_t min_node = std::numeric_limits<nid_t>::max();
@@ -182,33 +187,43 @@ int main_gamsort(int argc, char **argv)
// if we've read enough reads, sort them and write to disk
if(count == chunk_size){
// sort by minimum node id
if(show_progress){
cerr << " Sorting chunk..." << endl;
}
std::stable_sort(current_gaf_chunk.begin(), current_gaf_chunk.end(), compare_gaf());
// write to temp_gafsort_<chunkid>.gaf
std::string chunk_outf = "temp_gafsort_" + std::to_string(chunk_id);
if(show_progress){
cerr << " Writing chunk..." << endl;
}
std::ofstream out_file(chunk_outf);
for (int ii=0; ii<current_gaf_chunk.size(); ii++){
out_file << current_gaf_chunk[ii] << endl;
}
out_file.close();
chunk_files.push_back(chunk_outf);
if(show_progress){
cerr << "Written temporary chunk " << chunk_outf << "..." << endl;
}
// init next chunk
current_gaf_chunk.clear();
count = 0;
chunk_id++;
chunk_outf = "temp_gafsort_" + std::to_string(chunk_id) + ".gaf";
if(show_progress){
cerr << "Preparing temporary chunk " << chunk_outf << "..." << endl;
}
}
}
hts_close(in);

// write the current last chunk too, if it has any reads
if(count > 0){
// sort by minimum node id
if(show_progress){
cerr << " Sorting chunk..." << endl;
}
std::stable_sort(current_gaf_chunk.begin(), current_gaf_chunk.end(), compare_gaf());

// write to temp_gafsort_<chunkid>.gaf
std::string chunk_outf = "temp_gafsort_" + std::to_string(chunk_id) + ".gaf";
if(show_progress){
cerr << " Writing chunk..." << endl;
}
std::ofstream out_file(chunk_outf);
for (int ii=0; ii<current_gaf_chunk.size(); ii++){
out_file << current_gaf_chunk[ii] << endl;

1 comment on commit e9c431f

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch gafidx. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 12983 seconds

Please sign in to comment.