Skip to content

Commit

Permalink
Merge pull request #298 from ChaSooyoung/master
Browse files: browse the repository at this point in the history
Fix .gz extensions in createdb
  • Loading branch information
martin-steinegger authored Jun 27, 2024
2 parents ca58f9b + 5441777 commit eec1092
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions src/strucclustutils/structcreatedb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ std::string removeModel(const std::string& input) {
size_t
writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, StructureTo3Di & structureTo3Di,
PulchraWrapper & pulchra, std::vector<char> & alphabet3di, std::vector<char> & alphabetAA,
std::vector<int8_t> & camol, std::string & header, std::string & name,
std::vector<int8_t> & camol, std::string & header,
DBWriter & aadbw, DBWriter & hdbw, DBWriter & torsiondbw, DBWriter & cadbw, int chainNameMode,
float maskBfactorThreshold, size_t & tooShort, size_t & notProtein, size_t & globalCnt, int thread_idx, int coordStoreMode,
std::string & filename, size_t & fileidCnt,
Expand Down Expand Up @@ -161,13 +161,18 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru
torsiondbw.writeData(alphabet3di.data(), alphabet3di.size(), dbKey, thread_idx);
aadbw.writeData(alphabetAA.data(), alphabetAA.size(), dbKey, thread_idx);
header.clear();
header.append(Util::remove_extension(readStructure.names[ch]));
if (Util::endsWith(".gz", readStructure.names[ch])){
header.append(Util::remove_extension(Util::remove_extension(readStructure.names[ch])));
}
else{
header.append(Util::remove_extension(readStructure.names[ch]));
}
if(readStructure.modelCount > 1){
header.append("_MODEL_");
header.append(std::to_string(readStructure.modelIndices[ch]));
}
if(chainNameMode == LocalParameters::CHAIN_MODE_ADD ||
(chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){
(chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){
header.push_back('_');
header.append(readStructure.chainNames[ch]);
}
Expand All @@ -179,20 +184,24 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru
std::string entryName = Util::parseFastaHeader(header.c_str());
#pragma omp critical
{
std::map<std::string, size_t>::iterator it = filenameToFileId.find(Util::remove_extension(filename));
std::string filenameWithExtension = filename;
if (Util::endsWith(".gz", filename)){
filenameWithExtension = Util::remove_extension(filename);
}
std::string filenameWithoutExtension = Util::remove_extension(filenameWithExtension);
std::map<std::string, size_t>::iterator it = filenameToFileId.find(filenameWithoutExtension);
size_t fileid;
if (it != filenameToFileId.end()) {
fileid = it->second;
} else {
fileid = fileidCnt;
filenameToFileId[Util::remove_extension(filename)] = fileid;
fileIdToName[fileid] = Util::remove_extension(filename);
filenameToFileId[filenameWithoutExtension] = fileid;
fileIdToName[fileid] = filenameWithoutExtension;
fileidCnt++;
}
entrynameToFileId[entryName] = std::make_pair(fileid, readStructure.modelIndices[ch]);
}
hdbw.writeData(header.c_str(), header.size(), dbKey, thread_idx);
name.clear();

if (mappingWriter != NULL) {
std::string taxId = SSTR(readStructure.taxIds[ch]);
Expand Down Expand Up @@ -636,10 +645,9 @@ int structcreatedb(int argc, const char **argv, const Command& command) {
}

__sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1));

writeStructureEntry(
mat, readStructure, structureTo3Di, pulchra,
alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw,
alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw,
par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode,
name, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName,
mappingWriter
Expand Down Expand Up @@ -682,7 +690,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) {
// clear memory
writeStructureEntry(
mat, readStructure, structureTo3Di, pulchra,
alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw,
alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw,
par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode,
looseFiles[i], globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName,
mappingWriter
Expand Down Expand Up @@ -745,7 +753,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) {
__sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1));
writeStructureEntry(
mat, readStructure, structureTo3Di, pulchra,
alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw,
alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw,
par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode,
obj_name, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName,
mappingWriter
Expand Down Expand Up @@ -796,7 +804,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) {
__sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1));
writeStructureEntry(
mat, readStructure, structureTo3Di, pulchra,
alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw,
alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw,
par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode,
dbname, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName,
mappingWriter
Expand Down

0 comments on commit eec1092

Please sign in to comment.