diff --git a/configure.ac b/configure.ac index 199f423..d852eea 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.69]) -AC_INIT([xssp], [3.0.9], [Coos.Baakman@radboudumc.nl]) +AC_INIT([xssp], [3.0.10], [Coos.Baakman@radboudumc.nl]) AM_INIT_AUTOMAKE([subdir-objects]) AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_HEADERS([config.h]) diff --git a/src/align-2d.h b/src/align-2d.h index 658734c..fcb9e9b 100644 --- a/src/align-2d.h +++ b/src/align-2d.h @@ -39,24 +39,24 @@ struct entry , m_seq(seq) , m_weight(weight) {} - uint32 nr() const { return m_nr; } + int64 nr() const { return m_nr; } float weight() const { return m_weight; } uint32 length() const { return static_cast(m_seq.length()); } - void insert_gap(uint32 pos); + void insert_gap(int64 pos); void append_gap(); - void remove_gap(uint32 pos); + void remove_gap(int64 pos); void remove_gaps(); void dump_positions() { m_positions.clear(); } - uint32 m_nr; + int64 m_nr; std::string m_id; sequence m_seq; sec_structure m_ss; float m_weight; - std::vector m_positions; + std::vector m_positions; }; // -------------------------------------------------------------------- diff --git a/src/blast.cpp b/src/blast.cpp index 20beca6..42d6fd3 100644 --- a/src/blast.cpp +++ b/src/blast.cpp @@ -41,7 +41,7 @@ const uint32 kGappedDropOffFinal = 25, kGapTrigger = 22, - kMaxSequenceLength = std::numeric_limits::max(); + kMaxSequenceLength = std::numeric_limits::max(); const int32 kHitWindow = 40; @@ -826,8 +826,8 @@ class WordHitIterator struct Entry { - uint16 mCount; - uint16 mDataOffset; + uint32 mCount; + uint32 mDataOffset; }; public: @@ -838,7 +838,7 @@ class WordHitIterator struct WordHitIteratorStaticData { std::vector mLookup; - std::vector mOffsets; + std::vector mOffsets; }; WordHitIterator(const WordHitIteratorStaticData& inStaticData) @@ -858,19 +858,19 @@ class WordHitIterator WordHitIteratorStaticData& outStaticData); void Reset(const sequence& inTarget); - bool Next(uint16& outQueryOffset, uint16& outTargetOffset); + bool Next(uint32& outQueryOffset, uint32& outTargetOffset); uint32 Index() const { return mIndex; } private: const uint8* mTargetCurrent; const uint8* mTargetEnd; - uint16 mTargetOffset; + uint32 mTargetOffset; const std::vector& mLookup; - const std::vector& mOffsets; + const std::vector& mOffsets; uint32 mIndex; - const uint16* mOffset; - uint16 mCount; + const uint32* mOffset; + uint32 mCount; }; template<> const uint32 WordHitIterator<2>::kMask = 0x0001F; @@ -886,9 +886,9 @@ void WordHitIterator::Init(const sequence& inQuery, uint64 N = IWord::kMaxWordIndex; size_t M = 0; - std::vector> test(N); + std::vector> test(N); - for (uint16 i = 0; i < inQuery.length() - WORDSIZE + 1; ++i) + for (uint32 i = 0; i < inQuery.length() - WORDSIZE + 1; ++i) { IWord w(inQuery.c_str() + i); @@ -903,14 +903,14 @@ void WordHitIterator::Init(const sequence& inQuery, } outStaticData.mLookup = std::vector(N); - outStaticData.mOffsets = std::vector(M); + outStaticData.mOffsets = std::vector(M); - uint16* data = &outStaticData.mOffsets[0]; + uint32* data = &outStaticData.mOffsets[0]; for (uint32 i = 0; i < N; ++i) { - outStaticData.mLookup[i].mCount = static_cast(test[i].size()); - outStaticData.mLookup[i].mDataOffset = static_cast( + outStaticData.mLookup[i].mCount = static_cast(test[i].size()); + outStaticData.mLookup[i].mDataOffset = static_cast( data - &outStaticData.mOffsets[0]); for (uint32 j = 0; j < outStaticData.mLookup[i].mCount; ++j) @@ -940,8 +940,8 @@ void WordHitIterator::Reset(const sequence& inTarget) } template -bool WordHitIterator::Next(uint16& outQueryOffset, - uint16& outTargetOffset) +bool WordHitIterator::Next(uint32& outQueryOffset, + uint32& outTargetOffset) { bool result = false; @@ -999,7 +999,7 @@ struct DiagonalStartTable std::fill(mTable, mTable + n, -inTargetLength); } - int32& operator()(uint16 inQueryOffset, uint16 inTargetOffset) + int32& operator()(uint32 inQueryOffset, uint32 inTargetOffset) { return mTable[mTargetLength - inTargetOffset + inQueryOffset]; } @@ -1501,7 +1501,7 @@ void BlastQuery::SearchPart(const char* inFasta, size_t inLength, iter.Reset(target); diagonals.Reset(queryLength, static_cast(target.length())); - uint16 queryOffset, targetOffset; + uint32 queryOffset, targetOffset; while (iter.Next(queryOffset, targetOffset)) { ++hitsToDb; diff --git a/src/dssp.cpp b/src/dssp.cpp index 39a1985..74b2a8f 100644 --- a/src/dssp.cpp +++ b/src/dssp.cpp @@ -79,7 +79,7 @@ std::string ResidueToDSSPLine(const MResidue& residue) char chirality; std::tie(alpha,chirality) = residue.Alpha(); - uint32 bp[2] = {}; + int64 bp[2] = {}; char bridgelabel[2] = { ' ', ' ' }; for (uint32 i = 0; i < 2; ++i) { @@ -107,13 +107,13 @@ std::string ResidueToDSSPLine(const MResidue& residue) if (acceptors[i].residue != nullptr) { - int32 d = acceptors[i].residue->GetNumber() - residue.GetNumber(); + int64 d = acceptors[i].residue->GetNumber() - residue.GetNumber(); NHO[i] = (boost::format("%d,%3.1f") % d % acceptors[i].energy).str(); } if (donors[i].residue != nullptr) { - int32 d = donors[i].residue->GetNumber() - residue.GetNumber(); + int64 d = donors[i].residue->GetNumber() - residue.GetNumber(); ONH[i] = (boost::format("%d,%3.1f") % d % donors[i].energy).str(); } } @@ -229,7 +229,9 @@ void WriteDSSP(MProtein& protein, std::ostream& os) foreach (const MChain* chain, protein.GetChains()) { foreach (const MResidue* residue, chain->GetResidues()) + { residues.push_back(residue); + } } // keep residues sorted by residue number as assigned during reading the PDB file diff --git a/src/hssp-nt.cpp b/src/hssp-nt.cpp index 5fcfea7..0ba8b30 100644 --- a/src/hssp-nt.cpp +++ b/src/hssp-nt.cpp @@ -103,7 +103,7 @@ float calculateDistance(const sequence& a, const sequence& b) M += B(x - 1, y - 1); float s; - uint16 i = 0; + uint32 i = 0; if (a[x] == b[y]) i = 1; @@ -160,8 +160,8 @@ struct MResInfo uint8 m_letter; std::string m_chain_id, m_auth_chain_id; - uint32 m_seq_nr; - int32 m_pdb_nr; + int64 m_seq_nr; + int64 m_pdb_nr; std::string m_ins_code; MSecondaryStructure m_ss; std::string m_dssp; @@ -372,7 +372,7 @@ MProfile::MProfile(const MChain& inChain, const sequence& inSequence, const std::vector& residues = m_chain.GetResidues(); std::vector::const_iterator ri = residues.begin(); - uint32 seq_nr = 1; + int64 seq_nr = 1; for (uint32 i = 0; i < inSequence.length(); ++i) { assert(ri != residues.end()); diff --git a/src/structure.cpp b/src/structure.cpp index 196c69a..fdf0b32 100644 --- a/src/structure.cpp +++ b/src/structure.cpp @@ -980,7 +980,7 @@ void MChain::WritePDB(std::ostream& os) os << (ter % (last->GetCAlpha().mSerial + 1) % kResidueInfo[last->GetType()].name % mChainID % last->GetNumber() % ' ') << std::endl; } -const MResidue* MChain::GetResidueBySeqNumber(uint16 inSeqNumber, +const MResidue* MChain::GetResidueBySeqNumber(int64 inSeqNumber, const std::string& inInsertionCode) const { const auto r = find_if(mResidues.begin(), mResidues.end(), @@ -1002,7 +1002,7 @@ void MChain::GetSequence(std::string& outSequence) const struct MResidueID { std::string chain; - uint16 seqNumber; + int64 seqNumber; std::string insertionCode; bool operator<(const MResidueID& o) const @@ -1160,12 +1160,12 @@ void MProtein::ReadPDB(std::istream& is, bool cAlphaOnly) std::pair ssbond; ssbond.first.chain = line[15]; - ssbond.first.seqNumber = boost::lexical_cast( + ssbond.first.seqNumber = boost::lexical_cast( ba::trim_copy(line.substr(16, 5))); ssbond.first.insertionCode = line[21]; ssbond.second.chain = line[29]; - ssbond.second.seqNumber = boost::lexical_cast( + ssbond.second.seqNumber = boost::lexical_cast( ba::trim_copy(line.substr(30, 5))); ssbond.second.insertionCode = line[35]; @@ -1220,7 +1220,7 @@ void MProtein::ReadPDB(std::istream& is, bool cAlphaOnly) atom.mChainID = line[21]; atom.mAuthChainID = atom.mChainID; // 23 - 26 Integer resSeq Residue sequence number. - atom.mResSeq = boost::lexical_cast( + atom.mResSeq = boost::lexical_cast( ba::trim_copy(line.substr(22, 4))); // 27 AChar iCode Code for insertion of residues. atom.mICode = line.substr(26, 1); @@ -1432,7 +1432,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly) continue; ssbond.first.chain = ss["ptnr1_label_asym_id"]; - ssbond.first.seqNumber = boost::lexical_cast( + ssbond.first.seqNumber = boost::lexical_cast( ss["ptnr1_label_seq_id"]); ssbond.first.insertionCode = ss["pdbx_ptnr1_PDB_ins_code"]; if (ssbond.first.insertionCode == "?") @@ -1442,7 +1442,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly) continue; ssbond.second.chain = ss["ptnr2_label_asym_id"]; - ssbond.second.seqNumber = boost::lexical_cast( + ssbond.second.seqNumber = boost::lexical_cast( ss["ptnr2_label_seq_id"]); ssbond.second.insertionCode = ss["pdbx_ptnr2_PDB_ins_code"]; if (ssbond.second.insertionCode == "?") @@ -1455,7 +1455,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly) char firstAltLoc = 0; // remap label_seq_id to auth_seq_id - std::map > seq_id_map; + std::map > seq_id_map; bool hasModelNum = false; int modelNum = 0; @@ -1488,7 +1488,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly) if (label_seq_id == "?" or label_seq_id == ".") seq_id_map[a.mChainID][a.mResSeq] = a.mResSeq; else - seq_id_map[a.mChainID][boost::lexical_cast(label_seq_id)] = a.mResSeq; + seq_id_map[a.mChainID][boost::lexical_cast(label_seq_id)] = a.mResSeq; a.mLoc.mX = ParseFloat(atom["Cartn_x"]); a.mLoc.mY = ParseFloat(atom["Cartn_y"]); @@ -1545,7 +1545,9 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly) } if (not atoms.empty()) + { AddResidue(atoms); + } // map the sulfur bridges uint32 ssbondNr = 1; @@ -1647,7 +1649,7 @@ void MProtein::GetStatistics(uint32& outNrOfResidues, uint32& outNrOfChains, if (donor[i].residue != nullptr and donor[i].energy < kMaxHBondEnergy) { ++outNrOfHBonds; - int32 k = donor[i].residue->GetNumber() - r->GetNumber(); + int64 k = donor[i].residue->GetNumber() - r->GetNumber(); if (k >= -5 and k <= 5) outNrOfHBondsPerDistance[k + 5] += 1; } @@ -1725,7 +1727,7 @@ void MProtein::AddResidue(const std::vector& inAtoms) if (not residues.empty()) prev = residues.back(); - int32 resNumber = mResidueCount + mChains.size() + mChainBreaks; + int64 resNumber = mResidueCount + mChains.size() + mChainBreaks; MResidue* r = new MResidue(resNumber, prev, inAtoms); // check for chain breaks if (prev != nullptr and not prev->ValidDistance(*r)) @@ -1794,8 +1796,10 @@ void MProtein::CalculateSecondaryStructure(bool inPreferPiHelices) std::vector residues; residues.reserve(mResidueCount); foreach (const MChain* chain, mChains) + { residues.insert(residues.end(), chain->GetResidues().begin(), chain->GetResidues().end()); + } if (VERBOSE) std::cerr << "using " << residues.size() << " residues" << std::endl; @@ -2254,7 +2258,7 @@ void MProtein::SetChain(const std::string& inChainID, const MChain& inChain) // Non-const overload, implemented in terms of the const overload MResidue* MProtein::GetResidue(const std::string& inChainID, - uint16 inSeqNumber, + int64 inSeqNumber, const std::string& inInsertionCode) { return const_cast( static_cast( *this ).GetResidue( @@ -2266,7 +2270,7 @@ MResidue* MProtein::GetResidue(const std::string& inChainID, // Const overload const MResidue* MProtein::GetResidue(const std::string& inChainID, - uint16 inSeqNumber, + int64 inSeqNumber, const std::string& inInsertionCode) const { const MChain& chain = GetChain(inChainID); @@ -2287,7 +2291,7 @@ void MProtein::GetCAlphaLocations(const std::string& inChainID, } MPoint MProtein::GetCAlphaPosition(const std::string& inChainID, - int16 inPDBResSeq) const + int64 inPDBResSeq) const { std::string chainID = inChainID; if (chainID.empty()) diff --git a/src/structure.h b/src/structure.h index 849729d..c82a6ba 100644 --- a/src/structure.h +++ b/src/structure.h @@ -225,11 +225,11 @@ class MResidue // bridge functions MBridgeType TestBridge(MResidue* inResidue) const; - int16 GetSeqNumber() const { return mSeqNumber; } + int64 GetSeqNumber() const { return mSeqNumber; } std::string GetInsertionCode() const { return mInsertionCode; } - void SetNumber(uint16 inNumber) { mNumber = inNumber; } - uint16 GetNumber() const { return mNumber; } + void SetNumber(int64 inNumber) { mNumber = inNumber; } + int64 GetNumber() const { return mNumber; } void Translate(const MPoint& inTranslation); void Rotate(const MQuaternion& inRotation); @@ -265,7 +265,7 @@ class MResidue std::string mChainID; MResidue* mPrev; MResidue* mNext; - int32 mSeqNumber, mNumber; + int64 mSeqNumber, mNumber; std::string mInsertionCode; MResidueType mType; uint8 mSSBridgeNr; @@ -302,7 +302,7 @@ class MChain std::string GetAuthChainID(void) const; void SetAuthChainID(const std::string &inAuthChainID); - const MResidue* GetResidueBySeqNumber(uint16 inSeqNumber, + const MResidue* GetResidueBySeqNumber(int64 inSeqNumber, const std::string& inInsertionCode) const; void GetSequence(std::string& outSequence) const; @@ -354,7 +354,7 @@ class MProtein void GetCAlphaLocations(const std::string& inChainID, std::vector& outPoints) const; MPoint GetCAlphaPosition(const std::string& inChainID, - int16 inPDBResSeq) const; + int64 inPDBResSeq) const; void GetSequence(const std::string& inChainID, entry& outEntry) const; @@ -384,10 +384,10 @@ class MProtein template void GetSequences(OutputIterator outSequences) const; - MResidue* GetResidue(const std::string& inChainID, uint16 inSeqNumber, + MResidue* GetResidue(const std::string& inChainID, int64 inSeqNumber, const std::string& inInsertionCode); - const MResidue* GetResidue(const std::string& inChainID, uint16 inSeqNumber, + const MResidue* GetResidue(const std::string& inChainID, int64 inSeqNumber, const std::string& inInsertionCode) const; // statistics