Skip to content

Commit

Permalink
Fix large mmcif (#117)
Browse files Browse the repository at this point in the history
* use int64 for residue numbers

* bump version
  • Loading branch information
cbaakman authored Aug 21, 2019
1 parent 2a720b7 commit f9b7a0e
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 54 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.69])
AC_INIT([xssp], [3.0.9], [Coos.Baakman@radboudumc.nl])
AC_INIT([xssp], [3.0.10], [Coos.Baakman@radboudumc.nl])
AM_INIT_AUTOMAKE([subdir-objects])
AC_CONFIG_SRCDIR([config.h.in])
AC_CONFIG_HEADERS([config.h])
Expand Down
10 changes: 5 additions & 5 deletions src/align-2d.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,24 @@ struct entry
, m_seq(seq)
, m_weight(weight) {}

uint32 nr() const { return m_nr; }
int64 nr() const { return m_nr; }
float weight() const { return m_weight; }
uint32 length() const { return static_cast<uint32>(m_seq.length()); }

void insert_gap(uint32 pos);
void insert_gap(int64 pos);
void append_gap();

void remove_gap(uint32 pos);
void remove_gap(int64 pos);

void remove_gaps();
void dump_positions() { m_positions.clear(); }

uint32 m_nr;
int64 m_nr;
std::string m_id;
sequence m_seq;
sec_structure m_ss;
float m_weight;
std::vector<int16> m_positions;
std::vector<int64> m_positions;
};

// --------------------------------------------------------------------
Expand Down
38 changes: 19 additions & 19 deletions src/blast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const uint32
kGappedDropOffFinal = 25,
kGapTrigger = 22,

kMaxSequenceLength = std::numeric_limits<uint16>::max();
kMaxSequenceLength = std::numeric_limits<uint32>::max();

const int32
kHitWindow = 40;
Expand Down Expand Up @@ -826,8 +826,8 @@ class WordHitIterator

struct Entry
{
uint16 mCount;
uint16 mDataOffset;
uint32 mCount;
uint32 mDataOffset;
};

public:
Expand All @@ -838,7 +838,7 @@ class WordHitIterator
struct WordHitIteratorStaticData
{
std::vector<Entry> mLookup;
std::vector<uint16> mOffsets;
std::vector<uint32> mOffsets;
};

WordHitIterator(const WordHitIteratorStaticData& inStaticData)
Expand All @@ -858,19 +858,19 @@ class WordHitIterator
WordHitIteratorStaticData& outStaticData);

void Reset(const sequence& inTarget);
bool Next(uint16& outQueryOffset, uint16& outTargetOffset);
bool Next(uint32& outQueryOffset, uint32& outTargetOffset);
uint32 Index() const { return mIndex; }

private:

const uint8* mTargetCurrent;
const uint8* mTargetEnd;
uint16 mTargetOffset;
uint32 mTargetOffset;
const std::vector<Entry>& mLookup;
const std::vector<uint16>& mOffsets;
const std::vector<uint32>& mOffsets;
uint32 mIndex;
const uint16* mOffset;
uint16 mCount;
const uint32* mOffset;
uint32 mCount;
};

template<> const uint32 WordHitIterator<2>::kMask = 0x0001F;
Expand All @@ -886,9 +886,9 @@ void WordHitIterator<WORDSIZE>::Init(const sequence& inQuery,
uint64 N = IWord::kMaxWordIndex;
size_t M = 0;

std::vector<std::vector<uint16>> test(N);
std::vector<std::vector<uint32>> test(N);

for (uint16 i = 0; i < inQuery.length() - WORDSIZE + 1; ++i)
for (uint32 i = 0; i < inQuery.length() - WORDSIZE + 1; ++i)
{
IWord w(inQuery.c_str() + i);

Expand All @@ -903,14 +903,14 @@ void WordHitIterator<WORDSIZE>::Init(const sequence& inQuery,
}

outStaticData.mLookup = std::vector<Entry>(N);
outStaticData.mOffsets = std::vector<uint16>(M);
outStaticData.mOffsets = std::vector<uint32>(M);

uint16* data = &outStaticData.mOffsets[0];
uint32* data = &outStaticData.mOffsets[0];

for (uint32 i = 0; i < N; ++i)
{
outStaticData.mLookup[i].mCount = static_cast<uint16>(test[i].size());
outStaticData.mLookup[i].mDataOffset = static_cast<uint16>(
outStaticData.mLookup[i].mCount = static_cast<uint32>(test[i].size());
outStaticData.mLookup[i].mDataOffset = static_cast<uint32>(
data - &outStaticData.mOffsets[0]);

for (uint32 j = 0; j < outStaticData.mLookup[i].mCount; ++j)
Expand Down Expand Up @@ -940,8 +940,8 @@ void WordHitIterator<WORDSIZE>::Reset(const sequence& inTarget)
}

template<int WORDSIZE>
bool WordHitIterator<WORDSIZE>::Next(uint16& outQueryOffset,
uint16& outTargetOffset)
bool WordHitIterator<WORDSIZE>::Next(uint32& outQueryOffset,
uint32& outTargetOffset)
{
bool result = false;

Expand Down Expand Up @@ -999,7 +999,7 @@ struct DiagonalStartTable
std::fill(mTable, mTable + n, -inTargetLength);
}

int32& operator()(uint16 inQueryOffset, uint16 inTargetOffset)
int32& operator()(uint32 inQueryOffset, uint32 inTargetOffset)
{
return mTable[mTargetLength - inTargetOffset + inQueryOffset];
}
Expand Down Expand Up @@ -1501,7 +1501,7 @@ void BlastQuery<WORDSIZE>::SearchPart(const char* inFasta, size_t inLength,
iter.Reset(target);
diagonals.Reset(queryLength, static_cast<int32>(target.length()));

uint16 queryOffset, targetOffset;
uint32 queryOffset, targetOffset;
while (iter.Next(queryOffset, targetOffset))
{
++hitsToDb;
Expand Down
8 changes: 5 additions & 3 deletions src/dssp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ std::string ResidueToDSSPLine(const MResidue& residue)
char chirality;
std::tie(alpha,chirality) = residue.Alpha();

uint32 bp[2] = {};
int64 bp[2] = {};
char bridgelabel[2] = { ' ', ' ' };
for (uint32 i = 0; i < 2; ++i)
{
Expand Down Expand Up @@ -107,13 +107,13 @@ std::string ResidueToDSSPLine(const MResidue& residue)

if (acceptors[i].residue != nullptr)
{
int32 d = acceptors[i].residue->GetNumber() - residue.GetNumber();
int64 d = acceptors[i].residue->GetNumber() - residue.GetNumber();
NHO[i] = (boost::format("%d,%3.1f") % d % acceptors[i].energy).str();
}

if (donors[i].residue != nullptr)
{
int32 d = donors[i].residue->GetNumber() - residue.GetNumber();
int64 d = donors[i].residue->GetNumber() - residue.GetNumber();
ONH[i] = (boost::format("%d,%3.1f") % d % donors[i].energy).str();
}
}
Expand Down Expand Up @@ -229,7 +229,9 @@ void WriteDSSP(MProtein& protein, std::ostream& os)
foreach (const MChain* chain, protein.GetChains())
{
foreach (const MResidue* residue, chain->GetResidues())
{
residues.push_back(residue);
}
}

// keep residues sorted by residue number as assigned during reading the PDB file
Expand Down
8 changes: 4 additions & 4 deletions src/hssp-nt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ float calculateDistance(const sequence& a, const sequence& b)
M += B(x - 1, y - 1);

float s;
uint16 i = 0;
uint32 i = 0;
if (a[x] == b[y])
i = 1;

Expand Down Expand Up @@ -160,8 +160,8 @@ struct MResInfo
uint8 m_letter;
std::string m_chain_id,
m_auth_chain_id;
uint32 m_seq_nr;
int32 m_pdb_nr;
int64 m_seq_nr;
int64 m_pdb_nr;
std::string m_ins_code;
MSecondaryStructure m_ss;
std::string m_dssp;
Expand Down Expand Up @@ -372,7 +372,7 @@ MProfile::MProfile(const MChain& inChain, const sequence& inSequence,
const std::vector<MResidue*>& residues = m_chain.GetResidues();
std::vector<MResidue*>::const_iterator ri = residues.begin();

uint32 seq_nr = 1;
int64 seq_nr = 1;
for (uint32 i = 0; i < inSequence.length(); ++i)
{
assert(ri != residues.end());
Expand Down
32 changes: 18 additions & 14 deletions src/structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,7 @@ void MChain::WritePDB(std::ostream& os)
os << (ter % (last->GetCAlpha().mSerial + 1) % kResidueInfo[last->GetType()].name % mChainID % last->GetNumber() % ' ') << std::endl;
}

const MResidue* MChain::GetResidueBySeqNumber(uint16 inSeqNumber,
const MResidue* MChain::GetResidueBySeqNumber(int64 inSeqNumber,
const std::string& inInsertionCode) const
{
const auto r = find_if(mResidues.begin(), mResidues.end(),
Expand All @@ -1002,7 +1002,7 @@ void MChain::GetSequence(std::string& outSequence) const
struct MResidueID
{
std::string chain;
uint16 seqNumber;
int64 seqNumber;
std::string insertionCode;

bool operator<(const MResidueID& o) const
Expand Down Expand Up @@ -1160,12 +1160,12 @@ void MProtein::ReadPDB(std::istream& is, bool cAlphaOnly)
std::pair<MResidueID,MResidueID> ssbond;

ssbond.first.chain = line[15];
ssbond.first.seqNumber = boost::lexical_cast<uint16>(
ssbond.first.seqNumber = boost::lexical_cast<int64>(
ba::trim_copy(line.substr(16, 5)));
ssbond.first.insertionCode = line[21];

ssbond.second.chain = line[29];
ssbond.second.seqNumber = boost::lexical_cast<uint16>(
ssbond.second.seqNumber = boost::lexical_cast<int64>(
ba::trim_copy(line.substr(30, 5)));
ssbond.second.insertionCode = line[35];

Expand Down Expand Up @@ -1220,7 +1220,7 @@ void MProtein::ReadPDB(std::istream& is, bool cAlphaOnly)
atom.mChainID = line[21];
atom.mAuthChainID = atom.mChainID;
// 23 - 26 Integer resSeq Residue sequence number.
atom.mResSeq = boost::lexical_cast<int16>(
atom.mResSeq = boost::lexical_cast<int64>(
ba::trim_copy(line.substr(22, 4)));
// 27 AChar iCode Code for insertion of residues.
atom.mICode = line.substr(26, 1);
Expand Down Expand Up @@ -1432,7 +1432,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
continue;

ssbond.first.chain = ss["ptnr1_label_asym_id"];
ssbond.first.seqNumber = boost::lexical_cast<uint16>(
ssbond.first.seqNumber = boost::lexical_cast<int64>(
ss["ptnr1_label_seq_id"]);
ssbond.first.insertionCode = ss["pdbx_ptnr1_PDB_ins_code"];
if (ssbond.first.insertionCode == "?")
Expand All @@ -1442,7 +1442,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
continue;

ssbond.second.chain = ss["ptnr2_label_asym_id"];
ssbond.second.seqNumber = boost::lexical_cast<uint16>(
ssbond.second.seqNumber = boost::lexical_cast<int64>(
ss["ptnr2_label_seq_id"]);
ssbond.second.insertionCode = ss["pdbx_ptnr2_PDB_ins_code"];
if (ssbond.second.insertionCode == "?")
Expand All @@ -1455,7 +1455,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
char firstAltLoc = 0;

// remap label_seq_id to auth_seq_id
std::map<std::string, std::map<int,int> > seq_id_map;
std::map<std::string, std::map<int64,int64> > seq_id_map;

bool hasModelNum = false;
int modelNum = 0;
Expand Down Expand Up @@ -1488,7 +1488,7 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
if (label_seq_id == "?" or label_seq_id == ".")
seq_id_map[a.mChainID][a.mResSeq] = a.mResSeq;
else
seq_id_map[a.mChainID][boost::lexical_cast<int16>(label_seq_id)] = a.mResSeq;
seq_id_map[a.mChainID][boost::lexical_cast<int64>(label_seq_id)] = a.mResSeq;

a.mLoc.mX = ParseFloat(atom["Cartn_x"]);
a.mLoc.mY = ParseFloat(atom["Cartn_y"]);
Expand Down Expand Up @@ -1545,7 +1545,9 @@ void MProtein::ReadmmCIF(std::istream& is, bool cAlphaOnly)
}

if (not atoms.empty())
{
AddResidue(atoms);
}

// map the sulfur bridges
uint32 ssbondNr = 1;
Expand Down Expand Up @@ -1647,7 +1649,7 @@ void MProtein::GetStatistics(uint32& outNrOfResidues, uint32& outNrOfChains,
if (donor[i].residue != nullptr and donor[i].energy < kMaxHBondEnergy)
{
++outNrOfHBonds;
int32 k = donor[i].residue->GetNumber() - r->GetNumber();
int64 k = donor[i].residue->GetNumber() - r->GetNumber();
if (k >= -5 and k <= 5)
outNrOfHBondsPerDistance[k + 5] += 1;
}
Expand Down Expand Up @@ -1725,7 +1727,7 @@ void MProtein::AddResidue(const std::vector<MAtom>& inAtoms)
if (not residues.empty())
prev = residues.back();

int32 resNumber = mResidueCount + mChains.size() + mChainBreaks;
int64 resNumber = mResidueCount + mChains.size() + mChainBreaks;
MResidue* r = new MResidue(resNumber, prev, inAtoms);
// check for chain breaks
if (prev != nullptr and not prev->ValidDistance(*r))
Expand Down Expand Up @@ -1794,8 +1796,10 @@ void MProtein::CalculateSecondaryStructure(bool inPreferPiHelices)
std::vector<MResidue*> residues;
residues.reserve(mResidueCount);
foreach (const MChain* chain, mChains)
{
residues.insert(residues.end(), chain->GetResidues().begin(),
chain->GetResidues().end());
}

if (VERBOSE)
std::cerr << "using " << residues.size() << " residues" << std::endl;
Expand Down Expand Up @@ -2254,7 +2258,7 @@ void MProtein::SetChain(const std::string& inChainID, const MChain& inChain)

// Non-const overload, implemented in terms of the const overload
MResidue* MProtein::GetResidue(const std::string& inChainID,
uint16 inSeqNumber,
int64 inSeqNumber,
const std::string& inInsertionCode)
{
return const_cast<MResidue *>( static_cast<const MProtein &>( *this ).GetResidue(
Expand All @@ -2266,7 +2270,7 @@ MResidue* MProtein::GetResidue(const std::string& inChainID,

// Const overload
const MResidue* MProtein::GetResidue(const std::string& inChainID,
uint16 inSeqNumber,
int64 inSeqNumber,
const std::string& inInsertionCode) const
{
const MChain& chain = GetChain(inChainID);
Expand All @@ -2287,7 +2291,7 @@ void MProtein::GetCAlphaLocations(const std::string& inChainID,
}

MPoint MProtein::GetCAlphaPosition(const std::string& inChainID,
int16 inPDBResSeq) const
int64 inPDBResSeq) const
{
std::string chainID = inChainID;
if (chainID.empty())
Expand Down
Loading

0 comments on commit f9b7a0e

Please sign in to comment.