Skip to content

Commit b319a29

Browse files
authored
Add ability to filter out alternate atom locations from PDB (#831)
* Add warnings about alternate residue location identifiers * Add keyword for only keeping certain alternate atom locations * Add keepaltloc for PDB trajin * If not filtering atom alt loc. IDs, give a warning that they are there. * Create a verbose form of pdb_Box that reports the box parameters (old behavior), and a terse form that just reads the box and warns if it looks weird (so CRYST1 output is not repeated). * Add test for alt loc id filtering * Increment revision for PDB alt atom location ID filtering * Add keepaltloc keyword for parm and trajin
1 parent 71ffc69 commit b319a29

File tree

12 files changed

+288
-26
lines changed

12 files changed

+288
-26
lines changed

doc/cpptraj.lyx

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12399,10 +12399,17 @@ reference "tab:cpptraj-Topology-formats"
1239912399

1240012400
\emph on
1240112401
PDB format:
12402+
\begin_inset CommandInset label
12403+
LatexCommand label
12404+
name "subsec:cpptraj-parm-pdb"
12405+
12406+
\end_inset
12407+
12408+
1240212409
\end_layout
1240312410

1240412411
\begin_layout LyX-Code
12405-
[pqr] [readbox] [conect] [noconect] [link] [nolink]
12412+
[pqr] [readbox] [conect] [noconect] [link] [nolink] [keepaltloc <char>]
1240612413
\begin_inset Separator latexpar
1240712414
\end_inset
1240812415

@@ -12436,6 +12443,19 @@ PDB format:
1243612443
[nolink] Do not read LINK records if present (default).
1243712444
\end_layout
1243812445

12446+
\begin_layout Description
12447+
[keepaltloc
12448+
\begin_inset space ~
12449+
\end_inset
12450+
12451+
<char>] If specified, only keep alternate atom location IDs matching the
12452+
specified character
12453+
\series bold
12454+
<char>
12455+
\series default
12456+
.
12457+
\end_layout
12458+
1243912459
\end_deeper
1244012460
\begin_layout Paragraph*
1244112461

@@ -12469,10 +12489,34 @@ If this is the case
1246912489
\shape italic
1247012490
cpptraj
1247112491
\shape default
12472-
will print a warning about duplicate atom names but will take no other
12473-
action.
12492+
will print a warning about alternate location IDs being present but will
12493+
take no other action.
1247412494
Both residues are considered 'CYS' and the mask ':CYS@CA' would select
12475-
both atom 806 and 809.
12495+
both atom 806 and 808.
12496+
If desired, a specific location ID can be kept via the
12497+
\series bold
12498+
keepaltloc
12499+
\series default
12500+
keyword.
12501+
If
12502+
\series bold
12503+
keepaltloc
12504+
\series default
12505+
is specified, it should also be specified for any
12506+
\series bold
12507+
trajin
12508+
\series default
12509+
commands (see
12510+
\begin_inset CommandInset ref
12511+
LatexCommand vref
12512+
reference "subsec:cpptraj-trajin-pdb"
12513+
plural "false"
12514+
caps "false"
12515+
noprefix "false"
12516+
12517+
\end_inset
12518+
12519+
).
1247612520
Residue insertion codes are read in but also not used by the mask parser.
1247712521
\end_layout
1247812522

@@ -15002,6 +15046,64 @@ shape Force reading of box information as shape matrix.
1500215046
\end_layout
1500315047

1500415048
\end_deeper
15049+
\begin_layout Subsubsection
15050+
Options for PDB files:
15051+
\begin_inset CommandInset label
15052+
LatexCommand label
15053+
name "subsec:cpptraj-trajin-pdb"
15054+
15055+
\end_inset
15056+
15057+
15058+
\end_layout
15059+
15060+
\begin_layout LyX-Code
15061+
[keepaltloc <char>]
15062+
\end_layout
15063+
15064+
\begin_deeper
15065+
\begin_layout Description
15066+
[keepaltloc
15067+
\begin_inset space ~
15068+
\end_inset
15069+
15070+
<char>] If specified, only keep alternate atom location IDs matching the
15071+
specified character
15072+
\series bold
15073+
<char>
15074+
\series default
15075+
.
15076+
\end_layout
15077+
15078+
\end_deeper
15079+
\begin_layout Standard
15080+
Note that if
15081+
\series bold
15082+
keepaltloc
15083+
\series default
15084+
is specified, the associated topology should not have alternate location
15085+
IDs, i.e.
15086+
if the topology is from a PDB the
15087+
\series bold
15088+
keepaltloc
15089+
\series default
15090+
keyword may need to be used with the
15091+
\series bold
15092+
parm
15093+
\series default
15094+
command (see
15095+
\begin_inset CommandInset ref
15096+
LatexCommand vref
15097+
reference "subsec:cpptraj-parm-pdb"
15098+
plural "false"
15099+
caps "false"
15100+
noprefix "false"
15101+
15102+
\end_inset
15103+
15104+
).
15105+
\end_layout
15106+
1500515107
\begin_layout Subsection
1500615108
trajout
1500715109
\begin_inset CommandInset label

src/PDBfile.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,12 @@ PDBfile::PDB_RECTYPE PDBfile::NextRecord() {
127127
return recType_;
128128
}
129129

130+
/** \return Atom alt. loc. code from PDB ATOM/HETATM line. */
131+
char PDBfile::pdb_AltLoc() const {
132+
return linebuffer_[16];
133+
}
134+
135+
/** \return Atom containing information from PDB ATOM/HETATM line. */
130136
Atom PDBfile::pdb_Atom(char& altLoc, int& atnum) {
131137
// ATOM or HETATM keyword.
132138
// Check line length before any modification.
@@ -214,7 +220,8 @@ void PDBfile::pdb_ChargeAndRadius(float& charge, float& radius) {
214220
sscanf(linebuffer_+54, "%f %f", &charge, &radius);
215221
}
216222

217-
void PDBfile::pdb_Box(double* box) {
223+
/** Set box[0-5] with A B C ALPHA BETA GAMMA from CRYST1 record. */
224+
void PDBfile::readCRYST1(double* box) {
218225
// CRYST1 keyword. RECORD A B C ALPHA BETA GAMMA SGROUP Z
219226
unsigned int lb_size = strlen(linebuffer_);
220227
if (lb_size < 54) {
@@ -238,14 +245,30 @@ void PDBfile::pdb_Box(double* box) {
238245
box[ib] = atof( linebuffer_ + lb );
239246
linebuffer_[end] = savechar;
240247
}
241-
mprintf("\tRead CRYST1 info from PDB: a=%g b=%g c=%g alpha=%g beta=%g gamma=%g\n",
242-
box[0], box[1], box[2], box[3], box[4], box[5]);
243-
// Warn if the box looks strange.
248+
}
249+
250+
/** Print a warning to STDOUT if unit cell lengths are strange. */
251+
static inline void box_warning(const double* box) {
244252
if (box[0] == 1.0 && box[1] == 1.0 && box[2] == 1.0)
245253
mprintf("Warning: PDB cell lengths are all 1.0 Ang.;"
246254
" this usually indicates an invalid box.\n");
247255
}
248256

257+
/** Read box info from CRYST1, verbose. */
258+
void PDBfile::pdb_Box_verbose(double* box) {
259+
readCRYST1(box);
260+
box_warning(box);
261+
mprintf("\tRead CRYST1 info from PDB: a=%g b=%g c=%g alpha=%g beta=%g gamma=%g\n",
262+
box[0], box[1], box[2], box[3], box[4], box[5]);
263+
}
264+
265+
/** Read box info from CRYST1, warn only if box is strange looking. */
266+
void PDBfile::pdb_Box_terse(double* box) {
267+
readCRYST1(box);
268+
box_warning(box);
269+
}
270+
271+
/** Read serial #s of atoms from a CONECT record. */
249272
int PDBfile::pdb_Bonds(int* bnd) {
250273
unsigned int lb_size = strlen(linebuffer_);
251274
int Nscan = 0;

src/PDBfile.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class PDBfile : public CpptrajFile {
1717
static bool ID_PDB(CpptrajFile&);
1818
/// \return the type of the next PDB record read.
1919
PDB_RECTYPE NextRecord();
20+
/// \return ATOM/HETATM alt. loc ID
21+
char pdb_AltLoc() const;
2022
/// \return Atom info with name and element for ATOM/HETATM; set altLoc and #.
2123
Atom pdb_Atom(char&, int&);
2224
/// \return Atom info with name and element for ATOM/HETATM.
@@ -29,8 +31,10 @@ class PDBfile : public CpptrajFile {
2931
void pdb_OccupancyAndBfactor(float&, float&);
3032
/// Get charge and radius from PQR ATOM/HETATM record.
3133
void pdb_ChargeAndRadius(float&, float&);
32-
/// Set given XYZ array with A/B/C/alpha/beta/gamma from CRYST1 record.
33-
void pdb_Box(double*);
34+
/// Set given XYZ array with A/B/C/alpha/beta/gamma from CRYST1 record, verbose.
35+
void pdb_Box_verbose(double*);
36+
/// Set given XYZ array with A/B/C/alpha/beta/gamma from CRYST1 record, terse.
37+
void pdb_Box_terse(double*);
3438
/// Set given array with atom and #s of bonded atoms from CONECT record.
3539
int pdb_Bonds(int*);
3640
/// \return Link record.
@@ -84,6 +88,8 @@ class PDBfile : public CpptrajFile {
8488
private:
8589
/// \return true if the first 6 chars of buffer match a PDB keyword
8690
static bool IsPDBkeyword(std::string const&);
91+
/// Read box info from CRYST1 record
92+
void readCRYST1(double*);
8793

8894
int anum_; ///< Atom number for writing.
8995
PDB_RECTYPE recType_; ///< Current record type.

src/Parm_PDB.cpp

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,27 @@
66
# include "Timer.h"
77
#endif
88

9+
/** CONSTRUCTOR */
10+
Parm_PDB::Parm_PDB() :
11+
ConectMode_(UNSPECIFIED),
12+
LinkMode_(UNSPECIFIED),
13+
keepAltLoc_(' '),
14+
readAsPQR_(false),
15+
readBox_(false) {}
16+
17+
// Parm_PDB::ReadHelp()
918
void Parm_PDB::ReadHelp() {
10-
mprintf("\tpqr : Read atomic charge/radius from occupancy/B-factor columns (PQR).\n"
11-
"\treadbox : Read unit cell information from CRYST1 record if present.\n"
12-
"\tconect : Read CONECT records if present (default).\n"
13-
"\tnoconect : Do not read CONECT records if present.\n"
14-
"\tlink : Read LINK records if present.\n"
15-
"\tnolink : Do not read LINK records if present (default).\n");
19+
mprintf("\tpqr : Read atomic charge/radius from occupancy/B-factor columns (PQR).\n"
20+
"\treadbox : Read unit cell information from CRYST1 record if present.\n"
21+
"\tconect : Read CONECT records if present (default).\n"
22+
"\tnoconect : Do not read CONECT records if present.\n"
23+
"\tlink : Read LINK records if present.\n"
24+
"\tnolink : Do not read LINK records if present (default).\n"
25+
"\tkeepaltloc <char> : If specified, alternate location ID to keep.\n"
26+
);
1627
}
1728

29+
// Parm_PDB::processReadArgs()
1830
int Parm_PDB::processReadArgs(ArgList& argIn) {
1931
readAsPQR_ = argIn.hasKey("pqr");
2032
readBox_ = argIn.hasKey("readbox");
@@ -26,9 +38,13 @@ int Parm_PDB::processReadArgs(ArgList& argIn) {
2638
LinkMode_ = READ;
2739
else if (argIn.hasKey("nolink"))
2840
LinkMode_ = SKIP;
41+
std::string keepAltChar = argIn.GetStringKey("keepaltloc");
42+
if (!keepAltChar.empty())
43+
keepAltLoc_ = keepAltChar[0];
2944
return 0;
3045
}
3146

47+
// Parm_PDB::ReadParm()
3248
int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) {
3349
typedef std::vector<PDBfile::Link> Larray;
3450
PDBfile infile;
@@ -64,16 +80,19 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) {
6480
mprintf("\tReading bond info from LINK records.\n");
6581
else
6682
mprintf("\tNot reading bond info from LINK records.\n");
83+
if (keepAltLoc_ != ' ')
84+
mprintf("\tWhen present, only reading alternate location ID %c\n", keepAltLoc_);
6785
# ifdef TIMER
6886
Timer time_total, time_atom;
6987
time_total.Start();
7088
# endif
7189
bool missingResidues = false;
90+
int nAltLocSkipped = 0;
7291
// Loop over PDB records
7392
while ( infile.NextRecord() != PDBfile::END_OF_FILE ) {
7493
if (readBox_ && infile.RecType() == PDBfile::CRYST1) {
7594
// Box info from CRYST1 record.
76-
infile.pdb_Box( XYZ );
95+
infile.pdb_Box_verbose( XYZ );
7796
TopIn.SetParmBox( XYZ );
7897
} else if (infile.RecType() == PDBfile::CONECT && readConect) {
7998
// BOND - first element will be atom, next few are bonded atoms.
@@ -101,6 +120,11 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) {
101120
// If this is an ATOM / HETATM keyword, add to topology.
102121
infile.pdb_XYZ( XYZ );
103122
Atom pdbAtom = infile.pdb_Atom(altLoc, atnum);
123+
// Check if we are filtering alt loc IDs
124+
if (keepAltLoc_ != ' ' && altLoc != ' ' && altLoc != keepAltLoc_) {
125+
nAltLocSkipped++;
126+
continue;
127+
}
104128
if (atnum >= (int)serial.size())
105129
serial.resize( atnum+1, -1 );
106130
serial[atnum] = TopIn.Natom();
@@ -113,6 +137,12 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) {
113137
TopIn.AddExtraAtomInfo( AtomExtra(occupancy, bfactor, altLoc) );
114138
}
115139
TopIn.AddTopAtom(pdbAtom, infile.pdb_Residue());
140+
if (altLoc != ' ' && keepAltLoc_ == ' ') {
141+
Residue const& lastRes = TopIn.Res(TopIn.Nres()-1);
142+
mprintf("Warning: Atom %i %s in res %s %i %c has alternate location specifier %c\n",
143+
atnum, *pdbAtom.Name(), *lastRes.Name(), lastRes.OriginalResNum(),
144+
lastRes.ChainId(), altLoc);
145+
}
116146
Coords.AddXYZ( XYZ );
117147
# ifdef TIMER
118148
time_atom.Stop();
@@ -131,7 +161,9 @@ int Parm_PDB::ReadParm(FileName const& fname, Topology &TopIn) {
131161
if (readLink)
132162
mprintf("Warning: If molecule determination fails try not specifying 'link' instead.\n");
133163
}
134-
}
164+
} // END loop over PDB records
165+
if (nAltLocSkipped > 0)
166+
mprintf("\tSkipped %i alternate atom locations.\n", nAltLocSkipped);
135167
// Sanity check
136168
if (TopIn.Natom() < 1) {
137169
mprinterr("Error: No atoms present in PDB.\n");

src/Parm_PDB.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "ParmIO.h"
44
class Parm_PDB : public ParmIO {
55
public :
6-
Parm_PDB() : ConectMode_(UNSPECIFIED), LinkMode_(UNSPECIFIED), readAsPQR_(false), readBox_(false) {}
6+
Parm_PDB();
77
static BaseIOtype* Alloc() { return (BaseIOtype*)new Parm_PDB(); }
88
static void ReadHelp();
99
bool ID_ParmFormat(CpptrajFile&);
@@ -15,6 +15,7 @@ class Parm_PDB : public ParmIO {
1515
enum ReadType { UNSPECIFIED = 0, READ, SKIP };
1616
ReadType ConectMode_; ///< Specify how to handle CONECT records.
1717
ReadType LinkMode_; ///< Specify how to handle LINK records.
18+
char keepAltLoc_; ///< Alternate location to keep
1819
bool readAsPQR_; ///< If true get charge and radius from occ/b factor cols
1920
bool readBox_; ///< If true try to read CRYST1 record as box info.
2021
};

src/ReferenceAction.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ int ReferenceAction::InitRef(ArgList& argIn, DataSetList const& DSLin,
131131

132132
// ReferenceAction::SetupRefMask()
133133
int ReferenceAction::SetupRefMask(Topology const& topIn) {
134+
mprintf("\tReference topology: %s\n", topIn.c_str());
134135
if (refMask_.MaskStringSet()) {
135136
if (topIn.SetupIntegerMask( refMask_ )) return 1;
136137
mprintf("\tReference mask:");

0 commit comments

Comments
 (0)