Skip to content

Commit

Permalink
Merge pull request #2489 from digit-google/safer-depslog-parser
Browse files Browse the repository at this point in the history
Make the deps_log parser report corrupted files instead of crashing.
  • Loading branch information
jhasse authored Oct 4, 2024
2 parents 4c59ee0 + 197dbdb commit fc2f81e
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 22 deletions.
53 changes: 35 additions & 18 deletions src/deps_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
#include "deps_log.h"

#include <assert.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#ifndef _WIN32
#include <unistd.h>
Expand All @@ -34,12 +35,14 @@ using namespace std;

// The version is stored as 4 bytes after the signature and also serves as a
// byte order mark. Signature and version combined are 16 bytes long.
const char kFileSignature[] = "# ninjadeps\n";
const int kCurrentVersion = 4;
static const char kFileSignature[] = "# ninjadeps\n";
static const size_t kFileSignatureSize = sizeof(kFileSignature) - 1u;

static const int32_t kCurrentVersion = 4;

// Record size is currently limited to less than the full 32 bit, due to
// internal buffers having to have this size.
const unsigned kMaxRecordSize = (1 << 19) - 1;
static constexpr size_t kMaxRecordSize = (1 << 19) - 1;

DepsLog::~DepsLog() {
Close();
Expand Down Expand Up @@ -160,16 +163,18 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
return LOAD_ERROR;
}

bool valid_header = true;
int version = 0;
if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
valid_header = false;
bool valid_header = fread(buf, kFileSignatureSize, 1, f) == 1 &&
!memcmp(buf, kFileSignature, kFileSignatureSize);

int32_t version = 0;
bool valid_version =
fread(&version, 4, 1, f) == 1 && version == kCurrentVersion;

// Note: For version differences, this should migrate to the new format.
// But the v1 format could sometimes (rarely) end up with invalid data, so
// don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
// and there was no release with it, so pretend that it never happened.)
if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
version != kCurrentVersion) {
if (!valid_header || !valid_version) {
if (version == 1)
*err = "deps log version change; rebuilding";
else
Expand Down Expand Up @@ -203,7 +208,10 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
}

if (is_deps) {
assert(size % 4 == 0);
if ((size % 4) != 0) {
read_failed = true;
break;
}
int* deps_data = reinterpret_cast<int*>(buf);
int out_id = deps_data[0];
TimeStamp mtime;
Expand All @@ -212,10 +220,18 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
deps_data += 3;
int deps_count = (size / 4) - 3;

for (int i = 0; i < deps_count; ++i) {
int node_id = deps_data[i];
if (node_id >= (int)nodes_.size() || !nodes_[node_id]) {
read_failed = true;
break;
}
}
if (read_failed)
break;

Deps* deps = new Deps(mtime, deps_count);
for (int i = 0; i < deps_count; ++i) {
assert(deps_data[i] < (int)nodes_.size());
assert(nodes_[deps_data[i]]);
deps->nodes[i] = nodes_[deps_data[i]];
}

Expand All @@ -224,7 +240,10 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
++unique_dep_record_count;
} else {
int path_size = size - 4;
assert(path_size > 0); // CanonicalizePath() rejects empty paths.
if (path_size <= 0) {
read_failed = true;
break;
}
// There can be up to 3 bytes of padding.
if (buf[path_size - 1] == '\0') --path_size;
if (buf[path_size - 1] == '\0') --path_size;
Expand All @@ -244,12 +263,10 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
int expected_id = ~checksum;
int id = nodes_.size();
if (id != expected_id) {
if (id != expected_id || node->id() >= 0) {
read_failed = true;
break;
}

assert(node->id() < 0);
node->set_id(id);
nodes_.push_back(node);
}
Expand Down Expand Up @@ -384,6 +401,7 @@ bool DepsLog::UpdateDeps(int out_id, Deps* deps) {

bool DepsLog::RecordId(Node* node) {
int path_size = node->path().size();
assert(path_size > 0 && "Trying to record empty path Node!");
int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.

unsigned size = path_size + padding + 4;
Expand All @@ -398,7 +416,6 @@ bool DepsLog::RecordId(Node* node) {
if (fwrite(&size, 4, 1, file_) < 1)
return false;
if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
assert(!node->path().empty());
return false;
}
if (padding && fwrite("\0\0", padding, 1, file_) < 1)
Expand Down
140 changes: 137 additions & 3 deletions src/deps_log_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <unistd.h>
#endif

#include "disk_interface.h"
#include "graph.h"
#include "util.h"
#include "test.h"
Expand All @@ -34,9 +35,7 @@ struct DepsLogTest : public testing::Test {
// In case a crashing test left a stale file behind.
unlink(kTestFilename);
}
virtual void TearDown() {
unlink(kTestFilename);
}
virtual void TearDown() { unlink(kTestFilename); }
};

TEST_F(DepsLogTest, WriteRead) {
Expand Down Expand Up @@ -542,4 +541,139 @@ TEST_F(DepsLogTest, ReverseDepsNodes) {
EXPECT_TRUE(rev_deps == state.GetNode("out.o", 0));
}

TEST_F(DepsLogTest, MalformedDepsLog) {
std::string err;
{
State state;
DepsLog log;
EXPECT_TRUE(log.OpenForWrite(kTestFilename, &err));
ASSERT_EQ("", err);

// First, create a valid log file.
std::vector<Node*> deps;
deps.push_back(state.GetNode("foo.hh", 0));
deps.push_back(state.GetNode("bar.hpp", 0));
log.RecordDeps(state.GetNode("out.o", 0), 1, deps);
log.Close();
}

// Now read its value, validate it a little.
RealDiskInterface disk;

std::string original_contents;
ASSERT_EQ(FileReader::Okay, disk.ReadFile(kTestFilename,
&original_contents, &err));

const size_t version_offset = 12;
ASSERT_EQ("# ninjadeps\n", original_contents.substr(0, version_offset));
ASSERT_EQ('\x04', original_contents[version_offset + 0]);
ASSERT_EQ('\x00', original_contents[version_offset + 1]);
ASSERT_EQ('\x00', original_contents[version_offset + 2]);
ASSERT_EQ('\x00', original_contents[version_offset + 3]);

// clang-format off
static const uint8_t kFirstRecord[] = {
// size field == 0x0000000c
0x0c, 0x00, 0x00, 0x00,
// name field = 'out.o' + 3 bytes of padding.
'o', 'u', 't', '.', 'o', 0x00, 0x00, 0x00,
// checksum = ~0
0xff, 0xff, 0xff, 0xff,
};
// clang-format on
const size_t kFirstRecordLen = sizeof(kFirstRecord);
const size_t first_offset = version_offset + 4;

#define COMPARE_RECORD(start_pos, reference, len) \
ASSERT_EQ(original_contents.substr(start_pos, len), std::string(reinterpret_cast<const char*>(reference), len))

COMPARE_RECORD(first_offset, kFirstRecord, kFirstRecordLen);

const size_t second_offset = first_offset + kFirstRecordLen;
// clang-format off
static const uint8_t kSecondRecord[] = {
// size field == 0x0000000c
0x0c, 0x00, 0x00, 0x00,
// name field = 'foo.hh' + 2 bytes of padding.
'f', 'o', 'o', '.', 'h', 'h', 0x00, 0x00,
// checksum = ~1
0xfe, 0xff, 0xff, 0xff,
};
// clang-format on
const size_t kSecondRecordLen = sizeof(kSecondRecord);
COMPARE_RECORD(second_offset, kSecondRecord, kSecondRecordLen);

// Then start generating corrupted versions and trying to load them.
const char kBadLogFile[] = "DepsLogTest-corrupted.tempfile";

// Helper lambda to rewrite the bad log file with new content.
auto write_bad_log_file =
[&disk, &kBadLogFile](const std::string& bad_contents) -> bool {
(void)disk.RemoveFile(kBadLogFile);
return disk.WriteFile(kBadLogFile, bad_contents);
};

// First, corrupt the header.
std::string bad_contents = original_contents;
bad_contents[0] = '@';

ASSERT_TRUE(write_bad_log_file(bad_contents)) << strerror(errno);
{
State state;
DepsLog log;
err.clear();
ASSERT_EQ(LOAD_SUCCESS, log.Load(kBadLogFile, &state, &err));
ASSERT_EQ("bad deps log signature or version; starting over", err);
}

// Second, truncate the version.
bad_contents = original_contents.substr(0, version_offset + 3);
ASSERT_TRUE(write_bad_log_file(bad_contents)) << strerror(errno);
{
State state;
DepsLog log;
err.clear();
ASSERT_EQ(LOAD_SUCCESS, log.Load(kBadLogFile, &state, &err));
ASSERT_EQ("bad deps log signature or version; starting over", err);
}

// Truncate first record's |size| field. The loader should recover.
bad_contents = original_contents.substr(0, version_offset + 4 + 3);
ASSERT_TRUE(write_bad_log_file(bad_contents)) << strerror(errno);
{
State state;
DepsLog log;
err.clear();
ASSERT_EQ(LOAD_SUCCESS, log.Load(kBadLogFile, &state, &err));
ASSERT_EQ("", err);
}

// Corrupt first record |size| value.
bad_contents = original_contents;
bad_contents[first_offset + 0] = '\x55';
bad_contents[first_offset + 1] = '\xaa';
bad_contents[first_offset + 2] = '\xff';
bad_contents[first_offset + 3] = '\xff';
ASSERT_TRUE(write_bad_log_file(bad_contents)) << strerror(errno);
{
State state;
DepsLog log;
err.clear();
ASSERT_EQ(LOAD_SUCCESS, log.Load(kBadLogFile, &state, &err));
ASSERT_EQ("premature end of file; recovering", err);
}

// Make first record |size| less than 4.
bad_contents = original_contents;
bad_contents[first_offset] = '\x01';
ASSERT_TRUE(write_bad_log_file(bad_contents)) << strerror(errno);
{
State state;
DepsLog log;
err.clear();
ASSERT_EQ(LOAD_SUCCESS, log.Load(kBadLogFile, &state, &err));
ASSERT_EQ("premature end of file; recovering", err);
}
}

} // anonymous namespace
2 changes: 1 addition & 1 deletion src/disk_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ TimeStamp RealDiskInterface::Stat(const string& path, string* err) const {
}

bool RealDiskInterface::WriteFile(const string& path, const string& contents) {
FILE* fp = fopen(path.c_str(), "w");
FILE* fp = fopen(path.c_str(), "wb");
if (fp == NULL) {
Error("WriteFile(%s): Unable to create file. %s",
path.c_str(), strerror(errno));
Expand Down

0 comments on commit fc2f81e

Please sign in to comment.