Skip to content

Commit

Permalink
Accept symbolic input from memory
Browse files Browse the repository at this point in the history
This commit adds the option to mark symbolic input by calling
symcc_make_symbolic from the program under test.

The refactoring that was required to add the new feature has had the
pleasant side effect that the QSYM backend now doesn't require the
entire input upfront anymore, making it much more convenient to feed
symbolic data through stdin.
  • Loading branch information
sebastianpoeplau committed Nov 18, 2022
1 parent c4d1aae commit d5891b0
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 103 deletions.
7 changes: 6 additions & 1 deletion docs/Configuration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ environment variables.

- SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from
standard input as symbolic; when set to a file name, any data read from that
file is considered symbolic.
file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1.

- SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under
test to communicate symbolic inputs with one or more calls to
symcc_make_symbolic. Can't be combined with SYMCC_INPUT_FILE. Ignored if
SYMCC_NO_SYMBOLIC_INPUT is set to 1.

- SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the
file (or overwrites any existing file!) and uses it to log backend activity
Expand Down
23 changes: 17 additions & 6 deletions runtime/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <limits>
#include <sstream>
#include <stdexcept>
#include <variant>

namespace {

Expand All @@ -41,17 +42,26 @@ bool checkFlagString(std::string value) {
Config g_config;

void loadConfig() {
auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT");
if (fullyConcrete != nullptr)
g_config.fullyConcrete = checkFlagString(fullyConcrete);

auto *outputDir = getenv("SYMCC_OUTPUT_DIR");
if (outputDir != nullptr)
g_config.outputDir = outputDir;

auto *inputFile = getenv("SYMCC_INPUT_FILE");
if (inputFile != nullptr)
g_config.inputFile = inputFile;
g_config.input = FileInput{inputFile};

auto *memoryInput = getenv("SYMCC_MEMORY_INPUT");
if (memoryInput != nullptr && checkFlagString(memoryInput)) {
if (std::holds_alternative<FileInput>(g_config.input))
throw std::runtime_error{
"Can't enable file and memory input at the same time"};

g_config.input = MemoryInput{};
}

auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT");
if (fullyConcrete != nullptr && checkFlagString(fullyConcrete))
g_config.input = NoInput{};

auto *logFile = getenv("SYMCC_LOG_FILE");
if (logFile != nullptr)
Expand All @@ -76,7 +86,8 @@ void loadConfig() {
throw std::runtime_error(msg.str());
} catch (std::out_of_range &) {
std::stringstream msg;
msg << "The GC threshold must be between 0 and " << std::numeric_limits<size_t>::max();
msg << "The GC threshold must be between 0 and "
<< std::numeric_limits<size_t>::max();
throw std::runtime_error(msg.str());
}
}
Expand Down
25 changes: 20 additions & 5 deletions runtime/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,32 @@
#define CONFIG_H

#include <string>
#include <variant>

/// Marker struct for fully concrete execution.
struct NoInput {};

/// Marker struct for symbolic input from stdin.
struct StdinInput {};

/// Marker struct for symbolic input via _sym_make_symbolic.
struct MemoryInput {};

/// Configuration for symbolic input from a file.
struct FileInput {
/// The name of input file.
std::string fileName;
};

struct Config {
/// Should we allow symbolic data in the program?
bool fullyConcrete = false;
using InputConfig = std::variant<NoInput, StdinInput, MemoryInput, FileInput>;

/// The configuration for our symbolic input.
InputConfig input = StdinInput{};

/// The directory for storing new outputs.
std::string outputDir = "/tmp/output";

/// The input file, if any.
std::string inputFile;

/// The file to log constraint solving information to.
std::string logFile = "";

Expand Down
83 changes: 36 additions & 47 deletions runtime/LibcWrappers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cstring>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

#include <arpa/inet.h>
Expand Down Expand Up @@ -56,13 +57,28 @@ template <typename E, typename F>
void tryAlternative(E *value, SymExpr valueExpr, F caller) {
tryAlternative(reinterpret_cast<intptr_t>(value), valueExpr, caller);
}
} // namespace

void initLibcWrappers() {
if (g_config.fullyConcrete)
void maybeSetInputFile(const char *path, int fd) {
auto *fileInput = std::get_if<FileInput>(&g_config.input);
if (fileInput == nullptr)
return;

if (g_config.inputFile.empty()) {
if (strstr(path, fileInput->fileName.c_str()) == nullptr)
return;

if (inputFileDescriptor != -1)
std::cerr << "Warning: input file opened multiple times; this is not yet "
"supported"
<< std::endl;

inputFileDescriptor = fd;
inputOffset = 0;
}

} // namespace

void initLibcWrappers() {
if (std::holds_alternative<StdinInput>(g_config.input)) {
// Symbolic data comes from standard input.
inputFileDescriptor = 0;
}
Expand Down Expand Up @@ -111,15 +127,8 @@ int SYM(open)(const char *path, int oflag, mode_t mode) {
auto result = open(path, oflag, mode);
_sym_set_return_expression(nullptr);

if (result >= 0 && !g_config.fullyConcrete && !g_config.inputFile.empty() &&
strstr(path, g_config.inputFile.c_str()) != nullptr) {
if (inputFileDescriptor != -1)
std::cerr << "Warning: input file opened multiple times; this is not yet "
"supported"
<< std::endl;
inputFileDescriptor = result;
inputOffset = 0;
}
if (result >= 0)
maybeSetInputFile(path, result);

return result;
}
Expand All @@ -136,9 +145,8 @@ ssize_t SYM(read)(int fildes, void *buf, size_t nbyte) {

if (fildes == inputFileDescriptor) {
// Reading symbolic input.
ReadWriteShadow shadow(buf, result);
std::generate(shadow.begin(), shadow.end(),
[]() { return _sym_get_input_byte(inputOffset++); });
_sym_make_symbolic(buf, result, inputOffset);
inputOffset += result;
} else if (!isConcrete(buf, result)) {
ReadWriteShadow shadow(buf, result);
std::fill(shadow.begin(), shadow.end(), nullptr);
Expand Down Expand Up @@ -193,16 +201,8 @@ FILE *SYM(fopen)(const char *pathname, const char *mode) {
auto *result = fopen(pathname, mode);
_sym_set_return_expression(nullptr);

if (result != nullptr && !g_config.fullyConcrete &&
!g_config.inputFile.empty() &&
strstr(pathname, g_config.inputFile.c_str()) != nullptr) {
if (inputFileDescriptor != -1)
std::cerr << "Warning: input file opened multiple times; this is not yet "
"supported"
<< std::endl;
inputFileDescriptor = fileno(result);
inputOffset = 0;
}
if (result != nullptr)
maybeSetInputFile(pathname, fileno(result));

return result;
}
Expand All @@ -211,16 +211,8 @@ FILE *SYM(fopen64)(const char *pathname, const char *mode) {
auto *result = fopen64(pathname, mode);
_sym_set_return_expression(nullptr);

if (result != nullptr && !g_config.fullyConcrete &&
!g_config.inputFile.empty() &&
strstr(pathname, g_config.inputFile.c_str()) != nullptr) {
if (inputFileDescriptor != -1)
std::cerr << "Warning: input file opened multiple times; this is not yet "
"supported"
<< std::endl;
inputFileDescriptor = fileno(result);
inputOffset = 0;
}
if (result != nullptr)
maybeSetInputFile(pathname, fileno(result));

return result;
}
Expand All @@ -235,9 +227,8 @@ size_t SYM(fread)(void *ptr, size_t size, size_t nmemb, FILE *stream) {

if (fileno(stream) == inputFileDescriptor) {
// Reading symbolic input.
ReadWriteShadow shadow(ptr, result * size);
std::generate(shadow.begin(), shadow.end(),
[]() { return _sym_get_input_byte(inputOffset++); });
_sym_make_symbolic(ptr, result * size, inputOffset);
inputOffset += result * size;
} else if (!isConcrete(ptr, result * size)) {
ReadWriteShadow shadow(ptr, result * size);
std::fill(shadow.begin(), shadow.end(), nullptr);
Expand All @@ -255,9 +246,9 @@ char *SYM(fgets)(char *str, int n, FILE *stream) {

if (fileno(stream) == inputFileDescriptor) {
// Reading symbolic input.
ReadWriteShadow shadow(str, sizeof(char) * strlen(str));
std::generate(shadow.begin(), shadow.end(),
[]() { return _sym_get_input_byte(inputOffset++); });
const auto length = sizeof(char) * strlen(str);
_sym_make_symbolic(str, length, inputOffset);
inputOffset += length;
} else if (!isConcrete(str, sizeof(char) * strlen(str))) {
ReadWriteShadow shadow(str, sizeof(char) * strlen(str));
std::fill(shadow.begin(), shadow.end(), nullptr);
Expand Down Expand Up @@ -338,7 +329,7 @@ int SYM(getc)(FILE *stream) {

if (fileno(stream) == inputFileDescriptor)
_sym_set_return_expression(_sym_build_zext(
_sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8));
_sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8));
else
_sym_set_return_expression(nullptr);

Expand All @@ -354,16 +345,14 @@ int SYM(fgetc)(FILE *stream) {

if (fileno(stream) == inputFileDescriptor)
_sym_set_return_expression(_sym_build_zext(
_sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8));
_sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8));
else
_sym_set_return_expression(nullptr);

return result;
}

int SYM(getchar)(void) {
return SYM(getc)(stdin);
}
int SYM(getchar)(void) { return SYM(getc)(stdin); }

int SYM(ungetc)(int c, FILE *stream) {
auto result = ungetc(c, stream);
Expand Down
23 changes: 23 additions & 0 deletions runtime/RuntimeCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@

#include <Runtime.h>

#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <numeric>
#include <stdexcept>
#include <variant>

#include "Config.h"
#include "GarbageCollection.h"
#include "RuntimeCommon.h"
#include "Shadow.h"
Expand Down Expand Up @@ -196,3 +201,21 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset,
void _sym_register_expression_region(SymExpr *start, size_t length) {
registerExpressionRegion({start, length});
}

void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset) {
ReadWriteShadow shadow(data, byte_length);
uint8_t *data_bytes = reinterpret_cast<uint8_t *>(data);
std::generate(shadow.begin(), shadow.end(), [&, i = 0]() mutable {
return _sym_get_input_byte(input_offset++, data_bytes[i++]);
});
}

void symcc_make_symbolic(void *start, size_t byte_length) {
if (!std::holds_alternative<MemoryInput>(g_config.input))
throw std::runtime_error{"Calls to symcc_make_symbolic aren't allowed when "
"SYMCC_MEMORY_INPUT isn't set"};

static size_t inputOffset = 0; // track the offset across calls
_sym_make_symbolic(start, byte_length, inputOffset);
inputOffset += byte_length;
}
13 changes: 12 additions & 1 deletion runtime/RuntimeCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
#define RUNTIMECOMMON_H

#ifdef __cplusplus
#include <cstddef>
#include <cstdint>
extern "C" {
#else
#include <stddef.h>
#include <stdint.h>
#endif

Expand Down Expand Up @@ -141,7 +143,8 @@ SymExpr _sym_get_return_expression(void);
*/
void _sym_push_path_constraint(SymExpr constraint, int taken,
uintptr_t site_id);
SymExpr _sym_get_input_byte(size_t offset);
SymExpr _sym_get_input_byte(size_t offset, uint8_t concrete_value);
void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset);

/*
* Memory management
Expand Down Expand Up @@ -176,6 +179,14 @@ bool _sym_feasible(SymExpr expr);
void _sym_register_expression_region(SymExpr *start, size_t length);
void _sym_collect_garbage(void);

/*
* Symbolic input from memory
*
* This is the only function in the interface that we expect to be called by
* users (i.e., calls to it aren't auto-generated by our compiler pass).
*/
void symcc_make_symbolic(void *start, size_t byte_length);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit d5891b0

Please sign in to comment.