Skip to content

Commit

Permalink
refactor: destroy spawn of satan type punning in ETF parser (#894)
Browse files Browse the repository at this point in the history
  • Loading branch information
braindigitalis authored Sep 26, 2023
1 parent aaf5acc commit 5a9bcf5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 42 deletions.
16 changes: 0 additions & 16 deletions include/dpp/etf.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,6 @@ enum etf_token_type : uint8_t {
ett_atom_utf8_small = 'w'
};

/**
* @brief A horrible structure used within the ETF parser to convert uint64_t to double and back.
* This is horrible, but it is the official way erlang term format does this, so we can't really
* mess with it much.
*/
union type_punner {
/**
* @brief binary integer value
*/
uint64_t ui64;
/**
* @brief double floating point value
*/
double df;
};

/**
* @brief Represents a buffer of bytes being encoded into ETF
*/
Expand Down
57 changes: 31 additions & 26 deletions src/dpp/etf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,39 +55,39 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
pk->length += l;
}

void etf_parser::append_version(etf_buffer *b) {
void etf_parser::append_version(etf_buffer *b) {
static unsigned char buf[1] = {FORMAT_VERSION};
buffer_write(b, (const char *)buf, 1);
}

void etf_parser::append_nil(etf_buffer *b) {
void etf_parser::append_nil(etf_buffer *b) {
static unsigned char buf[5] = {ett_atom_small, 3, 'n', 'i', 'l'};
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_false(etf_buffer *b) {
void etf_parser::append_false(etf_buffer *b) {
static unsigned char buf[7] = {ett_atom_small, 5, 'f', 'a', 'l', 's', 'e'};
buffer_write(b, (const char *)buf, 7);
}

void etf_parser::append_true(etf_buffer *b) {
void etf_parser::append_true(etf_buffer *b) {
static unsigned char buf[6] = {ett_atom_small, 4, 't', 'r', 'u', 'e'};
buffer_write(b, (const char *)buf, 6);
}

void etf_parser::append_small_integer(etf_buffer *b, unsigned char d) {
void etf_parser::append_small_integer(etf_buffer *b, unsigned char d) {
unsigned char buf[2] = {ett_smallint, d};
buffer_write(b, (const char *)buf, 2);
}

void etf_parser::append_integer(etf_buffer *b, int32_t d) {
void etf_parser::append_integer(etf_buffer *b, int32_t d) {
unsigned char buf[5];
buf[0] = ett_integer;
store_32_bits(buf + 1, d);
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_unsigned_long_long(etf_buffer *b, unsigned long long d) {
void etf_parser::append_unsigned_long_long(etf_buffer *b, unsigned long long d) {
unsigned char buf[1 + 2 + sizeof(unsigned long long)];
buf[0] = ett_bigint_small;

Expand All @@ -103,7 +103,7 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
buffer_write(b, (const char *)buf, 1 + 2 + bytes_enc);
}

void etf_parser::append_long_long(etf_buffer *b, long long d) {
void etf_parser::append_long_long(etf_buffer *b, long long d) {
unsigned char buf[1 + 2 + sizeof(unsigned long long)];
buf[0] = ett_bigint_small;
buf[2] = d < 0 ? 1 : 0;
Expand All @@ -118,16 +118,16 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
buffer_write(b, (const char *)buf, 1 + 2 + bytes_enc);
}

void etf_parser::append_double(etf_buffer *b, double f) {
void etf_parser::append_double(etf_buffer *b, double f) {
unsigned char buf[1 + 8] = {0};
uint64_t val_64_out{0};
buf[0] = ett_new_float;
type_punner p;
p.df = f;
store_64_bits(buf + 1, p.ui64);
memcpy(&val_64_out, &f, sizeof(double));
store_64_bits(buf + 1, val_64_out);
buffer_write(b, (const char *)buf, 1 + 8);
}

void etf_parser::append_atom(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_atom(etf_buffer *b, const char *bytes, size_t size) {
if (size < 255) {
unsigned char buf[2] = {ett_atom_small, (unsigned char)size};
buffer_write(b, (const char *)buf, 2);
Expand All @@ -146,7 +146,7 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
}
}

void etf_parser::append_atom_utf8(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_atom_utf8(etf_buffer *b, const char *bytes, size_t size) {
if (size < 255) {
unsigned char buf[2] = {ett_atom_utf8_small, (unsigned char)size};
buffer_write(b, (const char *)buf, 2);
Expand Down Expand Up @@ -174,7 +174,7 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
buffer_write(b, (const char *)bytes, size);
}

void etf_parser::append_string(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_string(etf_buffer *b, const char *bytes, size_t size) {
unsigned char buf[3];
buf[0] = ett_string;

Expand All @@ -183,7 +183,7 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
buffer_write(b, (const char *)bytes, size);
}

void etf_parser::append_tuple_header(etf_buffer *b, size_t size) {
void etf_parser::append_tuple_header(etf_buffer *b, size_t size) {
if (size < 256) {
unsigned char buf[2];
buf[0] = ett_small_tuple;
Expand All @@ -197,19 +197,19 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
}
}

void etf_parser::append_nil_ext(etf_buffer *b) {
void etf_parser::append_nil_ext(etf_buffer *b) {
static unsigned char buf[1] = {ett_nil};
buffer_write(b, (const char *)buf, 1);
}

void etf_parser::append_list_header(etf_buffer *b, size_t size) {
void etf_parser::append_list_header(etf_buffer *b, size_t size) {
unsigned char buf[5];
buf[0] = ett_list;
store_32_bits(buf + 1, size);
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_map_header(etf_buffer *b, size_t size) {
void etf_parser::append_map_header(etf_buffer *b, size_t size) {
unsigned char buf[5];
buf[0] = ett_map;
store_32_bits(buf + 1, size);
Expand Down Expand Up @@ -269,28 +269,33 @@ const char* etf_parser::read_string(uint32_t length) {
return (const char*)str;
}

static const char* atom_null = "null";
static const char* atom_true = "true";
static const char* atom_false = "false";

json etf_parser::process_atom(const char* atom, uint16_t length) {
json j;

if (atom == NULL) {
return j;
}

/**
* WARNING: PERFORMANCE HOTSPOT
* Valgrind cachegrind identified this as a performance hotspot in DPP when using ETF. This gets called a LOT,
* and the difference here between comparing the simple constant types here where the lengths are known in this
* way, compared to using memcpy or strncpy four times, is absolutely huge (factors of ten). Considering this
* can be called hundreds of times a second on a busy bot, the performance of this function should not be
* underestimated. Think carefully before 'tidying' this function further!
*
*/
if (length >= 3 && length <= 5) {
if (length == 4 && *((uint32_t*)atom) == *((uint32_t*)atom_null)) { // "null"
if (length == 4 && atom[0] == 'n' && atom[1] == 'u' && atom[2] == 'l' && atom[3] == 'l') { // "null"
return j;
}
else if(length == 4 && *((uint32_t*)atom) == *((uint32_t*)atom_true)) { // "true"
else if (length == 4 && atom[0] == 't' && atom[1] == 'r' && atom[2] == 'u' && atom[3] == 'e') { // "true"
return true;
}
else if (length == 3 && atom[0] == 'n' && atom[1] == 'i' && atom[2] == 'l') { // "nil"
return j;
}
else if (length == 5 && *((uint32_t*)atom) == *((uint32_t*)atom_false) && atom[4] == 'e') { // "fals","e"
else if (length == 5 && atom[0] == 'f' && atom[1] == 'a' && atom[2] == 'l' && atom[3] == 's' && atom[4] == 'e') { // "fals","e"
return false;
}
}
Expand Down

0 comments on commit 5a9bcf5

Please sign in to comment.