From 6a2bae4a515b39be3484cb9a750d0e5a4c111581 Mon Sep 17 00:00:00 2001 From: WGH Date: Wed, 22 Jul 2020 03:14:51 +0300 Subject: [PATCH] Remove unnecessary copying in transformations In C++11, string data is always null-terminated[1], and can be freely modified[2]. [1] https://stackoverflow.com/questions/6077189/will-stdstring-always-be-null-terminated-in-c11 [2] https://stackoverflow.com/questions/38875623/is-it-permitted-to-modify-the-internal-stdstring-buffer-returned-by-operator --- src/actions/transformations/css_decode.cc | 11 +++-------- .../transformations/escape_seq_decode.cc | 13 +++---------- src/actions/transformations/hex_decode.cc | 19 +++---------------- .../transformations/html_entity_decode.cc | 18 +++--------------- src/actions/transformations/js_decode.cc | 18 +++--------------- src/actions/transformations/normalise_path.cc | 16 ++++------------ .../transformations/normalise_path_win.cc | 15 +++------------ .../transformations/parity_even_7bit.cc | 17 ++--------------- .../transformations/parity_odd_7bit.cc | 19 ++----------------- .../transformations/parity_zero_7bit.cc | 17 ++--------------- src/actions/transformations/sql_hex_decode.cc | 19 +++---------------- src/actions/transformations/url_decode.cc | 17 +++++------------ src/actions/transformations/url_decode_uni.cc | 18 +++--------------- .../transformations/utf8_to_unicode.cc | 17 +++-------------- src/actions/transformations/utf8_to_unicode.h | 2 +- 15 files changed, 43 insertions(+), 193 deletions(-) diff --git a/src/actions/transformations/css_decode.cc b/src/actions/transformations/css_decode.cc index 4b23618f1a..c3245f8cea 100644 --- a/src/actions/transformations/css_decode.cc +++ b/src/actions/transformations/css_decode.cc @@ -37,16 +37,11 @@ namespace transformations { std::string CssDecode::evaluate(const std::string &value, Transaction *transaction) { - char *tmp = reinterpret_cast( - malloc(sizeof(char) * value.size() + 1)); - memcpy(tmp, value.c_str(), value.size() + 1); - tmp[value.size()] = '\0'; + std::string ret = value; - CssDecode::css_decode_inplace(reinterpret_cast(tmp), - value.size()); + auto size = CssDecode::css_decode_inplace(reinterpret_cast(&ret[0]), ret.size()); + ret.resize(size); - std::string ret(tmp, 0, value.size()); - free(tmp); return ret; } diff --git a/src/actions/transformations/escape_seq_decode.cc b/src/actions/transformations/escape_seq_decode.cc index e32a42d062..490d314cbf 100644 --- a/src/actions/transformations/escape_seq_decode.cc +++ b/src/actions/transformations/escape_seq_decode.cc @@ -142,17 +142,10 @@ int EscapeSeqDecode::ansi_c_sequences_decode_inplace(unsigned char *input, std::string EscapeSeqDecode::evaluate(const std::string &value, Transaction *transaction) { + std::string ret = value; - unsigned char *tmp = (unsigned char *) malloc(sizeof(char) - * value.size() + 1); - memcpy(tmp, value.c_str(), value.size() + 1); - tmp[value.size()] = '\0'; - - int size = ansi_c_sequences_decode_inplace(tmp, value.size()); - - std::string ret(""); - ret.assign(reinterpret_cast(tmp), size); - free(tmp); + auto size = ansi_c_sequences_decode_inplace(reinterpret_cast(&ret[0]), ret.size()); + ret.resize(size); return ret; } diff --git a/src/actions/transformations/hex_decode.cc b/src/actions/transformations/hex_decode.cc index bc72228fc6..3dc14a9a95 100644 --- a/src/actions/transformations/hex_decode.cc +++ b/src/actions/transformations/hex_decode.cc @@ -34,23 +34,10 @@ namespace transformations { std::string HexDecode::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; - int size = 0; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - size = inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), size); - free(input); + auto size = inplace(reinterpret_cast(&ret[0]), ret.length()); + ret.resize(size); return ret; } diff --git a/src/actions/transformations/html_entity_decode.cc b/src/actions/transformations/html_entity_decode.cc index 48257e76ef..de797a890c 100644 --- a/src/actions/transformations/html_entity_decode.cc +++ b/src/actions/transformations/html_entity_decode.cc @@ -35,22 +35,10 @@ namespace transformations { std::string HtmlEntityDecode::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - size_t i = inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), i); - free(input); + auto i = inplace(reinterpret_cast(&ret[0]), ret.length()); + ret.resize(i); return ret; } diff --git a/src/actions/transformations/js_decode.cc b/src/actions/transformations/js_decode.cc index 89f33b9f0f..0dfb4b228d 100644 --- a/src/actions/transformations/js_decode.cc +++ b/src/actions/transformations/js_decode.cc @@ -36,22 +36,10 @@ namespace transformations { std::string JsDecode::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - size_t i = inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), i); - free(input); + auto i = inplace(reinterpret_cast(&ret[0]), ret.length()); + ret.resize(i); return ret; } diff --git a/src/actions/transformations/normalise_path.cc b/src/actions/transformations/normalise_path.cc index ad76a22a46..a3d4bf029d 100644 --- a/src/actions/transformations/normalise_path.cc +++ b/src/actions/transformations/normalise_path.cc @@ -39,19 +39,11 @@ NormalisePath::NormalisePath(const std::string &action) std::string NormalisePath::evaluate(const std::string &value, Transaction *transaction) { - int changed = 0; + std::string ret = value; + int changed; - char *tmp = reinterpret_cast( - malloc(sizeof(char) * value.size() + 1)); - memcpy(tmp, value.c_str(), value.size() + 1); - tmp[value.size()] = '\0'; - - int i = normalize_path_inplace((unsigned char *)tmp, - value.size(), 0, &changed); - - std::string ret(""); - ret.assign(tmp, i); - free(tmp); + auto size = normalize_path_inplace(reinterpret_cast(&ret[0]), ret.length(), 0, &changed); + ret.resize(size); return ret; } diff --git a/src/actions/transformations/normalise_path_win.cc b/src/actions/transformations/normalise_path_win.cc index 8970e8b968..42f48c807d 100644 --- a/src/actions/transformations/normalise_path_win.cc +++ b/src/actions/transformations/normalise_path_win.cc @@ -36,20 +36,11 @@ namespace transformations { std::string NormalisePathWin::evaluate(const std::string &value, Transaction *transaction) { + std::string ret = value; int changed; - char *tmp = reinterpret_cast( - malloc(sizeof(char) * value.size() + 1)); - memcpy(tmp, value.c_str(), value.size() + 1); - tmp[value.size()] = '\0'; - - int i = NormalisePath::normalize_path_inplace( - reinterpret_cast(tmp), - value.size(), 1, &changed); - - std::string ret(""); - ret.assign(tmp, i); - free(tmp); + auto size = NormalisePath::normalize_path_inplace(reinterpret_cast(&ret[0]), ret.length(), 1, &changed); + ret.resize(size); return ret; } diff --git a/src/actions/transformations/parity_even_7bit.cc b/src/actions/transformations/parity_even_7bit.cc index 357fe7594c..219a2fa109 100644 --- a/src/actions/transformations/parity_even_7bit.cc +++ b/src/actions/transformations/parity_even_7bit.cc @@ -34,22 +34,9 @@ namespace transformations { std::string ParityEven7bit::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - std::memcpy(input, value.c_str(), value.length()+1); - - inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), value.length()); - free(input); + inplace(reinterpret_cast(&ret[0]), ret.size()); return ret; } diff --git a/src/actions/transformations/parity_odd_7bit.cc b/src/actions/transformations/parity_odd_7bit.cc index fbd6c8fcc2..0399f6a372 100644 --- a/src/actions/transformations/parity_odd_7bit.cc +++ b/src/actions/transformations/parity_odd_7bit.cc @@ -34,23 +34,8 @@ namespace transformations { std::string ParityOdd7bit::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; - - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), value.length()); - free(input); - + std::string ret = value; + inplace(reinterpret_cast(&ret[0]), ret.length()); return ret; } diff --git a/src/actions/transformations/parity_zero_7bit.cc b/src/actions/transformations/parity_zero_7bit.cc index 93a0f974e2..b4e4dcbad3 100644 --- a/src/actions/transformations/parity_zero_7bit.cc +++ b/src/actions/transformations/parity_zero_7bit.cc @@ -34,22 +34,9 @@ namespace transformations { std::string ParityZero7bit::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), value.length()); - free(input); + inplace(reinterpret_cast(&ret[0]), ret.length()); return ret; } diff --git a/src/actions/transformations/sql_hex_decode.cc b/src/actions/transformations/sql_hex_decode.cc index b33deac735..45225e9e3f 100644 --- a/src/actions/transformations/sql_hex_decode.cc +++ b/src/actions/transformations/sql_hex_decode.cc @@ -43,23 +43,10 @@ namespace transformations { std::string SqlHexDecode::evaluate(const std::string &value, Transaction *transaction) { - std::string ret; - unsigned char *input; - int size = 0; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - size = inplace(input, value.length()); - - ret.assign(reinterpret_cast(input), size); - free(input); + auto size = inplace(reinterpret_cast(&ret[0]), ret.size()); + ret.resize(size); return ret; } diff --git a/src/actions/transformations/url_decode.cc b/src/actions/transformations/url_decode.cc index c533b7e883..7b81c9b5b3 100644 --- a/src/actions/transformations/url_decode.cc +++ b/src/actions/transformations/url_decode.cc @@ -39,23 +39,16 @@ UrlDecode::UrlDecode(const std::string &action) std::string UrlDecode::evaluate(const std::string &value, Transaction *transaction) { - unsigned char *val = NULL; + std::string ret = value; int invalid_count = 0; int changed; - val = (unsigned char *) malloc(sizeof(char) * value.size() + 1); - memcpy(val, value.c_str(), value.size() + 1); - val[value.size()] = '\0'; - - int size = utils::urldecode_nonstrict_inplace(val, value.size(), + int size = utils::urldecode_nonstrict_inplace( + reinterpret_cast(&ret[0]), ret.size(), &invalid_count, &changed); - std::string out; - - out.append((const char *)val, size); - - free(val); + ret.resize(size); - return out; + return ret; } diff --git a/src/actions/transformations/url_decode_uni.cc b/src/actions/transformations/url_decode_uni.cc index 9533125012..6638713afa 100644 --- a/src/actions/transformations/url_decode_uni.cc +++ b/src/actions/transformations/url_decode_uni.cc @@ -40,22 +40,10 @@ namespace transformations { std::string UrlDecodeUni::evaluate(const std::string &value, Transaction *t) { - std::string ret; - unsigned char *input; + std::string ret = value; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - size_t i = inplace(input, value.length(), t); - - ret.assign(reinterpret_cast(input), i); - free(input); + size_t i = inplace(reinterpret_cast(&ret[0]), ret.length(), t); + ret.resize(i); return ret; } diff --git a/src/actions/transformations/utf8_to_unicode.cc b/src/actions/transformations/utf8_to_unicode.cc index 38d9a2b784..ff2ee0306d 100644 --- a/src/actions/transformations/utf8_to_unicode.cc +++ b/src/actions/transformations/utf8_to_unicode.cc @@ -36,21 +36,10 @@ namespace transformations { std::string Utf8ToUnicode::evaluate(const std::string &value, Transaction *transaction) { std::string ret; - unsigned char *input; int changed = 0; char *out; - input = reinterpret_cast - (malloc(sizeof(char) * value.length()+1)); - - if (input == NULL) { - return ""; - } - - memcpy(input, value.c_str(), value.length()+1); - - out = inplace(input, value.size() + 1, &changed); - free(input); + out = inplace(reinterpret_cast(&value[0]), value.size() + 1, &changed); if (out != NULL) { ret.assign(reinterpret_cast(out), strlen(reinterpret_cast(out))); @@ -61,7 +50,7 @@ std::string Utf8ToUnicode::evaluate(const std::string &value, } -char *Utf8ToUnicode::inplace(unsigned char *input, +char *Utf8ToUnicode::inplace(const unsigned char *input, uint64_t input_len, int *changed) { unsigned int count = 0; char *data; @@ -89,7 +78,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input, int unicode_len = 0; unsigned int d = 0; unsigned char c; - unsigned char *utf = (unsigned char *)&input[i]; + const unsigned char *utf = &input[i]; c = *utf; diff --git a/src/actions/transformations/utf8_to_unicode.h b/src/actions/transformations/utf8_to_unicode.h index c76bb0a285..271b8f65d7 100644 --- a/src/actions/transformations/utf8_to_unicode.h +++ b/src/actions/transformations/utf8_to_unicode.h @@ -40,7 +40,7 @@ class Utf8ToUnicode : public Transformation { std::string evaluate(const std::string &exp, Transaction *transaction) override; - static char *inplace(unsigned char *input, uint64_t input_len, + static char *inplace(const unsigned char *input, uint64_t input_len, int *changed); };