From 3b0bfcd84a19ca81fbc84b3567982a908b726756 Mon Sep 17 00:00:00 2001 From: Leif Hedstrom Date: Sun, 5 May 2019 13:14:50 -0600 Subject: [PATCH] This fixes parsing where the [ ] section gets merged into values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example: cond %{SEND_RESPONSE_HDR_HOOK} set-header X-First "First" set-header X-Last "Last" [L] would set the header X-Last: Last [L]. In addition, it also fixes a typo (I think?) in an unrelated test, however, it's unclear as to why it's not failing that test. CHECK_EQ(p.get_tokens()[1], "%{METHOD}c”); --- plugins/header_rewrite/header_rewrite.cc | 6 +- plugins/header_rewrite/header_rewrite_test.cc | 24 +++++- plugins/header_rewrite/parser.cc | 79 +++++++++---------- plugins/header_rewrite/parser.h | 23 ++++-- plugins/header_rewrite/ruleset.cc | 3 +- plugins/header_rewrite/value.cc | 8 +- 6 files changed, 85 insertions(+), 58 deletions(-) diff --git a/plugins/header_rewrite/header_rewrite.cc b/plugins/header_rewrite/header_rewrite.cc index a385d913b3a..539d3972b18 100644 --- a/plugins/header_rewrite/header_rewrite.cc +++ b/plugins/header_rewrite/header_rewrite.cc @@ -184,8 +184,10 @@ RulesConfig::parse_config(const std::string &fname, TSHttpHookID default_hook) continue; } - Parser p(line); // Tokenize and parse this line - if (p.empty()) { + Parser p; + + // Tokenize and parse this line + if (!p.parse_line(line) || p.empty()) { continue; } diff --git a/plugins/header_rewrite/header_rewrite_test.cc b/plugins/header_rewrite/header_rewrite_test.cc index 437b76c849d..423705921d5 100644 --- a/plugins/header_rewrite/header_rewrite_test.cc +++ b/plugins/header_rewrite/header_rewrite_test.cc @@ -44,7 +44,12 @@ TSError(const char *fmt, ...) class ParserTest : public Parser { public: - ParserTest(const std::string &line) : Parser(line), res(true) { std::cout << "Finished parser test: " << line << std::endl; } + ParserTest(const std::string &line) : res(true) + { + Parser::parse_line(line); + std::cout << "Finished parser test: " << line << std::endl; + } + std::vector getTokens() const { @@ -353,12 +358,13 @@ test_parsing() } { - ParserTest p(R"(set-header Alt-Svc "quic=\":443\"; v=\"35\"")"); + ParserTest p(R"(set-header Alt-Svc "quic=\":443\"; v=\"35\"" [L])"); - CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.getTokens().size(), 4UL); CHECK_EQ(p.getTokens()[0], "set-header"); CHECK_EQ(p.getTokens()[1], "Alt-Svc"); CHECK_EQ(p.getTokens()[2], R"(quic=":443"; v="35")"); + CHECK_EQ(p.get_value(), R"(quic=":443"; v="35")"); END_TEST(); } @@ -461,12 +467,16 @@ test_tokenizer() SimpleTokenizerTest p("a simple test"); CHECK_EQ(p.get_tokens().size(), 1UL); CHECK_EQ(p.get_tokens()[0], "a simple test"); + + END_TEST(); } { SimpleTokenizerTest p(R"(quic=":443"; v="35")"); CHECK_EQ(p.get_tokens().size(), 1UL); CHECK_EQ(p.get_tokens()[0], R"(quic=":443"; v="35")"); + + END_TEST(); } { @@ -474,15 +484,19 @@ test_tokenizer() CHECK_EQ(p.get_tokens().size(), 2UL); CHECK_EQ(p.get_tokens()[0], "let's party like it's "); CHECK_EQ(p.get_tokens()[1], "%{NOW:YEAR}"); + + END_TEST(); } { SimpleTokenizerTest p("A racoon's favorite tag is %{METHOD} in %{NOW:YEAR}!"); CHECK_EQ(p.get_tokens().size(), 5UL); CHECK_EQ(p.get_tokens()[0], "A racoon's favorite tag is "); - CHECK_EQ(p.get_tokens()[1], "%{METHOD}c"); + CHECK_EQ(p.get_tokens()[1], "%{METHOD}"); CHECK_EQ(p.get_tokens()[2], " in "); CHECK_EQ(p.get_tokens()[3], "%{NOW:YEAR}"); CHECK_EQ(p.get_tokens()[4], "!"); + + END_TEST(); } { @@ -492,6 +506,8 @@ test_tokenizer() CHECK_EQ(p.get_tokens()[1], "%{IP:SERVER}"); CHECK_EQ(p.get_tokens()[2], ":"); CHECK_EQ(p.get_tokens()[3], "%{INBOUND:LOCAL-PORT}"); + + END_TEST(); } return errors; diff --git a/plugins/header_rewrite/parser.cc b/plugins/header_rewrite/parser.cc index cc7a70c52b7..7e5e40e6fd1 100644 --- a/plugins/header_rewrite/parser.cc +++ b/plugins/header_rewrite/parser.cc @@ -30,7 +30,8 @@ enum ParserState { PARSER_DEFAULT, PARSER_IN_QUOTE, PARSER_IN_REGEX, PARSER_IN_EXPANSION }; -Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) +bool +Parser::parse_line(const std::string &original_line) { std::string line = original_line; ParserState state = PARSER_DEFAULT; @@ -85,7 +86,7 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) TSError("[%s] malformed line \"%s\", ignoring", PLUGIN_NAME, line.c_str()); _tokens.clear(); _empty = true; - return; + return false; } } else if (!extracting_token) { if (_tokens.empty() && line[i] == '#') { @@ -114,21 +115,49 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) TSError("[%s] malformed line, unterminated quotation: \"%s\", ignoring", PLUGIN_NAME, line.c_str()); _tokens.clear(); _empty = true; - return; + return false; } } if (_tokens.empty()) { _empty = true; } else { - preprocess(_tokens); + return preprocess(_tokens); } + + return true; } -// This is the core "parser", parsing rule sets -void +// This is the main "parser", a helper function to the above tokenizer. NOTE: this modifies (possibly) the tokens list, +// therefore, we pass in a copy of the parsers tokens here, such that the original token list is retained (useful for tests etc.). +bool Parser::preprocess(std::vector tokens) { + // The last token might be the "flags" section, lets consume it if it is + if (tokens.size() > 0) { + std::string m = tokens[tokens.size() - 1]; + + if (!m.empty() && (m[0] == '[')) { + if (m[m.size() - 1] == ']') { + m = m.substr(1, m.size() - 2); + if (m.find_first_of(',') != std::string::npos) { + std::istringstream iss(m); + std::string t; + while (getline(iss, t, ',')) { + _mods.push_back(t); + } + } else { + _mods.push_back(m); + } + tokens.pop_back(); // consume it, so we don't concatenate it into the value + } else { + // Syntax error + TSError("[%s] mods have to be enclosed in []", PLUGIN_NAME); + return false; + } + } + } + // Special case for "conditional" values if (tokens[0].substr(0, 2) == "%{") { _cond = true; @@ -155,7 +184,7 @@ Parser::preprocess(std::vector tokens) } } else { TSError("[%s] conditions must be embraced in %%{}", PLUGIN_NAME); - return; + return false; } } else { // Operator has no qualifiers, but could take an optional second argument @@ -179,29 +208,7 @@ Parser::preprocess(std::vector tokens) } } - // The last token might be the "flags" section - if (tokens.size() > 0) { - std::string m = tokens[tokens.size() - 1]; - - if (!m.empty() && (m[0] == '[')) { - if (m[m.size() - 1] == ']') { - m = m.substr(1, m.size() - 2); - if (m.find_first_of(',') != std::string::npos) { - std::istringstream iss(m); - std::string t; - while (getline(iss, t, ',')) { - _mods.push_back(t); - } - } else { - _mods.push_back(m); - } - } else { - // Syntax error - TSError("[%s] mods have to be enclosed in []", PLUGIN_NAME); - return; - } - } - } + return true; } // Check if the operator is a condition, a hook, and if so, which hook. If the cond is not a hook @@ -241,12 +248,6 @@ Parser::cond_is_hook(TSHttpHookID &hook) const return false; } -const std::vector & -Parser::get_tokens() const -{ - return _tokens; -} - SimpleTokenizer::SimpleTokenizer(const std::string &original_line) { std::string line = original_line; @@ -294,9 +295,3 @@ SimpleTokenizer::SimpleTokenizer(const std::string &original_line) _tokens.push_back(line.substr(cur_token_start)); } } - -const std::vector & -SimpleTokenizer::get_tokens() const -{ - return _tokens; -} diff --git a/plugins/header_rewrite/parser.h b/plugins/header_rewrite/parser.h index 385c824b833..691a817b72b 100644 --- a/plugins/header_rewrite/parser.h +++ b/plugins/header_rewrite/parser.h @@ -33,7 +33,7 @@ class Parser { public: - explicit Parser(const std::string &line); + Parser(){}; // noncopyable Parser(const Parser &) = delete; @@ -76,13 +76,20 @@ class Parser } bool cond_is_hook(TSHttpHookID &hook) const; - const std::vector &get_tokens() const; + + const std::vector & + get_tokens() const + { + return _tokens; + } + + bool parse_line(const std::string &original_line); private: - void preprocess(std::vector tokens); + bool preprocess(std::vector tokens); - bool _cond; - bool _empty; + bool _cond = false; + bool _empty = false; std::vector _mods; std::string _op; std::string _arg; @@ -101,7 +108,11 @@ class SimpleTokenizer SimpleTokenizer(const SimpleTokenizer &) = delete; void operator=(const SimpleTokenizer &) = delete; - const std::vector &get_tokens() const; + const std::vector & + get_tokens() const + { + return _tokens; + } protected: std::vector _tokens; diff --git a/plugins/header_rewrite/ruleset.cc b/plugins/header_rewrite/ruleset.cc index f6d0140f7fd..b2b7d8c1e71 100644 --- a/plugins/header_rewrite/ruleset.cc +++ b/plugins/header_rewrite/ruleset.cc @@ -76,8 +76,7 @@ RuleSet::add_operator(Parser &p, const char *filename, int lineno) Operator *o = operator_factory(p.get_op()); if (nullptr != o) { - // TODO: This should be extended to show both the "argument" and the "value" (if both are used) - TSDebug(PLUGIN_NAME, " Adding operator: %s(%s)", p.get_op().c_str(), p.get_arg().c_str()); + TSDebug(PLUGIN_NAME, " Adding operator: %s(%s)=\"%s\"", p.get_op().c_str(), p.get_arg().c_str(), p.get_value().c_str()); o->initialize(p); if (!o->set_hook(_hook)) { delete o; diff --git a/plugins/header_rewrite/value.cc b/plugins/header_rewrite/value.cc index 5059a009755..fc4dcc0e09b 100644 --- a/plugins/header_rewrite/value.cc +++ b/plugins/header_rewrite/value.cc @@ -54,9 +54,13 @@ Value::set_value(const std::string &val) std::string cond_token = token.substr(2, token.size() - 3); if ((tcond_val = condition_factory(cond_token))) { - Parser parser(_value); + Parser parser; - tcond_val->initialize(parser); + if (parser.parse_line(_value)) { + tcond_val->initialize(parser); + } else { + // TODO: should we produce error here? + } } } else { tcond_val = new ConditionStringLiteral(token);