From 3b0bfcd84a19ca81fbc84b3567982a908b726756 Mon Sep 17 00:00:00 2001
From: Leif Hedstrom <zwoop@apache.org>
Date: Sun, 5 May 2019 13:14:50 -0600
Subject: [PATCH] This fixes parsing where the [ ] section gets merged into
 values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For example:

    cond %{SEND_RESPONSE_HDR_HOOK}
     set-header X-First "First"
     set-header X-Last "Last" [L]

would set the header X-Last: Last [L].

In addition, it also fixes a typo (I think?) in an unrelated test,
however, it's unclear as to why it's not failing that test.

    CHECK_EQ(p.get_tokens()[1], "%{METHOD}c”);
---
 plugins/header_rewrite/header_rewrite.cc      |  6 +-
 plugins/header_rewrite/header_rewrite_test.cc | 24 +++++-
 plugins/header_rewrite/parser.cc              | 79 +++++++++----------
 plugins/header_rewrite/parser.h               | 23 ++++--
 plugins/header_rewrite/ruleset.cc             |  3 +-
 plugins/header_rewrite/value.cc               |  8 +-
 6 files changed, 85 insertions(+), 58 deletions(-)

diff --git a/plugins/header_rewrite/header_rewrite.cc b/plugins/header_rewrite/header_rewrite.cc
index a385d913b3a..539d3972b18 100644
--- a/plugins/header_rewrite/header_rewrite.cc
+++ b/plugins/header_rewrite/header_rewrite.cc
@@ -184,8 +184,10 @@ RulesConfig::parse_config(const std::string &fname, TSHttpHookID default_hook)
       continue;
     }
 
-    Parser p(line); // Tokenize and parse this line
-    if (p.empty()) {
+    Parser p;
+
+    // Tokenize and parse this line
+    if (!p.parse_line(line) || p.empty()) {
       continue;
     }
 
diff --git a/plugins/header_rewrite/header_rewrite_test.cc b/plugins/header_rewrite/header_rewrite_test.cc
index 437b76c849d..423705921d5 100644
--- a/plugins/header_rewrite/header_rewrite_test.cc
+++ b/plugins/header_rewrite/header_rewrite_test.cc
@@ -44,7 +44,12 @@ TSError(const char *fmt, ...)
 class ParserTest : public Parser
 {
 public:
-  ParserTest(const std::string &line) : Parser(line), res(true) { std::cout << "Finished parser test: " << line << std::endl; }
+  ParserTest(const std::string &line) : res(true)
+  {
+    Parser::parse_line(line);
+    std::cout << "Finished parser test: " << line << std::endl;
+  }
+
   std::vector<std::string>
   getTokens() const
   {
@@ -353,12 +358,13 @@ test_parsing()
   }
 
   {
-    ParserTest p(R"(set-header Alt-Svc "quic=\":443\"; v=\"35\"")");
+    ParserTest p(R"(set-header Alt-Svc "quic=\":443\"; v=\"35\"" [L])");
 
-    CHECK_EQ(p.getTokens().size(), 3UL);
+    CHECK_EQ(p.getTokens().size(), 4UL);
     CHECK_EQ(p.getTokens()[0], "set-header");
     CHECK_EQ(p.getTokens()[1], "Alt-Svc");
     CHECK_EQ(p.getTokens()[2], R"(quic=":443"; v="35")");
+    CHECK_EQ(p.get_value(), R"(quic=":443"; v="35")");
 
     END_TEST();
   }
@@ -461,12 +467,16 @@ test_tokenizer()
     SimpleTokenizerTest p("a simple test");
     CHECK_EQ(p.get_tokens().size(), 1UL);
     CHECK_EQ(p.get_tokens()[0], "a simple test");
+
+    END_TEST();
   }
 
   {
     SimpleTokenizerTest p(R"(quic=":443"; v="35")");
     CHECK_EQ(p.get_tokens().size(), 1UL);
     CHECK_EQ(p.get_tokens()[0], R"(quic=":443"; v="35")");
+
+    END_TEST();
   }
 
   {
@@ -474,15 +484,19 @@ test_tokenizer()
     CHECK_EQ(p.get_tokens().size(), 2UL);
     CHECK_EQ(p.get_tokens()[0], "let's party like it's  ");
     CHECK_EQ(p.get_tokens()[1], "%{NOW:YEAR}");
+
+    END_TEST();
   }
   {
     SimpleTokenizerTest p("A racoon's favorite tag is %{METHOD} in %{NOW:YEAR}!");
     CHECK_EQ(p.get_tokens().size(), 5UL);
     CHECK_EQ(p.get_tokens()[0], "A racoon's favorite tag is ");
-    CHECK_EQ(p.get_tokens()[1], "%{METHOD}c");
+    CHECK_EQ(p.get_tokens()[1], "%{METHOD}");
     CHECK_EQ(p.get_tokens()[2], " in ");
     CHECK_EQ(p.get_tokens()[3], "%{NOW:YEAR}");
     CHECK_EQ(p.get_tokens()[4], "!");
+
+    END_TEST();
   }
 
   {
@@ -492,6 +506,8 @@ test_tokenizer()
     CHECK_EQ(p.get_tokens()[1], "%{IP:SERVER}");
     CHECK_EQ(p.get_tokens()[2], ":");
     CHECK_EQ(p.get_tokens()[3], "%{INBOUND:LOCAL-PORT}");
+
+    END_TEST();
   }
 
   return errors;
diff --git a/plugins/header_rewrite/parser.cc b/plugins/header_rewrite/parser.cc
index cc7a70c52b7..7e5e40e6fd1 100644
--- a/plugins/header_rewrite/parser.cc
+++ b/plugins/header_rewrite/parser.cc
@@ -30,7 +30,8 @@
 
 enum ParserState { PARSER_DEFAULT, PARSER_IN_QUOTE, PARSER_IN_REGEX, PARSER_IN_EXPANSION };
 
-Parser::Parser(const std::string &original_line) : _cond(false), _empty(false)
+bool
+Parser::parse_line(const std::string &original_line)
 {
   std::string line        = original_line;
   ParserState state       = PARSER_DEFAULT;
@@ -85,7 +86,7 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false)
         TSError("[%s] malformed line \"%s\", ignoring", PLUGIN_NAME, line.c_str());
         _tokens.clear();
         _empty = true;
-        return;
+        return false;
       }
     } else if (!extracting_token) {
       if (_tokens.empty() && line[i] == '#') {
@@ -114,21 +115,49 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false)
       TSError("[%s] malformed line, unterminated quotation: \"%s\", ignoring", PLUGIN_NAME, line.c_str());
       _tokens.clear();
       _empty = true;
-      return;
+      return false;
     }
   }
 
   if (_tokens.empty()) {
     _empty = true;
   } else {
-    preprocess(_tokens);
+    return preprocess(_tokens);
   }
+
+  return true;
 }
 
-// This is the core "parser", parsing rule sets
-void
+// This is the main "parser", a helper function to the above tokenizer. NOTE: this modifies (possibly) the tokens list,
+// therefore, we pass in a copy of the parsers tokens here, such that the original token list is retained (useful for tests etc.).
+bool
 Parser::preprocess(std::vector<std::string> tokens)
 {
+  // The last token might be the "flags" section, lets consume it if it is
+  if (tokens.size() > 0) {
+    std::string m = tokens[tokens.size() - 1];
+
+    if (!m.empty() && (m[0] == '[')) {
+      if (m[m.size() - 1] == ']') {
+        m = m.substr(1, m.size() - 2);
+        if (m.find_first_of(',') != std::string::npos) {
+          std::istringstream iss(m);
+          std::string t;
+          while (getline(iss, t, ',')) {
+            _mods.push_back(t);
+          }
+        } else {
+          _mods.push_back(m);
+        }
+        tokens.pop_back(); // consume it, so we don't concatenate it into the value
+      } else {
+        // Syntax error
+        TSError("[%s] mods have to be enclosed in []", PLUGIN_NAME);
+        return false;
+      }
+    }
+  }
+
   // Special case for "conditional" values
   if (tokens[0].substr(0, 2) == "%{") {
     _cond = true;
@@ -155,7 +184,7 @@ Parser::preprocess(std::vector<std::string> tokens)
       }
     } else {
       TSError("[%s] conditions must be embraced in %%{}", PLUGIN_NAME);
-      return;
+      return false;
     }
   } else {
     // Operator has no qualifiers, but could take an optional second argument
@@ -179,29 +208,7 @@ Parser::preprocess(std::vector<std::string> tokens)
     }
   }
 
-  // The last token might be the "flags" section
-  if (tokens.size() > 0) {
-    std::string m = tokens[tokens.size() - 1];
-
-    if (!m.empty() && (m[0] == '[')) {
-      if (m[m.size() - 1] == ']') {
-        m = m.substr(1, m.size() - 2);
-        if (m.find_first_of(',') != std::string::npos) {
-          std::istringstream iss(m);
-          std::string t;
-          while (getline(iss, t, ',')) {
-            _mods.push_back(t);
-          }
-        } else {
-          _mods.push_back(m);
-        }
-      } else {
-        // Syntax error
-        TSError("[%s] mods have to be enclosed in []", PLUGIN_NAME);
-        return;
-      }
-    }
-  }
+  return true;
 }
 
 // Check if the operator is a condition, a hook, and if so, which hook. If the cond is not a hook
@@ -241,12 +248,6 @@ Parser::cond_is_hook(TSHttpHookID &hook) const
   return false;
 }
 
-const std::vector<std::string> &
-Parser::get_tokens() const
-{
-  return _tokens;
-}
-
 SimpleTokenizer::SimpleTokenizer(const std::string &original_line)
 {
   std::string line        = original_line;
@@ -294,9 +295,3 @@ SimpleTokenizer::SimpleTokenizer(const std::string &original_line)
     _tokens.push_back(line.substr(cur_token_start));
   }
 }
-
-const std::vector<std::string> &
-SimpleTokenizer::get_tokens() const
-{
-  return _tokens;
-}
diff --git a/plugins/header_rewrite/parser.h b/plugins/header_rewrite/parser.h
index 385c824b833..691a817b72b 100644
--- a/plugins/header_rewrite/parser.h
+++ b/plugins/header_rewrite/parser.h
@@ -33,7 +33,7 @@
 class Parser
 {
 public:
-  explicit Parser(const std::string &line);
+  Parser(){};
 
   // noncopyable
   Parser(const Parser &) = delete;
@@ -76,13 +76,20 @@ class Parser
   }
 
   bool cond_is_hook(TSHttpHookID &hook) const;
-  const std::vector<std::string> &get_tokens() const;
+
+  const std::vector<std::string> &
+  get_tokens() const
+  {
+    return _tokens;
+  }
+
+  bool parse_line(const std::string &original_line);
 
 private:
-  void preprocess(std::vector<std::string> tokens);
+  bool preprocess(std::vector<std::string> tokens);
 
-  bool _cond;
-  bool _empty;
+  bool _cond  = false;
+  bool _empty = false;
   std::vector<std::string> _mods;
   std::string _op;
   std::string _arg;
@@ -101,7 +108,11 @@ class SimpleTokenizer
   SimpleTokenizer(const SimpleTokenizer &) = delete;
   void operator=(const SimpleTokenizer &) = delete;
 
-  const std::vector<std::string> &get_tokens() const;
+  const std::vector<std::string> &
+  get_tokens() const
+  {
+    return _tokens;
+  }
 
 protected:
   std::vector<std::string> _tokens;
diff --git a/plugins/header_rewrite/ruleset.cc b/plugins/header_rewrite/ruleset.cc
index f6d0140f7fd..b2b7d8c1e71 100644
--- a/plugins/header_rewrite/ruleset.cc
+++ b/plugins/header_rewrite/ruleset.cc
@@ -76,8 +76,7 @@ RuleSet::add_operator(Parser &p, const char *filename, int lineno)
   Operator *o = operator_factory(p.get_op());
 
   if (nullptr != o) {
-    // TODO: This should be extended to show both the "argument" and the "value" (if both are used)
-    TSDebug(PLUGIN_NAME, "   Adding operator: %s(%s)", p.get_op().c_str(), p.get_arg().c_str());
+    TSDebug(PLUGIN_NAME, "   Adding operator: %s(%s)=\"%s\"", p.get_op().c_str(), p.get_arg().c_str(), p.get_value().c_str());
     o->initialize(p);
     if (!o->set_hook(_hook)) {
       delete o;
diff --git a/plugins/header_rewrite/value.cc b/plugins/header_rewrite/value.cc
index 5059a009755..fc4dcc0e09b 100644
--- a/plugins/header_rewrite/value.cc
+++ b/plugins/header_rewrite/value.cc
@@ -54,9 +54,13 @@ Value::set_value(const std::string &val)
         std::string cond_token = token.substr(2, token.size() - 3);
 
         if ((tcond_val = condition_factory(cond_token))) {
-          Parser parser(_value);
+          Parser parser;
 
-          tcond_val->initialize(parser);
+          if (parser.parse_line(_value)) {
+            tcond_val->initialize(parser);
+          } else {
+            // TODO: should we produce error here?
+          }
         }
       } else {
         tcond_val = new ConditionStringLiteral(token);