From d68ef79139fc21cc53b724b2f28f0edbfdf63bd0 Mon Sep 17 00:00:00 2001 From: Federico Di Pierro Date: Thu, 12 Sep 2024 14:14:29 +0200 Subject: [PATCH 1/2] fix(src,include,test): fixed multiple cases where a bad yaml was accepted. Signed-off-by: Federico Di Pierro --- include/yaml-cpp/exceptions.h | 2 + src/scanner.cpp | 24 +++++++++++ src/scanner.h | 1 + src/scantoken.cpp | 7 ++++ test/integration/load_node_test.cpp | 63 +++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+) diff --git a/include/yaml-cpp/exceptions.h b/include/yaml-cpp/exceptions.h index f6b2602ae..99e06b24e 100644 --- a/include/yaml-cpp/exceptions.h +++ b/include/yaml-cpp/exceptions.h @@ -48,6 +48,8 @@ const char* const UNKNOWN_TOKEN = "unknown token"; const char* const DOC_IN_SCALAR = "illegal document indicator in scalar"; const char* const EOF_IN_SCALAR = "illegal EOF in scalar"; const char* const CHAR_IN_SCALAR = "illegal character in scalar"; +const char* const UNEXPECTED_SCALAR = "unexpected scalar"; +const char* const UNEXPECTED_FLOW = "plain value cannot start with flow indicator character"; const char* const TAB_IN_INDENTATION = "illegal tab when looking for indentation"; const char* const FLOW_END = "illegal flow end"; diff --git a/src/scanner.cpp b/src/scanner.cpp index ea5511a11..d0eb03cc7 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -13,6 +13,7 @@ Scanner::Scanner(std::istream& in) m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), + m_scalarValueAllowed(false), m_canBeJSONFlow(false), m_simpleKeys{}, m_indents{}, @@ -127,6 +128,17 @@ void Scanner::ScanNextToken() { } if (INPUT.peek() == Keys::FlowEntry) { + // values starting with `,` are not allowed. + // eg: reject `,foo` + if (INPUT.column() == 0) { + throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_FLOW); + } + // if we already parsed a quoted scalar value and we are not in a flow, + // then `,` is not a valid character. + // eg: reject `"foo",` + if (!m_scalarValueAllowed) { + throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_SCALAR); + } return ScanFlowEntry(); } @@ -159,6 +171,13 @@ void Scanner::ScanNextToken() { return ScanBlockScalar(); } + // if we already parsed a quoted scalar value in this line, + // another scalar value is an error. + // eg: reject `"foo" "bar"` + if (!m_scalarValueAllowed) { + throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_SCALAR); + } + if (INPUT.peek() == '\'' || INPUT.peek() == '\"') { return ScanQuotedScalar(); } @@ -203,6 +222,9 @@ void Scanner::ScanToNextToken() { // oh yeah, and let's get rid of that simple key InvalidateSimpleKey(); + // new line - we accept a scalar value now + m_scalarValueAllowed = true; + // new line - we may be able to accept a simple key now if (InBlockContext()) { m_simpleKeyAllowed = true; @@ -245,6 +267,7 @@ const RegEx& Scanner::GetValueRegex() const { void Scanner::StartStream() { m_startedStream = true; m_simpleKeyAllowed = true; + m_scalarValueAllowed = true; std::unique_ptr pIndent( new IndentMarker(-1, IndentMarker::NONE)); m_indentRefs.push_back(std::move(pIndent)); @@ -261,6 +284,7 @@ void Scanner::EndStream() { PopAllSimpleKeys(); m_simpleKeyAllowed = false; + m_scalarValueAllowed = false; m_endedStream = true; } diff --git a/src/scanner.h b/src/scanner.h index 4af938e69..a3bfb662d 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -177,6 +177,7 @@ class Scanner { // state info bool m_startedStream, m_endedStream; bool m_simpleKeyAllowed; + bool m_scalarValueAllowed; bool m_canBeJSONFlow; std::stack m_simpleKeys; std::stack m_indents; diff --git a/src/scantoken.cpp b/src/scantoken.cpp index 1a94ab1d7..b13d21133 100644 --- a/src/scantoken.cpp +++ b/src/scantoken.cpp @@ -211,6 +211,9 @@ void Scanner::ScanValue() { m_simpleKeyAllowed = InBlockContext(); } + // we are parsing a `key: value` pair; scalars are always allowed + m_scalarValueAllowed = true; + // eat Mark mark = INPUT.mark(); INPUT.eat(1); @@ -360,6 +363,10 @@ void Scanner::ScanQuotedScalar() { // and scan scalar = ScanScalar(INPUT, params); m_simpleKeyAllowed = false; + // we just scanned a quoted scalar; + // we can only have another scalar in this line + // if we are in a flow, eg: `[ "foo", "bar" ]` is ok, but `"foo", "bar"` isn't. + m_scalarValueAllowed = InFlowContext(); m_canBeJSONFlow = true; Token token(Token::NON_PLAIN_SCALAR, mark); diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index 1cc84a45a..3f32d1aa1 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -334,6 +334,69 @@ TEST(NodeTest, LoadQuotedNull) { EXPECT_EQ(node.as(), "null"); } +TEST(NodeTest, LoadNonClosedQuotedString) { + EXPECT_THROW(Load(R"("foo)"), ParserException); +} + +TEST(NodeTest, LoadWrongQuotedString) { + EXPECT_THROW(Load(R"("foo" [)"), ParserException); + EXPECT_THROW(Load(R"("foo", [)"), ParserException); +} + +TEST(NodeTest, LoadUnquotedQuotedStrings) { + Node node = Load(R"(foo,"bar")"); + EXPECT_EQ(node.as(), "foo,\"bar\""); + + node = Load(R"(foo,bar)"); + EXPECT_EQ(node.as(), "foo,bar"); + + node = Load(R"(foo,)"); + EXPECT_EQ(node.as(), "foo,"); + + node = Load(R"(foo "bar")"); + EXPECT_EQ(node.as(), "foo \"bar\""); +} + +TEST(NodeTest, LoadCommaSeparatedStrings) { + EXPECT_THROW(Load(R"("foo","bar")"), ParserException); + EXPECT_THROW(Load(R"("foo",bar)"), ParserException); + EXPECT_THROW(Load(R"(,)"), ParserException); + EXPECT_THROW(Load(R"("foo",)"), ParserException); + EXPECT_THROW(Load(R"("foo","")"), ParserException); + EXPECT_THROW(Load(R"("foo",)"), ParserException); + EXPECT_THROW(Load(R"(,"foo")"), ParserException); + EXPECT_THROW(Load(R"(,foo)"), ParserException); + + const char *doc_ok = R"( +top +, aa +)"; + EXPECT_NO_THROW(Load(doc_ok)); + + const char *doc_ok_2 = R"( +top +, "aa" +)"; + EXPECT_NO_THROW(Load(doc_ok_2)); + + const char *doc_fail = R"( +"top" +, "aa" +)"; + EXPECT_THROW(Load(doc_fail), ParserException); + + const char *doc_fail_2 = R"( +"top" +, aa +)"; + EXPECT_THROW(Load(doc_fail_2), ParserException); +} + +TEST(NodeTest, LoadSameLineStrings) { + EXPECT_THROW(Load(R"("foo" "bar")"), ParserException); + EXPECT_THROW(Load(R"("foo" bar)"), ParserException); +} + TEST(NodeTest, LoadTagWithParenthesis) { Node node = Load("!Complex(Tag) foo"); EXPECT_EQ(node.Tag(), "!Complex(Tag)"); From 9aac21aa0dd5b9e22062e74d5aa5a66675c82492 Mon Sep 17 00:00:00 2001 From: Federico Di Pierro Date: Fri, 13 Sep 2024 08:50:53 +0200 Subject: [PATCH 2/2] chore(test/integration): refactor some test cases to their own test. Plus, check that the content is what we actually expect. Signed-off-by: Federico Di Pierro --- test/integration/load_node_test.cpp | 47 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index 3f32d1aa1..e9a133b74 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -366,30 +366,31 @@ TEST(NodeTest, LoadCommaSeparatedStrings) { EXPECT_THROW(Load(R"("foo",)"), ParserException); EXPECT_THROW(Load(R"(,"foo")"), ParserException); EXPECT_THROW(Load(R"(,foo)"), ParserException); +} - const char *doc_ok = R"( -top -, aa -)"; - EXPECT_NO_THROW(Load(doc_ok)); - - const char *doc_ok_2 = R"( -top -, "aa" -)"; - EXPECT_NO_THROW(Load(doc_ok_2)); - - const char *doc_fail = R"( -"top" -, "aa" -)"; - EXPECT_THROW(Load(doc_fail), ParserException); - - const char *doc_fail_2 = R"( -"top" -, aa -)"; - EXPECT_THROW(Load(doc_fail_2), ParserException); +struct NewLineStringsTestCase { + std::string input; + std::string expected_content; + bool should_throw; +}; +TEST(NodeTest, LoadNewLineStrings) { + std::vector tests = { + {"foo\n, bar", "foo , bar", false}, + {"foo\n, \"bar\"", "foo , \"bar\"", false}, + {"\"foo\"\n, \"bar\"", "", true}, + {"\"foo\"\n, bar", "", true}, + }; + for (const NewLineStringsTestCase& test : tests) { + if (test.should_throw) { + EXPECT_THROW(Load(test.input), ParserException); + } else { + Node node = Load(test.input); + Emitter emitter; + emitter << node; + EXPECT_EQ(NodeType::Scalar, node.Type()); + EXPECT_EQ(test.expected_content, std::string(emitter.c_str())); + } + } } TEST(NodeTest, LoadSameLineStrings) {