Skip to content

Commit

Permalink
Fix extended white space char. (#5213)
Browse files Browse the repository at this point in the history
* Fix extended white space char.

* Format.

Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com>
  • Loading branch information
Shylock-Hg and Sophie-Xie authored Jan 6, 2023
1 parent 3ac0109 commit 122c2de
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 6 deletions.
13 changes: 8 additions & 5 deletions src/parser/scanner.lex
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ static constexpr size_t MAX_STRING = 4096;
%x LB_STR
%x COMMENT

blank_without_newline ([ \t\r\xa0])
nbsp (\xc2\xa0)
blank_without_newline ([ \t\r]|{nbsp})
blank ({blank_without_newline}|[\n])

blanks ({blank}+)
Expand All @@ -57,17 +58,19 @@ HEX ([0-9a-fA-F])
OCT ([0-7])
IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])

U [\x80-\xbf]
U2 [\xc2-\xdf]
U [\x80-\x9f\xa1-\xbf]
UA0 \xa0
U2 [\xc3-\xdf]
UC2 \xc2
U3 [\xe0-\xee]
U4 [\xf0-\xf4]
CHINESE {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
CHINESE {U2}{UA0}|{UC2}{U}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
CN_EN {CHINESE}|[a-zA-Z]
CN_EN_NUM {CHINESE}|[_a-zA-Z0-9]
LABEL {CN_EN}{CN_EN_NUM}*

U3_FULL_WIDTH [\xe0-\xef]
CHINESE_FULL_WIDTH {U2}{U}|{U3_FULL_WIDTH}{U}{U}|{U4}{U}{U}{U}
CHINESE_FULL_WIDTH {U2}{UA0}|{UC2}{U}|{U2}{U}|{U3_FULL_WIDTH}{U}{U}|{U4}{U}{U}{U}
CN_EN_FULL_WIDTH {CHINESE_FULL_WIDTH}|[a-zA-Z]
CN_EN_NUM_FULL_WIDTH {CHINESE_FULL_WIDTH}|[_a-zA-Z0-9 ]
LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}*
Expand Down
14 changes: 14 additions & 0 deletions src/parser/test/ParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3350,4 +3350,18 @@ TEST_F(ParserTest, TestShowSentenceWithPipe) {
ASSERT_TRUE(result.ok()) << result.status();
}
}

// Checks that statements containing the UTF-8 no-break space (bytes 0xC2 0xA0)
// parse successfully: as the only separator between two keywords, next to a
// regular space, and trailing the statement.
TEST_F(ParserTest, TestSpecialWhiteSpaceChar) {
  const char* const statements[] = {
      "SHOW\xC2\xA0SPACES",
      "SHOW \xC2\xA0SPACES\xC2\xA0",
  };
  for (const char* text : statements) {
    std::string query = text;
    auto result = parse(query);
    // Fatal assertion: the first failing statement ends the test.
    ASSERT_TRUE(result.ok()) << result.status();
  }
}

} // namespace nebula
2 changes: 1 addition & 1 deletion src/parser/test/ScannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ TEST(Scanner, Basic) {
CHECK_SEMANTIC_VALUE("label", TokenType::LABEL, "label"),
CHECK_SEMANTIC_VALUE("label123", TokenType::LABEL, "label123"),
// U+00A0 (no-break space) is encoded as \xC2\xA0 in UTF-8 and is white space too
CHECK_SEMANTIC_VALUE("\xA0"
CHECK_SEMANTIC_VALUE("\xC2\xA0"
"abc",
TokenType::LABEL,
"abc"),
Expand Down
16 changes: 16 additions & 0 deletions tests/tck/features/basic/Parser.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) 2022 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
Feature: Parser

Scenario: Test special white space character
When executing query:
"""
SHOW  SPACES
"""
Then the execution should be successful
When executing query:
"""
RETURN  1
"""
Then the execution should be successful

0 comments on commit 122c2de

Please sign in to comment.