From d42203dde80502f1f952dc739624158b8a05a94a Mon Sep 17 00:00:00 2001 From: "endy.li" <25311962+heroicNeZha@users.noreply.github.com> Date: Tue, 7 Dec 2021 11:05:26 +0800 Subject: [PATCH] feat - schema support chinese (#3380) * feat - support chinese * fix - remove debug anno * support `chinese` * add test cases * fix encode bug Co-authored-by: cpw <13495049+CPWstatic@users.noreply.github.com> Co-authored-by: Yee <2520865+yixinglu@users.noreply.github.com> Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/parser/parser.yy | 3 +- src/parser/scanner.lex | 18 ++++++- tests/tck/features/schema/Schema.feature | 63 ++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 04f1bf1699d..0ea8ede2672 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -213,7 +213,7 @@ static constexpr size_t kCommentLengthLimit = 256; %token BOOL %token INTEGER %token DOUBLE -%token STRING VARIABLE LABEL IPV4 +%token STRING VARIABLE LABEL IPV4 CHINESE_LABEL %type name_label unreserved_keyword predicate_name %type expression @@ -406,6 +406,7 @@ static constexpr size_t kCommentLengthLimit = 256; name_label : LABEL { $$ = $1; } + | CHINESE_LABEL { $$ = $1; } | unreserved_keyword { $$ = $1; } ; diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 007a1f93153..4fe773332d2 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -43,7 +43,12 @@ HEX ([0-9a-fA-F]) OCT ([0-7]) IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]) - +/* U2, U3 and U4 match only well-formed UTF-8 multi-byte sequences (RFC 3629): no overlong forms, no surrogates U+D800-U+DFFF, nothing above U+10FFFF. */ +U [\x80-\xbf] +U2 ([\xc2-\xdf]{U}) +U3 (\xe0[\xa0-\xbf]{U}|[\xe1-\xec\xee\xef]{U}{U}|\xed[\x80-\x9f]{U}) +U4 (\xf0[\x90-\xbf]{U}{U}|[\xf1-\xf3]{U}{U}{U}|\xf4[\x80-\x8f]{U}{U}) +CHINESE_LABEL ({U2}|{U3}|{U4})+ %% @@ -467,6 +472,17 @@ IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]) // Must match /* */ throw GraphParser::syntax_error(*yylloc, "unterminated comment"); } +\`{CHINESE_LABEL}\` { + yylval->strval = new std::string(yytext + 1, yyleng - 2); + if (yylval->strval->size() > MAX_STRING) { + auto error 
= "Out of range of the LABEL length, " + "the max length of LABEL is " + + std::to_string(MAX_STRING) + ":"; + delete yylval->strval; + throw GraphParser::syntax_error(*yylloc, error); + } + return TokenType::CHINESE_LABEL; + } . { /** * Any other unmatched byte sequences will get us here, diff --git a/tests/tck/features/schema/Schema.feature b/tests/tck/features/schema/Schema.feature index b203ea87b62..f30f79a6d27 100644 --- a/tests/tck/features/schema/Schema.feature +++ b/tests/tck/features/schema/Schema.feature @@ -769,6 +769,69 @@ Feature: Insert string vid of vertex and edge ALTER EDGE edge_not_null_default1 CHANGE (name FIXED_STRING(10) DEFAULT 10) """ Then a ExecutionError should be raised at runtime: Invalid param! + # chinese tag without quote mark + When executing query: + """ + CREATE TAG 队伍( 名字 string); + """ + Then a SyntaxError should be raised at runtime: + # chinese tag and chinese prop + When executing query: + """ + CREATE TAG `队伍`(`名字` string); + """ + Then the execution should be successful + # show chinese tags + When executing query: + """ + SHOW TAGS + """ + Then the result should contain: + | Name | + | "队伍" | + # alter chinese tag + When executing query: + """ + ALTER TAG `队伍` ADD (`类别` string); + """ + Then the execution should be successful + # desc chinese tag + When executing query: + """ + DESCRIBE TAG `队伍` + """ + Then the result should be, in any order: + | Field | Type | Null | Default | Comment | + | "名字" | "string" | "YES" | EMPTY | EMPTY | + | "类别" | "string" | "YES" | EMPTY | EMPTY | + # chinese edge and chinese prop + When executing query: + """ + CREATE EDGE `服役`(); + """ + Then the execution should be successful + # show chinese edge + When executing query: + """ + SHOW EDGES; + """ + Then the result should contain: + | Name | + | "服役" | + # alter chinese edge + When executing query: + """ + ALTER EDGE `服役` ADD (`时间` timestamp); + """ + Then the execution should be successful + # desc chinese edge + When executing query: + 
""" + DESCRIBE EDGE `服役` + """ + Then the result should be, in any order: + | Field | Type | Null | Default | Comment | + | "时间" | "timestamp" | "YES" | EMPTY | EMPTY | When executing query: """ DROP SPACE issue2009;