From 562cbb971e4adaaa038a705d6ff614d9347fcee6 Mon Sep 17 00:00:00 2001 From: peefy Date: Wed, 12 Jun 2024 16:15:35 +0800 Subject: [PATCH] fix: wrong long str interpolation location with special chars Signed-off-by: peefy --- kclvm/parser/src/parser/stmt.rs | 53 +++++++----- kclvm/parser/src/tests/error_recovery.rs | 6 ++ ...r_recovery__joined_string_recovery_10.snap | 83 +++++++++++++++++++ ...or_recovery__joined_string_recovery_7.snap | 14 ++-- ...or_recovery__joined_string_recovery_8.snap | 18 ++-- ...or_recovery__joined_string_recovery_9.snap | 83 +++++++++++++++++++ 6 files changed, 219 insertions(+), 38 deletions(-) create mode 100644 kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_10.snap create mode 100644 kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_9.snap diff --git a/kclvm/parser/src/parser/stmt.rs b/kclvm/parser/src/parser/stmt.rs index 170b28ea3..a543de9c1 100644 --- a/kclvm/parser/src/parser/stmt.rs +++ b/kclvm/parser/src/parser/stmt.rs @@ -1548,11 +1548,7 @@ impl<'a> Parser<'a> { return None; } - let start_pos = if s.is_long_string { - pos + new_byte_pos(3) - } else { - pos + new_byte_pos(1) - }; + let quote_space = if s.is_long_string { 3 } else { 1 }; let mut joined_value = JoinedString { is_long_string: s.is_long_string, @@ -1632,16 +1628,33 @@ impl<'a> Parser<'a> { ) } + // Here we use double pointers of data and raw_data, + // where data is used to obtain string literal data + // and raw_data is used to obtain string interpolation + // data to ensure that their respective positional + // information is correct. + let data = s.value.as_str(); - let mut off: usize = 0; + let raw_data = s.raw_value.as_str(); + let raw_data = &s.raw_value.as_str()[..raw_data.len() - quote_space]; + let mut data_off = 0; + let mut raw_off: usize = quote_space; loop { - if let Some(i) = data[off..].find("${") { - if let Some(j) = data[off + i..].find('}') { - let lo: usize = off + i; - let hi: usize = off + i + j + 1; + if let (Some(i), Some(data_i)) = + (raw_data[raw_off..].find("${"), data[data_off..].find("${")) + { + if let (Some(j), Some(data_j)) = ( + raw_data[raw_off + i..].find('}'), + data[data_off + i..].find('}'), + ) { + let lo: usize = raw_off + i; + let hi: usize = raw_off + i + j + 1; + + let data_lo: usize = data_off + data_i; + let data_hi: usize = data_off + data_i + data_j + 1; - let s0 = &data[off..lo]; - let s1 = &data[lo..hi]; + let s0 = &data[data_off..data_lo]; + let s1 = &raw_data[lo..hi]; let s0_expr = node_ref!(Expr::StringLit(StringLit { is_long_string: false, @@ -1649,14 +1662,15 @@ impl<'a> Parser<'a> { value: s0.to_string().replace("$$", "$"), })); - let s1_expr = parse_expr(self, s1, start_pos + new_byte_pos(lo as u32)); + let s1_expr = parse_expr(self, s1, pos + new_byte_pos(lo as u32)); if !s0.is_empty() { joined_value.values.push(s0_expr); } joined_value.values.push(s1_expr); - off = hi; + data_off = data_hi; + raw_off = hi; continue; } else { self.sess.struct_message_error( @@ -1667,23 +1681,22 @@ impl<'a> Parser<'a> { .values .push(node_ref!(Expr::StringLit(StringLit { is_long_string: false, - raw_value: data[off..].to_string(), - value: data[off..].to_string(), + raw_value: data[data_off..].to_string(), + value: data[data_off..].to_string(), }))); break; } } else { - if off >= s.value.as_str().len() { + if raw_off >= raw_data.len() || data_off >= data.len() { break; } - // todo: fix pos joined_value .values .push(node_ref!(Expr::StringLit(StringLit { is_long_string: false, - raw_value: data[off..].to_string(), - value: data[off..].to_string().replace("$$", "$"), + raw_value: data[data_off..].to_string(), + value: data[data_off..].to_string().replace("$$", "$"), }))); break; } diff --git a/kclvm/parser/src/tests/error_recovery.rs b/kclvm/parser/src/tests/error_recovery.rs index cff059570..77c2440b5 100644 --- a/kclvm/parser/src/tests/error_recovery.rs +++ b/kclvm/parser/src/tests/error_recovery.rs @@ -213,6 +213,12 @@ parse_expr_snapshot! { joined_string_recovery_5, r#"'${a + 1 = }'"# } parse_expr_snapshot! { joined_string_recovery_6, r#"'${a: json}'"# } parse_expr_snapshot! { joined_string_recovery_7, r#"'\n${a: #json}'"# } parse_expr_snapshot! { joined_string_recovery_8, r#"'a\nb${a: #json}\n'"# } +parse_expr_snapshot! { joined_string_recovery_9, r#"'''\ + ${CC} +'''"# } +parse_expr_snapshot! { joined_string_recovery_10, r#"""" + ${CC} +""""# } parse_expr_snapshot! { lambda_recovery_0, r#"lambda"# } parse_expr_snapshot! { lambda_recovery_1, r#"lambda {"# } parse_expr_snapshot! { lambda_recovery_2, r#"lambda {}"# } diff --git a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_10.snap b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_10.snap new file mode 100644 index 000000000..3aca424a4 --- /dev/null +++ b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_10.snap @@ -0,0 +1,83 @@ +--- +source: parser/src/tests/error_recovery.rs +expression: "crate::tests::parsing_expr_string(r#\"\"\"\"\n ${CC}\n\"\"\"\"#)" +--- +Node { + node: JoinedString( + JoinedString { + is_long_string: true, + values: [ + Node { + node: StringLit( + StringLit { + is_long_string: false, + raw_value: "\n ", + value: "\n ", + }, + ), + filename: "", + line: 1, + column: 1, + end_line: 1, + end_column: 1, + }, + Node { + node: FormattedValue( + FormattedValue { + is_long_string: false, + value: Node { + node: Identifier( + Identifier { + names: [ + Node { + node: "CC", + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + ], + pkgpath: "", + ctx: Load, + }, + ), + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + format_spec: None, + }, + ), + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + Node { + node: StringLit( + StringLit { + is_long_string: false, + raw_value: "\n", + value: "\n", + }, + ), + filename: "", + line: 1, + column: 1, + end_line: 1, + end_column: 1, + }, + ], + raw_value: "\"\"\"\n ${CC}\n\"\"\"", + }, + ), + filename: "", + line: 1, + column: 0, + end_line: 3, + end_column: 3, +} diff --git a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_7.snap b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_7.snap index 5a7e5cefa..e41fdfb36 100644 --- a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_7.snap +++ b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_7.snap @@ -1,6 +1,5 @@ --- source: parser/src/tests/error_recovery.rs -assertion_line: 158 expression: "crate::tests::parsing_expr_string(r#\"'\\n${a: #json}'\"#)" --- Node { @@ -34,9 +33,9 @@ Node { node: "a", filename: "", line: 1, - column: 4, + column: 5, end_line: 1, - end_column: 5, + end_column: 6, }, ], pkgpath: "", @@ -45,9 +44,9 @@ Node { ), filename: "", line: 1, - column: 4, + column: 5, end_line: 1, - end_column: 5, + end_column: 6, }, format_spec: Some( "#json", @@ -56,9 +55,9 @@ Node { ), filename: "", line: 1, - column: 4, + column: 5, end_line: 1, - end_column: 12, + end_column: 13, }, ], raw_value: "'\\n${a: #json}'", @@ -70,4 +69,3 @@ Node { end_line: 1, end_column: 15, } - diff --git a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_8.snap b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_8.snap index ecd382291..868e55fd3 100644 --- a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_8.snap +++ b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_8.snap @@ -1,6 +1,5 @@ --- source: parser/src/tests/error_recovery.rs -assertion_line: 159 expression: "crate::tests::parsing_expr_string(r#\"'a\\nb${a: #json}\\n'\"#)" --- Node { @@ -34,9 +33,9 @@ Node { node: "a", filename: "", line: 1, - column: 6, + column: 7, end_line: 1, - end_column: 7, + end_column: 8, }, ], pkgpath: "", @@ -45,9 +44,9 @@ Node { ), filename: "", line: 1, - column: 6, + column: 7, end_line: 1, - end_column: 7, + end_column: 8, }, format_spec: Some( "#json", @@ -56,16 +55,16 @@ Node { ), filename: "", line: 1, - column: 6, + column: 7, end_line: 1, - end_column: 14, + end_column: 15, }, Node { node: StringLit( StringLit { is_long_string: false, - raw_value: "\n", - value: "\n", + raw_value: "}\n", + value: "}\n", }, ), filename: "", @@ -84,4 +83,3 @@ Node { end_line: 1, end_column: 19, } - diff --git a/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_9.snap b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_9.snap new file mode 100644 index 000000000..fa8064ae2 --- /dev/null +++ b/kclvm/parser/src/tests/snapshots/kclvm_parser__tests__error_recovery__joined_string_recovery_9.snap @@ -0,0 +1,83 @@ +--- +source: parser/src/tests/error_recovery.rs +expression: "crate::tests::parsing_expr_string(r#\"'''\\\n ${CC}\n'''\"#)" +--- +Node { + node: JoinedString( + JoinedString { + is_long_string: true, + values: [ + Node { + node: StringLit( + StringLit { + is_long_string: false, + raw_value: " ", + value: " ", + }, + ), + filename: "", + line: 1, + column: 1, + end_line: 1, + end_column: 1, + }, + Node { + node: FormattedValue( + FormattedValue { + is_long_string: false, + value: Node { + node: Identifier( + Identifier { + names: [ + Node { + node: "CC", + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + ], + pkgpath: "", + ctx: Load, + }, + ), + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + format_spec: None, + }, + ), + filename: "", + line: 2, + column: 4, + end_line: 2, + end_column: 6, + }, + Node { + node: StringLit( + StringLit { + is_long_string: false, + raw_value: "C}\n", + value: "C}\n", + }, + ), + filename: "", + line: 1, + column: 1, + end_line: 1, + end_column: 1, + }, + ], + raw_value: "'''\\\n ${CC}\n'''", + }, + ), + filename: "", + line: 1, + column: 0, + end_line: 3, + end_column: 3, +}