@@ -394,6 +394,71 @@ auto expand_string_literal(std::string_view text, std::vector<error>& errors, so
394394 return parts.generate ();
395395}
396396
397+ auto expand_raw_string_literal (
398+ const std::string& opening_seq,
399+ const std::string& closing_seq,
400+ string_parts::adds_sequences closing_strategy,
401+ std::string_view text,
402+ std::vector<error>& errors,
403+ source_position src_pos) -> string_parts
404+ {
405+ auto const length = std::ssize (text);
406+ auto pos = 0 ;
407+ auto first_quote_pos = pos;
408+ auto current_start = pos; // the current offset before which the string has been added to ret
409+ string_parts parts{opening_seq, closing_seq, closing_strategy};
410+
411+ // Now we're on the first character of the string itself
412+ for ( ; pos < length; ++pos )
413+ {
414+ // Find the next )$
415+ if (text[pos] == ' $' && text[pos-1 ] == ' )' )
416+ {
417+ // Scan back to find the matching (
418+ auto paren_depth = 1 ;
419+ auto open = pos - 2 ;
420+
421+ for ( ; open > current_start; --open)
422+ {
423+ if (text[open] == ' )' ) {
424+ ++paren_depth;
425+ }
426+ else if (text[open] == ' (' ) {
427+ --paren_depth;
428+ if (paren_depth == 0 ) {
429+ break ;
430+ }
431+ }
432+ }
433+ if (text[open] != ' (' )
434+ {
435+ errors.emplace_back (
436+ source_position ( src_pos.lineno , src_pos.colno + pos ),
437+ " no matching ( for string interpolation ending in )$"
438+ );
439+ return parts;
440+ }
441+
442+ // 'open' is now at the matching (
443+
444+ // Put the next non-empty non-interpolated chunk straight into ret
445+ if (open != current_start) {
446+ parts.add_string (text.substr (current_start, open - current_start));
447+ }
448+ // Then put interpolated chunk into ret
449+ parts.add_code (" cpp2::to_string" + std::string{text.substr (open, pos - open)});
450+
451+ current_start = pos+1 ;
452+ }
453+ }
454+
455+ // Put the final non-interpolated chunk straight into ret
456+ if (current_start < std::ssize (text)) {
457+ parts.add_string (text.substr (current_start));
458+ }
459+
460+ return parts;
461+ }
397462
398463// -----------------------------------------------------------------------
399464// lex: Tokenize a single line while maintaining inter-line state
@@ -413,6 +478,8 @@ auto expand_string_literal(std::string_view text, std::vector<error>& errors, so
413478// -- this isn't about tokens generated later, that's tokens::generated_tokens
414479static auto generated_text = std::deque<std::string>{};
415480
//  Storage for finished multi-line raw string literals: lex_line appends an
//  entry (text plus end position) when such a literal is closed, and the
//  StringLiteral token it emits points into the stored text
481+ static auto multiline_raw_strings = std::deque<multiline_raw_string>{};
482+
416483auto lex_line (
417484 std::string& mutable_line,
418485 int const lineno,
@@ -782,6 +849,45 @@ auto lex_line(
782849 return do_is_keyword (multi_keys);
783850 };
784851
852+ auto reset_processing_of_the_line = [&]() {
853+ // Redo processing of this whole line now that the string is expanded,
854+ // which may have moved it in memory... move i back to the line start
855+ // and discard any tokens we already tokenized for this line
856+ i = colno_t {-1 };
857+ while (!tokens.empty () && tokens.back ().position ().lineno == lineno) {
858+ tokens.pop_back ();
859+ }
860+ };
861+
862+ auto interpolate_raw_string = [&](
863+ const std::string& opening_seq,
864+ const std::string& closing_seq,
865+ string_parts::adds_sequences closing_strategy,
866+ std::string_view part,
867+ int pos_to_replace,
868+ int size_to_replace
869+ ) -> bool {
870+ auto parts = expand_raw_string_literal (opening_seq, closing_seq, closing_strategy, part, errors, source_position (lineno, pos_to_replace + 1 ));
871+ auto new_part = parts.generate ();
872+ mutable_line.replace ( pos_to_replace, size_to_replace, new_part );
873+ i += std::ssize (new_part)-1 ;
874+
875+ if (parts.is_expanded ()) {
876+ // raw string was expanded and we need to repeat the processing of this line
877+ reset_processing_of_the_line ();
878+
879+ // but skipping end of potential multiline raw string that ends on this line
880+ if (!multiline_raw_strings.empty () && multiline_raw_strings.back ().end .lineno == lineno) {
881+ i = multiline_raw_strings.back ().end .colno ;
882+ raw_string_multiline.reset ();
883+ } else if (raw_string_multiline && raw_string_multiline->start .lineno == lineno) {
884+ raw_string_multiline.reset ();
885+ }
886+ return true ;
887+ }
888+ return false ;
889+ };
890+
785891 //
786892 // -----------------------------------------------------
787893
@@ -811,6 +917,7 @@ auto lex_line(
811917 else if (peek1 == ' R' && peek2 == next) { return 3 ; } // LR"
812918 }
813919 else if (line[i] == ' R' && peek1 == next) { return 2 ; } // R"
920+ else if (line[i] == ' $' && peek1 == ' R' && peek2 == next) { return 3 ; } // $R"
814921 return 0 ;
815922 };
816923
@@ -840,22 +947,36 @@ auto lex_line(
840947 auto end_pos = line.find (raw_string_multiline.value ().closing_seq , i);
841948 auto part = line.substr (i, end_pos-i);
842949
950+ if (const auto & rsm = raw_string_multiline.value (); rsm.should_interpolate ) {
951+
952+ auto closing_strategy = end_pos == line.npos ? string_parts::no_ends : string_parts::on_the_end;
953+ auto size_to_replace = end_pos == line.npos ? std::ssize (line) - i : end_pos - i + std::ssize (rsm.closing_seq );
954+
955+ if (interpolate_raw_string (rsm.opening_seq , rsm.closing_seq , closing_strategy, part, i, size_to_replace ) ) {
956+ continue ;
957+ }
958+ }
959+ // raw string was not expanded
960+
843961 raw_string_multiline.value ().text += part;
844962 if (end_pos == std::string::npos) {
845963 raw_string_multiline.value ().text += ' \n ' ;
846964 break ;
847965 }
848966
849- // here we know that we are dealing with multiline raw string literal
850- // token needs to use generated_text to store string that exists in multiple lines
851- i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
967+ // here we know that we are dealing with finalized multiline raw string literal
968+ // token needs to use multiline_raw_strings to store string that exists in multiple lines
852969 raw_string_multiline.value ().text += raw_string_multiline.value ().closing_seq ;
853970
854- generated_text.push_back (raw_string_multiline.value ().text );
971+ // and position where multiline_raw_string ends (needed for resetting line parsing)
972+ i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
973+
974+ const auto & text = raw_string_multiline.value ().should_interpolate ? raw_string_multiline.value ().text .substr (1 ) : raw_string_multiline.value ().text ;
975+ multiline_raw_strings.emplace_back (multiline_raw_string{ text, {lineno, i} });
855976
856977 tokens.push_back ({
857- &generated_text .back ()[0 ],
858- std::ssize (generated_text .back ()),
978+ &multiline_raw_strings .back (). text [0 ],
979+ std::ssize (multiline_raw_strings .back (). text ),
859980 raw_string_multiline.value ().start ,
860981 lexeme::StringLiteral
861982 });
@@ -1046,7 +1167,62 @@ auto lex_line(
10461167 store (1 , lexeme::QuestionMark);
10471168
10481169 break ;case ' $' :
1049- store (1 , lexeme::Dollar);
1170+ if (auto j = is_encoding_prefix_and (' \" ' ); peek (j-2 ) == ' R' ) {
1171+ // if peek(j-2) is 'R' it means that we deal with raw-string literal
1172+ auto R_pos = i + j - 2 ;
1173+ auto seq_pos = i + j;
1174+
1175+ if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1176+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1177+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1178+
1179+ if (auto closing_pos = line.find (closing_seq, paren_pos+1 ); closing_pos != line.npos ) {
1180+ if (interpolate_raw_string (
1181+ opening_seq,
1182+ closing_seq,
1183+ string_parts::on_both_ends,
1184+ std::string_view (&line[paren_pos+1 ], closing_pos-paren_pos-1 ), i, closing_pos-i+std::ssize (closing_seq))
1185+ ) {
1186+ continue ;
1187+ }
1188+
1189+ tokens.push_back ({
1190+ &line[R_pos],
1191+ i - R_pos + 1 ,
1192+ source_position (lineno, R_pos + 1 ),
1193+ lexeme::StringLiteral
1194+ });
1195+ } else {
1196+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq, true });
1197+
1198+ if (interpolate_raw_string (
1199+ opening_seq,
1200+ closing_seq,
1201+ string_parts::on_the_begining,
1202+ std::string_view (&line[paren_pos+1 ], std::ssize (line)-(paren_pos+1 )), i, std::ssize (line)-i)
1203+ ) {
1204+ continue ;
1205+ }
1206+ // skip entire raw string opening sequence R"
1207+ i = paren_pos;
1208+
1209+ // if we are on the end of the line we need to add new line char
1210+ if (i+1 == std::ssize (line)) {
1211+ raw_string_multiline.value ().text += ' \n ' ;
1212+ }
1213+ }
1214+ continue ;
1215+ }
1216+ else {
1217+ errors.emplace_back (
1218+ source_position (lineno, i + j - 2 ),
1219+ " invalid new-line in raw string delimiter \" " + std::string (&line[i],j)
1220+ + " \" - stray 'R' in program \" "
1221+ );
1222+ }
1223+ } else {
1224+ store (1 , lexeme::Dollar);
1225+ }
10501226
10511227 // G
10521228 // G literal:
@@ -1196,13 +1372,13 @@ auto lex_line(
11961372 auto seq_pos = i + j;
11971373
11981374 if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1199- auto raw_string_opening_seq = line.substr (i, paren_pos - i + 1 );
1200- auto raw_string_closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1375+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1376+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
12011377
1202- if (auto closing_pos = line.find (raw_string_closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1203- store (closing_pos+std::ssize (raw_string_closing_seq )-i, lexeme::StringLiteral);
1378+ if (auto closing_pos = line.find (closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1379+ store (closing_pos+std::ssize (closing_seq )-i, lexeme::StringLiteral);
12041380 } else {
1205- raw_string_multiline.emplace (raw_string{source_position{lineno, i}, raw_string_opening_seq, raw_string_opening_seq, raw_string_closing_seq });
1381+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq });
12061382 // skip entire raw string opening sequence R"
12071383 i = paren_pos;
12081384
@@ -1245,13 +1421,7 @@ auto lex_line(
12451421 assert (std::ssize (s) > j+1 );
12461422 mutable_line.replace ( i, j+1 , s );
12471423
1248- // Redo processing of this whole line now that the string is expanded,
1249- // which may have moved it in memory... move i back to the line start
1250- // and discard any tokens we already tokenized for this line
1251- i = colno_t {-1 };
1252- while (!tokens.empty () && tokens.back ().position ().lineno == lineno) {
1253- tokens.pop_back ();
1254- }
1424+ reset_processing_of_the_line ();
12551425 }
12561426 }
12571427 }
0 commit comments