@@ -436,6 +436,71 @@ auto expand_string_literal(
436436 return parts.generate ();
437437}
438438
439+ auto expand_raw_string_literal (
440+ const std::string& opening_seq,
441+ const std::string& closing_seq,
442+ string_parts::adds_sequences closing_strategy,
443+ std::string_view text,
444+ std::vector<error>& errors,
445+ source_position src_pos) -> string_parts
446+ {
447+ auto const length = std::ssize (text);
448+ auto pos = 0 ;
449+ auto first_quote_pos = pos;
450+ auto current_start = pos; // the current offset before which the string has been added to ret
451+ string_parts parts{opening_seq, closing_seq, closing_strategy};
452+
453+ // Now we're on the first character of the string itself
454+ for ( ; pos < length; ++pos )
455+ {
456+ // Find the next )$
457+ if (text[pos] == ' $' && text[pos-1 ] == ' )' )
458+ {
459+ // Scan back to find the matching (
460+ auto paren_depth = 1 ;
461+ auto open = pos - 2 ;
462+
463+ for ( ; open > current_start; --open)
464+ {
465+ if (text[open] == ' )' ) {
466+ ++paren_depth;
467+ }
468+ else if (text[open] == ' (' ) {
469+ --paren_depth;
470+ if (paren_depth == 0 ) {
471+ break ;
472+ }
473+ }
474+ }
475+ if (text[open] != ' (' )
476+ {
477+ errors.emplace_back (
478+ source_position ( src_pos.lineno , src_pos.colno + pos ),
479+ " no matching ( for string interpolation ending in )$"
480+ );
481+ return parts;
482+ }
483+
484+ // 'open' is now at the matching (
485+
486+ // Put the next non-empty non-interpolated chunk straight into ret
487+ if (open != current_start) {
488+ parts.add_string (text.substr (current_start, open - current_start));
489+ }
490+ // Then put interpolated chunk into ret
491+ parts.add_code (" cpp2::to_string" + std::string{text.substr (open, pos - open)});
492+
493+ current_start = pos+1 ;
494+ }
495+ }
496+
497+ // Put the final non-interpolated chunk straight into ret
498+ if (current_start < std::ssize (text)) {
499+ parts.add_string (text.substr (current_start));
500+ }
501+
502+ return parts;
503+ }
439504
440505// -----------------------------------------------------------------------
441506// lex: Tokenize a single line while maintaining inter-line state
@@ -455,6 +520,8 @@ auto expand_string_literal(
455520// -- this isn't about tokens generated later, that's tokens::generated_tokens
456521static auto generated_text = std::deque<std::string>{};
457522
//  Finalized multiline raw string literals; lex_line appends an entry when a
//  multiline raw string closes and the StringLiteral token it pushes points
//  into that entry's text
523+ static auto multiline_raw_strings = std::deque<multiline_raw_string>{};
524+
458525auto lex_line (
459526 std::string& mutable_line,
460527 int const lineno,
@@ -889,6 +956,49 @@ auto lex_line(
889956 return do_is_keyword (multi_keys);
890957 };
891958
959+ auto reset_processing_of_the_line = [&]() {
960+ // Redo processing of this whole line now that the string is expanded,
961+ // which may have moved it in memory... move i back to the line start
962+ // and discard any tokens we already tokenized for this line
963+ i = colno_t {-1 };
964+ while (
965+ !tokens.empty ()
966+ && tokens.back ().position ().lineno == lineno
967+ )
968+ {
969+ tokens.pop_back ();
970+ }
971+ };
972+
973+ auto interpolate_raw_string = [&](
974+ const std::string& opening_seq,
975+ const std::string& closing_seq,
976+ string_parts::adds_sequences closing_strategy,
977+ std::string_view part,
978+ int pos_to_replace,
979+ int size_to_replace
980+ ) -> bool {
981+ auto parts = expand_raw_string_literal (opening_seq, closing_seq, closing_strategy, part, errors, source_position (lineno, pos_to_replace + 1 ));
982+ auto new_part = parts.generate ();
983+ mutable_line.replace ( pos_to_replace, size_to_replace, new_part );
984+ i += std::ssize (new_part)-1 ;
985+
986+ if (parts.is_expanded ()) {
987+ // raw string was expanded and we need to repeat the processing of this line
988+ reset_processing_of_the_line ();
989+
990+ // but skipping end of potential multiline raw string that ends on this line
991+ if (!multiline_raw_strings.empty () && multiline_raw_strings.back ().end .lineno == lineno) {
992+ i = multiline_raw_strings.back ().end .colno ;
993+ raw_string_multiline.reset ();
994+ } else if (raw_string_multiline && raw_string_multiline->start .lineno == lineno) {
995+ raw_string_multiline.reset ();
996+ }
997+ return true ;
998+ }
999+ return false ;
1000+ };
1001+
8921002 //
8931003 // -----------------------------------------------------
8941004
@@ -918,6 +1028,7 @@ auto lex_line(
9181028 else if (peek1 == ' R' && peek2 == next) { return 3 ; } // LR"
9191029 }
9201030 else if (line[i] == ' R' && peek1 == next) { return 2 ; } // R"
1031+ else if (line[i] == ' $' && peek1 == ' R' && peek2 == next) { return 3 ; } // $R"
9211032 return 0 ;
9221033 };
9231034
@@ -947,22 +1058,36 @@ auto lex_line(
9471058 auto end_pos = line.find (raw_string_multiline.value ().closing_seq , i);
9481059 auto part = line.substr (i, end_pos-i);
9491060
1061+ if (const auto & rsm = raw_string_multiline.value (); rsm.should_interpolate ) {
1062+
1063+ auto closing_strategy = end_pos == line.npos ? string_parts::no_ends : string_parts::on_the_end;
1064+ auto size_to_replace = end_pos == line.npos ? std::ssize (line) - i : end_pos - i + std::ssize (rsm.closing_seq );
1065+
1066+ if (interpolate_raw_string (rsm.opening_seq , rsm.closing_seq , closing_strategy, part, i, size_to_replace ) ) {
1067+ continue ;
1068+ }
1069+ }
1070+ // raw string was not expanded
1071+
9501072 raw_string_multiline.value ().text += part;
9511073 if (end_pos == std::string::npos) {
9521074 raw_string_multiline.value ().text += ' \n ' ;
9531075 break ;
9541076 }
9551077
956- // here we know that we are dealing with multiline raw string literal
957- // token needs to use generated_text to store string that exists in multiple lines
958- i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
1078+ // here we know that we are dealing with finalized multiline raw string literal
1079+ // token needs to use multiline_raw_strings to store string that exists in multiple lines
9591080 raw_string_multiline.value ().text += raw_string_multiline.value ().closing_seq ;
9601081
961- generated_text.push_back (raw_string_multiline.value ().text );
1082+ // and position where multiline_raw_string ends (needed for resetting line parsing)
1083+ i = end_pos+std::ssize (raw_string_multiline.value ().closing_seq )-1 ;
1084+
1085+ const auto & text = raw_string_multiline.value ().should_interpolate ? raw_string_multiline.value ().text .substr (1 ) : raw_string_multiline.value ().text ;
1086+ multiline_raw_strings.emplace_back (multiline_raw_string{ text, {lineno, i} });
9621087
9631088 tokens.push_back ({
964- &generated_text .back ()[0 ],
965- std::ssize (generated_text .back ()),
1089+ &multiline_raw_strings .back (). text [0 ],
1090+ std::ssize (multiline_raw_strings .back (). text ),
9661091 raw_string_multiline.value ().start ,
9671092 lexeme::StringLiteral
9681093 });
@@ -1153,7 +1278,62 @@ auto lex_line(
11531278 store (1 , lexeme::QuestionMark);
11541279
11551280 break ;case ' $' :
1156- store (1 , lexeme::Dollar);
1281+ if (auto j = is_encoding_prefix_and (' \" ' ); peek (j-2 ) == ' R' ) {
1282+ // if peek(j-2) is 'R' it means that we deal with raw-string literal
1283+ auto R_pos = i + j - 2 ;
1284+ auto seq_pos = i + j;
1285+
1286+ if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1287+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1288+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1289+
1290+ if (auto closing_pos = line.find (closing_seq, paren_pos+1 ); closing_pos != line.npos ) {
1291+ if (interpolate_raw_string (
1292+ opening_seq,
1293+ closing_seq,
1294+ string_parts::on_both_ends,
1295+ std::string_view (&line[paren_pos+1 ], closing_pos-paren_pos-1 ), i, closing_pos-i+std::ssize (closing_seq))
1296+ ) {
1297+ continue ;
1298+ }
1299+
1300+ tokens.push_back ({
1301+ &line[R_pos],
1302+ i - R_pos + 1 ,
1303+ source_position (lineno, R_pos + 1 ),
1304+ lexeme::StringLiteral
1305+ });
1306+ } else {
1307+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq, true });
1308+
1309+ if (interpolate_raw_string (
1310+ opening_seq,
1311+ closing_seq,
1312+ string_parts::on_the_begining,
1313+ std::string_view (&line[paren_pos+1 ], std::ssize (line)-(paren_pos+1 )), i, std::ssize (line)-i)
1314+ ) {
1315+ continue ;
1316+ }
1317+ // skip entire raw string opening sequence R"
1318+ i = paren_pos;
1319+
1320+ // if we are on the end of the line we need to add new line char
1321+ if (i+1 == std::ssize (line)) {
1322+ raw_string_multiline.value ().text += ' \n ' ;
1323+ }
1324+ }
1325+ continue ;
1326+ }
1327+ else {
1328+ errors.emplace_back (
1329+ source_position (lineno, i + j - 2 ),
1330+ " invalid new-line in raw string delimiter \" " + std::string (&line[i],j)
1331+ + " \" - stray 'R' in program \" "
1332+ );
1333+ }
1334+ } else {
1335+ store (1 , lexeme::Dollar);
1336+ }
11571337
11581338 // G
11591339 // G literal:
@@ -1313,13 +1493,13 @@ auto lex_line(
13131493 auto seq_pos = i + j;
13141494
13151495 if (auto paren_pos = line.find (" (" , seq_pos); paren_pos != std::string::npos) {
1316- auto raw_string_opening_seq = line.substr (i, paren_pos - i + 1 );
1317- auto raw_string_closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
1496+ auto opening_seq = line.substr (i, paren_pos - i + 1 );
1497+ auto closing_seq = " )" +line.substr (seq_pos, paren_pos-seq_pos)+" \" " ;
13181498
1319- if (auto closing_pos = line.find (raw_string_closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1320- store (closing_pos+std::ssize (raw_string_closing_seq )-i, lexeme::StringLiteral);
1499+ if (auto closing_pos = line.find (closing_seq , paren_pos+1 ); closing_pos != line.npos ) {
1500+ store (closing_pos+std::ssize (closing_seq )-i, lexeme::StringLiteral);
13211501 } else {
1322- raw_string_multiline.emplace (raw_string{source_position{lineno, i}, raw_string_opening_seq, raw_string_opening_seq, raw_string_closing_seq });
1502+ raw_string_multiline.emplace (raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq });
13231503 // skip entire raw string opening sequence R"
13241504 i = paren_pos;
13251505
@@ -1368,17 +1548,7 @@ auto lex_line(
13681548 }
13691549 mutable_line.replace ( i, j+1 , s );
13701550
1371- // Redo processing of this whole line now that the string is expanded,
1372- // which may have moved it in memory... move i back to the line start
1373- // and discard any tokens we already tokenized for this line
1374- i = colno_t {-1 };
1375- while (
1376- !tokens.empty ()
1377- && tokens.back ().position ().lineno == lineno
1378- )
1379- {
1380- tokens.pop_back ();
1381- }
1551+ reset_processing_of_the_line ();
13821552 }
13831553 }
13841554 }
0 commit comments