Skip to content

Commit fda45aa

Browse files
authored
Add cpp2 raw string literals support with interpolation (#251)
* Add string_parts * Add raw_string struct * Refactor expand_string_literal to use string_parts The helper class that was used for raw strings can also replace the expansion of string literals. * Add support for raw string literals in cpp2 * Add raw string interpolation support for cpp2 Raw-string literals that start with $ (dollar sign) will interpolate. That means that the following code: ```cpp rs := $R"(m["one"] + m["two"] = (m["one"] + m["two"])$)"; ``` will generate the following cpp1 code: ```cpp auto rs { R"(m["one"] + m["two"] = )" + cpp2::to_string(cpp2::assert_in_bounds(m, "one") + cpp2::assert_in_bounds(m, "two")) }; ``` It handles raw strings on a single line and across multiple lines. It processes the source line by line and stores the parts of a multiline raw string in a separate buffer (multiline_raw_strings). * Add regression-tests * Move `$R"` prefix out from is_encoding_prefix_and() As there is only one place where there is a check for `$R"`, I have moved this check out of the is_encoding_prefix_and() function. This prefix is now checked directly after matching `$` in lex_line(). Update the comment section of is_encoding_prefix_and() to include all prefixes that are supported by the function.
1 parent c4bc996 commit fda45aa

9 files changed

+538
-76
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Regression test for Cpp2 raw string literals, with and without `$`
// interpolation. Builds several strings and prints each one on its own line.
main: () -> int = {
2+
i := 42;
3+
m : std::map<std::string, int> = ();
4+
m["one"] = 1;
5+
m["two"] = 2;
6+
7+
// Ordinary (non-raw) string literal, for comparison.
str : std::string = "this is a string";
8+
9+
// Raw string with a custom delimiter and no `$` prefix: emitted unchanged.
raw_str : std::string = R"string(raw string without interpolation)string";
10+
11+
// Raw string without `$` that spans several source lines.
raw_str_multi : std::string = R"test(this is raw string literal
12+
13+
that can last for multiple
14+
15+
lines)test";
16+
17+
// `$R"` prefix: every `(expr)$` inside the literal is replaced by the value
// of `expr`; the nested `R"(...)"` text is kept as plain characters.
raw_str_inter : std::string = $R"test(this is raw string literal
18+
that can last for multiple
19+
lines
20+
(i)$ R"(this can be added too)"
21+
calculations like m["one"] + m["two"] = (m["one"] + m["two"])$ also works
22+
("at the beginning of the line")$!!!)test";
23+
24+
// Several interpolated raw strings joined with `+`, the first one multi-line.
raw_str_inter_multi : std::string = $R"(
25+
26+
)" + $R"((i)$)" + $R"((i)$)";
27+
28+
std::cout << str << std::endl;
29+
std::cout << raw_str << std::endl;
30+
std::cout << raw_str_multi << std::endl;
31+
std::cout << raw_str_inter << std::endl;
32+
std::cout << raw_str_inter_multi << std::endl;
33+
// Interpolated raw strings used directly inside an expression. The key "three"
// was never assigned above; the recorded expected output prints 0 for it.
std::cout << ($R"((m["one"])$.)" + $R"((m["two"])$.)" + $R"((m["three"])$.)" + $R"((i)$)") << std::endl;
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
this is a string
2+
raw string without interpolation
3+
this is raw string literal
4+
5+
that can last for multiple
6+
7+
lines
8+
this is raw string literal
9+
that can last for multiple
10+
lines
11+
42 R"(this can be added too)"
12+
calculations like m["one"] + m["two"] = 3 also works
13+
at the beginning of the line!!!
14+
15+
16+
4242
17+
1.2.0.42

regression-tests/test-results/apple-clang-14/pure2-raw-string-literal-and-interpolation.cpp.output

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
this is a string
2+
raw string without interpolation
3+
this is raw string literal
4+
5+
that can last for multiple
6+
7+
lines
8+
this is raw string literal
9+
that can last for multiple
10+
lines
11+
42 R"(this can be added too)"
12+
calculations like m["one"] + m["two"] = 3 also works
13+
at the beginning of the line!!!
14+
15+
16+
4242
17+
1.2.0.42

regression-tests/test-results/clang-12/pure2-raw-string-literal-and-interpolation.cpp.output

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
#define CPP2_USE_MODULES Yes
3+
4+
#include "cpp2util.h"
5+
6+
7+
#line 1 "pure2-raw-string-literal-and-interpolation.cpp2"
8+
[[nodiscard]] auto main() -> int;
9+
10+
//=== Cpp2 definitions ==========================================================
11+
12+
#line 1 "pure2-raw-string-literal-and-interpolation.cpp2"
13+
// Recorded Cpp1 translation of the Cpp2 `main` from
// pure2-raw-string-literal-and-interpolation.cpp2 (see the #line directive).
// Each interpolated `(expr)$` has become a `cpp2::to_string(expr)` call that
// is concatenated with the surrounding raw string pieces.
[[nodiscard]] auto main() -> int{
14+
auto i {42};
15+
std::map<std::string,int> m {};
16+
cpp2::assert_in_bounds(m, "one") = 1;
17+
cpp2::assert_in_bounds(m, "two") = 2;
18+
19+
std::string str {"this is a string"};
20+
21+
std::string raw_str {R"string(raw string without interpolation)string"};
22+
23+
std::string raw_str_multi {R"test(this is raw string literal
24+
25+
that can last for multiple
26+
27+
lines)test"};
28+
29+
std::string raw_str_inter {R"test(this is raw string literal
30+
that can last for multiple
31+
lines
32+
)test" + cpp2::to_string(i) + R"test( R"(this can be added too)"
33+
calculations like m["one"] + m["two"] = )test" + cpp2::to_string(cpp2::assert_in_bounds(m, "one") + cpp2::assert_in_bounds(m, "two")) + R"test( also works
34+
)test" + cpp2::to_string("at the beginning of the line") + R"test(!!!)test"};
35+
36+
std::string raw_str_inter_multi {R"(
37+
38+
)" + cpp2::to_string(i) + cpp2::to_string(i)};
39+
40+
std::cout << std::move(str) << std::endl;
41+
std::cout << std::move(raw_str) << std::endl;
42+
std::cout << std::move(raw_str_multi) << std::endl;
43+
std::cout << std::move(raw_str_inter) << std::endl;
44+
std::cout << std::move(raw_str_inter_multi) << std::endl;
45+
// The last uses of `m` and `i` in the function are wrapped in std::move.
std::cout << (cpp2::to_string(cpp2::assert_in_bounds(m, "one")) + R"(.)" + cpp2::to_string(cpp2::assert_in_bounds(m, "two")) + R"(.)" + cpp2::to_string(cpp2::assert_in_bounds(std::move(m), "three")) + R"(.)" + cpp2::to_string(std::move(i))) << std::endl;
46+
}
47+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pure2-raw-string-literal-and-interpolation.cpp2... ok (all Cpp2, passes safety checks)
2+

source/common.h

+125
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,131 @@ struct comment
100100
std::string text;
101101
};
102102

103+
//-----------------------------------------------------------------------
//
//  string_parts: an ordered list of string-literal pieces and code pieces
//                that generate() joins into one C++ expression
//
//  raw_string parts are emitted verbatim; consecutive ones simply run
//  together inside the same literal. cpp_code parts are emitted as
//  operands joined with " + ". Whenever the output switches from literal
//  text to code the literal is closed with end_seq, and whenever it
//  switches from code back to literal text a new literal is opened with
//  begin_seq.
//
//  adds_sequences is a bit mask that says whether generate() itself emits
//  the very first begin_seq and the very last end_seq:
//    - on_the_begining set:   a leading raw_string part is prefixed with
//                             begin_seq
//    - on_the_begining clear: the constructor seeds `parts` with an empty
//                             raw_string, so the output starts as if a
//                             literal were already open
//    - on_the_end set:        a trailing raw_string part is followed by
//                             end_seq
//    - on_the_end clear:      a trailing cpp_code part is followed by
//                             " + " and begin_seq, so the output ends as if
//                             a literal were still open
//
struct string_parts {
    struct cpp_code   { std::string text; };
    struct raw_string { std::string text; };
    enum adds_sequences { no_ends = 0, on_the_begining = 1, on_the_end = 2, on_both_ends = 3 };

    //  beginseq / endseq: text that opens / closes one literal in the output
    //  strateg:           which of the two outer sequences generate() emits
    string_parts(const std::string& beginseq,
                 const std::string& endseq,
                 adds_sequences     strateg)
        : begin_seq{beginseq}
        , end_seq{endseq}
        , strategy{strateg}
    {
        //  Without a leading begin_seq, start with an empty literal part so
        //  that a code part added first is still preceded by end_seq + " + "
        if (!(strategy & on_the_begining)) {
            parts.push_back(raw_string{""});
        }
    }

    //  Append a piece of C++ code / a piece of literal text
    void add_code(const std::string& text)        { parts.push_back(cpp_code{text}); }
    void add_string(const std::string& text)      { parts.push_back(raw_string{text}); }
    void add_string(const std::string_view& text) { parts.push_back(raw_string{std::string(text)}); }

    //  Remove every part, including the empty placeholder the constructor
    //  adds when the strategy lacks on_the_begining
    void clear() { parts.clear(); }

    //  Build the joined expression text from the parts added so far
    auto generate() const -> std::string {
        const bool emits_begin = (strategy & on_the_begining) != 0;
        const bool emits_end   = (strategy & on_the_end) != 0;

        //  No parts at all: only the requested outer sequences remain
        if (parts.empty()) {
            return (emits_begin ? begin_seq : std::string{})
                 + (emits_end   ? end_seq   : std::string{});
        }

        //  One joiner is shared by the pair loop and the tail handling
        const auto join = generator_visit{begin_seq, end_seq};

        auto result = std::visit(begin_visit{begin_seq, strategy}, parts.front());

        //  Walk adjacent pairs; with a single part `next` starts at cend()
        //  and the loop body never runs
        for (auto prev = parts.cbegin(), next = prev + 1;
             next != parts.cend();
             ++prev, ++next)
        {
            result += std::visit(join, *prev, *next);
        }

        //  When the closing sequence is not emitted here, behave as if an
        //  empty literal followed the last part: trailing code is followed
        //  by " + " and begin_seq, trailing literal text adds nothing
        if (!emits_end) {
            result += std::visit(
                [&join](const auto& last) { return join(last, raw_string{""}); },
                parts.back());
        }

        result += std::visit(end_visit{end_seq, strategy}, parts.back());

        return result;
    }

    //  True when at least one cpp_code part has been added
    auto is_expanded() const -> bool {
        for (const auto& part : parts) {
            if (std::holds_alternative<cpp_code>(part)) {
                return true;
            }
        }
        return false;
    }

private:
    std::string    begin_seq;
    std::string    end_seq;
    adds_sequences strategy;
    std::vector<std::variant<raw_string, cpp_code>> parts;

    //  The visitors below only live inside generate(), so they refer to the
    //  sequences owned by the string_parts object instead of copying them

    //  Text for the very first part
    struct begin_visit {
        const std::string& begin_seq;
        adds_sequences     strategy;

        auto operator()(const raw_string& part) const -> std::string {
            return (strategy & on_the_begining) ? begin_seq + part.text : part.text;
        }
        auto operator()(const cpp_code& part) const -> std::string {
            return part.text;
        }
    };

    //  Text appended after the very last part
    struct end_visit {
        const std::string& end_seq;
        adds_sequences     strategy;

        auto operator()(const raw_string&) const -> std::string {
            return (strategy & on_the_end) ? end_seq : std::string{};
        }
        auto operator()(const cpp_code&) const -> std::string {
            return {};
        }
    };

    //  Text for the second part of an adjacent pair, including whatever is
    //  needed to get from the first part's kind to the second part's kind
    struct generator_visit {
        const std::string& begin_seq;
        const std::string& end_seq;

        //  literal -> code: close the literal, then add the code operand
        auto operator()(const raw_string&, const cpp_code& part) const -> std::string {
            return end_seq + " + " + part.text;
        }
        //  code -> literal: open a new literal operand
        auto operator()(const cpp_code&, const raw_string& part) const -> std::string {
            return " + " + begin_seq + part.text;
        }
        //  literal -> literal: continue inside the same literal
        auto operator()(const raw_string&, const raw_string& part) const -> std::string {
            return part.text;
        }
        //  code -> code: add another operand
        auto operator()(const cpp_code&, const cpp_code& part) const -> std::string {
            return " + " + part.text;
        }
    };
};
212+
213+
// raw_string: data recorded for one raw string literal
// NOTE(review): the field meanings below are inferred from the names and the
// commit description; the code that fills them in is not visible here.
struct raw_string
214+
{
215+
// presumably the source position where the literal begins -- confirm
source_position start;
216+
// text collected for the literal
std::string text;
217+
// presumably the literal's opening / closing delimiter sequences -- confirm
std::string opening_seq;
218+
std::string closing_seq;
219+
// off by default; the commit description says literals written with a
// leading `$` (i.e. `$R"`) are interpolated
bool should_interpolate = false;
220+
};
221+
222+
// multiline_raw_string: a piece of raw string text together with a source
// position. The commit description says the parts of a multi-line raw string
// are kept in a separate buffer (multiline_raw_strings).
// NOTE(review): presumably this is the element type of that buffer -- confirm
struct multiline_raw_string
223+
{
224+
std::string text;
225+
// initialised to {0, 0}; presumably the position where this piece ends
source_position end = {0, 0};
226+
};
227+
103228
//-----------------------------------------------------------------------
104229
//
105230
// error: represents a user-readable error message

0 commit comments

Comments
 (0)