Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Escape unescape improvements #1902

Merged
merged 3 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/libasr/asdl_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1662,12 +1662,12 @@ def visitField(self, field, cons):
elif field.type == "string" and not field.seq:
if field.opt:
self.emit("if (x.m_%s) {" % field.name, 2)
self.emit( 's.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 3)
self.emit( 's.append("\\"" + str_escape_c(x.m_%s) + "\\"");' % field.name, 3)
self.emit("} else {", 2)
self.emit( 's.append("()");', 3)
self.emit("}", 2)
else:
self.emit('s.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 2)
self.emit('s.append("\\"" + str_escape_c(x.m_%s) + "\\"");' % field.name, 2)
elif field.type == "int" and not field.seq:
if field.opt:
self.emit("if (x.m_%s) {" % field.name, 2)
Expand Down Expand Up @@ -1934,12 +1934,12 @@ def visitField(self, field, cons):
elif field.type == "string" and not field.seq:
if field.opt:
self.emit("if (x.m_%s) {" % field.name, 2)
self.emit( 's.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 3)
self.emit( 's.append("\\"" + str_escape_c(x.m_%s) + "\\"");' % field.name, 3)
self.emit("} else {", 2)
self.emit( 's.append("[]");', 3)
self.emit("}", 2)
else:
self.emit('s.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 2)
self.emit('s.append("\\"" + str_escape_c(x.m_%s) + "\\"");' % field.name, 2)
elif field.type == "int" and not field.seq:
if field.opt:
self.emit("if (x.m_%s) {" % field.name, 2)
Expand Down
19 changes: 1 addition & 18 deletions src/libasr/codegen/asr_to_c_cpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1036,24 +1036,7 @@ R"(#include <stdio.h>


// Emits the source text for a string constant: the value of `x.m_s`
// passed through str_escape_c() and wrapped in double quotes.
// The result is stored in `src`; `last_expr_precedence` is set to 2.
//
// The earlier hand-written escaping loop was removed: its output was
// unconditionally overwritten by the str_escape_c() assignment below,
// so it had no effect on the generated code.
void visit_StringConstant(const ASR::StringConstant_t &x) {
    src = "\"" + str_escape_c(x.m_s) + "\"";
    last_expr_precedence = 2;
}

Expand Down
18 changes: 9 additions & 9 deletions src/libasr/codegen/wasm_to_wat.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <fstream>

#include <libasr/assert.h>
#include <libasr/string_utils.h>
#include <libasr/codegen/wasm_decoder.h>
#include <libasr/codegen/wasm_to_wat.h>

Expand Down Expand Up @@ -292,16 +293,15 @@ class WATVisitor : public WASMDecoder<WATVisitor>,
" align=" + std::to_string(1U << mem_align);
}

std::string get_escaped_str(const std::string &s, bool is_iov) {
std::string str_escape_wat(const std::string &s, bool is_iov) {
if (!is_iov) {
return str_escape_c(s);
}
std::string escaped_str = "";
for (auto ch:s) {
if (!is_iov && ch >= 32) {
escaped_str += ch;
} else {
std::string byte(2, ' ');
snprintf(byte.data(), 3, "%02x", uint8_t(ch));
escaped_str += "\\" + byte;
}
std::string byte(2, ' ');
snprintf(byte.data(), 3, "%02x", uint8_t(ch));
escaped_str += "\\" + byte;
}
return escaped_str;
}
Expand Down Expand Up @@ -417,7 +417,7 @@ class WATVisitor : public WASMDecoder<WATVisitor>,
}
result += indent + "(data (;" + std::to_string(i) + ";) (" +
date_segment_insts + ") \"" +
get_escaped_str(data_segments[i].text, (i % 2 == 0)) + "\")";
str_escape_wat(data_segments[i].text, (i % 2 == 0)) + "\")";
}

result += "\n)\n";
Expand Down
4 changes: 4 additions & 0 deletions src/libasr/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ struct Str {
size_t size() const {
return n;
}

// Returns the last character of the string, i.e. the element at
// index `n - 1` of `p`. No emptiness check is performed here.
char back() const {
    return *(p + (n - 1));
}
};

static_assert(std::is_standard_layout<Str>::value);
Expand Down
83 changes: 57 additions & 26 deletions src/libasr/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,29 +87,6 @@ std::string replace(const std::string &s,
return std::regex_replace(s, std::regex(regex), replace);
}

// Returns a copy of `s` in which:
//   - `"` and `\` are prefixed with a backslash,
//   - backspace, form feed, newline, carriage return and tab are written
//     as `\b`, `\f`, `\n`, `\r` and `\t`,
//   - any other character in the range 0x00..0x1f is written as `\u`
//     followed by four lowercase hexadecimal digits,
//   - every other character is copied unchanged.
std::string get_escaped_str(const std::string &s) {
    static const char hex_digits[] = "0123456789abcdef";
    std::string out;
    for (const char ch : s) {
        if (ch == '"') {
            out += "\\\"";
        } else if (ch == '\\') {
            out += "\\\\";
        } else if (ch == '\b') {
            out += "\\b";
        } else if (ch == '\f') {
            out += "\\f";
        } else if (ch == '\n') {
            out += "\\n";
        } else if (ch == '\r') {
            out += "\\r";
        } else if (ch == '\t') {
            out += "\\t";
        } else if (ch >= '\x00' && ch <= '\x1f') {
            // Remaining control characters: value fits in two hex digits,
            // so the upper two of the four digits are always zero.
            out += "\\u00";
            out += hex_digits[(ch >> 4) & 0xf];
            out += hex_digits[ch & 0xf];
        } else {
            out += ch;
        }
    }
    return out;
}

std::string read_file(const std::string &filename)
{
std::ifstream ifs(filename.c_str(), std::ios::in | std::ios::binary
Expand Down Expand Up @@ -154,9 +131,33 @@ std::string join_paths(const std::vector<std::string> &paths) {
return p;
}

char* unescape_string(Allocator &al, LCompilers::Str &s) {
std::string x;
for (size_t idx=0; idx < s.size(); idx++) {
// Escapes `s` for use inside a double-quoted literal.
//
// `"` and `\` get a leading backslash; backspace, form feed, newline,
// carriage return and tab become `\b`, `\f`, `\n`, `\r`, `\t`. Any other
// character with a value in 0x00..0x1f is emitted as `\u` plus four
// lowercase hex digits. All remaining characters are copied as-is.
std::string str_escape_c(const std::string &s) {
    std::string escaped;
    for (size_t i = 0; i < s.size(); i++) {
        const char ch = s[i];

        // Two-character escape for this character, if it has one.
        const char *short_form = nullptr;
        switch (ch) {
            case '"':  short_form = "\\\""; break;
            case '\\': short_form = "\\\\"; break;
            case '\b': short_form = "\\b";  break;
            case '\f': short_form = "\\f";  break;
            case '\n': short_form = "\\n";  break;
            case '\r': short_form = "\\r";  break;
            case '\t': short_form = "\\t";  break;
            default: break;
        }
        if (short_form != nullptr) {
            escaped += short_form;
            continue;
        }

        if ('\x00' <= ch && ch <= '\x1f') {
            // Control character without a short form: at most 0x1f, so the
            // first two of the four hex digits are always "00".
            const char hex[] = "0123456789abcdef";
            escaped += "\\u00";
            escaped += hex[(ch >> 4) & 0x0f];
            escaped += hex[ch & 0x0f];
        } else {
            escaped += ch;
        }
    }
    return escaped;
}

char* str_unescape_c(Allocator &al, LCompilers::Str &s) {
std::string x = "";
size_t idx = 0;
for (; idx + 1 < s.size(); idx++) {
if (s[idx] == '\\' && s[idx+1] == '\n') { // continuation character
idx++;
} else if (s[idx] == '\\' && s[idx+1] == 'n') {
Expand Down Expand Up @@ -187,6 +188,36 @@ char* unescape_string(Allocator &al, LCompilers::Str &s) {
x += s[idx];
}
}
if (idx < s.size()) {
x += s[idx];
}
return LCompilers::s2c(al, x);
}

// Escapes `s` for use inside a double-quoted Fortran character literal:
// every `"` is written as `""`, and every other character is copied
// through unchanged.
//
// Fix: the previous version had no `default` branch, so only the doubled
// quotes were emitted and all other characters were dropped
// (e.g. `a"b` produced `""` instead of `a""b`).
std::string str_escape_fortran_double_quote(const std::string &s) {
    std::string out;
    out.reserve(s.size());
    for (const char ch : s) {
        if (ch == '"') {
            out += "\"\"";
        } else {
            out += ch;
        }
    }
    return out;
}

// Collapses every adjacent pair of `ch` characters in `s` into a single
// `ch` and copies all other characters unchanged. The resulting text is
// handed to LCompilers::s2c() together with `al`, and that call's result
// is returned.
char* str_unescape_fortran(Allocator &al, LCompilers::Str &s, char ch) {
    const size_t len = s.size();
    std::string result = "";
    size_t pos = 0;
    while (pos + 1 < len) {
        // The current character is always kept; a doubled `ch` only
        // causes its second occurrence to be skipped.
        result += s[pos];
        if (s[pos] == ch && s[pos + 1] == ch) {
            pos += 2;
        } else {
            pos += 1;
        }
    }
    // A trailing character that was not consumed as the second half of a
    // pair still has to be copied.
    if (pos < len) {
        result += s[pos];
    }
    return LCompilers::s2c(al, result);
}

Expand Down
14 changes: 9 additions & 5 deletions src/libasr/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ char *s2c(Allocator &al, const std::string &s);
std::string replace(const std::string &s,
const std::string &regex, const std::string &replace);

// Escapes special characters from the given string.
// It is used during AST/R to Json conversion.
std::string get_escaped_str(const std::string &s);

std::string read_file(const std::string &filename);

// Returns the parent path to the given path
Expand All @@ -38,7 +34,15 @@ bool is_relative_path(const std::string &path);
// Joins paths (paths can be empty)
std::string join_paths(const std::vector<std::string> &paths);

char* unescape_string(Allocator &al, LCompilers::Str &s);
// Escapes special characters from the given string
// using C style escaping
std::string str_escape_c(const std::string &s);
char* str_unescape_c(Allocator &al, LCompilers::Str &s);

// Escapes double quote characters in the given string by doubling them
// (Fortran style); the string is assumed to be enclosed in double quotes
std::string str_escape_fortran_double_quote(const std::string &s);
char* str_unescape_fortran(Allocator &al, LCompilers::Str &s, char ch);

} // namespace LCompilers

Expand Down
6 changes: 3 additions & 3 deletions src/lpython/parser/semantics.h
Original file line number Diff line number Diff line change
Expand Up @@ -798,8 +798,8 @@ static inline ast_t* concat_string(Allocator &al, Location &l,
x.c_str(p.m_a), expr_contextType::Load)
// `x.int_n` is of type BigInt but we store the int64_t directly in AST
#define INTEGER(x, l) make_ConstantInt_t(p.m_a, l, x, nullptr)
#define STRING1(x, l) make_ConstantStr_t(p.m_a, l, unescape_string(p.m_a, x), nullptr)
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), unescape_string(p.m_a, y), nullptr)
#define STRING1(x, l) make_ConstantStr_t(p.m_a, l, str_unescape_c(p.m_a, x), nullptr)
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), str_unescape_c(p.m_a, y), nullptr)
#define STRING3(id, x, l) PREFIX_STRING(p.m_a, l, name2char(id), x.c_str(p.m_a))
#define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s))
#define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr)
Expand Down Expand Up @@ -864,7 +864,7 @@ static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, cha
} else if (strcmp(prefix, "b") == 0) {
LCompilers::Str s_;
s_.from_str(al, std::string(s));
std::string str = std::string(unescape_string(al, s_));
std::string str = std::string(str_unescape_c(al, s_));
str = "b'" + str + "'";
tmp = make_ConstantBytes_t(al, l, LCompilers::s2c(al, str), nullptr);
} else if ( strcmp(prefix, "br") == 0 || strcmp(prefix, "rb") == 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/lpython/parser/tokenizer.re
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ std::string pickle_token(int token, const YYSTYPE &yystype)
} else if (token == yytokentype::TK_IMAG_NUM) {
t += " " + std::to_string(yystype.f) + "j";
} else if (token == yytokentype::TK_STRING) {
t = t + " " + "\"" + yystype.string.str() + "\"";
t = t + " " + "\"" + str_escape_c(yystype.string.str()) + "\"";
} else if (token == yytokentype::TK_TYPE_COMMENT) {
t = t + " " + "\"" + yystype.string.str() + "\"";
} else if (token == yytokentype::TK_TYPE_IGNORE) {
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/tokens-docstring1-1355fbb.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "tokens-docstring1-1355fbb.stdout",
"stdout_hash": "1b85fc7f73cdf02de4658833853717555d29e098188ad737ab1a0ac1",
"stdout_hash": "9afa056946f77dcfa0a5aa89b3ff738274836892169e03c14ee14a8f",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
14 changes: 3 additions & 11 deletions tests/reference/tokens-docstring1-1355fbb.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
(TOKEN ":") 11:11
(NEWLINE) 12:12
(TOKEN "indent") 13:16
(TOKEN "string" "A multi-line
docstring.
") 17:54
(TOKEN "string" "A multi-line\n docstring.\n ") 17:54
(NEWLINE) 55:55
(NEWLINE) 56:56
(TOKEN "dedent") 56:56
Expand All @@ -18,10 +16,7 @@
(TOKEN ":") 68:68
(NEWLINE) 69:69
(TOKEN "indent") 70:73
(TOKEN "string" "
A multi-line
docstring.
") 74:116
(TOKEN "string" "\n A multi-line\n docstring.\n ") 74:116
(NEWLINE) 117:117
(NEWLINE) 118:118
(TOKEN "dedent") 118:118
Expand All @@ -36,9 +31,6 @@
(NEWLINE) 167:167
(NEWLINE) 168:168
(TOKEN "dedent") 168:168
(TOKEN "string" "
A multi-line
docstring.
") 169:199
(TOKEN "string" "\nA multi-line\ndocstring.\n") 169:199
(NEWLINE) 200:200
(EOF) 201:201
2 changes: 1 addition & 1 deletion tests/reference/wat-bool1-234bcd1.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "wat-bool1-234bcd1.stdout",
"stdout_hash": "82281b600ee1c6761bed3a793ced3193ca78440f65b4bbf61f6c9595",
"stdout_hash": "1788374952754bb051767034a62d5cad318908ee45c0e449c58f6a41",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-bool1-234bcd1.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@
(data (;0;) (i32.const 4) "\0c\00\00\00\01\00\00\00")
(data (;1;) (i32.const 12) " ")
(data (;2;) (i32.const 16) "\18\00\00\00\01\00\00\00")
(data (;3;) (i32.const 24) "\0a ")
(data (;3;) (i32.const 24) "\n ")
(data (;4;) (i32.const 28) "\24\00\00\00\01\00\00\00")
(data (;5;) (i32.const 36) "- ")
(data (;6;) (i32.const 40) "\30\00\00\00\01\00\00\00")
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr14-5e0cb96.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "wat-expr14-5e0cb96.stdout",
"stdout_hash": "c76b00ab575193cf3e658a54c61a93e064c0851d9e2162567dd29641",
"stdout_hash": "731682ff49eaab392c46e72e575f595873b89b30309c62c75cc6e36b",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr14-5e0cb96.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
(data (;0;) (i32.const 4) "\0c\00\00\00\01\00\00\00")
(data (;1;) (i32.const 12) " ")
(data (;2;) (i32.const 16) "\18\00\00\00\01\00\00\00")
(data (;3;) (i32.const 24) "\0a ")
(data (;3;) (i32.const 24) "\n ")
(data (;4;) (i32.const 28) "\24\00\00\00\01\00\00\00")
(data (;5;) (i32.const 36) "- ")
(data (;6;) (i32.const 40) "\30\00\00\00\01\00\00\00")
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr2-8b17723.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "wat-expr2-8b17723.stdout",
"stdout_hash": "c76b00ab575193cf3e658a54c61a93e064c0851d9e2162567dd29641",
"stdout_hash": "731682ff49eaab392c46e72e575f595873b89b30309c62c75cc6e36b",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr2-8b17723.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
(data (;0;) (i32.const 4) "\0c\00\00\00\01\00\00\00")
(data (;1;) (i32.const 12) " ")
(data (;2;) (i32.const 16) "\18\00\00\00\01\00\00\00")
(data (;3;) (i32.const 24) "\0a ")
(data (;3;) (i32.const 24) "\n ")
(data (;4;) (i32.const 28) "\24\00\00\00\01\00\00\00")
(data (;5;) (i32.const 36) "- ")
(data (;6;) (i32.const 40) "\30\00\00\00\01\00\00\00")
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr9-f73afd1.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "wat-expr9-f73afd1.stdout",
"stdout_hash": "212ad5e41ca7f94eb5ca02a0bd15b73b2b0f0a470709881d4249fbbc",
"stdout_hash": "b9760209706729d81bacca34584d8b872b0438e7dfc2a460f00425f1",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-expr9-f73afd1.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
(data (;0;) (i32.const 4) "\0c\00\00\00\01\00\00\00")
(data (;1;) (i32.const 12) " ")
(data (;2;) (i32.const 16) "\18\00\00\00\01\00\00\00")
(data (;3;) (i32.const 24) "\0a ")
(data (;3;) (i32.const 24) "\n ")
(data (;4;) (i32.const 28) "\24\00\00\00\01\00\00\00")
(data (;5;) (i32.const 36) "- ")
(data (;6;) (i32.const 40) "\30\00\00\00\01\00\00\00")
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/wat-loop1-e0046d4.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "wat-loop1-e0046d4.stdout",
"stdout_hash": "6b28c49a1246a563742113981087de0d5d9d2c995d95be0d1d41ee8c",
"stdout_hash": "089b7a011662f733fe28e6aa0c5c30921e99867b5f2ae268681a281a",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
Loading