From 2e15e46a859aaacdadb15b4cf152e88737cce77a Mon Sep 17 00:00:00 2001 From: xzyfer Date: Tue, 13 Oct 2015 00:26:16 +1100 Subject: [PATCH 1/2] Emulate Ruby Sass' url() parsing semantics We've had countless bugs and regressions with parsing url(). This patch is a complete refactor of our url() parsing semantics to 100% match that of Ruby Sass. Fixes #674 Spec https://github.com/sass/sass-spec/pull/539 --- src/constants.cpp | 1 + src/constants.hpp | 1 + src/lexer.cpp | 20 ++++++++++++++++ src/lexer.hpp | 4 ++++ src/parser.cpp | 25 +++++++++++--------- src/parser.hpp | 5 ++-- src/prelexer.cpp | 60 ++++++++++++++++++++++++++++++++++++++++------- src/prelexer.hpp | 9 +++++++ 8 files changed, 103 insertions(+), 22 deletions(-) diff --git a/src/constants.cpp b/src/constants.cpp index 4661e9f8e3..03f4bbfde9 100644 --- a/src/constants.cpp +++ b/src/constants.cpp @@ -141,6 +141,7 @@ namespace Sass { // constants for uri parsing (RFC 3986 Appendix A.) extern const char uri_chars[] = ":;/?!%&#@|[]{}'`^\"*+-.,_=~"; + extern const char real_uri_chars[] = "#%&"; // some specific constant character classes // they must be static to be useable by lexer diff --git a/src/constants.hpp b/src/constants.hpp index c7900ff74d..525697d31c 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -144,6 +144,7 @@ namespace Sass { // constants for uri parsing (RFC 3986 Appendix A.) 
extern const char uri_chars[]; + extern const char real_uri_chars[]; // some specific constant character classes // they must be static to be useable by lexer diff --git a/src/lexer.cpp b/src/lexer.cpp index 8fd6dffa46..f5ded62b6f 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -75,6 +75,24 @@ namespace Sass { return unsigned(chr) > 127; } + // check if char is outside ascii range + // but with specific ranges (copied from Ruby Sass) + bool is_nonascii(const char& chr) + { + return ( + (unsigned(chr) > 127 && unsigned(chr) < 55296) || + (unsigned(chr) > 57343 && unsigned(chr) < 65534) || + (unsigned(chr) > 65535 && unsigned(chr) < 1114111) + ); + } + + // check if char is within a reduced ascii range + // valid in a uri (copied from Ruby Sass) + bool is_uri_character(const char& chr) + { + return unsigned(chr) > 41 && unsigned(chr) < 127; + } + // Match word character (look ahead) bool is_character(const char& chr) { @@ -90,11 +108,13 @@ namespace Sass { const char* space(const char* src) { return is_space(*src) ? src + 1 : 0; } const char* alpha(const char* src) { return is_alpha(*src) ? src + 1 : 0; } const char* unicode(const char* src) { return is_unicode(*src) ? src + 1 : 0; } + const char* nonascii(const char* src) { return is_nonascii(*src) ? src + 1 : 0; } const char* digit(const char* src) { return is_digit(*src) ? src + 1 : 0; } const char* xdigit(const char* src) { return is_xdigit(*src) ? src + 1 : 0; } const char* alnum(const char* src) { return is_alnum(*src) ? src + 1 : 0; } const char* punct(const char* src) { return is_punct(*src) ? src + 1 : 0; } const char* character(const char* src) { return is_character(*src) ? src + 1 : 0; } + const char* uri_character(const char* src) { return is_uri_character(*src) ? src + 1 : 0; } // Match multiple ctype characters. 
const char* spaces(const char* src) { return one_plus<space>(src); } diff --git a/src/lexer.hpp b/src/lexer.hpp index 8566a829ea..1fe7eb7605 100644 --- a/src/lexer.hpp +++ b/src/lexer.hpp @@ -32,7 +32,9 @@ namespace Sass { bool is_alnum(const char& src); bool is_xdigit(const char& src); bool is_unicode(const char& src); + bool is_nonascii(const char& src); bool is_character(const char& src); + bool is_uri_character(const char& src); // Match a single ctype predicate. const char* space(const char* src); @@ -42,7 +44,9 @@ namespace Sass { const char* alnum(const char* src); const char* punct(const char* src); const char* unicode(const char* src); + const char* nonascii(const char* src); const char* character(const char* src); + const char* uri_character(const char* src); // Match multiple ctype characters. const char* spaces(const char* src); diff --git a/src/parser.cpp b/src/parser.cpp index e6e04859ea..51de80963d 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -501,7 +501,7 @@ namespace Sass { return p; } - Arguments* Parser::parse_arguments(bool has_url) + Arguments* Parser::parse_arguments() { std::string name(lexed); Position position = after_token; @@ -509,7 +509,7 @@ namespace Sass { if (lex_css< exactly<'('> >()) { // if there's anything there at all if (!peek_css< exactly<')'> >()) { - do (*args) << parse_argument(has_url); + do (*args) << parse_argument(); while (lex_css< exactly<','> >()); } if (!lex_css< exactly<')'> >()) error("expected a variable name (e.g. 
$x) or ')' for the parameter list for " + name, position); @@ -517,7 +517,7 @@ namespace Sass { return args; } - Argument* Parser::parse_argument(bool has_url) + Argument* Parser::parse_argument() { if (peek_css< sequence < exactly< hash_lbrace >, exactly< rbrace > > >()) { position += 2; @@ -525,12 +525,7 @@ namespace Sass { } Argument* arg; - // some urls can look like line comments (parse literally - chunk would not work) - if (has_url && lex< sequence < uri_value, lookahead < loosely<')'> > > >(false)) { - String* the_url = parse_interpolated_chunk(lexed); - arg = SASS_MEMORY_NEW(ctx.mem, Argument, the_url->pstate(), the_url); - } - else if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) { + if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) { lex_css< variable >(); std::string name(Util::normalize_underscores(lexed)); ParserState p = pstate; @@ -1410,6 +1405,9 @@ namespace Sass { } return string; } + else if (peek< real_uri_value >()) { + return parse_url_function_string(); + } else if (peek< re_functional >()) { return parse_function_call(); } @@ -1790,14 +1788,19 @@ namespace Sass { return SASS_MEMORY_NEW(ctx.mem, Function_Call, call_pos, name, args); } + String* Parser::parse_url_function_string() + { + lex< real_uri_value >(); + return SASS_MEMORY_NEW(ctx.mem, String_Constant, pstate, lexed); + } + Function_Call* Parser::parse_function_call() { lex< identifier >(); std::string name(lexed); ParserState call_pos = pstate; - bool expect_url = name == "url" || name == "url-prefix"; - Arguments* args = parse_arguments(expect_url); + Arguments* args = parse_arguments(); return SASS_MEMORY_NEW(ctx.mem, Function_Call, call_pos, name, args); } diff --git a/src/parser.hpp b/src/parser.hpp index 92b665915f..2e6256e2e6 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -220,8 +220,8 @@ namespace Sass { Parameters* parse_parameters(); Parameter* parse_parameter(); Mixin_Call* parse_include_directive(); - Arguments* 
parse_arguments(bool has_url = false); - Argument* parse_argument(bool has_url = false); + Arguments* parse_arguments(); + Argument* parse_argument(); Assignment* parse_assignment(); // Propset* parse_propset(); Ruleset* parse_ruleset(Lookahead lookahead, bool is_root = false); @@ -256,6 +256,7 @@ namespace Sass { Function_Call* parse_calc_function(); Function_Call* parse_function_call(); Function_Call_Schema* parse_function_call_schema(); + String* parse_url_function_string(); String* parse_interpolated_chunk(Token, bool constant = false); String* parse_string(); String_Constant* parse_static_expression(); diff --git a/src/prelexer.cpp b/src/prelexer.cpp index b6c7db2bf0..6fdbf9539b 100644 --- a/src/prelexer.cpp +++ b/src/prelexer.cpp @@ -914,22 +914,64 @@ namespace Sass { exactly<'\f'> >(src); }*/ - /* not used anymore - remove? const char* H(const char* src) { return std::isxdigit(*src) ? src+1 : 0; - }*/ + } - /* not used anymore - remove? - const char* unicode(const char* src) { + const char* W(const char* src) { + return zero_plus< alternatives< + space, + exactly< '\t' >, + exactly< '\r' >, + exactly< '\n' >, + exactly< '\f' > + > >(src); + } + + const char* UUNICODE(const char* src) { return sequence< exactly<'\\'>, between<H, 1, 6>, - optional< class_char > >(src); - }*/ + optional< W > + >(src); + } + + const char* NONASCII(const char* src) { + return nonascii(src); + } - /* not used anymore - remove? 
const char* ESCAPE(const char* src) { - return alternatives< unicode, class_char >(src); - }*/ + return alternatives< + UUNICODE, + sequence< + exactly<'\\'>, + NONASCII, + class_char< escape_chars > + > + >(src); + } + + + const char* real_uri_value(const char* src) { + return + sequence< + exactly< url_kwd >, + W, + zero_plus< alternatives< + class_char< real_uri_chars >, + uri_character, + NONASCII, + ESCAPE + > >, + alternatives< + sequence< + W, + exactly< ')' > + >, + exactly< hash_lbrace > + > + > + (src); + } const char* static_string(const char* src) { const char* pos = src; diff --git a/src/prelexer.hpp b/src/prelexer.hpp index f8e3f5ca91..cefb7a8dce 100644 --- a/src/prelexer.hpp +++ b/src/prelexer.hpp @@ -340,6 +340,15 @@ namespace Sass { // match urls const char* url(const char* src); + // match url() + const char* H(const char* src); + const char* W(const char* src); + // `UNICODE` makes VS sad + const char* UUNICODE(const char* src); + const char* NONASCII(const char* src); + const char* ESCAPE(const char* src); + const char* real_uri_value(const char* src); + // Path matching functions. 
// const char* folder(const char* src); // const char* folders(const char* src); From 4d3fe32aec7c341afa63a0909df1d571a1bc3131 Mon Sep 17 00:00:00 2001 From: xzyfer Date: Tue, 13 Oct 2015 02:39:30 +1100 Subject: [PATCH 2/2] Correctly handle interpolants in url() strings --- src/constants.cpp | 1 + src/constants.hpp | 1 + src/parser.cpp | 26 +++++++++++++++++++++++--- src/prelexer.cpp | 34 +++++++++++++++++++++------------- src/prelexer.hpp | 2 ++ src/util.cpp | 9 +++++++++ src/util.hpp | 2 ++ 7 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/constants.cpp b/src/constants.cpp index 03f4bbfde9..13eff1b81d 100644 --- a/src/constants.cpp +++ b/src/constants.cpp @@ -79,6 +79,7 @@ namespace Sass { extern const char only_kwd[] = "only"; extern const char rgb_kwd[] = "rgb("; extern const char url_kwd[] = "url("; + // extern const char url_prefix_kwd[] = "url-prefix("; extern const char important_kwd[] = "important"; extern const char pseudo_not_kwd[] = ":not("; extern const char even_kwd[] = "even"; diff --git a/src/constants.hpp b/src/constants.hpp index 525697d31c..79af5235cc 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -80,6 +80,7 @@ namespace Sass { extern const char only_kwd[]; extern const char rgb_kwd[]; extern const char url_kwd[]; + // extern const char url_prefix_kwd[]; extern const char image_url_kwd[]; extern const char important_kwd[]; extern const char pseudo_not_kwd[]; diff --git a/src/parser.cpp b/src/parser.cpp index 51de80963d..9843e4d9c4 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1405,7 +1405,7 @@ namespace Sass { } return string; } - else if (peek< real_uri_value >()) { + else if (peek< sequence< uri_prefix, W, real_uri_value > >()) { return parse_url_function_string(); } else if (peek< re_functional >()) { @@ -1790,8 +1790,28 @@ namespace Sass { String* Parser::parse_url_function_string() { - lex< real_uri_value >(); - return SASS_MEMORY_NEW(ctx.mem, String_Constant, pstate, lexed); + const char* p = position; + 
+ lex< uri_prefix >(); + std::string prefix = lexed; + + lex< real_uri_value >(false); + std::string uri = lexed; + + if (peek< exactly< hash_lbrace > >()) { + const char* pp = position; + // TODO: error checking for unclosed interpolants + while (peek< exactly< hash_lbrace > >(pp)) { + pp = sequence< interpolant, real_uri_value >(pp); + } + position = peek< real_uri_suffix >(pp); + return parse_interpolated_chunk(Token(p, position)); + } else { + lex< real_uri_suffix >(); + std::string res = prefix + Util::rtrim(uri) + lexed.to_string(); + return SASS_MEMORY_NEW(ctx.mem, String_Constant, pstate, res); + } + } Function_Call* Parser::parse_function_call() diff --git a/src/prelexer.cpp b/src/prelexer.cpp index 6fdbf9539b..44ee476c51 100644 --- a/src/prelexer.cpp +++ b/src/prelexer.cpp @@ -951,23 +951,31 @@ namespace Sass { } + // const char* real_uri_prefix(const char* src) { + // return alternatives< + // exactly< url_kwd >, + // exactly< url_prefix_kwd > + // >(src); + // } + + const char* real_uri_suffix(const char* src) { + return sequence< W, exactly< ')' > >(src); + } + const char* real_uri_value(const char* src) { return sequence< - exactly< url_kwd >, - W, - zero_plus< alternatives< - class_char< real_uri_chars >, - uri_character, - NONASCII, - ESCAPE - > >, - alternatives< - sequence< - W, - exactly< ')' > + non_greedy< + alternatives< + class_char< real_uri_chars >, + uri_character, + NONASCII, + ESCAPE >, - exactly< hash_lbrace > + alternatives< + real_uri_suffix, + exactly< hash_lbrace > + > > > (src); diff --git a/src/prelexer.hpp b/src/prelexer.hpp index cefb7a8dce..cdcce90934 100644 --- a/src/prelexer.hpp +++ b/src/prelexer.hpp @@ -347,6 +347,8 @@ namespace Sass { const char* UUNICODE(const char* src); const char* NONASCII(const char* src); const char* ESCAPE(const char* src); + const char* real_uri_suffix(const char* src); + // const char* real_uri_prefix(const char* src); const char* real_uri_value(const char* src); // Path matching functions. 
diff --git a/src/util.cpp b/src/util.cpp index 38e0d84b91..5fb9f40516 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -529,6 +529,15 @@ namespace Sass { namespace Util { using std::string; + std::string rtrim(const std::string &str) { + std::string trimmed = str; + size_t pos_ws = trimmed.find_last_not_of(" \t\n\v\f\r"); + if (pos_ws != std::string::npos) + { trimmed.erase(pos_ws + 1); } + else { trimmed.clear(); } + return trimmed; + } + std::string normalize_underscores(const std::string& str) { std::string normalized = str; for(size_t i = 0, L = normalized.length(); i < L; ++i) { diff --git a/src/util.hpp b/src/util.hpp index 11cfbb55f3..58c6e3f895 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -36,6 +36,8 @@ namespace Sass { namespace Util { + std::string rtrim(const std::string& str); + std::string normalize_underscores(const std::string& str); std::string normalize_decimals(const std::string& str); std::string normalize_sixtuplet(const std::string& col);