From cc1fde7d5ced1e647f1341a91219e3aa50f0ac0c Mon Sep 17 00:00:00 2001
From: Marcel Greter
Date: Tue, 31 Mar 2015 03:01:25 +0200
Subject: [PATCH] Enable url function overloading

https://github.com/sass/libsass/issues/674
---
 constants.cpp |   8 ++--
 constants.hpp |   8 ++--
 lexer.cpp     |  16 ++++++-
 lexer.hpp     |  22 ++++++++--
 parser.cpp    | 118 +++++++++++++++++++-------------------------------
 parser.hpp    |  16 ++++---
 prelexer.cpp  |  23 +++++++---
 prelexer.hpp  |  16 +++++--
 8 files changed, 128 insertions(+), 99 deletions(-)

diff --git a/constants.cpp b/constants.cpp
index a7dd3c6bee..67d93d0e67 100644
--- a/constants.cpp
+++ b/constants.cpp
@@ -113,7 +113,7 @@ namespace Sass {
     extern const char sign_chars[] = "-+";
     extern const char hyphen[] = "-";
     extern const char ellipsis[] = "...";
-    extern const char url_space_chars[] = " \t\r\n\f";
+    // extern const char url_space_chars[] = " \t\r\n\f";
     extern const char escape_chars[] = " -~"; // need to include unicode spaces too
     // type names
     extern const char numeric_name[] = "numeric value";
@@ -127,8 +127,10 @@ namespace Sass {
     extern const char map_name[] = "map";
     extern const char arglist_name[] = "arglist";
 
-    // byte order marks
-    // (taken from http://en.wikipedia.org/wiki/Byte_order_mark)
+    // constants for uri parsing (RFC 3986 Appendix A.)
+    extern const char uri_chars[] = ":/?!$%&#@[]{}'\"*+-._=";
+
+    // byte order marks (http://en.wikipedia.org/wiki/Byte_order_mark)
     extern const unsigned char utf_8_bom[] = { 0xEF, 0xBB, 0xBF };
     extern const unsigned char utf_16_bom_be[] = { 0xFE, 0xFF };
     extern const unsigned char utf_16_bom_le[] = { 0xFF, 0xFE };
diff --git a/constants.hpp b/constants.hpp
index a48360bdb0..711ae48bc4 100644
--- a/constants.hpp
+++ b/constants.hpp
@@ -115,7 +115,7 @@ namespace Sass {
     extern const char sign_chars[];
     extern const char hyphen[];
     extern const char ellipsis[];
-    extern const char url_space_chars[];
+    // extern const char url_space_chars[];
     extern const char escape_chars[];
 
     // type names
@@ -130,8 +130,10 @@ namespace Sass {
     extern const char map_name[];
     extern const char arglist_name[];
 
-    // byte order marks
-    // (taken from http://en.wikipedia.org/wiki/Byte_order_mark)
+    // constants for uri parsing (RFC 3986 Appendix A.)
+    extern const char uri_chars[];
+
+    // byte order marks (http://en.wikipedia.org/wiki/Byte_order_mark)
     extern const unsigned char utf_8_bom[];
     extern const unsigned char utf_16_bom_be[];
     extern const unsigned char utf_16_bom_le[];
diff --git a/lexer.cpp b/lexer.cpp
index b3c3e21579..be4a1afc0c 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -11,6 +11,20 @@ namespace Sass {
 
   namespace Prelexer {
 
+    //####################################
+    // BASIC CHARACTER MATCHERS
+    //####################################
+
+    // Match standard control chars
+    const char* kwd_at(const char* src) { return exactly<'@'>(src); }
+    const char* kwd_dot(const char* src) { return exactly<'.'>(src); }
+    const char* kwd_comma(const char* src) { return exactly<','>(src); }
+    const char* kwd_colon(const char* src) { return exactly<':'>(src); }
+    const char* kwd_star(const char* src) { return exactly<'*'>(src); }
+    const char* kwd_plus(const char* src) { return exactly<'+'>(src); }
+    const char* kwd_minus(const char* src) { return exactly<'-'>(src); }
+    const char* kwd_slash(const char* src) { return exactly<'/'>(src); }
+
     //####################################
     // implement some function that do exist in the standard
     // but those are locale aware which brought some trouble
@@ -69,7 +83,7 @@ namespace Sass {
     }
 
     //####################################
-    // BASIC CHARACTER MATCHERS
+    // BASIC CLASS MATCHERS
     //####################################
 
     // create matchers that advance the position
diff --git a/lexer.hpp b/lexer.hpp
index 752d13241c..7939486fbd 100644
--- a/lexer.hpp
+++ b/lexer.hpp
@@ -10,6 +10,20 @@ namespace Sass {
     // BASIC CHARACTER MATCHERS
     //####################################
 
+    // Match standard control chars
+    const char* kwd_at(const char* src);
+    const char* kwd_dot(const char* src);
+    const char* kwd_comma(const char* src);
+    const char* kwd_colon(const char* src);
+    const char* kwd_star(const char* src);
+    const char* kwd_plus(const char* src);
+    const char* kwd_minus(const char* src);
+    const char* kwd_slash(const char* src);
+
+    //####################################
+    // BASIC CLASS MATCHERS
+    //####################################
+
     // These are locale independant
     const bool is_space(const char& src);
     const bool is_alpha(const char& src);
@@ -120,10 +134,10 @@ namespace Sass {
     // Aka. zero-width positive lookahead.
     // Regex equivalent: /(?=literal)/
     // just hangs around until we need it
-    // template <prelexer mx>
-    // const char* lookahead(const char* src) {
-    //   return mx(src) ? src : 0;
-    // }
+    template <prelexer mx>
+    const char* lookahead(const char* src) {
+      return mx(src) ? src : 0;
+    }
 
     // Tries supplied matchers in order.
     // Succeeds if one of them succeeds.
diff --git a/parser.cpp b/parser.cpp
index 22d8786371..938b2e2172 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -266,8 +266,18 @@ namespace Sass {
           import_single_file(imp, lexed);
         }
       }
-      else if (peek< uri_prefix >()) {
-        imp->urls().push_back(parse_value());
+      else if (lex< uri_prefix >()) {
+        Arguments* args = new (ctx.mem) Arguments(pstate);
+        Function_Call* result = new (ctx.mem) Function_Call(pstate, "url", args);
+        if (lex < uri_value >()) { // chunk seems to work too!
+          String* the_url = parse_interpolated_chunk(lexed);
+          *args << new (ctx.mem) Argument(the_url->pstate(), the_url);
+        }
+        else {
+          error("malformed URL", pstate);
+        }
+        if (!lex< exactly<')'> >()) error("URI is missing ')'", pstate);
+        imp->urls().push_back(result);
       }
       else {
         if (first) error("@import directive requires a url or quoted path", pstate);
@@ -301,16 +311,16 @@ namespace Sass {
 
   Parameters* Parser::parse_parameters()
   {
-    string name(lexed); // for the error message
+    string name(lexed);
+    Position position = after_token;
     Parameters* params = new (ctx.mem) Parameters(pstate);
-    if (lex< exactly<'('> >()) {
+    if (lex_css< exactly<'('> >()) {
       // if there's anything there at all
-      if (!peek< exactly<')'> >()) {
+      if (!peek_css< exactly<')'> >()) {
         do (*params) << parse_parameter();
         while (lex_css< exactly<','> >());
       }
-      while (lex< alternatives < spaces, block_comment > >()) {};
-      if (!lex< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, pstate);
+      if (!lex_css< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, position);
     }
     return params;
   }
@@ -351,34 +361,36 @@ namespace Sass {
     return the_call;
   }
 
-  Arguments* Parser::parse_arguments()
+  Arguments* Parser::parse_arguments(bool has_url)
  {
     string name(lexed);
+    Position position = after_token;
     Arguments* args = new (ctx.mem) Arguments(pstate);
-
-    if (lex< exactly<'('> >()) {
+    if (lex_css< exactly<'('> >()) {
       // if there's anything there at all
-      if (!peek< exactly<')'> >()) {
-        do (*args) << parse_argument();
+      if (!peek_css< exactly<')'> >()) {
+        do (*args) << parse_argument(has_url);
         while (lex_css< exactly<','> >());
       }
-      while (lex< block_comment >());
-      if (!lex< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, pstate);
+      if (!lex_css< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, position);
     }
-
     return args;
   }
 
-  Argument* Parser::parse_argument()
+  Argument* Parser::parse_argument(bool has_url)
   {
+
     Argument* arg;
-    while (lex< alternatives < spaces, block_comment > >());
-    if (peek< sequence < variable, zero_plus < alternatives < spaces, line_comment, block_comment > >, exactly<':'> > >()) {
-      lex< variable >();
+    // some urls can look like line comments (parse literally - chunk would not work)
+    if (has_url && lex< sequence < uri_value, lookahead < exactly<')'> > > >(false)) {
+      String* the_url = parse_interpolated_chunk(lexed);
+      arg = new (ctx.mem) Argument(the_url->pstate(), the_url);
+    }
+    else if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) {
+      lex_css< variable >();
       string name(Util::normalize_underscores(lexed));
       ParserState p = pstate;
-      while (lex< alternatives < spaces, block_comment > >()) {};
-      lex< exactly<':'> >();
+      lex_css< exactly<':'> >();
       Expression* val = parse_space_list();
       val->is_delayed(false);
       arg = new (ctx.mem) Argument(p, val, name);
@@ -388,7 +400,7 @@ namespace Sass {
       bool is_keyword = false;
       Expression* val = parse_space_list();
       val->is_delayed(false);
-      if (lex< exactly< ellipsis > >()) {
+      if (lex_css< exactly< ellipsis > >()) {
        if (val->concrete_type() == Expression::MAP) is_keyword = true;
        else is_arglist = true;
      }
@@ -1118,10 +1130,10 @@ namespace Sass {
   {
     Expression* conj1 = parse_conjunction();
     // if it's a singleton, return it directly; don't wrap it
-    if (!peek< sequence< kwd_or, negate< identifier > > >()) return conj1;
+    if (!peek_css< kwd_or >()) return conj1;
 
     vector<Expression*> operands;
-    while (lex< sequence< kwd_or, negate< identifier > > >())
+    while (lex_css< kwd_or >())
       operands.push_back(parse_conjunction());
 
     return fold_operands(conj1, operands, Binary_Expression::OR);
@@ -1131,10 +1143,10 @@ namespace Sass {
   {
     Expression* rel1 = parse_relation();
     // if it's a singleton, return it directly; don't wrap it
-    if (!peek< sequence< kwd_and, negate< identifier > > >()) return rel1;
+    if (!peek_css< kwd_and >()) return rel1;
 
     vector<Expression*> operands;
-    while (lex< sequence< kwd_and, negate< identifier > > >())
+    while (lex_css< kwd_and >())
       operands.push_back(parse_relation());
 
     return fold_operands(rel1, operands, Binary_Expression::AND);
@@ -1252,7 +1264,7 @@
     else if (peek< sequence< identifier_schema, negate< exactly<'%'> > > >()) {
       return parse_identifier_schema();
     }
-    else if (peek< functional >() && !peek< uri_prefix >()) {
+    else if (peek< functional >()) {
       return parse_function_call();
     }
     else if (lex< sequence< exactly<'+'>, optional_css_whitespace, negate< number > > >()) {
@@ -1275,45 +1287,7 @@
 
   Expression* Parser::parse_value()
   {
-    while (lex< block_comment >());
-    if (lex< uri_prefix >()) {
-      Arguments* args = new (ctx.mem) Arguments(pstate);
-      Function_Call* result = new (ctx.mem) Function_Call(pstate, "url", args);
-      const char* here = position;
-      Position here_p = before_token;
-      // Try to parse a SassScript expression. If it succeeds and we can munch
-      // a matching rparen, then that's our url. If we can't munch a matching
-      // rparen, or if the attempt to parse an expression fails, then try to
-      // munch a regular CSS url.
-      try {
-        // special case -- if there's a comment, treat it as part of a URL
-        lex();
-        if (peek() || peek()) error("comment in URL", pstate); // doesn't really matter what we throw
-        Expression* expr = parse_list();
-        if (!lex< exactly<')'> >()) error("dangling expression in URL", pstate); // doesn't really matter what we throw
-        Argument* arg = new (ctx.mem) Argument(expr->pstate(), expr);
-        *args << arg;
-        return result;
-      }
-      catch (Sass_Error&) {
-        // back up so we can try again
-        position = here;
-        before_token = here_p;
-      }
-      catch (...) { throw; }
-      lex< spaces >();
-      if (lex< url >()) {
-        String* the_url = parse_interpolated_chunk(lexed);
-        Argument* arg = new (ctx.mem) Argument(the_url->pstate(), the_url);
-        *args << arg;
-      }
-      else {
-        error("malformed URL", pstate);
-      }
-      if (!lex< exactly<')'> >()) error("URI is missing ')'", pstate);
-      return result;
-    }
-
+    lex< css_comments >();
     if (lex< ampersand >()) {
       return new (ctx.mem) Parent_Selector(pstate, parse_selector_group());
     }
@@ -1325,13 +1299,13 @@
     if ((stop = peek< value_schema >()))
     { return parse_value_schema(stop); }
 
-    if (lex< sequence< kwd_true, negate< identifier > > >())
+    if (lex< kwd_true >())
     { return new (ctx.mem) Boolean(pstate, true); }
 
-    if (lex< sequence< kwd_false, negate< identifier > > >())
+    if (lex< kwd_false >())
     { return new (ctx.mem) Boolean(pstate, false); }
 
-    if (lex< sequence< kwd_null, negate< identifier > > >())
+    if (lex< kwd_null >())
     { return new (ctx.mem) Null(pstate); }
 
     if (lex< identifier >()) {
@@ -1637,10 +1611,8 @@
   {
     lex< identifier >();
     string name(lexed);
-    ParserState source_position_of_call = pstate;
-
-    Function_Call* the_call = new (ctx.mem) Function_Call(source_position_of_call, name, parse_arguments());
-    return the_call;
+    Arguments* args = parse_arguments(name == "url");
+    return new (ctx.mem) Function_Call(pstate, name, args);
   }
 
   Function_Call_Schema* Parser::parse_function_call_schema()
diff --git a/parser.hpp b/parser.hpp
index 3df540374e..fdda1d4553 100644
--- a/parser.hpp
+++ b/parser.hpp
@@ -82,8 +82,7 @@ namespace Sass {
       const char* it_position = start ? start : position;
 
      // skip white-space?
-      if (mx == url ||
-          mx == spaces ||
+      if (mx == spaces ||
           mx == no_spaces ||
           mx == css_comments ||
           mx == css_whitespace ||
@@ -123,12 +122,17 @@ namespace Sass {
     // sourcemap offset and we modify the position pointer!
     // lex will only skip over space, tabs and line comment
     template <prelexer mx>
-    const char* lex()
+    const char* lex(bool lazy = true)
     {
+
+      // position considered before lexed token
+      // we can skip whitespace or comments for
+      // lazy developers (but we need control)
+      const char* it_before_token = position;
+
       // sneak up to the actual token we want to lex
       // this should skip over white-space if desired
-      const char* it_before_token = sneak < mx >(position);
+      if (lazy) it_before_token = sneak < mx >(position);
 
       // now call matcher to get position after token
       const char* it_after_token = mx(it_before_token);
@@ -196,8 +200,8 @@ namespace Sass {
     Parameters* parse_parameters();
     Parameter* parse_parameter();
     Mixin_Call* parse_mixin_call();
-    Arguments* parse_arguments();
-    Argument* parse_argument();
+    Arguments* parse_arguments(bool has_url = false);
+    Argument* parse_argument(bool has_url = false);
     Assignment* parse_assignment();
     // Propset* parse_propset();
     Ruleset* parse_ruleset(Selector_Lookahead lookahead);
diff --git a/prelexer.cpp b/prelexer.cpp
index d737d1919f..ed1239f413 100644
--- a/prelexer.cpp
+++ b/prelexer.cpp
@@ -36,6 +36,7 @@ namespace Sass {
                zero_plus < space >, delimited_by >(src);
     }
 
+    /* not used anymore - remove?
     const char* block_comment_prefix(const char* src) {
       return exactly(src);
     }
@@ -43,6 +44,7 @@ namespace Sass {
     const char* comment(const char* src) {
       return line_comment(src);
     }
+    */
 
     // Match zero plus white-space or line_comments
     const char* optional_css_whitespace(const char* src) {
@@ -188,9 +190,11 @@ namespace Sass {
                zero_plus< alternatives< identifier, percentage, dimension, hex, number, quoted_string, exactly<'%'> > > > >(src);
     }
 
+    /* not used anymore - remove?
     const char* filename(const char* src) {
       return one_plus< alternatives< identifier, number, exactly<'.'> > >(src);
     }
+    */
 
     // Match CSS '@' keywords.
     const char* at_keyword(const char* src) {
@@ -419,6 +423,18 @@ namespace Sass {
     const char* uri_prefix(const char* src) {
       return exactly(src);
     }
+    const char* uri_value(const char* src)
+    {
+      return
+        zero_plus <
+          alternatives <
+            alnum,
+            exactly <'/'>,
+            class_char < uri_chars >
+          >
+        >(src);
+    }
+
     // TODO: rename the following two functions
     /* no longer used - remove?
     const char* uri(const char* src) {
@@ -615,7 +631,7 @@ namespace Sass {
     const char* folders(const char* src) {
       return zero_plus< folder >(src);
     }*/
-
+    /* not used anymore - remove?
     const char* chunk(const char* src) {
       char inside_str = 0;
       const char* p = src;
@@ -642,6 +658,7 @@ namespace Sass {
       // unreachable
       return 0;
     }
+    */
 
     // follow the CSS spec more closely and see if this helps us scan URLs correctly
     /* not used anymore - remove?
@@ -669,10 +686,6 @@ namespace Sass {
       return alternatives< unicode, class_char >(src);
     }*/
 
-    const char* url(const char* src) {
-      return chunk(src);
-    }
-
     const char* static_string(const char* src) {
       const char* pos = src;
       const char * s = quoted_string(pos);
diff --git a/prelexer.hpp b/prelexer.hpp
index a21b45b77b..2f7e15a3c0 100644
--- a/prelexer.hpp
+++ b/prelexer.hpp
@@ -25,6 +25,16 @@ namespace Sass {
     const char* kwd_lt(const char* src);
     const char* kwd_lte(const char* src);
 
+    // Match standard control chars
+    const char* kwd_at(const char* src);
+    const char* kwd_dot(const char* src);
+    const char* kwd_comma(const char* src);
+    const char* kwd_colon(const char* src);
+    const char* kwd_slash(const char* src);
+    const char* kwd_star(const char* src);
+    const char* kwd_plus(const char* src);
+    const char* kwd_minus(const char* src);
+
     //####################################
     // SPECIAL "REGEX" CONSTRUCTS
     //####################################
@@ -177,7 +187,7 @@ namespace Sass {
     // Match interpolant schemas
     const char* identifier_schema(const char* src);
     const char* value_schema(const char* src);
-    const char* filename(const char* src);
+    // const char* filename(const char* src);
     // const char* filename_schema(const char* src);
     // const char* url_schema(const char* src);
     // const char* url_value(const char* src);
@@ -248,8 +258,7 @@ namespace Sass {
     // const char* rgb_prefix(const char* src);
     // Match CSS uri specifiers.
     const char* uri_prefix(const char* src);
-    // const char* uri(const char* src);
-    const char* url(const char* src);
+    const char* uri_value(const char* src);
     // Match CSS "!important" keyword.
     const char* important(const char* src);
     // Match CSS "!optional" keyword.
@@ -345,7 +354,6 @@
       return counter;
     }
 
-    const char* chunk(const char* src);
 
   }
 }
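
As a quick illustration of the combinator style this patch extends, here is a minimal standalone C++ sketch of how a uri_value-style matcher can be composed. It is not the actual libsass code: the matcher typedef and the exactly/alnum/alternatives/zero_plus combinators below are simplified stand-ins written only for this example; only the uri_chars character set and the uri_value composition mirror what the patch adds.

// Standalone sketch (assumed simplifications, not the libsass templates).
#include <cctype>
#include <cstring>
#include <iostream>
#include <string>

namespace sketch {

  // a matcher takes a position and returns the position after the
  // matched text, or 0 if it did not match
  typedef const char* (*matcher)(const char*);

  // match one specific character
  template <char chr>
  const char* exactly(const char* src) {
    return (*src == chr) ? src + 1 : 0;
  }

  // match one character out of a fixed set
  template <const char* char_set>
  const char* class_char(const char* src) {
    return (*src && std::strchr(char_set, *src)) ? src + 1 : 0;
  }

  // match one alphanumeric character
  const char* alnum(const char* src) {
    return std::isalnum(static_cast<unsigned char>(*src)) ? src + 1 : 0;
  }

  // try matchers in order, return the first successful result
  template <matcher m1, matcher m2, matcher m3>
  const char* alternatives(const char* src) {
    if (const char* p = m1(src)) return p;
    if (const char* p = m2(src)) return p;
    return m3(src);
  }

  // apply a matcher greedily zero or more times (never fails)
  template <matcher mx>
  const char* zero_plus(const char* src) {
    while (const char* p = mx(src)) src = p;
    return src;
  }

  // same character set the patch adds to constants.cpp (RFC 3986 Appendix A.)
  extern const char uri_chars[] = ":/?!$%&#@[]{}'\"*+-._=";

  // rough analogue of the uri_value matcher added in prelexer.cpp:
  // consume everything that can appear in an unquoted url(...) argument
  const char* uri_value(const char* src) {
    return zero_plus< alternatives< alnum, exactly<'/'>, class_char<uri_chars> > >(src);
  }

}

int main() {
  const char* input = "http://example.com/img.png) no-repeat";
  const char* end = sketch::uri_value(input);
  // prints "http://example.com/img.png" - the matcher stops at ')'
  std::cout << std::string(input, end) << "\n";
  return 0;
}

Running the sketch prints everything up to the closing parenthesis, which is roughly the behaviour the patched parse_argument relies on: with has_url set, an unquoted url body is consumed literally (uri_value followed by a lookahead for ')') instead of being parsed as a SassScript expression.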