From cc1fde7d5ced1e647f1341a91219e3aa50f0ac0c Mon Sep 17 00:00:00 2001
From: Marcel Greter
Date: Tue, 31 Mar 2015 03:01:25 +0200
Subject: [PATCH] Enable url function overloading

https://github.com/sass/libsass/issues/674
---
 constants.cpp |   8 ++--
 constants.hpp |   8 ++--
 lexer.cpp     |  16 ++++++-
 lexer.hpp     |  22 ++++++++--
 parser.cpp    | 118 +++++++++++++++++++-------------------------------
 parser.hpp    |  16 ++++---
 prelexer.cpp  |  23 +++++++---
 prelexer.hpp  |  16 +++++--
 8 files changed, 128 insertions(+), 99 deletions(-)

diff --git a/constants.cpp b/constants.cpp
index a7dd3c6bee..67d93d0e67 100644
--- a/constants.cpp
+++ b/constants.cpp
@@ -113,7 +113,7 @@ namespace Sass {
     extern const char sign_chars[] = "-+";
     extern const char hyphen[] = "-";
     extern const char ellipsis[] = "...";
-    extern const char url_space_chars[] = " \t\r\n\f";
+    // extern const char url_space_chars[] = " \t\r\n\f";
     extern const char escape_chars[] = " -~"; // need to include unicode spaces too
     // type names
     extern const char numeric_name[] = "numeric value";
@@ -127,8 +127,10 @@ namespace Sass {
     extern const char map_name[] = "map";
     extern const char arglist_name[] = "arglist";
 
-    // byte order marks
-    // (taken from http://en.wikipedia.org/wiki/Byte_order_mark)
+    // constants for uri parsing (RFC 3986 Appendix A.)
+    extern const char uri_chars[] = ":/?!$%&#@[]{}'\"*+-._=";
+
+    // byte order marks (http://en.wikipedia.org/wiki/Byte_order_mark)
     extern const unsigned char utf_8_bom[] = { 0xEF, 0xBB, 0xBF };
     extern const unsigned char utf_16_bom_be[] = { 0xFE, 0xFF };
     extern const unsigned char utf_16_bom_le[] = { 0xFF, 0xFE };
diff --git a/constants.hpp b/constants.hpp
index a48360bdb0..711ae48bc4 100644
--- a/constants.hpp
+++ b/constants.hpp
@@ -115,7 +115,7 @@ namespace Sass {
     extern const char sign_chars[];
     extern const char hyphen[];
     extern const char ellipsis[];
-    extern const char url_space_chars[];
+    // extern const char url_space_chars[];
     extern const char escape_chars[];
 
     // type names
@@ -130,8 +130,10 @@ namespace Sass {
     extern const char map_name[];
     extern const char arglist_name[];
 
-    // byte order marks
-    // (taken from http://en.wikipedia.org/wiki/Byte_order_mark)
+    // constants for uri parsing (RFC 3986 Appendix A.)
+    extern const char uri_chars[];
+
+    // byte order marks (http://en.wikipedia.org/wiki/Byte_order_mark)
     extern const unsigned char utf_8_bom[];
     extern const unsigned char utf_16_bom_be[];
     extern const unsigned char utf_16_bom_le[];
diff --git a/lexer.cpp b/lexer.cpp
index b3c3e21579..be4a1afc0c 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -11,6 +11,20 @@ namespace Sass {
 
   namespace Prelexer {
 
+    //####################################
+    // BASIC CHARACTER MATCHERS
+    //####################################
+
+    // Match standard control chars
+    const char* kwd_at(const char* src) { return exactly<'@'>(src); }
+    const char* kwd_dot(const char* src) { return exactly<'.'>(src); }
+    const char* kwd_comma(const char* src) { return exactly<','>(src); }
+    const char* kwd_colon(const char* src) { return exactly<':'>(src); }
+    const char* kwd_star(const char* src) { return exactly<'*'>(src); }
+    const char* kwd_plus(const char* src) { return exactly<'+'>(src); }
+    const char* kwd_minus(const char* src) { return exactly<'-'>(src); }
+    const char* kwd_slash(const char* src) { return exactly<'/'>(src); }
+
     //####################################
     // implement some function that do exist in the standard
     // but those are locale aware which brought some trouble
@@ -69,7 +83,7 @@ namespace Sass {
     }
 
     //####################################
-    // BASIC CHARACTER MATCHERS
+    // BASIC CLASS MATCHERS
     //####################################
 
     // create matchers that advance the position
diff --git a/lexer.hpp b/lexer.hpp
index 752d13241c..7939486fbd 100644
--- a/lexer.hpp
+++ b/lexer.hpp
@@ -10,6 +10,20 @@ namespace Sass {
     // BASIC CHARACTER MATCHERS
     //####################################
 
+    // Match standard control chars
+    const char* kwd_at(const char* src);
+    const char* kwd_dot(const char* src);
+    const char* kwd_comma(const char* src);
+    const char* kwd_colon(const char* src);
+    const char* kwd_star(const char* src);
+    const char* kwd_plus(const char* src);
+    const char* kwd_minus(const char* src);
+    const char* kwd_slash(const char* src);
+
+    //####################################
+    // BASIC CLASS MATCHERS
+    //####################################
+
     // These are locale independant
     const bool is_space(const char& src);
     const bool is_alpha(const char& src);
@@ -120,10 +134,10 @@ namespace Sass {
     // Aka. zero-width positive lookahead.
     // Regex equivalent: /(?=literal)/
     // just hangs around until we need it
-    // template <prelexer mx>
-    // const char* lookahead(const char* src) {
-    //   return mx(src) ? src : 0;
-    // }
+    template <prelexer mx>
+    const char* lookahead(const char* src) {
+      return mx(src) ? src : 0;
+    }
 
     // Tries supplied matchers in order.
     // Succeeds if one of them succeeds.
diff --git a/parser.cpp b/parser.cpp
index 22d8786371..938b2e2172 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -266,8 +266,18 @@ namespace Sass {
           import_single_file(imp, lexed);
         }
       }
-      else if (peek< uri_prefix >()) {
-        imp->urls().push_back(parse_value());
+      else if (lex< uri_prefix >()) {
+        Arguments* args = new (ctx.mem) Arguments(pstate);
+        Function_Call* result = new (ctx.mem) Function_Call(pstate, "url", args);
+        if (lex < uri_value >()) { // chunk seems to work too!
+          String* the_url = parse_interpolated_chunk(lexed);
+          *args << new (ctx.mem) Argument(the_url->pstate(), the_url);
+        }
+        else {
+          error("malformed URL", pstate);
+        }
+        if (!lex< exactly<')'> >()) error("URI is missing ')'", pstate);
+        imp->urls().push_back(result);
       }
       else {
         if (first) error("@import directive requires a url or quoted path", pstate);
@@ -301,16 +311,16 @@ namespace Sass {
 
   Parameters* Parser::parse_parameters()
   {
-    string name(lexed); // for the error message
+    string name(lexed);
+    Position position = after_token;
     Parameters* params = new (ctx.mem) Parameters(pstate);
-    if (lex< exactly<'('> >()) {
+    if (lex_css< exactly<'('> >()) {
       // if there's anything there at all
-      if (!peek< exactly<')'> >()) {
+      if (!peek_css< exactly<')'> >()) {
         do (*params) << parse_parameter();
         while (lex_css< exactly<','> >());
       }
-      while (lex< alternatives < spaces, block_comment > >()) {};
-      if (!lex< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, pstate);
+      if (!lex_css< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, position);
     }
     return params;
   }
@@ -351,34 +361,36 @@ namespace Sass {
     return the_call;
   }
 
-  Arguments* Parser::parse_arguments()
+  Arguments* Parser::parse_arguments(bool has_url)
  {
     string name(lexed);
+    Position position = after_token;
     Arguments* args = new (ctx.mem) Arguments(pstate);
-
-    if (lex< exactly<'('> >()) {
+    if (lex_css< exactly<'('> >()) {
       // if there's anything there at all
-      if (!peek< exactly<')'> >()) {
-        do (*args) << parse_argument();
+      if (!peek_css< exactly<')'> >()) {
+        do (*args) << parse_argument(has_url);
         while (lex_css< exactly<','> >());
       }
-      while (lex< block_comment >());
-      if (!lex< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, pstate);
+      if (!lex_css< exactly<')'> >()) error("expected a variable name (e.g. $x) or ')' for the parameter list for " + name, position);
     }
-
     return args;
   }
 
-  Argument* Parser::parse_argument()
+  Argument* Parser::parse_argument(bool has_url)
   {
+
     Argument* arg;
-    while (lex< alternatives < spaces, block_comment > >());
-    if (peek< sequence < variable, zero_plus < alternatives < spaces, line_comment, block_comment > >, exactly<':'> > >()) {
-      lex< variable >();
+    // some urls can look like line comments (parse literally - chunk would not work)
+    if (has_url && lex< sequence < uri_value, lookahead < exactly<')'> > > >(false)) {
+      String* the_url = parse_interpolated_chunk(lexed);
+      arg = new (ctx.mem) Argument(the_url->pstate(), the_url);
+    }
+    else if (peek_css< sequence < variable, optional_css_comments, exactly<':'> > >()) {
+      lex_css< variable >();
       string name(Util::normalize_underscores(lexed));
       ParserState p = pstate;
-      while (lex< alternatives < spaces, block_comment > >()) {};
-      lex< exactly<':'> >();
+      lex_css< exactly<':'> >();
       Expression* val = parse_space_list();
       val->is_delayed(false);
       arg = new (ctx.mem) Argument(p, val, name);
@@ -388,7 +400,7 @@ namespace Sass {
       bool is_keyword = false;
       Expression* val = parse_space_list();
       val->is_delayed(false);
-      if (lex< exactly< ellipsis > >()) {
+      if (lex_css< exactly< ellipsis > >()) {
        if (val->concrete_type() == Expression::MAP) is_keyword = true;
        else is_arglist = true;
      }
@@ -1118,10 +1130,10 @@ namespace Sass {
   {
     Expression* conj1 = parse_conjunction();
     // if it's a singleton, return it directly; don't wrap it
-    if (!peek< sequence< kwd_or, negate< identifier > > >()) return conj1;
+    if (!peek_css< kwd_or >()) return conj1;
 
     vector<Expression*> operands;
-    while (lex< sequence< kwd_or, negate< identifier > > >())
+    while (lex_css< kwd_or >())
       operands.push_back(parse_conjunction());
 
     return fold_operands(conj1, operands, Binary_Expression::OR);
@@ -1131,10 +1143,10 @@ namespace Sass {
   {
     Expression* rel1 = parse_relation();
     // if it's a singleton, return it directly; don't wrap it
-    if (!peek< sequence< kwd_and, negate< identifier > > >()) return rel1;
+    if (!peek_css< kwd_and >()) return rel1;
 
     vector<Expression*> operands;
-    while (lex< sequence< kwd_and, negate< identifier > > >())
+    while (lex_css< kwd_and >())
       operands.push_back(parse_relation());
 
     return fold_operands(rel1, operands, Binary_Expression::AND);
@@ -1252,7 +1264,7 @@
     else if (peek< sequence< identifier_schema, negate< exactly<'%'> > > >()) {
       return parse_identifier_schema();
     }
-    else if (peek< functional >() && !peek< uri_prefix >()) {
+    else if (peek< functional >()) {
       return parse_function_call();
     }
     else if (lex< sequence< exactly<'+'>, optional_css_whitespace, negate< number > > >()) {
@@ -1275,45 +1287,7 @@
 
   Expression* Parser::parse_value()
   {
-    while (lex< block_comment >());
-    if (lex< uri_prefix >()) {
-      Arguments* args = new (ctx.mem) Arguments(pstate);
-      Function_Call* result = new (ctx.mem) Function_Call(pstate, "url", args);
-      const char* here = position;
-      Position here_p = before_token;
-      // Try to parse a SassScript expression. If it succeeds and we can munch
-      // a matching rparen, then that's our url. If we can't munch a matching
-      // rparen, or if the attempt to parse an expression fails, then try to
-      // munch a regular CSS url.
-      try {
-        // special case -- if there's a comment, treat it as part of a URL
-        lex();
-        if (peek() || peek()) error("comment in URL", pstate); // doesn't really matter what we throw
-        Expression* expr = parse_list();
-        if (!lex< exactly<')'> >()) error("dangling expression in URL", pstate); // doesn't really matter what we throw
-        Argument* arg = new (ctx.mem) Argument(expr->pstate(), expr);
-        *args << arg;
-        return result;
-      }
-      catch (Sass_Error&) {
-        // back up so we can try again
-        position = here;
-        before_token = here_p;
-      }
-      catch (...) { throw; }
-      lex< spaces >();
-      if (lex< url >()) {
-        String* the_url = parse_interpolated_chunk(lexed);
-        Argument* arg = new (ctx.mem) Argument(the_url->pstate(), the_url);
-        *args << arg;
-      }
-      else {
-        error("malformed URL", pstate);
-      }
-      if (!lex< exactly<')'> >()) error("URI is missing ')'", pstate);
-      return result;
-    }
-
+    lex< css_comments >();
     if (lex< ampersand >()) {
       return new (ctx.mem) Parent_Selector(pstate, parse_selector_group());
     }
@@ -1325,13 +1299,13 @@
     if ((stop = peek< value_schema >()))
     { return parse_value_schema(stop); }
 
-    if (lex< sequence< kwd_true, negate< identifier > > >())
+    if (lex< kwd_true >())
     { return new (ctx.mem) Boolean(pstate, true); }
 
-    if (lex< sequence< kwd_false, negate< identifier > > >())
+    if (lex< kwd_false >())
     { return new (ctx.mem) Boolean(pstate, false); }
 
-    if (lex< sequence< kwd_null, negate< identifier > > >())
+    if (lex< kwd_null >())
     { return new (ctx.mem) Null(pstate); }
 
     if (lex< identifier >()) {
@@ -1637,10 +1611,8 @@
   {
     lex< identifier >();
     string name(lexed);
-    ParserState source_position_of_call = pstate;
-
-    Function_Call* the_call = new (ctx.mem) Function_Call(source_position_of_call, name, parse_arguments());
-    return the_call;
+    Arguments* args = parse_arguments(name == "url");
+    return new (ctx.mem) Function_Call(pstate, name, args);
   }
 
   Function_Call_Schema* Parser::parse_function_call_schema()
diff --git a/parser.hpp b/parser.hpp
index 3df540374e..fdda1d4553 100644
--- a/parser.hpp
+++ b/parser.hpp
@@ -82,8 +82,7 @@ namespace Sass {
       const char* it_position = start ? start : position;
 
      // skip white-space?
-      if (mx == url ||
-          mx == spaces ||
+      if (mx == spaces ||
           mx == no_spaces ||
           mx == css_comments ||
           mx == css_whitespace ||
@@ -123,12 +122,17 @@ namespace Sass {
     // sourcemap offset and we modify the position pointer!
     // lex will only skip over space, tabs and line comment
     template <prelexer mx>
-    const char* lex()
+    const char* lex(bool lazy = true)
     {
+
+      // position considered before lexed token
+      // we can skip whitespace or comments for
+      // lazy developers (but we need control)
+      const char* it_before_token = position;
+
       // sneak up to the actual token we want to lex
       // this should skip over white-space if desired
-      const char* it_before_token = sneak < mx >(position);
+      if (lazy) it_before_token = sneak < mx >(position);
 
       // now call matcher to get position after token
       const char* it_after_token = mx(it_before_token);
@@ -196,8 +200,8 @@ namespace Sass {
     Parameters* parse_parameters();
     Parameter* parse_parameter();
     Mixin_Call* parse_mixin_call();
-    Arguments* parse_arguments();
-    Argument* parse_argument();
+    Arguments* parse_arguments(bool has_url = false);
+    Argument* parse_argument(bool has_url = false);
     Assignment* parse_assignment();
     // Propset* parse_propset();
     Ruleset* parse_ruleset(Selector_Lookahead lookahead);
diff --git a/prelexer.cpp b/prelexer.cpp
index d737d1919f..ed1239f413 100644
--- a/prelexer.cpp
+++ b/prelexer.cpp
@@ -36,6 +36,7 @@ namespace Sass {
                zero_plus < space >, delimited_by >(src);
     }
 
+    /* not used anymore - remove?
     const char* block_comment_prefix(const char* src) {
       return exactly(src);
     }
@@ -43,6 +44,7 @@ namespace Sass {
     const char* comment(const char* src) {
       return line_comment(src);
     }
+    */
 
     // Match zero plus white-space or line_comments
     const char* optional_css_whitespace(const char* src) {
@@ -188,9 +190,11 @@ namespace Sass {
                zero_plus< alternatives< identifier, percentage, dimension, hex, number, quoted_string, exactly<'%'> > > > >(src);
     }
 
+    /* not used anymore - remove?
     const char* filename(const char* src) {
       return one_plus< alternatives< identifier, number, exactly<'.'> > >(src);
     }
+    */
 
     // Match CSS '@' keywords.
     const char* at_keyword(const char* src) {
@@ -419,6 +423,18 @@ namespace Sass {
     const char* uri_prefix(const char* src) {
       return exactly(src);
     }
+    const char* uri_value(const char* src)
+    {
+      return
+        zero_plus <
+          alternatives <
+            alnum,
+            exactly <'/'>,
+            class_char < uri_chars >
+          >
+        >(src);
+    }
+
     // TODO: rename the following two functions
     /* no longer used - remove?
     const char* uri(const char* src) {
@@ -615,7 +631,7 @@ namespace Sass {
     const char* folders(const char* src) {
       return zero_plus< folder >(src);
     }*/
-
+    /* not used anymore - remove?
     const char* chunk(const char* src) {
       char inside_str = 0;
       const char* p = src;
@@ -642,6 +658,7 @@ namespace Sass {
       // unreachable
       return 0;
     }
+    */
 
     // follow the CSS spec more closely and see if this helps us scan URLs correctly
     /* not used anymore - remove?
@@ -669,10 +686,6 @@ namespace Sass {
       return alternatives< unicode, class_char >(src);
     }*/
 
-    const char* url(const char* src) {
-      return chunk(src);
-    }
-
     const char* static_string(const char* src) {
       const char* pos = src;
       const char * s = quoted_string(pos);
diff --git a/prelexer.hpp b/prelexer.hpp
index a21b45b77b..2f7e15a3c0 100644
--- a/prelexer.hpp
+++ b/prelexer.hpp
@@ -25,6 +25,16 @@ namespace Sass {
     const char* kwd_lt(const char* src);
     const char* kwd_lte(const char* src);
 
+    // Match standard control chars
+    const char* kwd_at(const char* src);
+    const char* kwd_dot(const char* src);
+    const char* kwd_comma(const char* src);
+    const char* kwd_colon(const char* src);
+    const char* kwd_slash(const char* src);
+    const char* kwd_star(const char* src);
+    const char* kwd_plus(const char* src);
+    const char* kwd_minus(const char* src);
+
     //####################################
     // SPECIAL "REGEX" CONSTRUCTS
     //####################################
@@ -177,7 +187,7 @@ namespace Sass {
     // Match interpolant schemas
     const char* identifier_schema(const char* src);
     const char* value_schema(const char* src);
-    const char* filename(const char* src);
+    // const char* filename(const char* src);
     // const char* filename_schema(const char* src);
     // const char* url_schema(const char* src);
     // const char* url_value(const char* src);
@@ -248,8 +258,7 @@ namespace Sass {
     // const char* rgb_prefix(const char* src);
     // Match CSS uri specifiers.
     const char* uri_prefix(const char* src);
-    // const char* uri(const char* src);
-    const char* url(const char* src);
+    const char* uri_value(const char* src);
     // Match CSS "!important" keyword.
     const char* important(const char* src);
     // Match CSS "!optional" keyword.
@@ -345,7 +354,6 @@
       return counter;
     }
 
-    const char* chunk(const char* src);
 
   }
 }
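
As a quick illustration of the combinator style this patch extends, here is a minimal standalone C++ sketch of how a uri_value-style matcher can be composed. It is not the actual libsass code: the matcher typedef and the exactly/alnum/alternatives/zero_plus combinators below are simplified stand-ins written only for this example; only the uri_chars character set and the uri_value composition mirror what the patch adds.

// Standalone sketch (assumed simplifications, not the libsass templates).
#include <cctype>
#include <cstring>
#include <iostream>
#include <string>

namespace sketch {

  // a matcher takes a position and returns the position after the
  // matched text, or 0 if it did not match
  typedef const char* (*matcher)(const char*);

  // match one specific character
  template <char chr>
  const char* exactly(const char* src) {
    return (*src == chr) ? src + 1 : 0;
  }

  // match one character out of a fixed set
  template <const char* char_set>
  const char* class_char(const char* src) {
    return (*src && std::strchr(char_set, *src)) ? src + 1 : 0;
  }

  // match one alphanumeric character
  const char* alnum(const char* src) {
    return std::isalnum(static_cast<unsigned char>(*src)) ? src + 1 : 0;
  }

  // try matchers in order, return the first successful result
  template <matcher m1, matcher m2, matcher m3>
  const char* alternatives(const char* src) {
    if (const char* p = m1(src)) return p;
    if (const char* p = m2(src)) return p;
    return m3(src);
  }

  // apply a matcher greedily zero or more times (never fails)
  template <matcher mx>
  const char* zero_plus(const char* src) {
    while (const char* p = mx(src)) src = p;
    return src;
  }

  // same character set the patch adds to constants.cpp (RFC 3986 Appendix A.)
  extern const char uri_chars[] = ":/?!$%&#@[]{}'\"*+-._=";

  // rough analogue of the uri_value matcher added in prelexer.cpp:
  // consume everything that can appear in an unquoted url(...) argument
  const char* uri_value(const char* src) {
    return zero_plus< alternatives< alnum, exactly<'/'>, class_char<uri_chars> > >(src);
  }

}

int main() {
  const char* input = "http://example.com/img.png) no-repeat";
  const char* end = sketch::uri_value(input);
  // prints "http://example.com/img.png" - the matcher stops at ')'
  std::cout << std::string(input, end) << "\n";
  return 0;
}

Running the sketch prints everything up to the closing parenthesis, which is roughly the behaviour the patched parse_argument relies on: with has_url set, an unquoted url body is consumed literally (uri_value followed by a lookahead for ')') instead of being parsed as a SassScript expression.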