tildeio · mmun · May 23, 2016 · Jun 19, 2015 · May 9, 2016 · May 10, 2016
diff --git a/Brocfile.js b/Brocfile.js
@@ -36,7 +36,7 @@ function buildTestSuite (libTree) {
   var jsHintLib = jsHint(libTree);
 
   var testTree = new Funnel( 'tests', {
-    files: ['recognizer-tests.js', 'router-tests.js'],
+    files: ['recognizer-tests.js', 'router-tests.js', 'normalizer-tests.js'],
     destDir: destination
   });
 

diff --git a/bench/benches/normalize.js b/bench/benches/normalize.js
@@ -0,0 +1,28 @@
+// Benchmarks Normalizer.normalizePath (from the built dist bundle) on three
+// sample paths. `simple` contains no percent-escapes at all.
+var RouteRecognizer = require('../../dist/route-recognizer');
+var Normalizer = RouteRecognizer.Normalizer;
+
+var paths = {
+  complex: "/foo/" + encodeURIComponent("http://example.com/index.html?foo=bar&baz=faz#hashtag"),
+  simple: "/post/123",
+  medium: "/abc%3Adef"
+};
+
+// Exported as an array of suites, one per sample path.
+module.exports = [{
+  name: 'Normalize Complex',
+  fn: function() {
+    Normalizer.normalizePath(paths.complex);
+  }
+}, {
+  name: 'Normalize Simple',
+  fn: function() {
+    Normalizer.normalizePath(paths.simple);
+  }
+}, {
+  name: 'Normalize Medium',
+  fn: function() {
+    Normalizer.normalizePath(paths.medium);
+  }
+}];
diff --git a/bench/index.js b/bench/index.js
@@ -2,6 +2,14 @@ var glob = require('glob');
 var path = require('path');
 var bench = require('do-you-even-bench');
 
-bench(glob.sync( './bench/benches/*.js' ).map( function( file ) {
-  return require( path.resolve( file ) );
-}));
+// A bench file may export one suite or an array of suites. Array#concat
+// appends the former and flattens the latter, yielding one flat list.
+var suites = glob.sync( './bench/benches/*.js' )
+  .map(function(file) {
+    return require( path.resolve( file ) );
+  })
+  .reduce(function(collected, exported) {
+    return collected.concat(exported);
+  }, []);
+
+bench(suites);
diff --git a/lib/route-recognizer.js b/lib/route-recognizer.js
@@ -1,4 +1,8 @@
 import map from './route-recognizer/dsl';
+import Normalizer from './route-recognizer/normalizer';
+
+var normalizePath = Normalizer.normalizePath;
+var normalizeSegment = Normalizer.normalizeSegment;
 
 var specials = [
   '/', '.', '*', '+', '?', '|',
@@ -28,7 +32,7 @@ function isArray(test) {
 // * `invalidChars`: a String with a list of all invalid characters
 // * `repeat`: true if the character specification can repeat
 
-function StaticSegment(string) { this.string = string; }
+function StaticSegment(string) { this.string = normalizeSegment(string); }
 StaticSegment.prototype = {
   eachChar: function(currentState) {
     var string = this.string, ch;
@@ -50,7 +54,7 @@ StaticSegment.prototype = {
   }
 };
 
-function DynamicSegment(name) { this.name = name; }
+function DynamicSegment(name) { this.name = normalizeSegment(name); }
 DynamicSegment.prototype = {
   eachChar: function(currentState) {
     return currentState.put({ invalidChars: "/", repeat: true, validChars: undefined });
@@ -61,7 +65,11 @@ DynamicSegment.prototype = {
   },
 
   generate: function(params) {
-    return params[this.name];
+    // Percent-encode the value unless the global opt-out flag is off.
+    var value = params[this.name];
+    if (!RouteRecognizer.ENCODE_AND_DECODE_PATH_SEGMENTS) { return value; }
+
+    return encodeURIComponent(value);
   }
 };
 
@@ -89,6 +97,10 @@ EpsilonSegment.prototype = {
   generate: function() { return ""; }
 };
 
+// The `names` will be populated with {name, decode} objects for each
+// dynamic/star segment, where `name` is the parameter name for use during
+// recognition, and `decode` is whether the parameter value should be decoded
+// (true for dynamic segments, false for star segments).
 function parse(route, names, specificity) {
   // normalize route as not starting with a "/". Recognition will
   // also normalize.
@@ -123,12 +135,12 @@ function parse(route, names, specificity) {
 
     if (match = segment.match(/^:([^\/]+)$/)) {
       results[i] = new DynamicSegment(match[1]);
-      names.push(match[1]);
+      names.push({name: match[1], decode: true});
       specificity.val += '3';
     } else if (match = segment.match(/^\*([^\/]+)$/)) {
       results[i] = new StarSegment(match[1]);
+      names.push({name: match[1], decode: false});
       specificity.val += '1';
-      names.push(match[1]);
     } else if(segment === "") {
       results[i] = new EpsilonSegment();
       specificity.val += '2';
@@ -273,18 +285,31 @@ RecognizeResults.prototype = oCreate({
   queryParams: null
 });
 
-function findHandler(state, path, queryParams) {
+function findHandler(state, originalPath, queryParams) {
   var handlers = state.handlers, regex = state.regex;
-  var captures = path.match(regex), currentCapture = 1;
+  var captures = originalPath.match(regex), currentCapture = 1;
   var result = new RecognizeResults(queryParams);
 
   result.length = handlers.length;
 
   for (var i=0; i<handlers.length; i++) {
     var handler = handlers[i], names = handler.names, params = {};
+    var name, shouldDecode, capture;
 
     for (var j=0; j<names.length; j++) {
-      params[names[j]] = captures[currentCapture++];
+      name = names[j].name;
+      shouldDecode = names[j].decode;
+      capture = captures[currentCapture++];
+
+      if (RouteRecognizer.ENCODE_AND_DECODE_PATH_SEGMENTS) {
+        if (shouldDecode) {
+          params[name] = decodeURIComponent(capture);
+        } else {
+          params[name] = capture;
+        }
+      } else {
+        params[name] = capture;
+      }
     }
 
     result[i] = { handler: handler.handler, params: params, isDynamic: !!names.length };
@@ -469,22 +494,35 @@ RouteRecognizer.prototype = {
   recognize: function(path) {
     var states = [ this.rootState ],
         pathLen, i, l, queryStart, queryParams = {},
+        hashStart,
         isSlashDropped = false;
 
+    hashStart = path.indexOf('#');
+    if (hashStart !== -1) {
+      path = path.substr(0, hashStart);
+    }
+
     queryStart = path.indexOf('?');
     if (queryStart !== -1) {
       var queryString = path.substr(queryStart + 1, path.length);
       path = path.substr(0, queryStart);
       queryParams = this.parseQueryString(queryString);
     }
 
-    path = decodeURI(path);
-
     if (path.charAt(0) !== "/") { path = "/" + path; }
+    var originalPath = path;
+
+    if (RouteRecognizer.ENCODE_AND_DECODE_PATH_SEGMENTS) {
+      path = normalizePath(path);
+    } else {
+      path = decodeURI(path);
+      originalPath = decodeURI(originalPath);
+    }
 
     pathLen = path.length;
     if (pathLen > 1 && path.charAt(pathLen - 1) === "/") {
       path = path.substr(0, pathLen - 1);
+      originalPath = originalPath.substr(0, originalPath.length - 1); // own length: normalizing can shorten `path`
       isSlashDropped = true;
     }
 
@@ -506,9 +544,9 @@ RouteRecognizer.prototype = {
       // if a trailing slash was dropped and a star segment is the last segment
       // specified, put the trailing slash back
       if (isSlashDropped && state.regex.source.slice(-5) === "(.+)$") {
-        path = path + "/";
-      }
-      return findHandler(state, path, queryParams);
+         originalPath = originalPath + "/";
+       }
+      return findHandler(state, originalPath, queryParams);
     }
   }
 };
@@ -517,4 +555,10 @@ RouteRecognizer.prototype.map = map;
 
 RouteRecognizer.VERSION = 'VERSION_STRING_PLACEHOLDER';
 
+// Set to false to opt-out of encoding and decoding path segments.
+// See https://github.com/tildeio/route-recognizer/pull/55
+RouteRecognizer.ENCODE_AND_DECODE_PATH_SEGMENTS = true;
+
+RouteRecognizer.Normalizer = Normalizer;
+
 export default RouteRecognizer;
diff --git a/lib/route-recognizer/normalizer.js b/lib/route-recognizer/normalizer.js
@@ -0,0 +1,95 @@
+// Match percent-encoded values (e.g. %3a, %3A, %25)
+var PERCENT_ENCODED_VALUES = /%[a-fA-F0-9]{2}/g;
+
+function toUpper(str) { return str.toUpperCase(); }
+
+// Turn percent-encoded values to upper case ("%3a" -> "%3A")
+function percentEncodedValuesToUpper(string) {
+  return string.replace(PERCENT_ENCODED_VALUES, toUpper);
+}
+
+// Normalizes each "/"-separated segment of `path` with `normalizeSegment`:
+// percent-encoded values are turned to upper case, and every one of them
+// that is not reserved (see `reservedSegmentChars`) is decoded.
+// Safe to call multiple times on the same path.
+function normalizePath(path) {
+  return path.split('/')
+             .map(normalizeSegment)
+             .join('/');
+}
+
+// Returns "%" followed by `charToHex(char)`.
+function percentEncode(char) {
+  return '%' + charToHex(char);
+}
+
+// Returns the upper-case hex value of the first UTF-16 code unit of `char`,
+// zero-padded to at least two digits so that it has the same shape as the
+// "XX" in a "%XX" escape (e.g. "\t" -> "09", "/" -> "2F").
+function charToHex(char) {
+  var hex = char.charCodeAt(0).toString(16).toUpperCase();
+  return hex.length < 2 ? '0' + hex : hex;
+}
+
+// Decodes percent-encoded values in the string except those listed in
+// `reservedHex`, an array of 2-character upper-case hex strings without the
+// leading "%" (e.g. "2F"). Reserved values are kept encoded, in upper case.
+// Throws a URIError (from decodeURIComponent) on malformed escapes.
+function decodeURIComponentExcept(string, reservedHex) {
+  if (string.indexOf('%') === -1) {
+    // If there is no percent char, there is no decoding that needs to
+    // be done and we exit early
+    return string;
+  }
+  string = percentEncodedValuesToUpper(string);
+
+  var result = '';
+  // Text waiting to be decoded. Non-reserved escapes are collected here and
+  // decoded together, because a multi-byte character such as "%C3%A9" can
+  // only be decoded as a whole.
+  var buffer = '';
+  var idx = 0;
+  while (idx < string.length) {
+    var pIdx = string.indexOf('%', idx);
+
+    if (pIdx === -1) { // no percent char
+      buffer += string.slice(idx);
+      break;
+    } else { // found percent char
+      buffer += string.slice(idx, pIdx);
+      idx = pIdx + 3; // resume after the "%XX" escape
+
+      var hex = string.slice(pIdx + 1, pIdx + 3);
+      var encoded = '%' + hex;
+
+      if (reservedHex.indexOf(hex) === -1) {
+        // encoded is not in reserved set, add to buffer
+        buffer += encoded;
+      } else {
+        // reserved: flush what came before, then emit the escape undecoded
+        result += decodeURIComponent(buffer);
+        buffer = '';
+        result += encoded;
+      }
+    }
+  }
+  result += decodeURIComponent(buffer);
+  return result;
+}
+
+// Leave these characters in encoded state in segments
+var reservedSegmentChars = ['%', '/'];
+var reservedHex = reservedSegmentChars.map(charToHex);
+
+// Normalizes a single path segment: decodes everything except the escapes
+// for `reservedSegmentChars`, which stay encoded.
+function normalizeSegment(segment) {
+  return decodeURIComponentExcept(segment, reservedHex);
+}
+
+var Normalizer = {
+  normalizeSegment: normalizeSegment,
+  normalizePath: normalizePath
+};
+
+export default Normalizer;
diff --git a/tests/normalizer-tests.js b/tests/normalizer-tests.js
@@ -0,0 +1,49 @@
+/* globals QUnit */
+
+import RouteRecognizer from 'route-recognizer';
+
+var Normalizer = RouteRecognizer.Normalizer;
+
+module("Normalization");
+
+// Each entry pairs equivalent spellings of a path with the single form
+// that Normalizer.normalizePath is expected to produce for all of them.
+var expectations = [{
+  paths: ["/foo/bar"],
+  normalized: "/foo/bar"
+}, {
+  paths: ["/foo%3Abar", "/foo%3abar"],
+  normalized: "/foo:bar"
+}, {
+  paths: ["/foo%2fbar", "/foo%2Fbar"],
+  normalized: "/foo%2Fbar"
+}, {
+  paths: ["/café", "/caf%C3%A9", "/caf%c3%a9"],
+  normalized: "/café"
+}, {
+  paths: ["/abc%25def"],
+  normalized: "/abc%25def"
+}, {
+  paths: ["/" + encodeURIComponent("http://example.com/index.html?foo=100%&baz=boo#hash")],
+  normalized: "/http:%2F%2Fexample.com%2Findex.html?foo=100%25&baz=boo#hash"
+}, {
+  paths: ["/%25%25%25%25"],
+  normalized: "/%25%25%25%25"
+}, {
+  paths: ["/%25%25%25%25%3A%3a%2F%2f%2f"],
+  normalized: "/%25%25%25%25::%2F%2F%2F"
+}];
+
+// Registers one test per (path, normalized) pair. Using a function here
+// gives every test callback its own `path` and `normalized` bindings.
+function testNormalization(path, normalized) {
+  test("the path '" + path + "' is normalized to '" + normalized + "'", function() {
+    equal(Normalizer.normalizePath(path), normalized);
+  });
+}
+
+expectations.forEach(function(expectation) {
+  expectation.paths.forEach(function(path) {
+    testNormalization(path, expectation.normalized);
+  });
+});