From 2670d9b14d31f0a62561f053185b42172a07973b Mon Sep 17 00:00:00 2001 From: Graeme Yeates Date: Tue, 2 Jun 2015 21:12:44 -0400 Subject: [PATCH] Optimize levenshtein implementation --- levenshtein.js | 54 +++++++++++++++++++++++++++++++------------- tests/levenshtein.js | 3 +++ 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/levenshtein.js b/levenshtein.js index a95efd13..85f220c1 100644 --- a/levenshtein.js +++ b/levenshtein.js @@ -1,30 +1,52 @@ var makeString = require('./helper/makeString'); +/** + * Based on the implementation here: https://github.com/hiddentao/fast-levenshtein + */ module.exports = function levenshtein(str1, str2) { + 'use strict'; str1 = makeString(str1); str2 = makeString(str2); - if (str1 === str2) - return 0; + // Short cut cases + if (str1 === str2) return 0; + if (!str1 || !str2) return Math.max(str1.length, str2.length); - var current = [], - prev, value = 0; + // two rows + var prevRow = new Array(str2.length + 1); - for (var i = 0; i <= str2.length; i++) { - for (var j = 0; j <= str1.length; j++) { - if (i && j) { - if (str1.charAt(j - 1) === str2.charAt(i - 1)) - value = prev; - else - value = Math.min(current[j], current[j - 1], prev) + 1; + // initialise previous row + for (var i = 0; i < prevRow.length; ++i) { + prevRow[i] = i; + } + + // calculate current row distance from previous row + for (i = 0; i < str1.length; ++i) { + var nextCol = i + 1; + + for (var j = 0; j < str2.length; ++j) { + var curCol = nextCol; + + // substitution + nextCol = prevRow[j] + ( (str1.charAt(i) === str2.charAt(j)) ? 
0 : 1 ); + // insertion + var tmp = curCol + 1; + if (nextCol > tmp) { + nextCol = tmp; + } + // deletion + tmp = prevRow[j + 1] + 1; + if (nextCol > tmp) { + nextCol = tmp; } - else - value = i + j; - prev = current[j]; - current[j] = value; + // copy current col value into previous (in preparation for next iteration) + prevRow[j] = curCol; } + + // copy last col value into previous (in preparation for next iteration) + prevRow[j] = nextCol; } - return value; + return nextCol; }; diff --git a/tests/levenshtein.js b/tests/levenshtein.js index d6deb88d..21fe66eb 100644 --- a/tests/levenshtein.js +++ b/tests/levenshtein.js @@ -17,3 +17,6 @@ test('#levenshtein', function() { equal(levenshtein(), 0); }); +test('#levenshtein non-latin', function() { + equal(levenshtein('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文'), 2); +});