diff --git a/javascript/diff_match_patch.js b/javascript/diff_match_patch.js index 2fe320a..3c7ba68 100644 --- a/javascript/diff_match_patch.js +++ b/javascript/diff_match_patch.js @@ -4,11 +4,13 @@ b.length-f);a=this.diff_compute_(a,b,e,d);c&&a.unshift(new diff_match_patch.Diff diff_match_patch.prototype.diff_compute_=function(a,b,c,d){if(!a)return[new diff_match_patch.Diff(DIFF_INSERT,b)];if(!b)return[new diff_match_patch.Diff(DIFF_DELETE,a)];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(c=[new diff_match_patch.Diff(DIFF_INSERT,e.substring(0,g)),new diff_match_patch.Diff(DIFF_EQUAL,f),new diff_match_patch.Diff(DIFF_INSERT,e.substring(g+f.length))],a.length>b.length&&(c[0][0]=c[2][0]=DIFF_DELETE),c):1==f.length?[new diff_match_patch.Diff(DIFF_DELETE, a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],c,d),c=this.diff_main(b,f,c,d),e.concat([new diff_match_patch.Diff(DIFF_EQUAL,a)],c)):c&&100c);t++){for(var v=-t+p;v<=t-x;v+=2){var n=f+v;var r=v==-t||v!=t&&h[n-1]d)x+=2;else if(y>e)p+=2;else if(m&&(n=f+k-v,0<=n&&n= u)return this.diff_bisectSplit_(a,b,r,y,c)}}for(v=-t+w;v<=t-q;v+=2){n=f+v;u=v==-t||v!=t&&l[n-1]d)q+=2;else if(r>e)w+=2;else if(!m&&(n=f+k-v,0<=n&&n=u)))return this.diff_bisectSplit_(a,b,r,y,c)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]}; diff_match_patch.prototype.diff_bisectSplit_=function(a,b,c,d,e){var f=a.substring(0,c),g=b.substring(0,d);a=a.substring(c);b=b.substring(d);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)}; diff_match_patch.prototype.diff_linesToChars_=function(a,b){function c(a){for(var b="",c=0,g=-1,h=d.length;gd?a=a.substring(c-d):c} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_wordMode_ = function(text1, text2, deadline) { + // Scan the text on a line-by-line basis first. + var a = this.diff_linesToWords_(text1, text2); + text1 = a.chars1; + text2 = a.chars2; + var linearray = a.lineArray; + + var diffs = this.diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + this.diff_charsToLines_(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + this.diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.push(new diff_match_patch.Diff(DIFF_EQUAL, '')); + var pointer = 0; + var count_delete = 0; + var count_insert = 0; + var text_delete = ''; + var text_insert = ''; + while (pointer < diffs.length) { + switch (diffs[pointer][0]) { + case DIFF_INSERT: + count_insert++; + text_insert += diffs[pointer][1]; + break; + case DIFF_DELETE: + count_delete++; + text_delete += diffs[pointer][1]; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + diffs.splice(pointer - count_delete - count_insert, + count_delete + count_insert); + pointer = pointer - count_delete - count_insert; + var subDiff = + this.diff_main(text_delete, text_insert, false, deadline); + for (var j = subDiff.length - 1; j >= 0; j--) { + diffs.splice(pointer, 0, subDiff[j]); + } + pointer = pointer + subDiff.length; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + pointer++; + } + diffs.pop(); // Remove the dummy entry at the end. + + return diffs; +}; /** * Do a quick line-level diff on both strings, then rediff the parts for @@ -452,6 +520,75 @@ diff_match_patch.prototype.diff_bisectSplit_ = function(text1, text2, x, y, }; +/** + * Split two texts into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one word. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {{chars1: string, chars2: string, lineArray: !Array.}} + * An object containing the encoded text1, the encoded text2 and + * the array of unique strings. + * The zeroth element of the array of unique strings is intentionally blank. + * @private + */ +diff_match_patch.prototype.diff_linesToWords_ = function(text1, text2) { + var lineArray = []; // e.g. lineArray[4] == 'Hello\n' + var lineHash = {}; // e.g. lineHash['Hello\n'] == 4 + + // '\x00' is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray[0] = ''; + + /** + * Split a text into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * Modifies linearray and linehash through being a closure. + * @param {string} text String to encode. + * @return {string} Encoded string. + * @private + */ + function diff_linesToCharsMunge_(text) { + var chars = ''; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + var lineStart = 0; + var lineEnd = -1; + // Keeping our own length variable is faster than looking it up. + var lineArrayLength = lineArray.length; + while (lineEnd < text.length - 1) { + lineEnd = text.replace(/[\n\.,;:]/g,' ').indexOf(' ', lineStart); + if (lineEnd == -1) { + lineEnd = text.length - 1; + } + var line = text.substring(lineStart, lineEnd + 1); + + if (lineHash.hasOwnProperty ? lineHash.hasOwnProperty(line) : + (lineHash[line] !== undefined)) { + chars += String.fromCharCode(lineHash[line]); + } else { + if (lineArrayLength == maxLines) { + // Bail out at 65535 because + // String.fromCharCode(65536) == String.fromCharCode(0) + line = text.substring(lineStart); + lineEnd = text.length; + } + chars += String.fromCharCode(lineArrayLength); + lineHash[line] = lineArrayLength; + lineArray[lineArrayLength++] = line; + } + lineStart = lineEnd + 1; + } + return chars; + } + // Allocate 2/3rds of the space for text1, the rest for text2. + var maxLines = 40000; + var chars1 = diff_linesToCharsMunge_(text1); + maxLines = 65535; + var chars2 = diff_linesToCharsMunge_(text2); + return {chars1: chars1, chars2: chars2, lineArray: lineArray}; +}; + /** * Split two texts into an array of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line.