diff --git a/README.md b/README.md index 9cce485b..68e26185 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ Broadly, jsdiff's diff functions all take an old text and a new text and perform The optional `options` object may have the following keys: - `fuzzFactor`: Number of lines that are allowed to differ before rejecting a patch. Defaults to 0. + - `autoConvertLineEndings`: If `true`, and if the file to be patched consistently uses different line endings to the patch (i.e. either the file always uses Unix line endings while the patch uses Windows ones, or vice versa), then `applyPatch` will behave as if the line endings in the patch were the same as those in the source file. (If `false`, the patch will usually fail to apply in such circumstances since lines deleted in the patch won't be considered to match those in the source file.) Defaults to `true`. - `compareLine(lineNumber, line, operation, patchContent)`: Callback used to compare to given lines to determine if they should be considered equal when patching. Defaults to strict equality but may be overridden to provide fuzzier comparison. Should return false if the lines should be rejected. * `Diff.applyPatches(patch, options)` - applies one or more patches. diff --git a/release-notes.md b/release-notes.md index 091a660b..d6e4aa3f 100644 --- a/release-notes.md +++ b/release-notes.md @@ -24,6 +24,7 @@ - [#486](https://github.com/kpdecker/jsdiff/pull/486) **The `ignoreWhitespace` option of `diffLines` behaves more sensibly now.** `value`s in returned change objects now include leading/trailing whitespace even when `ignoreWhitespace` is used, just like how with `ignoreCase` the `value`s still reflect the case of one of the original texts instead of being all-lowercase. `ignoreWhitespace` is also now compatible with `newlineIsToken`. Finally, **`diffTrimmedLines` is deprecated** (and removed from the docs) in favour of using `diffLines` with `ignoreWhitespace: true`; the two are, and always have been, equivalent. - [#490](https://github.com/kpdecker/jsdiff/pull/490) **When calling diffing functions in async mode by passing a `callback` option, the diff result will now be passed as the *first* argument to the callback instead of the second.** (Previously, the first argument was never used at all and would always have value `undefined`.) - [#489](github.com/kpdecker/jsdiff/pull/489) **`this.options` no longer exists on `Diff` objects.** Instead, `options` is now passed as an argument to methods that rely on options, like `equals(left, right, options)`. This fixes a race condition in async mode, where diffing behaviour could be changed mid-execution if a concurrent usage of the same `Diff` instances overwrote its `options`. +- [#518](https://github.com/kpdecker/jsdiff/pull/518) **`linedelimiters` no longer exists** on patch objects; instead, when a patch with Windows-style CRLF line endings is parsed, **the lines in `lines` will end with `\r`**. There is now a **new `autoConvertLineEndings` option, on by default**, which makes it so that when a patch with Windows-style line endings is applied to a source file with Unix style line endings, the patch gets autoconverted to use Unix-style line endings, and when a patch with Unix-style line endings is applied to a source file with Windows-style line endings, it gets autoconverted to use Windows-style line endings. ## v5.2.0 diff --git a/src/patch/apply.js b/src/patch/apply.js index 4e2727ee..151ecfc1 100644 --- a/src/patch/apply.js +++ b/src/patch/apply.js @@ -1,3 +1,5 @@ +import {hasOnlyWinLineEndings, hasOnlyUnixLineEndings} from '../util/string'; +import {isWin, isUnix, unixToWin, winToUnix} from './line-endings'; import {parsePatch} from './parse'; import distanceIterator from '../util/distance-iterator'; @@ -14,9 +16,16 @@ export function applyPatch(source, uniDiff, options = {}) { uniDiff = uniDiff[0]; } + if (options.autoConvertLineEndings || options.autoConvertLineEndings == null) { + if (hasOnlyWinLineEndings(source) && isUnix(uniDiff)) { + uniDiff = unixToWin(uniDiff); + } else if (hasOnlyUnixLineEndings(source) && isWin(uniDiff)) { + uniDiff = winToUnix(uniDiff); + } + } + // Apply the diff to the input - let lines = source.split(/\r?\n/), - delimiters = source.match(/\r?\n/g) || [], + let lines = source.split('\n'), hunks = uniDiff.hunks, compareLine = options.compareLine || ((lineNumber, line, operation, patchContent) => line === patchContent), @@ -88,18 +97,15 @@ export function applyPatch(source, uniDiff, options = {}) { for (let j = 0; j < hunk.lines.length; j++) { let line = hunk.lines[j], operation = (line.length > 0 ? line[0] : ' '), - content = (line.length > 0 ? line.substr(1) : line), - delimiter = hunk.linedelimiters && hunk.linedelimiters[j] || '\n'; + content = (line.length > 0 ? line.substr(1) : line); if (operation === ' ') { toPos++; } else if (operation === '-') { lines.splice(toPos, 1); - delimiters.splice(toPos, 1); /* istanbul ignore else */ } else if (operation === '+') { lines.splice(toPos, 0, content); - delimiters.splice(toPos, 0, delimiter); toPos++; } else if (operation === '\\') { let previousOperation = hunk.lines[j - 1] ? hunk.lines[j - 1][0] : null; @@ -116,16 +122,11 @@ export function applyPatch(source, uniDiff, options = {}) { if (removeEOFNL) { while (!lines[lines.length - 1]) { lines.pop(); - delimiters.pop(); } } else if (addEOFNL) { lines.push(''); - delimiters.push('\n'); - } - for (let _k = 0; _k < lines.length - 1; _k++) { - lines[_k] = lines[_k] + delimiters[_k]; } - return lines.join(''); + return lines.join('\n'); } // Wrapper that supports multiple file patches via callbacks. diff --git a/src/patch/line-endings.js b/src/patch/line-endings.js new file mode 100644 index 00000000..d1907b47 --- /dev/null +++ b/src/patch/line-endings.js @@ -0,0 +1,62 @@ +export function unixToWin(patch) { + if (Array.isArray(patch)) { + return patch.map(unixToWin); + } + + return { + ...patch, + hunks: patch.hunks.map(hunk => ({ + ...hunk, + lines: hunk.lines.map( + (line, i) => + (line.startsWith('\\') || line.endsWith('\r') || hunk.lines[i + 1]?.startsWith('\\')) + ? line + : line + '\r' + ) + })) + }; +} + +export function winToUnix(patch) { + if (Array.isArray(patch)) { + return patch.map(winToUnix); + } + + return { + ...patch, + hunks: patch.hunks.map(hunk => ({ + ...hunk, + lines: hunk.lines.map(line => line.endsWith('\r') ? line.substring(0, line.length - 1) : line) + })) + }; +} + +/** + * Returns true if the patch consistently uses Unix line endings (or only involves one line and has + * no line endings). + */ +export function isUnix(patch) { + if (!Array.isArray(patch)) { patch = [patch]; } + return !patch.some( + index => index.hunks.some( + hunk => hunk.lines.some( + line => !line.startsWith('\\') && line.endsWith('\r') + ) + ) + ); +} + +/** + * Returns true if the patch uses Windows line endings and only Windows line endings. + */ +export function isWin(patch) { + if (!Array.isArray(patch)) { patch = [patch]; } + return patch.some(index => index.hunks.some(hunk => hunk.lines.some(line => line.endsWith('\r')))) + && patch.every( + index => index.hunks.every( + hunk => hunk.lines.every( + (line, i) => line.startsWith('\\') || line.endsWith('\r') || hunk.lines[i + 1]?.startsWith('\\') + ) + ) + ); +} diff --git a/src/patch/parse.js b/src/patch/parse.js index 8276ea71..caaf7881 100755 --- a/src/patch/parse.js +++ b/src/patch/parse.js @@ -1,6 +1,5 @@ export function parsePatch(uniDiff) { - let diffstr = uniDiff.split(/\r?\n/), - delimiters = uniDiff.match(/\r?\n/g) || [], + let diffstr = uniDiff.split(/\n/), list = [], i = 0; @@ -51,7 +50,7 @@ export function parsePatch(uniDiff) { // Parses the --- and +++ headers, if none are found, no lines // are consumed. function parseFileHeader(index) { - const fileHeader = (/^(---|\+\+\+)\s+(.*)$/).exec(diffstr[i]); + const fileHeader = (/^(---|\+\+\+)\s+(.*)\r?$/).exec(diffstr[i]); if (fileHeader) { let keyPrefix = fileHeader[1] === '---' ? 'old' : 'new'; const data = fileHeader[2].split('\t', 2); @@ -78,8 +77,7 @@ export function parsePatch(uniDiff) { oldLines: typeof chunkHeader[2] === 'undefined' ? 1 : +chunkHeader[2], newStart: +chunkHeader[3], newLines: typeof chunkHeader[4] === 'undefined' ? 1 : +chunkHeader[4], - lines: [], - linedelimiters: [] + lines: [] }; // Unified Diff Format quirk: If the chunk size is 0, @@ -107,7 +105,6 @@ export function parsePatch(uniDiff) { if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') { hunk.lines.push(diffstr[i]); - hunk.linedelimiters.push(delimiters[i] || '\n'); if (operation === '+') { addCount++; diff --git a/src/patch/reverse.js b/src/patch/reverse.js index fb902b47..e839eeba 100644 --- a/src/patch/reverse.js +++ b/src/patch/reverse.js @@ -15,7 +15,6 @@ export function reversePatch(structuredPatch) { oldStart: hunk.newStart, newLines: hunk.oldLines, newStart: hunk.oldStart, - linedelimiters: hunk.linedelimiters, lines: hunk.lines.map(l => { if (l.startsWith('-')) { return `+${l.slice(1)}`; } if (l.startsWith('+')) { return `-${l.slice(1)}`; } diff --git a/src/util/string.js b/src/util/string.js index bcb0ae92..7230a5fe 100644 --- a/src/util/string.js +++ b/src/util/string.js @@ -86,3 +86,18 @@ function overlapCount(a, b) { } return k; } + + +/** + * Returns true if the string consistently uses Windows line endings. + */ +export function hasOnlyWinLineEndings(string) { + return string.includes('\r\n') && !string.match(/(? { + const oldFile = 'foo\r\nbar\r\nbaz\r\nqux\r\n'; + const diffFile = + 'Index: testFileName\n' + + '===================================================================\n' + + '--- testFileName\tOld Header\n' + + '+++ testFileName\tNew Header\n' + + '@@ -2,2 +2,3 @@\n' + + '-bar\n' + + '-baz\n' + + '+new\n' + + '+two\n' + + '+three\n'; + + expect(applyPatch(oldFile, diffFile)).to.equal('foo\r\nnew\r\ntwo\r\nthree\r\nqux\r\n'); + }); + + it('should automatically convert a patch with Windows file endings to Unix when patching a Unix file', () => { + const oldFile = 'foo\nbar\nbaz\nqux\n'; + const diffFile = + 'Index: testFileName\r\n' + + '===================================================================\r\n' + + '--- testFileName\tOld Header\r\n' + + '+++ testFileName\tNew Header\r\n' + + '@@ -2,2 +2,3 @@\r\n' + + '-bar\r\n' + + '-baz\r\n' + + '+new\r\n' + + '+two\r\n' + + '+three\r\n'; + + expect(applyPatch(oldFile, diffFile)).to.equal('foo\nnew\ntwo\nthree\nqux\n'); + }); + + it('should leave line endings in the patch alone if the target file has mixed file endings, even if this means the patch does not apply', () => { + const oldFile1 = 'foo\r\nbar\nbaz\nqux\n'; + const oldFile2 = 'foo\nbar\r\nbaz\r\nqux\n'; + const diffFile = + 'Index: testFileName\r\n' + + '===================================================================\r\n' + + '--- testFileName\tOld Header\r\n' + + '+++ testFileName\tNew Header\r\n' + + '@@ -2,2 +2,3 @@\r\n' + + '-bar\r\n' + + '-baz\r\n' + + '+new\r\n' + + '+two\r\n' + + '+three\r\n'; + + expect(applyPatch(oldFile1, diffFile)).to.equal(false); + expect(applyPatch(oldFile2, diffFile)).to.equal('foo\nnew\r\ntwo\r\nthree\r\nqux\n'); + }); + + it('should leave patch file endings alone if autoConvertLineEndings=false', () => { + const oldFile = 'foo\r\nbar\r\nbaz\r\nqux\r\n'; + const diffFile = + 'Index: testFileName\n' + + '===================================================================\n' + + '--- testFileName\tOld Header\n' + + '+++ testFileName\tNew Header\n' + + '@@ -2,2 +2,3 @@\n' + + '-bar\n' + + '-baz\n' + + '+new\n' + + '+two\n' + + '+three\n'; + + expect(applyPatch(oldFile, diffFile, {autoConvertLineEndings: false})).to.equal(false); + }); }); describe('#applyPatches', function() { diff --git a/test/patch/create.js b/test/patch/create.js index d0cfd065..498d14c2 100644 --- a/test/patch/create.js +++ b/test/patch/create.js @@ -809,10 +809,6 @@ describe('patch/create', function() { lines: [ '-xxx', '+yyy' - ], - linedelimiters: [ - '\n', - '\n' ] } ] @@ -831,10 +827,6 @@ describe('patch/create', function() { lines: [ '-aaa', '+bbb' - ], - linedelimiters: [ - '\n', - '\n' ] } ] @@ -875,8 +867,7 @@ describe('patch/create', function() { // Check 2: starting with a structuredPatch, does formatting and then // parsing again basically round-trip as long as we wrap it in an array - // to match the output of parsePatch and delete the linedelimiters that - // parsePatch puts in? + // to match the output of parsePatch? const patchObj = structuredPatch( 'oldfile', 'newfile', 'line2\nline3\nline4\n', 'line2\nline3\nline5', @@ -884,9 +875,6 @@ describe('patch/create', function() { ); const roundTrippedPatch = parsePatch(formatPatch([patchObj])); - for (const hunk of roundTrippedPatch[0].hunks) { - delete hunk.linedelimiters; - } expect(roundTrippedPatch).to.deep.equal([patchObj]); }); diff --git a/test/patch/line-endings.js b/test/patch/line-endings.js new file mode 100644 index 00000000..5fe47586 --- /dev/null +++ b/test/patch/line-endings.js @@ -0,0 +1,151 @@ +import {parsePatch} from '../../lib/patch/parse'; +import {formatPatch} from '../../lib/patch/create'; +import {winToUnix, unixToWin, isWin, isUnix} from '../../lib/patch/line-endings'; + +import {expect} from 'chai'; + +describe('unixToWin and winToUnix', function() { + it('should convert line endings in a patch between Unix-style and Windows-style', function() { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,3 +1,4 @@\n' + + ' line2\n' + + ' line3\r\n' + + '+line4\r\n' + + ' line5\n' + ); + + const winPatch = unixToWin(patch); + expect(formatPatch(winPatch)).to.equal( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,3 +1,4 @@\n' + + ' line2\r\n' + + ' line3\r\n' + + '+line4\r\n' + + ' line5\r\n' + ); + + const unixPatch = winToUnix(winPatch); + expect(formatPatch(unixPatch)).to.equal( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,3 +1,4 @@\n' + + ' line2\n' + + ' line3\n' + + '+line4\n' + + ' line5\n' + ); + + expect(formatPatch(winToUnix(patch))).to.equal(formatPatch(unixPatch)); + }); + + it('should not introduce \\r on the last line if there was no newline at EOF', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\n' + + ' line3\n' + + '+line4\n' + + '\\ No newline at end of file\n' + ); + + const winPatch = unixToWin(patch); + expect(formatPatch(winPatch)).to.equal( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\r\n' + + ' line3\r\n' + + '+line4\n' + + '\\ No newline at end of file\n' + ); + }); +}); + +describe('isWin', () => { + it('should return true if all lines end with CRLF', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\r\n' + + ' line3\r\n' + + '+line4\r\n' + ); + expect(isWin(patch)).to.equal(true); + }); + + it('should return false if a line ends with a LF without a CR', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\r\n' + + ' line3\r\n' + + '+line4\n' + ); + expect(isWin(patch)).to.equal(false); + }); + + it('should still return true if only the last line in a file is missing a CR and there is a no newline at EOF indicator', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\r\n' + + ' line3\r\n' + + '+line4\n' + + '\\ No newline at end of file\n' + ); + expect(isWin(patch)).to.equal(true); + }); +}); + +describe('isUnix', () => { + it('should return false if some lines end with CRLF', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\r\n' + + ' line3\n' + + '+line4\r\n' + ); + expect(isUnix(patch)).to.equal(false); + }); + + it('should return true if no lines end with CRLF', () => { + const patch = parsePatch( + 'Index: test\n' + + '===================================================================\n' + + '--- test\theader1\n' + + '+++ test\theader2\n' + + '@@ -1,2 +1,3 @@\n' + + ' line2\n' + + ' line3\n' + + '+line4\n' + ); + expect(isUnix(patch)).to.equal(true); + }); +}); diff --git a/test/patch/parse.js b/test/patch/parse.js index 865e3046..a1a09c9c 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -22,12 +22,6 @@ describe('patch/parse', function() { ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -46,10 +40,6 @@ describe('patch/parse', function() { lines: [ '-line3', '+line4' - ], - linedelimiters: [ - '\n', - '\n' ] } ] @@ -77,12 +67,6 @@ describe('patch/parse', function() { ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] }, { @@ -93,12 +77,6 @@ describe('patch/parse', function() { ' line3', '-line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -130,12 +108,6 @@ describe('patch/parse', function() { ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -176,12 +148,6 @@ Index: test2 ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -200,12 +166,6 @@ Index: test2 ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -242,12 +202,6 @@ Index: test2 ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -265,12 +219,6 @@ Index: test2 ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -303,12 +251,6 @@ Index: test2 ' line3', '+line4', ' line5' - ], - linedelimiters: [ - '\n', - '\n', - '\n', - '\n' ] } ] @@ -328,10 +270,6 @@ Index: test2 lines: [ '-line5', '\\ No newline at end of file' - ], - linedelimiters: [ - '\n', - '\n' ] } ] @@ -350,10 +288,6 @@ Index: test2 lines: [ '+line5', '\\ No newline at end of file' - ], - linedelimiters: [ - '\n', - '\n' ] } ] @@ -374,11 +308,6 @@ Index: test2 '+line4', ' line5', '\\ No newline at end of file' - ], - linedelimiters: [ - '\n', - '\n', - '\n' ] } ] @@ -458,8 +387,7 @@ Index: test2 ' baz', ' qux', '\\ No newline at end of file' - ], - linedelimiters: [ '\n', '\n', '\n', '\n', '\n', '\n' ] + ] } ] } @@ -491,8 +419,7 @@ Index: test2 ' baz', ' qux', '\\ No newline at end of file' - ], - linedelimiters: [ '\n', '\n', '\n', '\n', '\n', '\n' ] + ] } ] }