diff --git a/release-notes.md b/release-notes.md index 98281b4a..28219b2b 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,10 @@ # Release Notes +## 8.0.2 + +- [#616](https://github.com/kpdecker/jsdiff/pull/616) **Restored compatibility of `diffSentences` with old Safari versions.** This was broken in 8.0.0 by the introduction of a regex with a [lookbehind assertion](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Lookbehind_assertion); these weren't supported in Safari prior to version 16.4. +- [#612](https://github.com/kpdecker/jsdiff/pull/612) **Improved tree shakeability** by marking the built CJS and ESM packages with `sideEffects: false`. + ## 8.0.1 - [#610](https://github.com/kpdecker/jsdiff/pull/610) **Fixes types for `diffJson` which were broken by 8.0.0**. The new bundled types in 8.0.0 only allowed `diffJson` to be passed string arguments, but it should've been possible to pass either strings or objects (and now is). Thanks to Josh Kelley for the fix. diff --git a/src/diff/sentence.ts b/src/diff/sentence.ts index 098711ef..1eeb8f0b 100644 --- a/src/diff/sentence.ts +++ b/src/diff/sentence.ts @@ -1,9 +1,53 @@ import Diff from './base.js'; -import type { ChangeObject, CallbackOptionAbortable, CallbackOptionNonabortable, DiffCallbackNonabortable, DiffSentencesOptionsAbortable, DiffSentencesOptionsNonabortable} from '../types.js'; +import type { + ChangeObject, + CallbackOptionAbortable, + CallbackOptionNonabortable, + DiffCallbackNonabortable, + DiffSentencesOptionsAbortable, + DiffSentencesOptionsNonabortable +} from '../types.js'; + +function isSentenceEndPunct(char: string) { + return char == '.' || char == '!' || char == '?'; +} class SentenceDiff extends Diff { tokenize(value: string) { - return value.split(/(?<=[.!?])(\s+|$)/); + // If in future we drop support for environments that don't support lookbehinds, we can replace + // this entire function with: + // return value.split(/(?<=[.!?])(\s+|$)/); + // but until then, for similar reasons to the trailingWs function in string.ts, we are forced + // to do this verbosely "by hand" instead of using a regex. + const result = []; + let tokenStartI = 0; + for (let i = 0; i < value.length; i++) { + if (i == value.length - 1) { + result.push(value.slice(tokenStartI)); + break; + } + + if (isSentenceEndPunct(value[i]) && value[i + 1].match(/\s/)) { + // We've hit a sentence break - i.e. a punctuation mark followed by whitespace. + // We now want to push TWO tokens to the result: + // 1. the sentence + result.push(value.slice(tokenStartI, i + 1)); + + // 2. the whitespace + i = tokenStartI = i + 1; + while (value[i + 1]?.match(/\s/)) { + i++; + } + result.push(value.slice(tokenStartI, i + 1)); + + // Then the next token (a sentence) starts on the character after the whitespace. + // (It's okay if this is off the end of the string - then the outer loop will terminate + // here anyway.) + tokenStartI = i + 1; + } + } + + return result; } }