Skip to content

Commit

Permalink
handle paragraphs better (#6)
Browse files Browse the repository at this point in the history
Kyle Grinstead authored Mar 22, 2024

Unverified

This commit is not signed, but one or more authors require that any commit attributed to them be signed.
1 parent fb8daac commit 5a8459b
Showing 6 changed files with 151 additions and 13 deletions.
17 changes: 8 additions & 9 deletions .github/workflows/tag-release.yml
Original file line number Diff line number Diff line change
@@ -5,29 +5,28 @@ on:
branches: [master]

jobs:
build-and-release:
tag-release:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: oven-sh/setup-bun@v1
with:
fetch-depth: 0

- name: Fetch tags
run: git fetch --tags
- uses: oven-sh/setup-bun@v1

- name: Get latest tag
id: get_latest_tag
run: |
latest_tag=$(git describe --tags --abbrev=0 --match="[0-9]*.[0-9]*.[0-9]*" 2>/dev/null || echo "1.0.0")
latest_tag=$(git tag --sort=-version:refname --list 'v*[0-9].*[0-9].*[0-9]' | head -n 1)
latest_tag=${latest_tag#v} # Remove the 'v' prefix if present
echo "latest_tag=${latest_tag}" >> $GITHUB_OUTPUT
- name: Bump patch version
id: bump_patch_version
run: |
latest_tag=${{ steps.get_latest_tag.outputs.latest_tag }}
version_parts=(${latest_tag//./ })
patch_version=$((${version_parts[2]} + 1))
new_tag="${version_parts[0]}.${version_parts[1]}.${patch_version}"
IFS='.' read -r major minor patch <<< "${{ steps.get_latest_tag.outputs.latest_tag }}"
new_tag="${major}.${minor}.$((patch + 1))"
echo "new_tag=${new_tag}" >> $GITHUB_OUTPUT
- name: Create Release
2 changes: 2 additions & 0 deletions dist/index.d.ts
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ export default class HtmlDiff extends AbstractDiff {
protected oldIsolatedDiffTags: {
[key: number]: string[];
};
protected justProcessedDeleteFromIndex: number;
static create(oldText: string, newText: string, config?: HtmlDiffConfig | null): HtmlDiff;
setInsertSpaceInReplace(boolean: boolean): HtmlDiff;
getInsertSpaceInReplace(): boolean;
@@ -33,6 +34,7 @@ export default class HtmlDiff extends AbstractDiff {
protected diffPicture(oldText: string, newText: string): string;
protected diffElementsByAttribute(oldText: string, newText: string, attribute: string, element: string): string;
protected processEqualOperation(operation: Operation): void;
protected replaceParagraphSymbolWithBreaksIfNeeded(): void;
protected getAttributeFromTag(text: string, attribute: string): string | null;
protected isLinkPlaceholder(text: string): boolean;
protected isImagePlaceholder(text: string): boolean;
54 changes: 52 additions & 2 deletions dist/index.js
Original file line number Diff line number Diff line change
@@ -195,7 +195,12 @@ class AbstractDiff {
return;
}
if (sentenceOrHtmlTag[0] === "<") {
words.push(sentenceOrHtmlTag);
if (sentenceOrHtmlTag === "</p>") {
words.push(sentenceOrHtmlTag);
words.push("\xB6");
} else {
words.push(sentenceOrHtmlTag);
}
return;
}
sentenceOrHtmlTag = this.normalizeWhitespaceInHtmlSentence(sentenceOrHtmlTag);
@@ -275,6 +280,7 @@ class HtmlDiff extends AbstractDiff {
wordIndices = {};
newIsolatedDiffTags = {};
oldIsolatedDiffTags = {};
justProcessedDeleteFromIndex = -1;
static create(oldText, newText, config = null) {
const diff = new this(oldText, newText);
if (config !== null) {
@@ -300,6 +306,7 @@ class HtmlDiff extends AbstractDiff {
for (const item of operations) {
this.performOperation(item);
}
this.replaceParagraphSymbolWithBreaksIfNeeded();
return this.content;
}
indexNewWords() {
@@ -383,7 +390,7 @@ class HtmlDiff extends AbstractDiff {
return false;
}
isSelfClosingTag(text) {
return /<[^>]+\/\s*>/iu.test(text);
return /<br.*>/.test(text) || /<[^>]+\/\s*>/iu.test(text);
}
isClosingIsolatedDiffTag(item, currentIsolatedDiffTag = null) {
const tagsToMatch = currentIsolatedDiffTag !== null ? {
@@ -418,27 +425,54 @@ class HtmlDiff extends AbstractDiff {
this.processInsertOperation(operation2, "diffmod");
}
processInsertOperation(operation2, cssClass) {
this.justProcessedDeleteFromIndex = -1;
const text = [];
const paragraphSplitIndexes = [];
let rawIndex = 0;
for (let pos = operation2.startInNew;pos < operation2.endInNew; pos++) {
const s = this.newWords[pos];
if (this.config.isIsolatedDiffTagPlaceholder(s) && this.newIsolatedDiffTags[pos]) {
text.push(...this.newIsolatedDiffTags[pos]);
} else if (s === "\xB6") {
paragraphSplitIndexes.push(rawIndex);
text.push(this.wrapText(s, "ins", cssClass));
} else {
text.push(s);
}
rawIndex++;
}
paragraphSplitIndexes.reverse().forEach((paragraphSplitIndex) => {
if (paragraphSplitIndex > 0 && paragraphSplitIndex < text.length - 1) {
const temp = text[paragraphSplitIndex - 1];
text[paragraphSplitIndex - 1] = text[paragraphSplitIndex];
text[paragraphSplitIndex] = temp;
}
});
this.insertTag("ins", cssClass, text);
}
processDeleteOperation(operation2, cssClass) {
const text = [];
const paragraphMergeIndexes = [];
let rawIndex = 0;
for (let pos = operation2.startInOld;pos < operation2.endInOld; pos++) {
const s = this.oldWords[pos];
if (this.config.isIsolatedDiffTagPlaceholder(s) && this.oldIsolatedDiffTags[pos]) {
text.push(...this.oldIsolatedDiffTags[pos]);
} else {
if (s === "\xB6") {
paragraphMergeIndexes.push(rawIndex);
}
text.push(s);
}
rawIndex++;
}
paragraphMergeIndexes.reverse().forEach((paragraphMergeIndex) => {
if (paragraphMergeIndex > 0 && paragraphMergeIndex < text.length - 1) {
text.splice(paragraphMergeIndex + 1, 1);
text.splice(paragraphMergeIndex - 1, 1);
}
});
this.justProcessedDeleteFromIndex = this.content.length;
this.insertTag("del", cssClass, text);
}
diffIsolatedPlaceholder(operation2, pos, placeholder, stripWrappingTags = true) {
@@ -488,12 +522,28 @@ class HtmlDiff extends AbstractDiff {
const s = this.newWords[pos];
if (this.config.isIsolatedDiffTagPlaceholder(s) && this.newIsolatedDiffTags[pos]) {
result.push(this.diffIsolatedPlaceholder(operation2, pos, s));
} else if (s === "\xB6") {
if (pos > operation2.startInNew && this.newWords[pos - 1] === "</p>" && pos < operation2.endInNew - 1 && this.newWords[pos + 1].startsWith("<p>")) {
result.push("<br>");
}
} else {
result.push(s);
}
}
if (result[0] === "</p>" || result[0] === "." && result[1] === "</p>") {
this.replaceParagraphSymbolWithBreaksIfNeeded();
}
this.justProcessedDeleteFromIndex = -1;
this.content += result.join("");
}
replaceParagraphSymbolWithBreaksIfNeeded() {
if (this.justProcessedDeleteFromIndex > -1) {
const contentBeforeIndex = this.content.slice(0, this.justProcessedDeleteFromIndex);
const contentAfterIndex = this.content.slice(this.justProcessedDeleteFromIndex);
const replacedContent = contentAfterIndex.replace(//g, "<br><br>");
this.content = contentBeforeIndex + replacedContent;
}
}
getAttributeFromTag(text, attribute) {
const pattern = new RegExp(`<[^>]*\\b${attribute}\\s*=\\s*(['"])(.*)\\1[^>]*>`, "iu");
const matches = text.match(pattern);
7 changes: 6 additions & 1 deletion src/abstract-diff.ts
Original file line number Diff line number Diff line change
@@ -65,7 +65,12 @@ export class AbstractDiff {
}

if (sentenceOrHtmlTag[0] === '<') {
words.push(sentenceOrHtmlTag);
if (sentenceOrHtmlTag === '</p>') {
words.push(sentenceOrHtmlTag);
words.push('¶');
} else {
words.push(sentenceOrHtmlTag);
}
return;
}

29 changes: 29 additions & 0 deletions src/index.test.ts
Original file line number Diff line number Diff line change
@@ -28,3 +28,32 @@ test('works with apostrophes', () => {

expect(result).toBe(`<p>this's a apost</p>`);
});

// Two paragraphs merged into one: the removed boundary shows up as a deleted ¶.
test('works with paragraphs being combined', () => {
  const before = `<p>one two three</p><p>four five six</p>`;
  const after = `<p>one two three four five six</p>`;

  const diff = HtmlDiff.create(before, after).build();

  expect(diff).toBe(`<p>one two three<del class=\"diffmod\">¶</del>four five six</p>`);
});

// One paragraph split into two: the new boundary shows up as an inserted ¶
// placed before the closing tag of the first paragraph.
test('works with paragraphs being split', () => {
  const before = `<p>one two three four five six</p>`;
  const after = `<p>one two three</p><p>four five six</p>`;

  const diff = HtmlDiff.create(before, after).build();

  expect(diff).toBe(`<p>one two three<ins class=\"diffmod\">¶</ins></p><p>four five six</p>`);
});

// Removing the second of four paragraphs: the deleted text is rendered with
// <br><br> in place of the paragraph marker, and <br> separates the
// unchanged paragraphs that follow.
test('works when deleting a paragraph', () => {
  const before = `<p>one two three four five six.</p><p>seven eight nine ten.</p><p>eleven twelve thirteen.</p><p>fourteen fifteen sixteen.</p>`;
  const after = `<p>one two three four five six.</p><p>eleven twelve thirteen.</p><p>fourteen fifteen sixteen.</p>`;
  const expected = `<p>one two three four five six<del class=\"diffdel\">.<br><br>seven eight nine ten</del>.</p><br><p>eleven twelve thirteen.</p><br><p>fourteen fifteen sixteen.</p>`;

  const diff = HtmlDiff.create(before, after).build();

  expect(diff).toBe(expected);
});
55 changes: 54 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ export default class HtmlDiff extends AbstractDiff {
protected wordIndices: { [key: string]: number[] } = {};
protected newIsolatedDiffTags: { [key: number]: string[] } = {};
protected oldIsolatedDiffTags: { [key: number]: string[] } = {};
protected justProcessedDeleteFromIndex = -1;

public static create(oldText: string, newText: string, config: HtmlDiffConfig | null = null): HtmlDiff {
const diff = new this(oldText, newText);
@@ -42,6 +43,8 @@ export default class HtmlDiff extends AbstractDiff {
this.performOperation(item);
}

this.replaceParagraphSymbolWithBreaksIfNeeded();

return this.content;
}

@@ -149,7 +152,7 @@ export default class HtmlDiff extends AbstractDiff {
}

/**
 * Reports whether `text` contains a tag that has no separate closing tag.
 *
 * Two patterns are accepted:
 *  - any `<br...>` text, with or without a trailing slash (this check is
 *    case-sensitive and unanchored);
 *  - any tag whose closing `>` is preceded by `/` (optionally followed by
 *    whitespace), e.g. `<img />`.
 *
 * @param text Token text to inspect.
 * @returns `true` when either pattern matches, otherwise `false`.
 */
protected isSelfClosingTag(text: string): boolean {
  // A single return: the `<br>` check must be evaluated together with the
  // generic `/>` check, otherwise it is dead code.
  return /<br.*>/.test(text) || /<[^>]+\/\s*>/iu.test(text);
}

protected isClosingIsolatedDiffTag(item: string, currentIsolatedDiffTag: string | null = null): string | false {
@@ -193,28 +196,55 @@ export default class HtmlDiff extends AbstractDiff {
}

/**
 * Collects the new-side words covered by an insert operation and passes
 * them to `insertTag` as `ins` content.
 *
 * Paragraph markers ('¶') are wrapped individually via `wrapText` and then
 * exchanged with the entry immediately before them, so the visible marker
 * is emitted ahead of that entry (the tokenizer pushes the marker right
 * after `</p>`, so that entry is normally the closing tag).
 *
 * @param operation Operation providing `startInNew` / `endInNew`.
 * @param cssClass  CSS class forwarded to `wrapText` and `insertTag`.
 */
protected processInsertOperation(operation: Operation, cssClass: string): void {
  // Anything inserted invalidates the offset remembered by a prior delete.
  this.justProcessedDeleteFromIndex = -1;
  const text: string[] = [];
  const paragraphSplitIndexes: number[] = [];
  for (let pos = operation.startInNew; pos < operation.endInNew; pos++) {
    const s = this.newWords[pos];
    if (this.config.isIsolatedDiffTagPlaceholder(s) && this.newIsolatedDiffTags[pos]) {
      // One placeholder may expand to several entries (or none).
      text.push(...this.newIsolatedDiffTags[pos]);
    } else if (s === '¶') {
      // Remember the marker's real slot in `text`; counting source
      // positions would drift after a placeholder expansion above.
      paragraphSplitIndexes.push(text.length);
      text.push(this.wrapText(s, 'ins', cssClass));
    } else {
      text.push(s);
    }
  }
  // Process markers back-to-front. A marker at either end of the collected
  // text is left where it is.
  for (const splitIndex of paragraphSplitIndexes.reverse()) {
    if (splitIndex > 0 && splitIndex < text.length - 1) {
      const previous = text[splitIndex - 1];
      text[splitIndex - 1] = text[splitIndex];
      text[splitIndex] = previous;
    }
  }
  this.insertTag('ins', cssClass, text);
}

/**
 * Collects the old-side words covered by a delete operation and passes
 * them to `insertTag` as `del` content.
 *
 * For every paragraph marker ('¶') that is not at either end of the
 * collected text, the entries directly before and after it are dropped,
 * leaving only the marker between the surrounding words. The tokenizer
 * pushes the marker right after `</p>`, so the entry before it is normally
 * that closing tag; the entry after it is presumably the next `<p>` —
 * NOTE(review): neither neighbour is verified before removal.
 *
 * Before emitting, the current length of `this.content` is stored in
 * `justProcessedDeleteFromIndex` so that
 * `replaceParagraphSymbolWithBreaksIfNeeded` can later rewrite markers
 * from that offset onward.
 *
 * @param operation Operation providing `startInOld` / `endInOld`.
 * @param cssClass  CSS class forwarded to `insertTag`.
 */
protected processDeleteOperation(operation: Operation, cssClass: string): void {
  const text: string[] = [];
  const paragraphMergeIndexes: number[] = [];
  for (let pos = operation.startInOld; pos < operation.endInOld; pos++) {
    const s = this.oldWords[pos];
    if (this.config.isIsolatedDiffTagPlaceholder(s) && this.oldIsolatedDiffTags[pos]) {
      // One placeholder may expand to several entries (or none).
      text.push(...this.oldIsolatedDiffTags[pos]);
    } else {
      if (s === '¶') {
        // Remember the marker's real slot in `text`; counting source
        // positions would drift after a placeholder expansion above.
        paragraphMergeIndexes.push(text.length);
      }
      text.push(s);
    }
  }
  // Back-to-front so earlier indexes stay valid while entries are removed.
  for (const mergeIndex of paragraphMergeIndexes.reverse()) {
    if (mergeIndex > 0 && mergeIndex < text.length - 1) {
      // Remove the later neighbour first so `mergeIndex - 1` is unaffected.
      text.splice(mergeIndex + 1, 1);
      text.splice(mergeIndex - 1, 1);
    }
  }
  this.justProcessedDeleteFromIndex = this.content.length;
  this.insertTag('del', cssClass, text);
}

@@ -282,13 +312,36 @@ export default class HtmlDiff extends AbstractDiff {
const s = this.newWords[pos];
if (this.config.isIsolatedDiffTagPlaceholder(s) && this.newIsolatedDiffTags[pos]) {
result.push(this.diffIsolatedPlaceholder(operation, pos, s));
} else if (s === '¶') {
if (
pos > operation.startInNew &&
this.newWords[pos - 1] === '</p>' &&
pos < operation.endInNew - 1 &&
this.newWords[pos + 1].startsWith('<p>')
) {
result.push('<br>');
}
} else {
result.push(s);
}
}

if (result[0] === '</p>' || (result[0] === '.' && result[1] === '</p>')) {
this.replaceParagraphSymbolWithBreaksIfNeeded();
}
this.justProcessedDeleteFromIndex = -1;
this.content += result.join('');
}

/**
 * Rewrites paragraph markers ('¶') as `<br><br>` in the part of
 * `this.content` that starts at `justProcessedDeleteFromIndex`.
 *
 * Does nothing when no offset is recorded (the field is `-1`). Content
 * before the offset is left untouched. The field itself is not reset here.
 */
protected replaceParagraphSymbolWithBreaksIfNeeded(): void {
  const from = this.justProcessedDeleteFromIndex;
  if (from <= -1) {
    return;
  }
  const contentBeforeIndex = this.content.slice(0, from);
  const contentAfterIndex = this.content.slice(from);
  // The pattern must name the marker explicitly: an empty `//g` is parsed
  // as a line comment, not as a regular expression.
  this.content = contentBeforeIndex + contentAfterIndex.replace(/¶/g, '<br><br>');
}

protected getAttributeFromTag(text: string, attribute: string): string | null {
const pattern = new RegExp(`<[^>]*\\b${attribute}\\s*=\\s*(['"])(.*)\\1[^>]*>`, 'iu');
const matches = text.match(pattern);

0 comments on commit 5a8459b

Please sign in to comment.