Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix history corruption on title updates #28

Merged
merged 2 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
"devDependencies": {
"@types/express": "^4.17.21",
"@types/jquery": "^3.5.29",
"@types/jquery": "^3.5.30",
"@types/node": "^20.11.30",
"eslint": "^8.11.0",
"eslint-config-etherpad": "^3.0.9",
Expand Down
96 changes: 35 additions & 61 deletions src/pad/database/hash.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import HTMLParser, { Node } from "node-html-parser";
import { SearchEngine, PadType } from "ep_search/setup";
import { tokenize } from "../static/js/parser";
import { logPrefix } from "../util/log";
import { getHashQuery } from "../static/js/hash";
import { applyReplaceSet, ReplaceSet } from "./text";

const api = require("ep_etherpad-lite/node/db/API");
const { decode, encode } = require("he");
Expand All @@ -14,104 +14,78 @@ export type HashUpdate = {

const MAX_PAGES = 10000;

function replaceHashToken(token: string, oldTitle: string, newTitle: string) {
function replaceHashToken(
token: string,
oldTitle: string,
newTitle: string,
offset: number
): ReplaceSet | null {
if (token === `#${oldTitle}`) {
return `#${newTitle}`;
return {
start: offset,
ndel: oldTitle.length + 1,
text: `#${newTitle}`,
};
}
return token;
return null;
}

function replaceHashTokens(token: string, updates: HashUpdate[]) {
function replaceHashTokens(
token: string,
updates: HashUpdate[],
offset: number
): ReplaceSet | null {
for (const update of updates) {
const { oldTitle, newTitle } = update;
const replaced = replaceHashToken(token, oldTitle, newTitle);
if (replaced !== token) {
const replaced = replaceHashToken(token, oldTitle, newTitle, offset);
if (replaced !== null) {
return replaced;
}
}
return null;
}

function replaceHash(text: string, updates: HashUpdate[]): string | null {
let newtext = "";
function replaceHash(text: string, updates: HashUpdate[]): ReplaceSet[] {
let remain = text;
let replaced = false;
let offset = 0;
let replaceSet: ReplaceSet[] = [];
while (remain.length > 0) {
const token = tokenize(remain);
const replacedToken = replaceHashTokens(token, updates);
const replacedToken = replaceHashTokens(token, updates, offset);
if (replacedToken !== null) {
newtext += replacedToken;
remain = remain.substring(token.length);
replaced = true;
continue;
replaceSet.push(replacedToken);
}
newtext += token;
remain = remain.substring(token.length);
offset += token.length;
}
if (!replaced) {
return null;
}
return newtext;
}

/**
 * Walks a node tree depth-first in pre-order, invoking `handler` exactly once
 * for every node, starting with `node` itself.
 *
 * @param node Root of the subtree to walk.
 * @param handler Callback invoked with each visited node.
 */
function traverseNodes(node: Node, handler: (node: Node) => void) {
  handler(node);
  // The recursive call already runs the handler on the child, so the child
  // must not be handled here as well; doing both visits every descendant twice.
  (node.childNodes || []).forEach((child: Node) => {
    traverseNodes(child, handler);
  });
}

function replaceHashHtml(html: string, updates: HashUpdate[]) {
let html_ = html;
const m = html.match(/^\<\!DOCTYPE\s+HTML\>(.+)$/);
if (m) {
html_ = m[1];
}
const root = HTMLParser.parse(html_);
let replaced = false;
traverseNodes(root, (node) => {
if (node.nodeType !== 3 /* Node.TEXT_NODE */) {
return;
}
const replacedText = replaceHash(decode(node.rawText), updates);
if (replacedText === null) {
return;
}
replaced = true;
node.rawText = encode(replacedText);
});
if (!replaced) {
return null;
}
return root.toString();
return replaceSet;
}

export async function updateHash(padId: string, updates: HashUpdate[]) {
const { html } = await api.getHTML(padId);
const { text } = await api.getText(padId);
console.debug(
logPrefix,
"Update hash with text",
padId,
", src=",
html,
text,
", updates=",
updates
);
const newhtml = replaceHashHtml(html, updates);
if (newhtml === null) {
console.warn(logPrefix, "Hash not found in HTML", updates, html);
const replaceSet = replaceHash(text, updates);
if (replaceSet.length === 0) {
console.warn(logPrefix, "Hash not found in HTML", updates, text);
return padId;
}
await api.setHTML(padId, newhtml);
await applyReplaceSet(padId, replaceSet);
console.debug(
logPrefix,
"Update hash with text",
padId,
", src=",
html,
text,
", dest=",
newhtml
replaceSet
);
return padId;
}
Expand Down
59 changes: 59 additions & 0 deletions src/pad/database/text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const api = require("ep_etherpad-lite/node/db/API");
const padMessageHandler = require("ep_etherpad-lite/node/handler/PadMessageHandler");
const CustomError = require('ep_etherpad-lite/node/utils/customError');
const padManager = require('ep_etherpad-lite/node/db/PadManager');

// based on https://github.com/ether/etherpad-lite/blob/develop/src/node/db/API.ts
// ... This is required to use spliceText, which is not exported in the API

/**
 * Validates a pad id and then fetches the pad through `padManager`.
 *
 * @param padID Candidate pad id; anything that is not a string is rejected.
 * @param shouldExist `true` when the pad is required to exist already,
 *   `false` when it is required not to exist yet.
 * @param text Forwarded unchanged to `padManager.getPad`.
 * @param authorId Forwarded unchanged to `padManager.getPad`.
 * @returns Whatever `padManager.getPad` returns for the validated id.
 * @throws CustomError (`apierror`) when the id is not a string, does not match
 *   the pad id requirements, or its existence contradicts `shouldExist`.
 */
const getPadSafe = async (padID: string|object, shouldExist: boolean, text?:string, authorId:string = '') => {
  // Only string ids can be handed to padManager.
  if (typeof padID !== 'string') {
    throw new CustomError('padID is not a string', 'apierror');
  }

  // The id must match the pad id requirements.
  if (!padManager.isValidPadId(padID)) {
    throw new CustomError('padID did not match requirements', 'apierror');
  }

  // Compare the actual existence of the pad with what the caller expects.
  const padExists = await padManager.doesPadExists(padID);
  if (shouldExist) {
    if (!padExists) {
      // does not exist, but should
      throw new CustomError('padID does not exist', 'apierror');
    }
  } else if (padExists) {
    // does exist, but shouldn't
    throw new CustomError('padID does already exist', 'apierror');
  }

  // All checks passed: load the pad.
  return padManager.getPad(padID, text, authorId);
};

/**
 * A single text replacement, expressed as the arguments of one splice on a
 * pad's text: where it starts, how many characters it removes, and what it
 * inserts instead.
 */
export type ReplaceSet = {
// Character offset in the pad text at which the replacement begins.
start: number;
// Number of existing characters, counted from `start`, that are replaced.
ndel: number;
// Text inserted in place of the removed characters.
text: string;
};

/**
 * Applies a list of text replacements to an existing pad and then asks the
 * pad message handler to update the pad's clients.
 *
 * Replacements are applied from the highest `start` offset to the lowest, so
 * (assuming the ranges do not overlap) a splice never shifts the offsets of
 * the replacements that are still waiting to be applied.
 *
 * @param padID Id of the pad to modify; the pad must already exist.
 * @param replaceSet Replacements to apply. The array itself is not reordered.
 * @param authorId Author id passed to every `spliceText` call.
 */
export async function applyReplaceSet(
  padID: string,
  replaceSet: ReplaceSet[],
  authorId: string = ""
) {
  const pad = await getPadSafe(padID, true);

  // Work on a copy so the caller's array keeps its order.
  const ordered = replaceSet.slice();
  ordered.sort((left, right) => left.start - right.start);
  ordered.reverse();

  // One splice at a time, awaiting each before starting the next.
  for (const { start, ndel, text } of ordered) {
    await pad.spliceText(start, ndel, text, authorId);
  }
  await padMessageHandler.updatePadClients(pad);
}
64 changes: 17 additions & 47 deletions src/pad/database/title.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import HTMLParser, { Node } from "node-html-parser";
import { SearchEngine, PadType } from "ep_search/setup";
import removeMdBase from "remove-markdown";
import { searchHashes, updateHash } from "./hash";
import { logPrefix } from "../util/log";
import { escapeForText } from "../static/js/result";
import { applyReplaceSet, ReplaceSet } from "./text";

const api = require("ep_etherpad-lite/node/db/API");
const db = require("ep_etherpad-lite/node/db/DB").db;
Expand Down Expand Up @@ -43,51 +43,21 @@ export function extractTitle(padData: PadType) {
return removeMd(lines[0]);
}

/**
 * Walks a node tree depth-first in pre-order, invoking `handler` exactly once
 * for every node, starting with `node` itself.
 *
 * @param node Root of the subtree to walk.
 * @param handler Callback invoked with each visited node.
 */
function traverseNodes(node: Node, handler: (node: Node) => void) {
  handler(node);
  // The recursive call already runs the handler on the child, so the child
  // must not be handled here as well; doing both visits every descendant twice.
  (node.childNodes || []).forEach((child: Node) => {
    traverseNodes(child, handler);
  });
}

function replaceTitle(text: string, oldtitle: string, newtitle: string) {
return text.replace(oldtitle, newtitle);
}

function replaceTitleHtml(
html: string,
function replaceTitle(
text: string,
oldtitle: string,
newtitle: string
): string | null {
let html_ = html;
const m = html.match(/^\<\!DOCTYPE\s+HTML\>(.+)$/);
if (m) {
html_ = m[1];
}
const root = HTMLParser.parse(html_);
let replaced = false;
traverseNodes(root, (node) => {
if (node.nodeType !== 3 /* Node.TEXT_NODE */) {
return;
}
if (replaced) {
return;
}
const decodedText = decode(node.rawText);
if (!decodedText.includes(oldtitle)) {
return;
}
replaced = true;
const replacedText = replaceTitle(decodedText, oldtitle, newtitle);
console.info(logPrefix, "Title replaced", decodedText, replacedText);
node.rawText = encode(replacedText);
});
if (!replaced) {
console.warn(logPrefix, "Title not found in HTML", oldtitle, html);
newtitle: string,
offset: number = 0
): ReplaceSet | null {
const pos = text.indexOf(oldtitle);
if (pos === -1) {
return null;
}
return root.toString();
return {
start: pos + offset,
ndel: oldtitle.length,
text: newtitle,
};
}

async function updateTitleContent(
Expand All @@ -97,9 +67,9 @@ async function updateTitleContent(
newTitle: string
): Promise<TitleUpdateResult> {
console.info(logPrefix, "Update title", pad.id, oldTitle, newTitle);
const { html } = await api.getHTML(pad.id);
const replacedHtml = replaceTitleHtml(html, oldTitle, newTitle);
if (replacedHtml === null) {
const { text } = await api.getText(pad.id);
const replaceSet = replaceTitle(text, oldTitle, newTitle);
if (replaceSet === null) {
console.warn(logPrefix, "Title not found in HTML", oldTitle, newTitle);
const updates = await searchHashes(searchEngine, oldTitle);
return {
Expand All @@ -110,7 +80,7 @@ async function updateTitleContent(
},
};
}
await api.setHTML(pad.id, replacedHtml);
await applyReplaceSet(pad.id, [replaceSet]);

const updates = await searchHashes(searchEngine, oldTitle);
return {
Expand Down
3 changes: 3 additions & 0 deletions src/pad/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js">
</script>

<script src="https://code.jquery.com/jquery-3.7.1.min.js" integrity="sha256-/JqT3SQfawRcv/BIHPThkBvs0OEvtFFmqPF/lYI/Cxo=" crossorigin="anonymous">
</script>

<script type="text/javascript">
$(document).ready(function(){
require(['index/index'], function(index) {
Expand Down
Loading