Skip to content

Commit

Permalink
Merge pull request #28 from yacchin1205/fix/update-pad
Browse files Browse the repository at this point in the history
Fix history corruption on title updates
  • Loading branch information
yacchin1205 authored Aug 26, 2024
2 parents aa46b37 + ebaffd0 commit 2a83c33
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 116 deletions.
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
"devDependencies": {
"@types/express": "^4.17.21",
"@types/jquery": "^3.5.29",
"@types/jquery": "^3.5.30",
"@types/node": "^20.11.30",
"eslint": "^8.11.0",
"eslint-config-etherpad": "^3.0.9",
Expand Down
96 changes: 35 additions & 61 deletions src/pad/database/hash.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import HTMLParser, { Node } from "node-html-parser";
import { SearchEngine, PadType } from "ep_search/setup";
import { tokenize } from "../static/js/parser";
import { logPrefix } from "../util/log";
import { getHashQuery } from "../static/js/hash";
import { applyReplaceSet, ReplaceSet } from "./text";

const api = require("ep_etherpad-lite/node/db/API");
const { decode, encode } = require("he");
Expand All @@ -14,104 +14,78 @@ export type HashUpdate = {

const MAX_PAGES = 10000;

function replaceHashToken(token: string, oldTitle: string, newTitle: string) {
function replaceHashToken(
token: string,
oldTitle: string,
newTitle: string,
offset: number
): ReplaceSet | null {
if (token === `#${oldTitle}`) {
return `#${newTitle}`;
return {
start: offset,
ndel: oldTitle.length + 1,
text: `#${newTitle}`,
};
}
return token;
return null;
}

function replaceHashTokens(token: string, updates: HashUpdate[]) {
function replaceHashTokens(
token: string,
updates: HashUpdate[],
offset: number
): ReplaceSet | null {
for (const update of updates) {
const { oldTitle, newTitle } = update;
const replaced = replaceHashToken(token, oldTitle, newTitle);
if (replaced !== token) {
const replaced = replaceHashToken(token, oldTitle, newTitle, offset);
if (replaced !== null) {
return replaced;
}
}
return null;
}

function replaceHash(text: string, updates: HashUpdate[]): string | null {
let newtext = "";
function replaceHash(text: string, updates: HashUpdate[]): ReplaceSet[] {
let remain = text;
let replaced = false;
let offset = 0;
let replaceSet: ReplaceSet[] = [];
while (remain.length > 0) {
const token = tokenize(remain);
const replacedToken = replaceHashTokens(token, updates);
const replacedToken = replaceHashTokens(token, updates, offset);
if (replacedToken !== null) {
newtext += replacedToken;
remain = remain.substring(token.length);
replaced = true;
continue;
replaceSet.push(replacedToken);
}
newtext += token;
remain = remain.substring(token.length);
offset += token.length;
}
if (!replaced) {
return null;
}
return newtext;
}

function traverseNodes(node: Node, handler: (node: Node) => void) {
handler(node);
(node.childNodes || []).forEach((child: Node) => {
handler(child);
traverseNodes(child, handler);
});
}

function replaceHashHtml(html: string, updates: HashUpdate[]) {
let html_ = html;
const m = html.match(/^\<\!DOCTYPE\s+HTML\>(.+)$/);
if (m) {
html_ = m[1];
}
const root = HTMLParser.parse(html_);
let replaced = false;
traverseNodes(root, (node) => {
if (node.nodeType !== 3 /* Node.TEXT_NODE */) {
return;
}
const replacedText = replaceHash(decode(node.rawText), updates);
if (replacedText === null) {
return;
}
replaced = true;
node.rawText = encode(replacedText);
});
if (!replaced) {
return null;
}
return root.toString();
return replaceSet;
}

export async function updateHash(padId: string, updates: HashUpdate[]) {
const { html } = await api.getHTML(padId);
const { text } = await api.getText(padId);
console.debug(
logPrefix,
"Update hash with text",
padId,
", src=",
html,
text,
", updates=",
updates
);
const newhtml = replaceHashHtml(html, updates);
if (newhtml === null) {
console.warn(logPrefix, "Hash not found in HTML", updates, html);
const replaceSet = replaceHash(text, updates);
if (replaceSet.length === 0) {
console.warn(logPrefix, "Hash not found in HTML", updates, text);
return padId;
}
await api.setHTML(padId, newhtml);
await applyReplaceSet(padId, replaceSet);
console.debug(
logPrefix,
"Update hash with text",
padId,
", src=",
html,
text,
", dest=",
newhtml
replaceSet
);
return padId;
}
Expand Down
59 changes: 59 additions & 0 deletions src/pad/database/text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const api = require("ep_etherpad-lite/node/db/API");
const padMessageHandler = require("ep_etherpad-lite/node/handler/PadMessageHandler");
const CustomError = require('ep_etherpad-lite/node/utils/customError');
const padManager = require('ep_etherpad-lite/node/db/PadManager');

// based on https://github.com/ether/etherpad-lite/blob/develop/src/node/db/API.ts
// ... This is required to use spliceText, which is not exported in the API

/**
 * Validates a pad id and fetches the corresponding pad from the pad manager.
 *
 * @param padID - Identifier of the pad; rejected unless it is a string that
 *   `padManager.isValidPadId` accepts.
 * @param shouldExist - Whether the caller requires the pad to already exist
 *   (`true`) or to be absent (`false`).
 * @param text - Optional text forwarded unchanged to `padManager.getPad`.
 * @param authorId - Author id forwarded unchanged to `padManager.getPad`.
 * @returns Whatever `padManager.getPad` returns for this pad id.
 * @throws CustomError (`'apierror'`) when the id is not a string, does not
 *   match the id requirements, or the pad's existence contradicts `shouldExist`.
 */
const getPadSafe = async (padID: string|object, shouldExist: boolean, text?:string, authorId:string = '') => {
  // Reject anything that is not a plain string id.
  if (typeof padID !== 'string') {
    throw new CustomError('padID is not a string', 'apierror');
  }

  // Reject ids that do not match the pad manager's requirements.
  if (!padManager.isValidPadId(padID)) {
    throw new CustomError('padID did not match requirements', 'apierror');
  }

  // Compare the pad's actual existence with what the caller expects.
  const padFound = await padManager.doesPadExists(padID);

  if (shouldExist && !padFound) {
    // Expected an existing pad, but there is none.
    throw new CustomError('padID does not exist', 'apierror');
  }

  if (!shouldExist && padFound) {
    // Expected no pad, but one is already there.
    throw new CustomError('padID does already exist', 'apierror');
  }

  // All checks passed; hand over to the pad manager.
  return padManager.getPad(padID, text, authorId);
};

/**
 * One text replacement to perform on a pad. `applyReplaceSet` passes the three
 * fields, in this order, to `pad.spliceText(start, ndel, text, authorId)`.
 */
export type ReplaceSet = {
// Position in the pad text where the replacement begins.
// NOTE(review): presumably a 0-based character offset into the text returned by `api.getText` — confirm.
start: number;
// Number of characters to delete, beginning at `start`.
ndel: number;
// Text to insert at `start` in place of the deleted characters.
text: string;
};

/**
 * Applies a batch of text replacements to an existing pad and then pushes the
 * result to the pad's clients via `padMessageHandler.updatePadClients`.
 *
 * The replacements are applied one after another, starting with the highest
 * `start` and working down. Applying them in that order means a splice never
 * shifts the `start` of a replacement that has not been applied yet (assuming
 * the ranges do not overlap), so every `start` can be expressed relative to the
 * pad text as it was before this call.
 *
 * @param padID - Identifier of the pad to modify; the pad must already exist.
 * @param replaceSet - Replacements to apply. The array itself is not modified.
 * @param authorId - Author id passed to `pad.spliceText` for each replacement.
 * @throws CustomError (`'apierror'`) from `getPadSafe` when the pad id is
 *   invalid or the pad does not exist.
 */
export async function applyReplaceSet(
  padID: string,
  replaceSet: ReplaceSet[],
  authorId: string = ""
): Promise<void> {
  const pad = await getPadSafe(padID, true);

  // Copy with an array literal instead of `new Array(...items)`: the Array
  // constructor treats a lone numeric argument as a length, and spreading into
  // call arguments is bounded by the engine's argument-count limit.
  // Ascending sort followed by reverse (rather than a descending comparator)
  // keeps the tie order for equal `start` values unchanged.
  const fromLastToFirst = [...replaceSet]
    .sort((a, b) => a.start - b.start)
    .reverse();
  for (const { start, ndel, text } of fromLastToFirst) {
    // Splices are awaited one at a time; each one changes the pad text.
    await pad.spliceText(start, ndel, text, authorId);
  }
  await padMessageHandler.updatePadClients(pad);
}
64 changes: 17 additions & 47 deletions src/pad/database/title.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import HTMLParser, { Node } from "node-html-parser";
import { SearchEngine, PadType } from "ep_search/setup";
import removeMdBase from "remove-markdown";
import { searchHashes, updateHash } from "./hash";
import { logPrefix } from "../util/log";
import { escapeForText } from "../static/js/result";
import { applyReplaceSet, ReplaceSet } from "./text";

const api = require("ep_etherpad-lite/node/db/API");
const db = require("ep_etherpad-lite/node/db/DB").db;
Expand Down Expand Up @@ -43,51 +43,21 @@ export function extractTitle(padData: PadType) {
return removeMd(lines[0]);
}

function traverseNodes(node: Node, handler: (node: Node) => void) {
handler(node);
(node.childNodes || []).forEach((child: Node) => {
handler(child);
traverseNodes(child, handler);
});
}

function replaceTitle(text: string, oldtitle: string, newtitle: string) {
return text.replace(oldtitle, newtitle);
}

function replaceTitleHtml(
html: string,
function replaceTitle(
text: string,
oldtitle: string,
newtitle: string
): string | null {
let html_ = html;
const m = html.match(/^\<\!DOCTYPE\s+HTML\>(.+)$/);
if (m) {
html_ = m[1];
}
const root = HTMLParser.parse(html_);
let replaced = false;
traverseNodes(root, (node) => {
if (node.nodeType !== 3 /* Node.TEXT_NODE */) {
return;
}
if (replaced) {
return;
}
const decodedText = decode(node.rawText);
if (!decodedText.includes(oldtitle)) {
return;
}
replaced = true;
const replacedText = replaceTitle(decodedText, oldtitle, newtitle);
console.info(logPrefix, "Title replaced", decodedText, replacedText);
node.rawText = encode(replacedText);
});
if (!replaced) {
console.warn(logPrefix, "Title not found in HTML", oldtitle, html);
newtitle: string,
offset: number = 0
): ReplaceSet | null {
const pos = text.indexOf(oldtitle);
if (pos === -1) {
return null;
}
return root.toString();
return {
start: pos + offset,
ndel: oldtitle.length,
text: newtitle,
};
}

async function updateTitleContent(
Expand All @@ -97,9 +67,9 @@ async function updateTitleContent(
newTitle: string
): Promise<TitleUpdateResult> {
console.info(logPrefix, "Update title", pad.id, oldTitle, newTitle);
const { html } = await api.getHTML(pad.id);
const replacedHtml = replaceTitleHtml(html, oldTitle, newTitle);
if (replacedHtml === null) {
const { text } = await api.getText(pad.id);
const replaceSet = replaceTitle(text, oldTitle, newTitle);
if (replaceSet === null) {
console.warn(logPrefix, "Title not found in HTML", oldTitle, newTitle);
const updates = await searchHashes(searchEngine, oldTitle);
return {
Expand All @@ -110,7 +80,7 @@ async function updateTitleContent(
},
};
}
await api.setHTML(pad.id, replacedHtml);
await applyReplaceSet(pad.id, [replaceSet]);

const updates = await searchHashes(searchEngine, oldTitle);
return {
Expand Down
3 changes: 3 additions & 0 deletions src/pad/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js">
</script>

<script src="https://code.jquery.com/jquery-3.7.1.min.js" integrity="sha256-/JqT3SQfawRcv/BIHPThkBvs0OEvtFFmqPF/lYI/Cxo=" crossorigin="anonymous">
</script>

<script type="text/javascript">
$(document).ready(function(){
require(['index/index'], function(index) {
Expand Down

0 comments on commit 2a83c33

Please sign in to comment.