Skip to content

Commit 985acd0

Browse files
committed
webui : Revised LaTeX formula recognition
1 parent 81086cd commit 985acd0

File tree

2 files changed

+148
-1
lines changed

2 files changed

+148
-1
lines changed

tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,146 @@
130130
return tempDiv.innerHTML;
131131
}
132132
133+
// See also:
134+
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
135+
136+
// Protect code blocks: ```...``` and `...`
137+
// Matches either a fenced span (three backticks ... three backticks, may cross lines)
// or a single-line inline span (`...`, at least one character, no backtick or newline
// inside). Global flag: every occurrence in the input is matched.
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;
138+
139+
/**
 * Normalizes LaTeX delimiters in markdown text before it is handed to the
 * markdown processor.
 *
 * Pipeline:
 *  1. Code spans are swapped for `<<CODE_BLOCK_n>>` placeholders.
 *  2. Existing math (`$$...$$`, `\[...\]`, `\(...\)` and inline `$...$` that does
 *     not look like a currency amount) is swapped for `<<LATEX_n>>` placeholders.
 *  3. Any remaining `$` directly followed by a digit is escaped as `\$`.
 *  4. Math placeholders are restored.
 *  5. `\(...\)` / `\[...\]` are rewritten to `$...$` / `$$...$$`, and mhchem
 *     macros (`\ce{`, `\pu{`) get their backslash doubled.
 *  6. Code placeholders are restored last, so none of the rewrites above can
 *     alter text inside code spans.
 *
 * @param content Raw markdown text.
 * @returns The text with normalized math delimiters and escaped currency dollars.
 */
export function preprocessLaTeX(content: string): string {
	// Step 1: Protect code blocks. They stay masked until the very end.
	const codeBlocks: string[] = [];
	content = content.replace(codeBlockRegex, (match) => {
		codeBlocks.push(match);
		return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
	});

	// Step 2: Protect existing LaTeX expressions.
	const latexExpressions: string[] = [];

	// Match \(...\), \[...\], $$...$$ and protect them.
	content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
		latexExpressions.push(match);
		return `<<LATEX_${latexExpressions.length - 1}>>`;
	});

	// Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99).
	content = protectLaTeXButNotMoney(content, latexExpressions);

	// Step 3: Escape standalone $ before digits (currency like $5 → \$5).
	// Inline math is already masked, so only unprotected dollars are touched.
	content = content.replace(/\$(?=\d)/g, '\\$');

	// Step 4: Restore protected LaTeX expressions. An index that was never issued
	// (the input itself contained placeholder-looking text) is left unchanged
	// instead of being replaced by "undefined".
	content = content.replace(/<<LATEX_(\d+)>>/g, (match: string, index: string) => {
		return latexExpressions[Number.parseInt(index, 10)] ?? match;
	});

	// Step 5: Apply additional escaping functions (brackets and mhchem).
	content = escapeBrackets(content);
	if (content.includes('\\ce{') || content.includes('\\pu{')) {
		content = escapeMhchem(content);
	}

	// Final pass: Convert \(...\) → $...$, \[...\] → $$...$$.
	// Function replacers are used on purpose: in a replacement *string*, "$$" is an
	// escape for a single "$", which makes patterns such as '$$$$1$$' expand to the
	// literal text "$$1$" and drop the captured formula.
	content = content
		.replace(/\\\((.+?)\\\)/g, (_match: string, body: string) => `$${body}$`) // inline
		.replace(/\\\[(.+?)\\\]/g, (_match: string, body: string) => `$$${body}$$`); // display

	// Step 6: Restore code blocks verbatim.
	content = content.replace(/<<CODE_BLOCK_(\d+)>>/g, (match: string, index: string) => {
		return codeBlocks[Number.parseInt(index, 10)] ?? match;
	});

	return content;
}
186+
187+
/**
 * Masks inline `$...$` math with `<<LATEX_n>>` placeholders, line by line, while
 * leaving dollar signs that look like currency in place.
 *
 * A `$` is NOT treated as the start of math when:
 *  - the character before it is a letter, digit, `_`, `$` or `-`, or
 *  - it is followed by a digit and the character after the next `$` on the line
 *    is a letter, digit, `_`, `$` or `-` (the first `$` then looks like an amount).
 *
 * @param content Text to scan; math is only recognized within a single line.
 * @param latexExpressions Accumulator that receives each masked expression
 *   (including its `$` delimiters). It is mutated: the placeholder number is the
 *   index at which the expression was pushed.
 * @returns The text with recognized inline math replaced by placeholders.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
	if (!content.includes('$')) {
		return content;
	}

	// Hoisted so the patterns are not re-created for every `$` encountered.
	const wordLikeChar = /[A-Za-z0-9_$-]/;
	const digitChar = /[0-9]/;

	return content
		.split('\n')
		.map((line) => {
			if (!line.includes('$')) {
				return line;
			}

			let result = '';
			let index = 0;
			// Scan to the end of the line. Every exit path either appends the unscanned
			// remainder or has consumed the whole line, so trailing text after the last
			// formula is never dropped.
			while (index < line.length) {
				const openIndex = line.indexOf('$', index);
				if (openIndex === -1) {
					result += line.slice(index);
					break;
				}

				// Is there a next $-sign? Without one there is nothing to pair with.
				const nextIndex = line.indexOf('$', openIndex + 1);
				if (nextIndex === -1) {
					result += line.slice(index);
					break;
				}

				// charAt yields '' when out of range, which matches neither pattern.
				const beforeOpenChar = line.charAt(openIndex - 1);
				const afterOpenChar = line.charAt(openIndex + 1);
				const afterCloseChar = line.charAt(nextIndex + 1);

				if (wordLikeChar.test(beforeOpenChar)) {
					// Letter, digit, $, _ or - before the first '$': no TeX.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}
				if (digitChar.test(afterOpenChar) && wordLikeChar.test(afterCloseChar)) {
					// First $ seems to belong to an amount; keep it and rescan after it.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}

				// Treat as LaTeX: emit the text before it, then a placeholder.
				result += line.slice(index, openIndex);
				latexExpressions.push(line.slice(openIndex, nextIndex + 1));
				result += `<<LATEX_${latexExpressions.length - 1}>>`;
				index = nextIndex + 1;
			}
			return result;
		})
		.join('\n');
}
240+
241+
/**
 * Rewrites bracket-style math delimiters to dollar-style ones:
 * `\[...\]` becomes `$$...$$` and `\(...\)` becomes `$...$`.
 *
 * Fenced and inline code spans are matched by the first alternative of the
 * pattern and returned unchanged, so delimiters inside them are not rewritten.
 *
 * @param text Text possibly containing `\(...\)` or `\[...\]` math.
 * @returns The text with those delimiters converted.
 */
function escapeBrackets(text: string): string {
	// Alternatives, in priority order: code span | \[ display \] | \( inline \)
	const delimiterPattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

	const rewriteMatch = (
		whole: string,
		code: string | undefined,
		display: string | undefined,
		inline: string | undefined
	): string => {
		// Exactly one capture group participates in any given match.
		if (code != null) {
			return code;
		}
		if (display != null) {
			return `$$${display}$$`;
		}
		if (inline != null) {
			return `$${inline}$`;
		}
		return whole;
	};

	return text.replace(delimiterPattern, rewriteMatch);
}
262+
263+
/**
 * Doubles the backslash of mhchem macros that directly follow a dollar sign:
 * `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`. Occurrences not
 * immediately preceded by `$` are left untouched.
 *
 * @param text Text possibly containing `$\ce{...}` / `$\pu{...}`.
 * @returns The text with those macro prefixes rewritten.
 */
function escapeMhchem(text: string): string {
	const rewrites: ReadonlyArray<readonly [string, string]> = [
		['$\\ce{', '$\\\\ce{'],
		['$\\pu{', '$\\\\pu{']
	];
	// split/join substitutes every literal occurrence, applied in table order.
	return rewrites.reduce((acc, [from, to]) => acc.split(from).join(to), text);
}
267+
133268
async function processMarkdown(text: string): Promise<string> {
134269
try {
135-
const result = await processor().process(text);
270+
const processedText = preprocessLaTeX(text);
271+
272+
const result = await processor().process(processedText);
136273
const html = String(result);
137274
const enhancedLinks = enhanceLinks(html);
138275

tools/server/webui/src/stories/fixtures/math-formulas.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,16 @@ $$\lim_{x \to 0} \frac{\sin x}{x} = 1$$
150150
151151
$$\lim_{n \to \infty} \left(1 + \frac{x}{n}\right)^n = e^x$$
152152
153+
## Further Bracket Styles
154+
155+
- \( \mathrm{GL}_2(\mathbb{F}_7) \): Group of invertible matrices with entries in \(\mathbb{F}_7\).
156+
- Some kernel of \(\mathrm{SL}_2(\mathbb{F}_7)\):
157+
\[
158+
\left\{ \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}, \begin{pmatrix} -1 & 0 \\ 0 & -1 \end{pmatrix} \right\} = \{\pm I\}
159+
\]
160+
- $100 and $12.99 are amounts, not LaTeX.
161+
- I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.
162+
153163
---
154164
155165
*This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.*

0 commit comments

Comments
 (0)