Skip to content

Commit 7eb8914

Browse files
committed
webui : vitest for maskInlineLaTeX
1 parent 022e294 commit 7eb8914

File tree

3 files changed

+181
-55
lines changed

3 files changed

+181
-55
lines changed

tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte

Lines changed: 2 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import rehypeKatex from 'rehype-katex';
99
import rehypeStringify from 'rehype-stringify';
1010
import { copyCodeToClipboard } from '$lib/utils/copy';
11+
import { maskInlineLaTeX } from '$lib/utils/latex-protection';
1112
import { browser } from '$app/environment';
1213
import 'katex/dist/katex.min.css';
1314
@@ -154,7 +155,7 @@
154155
});
155156
156157
// Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
157-
content = protectLaTeXButNotMoney(content, latexExpressions);
158+
content = maskInlineLaTeX(content, latexExpressions);
158159
159160
// Step 3: Escape standalone $ before digits (currency like $5 → \$5)
160161
// (Now that inline math is protected, this will only escape dollars not already protected)
@@ -184,60 +185,6 @@
184185
return content;
185186
}
186187
187-
/**
 * Replaces inline `$...$` math on each line of `content` with `<<LATEX_n>>`
 * placeholders while leaving dollar signs that look like money or identifier
 * fragments untouched.
 *
 * A `$...$` pair found on a single line is left alone when:
 * - the pair is empty (`$$`),
 * - the opening `$` is directly preceded by a letter, digit, `_`, `$` or `-`, or
 * - the opening `$` is followed by a digit and the closing `$` is followed by a
 *   letter, digit, `_`, `$` or `-` (the first `$` then looks like an amount).
 *
 * @param content - The text to scan; it is processed line by line.
 * @param latexExpressions - Receives every masked expression (delimiters
 *   included). The placeholder number is the expression's index in this array,
 *   so entries already present are preserved and numbering continues after them.
 * @returns The text with masked expressions replaced by placeholders.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
	if (content.indexOf('$') === -1) {
		return content;
	}

	// Hoisted so the patterns are not rebuilt for every `$` that is inspected.
	const wordLikeChar = /[A-Za-z0-9_$-]/;
	const digitChar = /[0-9]/;

	return content
		.split('\n')
		.map((line) => {
			if (line.indexOf('$') === -1) {
				return line;
			}
			let result = '';
			let index = 0;
			// Scan to the very end of the line. The previous guard
			// (`index + 2 < line.length`) stopped early and dropped the trailing
			// one or two characters, and returned '' for lines such as "$5".
			while (index < line.length) {
				const openIndex = line.indexOf('$', index);
				if (openIndex === -1) {
					result += line.slice(index);
					break;
				}

				// Is there a next $-sign?
				const nextIndex = line.indexOf('$', openIndex + 1);
				if (nextIndex === -1) {
					result += line.slice(index);
					break;
				}

				// charAt() yields '' outside the string, which matches no pattern.
				const beforeOpenChar = line.charAt(openIndex - 1);
				const afterOpenChar = line.charAt(openIndex + 1);
				const afterCloseChar = line.charAt(nextIndex + 1);

				const isEmptyPair = nextIndex === openIndex + 1;
				// Letter, digit, $, _ or - before the first '$': not TeX.
				const isGluedToWord = wordLikeChar.test(beforeOpenChar);
				// The first '$' seems to belong to an amount.
				const looksLikeAmount = digitChar.test(afterOpenChar) && wordLikeChar.test(afterCloseChar);

				if (isEmptyPair || isGluedToWord || looksLikeAmount) {
					// Emit text up to and including this '$', then retry from the next char.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}

				// Treat as LaTeX
				result += line.slice(index, openIndex);
				latexExpressions.push(line.slice(openIndex, nextIndex + 1));
				result += `<<LATEX_${latexExpressions.length - 1}>>`;
				index = nextIndex + 1;
			}
			return result;
		})
		.join('\n');
}
240-
241188
function escapeBrackets(text: string): string {
242189
const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
243190
return text.replace(
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { maskInlineLaTeX } from './latex-protection';
3+
4+
// Unit tests for maskInlineLaTeX: inline `$...$` math must be replaced with
// `<<LATEX_n>>` placeholders (and collected in the array passed in), while
// dollar signs that belong to money amounts must be left untouched.
describe('maskInlineLaTeX', () => {
	it('should protect LaTeX $x + y$ but not money $3.99', () => {
		const latexExpressions: string[] = [];
		const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('I have $10, $3.99 and <<LATEX_0>> and <<LATEX_1>>. The amount is $2,000.');
		expect(latexExpressions).toEqual(['$x + y$', '$100x$']);
	});

	it('should ignore money like $5 and $12.99', () => {
		const latexExpressions: string[] = [];
		const input = 'Prices are $12.99 and $5. Tax?';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('Prices are $12.99 and $5. Tax?');
		expect(latexExpressions).toEqual([]);
	});

	it('should protect inline math $a^2 + b^2$ even after text', () => {
		const latexExpressions: string[] = [];
		const input = 'Pythagorean: $a^2 + b^2 = c^2$.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('Pythagorean: <<LATEX_0>>.');
		expect(latexExpressions).toEqual(['$a^2 + b^2 = c^2$']);
	});

	// The input holds a single `$`, so there is no closing delimiter at all.
	it('should leave a lone $ amount followed by words untouched', () => {
		const latexExpressions: string[] = [];
		const input = 'The cost is $99 and change.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('The cost is $99 and change.');
		expect(latexExpressions).toEqual([]);
	});

	it('should allow $x$ followed by punctuation', () => {
		const latexExpressions: string[] = [];
		const input = 'We know $x$, right?';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('We know <<LATEX_0>>, right?');
		expect(latexExpressions).toEqual(['$x$']);
	});

	it('should work across multiple lines', () => {
		const latexExpressions: string[] = [];
		const input = `Emma buys cupcakes for $3 each.\nHow much is $x + y$?`;
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe(`Emma buys cupcakes for $3 each.\nHow much is <<LATEX_0>>?`);
		expect(latexExpressions).toEqual(['$x + y$']);
	});

	it('should not protect $100 but protect $matrix$', () => {
		const latexExpressions: string[] = [];
		const input = '$100 and $\\mathrm{GL}_2(\\mathbb{F}_7)$ are different.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('$100 and <<LATEX_0>> are different.');
		expect(latexExpressions).toEqual(['$\\mathrm{GL}_2(\\mathbb{F}_7)$']);
	});

	// Again only one `$` is present: nothing can be paired, nothing is masked.
	it('should leave a single unmatched $ amount in a sentence untouched', () => {
		const latexExpressions: string[] = [];
		const input = 'I paid $5 quickly.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('I paid $5 quickly.');
		expect(latexExpressions).toEqual([]);
	});

	it('should protect LaTeX even with special chars inside', () => {
		const latexExpressions: string[] = [];
		const input = 'Consider $\\alpha_1 + \\beta_2$ now.';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('Consider <<LATEX_0>> now.');
		expect(latexExpressions).toEqual(['$\\alpha_1 + \\beta_2$']);
	});

	// The array is pre-seeded to check that placeholder numbering continues
	// after existing entries instead of restarting at 0.
	it('should mask expressions on very short lines and append to existing expressions', () => {
		const latexExpressions: string[] = ['$0$'];
		const input = '$a$\n$a$ and $b$';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('<<LATEX_1>>\n<<LATEX_2>> and <<LATEX_3>>');
		expect(latexExpressions).toEqual(['$0$', '$a$', '$a$', '$b$']);
	});

	it('should leave a lone $ and an empty $$ pair untouched', () => {
		const latexExpressions: string[] = [];
		const input = '$\n$$\n';
		const output = maskInlineLaTeX(input, latexExpressions);

		expect(output).toBe('$\n$$\n');
		expect(latexExpressions).toEqual([]);
	});
});
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**
 * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs
 * that appear to be part of monetary values or identifiers.
 *
 * The input is processed line by line, so an expression must open and close on the same line.
 * For each `$` that has a later `$` on the same line, the pair is left untouched when:
 * - the pair is empty (`$$`),
 * - the opening `$` is directly preceded by a letter, digit, `_`, `$` or `-` (e.g. `var$`), or
 * - the opening `$` is followed by a digit and either the closing `$` is directly followed by a
 *   letter, digit, `_`, `$` or `-`, or the closing `$` is directly preceded by a space
 *   (e.g. `$10, $3.99` or `$100 and $x$`): the first `$` then looks like a money amount.
 *
 * In those cases scanning resumes right after the opening `$`. Otherwise the whole `$...$` span
 * is replaced with a placeholder like `<<LATEX_0>>` and stored in `latexExpressions`.
 *
 * @param content - The input text potentially containing LaTeX expressions.
 * @param latexExpressions - An array used to collect extracted LaTeX expressions (delimiters
 *   included). It is appended to; the placeholder number is the expression's index in this
 *   array, so numbering continues after any entries already present.
 * @returns The processed string with LaTeX replaced by placeholders.
 */
export function maskInlineLaTeX(content: string, latexExpressions: string[]): string {
	if (!content.includes('$')) {
		return content;
	}

	// Hoisted so the patterns are not rebuilt for every `$` that is inspected.
	const wordLikeChar = /[A-Za-z0-9_$-]/;
	const digitChar = /[0-9]/;

	return content
		.split('\n')
		.map((line) => {
			if (!line.includes('$')) {
				return line;
			}
			let result = '';
			let index = 0;
			while (index < line.length) {
				const openIndex = line.indexOf('$', index);
				if (openIndex === -1) {
					result += line.slice(index);
					break;
				}

				// Is there a next $-sign?
				const closeIndex = line.indexOf('$', openIndex + 1);
				if (closeIndex === -1) {
					result += line.slice(index);
					break;
				}

				// No content between the delimiters. This must be measured from the opening
				// `$`, not from the scan cursor: comparing against `index` only caught an
				// empty pair that happened to sit exactly at the cursor.
				const isEmptyPair = closeIndex === openIndex + 1;

				// charAt() yields '' outside the string, which matches no pattern.
				const beforeOpenChar = line.charAt(openIndex - 1);
				const afterOpenChar = line.charAt(openIndex + 1);
				const beforeCloseChar = isEmptyPair ? '' : line.charAt(closeIndex - 1);
				const afterCloseChar = line.charAt(closeIndex + 1);

				// Letter, digit, $, _ or - before the first '$': not TeX.
				const isGluedToWord = wordLikeChar.test(beforeOpenChar);
				// The first '$' seems to belong to an amount.
				const looksLikeAmount =
					digitChar.test(afterOpenChar) &&
					(wordLikeChar.test(afterCloseChar) || beforeCloseChar === ' ');

				if (isEmptyPair || isGluedToWord || looksLikeAmount) {
					// Emit text up to and including this '$', then retry from the next char.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}

				// Treat as LaTeX
				result += line.slice(index, openIndex);
				latexExpressions.push(line.slice(openIndex, closeIndex + 1));
				result += `<<LATEX_${latexExpressions.length - 1}>>`;
				index = closeIndex + 1;
			}
			return result;
		})
		.join('\n');
}

0 commit comments

Comments
 (0)