|
130 | 130 | return tempDiv.innerHTML; |
131 | 131 | } |
132 | 132 |
|
| 133 | + // See also: |
| 134 | + // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts |
| 135 | +
|
// Matches spans that must never be rewritten: fenced blocks (```...```) and
// single-line inline code spans (`...`).
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;

/**
 * Normalises LaTeX delimiters in markdown text before it is handed to the
 * markdown processor.
 *
 * The pipeline is:
 *  1. swap code spans for `<<CODE_BLOCK_n>>` placeholders,
 *  2. swap existing math (`$$...$$`, `\[...\]`, `\(...\)` and inline `$...$`
 *     that does not look like a currency amount) for `<<LATEX_n>>` placeholders,
 *  3. escape the remaining unescaped `$` characters that directly precede a digit,
 *  4. restore the math, then the code,
 *  5. convert `\(...\)` / `\[...\]` to `$...$` / `$$...$$` and apply the mhchem
 *     escaping when `\ce{` or `\pu{` occurs in the text.
 *
 * @param content Raw markdown text.
 * @returns The text with currency dollars escaped and bracket delimiters converted.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks.
  const codeBlocks: string[] = [];
  let text = content.replace(codeBlockRegex, (match) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions: $$...$$, \[...\] and \(...\).
  const latexExpressions: string[] = [];
  text = text.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99).
  text = protectLaTeXButNotMoney(text, latexExpressions);

  // Step 3: Escape standalone $ before digits (currency like $5 → \$5).
  // The lookbehind leaves a dollar that is already escaped (\$5) untouched;
  // a function replacer keeps `$` in the output free of replacement-pattern rules.
  text = text.replace(/(?<!\\)\$(?=\d)/g, () => '\\$');

  // Step 4: Restore protected LaTeX expressions.
  // An inline expression captured later may itself contain the placeholder of an
  // earlier expression, so restoration recurses. `bound` only allows references
  // to strictly earlier slots, which guarantees termination even when the input
  // contains literal placeholder-looking text. Unknown indices are left as-is
  // instead of being replaced by the string "undefined".
  const restoreLatex = (fragment: string, bound: number): string =>
    fragment.replace(/<<LATEX_(\d+)>>/g, (placeholder: string, digits: string) => {
      const slot = Number(digits);
      const expression = slot < bound ? latexExpressions[slot] : undefined;
      return expression === undefined ? placeholder : restoreLatex(expression, slot);
    });
  text = restoreLatex(text, latexExpressions.length);

  // Step 5: Restore code blocks (unknown indices are left untouched).
  text = text.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder: string, digits: string) => codeBlocks[Number(digits)] ?? placeholder
  );

  // Step 6: Convert \(...\) → $...$ and \[...\] → $$...$$ (escapeBrackets skips
  // code spans), then apply the mhchem escaping.
  // No further bracket pass is run after this: a plain regex pass here would
  // also rewrite delimiters inside the code spans restored in step 5.
  text = escapeBrackets(text);
  if (text.includes('\\ce{') || text.includes('\\pu{')) {
    text = escapeMhchem(text);
  }

  return text;
}
| 186 | +
|
/**
 * Replaces inline `$...$` math on each line with `<<LATEX_n>>` placeholders while
 * leaving dollar signs that look like currency amounts in place.
 *
 * A `$` is NOT treated as an opening math delimiter when
 *  - the character before it is a letter, digit, `_`, `$` or `-`, or
 *  - it is followed by a digit and the next `$` on the line is followed by a
 *    letter, digit, `_`, `$` or `-` (the first `$` then seems to belong to an amount).
 *
 * Pairs are only searched within a single line.
 *
 * @param content Text to scan.
 * @param latexExpressions Store for protected expressions; every protected span is
 *   appended to it and its index is the `n` used in the placeholder.
 * @returns The text with protected spans replaced by placeholders; all other
 *   characters are preserved.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
  if (!content.includes('$')) {
    return content;
  }

  const wordLike = /[A-Za-z0-9_$-]/;
  const digit = /[0-9]/;

  return content
    .split('\n')
    .map((line) => {
      if (!line.includes('$')) {
        return line;
      }

      let result = '';
      let index = 0;
      while (index < line.length) {
        const openIndex = line.indexOf('$', index);
        const nextIndex = openIndex === -1 ? -1 : line.indexOf('$', openIndex + 1);
        if (nextIndex === -1) {
          // No complete $...$ pair is left on this line.
          break;
        }

        // charAt yields '' past either end, which never matches the classes below.
        const beforeOpenChar = openIndex > 0 ? line.charAt(openIndex - 1) : '';
        const afterOpenChar = line.charAt(openIndex + 1);
        const afterCloseChar = line.charAt(nextIndex + 1);

        const glueToPrevious = wordLike.test(beforeOpenChar);
        const looksLikeAmount = digit.test(afterOpenChar) && wordLike.test(afterCloseChar);
        if (glueToPrevious || looksLikeAmount) {
          // Not TeX: keep this '$' verbatim and continue scanning right after it.
          result += line.slice(index, openIndex + 1);
          index = openIndex + 1;
          continue;
        }

        // Treat as LaTeX: stash the whole `$...$` span and emit a placeholder.
        result += line.slice(index, openIndex);
        latexExpressions.push(line.slice(openIndex, nextIndex + 1));
        result += `<<LATEX_${latexExpressions.length - 1}>>`;
        index = nextIndex + 1;
      }

      // Always keep whatever is left of the line; without this the characters
      // after the last processed '$' would be dropped.
      return result + line.slice(index);
    })
    .join('\n');
}
| 240 | +
|
/**
 * Rewrites bracket-style math delimiters to dollar-style ones:
 * `\[...\]` becomes `$$...$$` and `\(...\)` becomes `$...$`.
 * Fenced code blocks and inline code spans are matched first and returned
 * unchanged, so delimiters inside them are not converted.
 *
 * @param text Text to convert.
 * @returns The text with bracket delimiters replaced outside of code.
 */
function escapeBrackets(text: string): string {
  // Alternatives in order: code (group 1), \[...\] body (group 2), \(...\) body (group 3).
  const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const rewrite = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code != null) {
      return code;
    }
    if (display != null) {
      return '$$' + display + '$$';
    }
    if (inline != null) {
      return '$' + inline + '$';
    }
    return whole;
  };

  return text.replace(pattern, rewrite);
}
| 262 | +
|
// Doubles the backslash of mhchem macros that directly follow a dollar sign:
// `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`. Other occurrences are untouched.
function escapeMhchem(text: string): string {
  let escaped = text;
  for (const macro of ['ce', 'pu']) {
    // split/join substitutes every literal occurrence.
    escaped = escaped.split('$\\' + macro + '{').join('$\\\\' + macro + '{');
  }
  return escaped;
}
| 267 | +
|
133 | 268 | async function processMarkdown(text: string): Promise<string> { |
134 | 269 | try { |
135 | | - const result = await processor().process(text); |
| 270 | + const processedText = preprocessLaTeX(text); |
| 271 | +
|
| 272 | + const result = await processor().process(processedText); |
136 | 273 | const html = String(result); |
137 | 274 | const enhancedLinks = enhanceLinks(html); |
138 | 275 |
|
|
0 commit comments