-
Notifications
You must be signed in to change notification settings - Fork 79
/
Copy path_text.py
140 lines (90 loc) · 3.45 KB
/
_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from __future__ import annotations
import html
import re
from dataclasses import dataclass
from typing import Callable
import commonmark
class BaseText:
    """Common interface for text elements that can render to HTML or LaTeX.

    Both rendering methods raise `NotImplementedError` here; subclasses are expected to
    override them.
    """

    def to_html(self) -> str:
        """Return the HTML rendering of this element (not implemented on the base class)."""
        message = "Method not implemented"
        raise NotImplementedError(message)

    def to_latex(self) -> str:
        """Return the LaTeX rendering of this element (not implemented on the base class)."""
        message = "Method not implemented"
        raise NotImplementedError(message)
@dataclass
class Text(BaseText):
    """Text that is emitted verbatim, with no escaping or conversion, in every output format."""

    # The raw string handed back unchanged by both renderers.
    text: str

    def to_html(self) -> str:
        """Return the stored text unchanged."""
        unchanged = self.text
        return unchanged

    def to_latex(self) -> str:
        """Return the stored text unchanged."""
        unchanged = self.text
        return unchanged
class Md(Text):
    """Text whose content is Markdown source."""

    def to_html(self) -> str:
        """Render the stored Markdown by delegating to `_md_html`."""
        rendered = _md_html(self.text)
        return rendered

    def to_latex(self) -> str:
        """Render the stored Markdown by delegating to `_md_latex`."""
        rendered = _md_latex(self.text)
        return rendered
class Html(Text):
    """Text whose content is already HTML markup."""

    def to_html(self) -> str:
        """Return the stored HTML unchanged."""
        markup = self.text
        return markup

    def to_latex(self) -> str:
        """Report that HTML cannot be converted, then return the LaTeX-escaped raw string.

        The stored HTML is not translated to LaTeX: `_not_implemented` is called with an
        explanatory message and the text is passed through `_latex_escape`.
        """
        # Imported at call time rather than module level -- presumably to avoid a circular
        # import; confirm before hoisting.
        from ._utils_render_latex import _not_implemented

        notice = "Using the `html()` helper function won't convert HTML to LaTeX. Escaping HTML string instead."
        _not_implemented(notice)

        escaped = _latex_escape(self.text)
        return escaped
def _md_html(x: str) -> str:
    """Render Markdown to HTML and strip the outer paragraph wrapper.

    Parameters
    ----------
    x
        The Markdown source.

    Returns
    -------
    str
        The output of `commonmark.commonmark()` with a `<p>` at the very start and a `</p>`
        followed by a newline at the very end removed. Any other tags are left as they are.
    """
    # Named `rendered` rather than `str` so the builtin is not shadowed inside this function.
    rendered = commonmark.commonmark(x)
    return re.sub(r"^<p>|</p>\n$", "", rendered)
def _md_latex(x: str) -> str:
# TODO: Implement commonmark to LaTeX conversion (through a different library as
# commonmark-py does not support it)
raise NotImplementedError("Markdown to LaTeX conversion is not supported yet")
def _process_text(x: str | BaseText | None, context: str = "html") -> str:
if x is None:
return ""
escape_fn = _html_escape if context == "html" else _latex_escape
if isinstance(x, str):
return escape_fn(x)
elif isinstance(x, BaseText):
return x.to_html() if context == "html" else x.to_latex()
raise TypeError(f"Invalid type: {type(x)}")
def _process_text_id(x: str | BaseText | None) -> str:
    """Forward `x` to `_process_text`, using that function's default context."""
    processed = _process_text(x)
    return processed
def _html_escape(x: str) -> str:
return html.escape(x)
def _latex_escape(text: str) -> str:
latex_escape_regex = "[\\\\&%$#_{}~^]"
text = re.sub(latex_escape_regex, lambda match: "\\" + match.group(), text)
return text
def escape_pattern_str_latex(pattern_str: str) -> str:
    """LaTeX-escape a pattern string while leaving its brace placeholders untouched.

    A placeholder is an opening brace, one or more characters that are each `x` or a digit,
    and a closing brace (for example `{x}` or `{1}`). Everything outside the placeholders is
    passed through `_latex_escape`.
    """
    placeholder_regex = r"(\{[x0-9]+\})"
    return process_string(string=pattern_str, pattern=placeholder_regex, func=_latex_escape)
def process_string(string: str, pattern: str, func: Callable[[str], str]) -> str:
    """
    Apply a function to segments of a string that are unmatched by a regex pattern.

    The string is scanned for non-overlapping matches of `pattern`. Every stretch of text that
    lies between matches (including the text before the first match and after the last one, even
    when that stretch is empty) is passed through the supplied function (in `func=`). The matched
    text itself is copied through untouched. Finally, the processed text fragments are joined
    back into a single string.

    Parameters
    ----------
    string
        The string to process.
    pattern
        The regex pattern marking the segments to leave untouched. The whole match is preserved
        whether or not the pattern contains capturing groups.
    func
        The function applied to elements that do not match the pattern.

    Returns
    -------
    str
        A processed string.
    """
    pieces: list[str] = []
    cursor = 0

    # Walk the matches in place instead of splitting and re-testing each fragment: the full
    # match is kept even without a capturing group, and context-dependent patterns (e.g. with
    # lookbehind) are classified by where they actually matched in the original string.
    for match in re.finditer(pattern, string):
        pieces.append(func(string[cursor : match.start()]))
        pieces.append(match.group(0))
        cursor = match.end()

    # Text after the final match (or the whole string when nothing matched)
    pieces.append(func(string[cursor:]))

    return "".join(pieces)