-
Notifications
You must be signed in to change notification settings - Fork 118
/
Copy path591. Tag Validator.cpp
221 lines (190 loc) · 6.85 KB
/
591. Tag Validator.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
//Approach 1: Stack
//Runtime: 4 ms, faster than 58.93% of C++ online submissions for Tag Validator.
//Memory Usage: 6.4 MB, less than 60.71% of C++ online submissions for Tag Validator.
// Stack-based validator for LeetCode 591 "Tag Validator".
//
// A code snippet is valid when it is exactly one closed tag
// (<NAME>content</NAME>) whose NAME is 1-9 upper-case letters and whose
// content consists of plain text, properly nested closed tags, and
// <![CDATA[ ... ]]> sections.
class Solution {
public:
    // Names of the start tags that are currently open, innermost on top.
    std::stack<std::string> stk;
    // Set once the first syntactically valid start tag has been pushed.
    bool containsTag = false;

    // Checks a tag name and updates the open-tag stack accordingly.
    //   s      - text found between '<' (or "</") and the next '>'
    //   ending - true when the tag is an end tag ("</...>")
    // Returns false when the name is not 1-9 upper-case letters, or when an
    // end tag does not match the innermost open start tag.
    bool isValidTagName(const std::string& s, bool ending){
        if(s.empty() || s.size() > 9)
            return false;
        for(const char c : s){
            if(c < 'A' || c > 'Z')
                return false;
        }
        if(ending){
            // An end tag must close the innermost open tag.
            if(stk.empty() || stk.top() != s)
                return false;
            stk.pop();
        }else{
            containsTag = true;
            stk.push(s);
        }
        return true;
    }

    // Returns true when s (the text following "<!") begins with "[CDATA[".
    bool isValidCdata(const std::string& s){
        return s.compare(0, 7, "[CDATA[") == 0;
    }

    // Returns true when `code` is a valid snippet as described on the class.
    bool isValid(std::string code) {
        // Start every call from a clean state so that one instance can be
        // reused; a previous failed call may have left tags on the stack.
        stk = std::stack<std::string>();
        containsTag = false;

        // The snippet must be wrapped in a tag, so it starts with '<' and
        // ends with '>'. The empty() test also protects back() below.
        if(code.empty() || code.front() != '<' || code.back() != '>')
            return false;

        const std::size_t n = code.size();
        for(std::size_t i = 0; i < n; ++i){
            // Once a start tag has been seen, an empty stack means the
            // outermost tag is already closed and we are looking at
            // something outside it, e.g. the second tag in "<A></A><B></B>".
            if(containsTag && stk.empty())
                return false;
            if(code[i] != '<')
                continue;

            // code[i+1] is always in range here: the last character is '>',
            // so a '<' (or the '/' after it) is never the final character.
            std::size_t closeIndex;
            if(!stk.empty() && code[i+1] == '!'){
                // CDATA is only legal inside an open tag.
                closeIndex = code.find("]]>", i+1);
                if(closeIndex == std::string::npos ||
                   !isValidCdata(code.substr(i+2, closeIndex-(i+2))))
                    return false;
                // Move onto the final '>' of "]]>".
                closeIndex += 2;
            }else{
                bool ending = false;
                if(code[i+1] == '/'){
                    ++i;
                    ending = true;
                }
                closeIndex = code.find('>', i+1);
                if(closeIndex == std::string::npos ||
                   !isValidTagName(code.substr(i+1, closeIndex-(i+1)), ending))
                    return false;
            }
            // Resume scanning right after the tag / CDATA section.
            i = closeIndex;
        }
        return stk.empty() && containsTag;
    }
};
//Approach 2: Regex, catastrophic backtracking
//TLE
//0 / 256 test cases passed.
// Pure-regex shape check for LeetCode 591 "Tag Validator".
//
// NOTE: the pattern only verifies the overall shape of the snippet: an
// outermost <NAME>...</NAME> pair with the same NAME, wrapping text, tags and
// CDATA sections. Inner tags are accepted individually; the pattern does not
// verify that they are closed or nested correctly.
class Solution {
public:
    // Returns true when the whole of `code` matches the pattern below.
    bool isValid(std::string code) {
        // Pieces of the pattern:
        //  (1) <([A-Z]{1,9})>         outermost start tag, 1-9 upper-case
        //                             letters, captured as group 1
        //  (2) [^<]*                  plain text (anything except '<')
        //  (3) </?[A-Z]{1,9}>         an inner start tag or end tag
        //  (4) <!\[CDATA\[ ... \]\]>  a CDATA section; its body is matched
        //                             lazily up to "]]>" and may contain
        //                             line breaks
        //  (5) </\1>                  outermost end tag, a back-reference to
        //                             the name captured in (1)
        //
        // The content is written as  text (item text)*  rather than
        // (text item? text)*: both describe the same strings, but the latter
        // lets a run of plain text be split between iterations in many ways,
        // which causes catastrophic backtracking on non-matching input.
        //
        // In the C++ literal every regex backslash is doubled. The closing
        // brackets of "]]>" are escaped because a bare ']' outside a bracket
        // expression is not accepted by every std::regex implementation.
        static const std::regex pattern(
            "<([A-Z]{1,9})>"
            "[^<]*"
            "(?:(?:</?[A-Z]{1,9}>|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>)[^<]*)*"
            "</\\1>");
        return std::regex_match(code, pattern);
    }
};
//Approach 3: Regex shape check, then stack-based tag matching
//TLE
//0 / 256 test cases passed.
// Hybrid validator for LeetCode 591 "Tag Validator": a regex first rejects
// snippets with the wrong overall shape, then a stack-based scan checks that
// tags are named, nested and closed correctly.
class Solution {
public:
    // Names of the start tags that are currently open, innermost on top.
    std::stack<std::string> stk;
    // Set once the first start tag has been pushed.
    bool containsTag = false;

    // Checks a tag name and updates the open-tag stack accordingly.
    //   s      - text found between '<' (or "</") and the next '>'
    //   ending - true when the tag is an end tag ("</...>")
    // Returns false when the name is not 1-9 upper-case letters, or when an
    // end tag does not match the innermost open start tag.
    bool isValidTagName(const std::string& s, bool ending){
        // The name is re-checked here instead of relying on the regex: the
        // regex may treat a stretch of text as CDATA content that the scan
        // below, which ends CDATA at the first "]]>", sees as tags.
        if(s.empty() || s.size() > 9)
            return false;
        for(const char c : s){
            if(c < 'A' || c > 'Z')
                return false;
        }
        if(ending){
            if(stk.empty() || stk.top() != s)
                return false;
            stk.pop();
        }else{
            containsTag = true;
            stk.push(s);
        }
        return true;
    }

    // Returns true when `code` is exactly one closed tag whose content is
    // text, properly nested closed tags and CDATA sections.
    bool isValid(std::string code) {
        // Start every call from a clean state so that one instance can be
        // reused; a previous failed call may have left tags on the stack.
        stk = std::stack<std::string>();
        containsTag = false;

        // Shape check: a start tag followed by any mix of text, tags and
        // CDATA sections. Written as  text (item text)*  so that a run of
        // text cannot be split between iterations (avoids catastrophic
        // backtracking). "]]>" is escaped for portability, and the CDATA body
        // uses [\s\S] so that it may contain line breaks.
        static const std::regex pattern(
            "<[A-Z]{1,9}>"
            "[^<]*"
            "(?:<(?:/?[A-Z]{1,9}>|!\\[CDATA\\[[\\s\\S]*?\\]\\]>)[^<]*)*");
        if(!std::regex_match(code, pattern))
            return false;

        static const std::string cdataOpen = "<![CDATA[";
        for(std::size_t i = 0; i < code.size(); ++i){
            // An empty stack after the first start tag means the outermost
            // tag is already closed and something follows it.
            if(containsTag && stk.empty())
                return false;
            if(code[i] != '<')
                continue;

            if(i + 1 < code.size() && code[i+1] == '!'){
                // Must really be a CDATA opener, not just any "<!".
                if(code.compare(i, cdataOpen.size(), cdataOpen) != 0)
                    return false;
                const std::size_t cdataEnd = code.find("]]>", i + cdataOpen.size());
                if(cdataEnd == std::string::npos)
                    return false;
                // Move onto the final '>' of "]]>".
                i = cdataEnd + 2;
                continue;
            }

            bool ending = false;
            if(i + 1 < code.size() && code[i+1] == '/'){
                ++i;
                ending = true;
            }
            const std::size_t closeIndex = code.find('>', i+1);
            if(closeIndex == std::string::npos ||
               !isValidTagName(code.substr(i+1, closeIndex-(i+1)), ending))
                return false;
            // Resume scanning right after the tag.
            i = closeIndex;
        }
        return stk.empty() && containsTag;
    }
};
//Approach 4: Regex, repeatedly collapse innermost closed tags
//https://leetcode.com/problems/tag-validator/discuss/103370/short-python-accepted-but-not-sure-if-correct
//Runtime: 168 ms, faster than 7.14% of C++ online submissions for Tag Validator.
//Memory Usage: 29.2 MB, less than 7.14% of C++ online submissions for Tag Validator.
// Regex-replace validator for LeetCode 591 "Tag Validator".
//
// Every CDATA section is first collapsed to the placeholder "c". Then every
// innermost closed tag (<NAME>text</NAME> with no '<' in the text) is
// collapsed to the placeholder "t", repeatedly, until nothing changes. The
// snippet is valid when the whole input collapses to a single "t".
class Solution {
public:
    // Returns true when `code` reduces to exactly "t" by the rewriting above.
    bool isValid(std::string code) {
        // A literal "t" would otherwise be mistaken for a collapsed tag.
        if(code == "t") return false;

        // In the C++ literals every regex backslash is doubled; "\[" and
        // "\]" match the bracket characters themselves. The CDATA body uses
        // [\s\S] instead of '.', because '.' does not match line terminators
        // and a CDATA section spanning several lines would never collapse.
        // The body is lazy so that it stops at the first "]]>".
        static const std::regex cdataPattern("<!\\[CDATA\\[[\\s\\S]*?\\]\\]>");
        // \1 forces the end tag to repeat the start tag's name.
        static const std::regex tagPattern("<([A-Z]{1,9})>[^<]*</\\1>");

        code = std::regex_replace(code, cdataPattern, "c");

        // Collapse innermost tags until a fixed point is reached.
        std::string prev;
        do{
            prev = code;
            code = std::regex_replace(code, tagPattern, "t");
        }while(code != prev);

        return code == "t";
    }
};