|
1 | 1 | #!/usr/bin/env python
|
2 |
| -"""usage: %prog [options] filename |
3 |
| -
|
| 2 | +""" |
4 | 3 | Parse a document to a tree, with optional profiling
|
5 | 4 | """
|
6 | 5 |
|
| 6 | +import argparse |
7 | 7 | import sys
|
8 | 8 | import traceback
|
9 |
| -from optparse import OptionParser |
10 | 9 |
|
11 | 10 | from html5lib import html5parser
|
12 | 11 | from html5lib import treebuilders, serializer, treewalkers
|
|
15 | 14 |
|
16 | 15 |
|
17 | 16 | def parse():
|
18 |
| - optParser = getOptParser() |
19 |
| - opts, args = optParser.parse_args() |
| 17 | + parser = get_parser() |
| 18 | + opts = parser.parse_args() |
20 | 19 | encoding = "utf8"
|
21 | 20 |
|
22 | 21 | try:
|
23 |
| - f = args[-1] |
| 22 | + f = opts.filename |
24 | 23 | # Try opening from the internet
|
25 | 24 | if f.startswith('http://'):
|
26 | 25 | try:
|
@@ -151,92 +150,84 @@ def printOutput(parser, document, opts):
|
151 | 150 | sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
|
152 | 151 |
|
153 | 152 |
|
def get_parser():
    """Build the command-line argument parser for this script.

    The parser takes one positional argument, ``filename``, plus optional
    flags that control parsing, timing/profiling and serialization.

    Every ``store_true`` flag defaults to ``False``. The two ``--no-*``
    flags are ``store_false`` options whose destinations (``html`` and
    ``minimize_boolean_attributes``) default to ``True``. ``--quote-char``
    defaults to ``None`` and ``--treebuilder`` to ``"etree"``.

    Returns:
        argparse.ArgumentParser: the configured parser; call
        ``parse_args()`` on it to obtain the options namespace.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # --- run control -----------------------------------------------------
    parser.add_argument("-p", "--profile", action="store_true",
                        help="Use the hotshot profiler to "
                        "produce a detailed log of the run")

    parser.add_argument("-t", "--time",
                        action="store_true",
                        help="Time the run using time.time (may not be "
                        "accurate on all platforms, especially for "
                        "short runs)")

    parser.add_argument("-b", "--treebuilder",
                        default="etree",
                        help="treebuilder to use (default: %(default)s)")

    parser.add_argument("-e", "--error", action="store_true",
                        help="Print a list of parse errors")

    parser.add_argument("-f", "--fragment", action="store_true",
                        help="Parse as a fragment")

    parser.add_argument("-s", "--scripting", action="store_true",
                        help="Handle noscript tags as if scripting was enabled")

    # --- output selection ------------------------------------------------
    parser.add_argument("--tree", action="store_true",
                        help="Output as debug tree")

    parser.add_argument("-x", "--xml", action="store_true",
                        help="Output as xml")

    # store_false: opts.html is True unless --no-html is given.
    parser.add_argument("--no-html", action="store_false",
                        dest="html", help="Don't output html")

    parser.add_argument("-c", "--encoding", action="store_true",
                        help="Print character encoding used")

    # --- serializer options ----------------------------------------------
    parser.add_argument("--inject-meta-charset", action="store_true",
                        help="inject <meta charset>")

    parser.add_argument("--strip-whitespace", action="store_true",
                        help="strip whitespace")

    parser.add_argument("--omit-optional-tags", action="store_true",
                        help="omit optional tags")

    parser.add_argument("--quote-attr-values", action="store_true",
                        help="quote attribute values")

    parser.add_argument("--use-best-quote-char", action="store_true",
                        help="use best quote character")

    parser.add_argument("--quote-char",
                        help="quote character")

    # store_false: minimization stays on unless this flag is passed, so the
    # help text must describe turning it off.
    parser.add_argument("--no-minimize-boolean-attributes",
                        action="store_false",
                        dest="minimize_boolean_attributes",
                        help="don't minimize boolean attributes")

    parser.add_argument("--use-trailing-solidus", action="store_true",
                        help="use trailing solidus")

    parser.add_argument("--space-before-trailing-solidus",
                        action="store_true",
                        help="add space before trailing solidus")

    parser.add_argument("--escape-lt-in-attrs", action="store_true",
                        help="escape less than signs in attribute values")

    parser.add_argument("--escape-rcdata", action="store_true",
                        help="escape rcdata element values")

    parser.add_argument("--sanitize", action="store_true",
                        help="sanitize")

    parser.add_argument("-l", "--log", action="store_true",
                        help="log state transitions")

    # Required positional: the document to parse.
    parser.add_argument("filename",
                        help="document to parse (a file name or an "
                        "http:// URL)")

    return parser
|
242 | 233 |
|
|
0 commit comments