|
2 | 2 | """
|
3 | 3 | Utility for parsing HTML5 entity definitions available from:
|
4 | 4 |
|
5 |
| - http://dev.w3.org/html5/spec/entities.json |
| 5 | + https://html.spec.whatwg.org/entities.json |
| 6 | + https://html.spec.whatwg.org/multipage/named-characters.html |
6 | 7 |
|
7 |
| -Written by Ezio Melotti and Iuliia Proskurnia. |
| 8 | +The page now contains the following note: |
| 9 | +
|
| 10 | + "This list is static and will not be expanded or changed in the future." |
8 | 11 |
|
| 12 | +Written by Ezio Melotti and Iuliia Proskurnia. |
9 | 13 | """
|
10 | 14 |
|
11 | 15 | import os
|
|
14 | 18 | from urllib.request import urlopen
|
15 | 19 | from html.entities import html5
|
16 | 20 |
|
17 |
| -entities_url = 'http://dev.w3.org/html5/spec/entities.json' |
| 21 | +PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html' |
| 22 | +ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json' |
| 23 | +HTML5_SECTION_START = '# HTML5 named character references' |
18 | 24 |
|
19 | 25 | def get_json(url):
|
20 | 26 | """Download the json file from the url and return a decoded object."""
|
@@ -62,29 +68,32 @@ def write_items(entities, file=sys.stdout):
|
62 | 68 | # be before their equivalent lowercase version.
|
63 | 69 | keys = sorted(entities.keys())
|
64 | 70 | keys = sorted(keys, key=str.lower)
|
| 71 | + print(HTML5_SECTION_START, file=file) |
| 72 | + print(f'# Generated by {sys.argv[0]!r}\n' |
| 73 | + f'# from {ENTITIES_URL} and\n' |
| 74 | + f'# {PAGE_URL}.\n' |
| 75 | + f'# Map HTML5 named character references to the ' |
| 76 | + f'equivalent Unicode character(s).', file=file) |
65 | 77 | print('html5 = {', file=file)
|
66 | 78 | for name in keys:
|
67 |
| - print(' {!r}: {!a},'.format(name, entities[name]), file=file) |
| 79 | + print(f' {name!r}: {entities[name]!a},', file=file) |
68 | 80 | print('}', file=file)
|
69 | 81 |
|
70 | 82 |
|
71 | 83 | if __name__ == '__main__':
|
72 | 84 | # without args print a diff between html.entities.html5 and new_html5
|
73 | 85 | # with --create print the new html5 dict
|
74 | 86 | # with --patch patch the Lib/html/entities.py file
|
75 |
| - new_html5 = create_dict(get_json(entities_url)) |
| 87 | + new_html5 = create_dict(get_json(ENTITIES_URL)) |
76 | 88 | if '--create' in sys.argv:
|
77 |
| - print('# map the HTML5 named character references to the ' |
78 |
| - 'equivalent Unicode character(s)') |
79 |
| - print('# Generated by {}. Do not edit manually.'.format(__file__)) |
80 | 89 | write_items(new_html5)
|
81 | 90 | elif '--patch' in sys.argv:
|
82 | 91 | fname = 'Lib/html/entities.py'
|
83 | 92 | temp_fname = fname + '.temp'
|
84 | 93 | with open(fname) as f1, open(temp_fname, 'w') as f2:
|
85 | 94 | skip = False
|
86 | 95 | for line in f1:
|
87 |
| - if line.startswith('html5 = {'): |
| 96 | + if line.startswith(HTML5_SECTION_START): |
88 | 97 | write_items(new_html5, file=f2)
|
89 | 98 | skip = True
|
90 | 99 | continue
|
|
0 commit comments