From 223314727283ce9ef36c1a4f0de6cb8f91024cbf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 22 Aug 2024 16:00:59 +0800 Subject: [PATCH 1/7] DOC: Generate the charset tables dynamically in the documentation --- doc/conf.py | 3 + doc/techref/encodings.md | 141 +++++++++++++++------------------------ 2 files changed, 56 insertions(+), 88 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 4fbdb8aacf9..1b1925c8331 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -61,6 +61,9 @@ "requires": requirements, } +# MyST-NB configurations: https://myst-nb.readthedocs.io/en/latest/configuration.html +nb_render_markdown_format = "myst" # The format to use for text/markdown rendering + # Make the list of returns arguments and attributes render the same as the # parameters list diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md index f96d0163deb..331a0270262 100644 --- a/doc/techref/encodings.md +++ b/doc/techref/encodings.md @@ -1,3 +1,35 @@ +--- +file_format: mystnb +--- + +```{code-cell} +--- +tags: [remove-input] +--- +from IPython.display import display, Markdown +from pygmt.encodings import charset + + +def get_charset_mdtable(name): + """ + Create a markdown table for a charset. + """ + mappings = charset[name] + + undefined = "\ufffd" + text = "| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |\n" + text += "|---|---|---|---|---|---|---|---|---|\n" + for i in range(0o00, 0o400, 8): + chars = [mappings.get(j, undefined) for j in range(i, i + 8)] + if chars == [undefined] * 8: + continue + chars = [f"&#x{ord(char):04x};" for char in chars] + row = f"\\{i:03o}"[:-1] + "x" + text += f"| **{row}** | {' | '.join(chars)} |\n" + text += "\n" + return Markdown(text) +``` + # Supported Encodings and Non-ASCII Characters GMT supports a number of encodings and each encoding contains a set of ASCII and @@ -10,100 +42,33 @@ that the character is not defined in the encoding. ## Adobe ISOLatin1+ Encoding -| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | -|---|---|---|---|---|---|---|---|---| -| **\03x** | � | • | … | ™ | — | – | fi | ž | -| **\04x** | | ! | " | # | $ | % | & | ’ | -| **\05x** | ( | ) | * | + | , | - | . | / | -| **\06x** | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | -| **\07x** | 8 | 9 | : | ; | < | = | > | ? | -| **\10x** | @ | A | B | C | D | E | F | G | -| **\11x** | H | I | J | K | L | M | N | O | -| **\12x** | P | Q | R | S | T | U | V | W | -| **\13x** | X | Y | Z | [ | \ | ] | ^ | _ | -| **\14x** | ‘ | a | b | c | d | e | f | g | -| **\15x** | h | i | j | k | l | m | n | o | -| **\16x** | p | q | r | s | t | u | v | w | -| **\17x** | x | y | z | { | | | } | ~ | š | -| **\20x** | Œ | † | ‡ | Ł | ⁄ | ‹ | Š | › | -| **\21x** | œ | Ÿ | Ž | ł | ‰ | „ | “ | ” | -| **\22x** | ı | ` | ´ | ^ | ˜ | ¯ | ˘ | ˙ | -| **\23x** | ¨ | ‚ | ˚ | ¸ | ' | ˝ | ˛ | ˇ | -| **\24x** | � | ¡ | ¢ | £ | ¤ | ¥ | ¦ | § | -| **\25x** | ¨ | © | ª | « | ¬ | ­ | ® | ¯ | -| **\26x** | ° | ± | ² | ³ | ´ | µ | ¶ | · | -| **\27x** | ¸ | ¹ | º | » | ¼ | ½ | ¾ | ¿ | -| **\30x** | À | Á |  | à | Ä | Å | Æ | Ç | -| **\31x** | È | É | Ê | Ë | Ì | Í | Î | Ï | -| **\32x** | Ð | Ñ | Ò | Ó | Ô | Õ | Ö | × | -| **\33x** | Ø | Ù | Ú | Û | Ü | Ý | Þ | ß | -| **\34x** | à | á | â | ã | ä | å | æ | ç | -| **\35x** | è | é | ê | ë | ì | í | î | ï | -| **\36x** | ð | ñ | ò | ó | ô | õ | ö | ÷ | -| **\37x** | ø | ù | ú | û | ü | ý | þ | ÿ | +```{code-cell} +--- +tags: [remove-input] +--- +display(get_charset_mdtable("ISOLatin1+")) +``` ## Adobe Symbol Encoding -| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | -|---|---|---|---|---|---|---|---|---| -| **\04x** | | ! | ∀ | # | ∃ | % | & | ∋ | -| **\05x** | ( | ) | ∗ | + | , | − | . | / | -| **\06x** | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | -| **\07x** | 8 | 9 | : | ; | < | = | > | ? | -| **\10x** | ≅ | Α | Β | Χ | ∆ | Ε | Φ | Γ | -| **\11x** | Η | Ι | ϑ | Κ | Λ | Μ | Ν | Ο | -| **\12x** | Π | Θ | Ρ | Σ | Τ | Υ | ς | Ω | -| **\13x** | Ξ | Ψ | Ζ | [ | ∴ | ] | ⊥ | _ | -| **\14x** |  | α | β | χ | δ | ε | φ | γ | -| **\15x** | η | ι | ϕ | κ | λ | μ | ν | ο | -| **\16x** | π | θ | ρ | σ | τ | υ | ϖ | ω | -| **\17x** | ξ | ψ | ζ | { | | | } | ∼ | � | -| **\24x** | € | ϒ | ′ | ≤ | ∕ | ∞ | ƒ | ♣ | -| **\25x** | ♦ | ♥ | ♠ | ↔ | ← | ↑ | → | ↓ | -| **\26x** | ° | ± | ″ | ≥ | × | ∝ | ∂ | • | -| **\27x** | ÷ | ≠ | ≡ | ≈ | … | ⏐ | ⎯ | ↵ | -| **\30x** | ℵ | ℑ | ℜ | ℘ | ⊗ | ⊕ | ∅ | ∩ | -| **\31x** | ∪ | ⊃ | ⊇ | ⊄ | ⊂ | ⊆ | ∈ | ∉ | -| **\32x** | ∠ | ∇ | ® | © | ™ | ∏ | √ | ⋅ | -| **\33x** | ¬ | ∧ | ∨ | ⇔ | ⇐ | ⇑ | ⇒ | ⇓ | -| **\34x** | ◊ | 〈 | ® | © | ™ | ∑ | ⎛ | ⎜ | -| **\35x** | ⎝ | ⎡ | ⎢ | ⎣ | ⎧ | ⎨ | ⎩ | ⎪ | -| **\36x** | � | 〉 | ∫ | ⌠ | ⎮ | ⌡ | ⎞ | ⎟ | -| **\37x** | ⎠ | ⎤ | ⎥ | ⎦ | ⎫ | ⎬ | ⎭ | � | - -**Note**: The octal code `\140` represents the RADICAL EXTENDER character, which is not available in -the Unicode character set. +```{code-cell} +--- +tags: [remove-input] +--- +display(get_charset_mdtable("Symbol")) +``` + +**Note**: The octal code `\140` represents the RADICAL EXTENDER character, which is not +available in the Unicode character set. ## Adobe ZapfDingbats Encoding -| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | -|---|---|---|---|---|---|---|---|---| -| **\04x** | | ✁ | ✂ | ✃ | ✄ | ☎ | ✆ | ✇ | -| **\05x** | ✈ | ✉ | ☛ | ☞ | ✌ | ✍ | ✎ | ✏ | -| **\06x** | ✐ | ✑ | ✒ | ✓ | ✔ | ✕ | ✖ | ✗ | -| **\07x** | ✘ | ✙ | ✚ | ✛ | ✜ | ✝ | ✞ | ✟ | -| **\10x** | ✠ | ✡ | ✢ | ✣ | ✤ | ✥ | ✦ | ✧ | -| **\11x** | ★ | ✩ | ✪ | ✫ | ✬ | ✭ | ✮ | ✯ | -| **\12x** | ✰ | ✱ | ✲ | ✳ | ✴ | ✵ | ✶ | ✷ | -| **\13x** | ✸ | ✹ | ✺ | ✻ | ✼ | ✽ | ✾ | ✿ | -| **\14x** | ❀ | ❁ | ❂ | ❃ | ❄ | ❅ | ❆ | ❇ | -| **\15x** | ❈ | ❉ | ❊ | ❋ | ● | ❍ | ■ | ❏ | -| **\16x** | ❐ | ❑ | ❒ | ▲ | ▼ | ◆ | ❖ | ◗ | -| **\17x** | ❘ | ❙ | ❚ | ❛ | ❜ | ❝ | ❞ | � | -| **\20x** | ❨ | ❩ | ❪ | ❫ | ❬ | ❭ | ❮ | ❯ | -| **\21x** | ❰ | ❱ | ❲ | ❳ | ❴ | ❵ | � | � | -| **\24x** | � | ❡ | ❢ | ❣ | ❤ | ❥ | ❦ | ❧ | -| **\25x** | ♣ | ♦ | ♥ | ♠ | ① | ② | ③ | ④ | -| **\26x** | ⑤ | ⑥ | ⑦ | ⑧ | ⑨ | ⑩ | ❶ | ❷ | -| **\27x** | ❸ | ❹ | ❺ | ❻ | ❼ | ❽ | ❾ | ❿ | -| **\30x** | ➀ | ➁ | ➂ | ➃ | ➄ | ➅ | ➆ | ➇ | -| **\31x** | ➈ | ➉ | ➊ | ➋ | ➌ | ➍ | ➎ | ➏ | -| **\32x** | ➐ | ➑ | ➒ | ➓ | ➔ | → | ↔ | ↕ | -| **\33x** | ➘ | ➙ | ➚ | ➛ | ➜ | ➝ | ➞ | ➟ | -| **\34x** | ➠ | ➡ | ➢ | ➣ | ➤ | ➥ | ➦ | ➧ | -| **\35x** | ➨ | ➩ | ➪ | ➫ | ➬ | ➭ | ➮ | ➯ | -| **\36x** | � | ➱ | ➲ | ➳ | ➴ | ➵ | ➶ | ➷ | -| **\37x** | ➸ | ➹ | ➺ | ➻ | ➼ | ➽ | ➾ | � | +```{code-cell} +--- +tags: [remove-input] +--- +display(get_charset_mdtable("ZapfDingbats")) +``` ## ISO/IEC 8859 From 3b32be4f10b47a42490d95d65f80c7b82509aed7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 23 Aug 2024 14:17:55 +0800 Subject: [PATCH 2/7] Set alignments of table cells --- doc/techref/encodings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md index 331a0270262..d3f88a41ebf 100644 --- a/doc/techref/encodings.md +++ b/doc/techref/encodings.md @@ -17,8 +17,8 @@ def get_charset_mdtable(name): mappings = charset[name] undefined = "\ufffd" - text = "| octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |\n" - text += "|---|---|---|---|---|---|---|---|---|\n" + text = "| Octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |\n" + text += "|:---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n" for i in range(0o00, 0o400, 8): chars = [mappings.get(j, undefined) for j in range(i, i + 8)] if chars == [undefined] * 8: From 5d6eb1c65ad80e4b24557adaa0ee2c6ab9601027 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 23 Aug 2024 14:19:18 +0800 Subject: [PATCH 3/7] Add styles for text alignment --- doc/_static/style.css | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/_static/style.css b/doc/_static/style.css index 77546baa72b..2681160e4b8 100644 --- a/doc/_static/style.css +++ b/doc/_static/style.css @@ -203,3 +203,19 @@ a.copybtn { .sphx-glr-single-img { max-width: 80%!important; } + +/* + * Styles for aligning table cells. + * https://myst-parser.readthedocs.io/en/latest/syntax/tables.html#markdown-syntax + */ +th.text-left, td.text-left { + text-align: left !important; +} + +th.text-center, td.text-center { + text-align: center !important; +} + +th.text-right, td.text-right { + text-align: right !important; +} From 738dda634da12eac9c1af06a7ba7193549e091de Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 23 Aug 2024 21:38:46 +0800 Subject: [PATCH 4/7] Improve comments --- doc/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 1b1925c8331..fc26641bd83 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -61,10 +61,10 @@ "requires": requirements, } -# MyST-NB configurations: https://myst-nb.readthedocs.io/en/latest/configuration.html +# MyST-NB configurations. +# Reference: https://myst-nb.readthedocs.io/en/latest/configuration.html nb_render_markdown_format = "myst" # The format to use for text/markdown rendering - # Make the list of returns arguments and attributes render the same as the # parameters list napoleon_use_rtype = False From 6ea7d15c588cef9037a4d92d7005316d2ff7eb64 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 27 Aug 2024 10:52:11 +0800 Subject: [PATCH 5/7] Improve the script for the encoding table --- doc/techref/encodings.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md index d3f88a41ebf..2ee0b95bcb9 100644 --- a/doc/techref/encodings.md +++ b/doc/techref/encodings.md @@ -16,15 +16,14 @@ def get_charset_mdtable(name): """ mappings = charset[name] - undefined = "\ufffd" text = "| Octal | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |\n" text += "|:---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n" for i in range(0o00, 0o400, 8): - chars = [mappings.get(j, undefined) for j in range(i, i + 8)] - if chars == [undefined] * 8: + chars = [mappings.get(j) for j in range(i, i + 8)] + if all(v is None for v in chars): # All characters in this row are undefined continue - chars = [f"&#x{ord(char):04x};" for char in chars] row = f"\\{i:03o}"[:-1] + "x" + chars = [f"&#x{ord(char):04x};" for char in chars] text += f"| **{row}** | {' | '.join(chars)} |\n" text += "\n" return Markdown(text) @@ -38,7 +37,7 @@ in arguments and text strings. When using non-ASCII characters in PyGMT, the eas is to copy and paste the character from the encoding tables below. **Note**: The special character � (REPLACEMENT CHARACTER) is used to indicate -that the character is not defined in the encoding. +that the character is undefined in the encoding. ## Adobe ISOLatin1+ Encoding From 2923b2f83fbca59a176ee5343eb329f8a7828225 Mon Sep 17 00:00:00 2001 From: yvonnefroelich Date: Wed, 7 Aug 2024 18:58:11 +0200 Subject: [PATCH 6/7] Write links in markdown in 'encodings' --- doc/techref/encodings.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md index 2ee0b95bcb9..0265d69354a 100644 --- a/doc/techref/encodings.md +++ b/doc/techref/encodings.md @@ -72,23 +72,23 @@ display(get_charset_mdtable("ZapfDingbats")) ## ISO/IEC 8859 PyGMT also supports the ISO/IEC 8859 standard for 8-bit character encodings. Refer to - for descriptions of the different parts of -the standard. +[ISO/IEC_8859](https://en.wikipedia.org/wiki/ISO/IEC_8859) for descriptions of the +different parts of the standard. For a list of the characters in each part of the standard, refer to the following links: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- +- [ISO/IEC_8859-1](https://en.wikipedia.org/wiki/ISO/IEC_8859-1) +- [ISO/IEC_8859-2](https://en.wikipedia.org/wiki/ISO/IEC_8859-2) +- [ISO/IEC_8859-3](https://en.wikipedia.org/wiki/ISO/IEC_8859-3) +- [ISO/IEC_8859-4](https://en.wikipedia.org/wiki/ISO/IEC_8859-4) +- [ISO/IEC_8859-5](https://en.wikipedia.org/wiki/ISO/IEC_8859-5) +- [ISO/IEC_8859-6](https://en.wikipedia.org/wiki/ISO/IEC_8859-6) +- [ISO/IEC_8859-7](https://en.wikipedia.org/wiki/ISO/IEC_8859-7) +- [ISO/IEC_8859-8](https://en.wikipedia.org/wiki/ISO/IEC_8859-8) +- [ISO/IEC_8859-9](https://en.wikipedia.org/wiki/ISO/IEC_8859-9) +- [ISO/IEC_8859-10](https://en.wikipedia.org/wiki/ISO/IEC_8859-10) +- [ISO/IEC_8859-11](https://en.wikipedia.org/wiki/ISO/IEC_8859-11) +- [ISO/IEC_8859-13](https://en.wikipedia.org/wiki/ISO/IEC_8859-13) +- [ISO/IEC_8859-14](https://en.wikipedia.org/wiki/ISO/IEC_8859-14) +- [ISO/IEC_8859-15](https://en.wikipedia.org/wiki/ISO/IEC_8859-15) +- [ISO/IEC_8859-16](https://en.wikipedia.org/wiki/ISO/IEC_8859-16) From 1ec65574283e572def2a3ee14a937f0890ec23f7 Mon Sep 17 00:00:00 2001 From: yvonnefroelich Date: Wed, 7 Aug 2024 19:26:28 +0200 Subject: [PATCH 7/7] Remove underscores --- doc/techref/encodings.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/techref/encodings.md b/doc/techref/encodings.md index 0265d69354a..ca96bf3441a 100644 --- a/doc/techref/encodings.md +++ b/doc/techref/encodings.md @@ -72,23 +72,23 @@ display(get_charset_mdtable("ZapfDingbats")) ## ISO/IEC 8859 PyGMT also supports the ISO/IEC 8859 standard for 8-bit character encodings. Refer to -[ISO/IEC_8859](https://en.wikipedia.org/wiki/ISO/IEC_8859) for descriptions of the +[ISO/IEC 8859](https://en.wikipedia.org/wiki/ISO/IEC_8859) for descriptions of the different parts of the standard. For a list of the characters in each part of the standard, refer to the following links: -- [ISO/IEC_8859-1](https://en.wikipedia.org/wiki/ISO/IEC_8859-1) -- [ISO/IEC_8859-2](https://en.wikipedia.org/wiki/ISO/IEC_8859-2) -- [ISO/IEC_8859-3](https://en.wikipedia.org/wiki/ISO/IEC_8859-3) -- [ISO/IEC_8859-4](https://en.wikipedia.org/wiki/ISO/IEC_8859-4) -- [ISO/IEC_8859-5](https://en.wikipedia.org/wiki/ISO/IEC_8859-5) -- [ISO/IEC_8859-6](https://en.wikipedia.org/wiki/ISO/IEC_8859-6) -- [ISO/IEC_8859-7](https://en.wikipedia.org/wiki/ISO/IEC_8859-7) -- [ISO/IEC_8859-8](https://en.wikipedia.org/wiki/ISO/IEC_8859-8) -- [ISO/IEC_8859-9](https://en.wikipedia.org/wiki/ISO/IEC_8859-9) -- [ISO/IEC_8859-10](https://en.wikipedia.org/wiki/ISO/IEC_8859-10) -- [ISO/IEC_8859-11](https://en.wikipedia.org/wiki/ISO/IEC_8859-11) -- [ISO/IEC_8859-13](https://en.wikipedia.org/wiki/ISO/IEC_8859-13) -- [ISO/IEC_8859-14](https://en.wikipedia.org/wiki/ISO/IEC_8859-14) -- [ISO/IEC_8859-15](https://en.wikipedia.org/wiki/ISO/IEC_8859-15) -- [ISO/IEC_8859-16](https://en.wikipedia.org/wiki/ISO/IEC_8859-16) +- [ISO/IEC 8859-1](https://en.wikipedia.org/wiki/ISO/IEC_8859-1) +- [ISO/IEC 8859-2](https://en.wikipedia.org/wiki/ISO/IEC_8859-2) +- [ISO/IEC 8859-3](https://en.wikipedia.org/wiki/ISO/IEC_8859-3) +- [ISO/IEC 8859-4](https://en.wikipedia.org/wiki/ISO/IEC_8859-4) +- [ISO/IEC 8859-5](https://en.wikipedia.org/wiki/ISO/IEC_8859-5) +- [ISO/IEC 8859-6](https://en.wikipedia.org/wiki/ISO/IEC_8859-6) +- [ISO/IEC 8859-7](https://en.wikipedia.org/wiki/ISO/IEC_8859-7) +- [ISO/IEC 8859-8](https://en.wikipedia.org/wiki/ISO/IEC_8859-8) +- [ISO/IEC 8859-9](https://en.wikipedia.org/wiki/ISO/IEC_8859-9) +- [ISO/IEC 8859-10](https://en.wikipedia.org/wiki/ISO/IEC_8859-10) +- [ISO/IEC 8859-11](https://en.wikipedia.org/wiki/ISO/IEC_8859-11) +- [ISO/IEC 8859-13](https://en.wikipedia.org/wiki/ISO/IEC_8859-13) +- [ISO/IEC 8859-14](https://en.wikipedia.org/wiki/ISO/IEC_8859-14) +- [ISO/IEC 8859-15](https://en.wikipedia.org/wiki/ISO/IEC_8859-15) +- [ISO/IEC 8859-16](https://en.wikipedia.org/wiki/ISO/IEC_8859-16)