diff --git a/bin/update-tables.py b/bin/update-tables.py index fcd4a6c..2bb9e18 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -372,6 +372,21 @@ def fetch_table_zero_data() -> UnicodeTableRenderCtx: # Add Hangul Jamo Vowels and Hangul Trailing Consonants table[version].values.update(HANGUL_JAMO_ZEROWIDTH) + + # Remove u+00AD categoryCode=Cf name="SOFT HYPHEN", + # > https://www.unicode.org/faq/casemap_charprop.html + # + # > Q: Unicode now treats the SOFT HYPHEN as format control (Cf) + # > character when formerly it was a punctuation character (Pd). + # > Doesn't this break ISO 8859-1 compatibility? + # + # > [..] In a terminal emulation environment, particularly in + # > ISO-8859-1 contexts, one could display the SOFT HYPHEN as a hyphen + # > in all circumstances. + # + # This value was wrongly measured as a width of '0' in wcwidth + # versions 0.2.9 - 0.2.13. Fixed in 0.2.14. + table[version].values.discard(0x00AD) # SOFT HYPHEN return UnicodeTableRenderCtx('ZERO_WIDTH', table) diff --git a/docs/intro.rst b/docs/intro.rst index 829b23b..26025d6 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -217,8 +217,10 @@ Other Languages History ======= -Unreleased +0.2.14 *2025-09-18* * **Updated** tables to include Unicode Specification 16.0.0 and 17.0.0. + * **Bugfix** U+00AD SOFT HYPHEN should measure as 1, versions 0.2.9 through + 0.2.13 measured as 0. 0.2.13 *2024-01-06* * **Bugfix** zero-width support for Hangul Jamo (Korean) diff --git a/docs/specs.rst b/docs/specs.rst index bf4a076..5b8a5ca 100644 --- a/docs/specs.rst +++ b/docs/specs.rst @@ -26,7 +26,7 @@ Any characters defined by category codes in `DerivedGeneralCategory.txt`_ files: - 'Me': Enclosing Combining Mark, aprox. 13 characters. - 'Mn': Nonspacing Combining Mark, aprox. 1,839 characters. - 'Mc': Spacing Mark, aprox. 443 characters. -- 'Cf': Format control character, aprox. 161 characters. +- 'Cf': Format control characters excluding `U+00AD`_ SOFT HYPHEN, aprox. 160 characters.
- 'Zl': `U+2028`_ LINE SEPARATOR only - 'Zp': `U+2029`_ PARAGRAPH SEPARATOR only - 'Sk': Modifier Symbol, aprox. 4 characters of only those where phrase @@ -66,6 +66,7 @@ Any character in sequence with `U+FE0F`_ (Variation Selector 16) defined by .. _`U+001F`: https://codepoints.net/U+001F .. _`U+007F`: https://codepoints.net/U+007F .. _`U+00A0`: https://codepoints.net/U+00A0 +.. _`U+00AD`: https://codepoints.net/U+00AD .. _`U+1160`: https://codepoints.net/U+1160 .. _`U+11FF`: https://codepoints.net/U+11FF .. _`U+200D`: https://codepoints.net/U+200D diff --git a/docs/unicode_version.rst b/docs/unicode_version.rst index da6c4e5..778672f 100644 --- a/docs/unicode_version.rst +++ b/docs/unicode_version.rst @@ -136,6 +136,9 @@ release files: ``emoji-variation-sequences-12.0.0.txt`` *Date: 2019-01-15, 12:10:05 GMT* +``emoji-variation-sequences-15.1.0.txt`` + *Date: 2023-02-01, 02:22:54 GMT* + ``emoji-variation-sequences-17.0.0.txt`` *Date: 2025-01-30, 21:48:29 GMT* diff --git a/tests/test_core.py b/tests/test_core.py index ccb2bf0..206bbdc 100755 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -392,3 +392,9 @@ def test_zero_wide_conflict(): assert wcwidth.wcwidth(chr(0x03099), unicode_version='4.1.0') == 0 assert wcwidth.wcwidth(chr(0x0309a), unicode_version='4.1.0') == 0 assert wcwidth.wcwidth(chr(0x0309b), unicode_version='4.1.0') == 2 + +def test_soft_hyphen(): + # Test SOFT HYPHEN, category 'Cf' usually are zero-width, but most + # implementations agree to draw it as '1' cell, visually + # indistinguishable from a space, ' ' in Konsole, for example. + assert wcwidth.wcwidth(chr(0x000ad)) == 1 diff --git a/wcwidth/table_zero.py b/wcwidth/table_zero.py index df2f2cf..ef99d09 100644 --- a/wcwidth/table_zero.py +++ b/wcwidth/table_zero.py @@ -1,7 +1,7 @@ """ Exports ZERO_WIDTH table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC.
+This code generated by wcwidth/bin/update-tables.py on 2025-09-18 07:49:05 UTC. """ ZERO_WIDTH = { '4.1.0': ( @@ -9,7 +9,6 @@ # Date: 2005-02-26, 02:35:50 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -159,7 +158,6 @@ # Date: 2006-02-27, 23:41:27 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -314,7 +312,6 @@ # Date: 2008-03-20, 17:54:57 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -492,7 +489,6 @@ # Date: 2009-08-22, 04:58:21 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -698,7 +694,6 @@ # Date: 2010-08-19, 00:48:09 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -910,7 +905,6 @@ # Date: 2011-11-27, 05:10:22 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent 
..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1133,7 +1127,6 @@ # Date: 2012-05-20, 00:42:34 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1356,7 +1349,6 @@ # Date: 2013-07-05, 14:08:45 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1580,7 +1572,6 @@ # Date: 2014-02-07, 18:42:12 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1830,7 +1821,6 @@ # Date: 2015-02-13, 13:47:11 GMT [MD] # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2088,7 +2078,6 @@ # Date: 2016-06-01, 10:34:26 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2359,7 +2348,6 @@ # Date: 2017-03-08, 08:41:49 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 
0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2643,7 +2631,6 @@ # Date: 2018-02-21, 05:34:04 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2940,7 +2927,6 @@ # Date: 2019-01-22, 08:18:28 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3243,7 +3229,6 @@ # Date: 2019-03-10, 10:53:08 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3546,7 +3531,6 @@ # Date: 2019-10-21, 14:30:32 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3859,7 +3843,6 @@ # Date: 2021-07-10, 00:35:08 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -4181,7 +4164,6 @@ # Date: 2022-04-26, 23:14:35 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft 
Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -4513,7 +4495,6 @@ # Date: 2023-07-28, 23:34:02 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -4845,7 +4826,6 @@ # Date: 2024-04-30, 21:48:17 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -5188,7 +5168,6 @@ # Date: 2025-07-24, 00:12:50 GMT # (0x00000, 0x00000,), # (nil) - (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg