Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions bin/update-tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,21 @@ def fetch_table_zero_data() -> UnicodeTableRenderCtx:

# Add Hangul Jamo Vowels and Hangul Trailing Consonants
table[version].values.update(HANGUL_JAMO_ZEROWIDTH)

# Remove u+00AD categoryCode=Cf name="SOFT HYPHEN",
# > https://www.unicode.org/faq/casemap_charprop.html
#
# > Q: Unicode now treats the SOFT HYPHEN as format control (Cf)
# > character when formerly it was a punctuation character (Pd).
# > Doesn't this break ISO 8859-1 compatibility?
#
# > [..] In a terminal emulation environment, particularly in
# > ISO-8859-1 contexts, one could display the SOFT HYPHEN as a hyphen
# > in all circumstances.
#
# This value was wrongly measured as a width of '0' in wcwidth
# versions 0.2.9 through 0.2.13. Fixed in 0.2.14.
table[version].values.discard(0x00AD) # SOFT HYPHEN
return UnicodeTableRenderCtx('ZERO_WIDTH', table)


Expand Down
4 changes: 3 additions & 1 deletion docs/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,10 @@ Other Languages
History
=======

Unreleased
0.2.14 *2025-09-18*
* **Updated** tables to include Unicode Specification 16.0.0 and 17.0.0.
* **Bugfix** U+00AD SOFT HYPHEN should measure as 1; versions 0.2.9 through
0.2.13 measured it as 0.

0.2.13 *2024-01-06*
* **Bugfix** zero-width support for Hangul Jamo (Korean)
Expand Down
3 changes: 2 additions & 1 deletion docs/specs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Any characters defined by category codes in `DerivedGeneralCategory.txt`_ files:
- 'Me': Enclosing Combining Mark, approx. 13 characters.
- 'Mn': Nonspacing Combining Mark, approx. 1,839 characters.
- 'Mc': Spacing Mark, approx. 443 characters.
- 'Cf': Format control character, approx. 161 characters.
- 'Cf': Format control characters excluding `U+00AD` SOFT HYPHEN, approx. 160 characters.
- 'Zl': `U+2028`_ LINE SEPARATOR only
- 'Zp': `U+2029`_ PARAGRAPH SEPARATOR only
- 'Sk': Modifier Symbol, aprox. 4 characters of only those where phrase
Expand Down Expand Up @@ -66,6 +66,7 @@ Any character in sequence with `U+FE0F`_ (Variation Selector 16) defined by
.. _`U+001F`: https://codepoints.net/U+001F
.. _`U+007F`: https://codepoints.net/U+007F
.. _`U+00A0`: https://codepoints.net/U+00A0
.. _`U+00AD`: https://codepoints.net/U+00AD
.. _`U+1160`: https://codepoints.net/U+1160
.. _`U+11FF`: https://codepoints.net/U+11FF
.. _`U+200D`: https://codepoints.net/U+200D
Expand Down
3 changes: 3 additions & 0 deletions docs/unicode_version.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ release files:
``emoji-variation-sequences-12.0.0.txt``
*Date: 2019-01-15, 12:10:05 GMT*

``emoji-variation-sequences-15.1.0.txt``
*Date: 2023-02-01, 02:22:54 GMT*

``emoji-variation-sequences-17.0.0.txt``
*Date: 2025-01-30, 21:48:29 GMT*

6 changes: 6 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,9 @@ def test_zero_wide_conflict():
assert wcwidth.wcwidth(chr(0x03099), unicode_version='4.1.0') == 0
assert wcwidth.wcwidth(chr(0x0309a), unicode_version='4.1.0') == 0
assert wcwidth.wcwidth(chr(0x0309b), unicode_version='4.1.0') == 2

def test_soft_hyphen():
    """U+00AD SOFT HYPHEN measures as one cell, not zero."""
    # Characters of category 'Cf' are usually zero-width, but most
    # implementations agree to draw SOFT HYPHEN as 1 cell; in Konsole, for
    # example, it is visually indistinguishable from a space, ' '.
    assert wcwidth.wcwidth(chr(0x000ad)) == 1
    # The entry was dropped from the per-version zero-width tables as well,
    # so an explicitly requested table such as 4.1.0 must agree with the
    # default one.
    assert wcwidth.wcwidth(chr(0x000ad), unicode_version='4.1.0') == 1
23 changes: 1 addition & 22 deletions wcwidth/table_zero.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""
Exports ZERO_WIDTH table keyed by supporting unicode version level.

This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC.
This code generated by wcwidth/bin/update-tables.py on 2025-09-18 07:49:05 UTC.
"""
ZERO_WIDTH = {
'4.1.0': (
# Source: DerivedGeneralCategory-4.1.0.txt
# Date: 2005-02-26, 02:35:50 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili
(0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli
Expand Down Expand Up @@ -159,7 +158,6 @@
# Date: 2006-02-27, 23:41:27 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili
(0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli
Expand Down Expand Up @@ -314,7 +312,6 @@
# Date: 2008-03-20, 17:54:57 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -492,7 +489,6 @@
# Date: 2009-08-22, 04:58:21 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -698,7 +694,6 @@
# Date: 2010-08-19, 00:48:09 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -910,7 +905,6 @@
# Date: 2011-11-27, 05:10:22 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -1133,7 +1127,6 @@
# Date: 2012-05-20, 00:42:34 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -1356,7 +1349,6 @@
# Date: 2013-07-05, 14:08:45 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -1580,7 +1572,6 @@
# Date: 2014-02-07, 18:42:12 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -1830,7 +1821,6 @@
# Date: 2015-02-13, 13:47:11 GMT [MD]
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -2088,7 +2078,6 @@
# Date: 2016-06-01, 10:34:26 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -2359,7 +2348,6 @@
# Date: 2017-03-08, 08:41:49 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -2643,7 +2631,6 @@
# Date: 2018-02-21, 05:34:04 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -2940,7 +2927,6 @@
# Date: 2019-01-22, 08:18:28 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -3243,7 +3229,6 @@
# Date: 2019-03-10, 10:53:08 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -3546,7 +3531,6 @@
# Date: 2019-10-21, 14:30:32 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -3859,7 +3843,6 @@
# Date: 2021-07-10, 00:35:08 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -4181,7 +4164,6 @@
# Date: 2022-04-26, 23:14:35 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -4513,7 +4495,6 @@
# Date: 2023-07-28, 23:34:02 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -4845,7 +4826,6 @@
# Date: 2024-04-30, 21:48:17 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down Expand Up @@ -5188,7 +5168,6 @@
# Date: 2025-07-24, 00:12:50 GMT
#
(0x00000, 0x00000,), # (nil)
(0x000ad, 0x000ad,), # Soft Hyphen
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
Expand Down
Loading