diff --git a/README.md b/README.md index e49eaab..7c44efb 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ [![crates.io version](https://img.shields.io/crates/v/unicode-width)](https://crates.io/crates/unicode-width) [![Docs status](https://img.shields.io/docsrs/unicode-width)](https://docs.rs/unicode-width/) -Determine displayed width of `char` and `str` types according to [Unicode Standard Annex #11][UAX11], -other portions of the Unicode standard, and common implementations of POSIX [`wcwidth()`](https://pubs.opengroup.org/onlinepubs/9699919799/). +Determine displayed width of `char` and `str` types according to [Unicode Standard Annex #11][UAX11] +and other portions of the Unicode standard. This crate is `#![no_std]`. diff --git a/scripts/unicode.py b/scripts/unicode.py index 605edad..bb3ccc1 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -776,9 +776,6 @@ def main(module_path: str): map(lambda x: EffectiveWidth.ZERO if x[1] else x[0], zip(eaw_map, zw_map)) ) - # Override for soft hyphen - width_map[0x00AD] = EffectiveWidth.NARROW - tables = make_tables(TABLE_CFGS, enumerate(width_map)) emoji_presentations = load_emoji_presentation_sequences() diff --git a/src/lib.rs b/src/lib.rs index a31c7ca..c7af2ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,9 +9,8 @@ // except according to those terms. //! Determine displayed width of `char` and `str` types according to -//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/), -//! other portions of the Unicode standard, and common implementations of -//! POSIX [`wcwidth()`](https://pubs.opengroup.org/onlinepubs/9699919799/). +//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) +//! and other portions of the Unicode standard. //! See the [Rules for determining width](#rules-for-determining-width) section //! for the exact rules. //! @@ -39,9 +38,8 @@ //! iff their base character fulfills all the following requirements: //! 
- Has the [`Emoji_Presentation`] property, and //! - Not in the [Enclosed Ideographic Supplement] block. -//! 3. [`'\u{00AD}'` SOFT HYPHEN](https://util.unicode.org/UnicodeJsps/character.jsp?a=00AD) has width 1. -//! 4. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2. -//! 5. The following have width 0: +//! 3. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2. +//! 4. The following have width 0: //! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D) //! with the [`Default_Ignorable_Code_Point`](https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095) property. //! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D) @@ -58,13 +56,13 @@ //! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D) //! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`). //! - [`'\0'` NUL](https://util.unicode.org/UnicodeJsps/character.jsp?a=0000). -//! 6. The [control characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BCc%7D) +//! 5. The [control characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BCc%7D) //! have no defined width, and are ignored when determining the width of a string. -//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D) +//! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D) //! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2. -//! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D) +//! 7. 
[Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D) //! with an [`East_Asian_Width`] of [`Ambiguous`] have width 2 in an East Asian context, and width 1 otherwise. -//! 9. All other characters have width 1. +//! 8. All other characters have width 1. //! //! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1 //! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation diff --git a/src/tables.rs b/src/tables.rs index f2aff59..3052deb 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -330,7 +330,7 @@ pub mod charwidth { static TABLES_2: [u8; 3936] = [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xD7, 0x77, 0x75, 0xFF, + 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0xD7, 0x77, 0x71, 0xFF, 0xF7, 0x7F, 0xFF, 0x55, 0x75, 0x55, 0x55, 0x57, 0xD5, 0x57, 0xF5, 0x5F, 0x75, 0x7F, 0x5F, 0xF7, 0xD5, 0x7F, 0x77, 0x5D, 0x55, 0x55, 0x55, 0xDD, 0x55, 0xD5, 0x55, 0x55, 0xF5, 0xD5, 0x55, 0xFD, 0x55, 0x57, 0xD5, 0x7F, 0x57, 0xFF, 0x5D, 0xF5, 0x55, 0x55, 0x55, 0x55, 0xF5, diff --git a/tests/tests.rs b/tests/tests.rs index a8a5922..fb61a6c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -64,8 +64,8 @@ fn test_char2() { assert_eq!(UnicodeWidthChar::width('ｈ'), Some(2)); assert_eq!('ｈ'.width_cjk(), Some(2)); - assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(1)); - assert_eq!('\u{AD}'.width_cjk(), Some(1)); + assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(0)); + assert_eq!('\u{AD}'.width_cjk(), Some(0)); assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0)); assert_eq!('\u{1160}'.width_cjk(), Some(0));