Skip to content

Commit

Permalink
Mark U+070F SYRIAC ABBREVIATION MARK as zero width
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed May 21, 2024
1 parent 7cb4f39 commit 934c875
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
4 changes: 4 additions & 0 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ def load_zero_widths() -> "list[bool]":
# width 2. Therefore, we treat it as having width 2.
zw_map[0x115F] = False

# Syriac abbreviation mark
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
zw_map[0x070F] = True

return zw_map


Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
//! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
//! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
Expand All @@ -78,6 +79,7 @@
//!
//! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
//!
//! [Syriac]: https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G13006
//! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
//!
//! ## Canonical equivalence
Expand Down
2 changes: 1 addition & 1 deletion src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ pub mod charwidth {
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x14, 0x00, 0x14, 0x04,
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x05, 0x00, 0x00, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
Expand Down
8 changes: 7 additions & 1 deletion tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,17 @@ fn test_jamo() {
// Checks that characters with the `Prepended_Concatenation_Mark` property
// are reported as width 1.
//
// U+070F SYRIAC ABBREVIATION MARK also has this property, but unlike the
// others it is treated as zero-width, so it is deliberately not asserted
// here; it is covered by `test_syriac_abbreviation_mark`.
#[test]
fn test_prepended_concatenation_marks() {
    assert_eq!('\u{0600}'.width(), Some(1));
    assert_eq!('\u{08E2}'.width(), Some(1));
    assert_eq!('\u{110BD}'.width(), Some(1));
}

// U+070F SYRIAC ABBREVIATION MARK is a `Prepended_Concatenation_Mark`, but
// unlike the other marks with that property it is zero-width. Verify this
// both for the lone `char` and for a string containing only that character.
#[test]
fn test_syriac_abbreviation_mark() {
    const ABBREVIATION_MARK: char = '\u{070F}';

    // Per-character width.
    let char_width = ABBREVIATION_MARK.width();
    assert_eq!(char_width, Some(0));

    // String width.
    let str_width = "\u{070F}".width();
    assert_eq!(str_width, 0);
}


#[test]
fn test_interlinear_annotation_chars() {
assert_eq!('\u{FFF9}'.width(), Some(1));
Expand Down

0 comments on commit 934c875

Please sign in to comment.