Skip to content

Commit

Permalink
Uni 15: new block Arabic_Extended_C with default Bidi_Class=AL
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Dec 9, 2021
1 parent ee84402 commit 4365206
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 10 deletions.
13 changes: 7 additions & 6 deletions unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedBidiClass-15.0.0.txt
-# Date: 2021-12-09, 17:39:32 GMT
+# Date: 2021-12-09, 22:21:24 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand All @@ -15,14 +15,14 @@
#
# The unassigned code points that default to AL are in the ranges:
# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF
-# \U00010D00-\U00010D3F \U00010F30-\U00010F6F
+# \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F
# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
#
# This includes code points in the Arabic, Syriac, and Thaana blocks, among others.
#
# The unassigned code points that default to R are in the ranges:
# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F
-# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF
+# \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF
# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
#
# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others.
Expand Down Expand Up @@ -1244,7 +1244,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
10EAD ; R # Pd YEZIDI HYPHENATION MARK
10EAE..10EAF ; R # Cn [2] <reserved-10EAE>..<reserved-10EAF>
10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EB2..10EFC ; R # Cn [75] <reserved-10EB2>..<reserved-10EFC>
+10EB2..10EBF ; R # Cn [14] <reserved-10EB2>..<reserved-10EBF>
10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
Expand Down Expand Up @@ -1272,7 +1272,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
1ED50..1EDFF ; R # Cn [176] <reserved-1ED50>..<reserved-1EDFF>
1EF00..1EFFF ; R # Cn [256] <reserved-1EF00>..<reserved-1EFFF>

-# Total code points: 3708
+# Total code points: 3647

# ================================================

Expand Down Expand Up @@ -2388,6 +2388,7 @@ FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE>
10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10D28..10D2F ; AL # Cn [8] <reserved-10D28>..<reserved-10D2F>
10D3A..10D3F ; AL # Cn [6] <reserved-10D3A>..<reserved-10D3F>
+10EC0..10EFC ; AL # Cn [61] <reserved-10EC0>..<reserved-10EFC>
10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
Expand Down Expand Up @@ -2472,7 +2473,7 @@ FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE>
1EEBC..1EEEF ; AL # Cn [52] <reserved-1EEBC>..<reserved-1EEEF>
1EEF2..1EEFF ; AL # Cn [14] <reserved-1EEF2>..<reserved-1EEFF>

-# Total code points: 1708
+# Total code points: 1769

# ================================================

Expand Down
5 changes: 5 additions & 0 deletions unicodetools/src/main/java/org/unicode/text/UCD/UCD.java
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,11 @@ public byte getBidiClass(int codePoint) {
// New block 0870..089F "Arabic Extended-B" defaults to bc=AL.
blockData.keySet("Arabic_Extended_B", BIDI_AL_SET);
}
if (versionInfo.getMajor() >= 15) {
// Unicode 15:
// New block 10EC0..10EFF "Arabic Extended-C" defaults to bc=AL.
blockData.keySet("Arabic_Extended_C", BIDI_AL_SET);
}
BIDI_R_Delta.removeAll(BIDI_R_SET).removeAll(BIDI_AL_SET);
if (SHOW_LOADING) {
System.out.println("R: Adding " + BIDI_R_Delta);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,14 @@ Property: Bidi_Class
#
# The unassigned code points that default to AL are in the ranges:
# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF
-# \U00010D00-\U00010D3F \U00010F30-\U00010F6F
+# \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F
# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
#
# This includes code points in the Arabic, Syriac, and Thaana blocks, among others.
#
# The unassigned code points that default to R are in the ranges:
# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F
-# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF
+# \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF
# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
#
# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ Let $BMExclusions =[≠ ∤ ∦ ≢ \u2ADC]
In [\p{dt=canonical}-$BMExclusions] Bidi_M * \P{bc=NSM} * dm = Bidi_M * \P{bc=NSM}

# Additional BIDI invariant constants
-Let $AL_blocks = [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U00010D00-\U00010D3F \U00010F30-\U00010F6F \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
-Let $R_blocks = [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
+Let $AL_blocks = [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
+Let $R_blocks = [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
# 6.1.0 updated blocks
# 10.0 updated blocks (Syriac Supplement is bc=AL)
# 11.0 updated blocks (Hanifi Rohingya, Sogdian, Indic Siyaq Numbers are bc=AL); Old Sogdian is bc=R
Expand Down

0 comments on commit 4365206

Please sign in to comment.