Skip to content

Commit 135a6b8

Browse files
committed
Fix Shadda ligatures
1 parent 75df1ed commit 135a6b8

File tree

3 files changed

+73
-6
lines changed

3 files changed

+73
-6
lines changed

arabic_reshaper/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '2.0.9'
1+
__version__ = '2.0.10'

arabic_reshaper/default-config.ini

+10-1
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,17 @@ ARABIC LIGATURE SEEN WITH MEEM WITH JEEM = no
215215
ARABIC LIGATURE SEEN WITH MEEM WITH MEEM = no
216216
ARABIC LIGATURE SEEN WITH REH = no
217217
ARABIC LIGATURE SEEN WITH YEH = no
218-
ARABIC LIGATURE SHADDA WITH DAMMA = no
218+
ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM = no
219+
ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM = no
220+
ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM = no
221+
ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM = no
222+
ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM = no
223+
ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM = no
224+
ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM = no
225+
ARABIC LIGATURE SHADDA WITH DAMMA MEDIAL FORM = no
226+
ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM = no
219227
ARABIC LIGATURE SHADDA WITH FATHA = no
228+
ARABIC LIGATURE SHADDA WITH DAMMA = no
220229
ARABIC LIGATURE SHADDA WITH KASRA = no
221230
ARABIC LIGATURE SHEEN WITH ALEF MAKSURA = no
222231
ARABIC LIGATURE SHEEN WITH HAH = no

arabic_reshaper/ligatures.py

+62-4
Original file line numberDiff line numberDiff line change
@@ -661,15 +661,73 @@
661661
('ARABIC LIGATURE SEEN WITH YEH', (
662662
'\u0633\u064A', ('\uFCFC', '', '', '\uFD18'),
663663
)),
664-
('ARABIC LIGATURE SHADDA WITH DAMMA', (
665-
'\u0640\u064F\u0651', ('', '', '\uFCF3', ''),
664+
665+
# Arabic ligatures with Shadda; the two marks may appear in either order
666+
('ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM', (
667+
'(?:\u064C\u0651|\u0651\u064C)',
668+
669+
('\uFC5E', '\uFC5E', '\uFC5E', '\uFC5E'),
670+
)),
671+
('ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM', (
672+
'(?:\u064D\u0651|\u0651\u064D)',
673+
674+
('\uFC5F', '\uFC5F', '\uFC5F', '\uFC5F'),
675+
)),
676+
('ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM', (
677+
'(?:\u064E\u0651|\u0651\u064E)',
678+
679+
('\uFC60', '\uFC60', '\uFC60', '\uFC60'),
666680
)),
681+
('ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM', (
682+
'(?:\u064F\u0651|\u0651\u064F)',
683+
684+
('\uFC61', '\uFC61', '\uFC61', '\uFC61'),
685+
)),
686+
('ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM', (
687+
'(?:\u0650\u0651|\u0651\u0650)',
688+
689+
('\uFC62', '\uFC62', '\uFC62', '\uFC62'),
690+
)),
691+
('ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM', (
692+
'(?:\u0670\u0651|\u0651\u0670)',
693+
694+
('\uFC63', '\uFC63', '\uFC63', '\uFC63'),
695+
)),
696+
697+
# Special case: when the pair follows a Tatweel, the medial-form ligature is used
698+
('ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM', (
699+
'\u0640(?:\u064E\u0651|\u0651\u064E)',
700+
701+
('\uFCF2', '\uFCF2', '\uFCF2', '\uFCF2'),
702+
)),
703+
('ARABIC LIGATURE SHADDA WITH DAMMA MEDIAL FORM', (
704+
'\u0640(?:\u064F\u0651|\u0651\u064F)',
705+
706+
('\uFCF3', '\uFCF3', '\uFCF3', '\uFCF3'),
707+
)),
708+
('ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM', (
709+
'\u0640(?:\u0650\u0651|\u0651\u0650)',
710+
711+
('\uFCF4', '\uFCF4', '\uFCF4', '\uFCF4'),
712+
)),
713+
714+
# Repeated under the old key names for backward compatibility
667715
('ARABIC LIGATURE SHADDA WITH FATHA', (
668-
'\u0640\u064E\u0651', ('', '', '\uFCF2', ''),
716+
'\u0640(?:\u064E\u0651|\u0651\u064E)',
717+
718+
('\uFCF2', '\uFCF2', '\uFCF2', '\uFCF2'),
719+
)),
720+
('ARABIC LIGATURE SHADDA WITH DAMMA', (
721+
'\u0640(?:\u064F\u0651|\u0651\u064F)',
722+
723+
('\uFCF3', '\uFCF3', '\uFCF3', '\uFCF3'),
669724
)),
670725
('ARABIC LIGATURE SHADDA WITH KASRA', (
671-
'\u0640\u0650\u0651', ('', '', '\uFCF4', ''),
726+
'\u0640(?:\u0650\u0651|\u0651\u0650)',
727+
728+
('\uFCF4', '\uFCF4', '\uFCF4', '\uFCF4'),
672729
)),
730+
673731
('ARABIC LIGATURE SHEEN WITH ALEF MAKSURA', (
674732
'\u0634\u0649', ('\uFCFD', '', '', '\uFD19'),
675733
)),

0 commit comments

Comments
 (0)