Skip to content

Commit 0839a95

Browse files
committed
Proper support for ZWJ
1 parent 135a6b8 commit 0839a95

File tree

4 files changed

+27
-48
lines changed

4 files changed

+27
-48
lines changed

arabic_reshaper/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '2.0.10'
1+
__version__ = '2.0.11'

arabic_reshaper/arabic_reshaper.py

+10-46
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,7 @@ def reshape(self, text):
169169
support_zwj = self.configuration.getboolean('support_zwj')
170170
positions_harakat = {}
171171

172-
arabic_word_start = -1
173-
zwjs = []
174-
175-
for i, letter in enumerate(text):
172+
for letter in text:
176173
if HARAKAT_RE.match(letter):
177174
if not delete_harakat:
178175
position = len(output) - 1
@@ -181,51 +178,15 @@ def reshape(self, text):
181178
positions_harakat[position].append(letter)
182179
elif letter == TATWEEL and delete_tatweel:
183180
pass
184-
elif letter == ZWJ and support_zwj:
185-
zwjs.append(i)
186-
187-
if arabic_word_start != -1:
188-
# Handle three consecutive ZWJs or more
189-
if (
190-
len(zwjs) > 2 and
191-
zwjs[-2] == i - 1 and
192-
zwjs[-3] == i - 2
193-
):
194-
arabic_word_start = -1
195-
# Handle when previous letter is not ZWJ
196-
elif (
197-
output and
198-
len(zwjs) == 1 or (len(zwjs) > 1 and zwjs[-2] != i - 1)
199-
):
200-
previous_letter = output[-1]
201-
if connects_with_letter_after(previous_letter[LETTER]):
202-
if previous_letter[FORM] == ISOLATED:
203-
output[-1] = (
204-
previous_letter[LETTER],
205-
INITIAL
206-
)
207-
else:
208-
output[-1] = (
209-
previous_letter[LETTER],
210-
MEDIAL
211-
)
181+
elif letter == ZWJ and not support_zwj:
182+
pass
212183
elif letter not in LETTERS:
213-
arabic_word_start = -1
214184
output.append((letter, NOT_SUPPORTED))
215185
elif not output: # first letter
216-
arabic_word_start = i
217186
output.append((letter, ISOLATED))
218187
else:
219-
if arabic_word_start == -1:
220-
arabic_word_start = i
221188
previous_letter = output[-1]
222-
if (
223-
arabic_word_start != i and
224-
zwjs and
225-
connects_with_letter_before(letter)
226-
):
227-
output.append((letter, FINAL))
228-
elif previous_letter[FORM] == NOT_SUPPORTED:
189+
if previous_letter[FORM] == NOT_SUPPORTED:
229190
output.append((letter, ISOLATED))
230191
elif not connects_with_letter_before(letter):
231192
output.append((letter, ISOLATED))
@@ -253,9 +214,12 @@ def reshape(self, text):
253214
)
254215
output.append((letter, FINAL))
255216

256-
# clear ZWJs
257-
if zwjs and letter != ZWJ:
258-
zwjs = []
217+
# Remove ZWJ if it's the second to last item as it won't be useful
218+
if support_zwj and len(output) > 1 and output[-2][LETTER] == ZWJ:
219+
output.pop(len(output) - 2)
220+
221+
if support_zwj and output and output[-1][LETTER] == ZWJ:
222+
output.pop()
259223

260224
if self.configuration.getboolean('support_ligatures'):
261225
# Clean text from Harakat to be able to find ligatures

arabic_reshaper/letters.py

+3
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@
178178
'\u06D2': ('\uFBAE', '', '', '\uFBAF'),
179179
# ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
180180
'\u06D3': ('\uFBB0', '', '', '\uFBB1'),
181+
182+
# ZWJ
183+
ZWJ: (ZWJ, ZWJ, ZWJ, ZWJ),
181184
}
182185

183186

arabic_reshaper/tests/test_002_reshaping.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,19 @@ def setUp(self):
7070
),
7171
(
7272
letters.ZWJ + BEH + HAMZA,
73-
BEH_ISOLATED + HAMZA_ISOLATED
73+
BEH_FINAL + HAMZA_ISOLATED
74+
),
75+
(
76+
letters.ZWJ + BEH,
77+
BEH_FINAL
78+
),
79+
(
80+
BEH + letters.ZWJ,
81+
BEH_INITIAL
82+
),
83+
(
84+
letters.ZWJ + BEH + letters.ZWJ,
85+
BEH_MEDIAL
7486
),
7587
(
7688
BEH + letters.ZWJ + HAMZA,

0 commit comments

Comments
 (0)