Skip to content

Commit 6e1b832

Browse files
authored
bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556)
1 parent 2b67c7a commit 6e1b832

File tree

3 files changed

+21
-13
lines changed

3 files changed

+21
-13
lines changed

Lib/test/test_codecencodings_cn.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
8282
(b'ab~cd', 'replace', u'ab\uFFFDd'),
8383
(b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
8484
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
85+
# issue 30003
86+
(u'ab~cd', 'strict', b'ab~~cd'), # escape ~
87+
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
88+
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
8589
)
8690

8791
def test_main():

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ Extension Modules
4949
Library
5050
-------
5151

52+
- bpo-30003: Fix handling of escape characters in the HZ codec. Based on a patch
53+
by Ma Lin.
54+
5255
- bpo-30375: Warnings emitted when compiling a regular expression now always
5356
point to the line in the user code. Previously they could point into the internals
5457
of the re module if emitted from inside of groups or conditionals.

Modules/cjkcodecs/_codecs_cn.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -335,15 +335,17 @@ ENCODER(hz)
335335
DBCHAR code;
336336

337337
if (c < 0x80) {
338-
if (state->i == 0) {
339-
WRITE1((unsigned char)c)
340-
NEXT(1, 1)
341-
}
342-
else {
343-
WRITE3('~', '}', (unsigned char)c)
344-
NEXT(1, 3)
338+
if (state->i) {
339+
WRITE2('~', '}')
340+
NEXT_OUT(2)
345341
state->i = 0;
346342
}
343+
WRITE1((unsigned char)c)
344+
NEXT(1, 1)
345+
if (c == '~') {
346+
WRITE1('~')
347+
NEXT_OUT(1)
348+
}
347349
continue;
348350
}
349351

@@ -390,20 +392,19 @@ DECODER(hz)
390392
unsigned char c2 = IN2;
391393

392394
REQUIRE_INBUF(2)
393-
if (c2 == '~') {
395+
if (c2 == '~' && state->i == 0) {
394396
WRITE1('~')
395-
NEXT(2, 1)
396-
continue;
397+
NEXT_OUT(1)
397398
}
398399
else if (c2 == '{' && state->i == 0)
399400
state->i = 1; /* set GB */
401+
else if (c2 == '\n' && state->i == 0)
402+
; /* line-continuation */
400403
else if (c2 == '}' && state->i == 1)
401404
state->i = 0; /* set ASCII */
402-
else if (c2 == '\n')
403-
; /* line-continuation */
404405
else
405406
return 2;
406-
NEXT(2, 0);
407+
NEXT_IN(2)
407408
continue;
408409
}
409410

0 commit comments

Comments
 (0)