Skip to content

Commit

Permalink
Merge pull request #120 from lubomir/unicode-boundary
Browse files Browse the repository at this point in the history
Fix #119
  • Loading branch information
rohanpm authored Jan 10, 2020
2 parents 507cecd + bb830f2 commit 047e9ed
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
30 changes: 29 additions & 1 deletion kobo/shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ def run(self):

output = "" if is_text_mode else b""
sentinel = "" if is_text_mode else b""
leftover = None
exception = None
while True:
if buffer_size == -1:
lines = proc.stdout.readline()
Expand All @@ -353,16 +355,42 @@ def run(self):

if lines == sentinel:
break

if leftover:
lines = leftover + lines
leftover = None

if stdout:
if not is_text_mode:
sys.stdout.write(lines.decode(encoding))
try:
sys.stdout.write(lines.decode(encoding))
except UnicodeDecodeError as exc:
if exc.reason != "unexpected end of data":
# This error was not caused by us. If there is an
# incomplete sequence in the middle of the string,
# we would get "invalid continuation byte".
raise
# We split the chunk in the middle of a multibyte
# sequence. Print text until this character, and save
# the rest for later. It will be prepended to the next
# chunk. If there is no next chunk, we will re-raise
# the error.
exception = exc
leftover = lines[exc.start:]
lines = lines[:exc.start]
sys.stdout.write(lines.decode(encoding))
else:
sys.stdout.write(lines)
if logfile:
log.write(lines)
if return_stdout:
output += lines
proc.wait()
if leftover:
# There is some data left over. That means there was an unfinished
# multibyte sequence not caused by our splitting. Let's raise the
# stored exception to report it.
raise exception

finally:
if logfile:
Expand Down
18 changes: 18 additions & 0 deletions tests/test_shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,24 @@ def test_run_show_cmd_logfile_stdout(self, mock_out):
self.assertEqual(mock_out.getvalue(),
'COMMAND: echo foo\n-----------------\nfoo\n')

def test_run_split_in_middle_of_utf8_sequence(self):
    # Emit a single ASCII space followed by 10000 copies of a character
    # that encodes to two bytes (0xC4 0x8D) in UTF-8. The one leading byte
    # shifts every following character by one, presumably so that a read
    # chunk boundary falls inside a multibyte sequence -- the exact chunk
    # size is decided by run() and is not visible from this test.
    command = "printf ' ' && bash -c \"printf 'č%.0s' {1..10000}\""
    expected_output = b" " + b"\xc4\x8d" * 10000

    exit_code, captured = run(command, stdout=True)

    # The command must succeed and its output must come back complete,
    # as raw bytes, with no character lost or mangled.
    self.assertEqual(exit_code, 0)
    self.assertEqual(captured, expected_output)

def test_run_chunk_ends_with_incomplete_char(self):
    # The command's output ends with a lone 0xC4 byte, i.e. the first byte
    # of a two-byte UTF-8 sequence whose second byte never arrives. Echoing
    # that output to stdout is expected to fail with a decode error.
    command = "bash -c \"printf 'a b \\xc4'\""
    with self.assertRaises(UnicodeDecodeError):
        run(command, stdout=True)

def test_run_chunk_with_incomplete_char_in_middle(self):
    # Here the lone 0xC4 lead byte sits in the middle of the output and is
    # followed by plain ASCII instead of a continuation byte, so the data
    # is malformed regardless of how it is chunked; a decode error is
    # expected.
    command = "bash -c \"printf 'a \\xc4 b'\""
    with self.assertRaises(UnicodeDecodeError):
        run(command, stdout=True)

def test_run_other_unicode_decode_error(self):
    # The output contains a stray 0x80 byte, which cannot start a UTF-8
    # sequence (assuming run() decodes as UTF-8 here -- the encoding is
    # chosen outside this test). This is a decode failure of a different
    # kind than a truncated sequence and must still surface as
    # UnicodeDecodeError.
    command = "bash -c \"printf 'a \\x80 b'\""
    with self.assertRaises(UnicodeDecodeError):
        run(command, stdout=True)

@mock.patch('sys.stdout', new_callable=StringIO)
def test_run_univ_nl_logfile_stdout(self, mock_out):
logfile = os.path.join(self.tmp_dir, 'output.log')
Expand Down

0 comments on commit 047e9ed

Please sign in to comment.