Skip to content

Commit

Permalink
Fallback to UTF-8 when decoding by codepage fails
Browse files: browse the repository at this point in the history
  • Loading branch information
Matmaus committed May 19, 2024
1 parent 82280f1 commit caef2ed
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions LnkParse3/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@ def _chars_to_string(lst):
  bin_string = b"".join(lst)
  try:
  string = bin_string.decode(self.cp)
- except UnicodeDecodeError as e:
- string = bin_string.decode(self.cp, errors="replace")
- msg = f"Error while decoding string `{string}` ({e})"
- warnings.warn(msg)
+ except UnicodeDecodeError:
+ # Fallback to UTF-8 before giving up.
+ try:
+ string = bin_string.decode("utf-8")
+ except UnicodeDecodeError as e:
+ string = bin_string.decode(self.cp, errors="replace")
+ msg = f"Error while decoding string `{string}` ({e})"
+ warnings.warn(msg)
  yield string

  for char in binary:
Expand Down

0 comments on commit caef2ed

Please sign in to comment.