Skip to content

Commit

Permalink
FIX: ICONV from UTF16 with BOM does not skip the BOM on Windows
Browse files Browse the repository at this point in the history
fixes: #19
  • Loading branch information
Oldes committed May 25, 2020
1 parent 2d0ebc1 commit 0bb9d95
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/core/u-iconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -611,10 +611,21 @@ static REBYTE* get_codepage_name(REBVAL *cp)
return R_NONE;
}

REBYTE *bp;

if (cp == 1200 || cp == 1201) { // data are already wide (UTF-16LE or UTF-16BE)
dst_len = VAL_LEN(data) / 2;
dst_len = src_len / 2;
dst_wide = Make_Series(dst_len + 1, 2, FALSE);
memcpy(BIN_HEAD(dst_wide), VAL_BIN_AT(data), VAL_LEN(data));
bp = VAL_BIN_AT(data);
if ( src_len >= 2 && (
(0xFF == bp[0] && 0xFE == bp[1]) ||
(0xFE == bp[0] && 0xFF == bp[1])
)) { // skip BOM
src_len -= 2;
dst_len -= 1;
bp += 2;
}
memcpy(BIN_HEAD(dst_wide), bp, src_len);
dst_wide->tail = dst_len;
TERM_SERIES(dst_wide);
if (ref_to) {
Expand Down
4 changes: 4 additions & 0 deletions src/tests/units/series-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,10 @@ Rebol [
--assert "" = iconv #{} 28592
--assert "" = iconv #{} 'utf8

--test-- "ICONV from UTF-16 with BOM"
--assert "Writer" = iconv #{FEFF005700720069007400650072} 'UTF-16BE
--assert "Writer" = iconv #{FFFE570072006900740065007200} 'UTF-16LE


--test-- "ICONV/TO (conversion to different codepage - binary result)"
bin: to binary! txt ; normally conversion is done to UTF-8
Expand Down

0 comments on commit 0bb9d95

Please sign in to comment.