Skip to content

Commit

Permalink
FIX: LF to CRLF conversion when using write with string input
Browse files Browse the repository at this point in the history
  • Loading branch information
Oldes committed Jan 26, 2024
1 parent da19058 commit 592454e
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 29 deletions.
3 changes: 2 additions & 1 deletion src/core/p-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ REBINT Mode_Syms[] = {
}
else if (IS_STRING(data)) {
// Auto convert string to UTF-8
ser = Encode_UTF8_Value(data, len, ENCF_OS_CRLF);
// Convert LF to CRLF on Windows unless the /binary refinement is used!
ser = Encode_UTF8_Value(data, len, (args & AM_WRITE_BINARY) ? 0 : ENCF_OS_CRLF);
file->data = ser? BIN_HEAD(ser) : VAL_BIN_DATA(data); // No encoding may be needed
len = SERIES_TAIL(ser);
}
Expand Down
105 changes: 77 additions & 28 deletions src/core/s-unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -800,47 +800,96 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
REBINT n;
REBYTE buf[8];
REBYTE *bs = dst; // save start
REBYTE *bp = (REBYTE*)src;
REBUNI *up = (REBUNI*)src;
REBYTE *bp;
REBUNI *up;
REBLEN cnt;

if (len) cnt = *len;
else {
if (uni) {
if (uni) {
up = (REBUNI*)src;
if (!len) {
// not using wcslen, because on some systems wchar_t has 4 bytes!
cnt = 0;
while (*up++ != 0 && cnt < (REBLEN)max) cnt++;
up = (REBUNI*)src;
} else
cnt = LEN_BYTES(bp);
}

for (; max > 0 && cnt > 0; cnt--) {
c = uni ? *up++ : *bp++;
if (c < 0x80) {
}
for (; max > 0 && cnt > 0; cnt--) {
c = *up++;
if (c < 0x80) {
#if defined(TO_WINDOWS)
if (ccr && c == LF) {
// If there's not room, don't try to output CRLF
if (2 > max) {up--; break;}
*dst++ = CR;
if (ccr) {
if (c == CR && up[0] == LF) {
*dst++ = CR;
*dst++ = LF;
up++;
cnt--;
max -= 2;
continue;
}
if (c == LF) {
// If there's not room, don't try to output CRLF
if (2 > max) { up--; break; }
*dst++ = CR;
max--;
c = LF;
}
}
#endif
*dst++ = (REBYTE)c;
max--;
c = LF;
}
#endif
*dst++ = (REBYTE)c;
max--;
else {
n = Encode_UTF8_Char(buf, c);
if (n > max) { up--; break; }
memcpy(dst, buf, n);
dst += n;
max -= n;
}
}
else {
n = Encode_UTF8_Char(buf, c);
if (n > max) {up--; break;}
memcpy(dst, buf, n);
dst += n;
max -= n;
if (len) *len = dst - bs;
return up - (REBUNI*)src;
}
else {
bp = (REBYTE*)src;
if (!len) cnt = LEN_BYTES(bp);
for (; max > 0 && cnt > 0; cnt--) {
c = *bp++;
if (c < 0x80) {
#if defined(TO_WINDOWS)
if (ccr) {
if (c == CR && bp[0] == LF) {
*dst++ = CR;
*dst++ = LF;
bp++;
cnt--;
max -= 2;
continue;
}
if (c == LF) {
// If there's not room, don't try to output CRLF
if (2 > max) { bp--; break; }
*dst++ = CR;
max--;
c = LF;
}
}
#endif
*dst++ = (REBYTE)c;
max--;
}
else {
n = Encode_UTF8_Char(buf, c);
if (n > max) { bp--; break; }
memcpy(dst, buf, n);
dst += n;
max -= n;
}
}
if (len) *len = dst - bs;
return bp - (REBYTE*)src;
}

if (len) *len = dst - bs;
return uni ? up - (REBUNI*)src : bp - (REBYTE*)src;

}


Expand Down Expand Up @@ -928,7 +977,7 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
else {
REBYTE *bp = (REBYTE*)src;

if (Is_Not_ASCII(bp, len)) {
if (ccr || Is_Not_ASCII(bp, len)) {
size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
Encode_UTF8(cp, size, bp, &len, FALSE, ccr);
Expand Down
55 changes: 55 additions & 0 deletions src/tests/units/port-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,61 @@ if system/platform = 'Windows [
--assert "a^/b^/c" = read/string %units/files/issue-622.txt
delete %units/files/issue-622.txt

--test-- "read write CRLF conversion"
;@@ https://github.com/Oldes/Rebol-issues/issues/2586
;; These tests use #"a" and #"á" as the first character so that both
;; internally plain and internally wide strings are exercised.
;; Most inputs are passed through `next`, so only the line-ending
;; characters after the first character are written.
;; write/binary keeps the linefeeds without modifications
--assert #{0A} = read write/binary %tmp next "a^/"
--assert #{0A} = read write/binary %tmp next "á^/"
--assert #{0D0A} = read write/binary %tmp next "a^M^/"
--assert #{0D0A} = read write/binary %tmp next "á^M^/"
--assert #{0D0D0A} = read write/binary %tmp next "a^M^M^/"
--assert #{0D0D0A} = read write/binary %tmp next "á^M^M^/"
--assert #{0D0A0A} = read write/binary %tmp next "a^M^/^/"
--assert #{0D0A0A} = read write/binary %tmp next "á^M^/^/"
;; it is possible to get the original string back using implicit conversion
--assert "a^/" = to string! read write/binary %tmp "a^/"
--assert "á^/" = to string! read write/binary %tmp "á^/"
--assert "a^M^/" = to string! read write/binary %tmp "a^M^/"
--assert "á^M^/" = to string! read write/binary %tmp "á^M^/"
--assert "a^M^M^/" = to string! read write/binary %tmp "a^M^M^/"
--assert "á^M^M^/" = to string! read write/binary %tmp "á^M^M^/"
--assert "a^M^/^/" = to string! read write/binary %tmp "a^M^/^/"
--assert "á^M^/^/" = to string! read write/binary %tmp "á^M^/^/"
either system/platform = 'Windows [
	;; on Windows, write converts LF to CRLF by default (if the input is string!)
	--assert #{0D0A} = read write %tmp next "a^/"
	--assert #{0D0A} = read write %tmp next "á^/"
	;; when there is already a CRLF, it is not written as CRCRLF!
	--assert #{0D0A} = read write %tmp next "a^M^/"
	--assert #{0D0A} = read write %tmp next "á^M^/"
	--assert #{0D0D0A} = read write %tmp next "a^M^M^/"
	--assert #{0D0D0A} = read write %tmp next "á^M^M^/"
	--assert #{0D0A0D0A} = read write %tmp next "a^M^/^/"
	--assert #{0D0A0D0A} = read write %tmp next "á^M^/^/"
][
	;; on all other platforms no modifications are made!
	--assert #{0A} = read write %tmp next "a^/"
	--assert #{0A} = read write %tmp next "á^/"
	--assert #{0D0A} = read write %tmp next "a^M^/"
	--assert #{0D0A} = read write %tmp next "á^M^/"
	--assert #{0D0D0A} = read write %tmp next "a^M^M^/"
	--assert #{0D0D0A} = read write %tmp next "á^M^M^/"
	--assert #{0D0A0A} = read write %tmp next "a^M^/^/"
	--assert #{0D0A0A} = read write %tmp next "á^M^/^/"
]
;; read/string converts CRLF (or plain CR) to LF
--assert "^/" = read/string write/binary %tmp next "a^/"
--assert "^/" = read/string write/binary %tmp next "á^/"
--assert "^/" = read/string write/binary %tmp next "a^M"
--assert "^/" = read/string write/binary %tmp next "á^M"
--assert "^/" = read/string write/binary %tmp next "a^M^/"
--assert "^/" = read/string write/binary %tmp next "á^M^/"
--assert "^/^/" = read/string write/binary %tmp next "a^M^M^/"
--assert "^/^/" = read/string write/binary %tmp next "á^M^M^/"
--assert "^/^/" = read/string write/binary %tmp next "a^M^/^/"
--assert "^/^/" = read/string write/binary %tmp next "á^M^/^/"


--test-- "write file result - wish/2337"
;@@ https://github.com/Oldes/Rebol-issues/issues/2337
Expand Down

0 comments on commit 592454e

Please sign in to comment.