Skip to content
This repository was archived by the owner on Oct 12, 2022. It is now read-only.

Commit 24dc693

Browse files
committed
Make encode reusable in Phobos
1 parent 3fafa7d commit 24dc693

File tree

1 file changed

+106
-80
lines changed

1 file changed

+106
-80
lines changed

src/core/internal/utf.d

+106-80
Original file line numberDiff line numberDiff line change
@@ -425,61 +425,98 @@ dchar decode(in dchar[] s, ref size_t idx)
425425
return c; // dummy return
426426
}
427427

428-
429428
/* =================== Encode ======================= */
430429

431-
/*******************************
432-
* Encodes character c and appends it to array s[].
430+
/**
431+
* Encodes `c` into the static array `buf`.
432+
*
433+
* Params:
434+
* buf = destination of encoded character
435+
* c = character to encode
436+
*
437+
* Returns:
438+
* The length of the encoded character (a number between `1` and `4` for
439+
* `char[4]` buffers and a number between `1` and `2` for `wchar[2]` buffers)
440+
* or `0` in case of failure.
433441
*/
434-
@safe pure nothrow
435-
void encode(ref char[] s, dchar c)
436-
in
442+
@nogc nothrow pure @safe
443+
size_t encode(out char[4] buf, dchar c) // UTF-8 variant: fills buf[0 .. n] and returns n (0 on failure)
444+
in
445+
{
446+
assert(isValidDchar(c)); // contract: c must satisfy isValidDchar
447+
}
448+
do
449+
{
450+
if (c <= 0x7F) // single byte: the value is stored as-is
437451
{
438-
assert(isValidDchar(c));
452+
buf[0] = cast(char) c;
453+
return 1;
439454
}
440-
do
455+
else if (c <= 0x7FF) // two bytes: 0xC0 lead byte + one 0x80 continuation byte
441456
{
442-
char[] r = s;
443-
444-
if (c <= 0x7F)
445-
{
446-
r ~= cast(char) c;
447-
}
448-
else
449-
{
450-
char[4] buf;
451-
uint L;
457+
buf[0] = cast(char)(0xC0 | (c >> 6)); // lead byte carries the bits above the low 6
458+
buf[1] = cast(char)(0x80 | (c & 0x3F)); // continuation byte carries the low 6 bits
459+
return 2;
460+
}
461+
else if (c <= 0xFFFF) // three bytes: 0xE0 lead byte + two continuation bytes
462+
{
463+
buf[0] = cast(char)(0xE0 | (c >> 12));
464+
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
465+
buf[2] = cast(char)(0x80 | (c & 0x3F));
466+
return 3;
467+
}
468+
else if (c <= 0x10FFFF) // four bytes: 0xF0 lead byte + three continuation bytes
469+
{
470+
buf[0] = cast(char)(0xF0 | (c >> 18));
471+
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
472+
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
473+
buf[3] = cast(char)(0x80 | (c & 0x3F));
474+
return 4;
475+
}
476+
return 0; // c > 0x10FFFF: no branch matched, report failure to the caller
477+
}
452478

453-
if (c <= 0x7FF)
454-
{
455-
buf[0] = cast(char)(0xC0 | (c >> 6));
456-
buf[1] = cast(char)(0x80 | (c & 0x3F));
457-
L = 2;
458-
}
459-
else if (c <= 0xFFFF)
460-
{
461-
buf[0] = cast(char)(0xE0 | (c >> 12));
462-
buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
463-
buf[2] = cast(char)(0x80 | (c & 0x3F));
464-
L = 3;
465-
}
466-
else if (c <= 0x10FFFF)
467-
{
468-
buf[0] = cast(char)(0xF0 | (c >> 18));
469-
buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
470-
buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
471-
buf[3] = cast(char)(0x80 | (c & 0x3F));
472-
L = 4;
473-
}
474-
else
475-
{
476-
assert(0);
477-
}
478-
r ~= buf[0 .. L];
479-
}
480-
s = r;
479+
/// ditto
480+
@nogc nothrow pure @safe
481+
size_t encode(out wchar[2] buf, dchar c) // UTF-16 variant: writes 1 or 2 code units into buf and returns the count (0 on failure)
482+
in
483+
{
484+
assert(isValidDchar(c)); // contract: c must satisfy isValidDchar
485+
}
486+
do
487+
{
488+
if (c <= 0xFFFF) // value fits in a single 16-bit code unit
489+
{
490+
buf[0] = cast(wchar) c;
491+
return 1;
481492
}
493+
else if (c <= 0x10FFFF) // needs two code units (a surrogate pair)
494+
{
495+
buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); // first unit: upper 10 bits of (c - 0x10000), offset by 0xD800
496+
buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00); // second unit: lower 10 bits of (c - 0x10000), offset by 0xDC00
497+
return 2;
498+
}
499+
return 0; // c > 0x10FFFF: nothing was encoded, report failure to the caller
500+
}
482501

502+
/**
503+
* Encodes character `c` with the `char[4]` overload of `encode` and appends the resulting code units to array `s`.
504+
*/
505+
nothrow pure @safe
506+
void encode(ref char[] s, dchar c)
507+
in
508+
{
509+
assert(isValidDchar(c)); // contract: c must satisfy isValidDchar
510+
}
511+
do
512+
{
513+
char[4] buf; // scratch buffer that receives the encoded code units
514+
size_t L = encode(buf, c); // L = number of code units written into buf
515+
assert(L); // If L is 0, then encode has failed
516+
s ~= buf[0 .. L]; // append only the L units that were actually written
517+
}
518+
519+
///
483520
unittest
484521
{
485522
debug(utf) printf("utf.encode.unittest\n");
@@ -499,43 +536,32 @@ unittest
499536
assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
500537
}
501538

502-
/** ditto */
503-
@safe pure nothrow
539+
/// ditto
540+
nothrow pure @safe
504541
void encode(ref wchar[] s, dchar c)
505-
in
506-
{
507-
assert(isValidDchar(c));
508-
}
509-
do
510-
{
511-
wchar[] r = s;
512-
513-
if (c <= 0xFFFF)
514-
{
515-
r ~= cast(wchar) c;
516-
}
517-
else
518-
{
519-
wchar[2] buf;
520-
521-
buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
522-
buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
523-
r ~= buf;
524-
}
525-
s = r;
526-
}
542+
in
543+
{
544+
assert(isValidDchar(c)); // contract: c must satisfy isValidDchar
545+
}
546+
do
547+
{
548+
wchar[2] buf; // scratch buffer that receives the encoded code units
549+
size_t L = encode(buf, c); // delegates to the wchar[2] overload; L = units written
550+
assert(L); // L == 0 means the fixed-buffer encode reported failure
551+
s ~= buf[0 .. L]; // append only the L units that were actually written
552+
}
527553

528-
/** ditto */
529-
@safe pure nothrow
554+
/// ditto
555+
nothrow pure @safe
530556
void encode(ref dchar[] s, dchar c)
531-
in
532-
{
533-
assert(isValidDchar(c));
534-
}
535-
do
536-
{
537-
s ~= c;
538-
}
557+
in
558+
{
559+
assert(isValidDchar(c)); // contract: c must satisfy isValidDchar
560+
}
561+
do
562+
{
563+
s ~= c; // c already has the array's element type, so it is appended unchanged
564+
}
539565

540566
/**
541567
Returns the code length of $(D c) in the encoding using $(D C) as a

0 commit comments

Comments
 (0)