Skip to content

Commit 93cced8

Browse files
committed
add std.uni.toCapitalize()
1 parent e5f9e07 commit 93cced8

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed

std/uni.d

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8251,6 +8251,8 @@ private auto toCaser(alias indexFn, uint maxIdx, alias tableFn, Range)(Range str
82518251
auto val = tableFn(idx);
82528252
// unpack length + codepoint
82538253
nLeft = val >> 24;
8254+
if (nLeft == 0)
8255+
nLeft = 1;
82548256
assert(nLeft <= buf.length);
82558257
buf[nLeft - 1] = cast(dchar)(val & 0xFF_FFFF);
82568258
foreach (j; 1 .. nLeft)
@@ -8382,6 +8384,192 @@ unittest
83828384
"HELLo"w.toUpperCase.equal("HELLO"d);
83838385
"HELLo"d.toLowerCase.equal("hello"d);
83848386
"HELLo"d.toUpperCase.equal("HELLO"d);
8387+
8388+
import std.utf : byChar;
8389+
assert(toLower("\u1Fe2") == toLowerCase("\u1Fe2").byChar.array);
8390+
}
8391+
8392+
import std.stdio;
8393+
/*
 * Generic capitalizer over a whole range of dchar-sized code units; returns a
 * lazy range.
 *
 * The first element of `str` is mapped through the upper-case tables supplied
 * as template arguments (one source code point may expand to as many as three
 * code points); every element after it is delegated to `toLowerCase`.
 *
 * Params:
 *  indexFnUpper = maps a code point to a table index; `ushort.max` means the
 *                 code point has no mapping and is passed through unchanged
 *  maxIdxUpper  = indices below this value are simple one-to-one mappings;
 *                 indices at or above it refer to packed multi-code-point
 *                 entries
 *  tableFnUpper = returns the table entry stored at a given index
 *  str          = the range to capitalize
 *
 * Returns:
 *  an input range of dchar; it also offers `save` when `Range` is a forward
 *  range
 */
private auto toCapitalizer(alias indexFnUpper, uint maxIdxUpper, alias tableFnUpper,
                           Range)(Range str)
    // Accept range of dchar's
    if (isInputRange!Range &&
        isSomeChar!(ElementEncodingType!Range) &&
        ElementEncodingType!Range.sizeof == dchar.sizeof)
{
    static struct ToCapitalizerImpl
    {
        @property bool empty()
        {
            // While still on the first character we are empty only if nothing
            // is buffered and the source is exhausted; afterwards the
            // lower-casing range decides.
            return lower ? lwr.empty : !nLeft && r.empty;
        }

        @property auto front()
        {
            if (lower)
                return lwr.front;

            if (!nLeft)
            {
                // Nothing buffered yet: upper-case the first code point into buf.
                dchar c = r.front;
                const idx = indexFnUpper(c);
                if (idx == ushort.max)
                {
                    // No mapping: pass the code point through unchanged.
                    buf[0] = c;
                    nLeft = 1;
                }
                else if (idx < maxIdxUpper)
                {
                    // Simple one-to-one mapping.
                    buf[0] = tableFnUpper(idx);
                    nLeft = 1;
                }
                else
                {
                    auto val = tableFnUpper(idx);
                    // unpack length + codepoint
                    nLeft = val >> 24;
                    if (nLeft == 0)
                        nLeft = 1;
                    assert(nLeft <= buf.length);
                    // buf is filled back to front so that buf[nLeft - 1] is
                    // always the next code point to hand out.
                    buf[nLeft - 1] = cast(dchar)(val & 0xFF_FFFF);
                    foreach (j; 1 .. nLeft)
                        buf[nLeft - j - 1] = tableFnUpper(idx + j);
                }
            }
            return buf[nLeft - 1];
        }

        void popFront()
        {
            if (lower)
                lwr.popFront();
            else
            {
                // Make sure the first character has been expanded even if the
                // caller never looked at front.
                if (!nLeft)
                    front();
                assert(nLeft);
                --nLeft;
                if (!nLeft)
                {
                    // First character fully emitted: hand the remainder of the
                    // source over to the lower-casing range.
                    r.popFront();
                    lwr = r.toLowerCase();
                    lower = true;
                }
            }
        }

        static if (isForwardRange!Range)
        {
            @property auto save()
            {
                auto ret = this;
                ret.r = r.save;
                // lwr is only assigned once the first character has been
                // consumed. Before that it is still .init, and saving it could
                // dereference a null inner range for reference-type ranges,
                // so only save it when it is actually in use.
                if (lower)
                    ret.lwr = lwr.save;
                return ret;
            }
        }

      private:
        Range r;
        typeof(r.toLowerCase) lwr; // range representing the lower case rest of string
        bool lower = false;     // false for first character, true for rest of string
        dchar[3] buf = void;    // pending upper-cased code points, consumed from index nLeft - 1 down
        uint nLeft = 0;         // number of code points still pending in buf
    }

    return ToCapitalizerImpl(str);
}
8483+
8484+
/*********************
 * Lazily capitalizes an input range or string: the first character is
 * converted to upper case and every following character to lower case.
 *
 * Does not allocate memory.
 * Characters in UTF-8 or UTF-16 format that cannot be decoded
 * are treated as $(XREF utf, replacementDchar).
 *
 * Params:
 *      str = string or range of characters
 *
 * Returns:
 *      an InputRange of dchars
 *
 * See_Also:
 *      $(LREF toUpper), $(LREF toLower)
 *      $(LREF toUpperCase), $(LREF toLowerCase)
 */
auto toCapitalize(Range)(Range str)
    if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    static if (ElementEncodingType!Range.sizeof >= dchar.sizeof)
    {
        // Already a range of dchar-sized code units: no decoding needed.
        return toCapitalizer!UpperTriple(str);
    }
    else
    {
        import std.utf : byDchar;

        // Narrow (UTF-8 / UTF-16) input: decode to dchar first.
        return toCapitalizer!UpperTriple(byDchar(str));
    }
}
8518+
8519+
///
@safe pure unittest
{
    import std.algorithm : equal;

    // Only the first letter ends up upper case; the rest is lower-cased.
    auto capitalized = "hEllo".toCapitalize;
    assert(equal(capitalized, "Hello"));
}
8526+
8527+
unittest
{
    import std.algorithm : equal;
    import std.array;
    import std.utf : byChar;

    // A saved copy must replay the same sequence as the range it came from.
    auto a = "hELLo".toCapitalize;
    auto savea = a.save;
    auto s = a.array;
    assert(s == "Hello");
    s = savea.array;
    assert(s == "Hello");

    // Each entry is [input, expected capitalized output].
    string[2][] cases =
    [
        ["", ""],
        ["h", "H"],
        ["H", "H"],
        ["3", "3"],
        ["123", "123"],
        ["h123A", "H123a"],
        ["феж", "Феж"],
        ["\u1Fe2", "\u03a5\u0308\u0300"], // one code point expanding to three
    ];

    foreach (i; 0 .. cases.length)
    {
        auto r = cases[i][0].toCapitalize.byChar.array;
        auto result = cases[i][1];
        assert(r == result);
    }

    // Don't call r.front: popFront alone must be able to walk the range.
    for (auto r = "\u1Fe2".toCapitalize; !r.empty; r.popFront())
    {
    }

    // wstring and dstring inputs. The results must be asserted, otherwise
    // these lines check nothing.
    assert("HELLo"w.toCapitalize.equal("Hello"d));
    assert("hElLO"w.toCapitalize.equal("Hello"d));
    assert("hello"d.toCapitalize.equal("Hello"d));
    assert("HELLO"d.toCapitalize.equal("Hello"d));

    // For a single character, capitalizing must agree with upper-casing.
    assert(toCapitalize("\u0130").byChar.array == toUpperCase("\u0130").byChar.array);
}
83868574

83878575
// TODO: helper, I wish std.utf was more flexible (and straightforward)

0 commit comments

Comments
 (0)