Skip to content

Commit

Permalink
Upstream Unicode table generator and update tables to v15
Browse files Browse the repository at this point in the history
  • Loading branch information
rikkimax committed Dec 11, 2022
1 parent 08be610 commit 122df92
Show file tree
Hide file tree
Showing 10 changed files with 3,784 additions and 19,407 deletions.
2 changes: 2 additions & 0 deletions .dscanner.ini
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,5 @@ trust_too_much="-std.regex,-std.stdio,-std.uni,-std.internal.cstring"
; Checks for if statements whose 'then' block is the same as the 'else' block
; Temporarily disable until https://github.com/dlang-community/D-Scanner/issues/593 is fixed
if_else_same_check="-std.typecons"
; Disable long-line checks for the generated Unicode tables
long_line_check="-std.internal.unicode_decomp,-std.internal.unicode_comp,-std.internal.unicode_grapheme,-std.internal.unicode_norm,-std.internal.unicode_tables"
1 change: 1 addition & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
circleci.sh @CyberShadow @MartinNowak @wilzbach
etc/c/* @CyberShadow
posix.mak @CyberShadow @MartinNowak @wilzbach
# tools/unicode_table_generator.d
std/* @andralex
std/algorithm/* @andralex @JackStouffer @wilzbach @PetarKirov
std/array.d @JackStouffer @wilzbach @PetarKirov
Expand Down
5 changes: 5 additions & 0 deletions changelog/unicode_table_generator.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The Unicode table generator is now in Phobos, and the tables are updated to version 15.

It is likely that this change will result in breakage in code and program usage.
This is due to a number of factors: the tables have been updated significantly, and not all changes to the table generator were committed over the years.

2,997 changes: 25 additions & 2,972 deletions std/internal/unicode_comp.d

Large diffs are not rendered by default.

5,314 changes: 25 additions & 5,289 deletions std/internal/unicode_decomp.d

Large diffs are not rendered by default.

302 changes: 21 additions & 281 deletions std/internal/unicode_grapheme.d

Large diffs are not rendered by default.

557 changes: 21 additions & 536 deletions std/internal/unicode_norm.d

Large diffs are not rendered by default.

12,678 changes: 2,363 additions & 10,315 deletions std/internal/unicode_tables.d

Large diffs are not rendered by default.

39 changes: 25 additions & 14 deletions std/uni/package.d
Original file line number Diff line number Diff line change
Expand Up @@ -1528,7 +1528,7 @@ if (is(Unqual!T == T))
return SliceOverIndexed!T(a, b, x);
}

@safe unittest
@system unittest
{
int[] idxArray = [2, 3, 5, 8, 13];
auto sliced = sliceOverIndexed(0, idxArray.length, &idxArray);
Expand Down Expand Up @@ -2472,19 +2472,19 @@ public:
import std.format : format;
import std.uni : unicode;

assert(unicode.Cyrillic.to!string ==
"[1024..1157) [1159..1320) [7467..7468) [7544..7545) [11744..11776) [42560..42648) [42655..42656)");
// This originally used the Cyrillic script.
// Unfortunately, that is a pretty active range for changes,
// and hence it broke in an update.
// Therefore the Basic Latin block is used instead, as it is
// unlikely to ever change.

// The specs '%s' and '%d' are equivalent to the to!string call above.
assert(format("%d", unicode.Cyrillic) == unicode.Cyrillic.to!string);
assert(unicode.InBasic_latin.to!string == "[0..128)");

assert(format("%#x", unicode.Cyrillic) ==
"[0x400..0x485) [0x487..0x528) [0x1d2b..0x1d2c) [0x1d78..0x1d79) [0x2de0..0x2e00) "
~"[0xa640..0xa698) [0xa69f..0xa6a0)");
// The specs '%s' and '%d' are equivalent to the to!string call above.
assert(format("%d", unicode.InBasic_latin) == unicode.InBasic_latin.to!string);

assert(format("%#X", unicode.Cyrillic) ==
"[0X400..0X485) [0X487..0X528) [0X1D2B..0X1D2C) [0X1D78..0X1D79) [0X2DE0..0X2E00) "
~"[0XA640..0XA698) [0XA69F..0XA6A0)");
assert(format("%#x", unicode.InBasic_latin) == "[0..0x80)");
assert(format("%#X", unicode.InBasic_latin) == "[0..0X80)");
}

pure @safe unittest
Expand Down Expand Up @@ -4872,6 +4872,7 @@ template Utf8Matcher()
enum mode = Mode.neverSkip;
assert(!inp.empty);
auto ch = inp[0];

static if (hasASCII)
{
if (ch < 0x80)
Expand Down Expand Up @@ -4970,6 +4971,7 @@ template Utf8Matcher()
else
{
static assert(mode == Mode.skipOnMatch);

if (tab!size[needle])
{
inp.popFrontN(size);
Expand Down Expand Up @@ -5312,23 +5314,31 @@ pure @safe unittest
auto utf8 = utf8Matcher(unicode.Letter);
auto asc = utf8.subMatcher!(1);
auto uni = utf8.subMatcher!(2,3,4);

// h
assert(asc.test(codec));
assert(!uni.match(codec));
assert(utf8.skip(codec));
assert(codec.idx == 1);

assert(!uni.match(codec));
// i
assert(asc.test(codec));
assert(!uni.match(codec));
assert(utf8.skip(codec));
assert(codec.idx == 2);
assert(!asc.match(codec));

// !
assert(!asc.match(codec));
assert(!utf8.test(codec));
assert(!utf8.skip(codec));
assert(codec.idx == 3);

// space
assert(!asc.test(codec));
assert(!utf8.test(codec));
assert(!utf8.skip(codec));
assert(codec.idx == 4);

assert(utf8.test(codec));
foreach (i; 0 .. 7)
{
Expand All @@ -5338,6 +5348,7 @@ pure @safe unittest
}
assert(!utf8.test(codec));
assert(!utf8.skip(codec));

//the same with match where applicable
codec = rs.decoder;
assert(utf8.match(codec));
Expand All @@ -5360,7 +5371,7 @@ pure @safe unittest
assert(codec.idx == i);
}

pure @safe unittest
pure @system unittest
{
import std.range : stride;
static bool testAll(Matcher, Range)(ref Matcher m, ref Range r) @safe
Expand Down
Loading

0 comments on commit 122df92

Please sign in to comment.