diff --git a/mak/COPY b/mak/COPY
index 1a8342116b..096b18c29f 100644
--- a/mak/COPY
+++ b/mak/COPY
@@ -21,6 +21,9 @@ COPY=\
 	$(IMPDIR)\core\time.d \
 	$(IMPDIR)\core\vararg.d \
 	\
+	$(IMPDIR)\core\experimental\memory\memcpy.d \
+	$(IMPDIR)\core\experimental\memory\simd.d \
+	\
 	$(IMPDIR)\core\internal\abort.d \
 	$(IMPDIR)\core\internal\arrayop.d \
 	$(IMPDIR)\core\internal\attributes.d \
diff --git a/mak/DOCS b/mak/DOCS
index 2fb5f72953..fa266401f6 100644
--- a/mak/DOCS
+++ b/mak/DOCS
@@ -16,6 +16,9 @@ DOCS=\
 	$(DOCDIR)\core_time.html \
 	$(DOCDIR)\core_vararg.html \
 	\
+	$(DOCDIR)\core_experimental_memory_memcpy.html \
+	$(DOCDIR)\core_experimental_memory_simd.html \
+	\
 	$(DOCDIR)\core_gc_config.html \
 	$(DOCDIR)\core_gc_gcinterface.html \
 	$(DOCDIR)\core_gc_registry.html \
diff --git a/mak/SRCS b/mak/SRCS
index cc0e925154..348109da03 100644
--- a/mak/SRCS
+++ b/mak/SRCS
@@ -17,6 +17,9 @@ SRCS=\
 	src\core\time.d \
 	src\core\vararg.d \
 	\
+	src\core\experimental\memory\memcpy.d \
+	src\core\experimental\memory\simd.d \
+	\
 	src\core\gc\config.d \
 	src\core\gc\gcinterface.d \
 	src\core\gc\registry.d \
diff --git a/mak/WINDOWS b/mak/WINDOWS
index 4cd551976a..7f3109a4a7 100644
--- a/mak/WINDOWS
+++ b/mak/WINDOWS
@@ -117,6 +117,12 @@ $(IMPDIR)\core\gc\gcinterface.d : src\core\gc\gcinterface.d
 $(IMPDIR)\core\gc\registry.d : src\core\gc\registry.d
 	copy $** $@
 
+$(IMPDIR)\core\experimental\memory\memcpy.d : src\core\experimental\memory\memcpy.d
+	copy $** $@
+
+$(IMPDIR)\core\experimental\memory\simd.d : src\core\experimental\memory\simd.d
+	copy $** $@
+
 $(IMPDIR)\core\internal\abort.d : src\core\internal\abort.d
 	copy $** $@
 
diff --git a/posix.mak b/posix.mak
index 3b3e4e72bd..7cf2639087 100644
--- a/posix.mak
+++ b/posix.mak
@@ -153,6 +153,9 @@ $(DOCDIR)/core_%.html : src/core/%.d $(DMD)
 $(DOCDIR)/core_experimental_%.html : src/core/experimental/%.d $(DMD)
 	$(DMD) $(DDOCFLAGS) -Df$@ project.ddoc $(DOCFMT) $<
 
+$(DOCDIR)/core_experimental_memory_%.html : src/core/experimental/memory/%.d $(DMD)
+	$(DMD) $(DDOCFLAGS) -Df$@ project.ddoc $(DOCFMT) $<
+
 $(DOCDIR)/core_gc_%.html : src/core/gc/%.d $(DMD)
 	$(DMD) $(DDOCFLAGS) -Df$@ project.ddoc $(DOCFMT) $<
diff --git a/src/core/experimental/memory/memcpy.d b/src/core/experimental/memory/memcpy.d
new file mode 100644
index 0000000000..e6450f7837
--- /dev/null
+++ b/src/core/experimental/memory/memcpy.d
@@ -0,0 +1,386 @@
+/**
+ * Pure D replacement for the C standard library memcpy().
+ * An idiomatic D interface, memcpy(), is provided as three overloads:
+ * one for static types, one for dynamic arrays and one for static arrays.
+ * A C-like interface, Dmemcpy(), is also available; it takes the classic
+ * (void*, const(void)*, size_t) arguments and is named Dmemcpy to
+ * disambiguate it from the C memcpy(), which has a _similar_ interface.
+ * N.B.: Both memcpy() and Dmemcpy() return nothing, unlike the C standard
+ * library version.
+ * Source: $(DRUNTIMESRC core/experimental/memory/memcpy.d)
+ */
+module core.experimental.memory.memcpy;
+
+import core.internal.traits : isArray;
+
+/**
+ * Handle static types.
+ * N.B.: No sophisticated code is needed for static types; the compiler
+ * knows best how to handle them for every target.
+ *
+ * Params:
+ *  dst = Reference to the memory destination to copy bytes to.
+ *  src = Reference to the memory source to copy bytes from.
+ */
+pragma(inline, true)
+void memcpy(T)(ref T dst, ref const T src) nothrow @nogc
+if (!isArray!T)
+{
+    dst = src;
+}
+
+/**
+ * Handle dynamic arrays.
+ *
+ * Params:
+ *  dst = Reference to the destination dynamic array to copy bytes to.
+ *  src = Reference to the source dynamic array to copy bytes from.
+ */
+void memcpy(T)(ref T[] dst, ref const T[] src) nothrow @nogc
+{
+    assert(dst.length == src.length);
+    void* d = cast(void*) dst.ptr;
+    const(void)* s = cast(const(void)*) src.ptr;
+    size_t n = dst.length * typeof(dst[0]).sizeof;
+    // Assume that there is no overlap.
+    pragma(inline, true);
+    Dmemcpy(d, s, n);
+}
+
+/// Ditto
+void memcpy(T, size_t len)(ref T[len] dst, ref const T[len] src) nothrow @nogc
+{
+    T[] d = dst[0 .. $];
+    const T[] s = src[0 .. $];
+    memcpy(d, s);
+}
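
[Review note - illustrative only, not part of the patch]
A minimal usage sketch of the three memcpy() overloads above, assuming the
module is importable as core.experimental.memory.memcpy once the build
changes land. The variable names are invented for the example.

    import core.experimental.memory.memcpy : memcpy;

    nothrow @nogc unittest
    {
        // Static (non-array) type: lowered to a plain assignment.
        double a = 1.5, b;
        memcpy(b, a);
        assert(b == 1.5);

        // Static arrays: forwarded to the dynamic-array overload.
        const int[4] srcSt = [1, 2, 3, 4];
        int[4] dstSt;
        memcpy(dstSt, srcSt);
        assert(dstSt == srcSt);

        // Dynamic arrays: lengths must match and the slices must not overlap.
        int[8] buf = [1, 2, 3, 4, 5, 6, 7, 8];
        int[8] outBuf;
        const int[] s = buf[];
        int[] d = outBuf[];
        memcpy(d, s);
        assert(d == s);
    }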
+
+/** Tests
+ */
+
+/// Basic feature tests
+nothrow @nogc unittest
+{
+    real a = 1.2;
+    real b;
+    memcpy(b, a);
+    assert(b == 1.2);
+}
+/// Ditto
+nothrow @nogc unittest
+{
+    const float[3] a = [1.2, 3.4, 5.8];
+    float[3] b;
+    memcpy(b, a);
+    assert(b[0] == 1.2f);
+    assert(b[1] == 3.4f);
+    assert(b[2] == 5.8f);
+}
+
+/// More sophisticated test suite
+nothrow @nogc unittest
+{
+    /* Handy struct
+     */
+    struct S(size_t Size)
+    {
+        ubyte[Size] x;
+    }
+
+    pragma(inline, false)
+    void initStatic(T)(T *v) nothrow @nogc
+    {
+        auto m = (cast(ubyte*) v)[0 .. T.sizeof];
+        foreach (i; 0..m.length)
+        {
+            m[i] = cast(ubyte) i;
+        }
+    }
+
+    pragma(inline, false)
+    void verifyStaticType(T)(const T *a, const T *b) nothrow @nogc
+    {
+        const ubyte* aa = cast(const ubyte*) a;
+        const ubyte* bb = cast(const ubyte*) b;
+        // Note: `real` is an exceptional case, in that it behaves differently
+        // across compilers because its size is not a power of 2 (it is 10 on
+        // x86), so padding is added to reach 16. The padding bytes are not
+        // considered by the compiler in a move, for instance. Dmemcpy, for
+        // static types, is *dst = *src, and the compiler may emit an `fld`
+        // followed by an `fstp` instruction. Those instructions operate on
+        // extended floating point values (whose size is 10), so the padding
+        // bytes are not copied to the destination.
+        static if (is(T == real))
+        {
+            enum n = 10;
+        }
+        else
+        {
+            enum n = T.sizeof;
+        }
+        foreach (i; 0..n)
+        {
+            assert(aa[i] == bb[i]);
+        }
+    }
+
+    pragma(inline, false)
+    void testStaticType(T)() nothrow @nogc
+    {
+        T d, s;
+        initStatic!(T)(&d);
+        initStatic!(T)(&s);
+        memcpy(d, s);
+        verifyStaticType(&d, &s);
+    }
+
+    pragma(inline, false)
+    void init(T)(ref T[] v) nothrow @nogc
+    {
+        foreach (i; 0..v.length)
+        {
+            v[i] = cast(ubyte) i;
+        }
+    }
+
+    pragma(inline, false)
+    void verifyArray(size_t j, const ref ubyte[] a, const ref ubyte[80000] b) nothrow @nogc
+    {
+        foreach (i; 0..a.length)
+        {
+            assert(a[i] == b[i]);
+        }
+    }
+
+    pragma(inline, false)
+    void testDynamicArray(size_t n)() nothrow @nogc
+    {
+        ubyte[80000] buf1;
+        ubyte[80000] buf2;
+        enum alignments = 32;
+        foreach (i; 0..alignments)
+        {
+            ubyte[] p = buf1[i..i+n];
+            ubyte[] q;
+            // Relatively aligned
+            q = buf2[0..n];
+            // Use a copy for the cases of overlap.
+            ubyte[80000] copy;
+            init(q);
+            init(p);
+            foreach (k; 0..p.length)
+            {
+                copy[k] = p[k];
+            }
+            memcpy(q, p);
+            verifyArray(i, q, copy);
+        }
+    }
+    void tests() nothrow @nogc
+    {
+        testStaticType!(byte);
+        testStaticType!(ubyte);
+        testStaticType!(short);
+        testStaticType!(ushort);
+        testStaticType!(int);
+        testStaticType!(uint);
+        testStaticType!(long);
+        testStaticType!(ulong);
+        testStaticType!(float);
+        testStaticType!(double);
+        testStaticType!(real);
+        static foreach (i; 1..100)
+        {
+            testStaticType!(S!i);
+            testDynamicArray!(i)();
+        }
+        testStaticType!(S!3452);
+        testDynamicArray!(3452)();
+        testStaticType!(S!6598);
+        testDynamicArray!(6598);
+        testStaticType!(S!14928);
+        testDynamicArray!(14928);
+        testStaticType!(S!27891);
+        testDynamicArray!(27891);
+        testStaticType!(S!44032);
+        testStaticType!(S!55897);
+        testStaticType!(S!79394);
+        testStaticType!(S!256);
+        testStaticType!(S!512);
+        testStaticType!(S!1024);
+        testStaticType!(S!2048);
+        testStaticType!(S!4096);
+        testStaticType!(S!8192);
+        testStaticType!(S!16384);
+        testStaticType!(S!32768);
+        testStaticType!(S!65536);
+    }
+
+    tests();
+}
+
+import core.experimental.memory.simd : useSIMD;
+
+/*
+ * Dynamic implementation
+ * N.B.: All Dmemcpy functions require _no_ overlap.
+ */
+static if (useSIMD)
+{
+
+import core.experimental.memory.simd : load16fSSE, store16fSSE, lstore128fpSSE,
+    lstore128fSSE, lstore64fSSE, lstore32fSSE;
+import core.simd : float4;
+
+/**
+ * The C-like interface for dynamic sizes.
+ * N.B.: While Dmemcpy's interface is C-like, it returns void.
+ *
+ * Params:
+ *  d = Pointer to the memory destination to copy bytes to.
+ *  s = Pointer to the memory source to copy bytes from.
+ *  n = Number of bytes to copy.
+ */
+void Dmemcpy(void* d, const(void)* s, size_t n) nothrow @nogc
+{
+    if (n <= 128)
+    {
+        Dmemcpy_small(d, s, n);
+    }
+    else
+    {
+        Dmemcpy_large(d, s, n);
+    }
+}
+
+/* Handle dynamic sizes <= 128. `d` and `s` must not overlap.
+ */
+private void Dmemcpy_small(void* d, const(void)* s, size_t n) nothrow @nogc
+{
+    if (n < 16) {
+        if (n & 0x01)
+        {
+            *(cast(ubyte*) d) = *(cast(const ubyte*) s);
+            ++d;
+            ++s;
+        }
+        if (n & 0x02)
+        {
+            *(cast(ushort*) d) = *(cast(const ushort*) s);
+            d += 2;
+            s += 2;
+        }
+        if (n & 0x04)
+        {
+            *(cast(uint*) d) = *(cast(const uint*) s);
+            d += 4;
+            s += 4;
+        }
+        if (n & 0x08)
+        {
+            *(cast(ulong*) d) = *(cast(const ulong*) s);
+        }
+        return;
+    }
+    if (n <= 32)
+    {
+        float4 xmm0 = load16fSSE(s);
+        float4 xmm1 = load16fSSE(s-16+n);
+        store16fSSE(d, xmm0);
+        store16fSSE(d-16+n, xmm1);
+        return;
+    }
+    if (n <= 64)
+    {
+        float4 xmm0 = load16fSSE(s);
+        float4 xmm1 = load16fSSE(s+16);
+        float4 xmm2 = load16fSSE(s-32+n);
+        float4 xmm3 = load16fSSE(s-32+n+16);
+        store16fSSE(d, xmm0);
+        store16fSSE(d+16, xmm1);
+        store16fSSE(d-32+n, xmm2);
+        store16fSSE(d-32+n+16, xmm3);
+        return;
+    }
+    import core.simd : void16;
+    lstore64fSSE(d, s);
+    // Requires _no_ overlap.
+    n -= 64;
+    s = s + n;
+    d = d + n;
+    lstore64fSSE(d, s);
+}
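
[Review note - illustrative only, not part of the patch]
The n <= 32 and n <= 64 branches of Dmemcpy_small() copy the buffer as a pair
of fixed-size windows anchored at the start and at the end of the range; when
n is not an exact multiple of the window size the windows overlap inside the
destination, which is safe because every load is performed before the
overlapping stores. A scalar model of the same idea, using a hypothetical
helper name and two possibly overlapping 8-byte moves:

    // Copy 8 <= n <= 16 bytes between non-overlapping buffers.
    void copy8to16(void* d, const(void)* s, size_t n) nothrow @nogc
    {
        assert(n >= 8 && n <= 16);
        ulong head = *cast(const ulong*) s;           // bytes [0 .. 8)
        ulong tail = *cast(const ulong*)(s + n - 8);  // bytes [n-8 .. n), may overlap head
        *cast(ulong*) d = head;                       // both loads are done before any store
        *cast(ulong*)(d + n - 8) = tail;
    }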
+
+/* Handle dynamic sizes > 128. `d` and `s` must not overlap.
+ */
+private void Dmemcpy_large(void* d, const(void)* s, size_t n) nothrow @nogc
+{
+    // NOTE: An alternative is to reach 64-byte (cache-line) alignment
+    // and use `rep movsb`. That is good for bigger sizes, but only on Intel.
+
+    // Align the destination (the write side) to a 32-byte boundary.
+    // Note: We're using SSE, which needs only 16-byte alignment, but in
+    // practice 32-byte alignment was noticeably faster (probably because
+    // the loads / stores are the bottleneck).
+    uint rem = cast(ulong) d & 15;
+    if (rem)
+    {
+        store16fSSE(d, load16fSSE(s));
+        s += 16 - rem;
+        d += 16 - rem;
+        n -= 16 - rem;
+    }
+
+    static string loop(string prefetchChoice)()
+    {
+        return
+        "
+        while (n >= 128)
+        {
+            // Aligned stores / writes
+            " ~ prefetchChoice ~ "(d, s);
+            d += 128;
+            s += 128;
+            n -= 128;
+        }
+        ";
+    }
+
+    if (n >= 20000)
+    {
+        mixin(loop!("lstore128fpSSE")());
+    }
+    else
+    {
+        mixin(loop!("lstore128fSSE")());
+    }
+
+    // We have already checked that the initial size is >= 128 to get here,
+    // so we won't overwrite previous data.
+    if (n != 0)
+    {
+        lstore128fSSE(d - 128 + n, s - 128 + n);
+    }
+}
+
+}
+else
+{
+
+/* Non-SIMD version
+ */
+void Dmemcpy(void* d, const(void)* s, size_t n) nothrow @nogc
+{
+    ubyte* dst = cast(ubyte*) d;
+    const(ubyte)* src = cast(const(ubyte)*) s;
+    foreach (i; 0..n)
+    {
+        *dst = *src;
+        dst++;
+        src++;
+    }
+}
+
+}
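
[Review note - illustrative only, not part of the patch]
The C-like entry point in use. Unlike C memcpy(), Dmemcpy() returns void and
the two ranges must not overlap. On a SIMD-enabled build a 256-byte copy like
the one below goes through Dmemcpy_large(); without SIMD it falls back to the
byte-wise loop above.

    import core.experimental.memory.memcpy : Dmemcpy;

    nothrow @nogc unittest
    {
        ubyte[256] src;
        ubyte[256] dst;
        foreach (i, ref b; src)
            b = cast(ubyte) i;
        Dmemcpy(dst.ptr, src.ptr, src.length);
        assert(dst == src);
    }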
diff --git a/src/core/experimental/memory/simd.d b/src/core/experimental/memory/simd.d
new file mode 100644
index 0000000000..3501fd0af1
--- /dev/null
+++ b/src/core/experimental/memory/simd.d
@@ -0,0 +1,165 @@
+/**
+ * A small, currently experimental SIMD library for D. It is cross-compiler
+ * (DMD, LDC, GDC) and cross-platform (for GDC, only i386 and x86_64 are supported).
+ * Source: $(DRUNTIMESRC core/experimental/memory/simd.d)
+ */
+module core.experimental.memory.simd;
+
+/* Provide an enum so that users can tell
+ * whether they can use SIMD.
+ */
+version (D_SIMD)
+{
+    import core.simd : float4;
+    enum useSIMD = true;
+}
+else version (LDC)
+{
+    // LDC always supports SIMD (but never sets D_SIMD) and
+    // the back-end uses the most appropriate size for every target.
+    import core.simd : float4;
+    enum useSIMD = true;
+}
+else version (GNU)
+{
+    import core.simd : float4;
+    // GDC does not set D_SIMD, and SIMD support depends on the target; check explicitly.
+    version (X86_64)
+    {
+        private enum isX86 = true;
+    }
+    else version (X86)
+    {
+        private enum isX86 = true;
+    }
+    else
+    {
+        // Fallback so that the static if below always compiles.
+        private enum isX86 = false;
+    }
+
+    static if (isX86 && __traits(compiles, float4))
+    {
+        enum useSIMD = true;
+    }
+    else
+    {
+        enum useSIMD = false;
+    }
+}
+else
+{
+    enum useSIMD = false;
+}
+
+static if (useSIMD)
+{
+    version (LDC)
+    {
+        import ldc.simd : loadUnaligned, storeUnaligned;
+    }
+    else version (DigitalMars)
+    {
+        import core.simd : void16, loadUnaligned, storeUnaligned;
+    }
+    else version (GNU)
+    {
+        import gcc.builtins : __builtin_ia32_storeups, __builtin_ia32_loadups;
+    }
+
+    void store16fSSE(void* dest, float4 reg) nothrow @nogc
+    {
+        version (LDC)
+        {
+            storeUnaligned!float4(reg, cast(float*)dest);
+        }
+        else version (DigitalMars)
+        {
+            storeUnaligned(cast(void16*)dest, reg);
+        }
+        else version (GNU)
+        {
+            __builtin_ia32_storeups(cast(float*) dest, reg);
+        }
+    }
+
+    float4 load16fSSE(const(void)* src) nothrow @nogc
+    {
+        version (LDC)
+        {
+            return loadUnaligned!(float4)(cast(const(float)*) src);
+        }
+        else version (DigitalMars)
+        {
+            return loadUnaligned(cast(void16*) src);
+        }
+        else version (GNU)
+        {
+            return __builtin_ia32_loadups(cast(float*) src);
+        }
+    }
+
+    private void prefetchForward(void* s) nothrow @nogc
+    {
+        enum writeFetch = 0;
+        enum locality = 3; // -> t0
+        version (DigitalMars)
+        {
+            import core.simd : prefetch;
+            prefetch!(writeFetch, locality)(s+0x1a0);
+            prefetch!(writeFetch, locality)(s+0x280);
+        }
+        else version (LDC)
+        {
+            import ldc.intrinsics : llvm_prefetch;
+            enum dataCache = 1;
+            llvm_prefetch(s+0x1a0, writeFetch, locality, dataCache);
+            llvm_prefetch(s+0x280, writeFetch, locality, dataCache);
+        }
+        else version (GNU)
+        {
+            import gcc.builtins : __builtin_prefetch;
+            __builtin_prefetch(s+0x1a0, writeFetch, locality);
+            __builtin_prefetch(s+0x280, writeFetch, locality);
+        }
+    }
+
+    void lstore128fpSSE(void* d, const(void)* s) nothrow @nogc
+    {
+        prefetchForward(cast(void*) s);
+        lstore128fSSE(d, s);
+    }
+
+    void lstore128fSSE(void* d, const(void)* s) nothrow @nogc
+    {
+        float4 xmm0 = load16fSSE(cast(const float*)s);
+        float4 xmm1 = load16fSSE(cast(const float*)(s+16));
+        float4 xmm2 = load16fSSE(cast(const float*)(s+32));
+        float4 xmm3 = load16fSSE(cast(const float*)(s+48));
+        float4 xmm4 = load16fSSE(cast(const float*)(s+64));
+        float4 xmm5 = load16fSSE(cast(const float*)(s+80));
+        float4 xmm6 = load16fSSE(cast(const float*)(s+96));
+        float4 xmm7 = load16fSSE(cast(const float*)(s+112));
+        //
+        store16fSSE(cast(float*)d, xmm0);
+        store16fSSE(cast(float*)(d+16), xmm1);
+        store16fSSE(cast(float*)(d+32), xmm2);
+        store16fSSE(cast(float*)(d+48), xmm3);
+        store16fSSE(cast(float*)(d+64), xmm4);
+        store16fSSE(cast(float*)(d+80), xmm5);
+        store16fSSE(cast(float*)(d+96), xmm6);
+        store16fSSE(cast(float*)(d+112), xmm7);
+    }
+
+    void lstore64fSSE(void* d, const(void)* s) nothrow @nogc
+    {
+        float4 xmm0 = load16fSSE(cast(const float*)s);
+        float4 xmm1 = load16fSSE(cast(const float*)(s+16));
+        float4 xmm2 = load16fSSE(cast(const float*)(s+32));
+        float4 xmm3 = load16fSSE(cast(const float*)(s+48));
+        //
+        store16fSSE(cast(float*)d, xmm0);
+        store16fSSE(cast(float*)(d+16), xmm1);
+        store16fSSE(cast(float*)(d+32), xmm2);
+        store16fSSE(cast(float*)(d+48), xmm3);
+    }
+
+    void lstore32fSSE(void* d, const(void)* s) nothrow @nogc
+    {
+        float4 xmm0 = load16fSSE(cast(const float*)s);
+        float4 xmm1 = load16fSSE(cast(const float*)(s+16));
+        //
+        store16fSSE(cast(float*)d, xmm0);
+        store16fSSE(cast(float*)(d+16), xmm1);
+    }
+}
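
[Review note - illustrative only, not part of the patch]
A minimal sketch of composing the unaligned load/store wrappers, guarded by
useSIMD so that it still compiles on targets without SIMD support. The helper
name copy16 is invented for the example.

    import core.experimental.memory.simd : useSIMD;

    static if (useSIMD)
    {
        import core.experimental.memory.simd : load16fSSE, store16fSSE;
        import core.simd : float4;

        // Copy exactly 16 bytes between possibly unaligned, non-overlapping buffers.
        void copy16(void* d, const(void)* s) nothrow @nogc
        {
            float4 xmm0 = load16fSSE(s);
            store16fSSE(d, xmm0);
        }
    }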
diff --git a/src/core/internal/traits.d b/src/core/internal/traits.d
index bccf1ad356..9a777e2f0a 100644
--- a/src/core/internal/traits.d
+++ b/src/core/internal/traits.d
@@ -567,3 +567,117 @@ if (func.length == 1 /*&& isCallable!func*/)
     static assert(P_dglit.length == 1);
     static assert(is(P_dglit[0] == int));
 }
+
+// [For internal use]
+package template ModifyTypePreservingTQ(alias Modifier, T)
+{
+    static if (is(T U == immutable U)) alias ModifyTypePreservingTQ = immutable Modifier!U;
+    else static if (is(T U == shared inout const U)) alias ModifyTypePreservingTQ = shared inout const Modifier!U;
+    else static if (is(T U == shared inout U)) alias ModifyTypePreservingTQ = shared inout Modifier!U;
+    else static if (is(T U == shared const U)) alias ModifyTypePreservingTQ = shared const Modifier!U;
+    else static if (is(T U == shared U)) alias ModifyTypePreservingTQ = shared Modifier!U;
+    else static if (is(T U == inout const U)) alias ModifyTypePreservingTQ = inout const Modifier!U;
+    else static if (is(T U == inout U)) alias ModifyTypePreservingTQ = inout Modifier!U;
+    else static if (is(T U == const U)) alias ModifyTypePreservingTQ = const Modifier!U;
+    else alias ModifyTypePreservingTQ = Modifier!T;
+}
+
+@safe unittest
+{
+    alias Intify(T) = int;
+    static assert(is(ModifyTypePreservingTQ!(Intify, real) == int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, const real) == const int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, inout real) == inout int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, inout const real) == inout const int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, shared real) == shared int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, shared const real) == shared const int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, shared inout real) == shared inout int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, shared inout const real) == shared inout const int));
+    static assert(is(ModifyTypePreservingTQ!(Intify, immutable real) == immutable int));
+}
+
+/**
+ * Strips off all `enum`s from type `T`.
+ */
+template OriginalType(T)
+{
+    template Impl(T)
+    {
+        static if (is(T U == enum)) alias Impl = OriginalType!U;
+        else alias Impl = T;
+    }
+
+    alias OriginalType = ModifyTypePreservingTQ!(Impl, T);
+}
+
+///
+@safe unittest
+{
+    enum E : real { a = 0 } // NOTE: explicit initialization to 0 required during Enum init deprecation cycle
+    enum F : E { a = E.a }
+    alias G = const(F);
+    static assert(is(OriginalType!E == real));
+    static assert(is(OriginalType!F == real));
+    static assert(is(OriginalType!G == const real));
+}
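
[Review note - illustrative only, not part of the patch]
ModifyTypePreservingTQ applies Modifier to the unqualified form of T and then
re-applies T's original type qualifiers. A second sketch with a pointer-making
modifier, usable from inside core.internal since the template is package:

    @safe unittest
    {
        alias Pointify(T) = T*;
        static assert(is(ModifyTypePreservingTQ!(Pointify, int) == int*));
        static assert(is(ModifyTypePreservingTQ!(Pointify, const int) == const(int*)));
        static assert(is(ModifyTypePreservingTQ!(Pointify, shared int) == shared(int*)));
    }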
+
+/**
+ * Detect whether type `T` is an aggregate type.
+ */
+enum bool isAggregateType(T) = is(T == struct) || is(T == union) ||
+                               is(T == class) || is(T == interface);
+
+private template AliasThisTypeOf(T)
+if (isAggregateType!T)
+{
+    alias members = __traits(getAliasThis, T);
+
+    static if (members.length == 1)
+    {
+        alias AliasThisTypeOf = typeof(__traits(getMember, T.init, members[0]));
+    }
+    else
+        static assert(0, T.stringof~" does not have alias this type");
+}
+
+/* Get the dynamic array type that `T` resolves to, if any
+ * (looking through `alias this` and `enum` base types).
+ */
+template DynamicArrayTypeOf(T)
+{
+    static if (is(AliasThisTypeOf!T AT) && !is(AT[] == AT))
+        alias X = DynamicArrayTypeOf!AT;
+    else
+        alias X = OriginalType!T;
+
+    static if (is(Unqual!X : E[], E) && !is(typeof({ enum n = X.length; })))
+    {
+        alias DynamicArrayTypeOf = X;
+    }
+    else
+        static assert(0, T.stringof~" is not a dynamic array");
+}
+
+// TODO(stefanos): More unit-testing.
+
+@safe unittest
+{
+    static assert(!is(DynamicArrayTypeOf!(int[3])));
+    static assert(!is(DynamicArrayTypeOf!(void[3])));
+    static assert(!is(DynamicArrayTypeOf!(typeof(null))));
+}
+
+/**
+ * Detect whether type `T` is a dynamic array.
+ */
+enum bool isDynamicArray(T) = is(DynamicArrayTypeOf!T) && !isAggregateType!T;
+
+/**
+ * Detect whether type `T` is a static array.
+ */
+enum bool isStaticArray(T) = __traits(isStaticArray, T);
+
+/**
+ * Detect whether type `T` is an array (static or dynamic; for associative
+ * arrays see $(LREF isAssociativeArray)).
+ */
+enum bool isArray(T) = isStaticArray!T || isDynamicArray!T;
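
[Review note - illustrative only, not part of the patch]
These traits are what the memcpy() overloads in core.experimental.memory.memcpy
key on. A small sketch of the same constraint pattern plus the expected trait
results; copyScalar is an invented name.

    void copyScalar(T)(ref T dst, ref const T src)
    if (!isArray!T)   // reject static and dynamic arrays, as memcpy's first overload does
    {
        dst = src;
    }

    @safe unittest
    {
        static assert( isStaticArray!(int[4]));
        static assert( isDynamicArray!(int[]));
        static assert(!isDynamicArray!(int[4]));
        static assert( isArray!(int[]) && isArray!(int[4]));
        static assert(!isArray!int);

        int x, y = 5;
        copyScalar(x, y);
        assert(x == 5);

        int[4] a, b;
        static assert(!__traits(compiles, copyScalar(a, b)));
    }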