diff --git a/tests/ImageSharp.Benchmarks/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Bulk/Pad3Shuffle4Channel.cs index 1b6663e70e..8728dd6715 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/Pad3Shuffle4Channel.cs @@ -47,37 +47,37 @@ public void Pad3Shuffle4FastFallback() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | -// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | +// | Pad3Shuffle4 | 3. SSE | DOTNET_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | -// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 3. SSE | DOTNET_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | -// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 3. SSE | DOTNET_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | -// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | +// | Pad3Shuffle4 | 3. SSE | DOTNET_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | // 2023-02-21 // ########## @@ -94,34 +94,34 @@ public void Pad3Shuffle4FastFallback() // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | // |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | DOTNET_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - | // | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | DOTNET_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - | // | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | DOTNET_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - | // | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - | +// | Pad3Shuffle4 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | DOTNET_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - | // | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - | // | | | | | | | | | | | | | -// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - | -// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Bulk/Shuffle3Channel.cs index 8b7b89eb36..e4c12900f5 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/Shuffle3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/Shuffle3Channel.cs @@ -43,21 +43,21 @@ public void Shuffle3() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | -// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | +// | Shuffle3 | 3. SSE | DOTNET_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | -// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | +// | Shuffle3 | 3. SSE | DOTNET_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | -// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | +// | Shuffle3 | 3. SSE | DOTNET_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | -// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +// | Shuffle3 | 3. SSE | DOTNET_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | // 2023-02-21 // ########## @@ -74,18 +74,18 @@ public void Shuffle3() // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | // |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - | -// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | DOTNET_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - | // | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - | -// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | DOTNET_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - | // | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - | -// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | DOTNET_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - | // | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - | -// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - | +// | Shuffle3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | DOTNET_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - | // | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Bulk/Shuffle4Slice3Channel.cs index 5ade55c73f..579e2c54db 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/Shuffle4Slice3Channel.cs @@ -47,45 +47,45 @@ public void Shuffle4Slice3FastFallback() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | -// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | +// | Shuffle4Slice3 | 3. SSE | DOTNET_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | -// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 3. SSE | DOTNET_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | -// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | +// | Shuffle4Slice3 | 3. SSE | DOTNET_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | -// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 3. SSE | DOTNET_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | -// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | +// | Shuffle4Slice3 | 3. SSE | DOTNET_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. SSE | DOTNET_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | // 2023-02-21 // ########## @@ -102,42 +102,42 @@ public void Shuffle4Slice3FastFallback() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | DOTNET_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - | // | Shuffle4Slice3 | 3. AVX | Empty | 128 | 16.37 ns | 0.047 ns | 0.040 ns | 0.36 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. AVX | Empty | 128 | 14.70 ns | 0.050 ns | 0.047 ns | 1.11 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | DOTNET_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - | // | Shuffle4Slice3 | 3. AVX | Empty | 256 | 18.66 ns | 0.017 ns | 0.015 ns | 0.22 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. AVX | Empty | 256 | 19.23 ns | 0.018 ns | 0.016 ns | 0.79 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | DOTNET_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - | // | Shuffle4Slice3 | 3. AVX | Empty | 512 | 28.99 ns | 0.018 ns | 0.014 ns | 0.18 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. AVX | Empty | 512 | 28.76 ns | 0.017 ns | 0.015 ns | 0.54 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | DOTNET_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - | // | Shuffle4Slice3 | 3. AVX | Empty | 1024 | 39.95 ns | 0.075 ns | 0.067 ns | 0.12 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. AVX | Empty | 1024 | 39.78 ns | 0.029 ns | 0.027 ns | 0.39 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - | +// | Shuffle4Slice3 | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | DOTNET_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - | // | Shuffle4Slice3 | 3. AVX | Empty | 2048 | 69.83 ns | 0.319 ns | 0.267 ns | 0.11 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - | -// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | DOTNET_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. AVX | Empty | 2048 | 69.59 ns | 0.073 ns | 0.061 ns | 0.35 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Bulk/ShuffleByte4Channel.cs index 911c4e0a58..6a16bb5710 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/ShuffleByte4Channel.cs @@ -42,25 +42,25 @@ public void Shuffle4Channel() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | // 2023-02-21 // ########## @@ -77,22 +77,22 @@ public void Shuffle4Channel() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 128 | 14.05 ns | 0.029 ns | 0.024 ns | 1.31 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 256 | 15.18 ns | 0.052 ns | 0.043 ns | 0.48 | 0.03 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 512 | 17.69 ns | 0.038 ns | 0.034 ns | 0.30 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 1024 | 28.41 ns | 0.068 ns | 0.064 ns | 0.25 | 0.00 | - | - | - | - | // | | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 2048 | 34.74 ns | 0.061 ns | 0.054 ns | 0.16 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Bulk/ShuffleFloat4Channel.cs index 5bb3cf9165..7cc894486f 100644 --- a/tests/ImageSharp.Benchmarks/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Bulk/ShuffleFloat4Channel.cs @@ -42,25 +42,25 @@ public void Shuffle4Channel() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | -// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | +// | Shuffle4Channel | 3. SSE | DOTNET_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | // 2023-02-21 // ########## @@ -77,22 +77,22 @@ public void Shuffle4Channel() // // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 128 | 7.770 ns | 0.0696 ns | 0.0617 ns | 0.13 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 256 | 17.586 ns | 0.0582 ns | 0.0544 ns | 0.17 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 512 | 24.040 ns | 0.0445 ns | 0.0395 ns | 0.12 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 1024 | 68.456 ns | 0.1192 ns | 0.0996 ns | 0.17 | - | - | - | - | // | | | | | | | | | | | | | -// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - | -// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 1. No HwIntrinsics | DOTNET_EnableHWIntrinsic=0,DOTNET_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | DOTNET_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - | // | Shuffle4Channel | 3. AVX | Empty | 2048 | 133.634 ns | 1.7864 ns | 1.8345 ns | 0.17 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index 92f8917cf8..e21d0c76d8 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -33,24 +33,24 @@ public partial class Config // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things // like `LZCNT`, `BMI1`, or `BMI2` // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` - private const string EnableAES = "COMPlus_EnableAES"; - private const string EnableAVX = "COMPlus_EnableAVX"; - private const string EnableAVX2 = "COMPlus_EnableAVX2"; - private const string EnableBMI1 = "COMPlus_EnableBMI1"; - private const string EnableBMI2 = "COMPlus_EnableBMI2"; - private const string EnableFMA = "COMPlus_EnableFMA"; - private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; - private const string EnableLZCNT = "COMPlus_EnableLZCNT"; - private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; - private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; - private const string EnableSSE = "COMPlus_EnableSSE"; - private const string EnableSSE2 = "COMPlus_EnableSSE2"; - private const string EnableSSE3 = "COMPlus_EnableSSE3"; - private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; - private const string EnableSSE41 = "COMPlus_EnableSSE41"; - private const string EnableSSE42 = "COMPlus_EnableSSE42"; - private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; - private const string FeatureSIMD = "COMPlus_FeatureSIMD"; + private const string EnableAES = "DOTNET_EnableAES"; + private const string EnableAVX = "DOTNET_EnableAVX"; + private const string EnableAVX2 = "DOTNET_EnableAVX2"; + private const string EnableBMI1 = "DOTNET_EnableBMI1"; + private const string EnableBMI2 = "DOTNET_EnableBMI2"; + private const string EnableFMA = "DOTNET_EnableFMA"; + private const string EnableHWIntrinsic = "DOTNET_EnableHWIntrinsic"; + private const string EnableLZCNT = "DOTNET_EnableLZCNT"; + private const string EnablePCLMULQDQ = "DOTNET_EnablePCLMULQDQ"; + private const string EnablePOPCNT = "DOTNET_EnablePOPCNT"; + private const string EnableSSE = "DOTNET_EnableSSE"; + private const string EnableSSE2 = "DOTNET_EnableSSE2"; + private const string EnableSSE3 = "DOTNET_EnableSSE3"; + private const string EnableSSE3_4 = "DOTNET_EnableSSE3_4"; + private const string EnableSSE41 = "DOTNET_EnableSSE41"; + private const string EnableSSE42 = "DOTNET_EnableSSE42"; + private const string EnableSSSE3 = "DOTNET_EnableSSSE3"; + private const string FeatureSIMD = "DOTNET_FeatureSIMD"; public class HwIntrinsics_SSE_AVX : Config { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 07ad5e8f03..63126dcbca 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -104,7 +104,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -148,7 +148,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -192,7 +192,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -241,7 +241,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -288,7 +288,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -333,7 +333,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -379,7 +379,7 @@ public static void RunWithHwIntrinsicsFeature( ProcessStartInfo processStartInfo = new(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + processStartInfo.Environment[$"DOTNET_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action,