Blinue · hauuau · Jan 10, 2024
diff --git a/src/Effects/NNEDI3/nnedi3-nns128-win8x4.hlsl b/src/Effects/NNEDI3/nnedi3-nns128-win8x4.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns128-win8x6.hlsl b/src/Effects/NNEDI3/nnedi3-nns128-win8x6.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns16-win8x4.hlsl b/src/Effects/NNEDI3/nnedi3-nns16-win8x4.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns16-win8x6.hlsl b/src/Effects/NNEDI3/nnedi3-nns16-win8x6.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns256-win8x4.hlsl b/src/Effects/NNEDI3/nnedi3-nns256-win8x4.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns256-win8x6.hlsl b/src/Effects/NNEDI3/nnedi3-nns256-win8x6.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns32-win8x4.hlsl b/src/Effects/NNEDI3/nnedi3-nns32-win8x4.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns32-win8x6.hlsl b/src/Effects/NNEDI3/nnedi3-nns32-win8x6.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns64-win8x4.hlsl b/src/Effects/NNEDI3/nnedi3-nns64-win8x4.hlsl
diff --git a/src/Effects/NNEDI3/nnedi3-nns64-win8x6.hlsl b/src/Effects/NNEDI3/nnedi3-nns64-win8x6.hlsl
diff --git a/src/Effects/RAVU/ravu-3x-r2-rgb.hlsl b/src/Effects/RAVU/ravu-3x-r2-rgb.hlsl
@@ -0,0 +1,226 @@
+// Generated by ravu-3x.py
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+
+
+//!MAGPIE EFFECT
+//!VERSION 3
+//!OUTPUT_WIDTH  INPUT_WIDTH * 3
+//!OUTPUT_HEIGHT INPUT_HEIGHT * 3
+
+
+//!TEXTURE
+Texture2D INPUT;
+
+//!SAMPLER
+//!FILTER POINT
+SamplerState sam_INPUT;
+
+
+//!SAMPLER
+//!FILTER LINEAR
+SamplerState sam_INPUT_LINEAR;
+
+//!TEXTURE
+//!SOURCE ravu_3x_lut2_f16.dds
+//!FORMAT R16G16B16A16_FLOAT
+Texture2D ravu_3x_lut2;
+
+//!SAMPLER
+//!FILTER LINEAR
+SamplerState sam_ravu_3x_lut2;
+
+
+
+
+
+//!COMMON
+// GLSL-to-HLSL translation is done with defines wherever possible, so the pass body stays close to the upstream GLSL and is easy to compare and re-synchronize.
+#define ivec2 int2
+
+#define vec2 float2
+#define vec3 float3
+#define vec4 float4
+
+#define shared groupshared
+
+// GLSL's two-argument atan(y, x) and HLSL's atan2(y, x) take their arguments in the same order, so a plain rename is enough.
+// Caveat: this define would also rename a one-argument atan(); the only call visible in this file passes two arguments.
+#define atan atan2
+#define fract frac
+#define intBitsToFloat asfloat
+#define inversesqrt rsqrt
+#define mix lerp // NOTE(review): GLSL mix() with a bool selects, lerp() blends with the bool converted to 0/1 - confirm equivalence for non-finite operands
+
+// GLSL mod() floors while fmod() truncates; they agree for non-negative operands, which is the only case mod is used for here.
+#define mod fmod
+
+#define barrier GroupMemoryBarrierWithGroupSync
+#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0) // requires a sampler named sam_<texture name>; always reads mip level 0
+
+// GLSL mat4x3 is 4 columns of vec3, while HLSL float4x3 is 4 rows of float3.
+// In both languages m[i] yields a 3-component vector, so code that only indexes m[i] carries over unchanged
+// as long as matrices are built to match (see outerProduct in the pass). HLSL's * on matrices is component-wise, which is what matrixCompMult needs.
+#define mat4x3 float4x3
+#define matrixCompMult(mtx1, mtx2) (mtx1 * mtx2)
+
+#define OUTPUT_pt float2(GetOutputPt())
+#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5)) // integer pixel index -> pixel centre
+#define frag_map(id) (OUTPUT_pt * frag_pos(id)) // pixel centre scaled by OUTPUT_pt
+
+#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x) // flattened thread index within the group, x varies fastest
+#define gl_LocalInvocationID threadId
+#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y))
+#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT)) // assumes blockStart is a multiple of the block size - TODO confirm
+#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy)
+
+#define LAST_PASS 1
+
+// disable warning about unknown pragma
+#pragma warning(disable: 3568)
+// disable warning about too many threads (ravu-r4-rgb triggers it)
+#pragma warning(disable: 4714)
+//!PASS 1
+//!DESC RAVU-3x (rgb, r2)
+//!IN INPUT, ravu_3x_lut2
+//!BLOCK_SIZE 96, 24
+//!NUM_THREADS 32, 8
+static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); // RGB weights for the dot product that fills inp_luma in Pass1 (values match the BT.709 luma coefficients)
+// HLSL has no outerProduct() intrinsic. This stand-in treats r as a 4x1 column and l as a
+// 1x3 row, so row i of the returned float4x3 equals r[i] * l.
+float4x3 outerProduct(float3 l, float4 r) {
+    float4x1 rAsColumn = float4x1(r);
+    float1x3 lAsRow = float1x3(l);
+    return mul(rAsColumn, lAsRow);
+}
+shared vec3 inp[340]; // group-shared RGB tile, indexed x * 10 + y: 340 = 34 x 10 slots (a 32 x 8 thread group plus 2 extra in each direction for the 3x3 neighbourhood)
+shared float inp_luma[340]; // dot(inp[i], color_primary) for the same slots
+#define CURRENT_PASS 1
+#define HOOKED_map(id) frag_map(id) // not referenced by the code visible in this file
+#define GET_SAMPLE(x) x // identity: the sampled value is used as-is
+#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz) // GLSL-style imageStore: the image argument is dropped and only val.xyz is forwarded
+void imageStoreOverride(uint2 pos, float3 value) { // writes one RGB value to output pixel pos
+    WriteToOutput(pos, value); // WriteToOutput is not defined in this file; presumably supplied by the Magpie runtime
+}
+#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) // expands to INPUT.SampleLevel(sam_INPUT, pos, 0.0)
+static const float2 INPUT_size = float2(GetInputSize()); // not referenced by the code visible in this file (only aliased as HOOKED_size)
+static const float2 INPUT_pt = float2(GetInputPt()); // multiplied with texel positions to form sampling coordinates; presumably 1 / input size - TODO confirm
+#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos))) // not referenced below; Pass1 calls texture(ravu_3x_lut2, ...) directly
+#define HOOKED_tex(pos) INPUT_tex(pos) // upstream "HOOKED" names are aliases for INPUT in this port
+#define HOOKED_size INPUT_size
+#define HOOKED_pt INPUT_pt
+void Pass1(uint2 blockStart, uint3 threadId) { // Each thread helps fill a shared input tile, then (if CheckViewport accepts it) writes one 3x3 tile of output pixels.
+ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); // global index of this group's first thread (work-group id * group size)
+int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y); // this thread's slot in inp[] / inp_luma[]: x * 10 + y
+for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { // cooperative fill: the group's threads stride over all 340 slots
+uint x = (uint)id / 10, y = (uint)id % 10; // slot -> (x, y) with x in 0..33 and y in 0..9
+inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x+x)+(-0.5), float(group_base.y+y)+(-0.5))).xyz; // presumably the centre of input texel group_base + (x, y) - 1 - TODO confirm GetInputPt() is 1 / input size
+inp_luma[id] = dot(inp[id], color_primary); // scalar weighted RGB sum used by the gradient analysis below
+}
+barrier(); // GroupMemoryBarrierWithGroupSync: the whole tile must be written before any thread reads its neighbours
+#if CURRENT_PASS == LAST_PASS // both are defined as 1 in this file, so this branch is compiled in
+uint2 destPos = blockStart + threadId.xy * 3; // top-left output pixel of this thread's 3x3 tile
+if (!CheckViewport(destPos)) { // placed after the barrier, so every thread still reaches the barrier before any early return
+    return;
+}
+#endif
+float luma0 = inp_luma[local_pos + 0]; // 3x3 neighbourhood: +1 steps in y, +10 steps in x, so luma4 (+11) is the centre
+float luma1 = inp_luma[local_pos + 1];
+float luma2 = inp_luma[local_pos + 2];
+float luma3 = inp_luma[local_pos + 10];
+float luma4 = inp_luma[local_pos + 11];
+float luma5 = inp_luma[local_pos + 12];
+float luma6 = inp_luma[local_pos + 20];
+float luma7 = inp_luma[local_pos + 21];
+float luma8 = inp_luma[local_pos + 22];
+vec3 abd = vec3(0.0, 0.0, 0.0); // weighted sum of (gx*gx, gx*gy, gy*gy) over the nine taps, i.e. the entries a, b, d of a symmetric 2x2 matrix [a b; b d]
+float gx, gy;
+gx = (luma3-luma0); // border taps use one-sided differences; where both neighbours exist a central difference (/2.0) is used
+gy = (luma1-luma0);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; // corner-tap weight (the nine weights sum to ~1)
+gx = (luma4-luma1);
+gy = (luma2-luma0)/2.0;
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; // edge-tap weight
+gx = (luma5-luma2);
+gy = (luma2-luma1);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
+gx = (luma6-luma0)/2.0;
+gy = (luma4-luma3);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
+gx = (luma7-luma1)/2.0;
+gy = (luma5-luma3)/2.0;
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; // centre-tap weight
+gx = (luma8-luma2)/2.0;
+gy = (luma5-luma4);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
+gx = (luma6-luma3);
+gy = (luma7-luma6);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
+gx = (luma7-luma4);
+gy = (luma8-luma6)/2.0;
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
+gx = (luma8-luma5);
+gy = (luma8-luma7);
+abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
+float a = abd.x, b = abd.y, d = abd.z;
+float T = a + d, D = a * d - b * b; // trace and determinant of [a b; b d]
+float delta = sqrt(max(T * T / 4.0 - D, 0.0)); // half the gap between the two eigenvalues; max() keeps the radicand non-negative
+float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; // eigenvalues of [a b; b d], L1 >= L2
+float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
+float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); // angle folded into [0, pi); meant to be 0 when |b| is ~0. NOTE(review): mix is lerp, not a select - confirm atan2(0, 0) cannot leak a NaN here
+float lambda = sqrtL1; // gradient strength measure
+float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); // relative eigenvalue spread; meant to be 0 when both roots are ~0
+float angle = floor(theta * 24.0 / 3.141592653589793); // orientation quantised into 24 bins
+float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); // 0 below 0.005, 1 from 0.005, 2 from 0.02
+float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); // 0 below 0.25, 1 from 0.25, 2 from 0.5
+float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; // combined class index (24 * 3 * 3 = 216 classes) mapped to the centre of its LUT row
+mat4x3 res0 = 0.0, res1 = 0.0; // row i of each matrix accumulates the RGB result for one output pixel
+vec4 w0, w1;
+w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y)); // LUT x coordinates 0.05, 0.15, ..., 0.95: presumably the centres of ten texels per row, four weights in each
+w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y));
+res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 22], w1.wzyx); // opposite taps (0/22, 1/21, 2/20, 10/12) share a weight pair: the mirrored tap takes the other texel with components reversed
+res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 22], w0.wzyx);
+w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y));
+w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y));
+res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 21], w1.wzyx);
+res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 21], w0.wzyx);
+w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y));
+w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y));
+res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 20], w1.wzyx);
+res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 20], w0.wzyx);
+w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y));
+w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y));
+res0 += outerProduct(inp[local_pos + 10], w0) + outerProduct(inp[local_pos + 12], w1.wzyx);
+res1 += outerProduct(inp[local_pos + 10], w1) + outerProduct(inp[local_pos + 12], w0.wzyx);
+w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y));
+w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y));
+res0 += outerProduct(inp[local_pos + 11], w0); // the centre tap has no mirror partner
+res1 += outerProduct(inp[local_pos + 11], w1);
+res0[0] = clamp(res0[0], 0.0, 1.0); // clamp every predicted colour to [0, 1]
+res0[1] = clamp(res0[1], 0.0, 1.0);
+res0[2] = clamp(res0[2], 0.0, 1.0);
+res0[3] = clamp(res0[3], 0.0, 1.0);
+res1[0] = clamp(res1[0], 0.0, 1.0);
+res1[1] = clamp(res1[1], 0.0, 1.0);
+res1[2] = clamp(res1[2], 0.0, 1.0);
+res1[3] = clamp(res1[3], 0.0, 1.0);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]); // eight predicted pixels fill the 3x3 tile around the centre
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), inp[local_pos + 11]); // tile centre: the input sample is copied through unchanged
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]);
+imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]);
+}