Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add v3 effects from the port of mpv-prescalers #805

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
557 changes: 557 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns128-win8x4.hlsl

Large diffs are not rendered by default.

589 changes: 589 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns128-win8x6.hlsl

Large diffs are not rendered by default.

333 changes: 333 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns16-win8x4.hlsl

Large diffs are not rendered by default.

365 changes: 365 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns16-win8x6.hlsl

Large diffs are not rendered by default.

813 changes: 813 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns256-win8x4.hlsl

Large diffs are not rendered by default.

845 changes: 845 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns256-win8x6.hlsl

Large diffs are not rendered by default.

365 changes: 365 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns32-win8x4.hlsl

Large diffs are not rendered by default.

397 changes: 397 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns32-win8x6.hlsl

Large diffs are not rendered by default.

429 changes: 429 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns64-win8x4.hlsl

Large diffs are not rendered by default.

461 changes: 461 additions & 0 deletions src/Effects/NNEDI3/nnedi3-nns64-win8x6.hlsl

Large diffs are not rendered by default.

226 changes: 226 additions & 0 deletions src/Effects/RAVU/ravu-3x-r2-rgb.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
// Generated by ravu-3x.py
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.




//!MAGPIE EFFECT
//!VERSION 3
//!OUTPUT_WIDTH INPUT_WIDTH * 3
//!OUTPUT_HEIGHT INPUT_HEIGHT * 3


// Source image of the effect. The texture() macro in the COMMON section pairs
// every texture with a sampler named sam_<texture>, so INPUT is always read
// through sam_INPUT below.
//!TEXTURE
Texture2D INPUT;

// Point-filtered sampler used for INPUT (name required by the texture() macro).
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;


// Linear-filtered sampler. It is not referenced by the pass in this file.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;

// Lookup table with the filter weights, loaded from the .dds file named below
// as 16-bit float RGBA. Pass 1 reads it with x in {0.05, 0.15, ..., 0.95} and
// y = (row + 0.5) / 216.
//!TEXTURE
//!SOURCE ravu_3x_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut2;

// Sampler for the lookup table (name required by the texture() macro).
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut2;





//!COMMON
// Conversion from GLSL to HLSL is done through defines as much as possible to ease synchronization and comparison with upstream
// GLSL vector type names mapped onto the HLSL types of the same shape.
#define ivec2 int2

#define vec2 float2
#define vec3 float3
#define vec4 float4

// GLSL "shared" storage is spelled "groupshared" in HLSL.
#define shared groupshared

// GLSL's two-argument atan(y, x) and HLSL's atan2(y, x) take their arguments
// in the same order, so a plain rename is sufficient.
// NOTE: this object-like macro rewrites every `atan` token. A single-argument
// atan(x) in ported code would expand to atan2(x) and fail to compile.
#define atan atan2
#define fract frac
#define intBitsToFloat asfloat
#define inversesqrt rsqrt
// NOTE: GLSL mix(x, y, a) with a *bool* a selects one operand outright, while
// lerp() converts the bool to 0/1 and blends arithmetically. A NaN or Inf in
// the operand that should have been discarded can therefore reach the result.
// Code that relies on the select behaviour should use ?: instead.
#define mix lerp

// mod deals only with positive numbers here and it could be substituted by fmod
#define mod fmod

// GLSL barrier() -> group memory barrier combined with a thread-group sync.
#define barrier GroupMemoryBarrierWithGroupSync
// Samples mip level 0 of `tex` with the sampler named sam_<tex>. Because of
// the ## token paste, `tex` must be a bare texture identifier.
#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0)

// GLSL matCxR has C columns of R components and m[i] yields column i.
// HLSL floatRxC has R rows of C components and m[i] yields row i.
// Mapping mat4x3 to float4x3 therefore keeps m[i] a 3-component vector, which
// is the only way the pass in this file indexes these matrices; outerProduct()
// is written to fill the rows accordingly.
// The HLSL * operator on two matrices is component-wise, which is what
// matrixCompMult() means in GLSL.
#define mat4x3 float4x3
#define matrixCompMult(mtx1, mtx2) (mtx1 * mtx2)

// frag_pos(id): centre of pixel `id`. frag_map(id): that centre scaled by
// OUTPUT_pt (presumably 1 / output size, as returned by Magpie's GetOutputPt()
// - TODO confirm).
#define OUTPUT_pt float2(GetOutputPt())
#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5))
#define frag_map(id) (OUTPUT_pt * frag_pos(id))

// Emulation of the GLSL compute built-ins. These macros rely on the pass entry
// point naming its parameters `blockStart` and `threadId`.
// MP_NUM_THREADS_* / MP_BLOCK_* are not defined in this file; presumably Magpie
// derives them from the //!NUM_THREADS and //!BLOCK_SIZE directives - TODO confirm.
#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x)
#define gl_LocalInvocationID threadId
#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y))
#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT))
#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy)

// Number of the final pass; each pass compares its CURRENT_PASS against this.
#define LAST_PASS 1

// disable warning about unknown pragma
#pragma warning(disable: 3568)
// disable warning about too many threads (ravu-r4-rgb triggers it)
#pragma warning(disable: 4714)
//!PASS 1
//!DESC RAVU-3x (rgb, r2)
//!IN INPUT, ravu_3x_lut2
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Block/thread geometry: 32 x 8 threads, each writing a 3 x 3 group of output
// pixels, which gives the 96 x 24 block declared above.
//
// Weights that reduce an RGB sample to one luma value (used by the dot() in
// Pass1). The numbers match the Rec. 709 luma coefficients.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Stand-in for GLSL's outerProduct(), which has no HLSL intrinsic.
// Row i of the result is `l` scaled by the i-th component of `r`, i.e.
// result[i][j] == r[i] * l[j].
float4x3 outerProduct(float3 l, float4 r) {
    return float4x3(
        r.x * l,
        r.y * l,
        r.z * l,
        r.w * l
    );
}
// Per-thread-group cache of input samples, filled cooperatively at the start
// of Pass1. 340 entries = 34 x 10 samples, addressed as inp[x * 10 + y].
shared vec3 inp[340];
// Luma (dot with color_primary) of the sample stored at the same index in inp.
shared float inp_luma[340];
// Number of this pass; compared against LAST_PASS inside Pass1.
#define CURRENT_PASS 1
#define HOOKED_map(id) frag_map(id)
// Identity hook around every fetched sample.
#define GET_SAMPLE(x) x
// GLSL imageStore() replacement: the image argument is ignored and only the
// .xyz part of the value is forwarded to imageStoreOverride().
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz)
// Target of the imageStore() macro. Forwards both arguments unchanged to
// WriteToOutput(), which is not defined in this file (presumably supplied by
// Magpie - TODO confirm).
//   pos   - pixel coordinate passed on to WriteToOutput()
//   value - RGB colour passed on to WriteToOutput()
void imageStoreOverride(uint2 pos, float3 value) {
WriteToOutput(pos, value);
}
// Fetches INPUT at texture coordinate `pos` (mip 0, through sam_INPUT).
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values reported by GetInputSize() / GetInputPt(), which are not defined in
// this file. INPUT_pt is multiplied with pixel-centre coordinates in Pass1, so
// it is presumably 1 / INPUT_size - TODO confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Fetches the weight lookup table at `pos` (mip 0, through sam_ravu_3x_lut2).
#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos)))
// The generated code calls its source texture HOOKED; alias it to INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1: RAVU 3x upscale. Each thread produces a 3 x 3 group of output pixels
// from the 3 x 3 input neighbourhood centred on its own input pixel.
//   blockStart - output-space origin of the block handled by this thread group
//                (used as the base of destPos and to derive gl_WorkGroupID)
//   threadId   - index of this thread inside the group
void Pass1(uint2 blockStart, uint3 threadId) {
    // First input pixel owned by this thread group.
    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
    // Cache index of the top-left sample of this thread's 3 x 3 neighbourhood.
    int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y);

    // Cooperative fill of the 34 x 10 sample cache: every thread loads entries
    // id, id + groupSize, ... The "+(-0.5)" is "-1 + 0.5": cache entry (x, y)
    // holds the centre of input pixel group_base + (x, y) - 1, which provides
    // the one-pixel apron around the group's own pixels.
    for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
        uint x = (uint)id / 10, y = (uint)id % 10;
        inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x+x)+(-0.5), float(group_base.y+y)+(-0.5))).xyz;
        inp_luma[id] = dot(inp[id], color_primary);
    }
    // All cache entries must be written before any thread reads its neighbours.
    barrier();
#if CURRENT_PASS == LAST_PASS
    // Threads whose top-left output pixel fails CheckViewport() write nothing.
    // They still took part in the cache fill and the barrier above.
    uint2 destPos = blockStart + threadId.xy * 3;
    if (!CheckViewport(destPos)) {
        return;
    }
#endif
    // 3 x 3 luma neighbourhood, luma[3 * dx + dy]; luma4 is the centre.
    float luma0 = inp_luma[local_pos + 0];
    float luma1 = inp_luma[local_pos + 1];
    float luma2 = inp_luma[local_pos + 2];
    float luma3 = inp_luma[local_pos + 10];
    float luma4 = inp_luma[local_pos + 11];
    float luma5 = inp_luma[local_pos + 12];
    float luma6 = inp_luma[local_pos + 20];
    float luma7 = inp_luma[local_pos + 21];
    float luma8 = inp_luma[local_pos + 22];

    // Weighted sum of (gx*gx, gx*gy, gy*gy) over the nine taps. gx / gy are
    // one-sided differences at the border of the window and halved two-sided
    // differences where both neighbours exist.
    vec3 abd = vec3(0.0, 0.0, 0.0);
    float gx, gy;
    gx = (luma3-luma0);
    gy = (luma1-luma0);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
    gx = (luma4-luma1);
    gy = (luma2-luma0)/2.0;
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
    gx = (luma5-luma2);
    gy = (luma2-luma1);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
    gx = (luma6-luma0)/2.0;
    gy = (luma4-luma3);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
    gx = (luma7-luma1)/2.0;
    gy = (luma5-luma3)/2.0;
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
    gx = (luma8-luma2)/2.0;
    gy = (luma5-luma4);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
    gx = (luma6-luma3);
    gy = (luma7-luma6);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
    gx = (luma7-luma4);
    gy = (luma8-luma6)/2.0;
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
    gx = (luma8-luma5);
    gy = (luma8-luma7);
    abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;

    // Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]] from its
    // trace T and determinant D.
    float a = abd.x, b = abd.y, d = abd.z;
    float T = a + d, D = a * d - b * b;
    float delta = sqrt(max(T * T / 4.0 - D, 0.0));
    float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
    float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);

    // The upstream GLSL uses mix(x, 0.0, <bool>) for the two guarded values
    // below. With a bool selector GLSL picks one operand and ignores the other,
    // whereas the lerp() that `mix` expands to here blends arithmetically. In a
    // flat region (a == b == d == 0) the ignored operand is atan2(0, 0) resp.
    // 0 / 0, and lerp() would let a NaN through into the LUT coordinate.
    // An explicit select restores the upstream semantics.
    float theta = (abs(b) < 1.192092896e-7) ? 0.0 : mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793);
    float lambda = sqrtL1;
    float mu = (sqrtL1 + sqrtL2 < 1.192092896e-7) ? 0.0 : (sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2);

    // Quantise to 24 angle bins x 3 strength levels x 3 coherence levels and
    // address the centre of the matching row out of 216 in the LUT.
    float angle = floor(theta * 24.0 / 3.141592653589793);
    float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
    float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
    float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;

    // res0 / res1 each accumulate four output pixels (one per matrix row).
    // Every LUT fetch pair (w0, w1) is applied to one input sample and, with
    // the components reversed and w0 / w1 swapped, to the sample mirrored
    // through the centre of the 3 x 3 window (offsets 0<->22, 1<->21, 2<->20,
    // 10<->12). The centre sample (offset 11) has no mirror partner.
    mat4x3 res0 = 0.0, res1 = 0.0;
    vec4 w0, w1;
    w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y));
    w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y));
    res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 22], w1.wzyx);
    res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 22], w0.wzyx);
    w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y));
    w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y));
    res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 21], w1.wzyx);
    res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 21], w0.wzyx);
    w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y));
    w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y));
    res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 20], w1.wzyx);
    res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 20], w0.wzyx);
    w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y));
    w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y));
    res0 += outerProduct(inp[local_pos + 10], w0) + outerProduct(inp[local_pos + 12], w1.wzyx);
    res1 += outerProduct(inp[local_pos + 10], w1) + outerProduct(inp[local_pos + 12], w0.wzyx);
    w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y));
    w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y));
    res0 += outerProduct(inp[local_pos + 11], w0);
    res1 += outerProduct(inp[local_pos + 11], w1);

    // Keep every interpolated pixel inside [0, 1].
    res0[0] = clamp(res0[0], 0.0, 1.0);
    res0[1] = clamp(res0[1], 0.0, 1.0);
    res0[2] = clamp(res0[2], 0.0, 1.0);
    res0[3] = clamp(res0[3], 0.0, 1.0);
    res1[0] = clamp(res1[0], 0.0, 1.0);
    res1[1] = clamp(res1[1], 0.0, 1.0);
    res1[2] = clamp(res1[2], 0.0, 1.0);
    res1[3] = clamp(res1[3], 0.0, 1.0);

    // Write the 3 x 3 output group. The middle pixel (1, 1) is the untouched
    // input sample; the other eight come from the rows of res0 and res1.
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), inp[local_pos + 11]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]);
}
Loading
Loading