diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 1fb0f43b99963..5600479fa781e 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -192,56 +192,42 @@ float4 sample_p(float u) return Palette.Sample(PaletteSampler, u); } -float4 clamp_wrap_uv(float4 uv) +float2 clamp_wrap_uv_2(uint mode, float2 uv, float tex_size, float2 min_max, uint2 msk_fix) { - float4 tex_size; - - if (PS_INVALID_TEX0 == 1) - tex_size = WH.zwzw; - else - tex_size = WH.xyxy; - - if(PS_WMS == PS_WMT) + if (mode == 2) { - if(PS_WMS == 2) - { - uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); - } - else if(PS_WMS == 3) - { - #if PS_FST == 0 + return clamp(uv, min_max.xx, min_max.yy); + } + if (mode == 3) + { + #if PS_FST == 0 // wrap negative uv coords to avoid an off by one error that shifted // textures. Fixes Xenosaga's hair issue. uv = frac(uv); - #endif - uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; - } + #endif + + uv *= tex_size; + float2 masked = float2((uint2(uv) & msk_fix.xx) | msk_fix.yy); + + if (msk_fix.x & 1) // For upscaling, let the bottom bit mask everything below + masked += frac(uv); + + return masked / tex_size; } + return uv; +} + +float4 clamp_wrap_uv(float4 uv) +{ + float2 tex_size; + + if (PS_INVALID_TEX0 == 1) + tex_size = WH.zw; else - { - if(PS_WMS == 2) - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - else if(PS_WMS == 3) - { - #if PS_FST == 0 - uv.xz = frac(uv.xz); - #endif - uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } - if(PS_WMT == 2) - { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - } - else if(PS_WMT == 3) - { - #if PS_FST == 0 - uv.yw = frac(uv.yw); - #endif - uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; - } - } + tex_size = WH.xy; + + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); return uv; } @@ -326,42 +312,28 @@ float4 fetch_c(int2 uv) // Depth sampling ////////////////////////////////////////////////////////////////////// -int2 clamp_wrap_uv_depth(int2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, int2 msk_fix) { - int4 mask = (int4)MskFix << 4; - if (PS_WMS == PS_WMT) + int2 mask = msk_fix << 4; + + if (mode == 2) + return clamp(uv, mask.x, mask.y | 0xF); + if (mode == 3) { - if (PS_WMS == 2) - { - uv = clamp(uv, mask.xy, mask.zw); - } - else if (PS_WMS == 3) - { - uv = (uv & mask.xy) | mask.zw; - } - } - else - { - if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - else if (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } - if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - else if (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } + if (msk_fix.x & 1) + mask.x |= 0xF; + return (uv & mask.x) | mask.y; } return uv; } +int2 clamp_wrap_uv_depth(int2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, (int2)MskFix.xz); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, (int2)MskFix.yw); + return uv; +} + float4 sample_depth(float2 st, float2 pos) { float2 uv_f = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)PS_SCALE_FACTOR * (float2)(1.0f / 16.0f); diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 7d88226c9154b..c90432003656b 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -149,54 +149,43 @@ vec4 sample_p(float idx) return texture(PaletteSampler, vec2(idx, 0.0f)); } -vec4 clamp_wrap_uv(vec4 uv) +vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk_fix) { - vec4 uv_out = uv; -#if PS_INVALID_TEX0 == 1 - vec4 tex_size = WH.zwzw; -#else - vec4 tex_size = WH.xyxy; -#endif - -#if PS_WMS == PS_WMT - -#if PS_WMS == 2 - uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); -#elif PS_WMS == 3 - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv = fract(uv); - #endif - uv_out = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; -#endif - -#else // PS_WMS != PS_WMT - -#if PS_WMS == 2 - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + if (mode == 2) + { + return clamp(uv, min_max.xx, min_max.yy); + } + if (mode == 3) + { + #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv = fract(uv); + #endif -#elif PS_WMS == 3 - #if PS_FST == 0 - uv.xz = fract(uv.xz); - #endif - uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + uv *= tex_size; + vec2 masked = vec2((uvec2(uv) & msk_fix.xx) | msk_fix.yy); -#endif + if ((msk_fix.x & 1) != 0) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); -#if PS_WMT == 2 - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return masked / tex_size; + } + return uv; +} -#elif PS_WMT == 3 - #if PS_FST == 0 - uv.yw = fract(uv.yw); - #endif - uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; +vec4 clamp_wrap_uv(vec4 uv) +{ +#if PS_INVALID_TEX0 == 1 + vec2 tex_size = WH.zw; +#else + vec2 tex_size = WH.xy; #endif -#endif + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); - return uv_out; + return uv; } mat4 sample_4c(vec4 uv) @@ -294,39 +283,28 @@ vec4 fetch_c(ivec2 uv) ////////////////////////////////////////////////////////////////////// // Depth sampling ////////////////////////////////////////////////////////////////////// -ivec2 clamp_wrap_uv_depth(ivec2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) { - ivec2 uv_out = uv; - // Keep the full precision // It allow to multiply the ScalingFactor before the 1/16 coeff - ivec4 mask = ivec4(MskFix) << 4; + ivec2 mask = msk_fix << 4; -#if PS_WMS == PS_WMT - -#if PS_WMS == 2 - uv_out = clamp(uv, mask.xy, mask.zw); -#elif PS_WMS == 3 - uv_out = (uv & mask.xy) | mask.zw; -#endif - -#else // PS_WMS != PS_WMT - -#if PS_WMS == 2 - uv_out.x = clamp(uv.x, mask.x, mask.z); -#elif PS_WMS == 3 - uv_out.x = (uv.x & mask.x) | mask.z; -#endif - -#if PS_WMT == 2 - uv_out.y = clamp(uv.y, mask.y, mask.w); -#elif PS_WMT == 3 - uv_out.y = (uv.y & mask.y) | mask.w; -#endif - -#endif + if (mode == 2) + return clamp(uv, mask.x, mask.y | 0xF); + if (mode == 3) + { + if ((msk_fix.x & 1) != 0) + mask.x |= 0xF; + return (uv & mask.x) | mask.y; + } + return uv; +} - return uv_out; +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, ivec2(MskFix.xz)); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, ivec2(MskFix.yw)); + return uv; } vec4 sample_depth(vec2 st) diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index b15b1761908fd..654330b03e800 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -444,62 +444,44 @@ vec4 sample_p(float u) return texture(Palette, vec2(u, 0.0f)); } -vec4 clamp_wrap_uv(vec4 uv) +vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk_fix) { - vec4 tex_size; - - #if PS_INVALID_TEX0 - tex_size = WH.zwzw; - #else - tex_size = WH.xyxy; - #endif - - #if PS_WMS == PS_WMT + if (mode == 2) { - #if PS_WMS == 2 - { - uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); - } - #elif PS_WMS == 3 - { - #if PS_FST == 0 + return clamp(uv, min_max.xx, min_max.yy); + } + if (mode == 3) + { + #if PS_FST == 0 // wrap negative uv coords to avoid an off by one error that shifted // textures. Fixes Xenosaga's hair issue. uv = fract(uv); - #endif - uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; - } #endif + + uv *= tex_size; + vec2 masked = vec2((uvec2(uv) & msk_fix.xx) | msk_fix.yy); + + if ((msk_fix.x & 1) != 0) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); + + return masked / tex_size; } + return uv; +} + +vec4 clamp_wrap_uv(vec4 uv) +{ + vec2 tex_size; + + #if PS_INVALID_TEX0 + tex_size = WH.zw; #else - { - #if PS_WMS == 2 - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - #elif PS_WMS == 3 - { - #if PS_FST == 0 - uv.xz = fract(uv.xz); - #endif - uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } - #endif - #if PS_WMT == 2 - { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - } - #elif PS_WMT == 3 - { - #if PS_FST == 0 - uv.yw = fract(uv.yw); - #endif - uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; - } - #endif - } + tex_size = WH.xy; #endif + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); + return uv; } @@ -580,43 +562,25 @@ vec4 fetch_c(ivec2 uv) // Depth sampling ////////////////////////////////////////////////////////////////////// -ivec2 clamp_wrap_uv_depth(ivec2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) { - ivec4 mask = ivec4(MskFix << 4); - #if (PS_WMS == PS_WMT) + ivec2 mask = msk_fix << 4; + + if (mode == 2) + return clamp(uv, mask.x, mask.y | 0xF); + if (mode == 3) { - #if (PS_WMS == 2) - { - uv = clamp(uv, mask.xy, mask.zw); - } - #elif (PS_WMS == 3) - { - uv = (uv & mask.xy) | mask.zw; - } - #endif + if ((msk_fix.x & 1) != 0) + mask.x |= 0xF; + return (uv & mask.x) | mask.y; } - #else - { - #if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - #elif (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } - #endif - #if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - #elif (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } - #endif - } - #endif + return uv; +} + +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, ivec2(MskFix.xz)); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, ivec2(MskFix.yw)); return uv; } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b67bcc2620bdd..162d77030e4db 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1370,9 +1370,6 @@ void GSRendererHW::Draw() GSVector4i unscaled_size = GSVector4i(GSVector4(m_src->m_texture->GetSize()) / GSVector4(m_src->m_texture->GetScale())); if (m_context->CLAMP.WMS == CLAMP_REPEAT && (tmm.uses_boundary & TextureMinMaxResult::USES_BOUNDARY_U) && unscaled_size.x != tw) { - // Our shader-emulated region repeat doesn't upscale :( - // Try to avoid it if possible - // TODO: Upscale-supporting shader-emulated region repeat if (unscaled_size.x < tw && m_vt.m_min.t.x > -(tw - unscaled_size.x) && m_vt.m_max.t.x < tw) { // Game only extends into data we don't have (but doesn't wrap around back onto good data), clamp seems like the most reasonable solution diff --git a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp index 5edad8bd7aab4..9f48bc555c6d8 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp @@ -1222,7 +1222,8 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex) if (complex_wms_wmt) { m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);; - m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy(); + GSVector4 upscale_offset(0.f, 0.f, (15.f / 16.f), (15.f / 16.f)); // Adjust end position to the end of the upscaled pixel + m_conf.cb_ps.MinMax = (GSVector4(m_conf.cb_ps.MskFix) + upscale_offset) / WH.xyxy(); } else if (trilinear_manual) { diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 58cc187bfa258..09ca81370017b 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -263,55 +263,38 @@ struct PSMain return palette.sample(palette_sampler, float2(idx, 0)); } - float4 clamp_wrap_uv(float4 uv) + float2 clamp_wrap_uv_2(uint mode, float2 uv, float tex_size, float2 min_max, uint2 msk_fix) { - float4 uv_out = uv; - float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy; - - if (PS_WMS == PS_WMT) + if (mode == 2) { - if (PS_WMS == 2) - { - uv_out = clamp(uv, cb.uv_min_max.xyxy, cb.uv_min_max.zwzw); - } - else if (PS_WMS == 3) - { - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - if (!FST) - uv = fract(uv); - - uv_out = float4((ushort4(uv * tex_size) & ushort4(cb.uv_msk_fix.xyxy)) | ushort4(cb.uv_msk_fix.zwzw)) / tex_size; - } + return clamp(uv, min_max.xx, min_max.yy); } - else + if (mode == 3) { - if (PS_WMS == 2) - { - uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz); - } - else if (PS_WMS == 3) - { - if (!FST) - uv.xz = fract(uv.xz); + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + if (!FST) + uv = fract(uv); - uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx; - } + uv *= tex_size; + float2 masked = float2((uint2(uv) & msk_fix.xx) | msk_fix.yy); - if (PS_WMT == 2) - { - uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww); - } - else if (PS_WMT == 3) - { - if (!FST) - uv.yw = fract(uv.yw); + if (msk_fix.x & 1) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); - uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy; - } + return masked / tex_size; } + return uv; + } + + float4 clamp_wrap_uv(float4 uv) + { + float2 tex_size = PS_INVALID_TEX0 ? cb.wh.zw : cb.wh.xy; + + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, cb.uv_min_max.xz, cb.uv_msk_fix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, cb.uv_min_max.yw, cb.uv_msk_fix.yw); - return uv_out; + return uv; } float4x4 sample_4c(float4 uv) @@ -379,39 +362,33 @@ struct PSMain // MARK: Depth sampling - ushort2 clamp_wrap_uv_depth(ushort2 uv) + uint clamp_wrap_uv_depth_1(uint mode, uint uv, uint2 msk_fix) { - ushort2 uv_out = uv; // Keep the full precision // It allow to multiply the ScalingFactor before the 1/16 coeff - ushort4 mask = ushort4(cb.uv_msk_fix) << 4; + uint2 mask = msk_fix << 4; - if (PS_WMS == PS_WMT) + if (mode == 2) + return clamp(uv, mask.x, mask.y | 0xF); + if (mode == 3) { - if (PS_WMS == 2) - uv_out = clamp(uv, mask.xy, mask.zw); - else if (PS_WMS == 3) - uv_out = (uv & mask.xy) | mask.zw; - } - else - { - if (PS_WMS == 2) - uv_out.x = clamp(uv.x, mask.x, mask.z); - else if (PS_WMS == 3) - uv_out.x = (uv.x & mask.x) | mask.z; - - if (PS_WMT == 2) - uv_out.y = clamp(uv.y, mask.y, mask.w); - else if (PS_WMT == 3) - uv_out.y = (uv.y & mask.y) | mask.w; + if (msk_fix.x & 1) + mask.x |= 0xF; + return (uv & mask.x) | mask.y; } + return uv; + } - return uv_out; + uint2 clamp_wrap_uv_depth(uint2 uv) + { + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, cb.uv_msk_fix.xz); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, cb.uv_msk_fix.yw); + return uv; } float4 sample_depth(float2 st) { - float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f)); + float2 uv_f = float2(clamp_wrap_uv_depth(uint2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f)); ushort2 uv = ushort2(uv_f); float4 t = float4(0);