From 9a3ff690918a8e03f4a4255f9e67ff532cb99033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 17 Apr 2023 09:34:26 +0200 Subject: [PATCH] Workaround for some SOCOM games' misuse of CLUT8 textures. Emulating this correctly would be possible too but would only work at 1x rendering resolution. --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + GPU/Common/DepalettizeShaderCommon.cpp | 7 ++++++ GPU/Common/Draw2D.cpp | 2 +- GPU/Common/TextureCacheCommon.cpp | 31 +++++++++++++++++++++----- GPU/Common/TextureDecoder.cpp | 19 ++++++++++++++++ GPU/Common/TextureDecoder.h | 25 ++++++--------------- GPU/GLES/ShaderManagerGLES.h | 2 +- assets/compat.ini | 13 +++++++++++ 9 files changed, 76 insertions(+), 25 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index da7746cfca7d..578bcdec3ae4 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -130,6 +130,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "BlockTransferDepth", &flags_.BlockTransferDepth); CheckSetting(iniFile, gameID, "DaxterRotatedAnalogStick", &flags_.DaxterRotatedAnalogStick); CheckSetting(iniFile, gameID, "ForceMaxDepthResolution", &flags_.ForceMaxDepthResolution); + CheckSetting(iniFile, gameID, "SOCOMClut8Replacement", &flags_.SOCOMClut8Replacement); } void Compatibility::CheckVRSettings(IniFile &iniFile, const std::string &gameID) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index 3fdf284631e3..6bbdc3353ce1 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -100,6 +100,7 @@ struct CompatFlags { bool BlockTransferDepth; bool DaxterRotatedAnalogStick; bool ForceMaxDepthResolution; + bool SOCOMClut8Replacement; }; struct VRCompat { diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 1d3630919ced..13d9f3c58cc8 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ 
b/GPU/Common/DepalettizeShaderCommon.cpp @@ -111,6 +111,13 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n"); if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n"); writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); + + if (config.textureFormat == GE_TFMT_CLUT8) { + // SOCOM case. #16210 + // To debug the issue, remove this shift to see the texture (check for clamping etc). + writer.C(" index >>= 8;\n"); + } + break; case GE_FORMAT_DEPTH16: // Decode depth buffer. diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 4f19277956bf..95a53bf2c9d5 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -261,7 +261,7 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::functionCreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), info.tag); - _assert_(fs); + _assert_msg_(fs, "Failed to create shader module!\n%s", fsCode); // verts have positions in 2D clip coordinates. static const InputLayoutDesc desc = { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 8f0ffb40d2d9..e483248e7f96 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1028,18 +1028,28 @@ bool TextureCacheCommon::MatchFramebuffer( } // Check works for D16 too. + // These are combinations that we have special-cased handling for. There are more + // ones possible, but rare - we'll add them as we find them used. 
const bool matchingClutFormat = (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16) || - (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8); + (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8) || + (fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement); - const int texBitsPerPixel = std::max(1U, (u32)textureBitsPerPixel[entry.format]); + const int texBitsPerPixel = TextureFormatBitsPerPixel(entry.format); const int byteOffset = texaddr - addr; if (byteOffset > 0) { + int texbpp = texBitsPerPixel; + if (fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8) { + // In this case we treat CLUT8 as if it were CLUT16, see issue #16210. So we need + // to compute the x offset appropriately. + texbpp = 16; + } + matchInfo->yOffset = byteOffset / fb_stride_in_bytes; - matchInfo->xOffset = 8 * (byteOffset % fb_stride_in_bytes) / texBitsPerPixel; + matchInfo->xOffset = 8 * (byteOffset % fb_stride_in_bytes) / texbpp; } else if (byteOffset < 0) { int texelOffset = 8 * byteOffset / texBitsPerPixel; // We don't support negative Y offsets, and negative X offsets are only for the Killzone workaround. @@ -1066,7 +1076,7 @@ bool TextureCacheCommon::MatchFramebuffer( // Trying to play it safe. Below 0x04110000 is almost always framebuffers. // TODO: Maybe we can reduce this check and find a better way above 0x04110000? 
if (matchInfo->yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000 && !PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) { - WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height); + WARN_LOG_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset, framebuffer->width, framebuffer->height); return false; } @@ -1133,6 +1143,11 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) gstate_c.curTextureWidth = framebuffer->bufferWidth; gstate_c.curTextureHeight = framebuffer->bufferHeight; + if (candidate.channel == RASTER_COLOR && gstate.getTextureFormat() == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement) { + // See #16210. UV must be adjusted as if the texture was twice the width. + gstate_c.curTextureWidth *= 2.0f; + } + if (needsDepthXSwizzle) { gstate_c.curTextureWidth = RoundUpToPowerOf2(gstate_c.curTextureWidth); } @@ -2145,6 +2160,7 @@ void TextureCacheCommon::ApplyTexture() { } } +// Can we depalettize at all? This refers to both in-fragment-shader depal and "traditional" depal through a separate pass. static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { if (IsClutFormat(texFormat)) { switch (bufferFormat) { @@ -2155,6 +2171,10 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma if (texFormat == GE_TFMT_CLUT16) { return true; } + if (texFormat == GE_TFMT_CLUT8 && bufferFormat == GE_FORMAT_5551 && PSP_CoreParameter().compat.flags().SOCOMClut8Replacement) { + // Wacky case from issue #16210 (SOCOM etc). 
+ return true; + } break; case GE_FORMAT_8888: if (texFormat == GE_TFMT_CLUT32 || texFormat == GE_TFMT_CLUT8) { // clut8 takes a special depal mode. @@ -2214,7 +2234,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && clutRenderAddress_ == 0xFFFFFFFF && !gstate_c.curTextureIs3D && - draw_->GetShaderLanguageDesc().bitwiseOps; + draw_->GetShaderLanguageDesc().bitwiseOps && + !(texFormat == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551); // socom switch (draw_->GetShaderLanguageDesc().shaderLanguage) { case ShaderLanguage::HLSL_D3D9: diff --git a/GPU/Common/TextureDecoder.cpp b/GPU/Common/TextureDecoder.cpp index a12be66cb9ba..32da2dffa90c 100644 --- a/GPU/Common/TextureDecoder.cpp +++ b/GPU/Common/TextureDecoder.cpp @@ -48,6 +48,25 @@ #define DO_NOT_VECTORIZE_LOOP #endif +const u8 textureBitsPerPixel[16] = { + 16, //GE_TFMT_5650, + 16, //GE_TFMT_5551, + 16, //GE_TFMT_4444, + 32, //GE_TFMT_8888, + 4, //GE_TFMT_CLUT4, + 8, //GE_TFMT_CLUT8, + 16, //GE_TFMT_CLUT16, + 32, //GE_TFMT_CLUT32, + 4, //GE_TFMT_DXT1, + 8, //GE_TFMT_DXT3, + 8, //GE_TFMT_DXT5, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, +}; + #ifdef _M_SSE static u32 QuickTexHashSSE2(const void *checkp, u32 size) { diff --git a/GPU/Common/TextureDecoder.h b/GPU/Common/TextureDecoder.h index ad54815ff7e5..4976bbd958d7 100644 --- a/GPU/Common/TextureDecoder.h +++ b/GPU/Common/TextureDecoder.h @@ -73,27 +73,16 @@ uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y); uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y); uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y); -static const u8 textureBitsPerPixel[16] = { - 16, //GE_TFMT_5650, - 16, //GE_TFMT_5551, - 16, //GE_TFMT_4444, - 32, //GE_TFMT_8888, - 4, //GE_TFMT_CLUT4, - 8, //GE_TFMT_CLUT8, - 16, //GE_TFMT_CLUT16, - 32, //GE_TFMT_CLUT32, - 4, //GE_TFMT_DXT1, - 8, 
//GE_TFMT_DXT3, - 8, //GE_TFMT_DXT5, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, -}; +extern const u8 textureBitsPerPixel[16]; u32 GetTextureBufw(int level, u32 texaddr, GETextureFormat format); +// WARNING: Bits, not bytes. This is needed due to the presence of 4-bit formats. +inline u32 TextureFormatBitsPerPixel(GETextureFormat format) { + u32 bits = textureBitsPerPixel[(int)format]; + return bits != 0 ? bits : 1; // Best to return 1 here to survive divisions in case of invalid data. +} + inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) { return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask; } diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 027c7def9fde..ff9478ca240d 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -209,7 +209,7 @@ class ShaderManagerGLES : public ShaderManagerCommon { GLRenderManager *render_; LinkedShaderCache linkedShaderCache_; - bool lastVShaderSame_; + bool lastVShaderSame_ = false; FShaderID lastFSID_; VShaderID lastVSID_; diff --git a/assets/compat.ini b/assets/compat.ini index 97b6498e6332..3e7a9c782a37 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1555,3 +1555,16 @@ UCET00844 = true UCUS98705 = true UCED00971 = true UCUS98713 = true + +[SOCOMClut8Replacement] +# SOCOM and other games use CLUT8 with crafty sampling as if it was CLUT16. Issue #16210 +UCES00855 = true +UCUS98649 = true +NPUG70003 = true # demo +UCUS98714 = true # demo + +# SOCOM Fireteam Bravo 3 +UCES01242 = true +NPHG00032 = true +UCUS98716 = true +NPEG90024 = true # demo