diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp index 3585c9a3ff12..ab244aacf812 100644 --- a/GPU/Software/DrawPixel.cpp +++ b/GPU/Software/DrawPixel.cpp @@ -325,6 +325,10 @@ static inline bool DepthTestPassed(GEComparison func, int x, int y, int stride, } } +bool CheckDepthTestPassed(GEComparison func, int x, int y, int stride, u16 z) { + return DepthTestPassed(func, x, y, stride, z); +} + static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) { // All of the operations here intentionally preserve alpha/stencil. switch (op) { @@ -400,7 +404,7 @@ template void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) { Vec4 prim_color = Vec4(color_in).Clamp(0, 255); // Depth range test - applied in clear mode, if not through mode. - if (pixelID.applyDepthRange) + if (pixelID.applyDepthRange && !pixelID.earlyZChecks) if (z < pixelID.cached.minz || z > pixelID.cached.maxz) return; @@ -436,14 +440,14 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo } // Also apply depth at the same time. If disabled, same as passing. 
- if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { + if (!pixelID.earlyZChecks && pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZFail(), stencil); SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil); return; } stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZPass(), stencil); - } else { + } else if (!pixelID.earlyZChecks) { if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { return; } diff --git a/GPU/Software/DrawPixel.h b/GPU/Software/DrawPixel.h index b1063d59a15e..982ee2776531 100644 --- a/GPU/Software/DrawPixel.h +++ b/GPU/Software/DrawPixel.h @@ -41,6 +41,8 @@ SingleFunc GetSingleFunc(const PixelFuncID &id); void Init(); void Shutdown(); +bool CheckDepthTestPassed(GEComparison func, int x, int y, int stride, u16 z); + bool DescribeCodePtr(const u8 *ptr, std::string &name); struct PixelBlendState { diff --git a/GPU/Software/DrawPixelX86.cpp b/GPU/Software/DrawPixelX86.cpp index 536f1efb0214..8f006bece4fd 100644 --- a/GPU/Software/DrawPixelX86.cpp +++ b/GPU/Software/DrawPixelX86.cpp @@ -149,7 +149,7 @@ RegCache::Reg PixelJitCache::GetColorOff(const PixelFuncID &id) { if (!regCache_.Has(RegCache::GEN_COLOR_OFF)) { Describe("GetColorOff"); if (id.useStandardStride && !id.dithering) { - bool loadDepthOff = id.depthWrite || id.DepthTestFunc() != GE_COMP_ALWAYS; + bool loadDepthOff = id.depthWrite || (id.DepthTestFunc() != GE_COMP_ALWAYS && !id.earlyZChecks); X64Reg depthTemp = INVALID_REG; X64Reg argYReg = regCache_.Find(RegCache::GEN_ARG_Y); X64Reg argXReg = regCache_.Find(RegCache::GEN_ARG_X); @@ -345,7 +345,7 @@ void PixelJitCache::WriteConstantPool(const PixelFuncID &id) { } bool 
PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) { - if (id.applyDepthRange) { + if (id.applyDepthRange && !id.earlyZChecks) { Describe("ApplyDepthR"); X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z); X64Reg idReg = GetPixelID(); @@ -365,7 +365,7 @@ bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) { // Since this is early on, try to free up the z reg if we don't need it anymore. if (id.clearMode && !id.DepthClear()) regCache_.ForceRelease(RegCache::GEN_ARG_Z); - else if (!id.clearMode && !id.depthWrite && id.DepthTestFunc() == GE_COMP_ALWAYS) + else if (!id.clearMode && !id.depthWrite && (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks)) regCache_.ForceRelease(RegCache::GEN_ARG_Z); return true; @@ -721,7 +721,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil } bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg stencilReg) { - if (id.DepthTestFunc() == GE_COMP_ALWAYS) + if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks) return true; X64Reg depthOffReg = GetDepthOff(id); @@ -964,7 +964,7 @@ bool PixelJitCache::Jit_WriteStencilOnly(const PixelFuncID &id, RegCache::Reg st } bool PixelJitCache::Jit_DepthTest(const PixelFuncID &id) { - if (id.DepthTestFunc() == GE_COMP_ALWAYS) + if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks) return true; if (id.DepthTestFunc() == GE_COMP_NEVER) { diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 084c8978296d..0471114caae7 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -163,6 +163,13 @@ void ComputePixelFuncID(PixelFuncID *id, bool throughMode) { id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY; id->applyFog = gstate.isFogEnabled() && !throughMode; + + id->earlyZChecks = id->DepthTestFunc() != GE_COMP_ALWAYS; + if (id->stencilTest && id->earlyZChecks) { + // Can't do them early if stencil might need to write. 
+ if (id->SFail() != GE_STENCILOP_KEEP || id->ZFail() != GE_STENCILOP_KEEP) + id->earlyZChecks = false; + } } // Cache some values for later convenience. diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index 72207ebd5abc..46307fc55cbb 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -107,7 +107,8 @@ struct PixelFuncID { uint8_t sFail : 3; uint8_t zFail : 3; uint8_t zPass : 3; - // 60 bits, 4 free. + bool earlyZChecks : 1; + // 61 bits, 3 free. }; }; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index a387e3b943f4..c534d3779280 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -710,6 +710,11 @@ void DrawTriangleSlice( const bool flatColor1 = flatColorAll || (v0.color1 == v1.color1 && v0.color1 == v2.color1); const bool noFog = clearMode || !pixelID.applyFog || (v0.fogdepth >= 1.0f && v1.fogdepth >= 1.0f && v2.fogdepth >= 1.0f); + if (pixelID.applyDepthRange && flatZ) { + if (v0.screenpos.z < pixelID.cached.minz || v0.screenpos.z > pixelID.cached.maxz) + return; + } + #if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC) uint32_t bpp = pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2; std::string tag = StringFromFormat("DisplayListT_%08x", state.listPC); @@ -754,6 +759,32 @@ void DrawTriangleSlice( if (AnyMask(mask)) { Vec4 wsum_recip = EdgeRecip(w0, w1, w2); + Vec4 z; + if (flatZ) { + z = Vec4::AssignToAll(v2.screenpos.z); + } else { + // Z is interpolated pretty much directly. 
+ Vec4 zfloats = w0.Cast() * v0.screenpos.z + w1.Cast() * v1.screenpos.z + w2.Cast() * v2.screenpos.z; + z = (zfloats * wsum_recip).Cast(); + } + + if (pixelID.earlyZChecks) { + for (int i = 0; i < 4; ++i) { + if (pixelID.applyDepthRange) { + if (z[i] < pixelID.cached.minz || z[i] > pixelID.cached.maxz) + mask[i] = -1; + } + if (mask[i] < 0) + continue; + + int x = p.x + (i & 1); + int y = p.y + (i / 2); + if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z[i])) { + mask[i] = -1; + } + } + } + // Color interpolation is not perspective corrected on the PSP. Vec4 prim_color[4]; if (!flatColor0) { @@ -816,15 +847,6 @@ void DrawTriangleSlice( } } - Vec4 z; - if (flatZ) { - z = Vec4::AssignToAll(v2.screenpos.z); - } else { - // Z is interpolated pretty much directly. - Vec4 zfloats = w0.Cast() * v0.screenpos.z + w1.Cast() * v1.screenpos.z + w2.Cast() * v2.screenpos.z; - z = (zfloats * wsum_recip).Cast(); - } - PROFILE_THIS_SCOPE("draw_tri_px"); DrawingCoords subp = p; for (int i = 0; i < 4; ++i) { @@ -947,6 +969,12 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords & Vec4 z = Vec4::AssignToAll(v1.screenpos.z); Vec3 sec_color = v1.color1; + if (state.pixelID.applyDepthRange) { + // We can bail early since the Z is flat. + if (v1.screenpos.z < state.pixelID.cached.minz || v1.screenpos.z > state.pixelID.cached.maxz) + return; + } + #if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC) uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 
4 : 2; std::string tag = StringFromFormat("DisplayListR_%08x", state.listPC); @@ -972,6 +1000,19 @@ void DrawRectangle(const VertexData &v0, const VertexData &v1, const BinCoords & prim_color[i] = v1.color0; } + if (state.pixelID.earlyZChecks) { + for (int i = 0; i < 4; ++i) { + if (mask[i] < 0) + continue; + + int x = p.x + (i & 1); + int y = p.y + (i / 2); + if (!CheckDepthTestPassed(state.pixelID.DepthTestFunc(), x, y, state.pixelID.cached.depthbufStride, z[i])) { + mask[i] = -1; + } + } + } + if (state.enableTextures) { Vec4 s, t; s = Vec4::AssignToAll(st.s()) + sto4; @@ -1038,6 +1079,20 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta auto &pixelID = state.pixelID; auto &samplerID = state.samplerID; + DrawingCoords p = TransformUnit::ScreenToDrawing(pos); + u16 z = pos.z; + + if (pixelID.earlyZChecks) { + if (pixelID.applyDepthRange) { + if (z < pixelID.cached.minz || z > pixelID.cached.maxz) + return; + } + + if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) { + return; + } + } + if (state.enableTextures) { float s = v0.texturecoords.s(); float t = v0.texturecoords.t(); @@ -1060,9 +1115,6 @@ void DrawPoint(const VertexData &v0, const BinCoords &range, const RasterizerSta if (!pixelID.clearMode) prim_color += Vec4(sec_color, 0); - DrawingCoords p = TransformUnit::ScreenToDrawing(pos); - u16 z = pos.z; - u8 fog = 255; if (pixelID.applyFog) { fog = ClampFogDepth(v0.fogdepth); @@ -1302,7 +1354,23 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range double z = a.z; const int steps1 = steps == 0 ? 
1 : steps; for (int i = 0; i < steps; i++) { - if (x >= range.x1 && y >= range.y1 && x <= range.x2 && y <= range.y2) { + DrawingCoords p = TransformUnit::ScreenToDrawing(x, y); + + bool maskOK = x >= range.x1 && y >= range.y1 && x <= range.x2 && y <= range.y2; + if (maskOK) { + if (pixelID.earlyZChecks) { + if (pixelID.applyDepthRange) { + if (z < pixelID.cached.minz || z > pixelID.cached.maxz) + maskOK = false; + } + + if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), p.x, p.y, pixelID.cached.depthbufStride, z)) { /* Test at the converted drawing coords p (as DrawPoint does and as drawPixel() below writes), not the raw x/y. */ + maskOK = false; + } + } + } + + if (maskOK) { // Interpolate between the two points. Vec4 prim_color; Vec3 sec_color; @@ -1368,7 +1436,6 @@ void DrawLine(const VertexData &v0, const VertexData &v1, const BinCoords &range prim_color += Vec4(sec_color, 0); PROFILE_THIS_SCOPE("draw_px"); - DrawingCoords p = TransformUnit::ScreenToDrawing(x, y); state.drawPixel(p.x, p.y, z, fog, ToVec4IntArg(prim_color), pixelID); #if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)