RPCS3 · kd-11 · Sep 9, 2022 · Sep 7, 2022 · Sep 7, 2022 · Sep 8, 2022
diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp
@@ -467,7 +467,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
 		for (auto& info : m_vertex_layout.interleaved_blocks)
 		{
 			const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
-			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
+			info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
 		}
 	}
 

diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -308,7 +308,7 @@ void GLGSRender::on_init_thread()
 	}
 
 	//Occlusion query
-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		GLuint handle = 0;
 		auto &query = m_occlusion_query_data[i];
@@ -484,7 +484,7 @@ void GLGSRender::on_exit()
 
 	m_shader_interpreter.destroy();
 
-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		auto &query = m_occlusion_query_data[i];
 		query.active = false;

diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp
@@ -127,7 +127,7 @@ namespace
 		vertex_input_state operator()(const rsx::draw_inlined_array& /*command*/)
 		{
 			const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
-			const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
+			const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;
 
 			if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
 			{
@@ -192,8 +192,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
 		if (m_vertex_layout.interleaved_blocks.size() == 1 &&
 			rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
 		{
-			const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
-			storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
+			const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
+			storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;
 
 			if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
 			{

diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp
@@ -21,6 +21,7 @@ namespace rsx
 	{
 		FIFO_control::FIFO_control(::rsx::thread* pctrl)
 		{
+			m_thread = pctrl; // Keep the thread pointer so FIFO code can call it directly instead of going through get_current_renderer()
 			m_ctrl = pctrl->ctrl;
 			m_iotable = &pctrl->iomap_table;
 		}
@@ -53,7 +54,7 @@ namespace rsx
 
 				while (read_put() == m_internal_get && !Emu.IsStopped())
 				{
-					get_current_renderer()->cpu_wait({});
+					m_thread->cpu_wait({});
 				}
 			}
 		}
@@ -118,12 +119,17 @@ namespace rsx
 					m_cache_size = put - m_cache_addr;
 				}
 
-				rsx::reservation_lock<true, 1> rsx_lock(addr1, m_cache_size, true);
+				// Atomic FIFO debug options, both derived from the rsx_fifo_accuracy setting
+				const bool force_cache_fill = g_cfg.core.rsx_fifo_accuracy == rsx_fifo_mode::atomic_ordered;
+				const bool strict_fetch_ordering = g_cfg.core.rsx_fifo_accuracy >= rsx_fifo_mode::atomic_ordered;
 
+				rsx::reservation_lock<true, 1> rsx_lock(addr1, m_cache_size, true);
 				const auto src = vm::_ptr<spu_rdata_t>(addr1);
 
-				// Find the next set bit after every iteration
 				u64 start_time = 0;
+				u32 bytes_read = 0;
+
+				// Find the next set bit after every iteration
 				for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
 				{
 					// If a reservation is being updated, try to load another
@@ -144,42 +150,51 @@ namespace rsx
 								break;
 							}
 
+							bytes_read += 128;
 							continue;
 						}
 					}
 
 					if (!start_time)
 					{
+						if (bytes_read >= 256 && !force_cache_fill)
+						{
+							// Cut our losses if we have something to work with.
+							// This is the first time falling out of the reservation loop above, so we have clean data with no holes.
+							m_cache_size = bytes_read;
+							break;
+						}
+
 						start_time = rsx::uclock();
 					}
 
-					if (rsx::uclock() - start_time >= 50u)
+					auto now = rsx::uclock();
+					if (now - start_time >= 50u)
 					{
-						const auto rsx = get_current_renderer();
-
-						if (rsx->is_stopped())
+						if (m_thread->is_stopped())
 						{
 							return {};
 						}
 
-						rsx->cpu_wait({});
+						m_thread->cpu_wait({});
 
-						// Add idle time in reverse: after exchnage start_time becomes uclock(), use substruction because of the reversed order of parameters
-						const u64 _start = std::exchange(start_time, rsx::uclock());
-						rsx->performance_counters.idle_time -= _start - start_time;
+						const auto then = std::exchange(now, rsx::uclock());
+						start_time = now;
+						m_thread->performance_counters.idle_time += now - then;
+					}
+					else
+					{
+						busy_wait(200);
 					}
 
-					busy_wait(200);
-
-					if (g_cfg.core.rsx_fifo_accuracy >= rsx_fifo_mode::atomic_ordered)
+					if (strict_fetch_ordering)
 					{
 						i = (i - 1) % 8;
 					}
 				}
 			}
 
-			be_t<u32> ret;
-			std::memcpy(&ret, reinterpret_cast<const u8*>(&m_cache) + (addr - m_cache_addr), sizeof(u32));
+			const auto ret = utils::bless<const be_t<u32>>(&m_cache)[(addr - m_cache_addr) >> 2];
 			return {true, ret};
 		}
 
@@ -221,15 +236,15 @@ namespace rsx
 				bool ok{};
 				u32 arg = 0;
 
-				if (g_cfg.core.rsx_fifo_accuracy)
+				if (g_cfg.core.rsx_fifo_accuracy) [[ unlikely ]]
 				{
 					std::tie(ok, arg) = fetch_u32(m_internal_get + 4);
 
 					if (!ok)
 					{
 						if (arg == FIFO_ERROR)
 						{
-							get_current_renderer()->recover_fifo();
+							m_thread->recover_fifo();
 						}
 
 						return false;
@@ -311,7 +326,7 @@ namespace rsx
 				m_memwatch_cmp = 0;
 			}
 
-			if (!g_cfg.core.rsx_fifo_accuracy)
+			if (!g_cfg.core.rsx_fifo_accuracy) [[ likely ]]
 			{
 				const u32 put = read_put();
 

diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h
@@ -113,6 +113,7 @@ namespace rsx
 		class FIFO_control
 		{
 		private:
+			mutable rsx::thread* m_thread;
 			RsxDmaControl* m_ctrl = nullptr;
 			const rsx::rsx_iomap_table* m_iotable;
 			u32 m_internal_get = 0;
@@ -129,6 +130,7 @@ namespace rsx
 			u32 m_cache_addr = 0;
 			u32 m_cache_size = 0;
 			alignas(64) std::byte m_cache[8][128];
+
 		public:
 			FIFO_control(rsx::thread* pctrl);
 			~FIFO_control() = default;

diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -1933,9 +1933,8 @@ namespace rsx
 
 		if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
 		{
-			interleaved_range_info info = {};
+			interleaved_range_info& info = *result.alloc_interleaved_block();
 			info.interleaved = true;
-			info.locations.reserve(8);
 
 			for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
 			{
@@ -1963,7 +1962,7 @@ namespace rsx
 			if (info.attribute_stride)
 			{
 				// At least one array feed must be enabled for vertex input
-				result.interleaved_blocks.emplace_back(std::move(info));
+				result.interleaved_blocks.push_back(&info);
 			}
 
 			return;
@@ -2030,21 +2029,21 @@ namespace rsx
 
 				for (auto &block : result.interleaved_blocks)
 				{
-					if (block.single_vertex)
+					if (block->single_vertex)
 					{
 						//Single vertex definition, continue
 						continue;
 					}
 
-					if (block.attribute_stride != info.stride())
+					if (block->attribute_stride != info.stride())
 					{
 						//Stride does not match, continue
 						continue;
 					}
 
-					if (base_address > block.base_offset)
+					if (base_address > block->base_offset)
 					{
-						const u32 diff = base_address - block.base_offset;
+						const u32 diff = base_address - block->base_offset;
 						if (diff > info.stride())
 						{
 							//Not interleaved, continue
@@ -2053,26 +2052,26 @@ namespace rsx
 					}
 					else
 					{
-						const u32 diff = block.base_offset - base_address;
+						const u32 diff = block->base_offset - base_address;
 						if (diff > info.stride())
 						{
 							//Not interleaved, continue
 							continue;
 						}
 
 						//Matches, and this address is lower than existing
-						block.base_offset = base_address;
+						block->base_offset = base_address;
 					}
 
 					alloc_new_block = false;
-					block.locations.push_back({ index, modulo, info.frequency() });
-					block.interleaved = true;
+					block->locations.push_back({ index, modulo, info.frequency() });
+					block->interleaved = true;
 					break;
 				}
 
 				if (alloc_new_block)
 				{
-					interleaved_range_info block = {};
+					interleaved_range_info& block = *result.alloc_interleaved_block();
 					block.base_offset = base_address;
 					block.attribute_stride = info.stride();
 					block.memory_location = info.offset() >> 31;
@@ -2085,15 +2084,15 @@ namespace rsx
 						block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
 					}
 
-					result.interleaved_blocks.emplace_back(std::move(block));
+					result.interleaved_blocks.push_back(&block);
 				}
 			}
 		}
 
 		for (auto &info : result.interleaved_blocks)
 		{
 			//Calculate real data address to be used during upload
-			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location);
+			info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
 		}
 	}
 
@@ -2353,7 +2352,7 @@ namespace rsx
 		{
 			for (const auto &block : layout.interleaved_blocks)
 			{
-				volatile_memory_size += block.attribute_stride * vertex_count;
+				volatile_memory_size += block->attribute_stride * vertex_count;
 			}
 		}
 		else
@@ -2400,7 +2399,7 @@ namespace rsx
 		{
 			const auto &block = layout.interleaved_blocks[0];
 			u32 inline_data_offset = volatile_offset;
-			for (const auto& attrib : block.locations)
+			for (const auto& attrib : block->locations)
 			{
 				auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];
 
@@ -2412,14 +2411,14 @@ namespace rsx
 		{
 			for (const auto &block : layout.interleaved_blocks)
 			{
-				for (const auto& attrib : block.locations)
+				for (const auto& attrib : block->locations)
 				{
 					const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
-					offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset);
+					offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
 				}
 
-				const auto range = block.calculate_required_range(first_vertex, vertex_count);
-				persistent_offset += block.attribute_stride * range.second;
+				const auto range = block->calculate_required_range(first_vertex, vertex_count);
+				persistent_offset += block->attribute_stride * range.second;
 			}
 		}
 
@@ -2484,7 +2483,7 @@ namespace rsx
 						type = info.type();
 						size = info.size();
 
-						attrib0 = layout.interleaved_blocks[0].attribute_stride | default_frequency_mask;
+						attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
 					}
 				}
 				else
@@ -2624,12 +2623,12 @@ namespace rsx
 		{
 			for (const auto &block : layout.interleaved_blocks)
 			{
-				auto range = block.calculate_required_range(first_vertex, vertex_count);
+				auto range = block->calculate_required_range(first_vertex, vertex_count);
 
-				const u32 data_size = range.second * block.attribute_stride;
-				const u32 vertex_base = range.first * block.attribute_stride;
+				const u32 data_size = range.second * block->attribute_stride;
+				const u32 vertex_base = range.first * block->attribute_stride;
 
-				g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block.real_offset_address) + vertex_base, data_size);
+				g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
 				persistent += data_size;
 			}
 		}