Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsx: Misc optimizations and fixes #12632

Merged
merged 6 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/GL/GLDraw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ void GLGSRender::emit_geometry(u32 sub_index)
for (auto& info : m_vertex_layout.interleaved_blocks)
{
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info->base_offset), info->memory_location);
}
}

Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ void GLGSRender::on_init_thread()
}

//Occlusion query
for (u32 i = 0; i < occlusion_query_count; ++i)
for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
{
GLuint handle = 0;
auto &query = m_occlusion_query_data[i];
Expand Down Expand Up @@ -484,7 +484,7 @@ void GLGSRender::on_exit()

m_shader_interpreter.destroy();

for (u32 i = 0; i < occlusion_query_count; ++i)
for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
{
auto &query = m_occlusion_query_data[i];
query.active = false;
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ namespace
vertex_input_state operator()(const rsx::draw_inlined_array& /*command*/)
{
const auto stream_length = rsx::method_registers.current_draw_clause.inline_vertex_array.size();
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0].attribute_stride;
const u32 vertex_count = u32(stream_length * sizeof(u32)) / m_vertex_layout.interleaved_blocks[0]->attribute_stride;

if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
Expand Down Expand Up @@ -192,8 +192,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
if (m_vertex_layout.interleaved_blocks.size() == 1 &&
rsx::method_registers.current_draw_clause.command != rsx::draw_command::inlined_array)
{
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0].attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + data_offset;
const auto data_offset = (vertex_base * m_vertex_layout.interleaved_blocks[0]->attribute_stride);
storage_address = m_vertex_layout.interleaved_blocks[0]->real_offset_address + data_offset;

if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
{
Expand Down
53 changes: 34 additions & 19 deletions rpcs3/Emu/RSX/RSXFIFO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace rsx
{
// Binds this FIFO controller to its owning RSX thread: keeps the thread pointer
// itself, the thread's DMA control block (pctrl->ctrl) and the address of its
// IO map table (pctrl->iomap_table).
FIFO_control::FIFO_control(::rsx::thread* pctrl)
	: m_thread(pctrl)
	, m_ctrl(pctrl->ctrl)
	, m_iotable(&pctrl->iomap_table)
{
}
Expand Down Expand Up @@ -53,7 +54,7 @@ namespace rsx

while (read_put() == m_internal_get && !Emu.IsStopped())
{
get_current_renderer()->cpu_wait({});
m_thread->cpu_wait({});
}
}
}
Expand Down Expand Up @@ -118,12 +119,17 @@ namespace rsx
m_cache_size = put - m_cache_addr;
}

rsx::reservation_lock<true, 1> rsx_lock(addr1, m_cache_size, true);
// Atomic FIFO debug options
const bool force_cache_fill = g_cfg.core.rsx_fifo_accuracy == rsx_fifo_mode::atomic_ordered;
const bool strict_fetch_ordering = g_cfg.core.rsx_fifo_accuracy >= rsx_fifo_mode::atomic_ordered;

rsx::reservation_lock<true, 1> rsx_lock(addr1, m_cache_size, true);
const auto src = vm::_ptr<spu_rdata_t>(addr1);

// Find the next set bit after every iteration
u64 start_time = 0;
u32 bytes_read = 0;

// Find the next set bit after every iteration
for (int i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
{
// If a reservation is being updated, try to load another
Expand All @@ -144,42 +150,51 @@ namespace rsx
break;
}

bytes_read += 128;
continue;
}
}

if (!start_time)
{
if (bytes_read >= 256 && !force_cache_fill)
{
// Cut our losses if we have something to work with.
// This is the first time falling out of the reservation loop above, so we have clean data with no holes.
m_cache_size = bytes_read;
break;
}

start_time = rsx::uclock();
}

if (rsx::uclock() - start_time >= 50u)
auto now = rsx::uclock();
if (now - start_time >= 50u)
{
const auto rsx = get_current_renderer();

if (rsx->is_stopped())
if (m_thread->is_stopped())
{
return {};
}

rsx->cpu_wait({});
m_thread->cpu_wait({});

// Add idle time in reverse: after the exchange start_time becomes uclock(), so use subtraction because of the reversed order of parameters
const u64 _start = std::exchange(start_time, rsx::uclock());
rsx->performance_counters.idle_time -= _start - start_time;
const auto then = std::exchange(now, rsx::uclock());
start_time = now;
m_thread->performance_counters.idle_time += now - then;
}
else
{
busy_wait(200);
}

busy_wait(200);

if (g_cfg.core.rsx_fifo_accuracy >= rsx_fifo_mode::atomic_ordered)
if (strict_fetch_ordering)
{
i = (i - 1) % 8;
}
}
}

be_t<u32> ret;
std::memcpy(&ret, reinterpret_cast<const u8*>(&m_cache) + (addr - m_cache_addr), sizeof(u32));
const auto ret = utils::bless<const be_t<u32>>(&m_cache)[(addr - m_cache_addr) >> 2];
return {true, ret};
}

Expand Down Expand Up @@ -221,15 +236,15 @@ namespace rsx
bool ok{};
u32 arg = 0;

if (g_cfg.core.rsx_fifo_accuracy)
if (g_cfg.core.rsx_fifo_accuracy) [[ unlikely ]]
{
std::tie(ok, arg) = fetch_u32(m_internal_get + 4);

if (!ok)
{
if (arg == FIFO_ERROR)
{
get_current_renderer()->recover_fifo();
m_thread->recover_fifo();
}

return false;
Expand Down Expand Up @@ -311,7 +326,7 @@ namespace rsx
m_memwatch_cmp = 0;
}

if (!g_cfg.core.rsx_fifo_accuracy)
if (!g_cfg.core.rsx_fifo_accuracy) [[ likely ]]
{
const u32 put = read_put();

Expand Down
2 changes: 2 additions & 0 deletions rpcs3/Emu/RSX/RSXFIFO.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ namespace rsx
class FIFO_control
{
private:
mutable rsx::thread* m_thread;
RsxDmaControl* m_ctrl = nullptr;
const rsx::rsx_iomap_table* m_iotable;
u32 m_internal_get = 0;
Expand All @@ -129,6 +130,7 @@ namespace rsx
u32 m_cache_addr = 0;
u32 m_cache_size = 0;
alignas(64) std::byte m_cache[8][128];

public:
FIFO_control(rsx::thread* pctrl);
~FIFO_control() = default;
Expand Down
49 changes: 24 additions & 25 deletions rpcs3/Emu/RSX/RSXThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1933,9 +1933,8 @@ namespace rsx

if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{
interleaved_range_info info = {};
interleaved_range_info& info = *result.alloc_interleaved_block();
info.interleaved = true;
info.locations.reserve(8);

for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
Expand Down Expand Up @@ -1963,7 +1962,7 @@ namespace rsx
if (info.attribute_stride)
{
// At least one array feed must be enabled for vertex input
result.interleaved_blocks.emplace_back(std::move(info));
result.interleaved_blocks.push_back(&info);
}

return;
Expand Down Expand Up @@ -2030,21 +2029,21 @@ namespace rsx

for (auto &block : result.interleaved_blocks)
{
if (block.single_vertex)
if (block->single_vertex)
{
//Single vertex definition, continue
continue;
}

if (block.attribute_stride != info.stride())
if (block->attribute_stride != info.stride())
{
//Stride does not match, continue
continue;
}

if (base_address > block.base_offset)
if (base_address > block->base_offset)
{
const u32 diff = base_address - block.base_offset;
const u32 diff = base_address - block->base_offset;
if (diff > info.stride())
{
//Not interleaved, continue
Expand All @@ -2053,26 +2052,26 @@ namespace rsx
}
else
{
const u32 diff = block.base_offset - base_address;
const u32 diff = block->base_offset - base_address;
if (diff > info.stride())
{
//Not interleaved, continue
continue;
}

//Matches, and this address is lower than existing
block.base_offset = base_address;
block->base_offset = base_address;
}

alloc_new_block = false;
block.locations.push_back({ index, modulo, info.frequency() });
block.interleaved = true;
block->locations.push_back({ index, modulo, info.frequency() });
block->interleaved = true;
break;
}

if (alloc_new_block)
{
interleaved_range_info block = {};
interleaved_range_info& block = *result.alloc_interleaved_block();
block.base_offset = base_address;
block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31;
Expand All @@ -2085,15 +2084,15 @@ namespace rsx
block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
}

result.interleaved_blocks.emplace_back(std::move(block));
result.interleaved_blocks.push_back(&block);
}
}
}

for (auto &info : result.interleaved_blocks)
{
//Calculate real data address to be used during upload
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info.base_offset), info.memory_location);
info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
}
}

Expand Down Expand Up @@ -2353,7 +2352,7 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
volatile_memory_size += block.attribute_stride * vertex_count;
volatile_memory_size += block->attribute_stride * vertex_count;
}
}
else
Expand Down Expand Up @@ -2400,7 +2399,7 @@ namespace rsx
{
const auto &block = layout.interleaved_blocks[0];
u32 inline_data_offset = volatile_offset;
for (const auto& attrib : block.locations)
for (const auto& attrib : block->locations)
{
auto &info = rsx::method_registers.vertex_arrays_info[attrib.index];

Expand All @@ -2412,14 +2411,14 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
for (const auto& attrib : block.locations)
for (const auto& attrib : block->locations)
{
const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
offset_in_block[attrib.index] = persistent_offset + (local_address - block.base_offset);
offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
}

const auto range = block.calculate_required_range(first_vertex, vertex_count);
persistent_offset += block.attribute_stride * range.second;
const auto range = block->calculate_required_range(first_vertex, vertex_count);
persistent_offset += block->attribute_stride * range.second;
}
}

Expand Down Expand Up @@ -2484,7 +2483,7 @@ namespace rsx
type = info.type();
size = info.size();

attrib0 = layout.interleaved_blocks[0].attribute_stride | default_frequency_mask;
attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
}
}
else
Expand Down Expand Up @@ -2624,12 +2623,12 @@ namespace rsx
{
for (const auto &block : layout.interleaved_blocks)
{
auto range = block.calculate_required_range(first_vertex, vertex_count);
auto range = block->calculate_required_range(first_vertex, vertex_count);

const u32 data_size = range.second * block.attribute_stride;
const u32 vertex_base = range.first * block.attribute_stride;
const u32 data_size = range.second * block->attribute_stride;
const u32 vertex_base = range.first * block->attribute_stride;

g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block.real_offset_address) + vertex_base, data_size);
g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
persistent += data_size;
}
}
Expand Down
Loading