diff --git a/CMakeLists.txt b/CMakeLists.txt index 21acab6..2229085 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,11 +61,13 @@ check_ipo_supported(RESULT LTO_SUPPORTED OUTPUT LTO_ERROR) # And this part tells CMake where to find and install the file itself add_executable(iris MACOSX_BUNDLE ${OSX_ICON} ${WIN_ICON}) -if (LTO_SUPPORTED) - message(STATUS "IPO/LTO enabled") - set_property(TARGET iris PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) -else() - message(STATUS "IPO/LTO not supported: ${LTO_ERROR}") +if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") + if (LTO_SUPPORTED) + message(STATUS "IPO/LTO enabled") + set_property(TARGET iris PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(STATUS "IPO/LTO not supported: ${LTO_ERROR}") + endif() endif() set_property(TARGET iris PROPERTY CXX_STANDARD 20) @@ -97,6 +99,7 @@ target_sources(iris PRIVATE frontend/ui/spu2.cpp frontend/ui/state.cpp frontend/ui/statusbar.cpp + frontend/ui/vu_disassembly.cpp src/ps2.c src/ps2_elf.c src/ps2_iso9660.c diff --git a/frontend/iris.cpp b/frontend/iris.cpp index 066e337..2900951 100644 --- a/frontend/iris.cpp +++ b/frontend/iris.cpp @@ -231,6 +231,7 @@ void update_window(iris::instance* iris) { if (iris->show_gs_debugger) show_gs_debugger(iris); if (iris->show_spu2_debugger) show_spu2_debugger(iris); if (iris->show_memory_viewer) show_memory_viewer(iris); + if (iris->show_vu_disassembler) show_vu_disassembler(iris); if (iris->show_status_bar && !iris->fullscreen) show_status_bar(iris); if (iris->show_breakpoints) show_breakpoints(iris); if (iris->show_about_window) show_about_window(iris); diff --git a/frontend/iris.hpp b/frontend/iris.hpp index 7e07d7d..95a34f1 100644 --- a/frontend/iris.hpp +++ b/frontend/iris.hpp @@ -143,6 +143,7 @@ struct instance { bool show_settings = false; bool show_memory_card_tool = false; bool show_imgui_demo = false; + bool show_vu_disassembler = false; // Special windows bool show_bios_setting_window = false; @@ -215,6 +216,7 @@ void show_iop_dma(iris::instance* iris); void show_gs_debugger(iris::instance* iris); void show_spu2_debugger(iris::instance* iris); void show_memory_viewer(iris::instance* iris); +void show_vu_disassembler(iris::instance* iris); void show_status_bar(iris::instance* iris); void show_breakpoints(iris::instance* iris); void show_about_window(iris::instance* iris); diff --git a/frontend/settings.cpp b/frontend/settings.cpp index 1cdc4a7..9ace447 100644 --- a/frontend/settings.cpp +++ b/frontend/settings.cpp @@ -99,6 +99,7 @@ int parse_toml_settings(iris::instance* iris) { iris->show_gs_debugger = debugger["show_gs_debugger"].value_or(false); iris->show_spu2_debugger = debugger["show_spu2_debugger"].value_or(false); iris->show_memory_viewer = debugger["show_memory_viewer"].value_or(false); + iris->show_vu_disassembler = debugger["show_vu_disassembler"].value_or(false); iris->show_status_bar = debugger["show_status_bar"].value_or(true); iris->show_breakpoints = debugger["show_breakpoints"].value_or(false); iris->show_imgui_demo = debugger["show_imgui_demo"].value_or(false); @@ -265,6 +266,7 @@ void close_settings(iris::instance* iris) { { "show_gs_debugger", iris->show_gs_debugger }, { "show_spu2_debugger", iris->show_spu2_debugger }, { "show_memory_viewer", iris->show_memory_viewer }, + { "show_vu_disassembler", iris->show_vu_disassembler }, { "show_status_bar", iris->show_status_bar }, { "show_breakpoints", iris->show_breakpoints }, { "show_imgui_demo", iris->show_imgui_demo } diff --git a/frontend/ui/menubar.cpp b/frontend/ui/menubar.cpp index 5a09ad9..4a0d931 100644 --- a/frontend/ui/menubar.cpp +++ b/frontend/ui/menubar.cpp @@ -302,38 +302,39 @@ void show_main_menubar(iris::instance* iris) { ImGui::EndMenu(); } if (BeginMenu("Tools")) { - if (MenuItem(ICON_MS_LINE_START_CIRCLE " ImGui Demo", NULL, &iris->show_imgui_demo)); + if (MenuItem(ICON_MS_BUILD " ImGui Demo", NULL, &iris->show_imgui_demo)); ImGui::EndMenu(); } if (BeginMenu("Debug")) { SeparatorText("EE"); // if (BeginMenu(ICON_MS_BUG_REPORT " EE")) { - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Control##ee", NULL, &iris->show_ee_control)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " State##ee", NULL, &iris->show_ee_state)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Logs##ee", NULL, &iris->show_ee_logs)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Interrupts##ee", NULL, &iris->show_ee_interrupts)); + if (MenuItem(ICON_MS_SETTINGS " Control##ee", NULL, &iris->show_ee_control)); + if (MenuItem(ICON_MS_EDIT_NOTE " State##ee", NULL, &iris->show_ee_state)); + if (MenuItem(ICON_MS_TERMINAL " Logs##ee", NULL, &iris->show_ee_logs)); + if (MenuItem(ICON_MS_BOLT " Interrupts##ee", NULL, &iris->show_ee_interrupts)); // ImGui::EndMenu(); // } SeparatorText("IOP"); // if (BeginMenu(ICON_MS_BUG_REPORT " IOP")) { - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Control##iop", NULL, &iris->show_iop_control)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " State##iop", NULL, &iris->show_iop_state)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Logs##iop", NULL, &iris->show_iop_logs)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Interrupts##iop", NULL, &iris->show_iop_interrupts)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Modules##iop", NULL, &iris->show_iop_modules)); + if (MenuItem(ICON_MS_SETTINGS " Control##iop", NULL, &iris->show_iop_control)); + if (MenuItem(ICON_MS_EDIT_NOTE " State##iop", NULL, &iris->show_iop_state)); + if (MenuItem(ICON_MS_TERMINAL " Logs##iop", NULL, &iris->show_iop_logs)); + if (MenuItem(ICON_MS_BOLT " Interrupts##iop", NULL, &iris->show_iop_interrupts)); + if (MenuItem(ICON_MS_EXTENSION " Modules##iop", NULL, &iris->show_iop_modules)); // ImGui::EndMenu(); // } Separator(); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Breakpoints", NULL, &iris->show_breakpoints)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " GS debugger", NULL, &iris->show_gs_debugger)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " SPU2 debugger", NULL, &iris->show_spu2_debugger)); - if (MenuItem(ICON_MS_LINE_START_CIRCLE " Memory viewer", NULL, &iris->show_memory_viewer)); + if (MenuItem(ICON_MS_BUG_REPORT " Breakpoints", NULL, &iris->show_breakpoints)); + if (MenuItem(ICON_MS_BRUSH " GS debugger", NULL, &iris->show_gs_debugger)); + if (MenuItem(ICON_MS_MUSIC_NOTE " SPU2 debugger", NULL, &iris->show_spu2_debugger)); + if (MenuItem(ICON_MS_MEMORY " Memory viewer", NULL, &iris->show_memory_viewer)); + if (MenuItem(ICON_MS_VIEW_IN_AR " VU disassembler", NULL, &iris->show_vu_disassembler)); Separator(); @@ -352,13 +353,14 @@ void show_main_menubar(iris::instance* iris) { iris->show_gs_debugger = false; iris->show_spu2_debugger = false; iris->show_memory_viewer = false; + iris->show_vu_disassembler = false; iris->show_breakpoints = false; } ImGui::EndMenu(); } if (BeginMenu("Help")) { - if (MenuItem(ICON_MS_LINE_START_CIRCLE " About")) { + if (MenuItem(ICON_MS_INFO " About")) { iris->show_about_window = true; } diff --git a/frontend/ui/vu_disassembly.cpp b/frontend/ui/vu_disassembly.cpp new file mode 100644 index 0000000..158afe3 --- /dev/null +++ b/frontend/ui/vu_disassembly.cpp @@ -0,0 +1,319 @@ +#include +#include +#include +#include + +#include "iris.hpp" + +#include "pfd/pfd.h" +#include "res/IconsMaterialSymbols.h" + +#include "ee/vu_dis.h" + +#define IM_RGB(r, g, b) ImVec4(((float)r / 255.0f), ((float)g / 255.0f), ((float)b / 255.0f), 1.0) + +namespace iris { + +struct vu_dis_state g_vu_dis_state = { 0 }; + +uint32_t addr = 0; +bool stop_at_e_bit = false; +bool disassemble_all = false; +bool add_padding = true; +bool compact_view = false; +bool show_address_opcode = true; + +void print_highlighted_vu1(const char* buf) { + using namespace ImGui; + + std::vector tokens; + + std::string text; + + while (*buf) { + text.clear(); + + if (isalpha(*buf)) { + while (isalpha(*buf) || isdigit(*buf) || (*buf == '.')) + text.push_back(*buf++); + } else if (isxdigit(*buf) || (*buf == '-')) { + while (isxdigit(*buf) || (*buf == 'x') || (*buf == '-')) + text.push_back(*buf++); + } else if (*buf == '$') { + while (*buf == '$' || isdigit(*buf) || isalpha(*buf) || *buf == '_') + text.push_back(*buf++); + } else if (*buf == ',') { + while (*buf == ',') + text.push_back(*buf++); + } else if (*buf == '(') { + while (*buf == '(') + text.push_back(*buf++); + } else if (*buf == ')') { + while (*buf == ')') + text.push_back(*buf++); + } else if (*buf == '<') { + while (*buf != '>') + text.push_back(*buf++); + + text.push_back(*buf++); + } else if (*buf == '_') { + text.push_back(*buf++); + } else if (*buf == '.') { + text.push_back(*buf++); + } else if (*buf == '+') { + text.push_back(*buf++); + } else if (*buf == '-') { + text.push_back(*buf++); + } else { + printf("unhandled char %c (%d) \"%s\"\n", *buf, *buf, buf); + + exit(1); + } + + while (isspace(*buf)) + text.push_back(*buf++); + + tokens.push_back(text); + } + + for (const std::string& t : tokens) { + if (isalpha(t[0])) { + TextColored(IM_RGB(211, 167, 30), "%s", t.c_str()); + } else if (isdigit(t[0]) || t[0] == '-') { + TextColored(IM_RGB(138, 143, 226), "%s", t.c_str()); + } else if (t[0] == '$') { + TextColored(IM_RGB(68, 169, 240), "%s", t.c_str()); + } else if (t[0] == '<') { + TextColored(IM_RGB(89, 89, 89), "%s", t.c_str()); + } else { + Text("%s", t.c_str()); + } + + SameLine(0.0f, 0.0f); + } + + NewLine(); +} + +static void show_vu_disassembly_view(iris::instance* iris, uint64_t* mem, size_t size) { + using namespace ImGui; + + PushFont(iris->font_code); + + if (BeginTable("table1", compact_view ? 2 : 3, ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Hideable)) { + PushFont(iris->font_small_code); + + TableSetupColumn(" Address/Opcode"); + TableSetupColumn(compact_view ? "Upper/Lower" : "Upper"); + + if (!compact_view) { + TableSetupColumn("Lower"); + } + + TableHeadersRow(); + PopFont(); + + int e_bit = 0; + + for (int row = disassemble_all ? 0 : addr; row < size; row++) { + g_vu_dis_state.addr = row * 8; + + TableNextRow(); + TableSetColumnIndex(0); + + uint64_t u = mem[row] >> 32; + uint64_t l = mem[row] & 0xffffffff; + + if (!compact_view) { + TextDisabled("%04x: %08x %08x", row, u, l); SameLine(); + } else { + TextDisabled("%04x: %08x", row, u); SameLine(); + } + + TableSetColumnIndex(1); + + char upper[512], lower[512]; + + g_vu_dis_state.addr = row; + + vu_disassemble_upper(upper, u, &g_vu_dis_state); + vu_disassemble_lower(lower, l, &g_vu_dis_state); + + if (add_padding && !compact_view) { + sprintf_s(upper, "%-40s", upper); + sprintf_s(lower, "%-40s", lower); + } + + print_highlighted_vu1(upper); + + if (!compact_view) { + TableSetColumnIndex(2); + } + + print_highlighted_vu1(lower); + + if (e_bit && stop_at_e_bit && !disassemble_all) break; + + e_bit = (u & 0x40000000) ? 1 : 0; + } + + EndTable(); + } + + PopFont(); +} + +void save_disassembly(FILE* file, uint64_t* mem, size_t size) { + int e_bit = 0; + + for (int row = disassemble_all ? 0 : addr; row < size; row++) { + g_vu_dis_state.addr = row * 8; + + uint64_t u = mem[row] >> 32; + uint64_t l = mem[row] & 0xffffffff; + + char upper[512], lower[512]; + + g_vu_dis_state.addr = row; + + vu_disassemble_upper(upper, u, &g_vu_dis_state); + vu_disassemble_lower(lower, l, &g_vu_dis_state); + + if (add_padding && !compact_view) { + sprintf_s(upper, "%-40s", upper); + sprintf_s(lower, "%-40s", lower); + } + + if (compact_view) { + fprintf(file, "%04x: %08x %s\n", row, u, upper); + fprintf(file, " %08x %s\n", l, lower); + } else { + fprintf(file, "%04x: %08x %08x %s %s\n", row, u, l, upper, lower); + } + + if (e_bit && stop_at_e_bit && !disassemble_all) break; + + e_bit = (u & 0x40000000) ? 1 : 0; + } +} + +void show_vu_disassembler(iris::instance* iris) { + using namespace ImGui; + + PushFont(iris->font_icons); + + if (Begin("VU disassembler", &iris->show_vu_disassembler, ImGuiWindowFlags_MenuBar)) { + if (BeginMenuBar()) { + if (BeginMenu("File")) { + if (MenuItem(ICON_MS_FILE_SAVE " Save disassembly as...", NULL)) { + + } + + ImGui::EndMenu(); + } + + if (BeginMenu("Settings")) { + MenuItem(ICON_MS_FORMAT_LETTER_SPACING_WIDER " Add padding", NULL, &add_padding); + MenuItem(ICON_MS_COLLAPSE_ALL " Compact view", NULL, &compact_view); + + ImGui::EndMenu(); + } + + EndMenuBar(); + } + + if (BeginTabBar("##vudistabbar", ImGuiTabBarFlags_Reorderable)) { + if (BeginTabItem("VU0")) { + BeginDisabled(disassemble_all); + AlignTextToFramePadding(); + Text("Address"); SameLine(); + + SetNextItemWidth(100.0f); + PushFont(iris->font_code); + + if (InputInt("##address", (int*)&addr, 0, 0, ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_EnterReturnsTrue | ImGuiInputTextFlags_EscapeClearsAll)); + + PopFont(); + + SameLine(); + Checkbox("Stop at E bit", &stop_at_e_bit); + + EndDisabled(); + + SameLine(); + Checkbox("Disassemble all", &disassemble_all); SameLine(); + if (Button(ICON_MS_SAVE)) { + pfd::save_file file("Save VU0 disassembly", "vu0.s", { "Text files", "*.txt" }); + + if (!file.result().empty()) { + FILE* f = fopen(file.result().c_str(), "w"); + + if (f) { + save_disassembly(f, iris->ps2->vu0->micro_mem, 512); + fclose(f); + } else { + pfd::message("Error", "Failed to open file for writing.", pfd::choice::ok, pfd::icon::error); + } + } + } + + SeparatorText("Disassembly"); + + if (BeginChild("vu0##disassembly")) { + show_vu_disassembly_view(iris, iris->ps2->vu0->micro_mem, 512); + } EndChild(); + + EndTabItem(); + } + + if (BeginTabItem("VU1")) { + BeginDisabled(disassemble_all); + AlignTextToFramePadding(); + Text("Address"); SameLine(); + + SetNextItemWidth(100.0f); + PushFont(iris->font_code); + + if (InputInt("##address", (int*)&addr, 0, 0, ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_EnterReturnsTrue | ImGuiInputTextFlags_EscapeClearsAll)); + + PopFont(); + + SameLine(); + Checkbox("Stop at E bit", &stop_at_e_bit); + + EndDisabled(); + + SameLine(); + Checkbox("Disassemble all", &disassemble_all); SameLine(); + if (Button(ICON_MS_SAVE)) { + pfd::save_file file("Save VU1 disassembly", "vu1.s", { "Text files", "*.txt" }); + + if (!file.result().empty()) { + FILE* f = fopen(file.result().c_str(), "w"); + + if (f) { + save_disassembly(f, iris->ps2->vu1->micro_mem, 2048); + fclose(f); + } else { + pfd::message("Error", "Failed to open file for writing.", pfd::choice::ok, pfd::icon::error); + } + } + } + + SeparatorText("Disassembly"); + + if (BeginChild("vu1##disassembly")) { + show_vu_disassembly_view(iris, iris->ps2->vu1->micro_mem, 2048); + } EndChild(); + + EndTabItem(); + } + + EndTabBar(); + } + } End(); + + PopFont(); +} + +} \ No newline at end of file diff --git a/main.cpp b/main.cpp index e2ccf2b..4a80731 100644 --- a/main.cpp +++ b/main.cpp @@ -478,9 +478,9 @@ SDL_AppResult SDL_AppIterate(void* appstate) { return SDL_APP_CONTINUE; } - - // Execute until vblank is over - while (ps2_gs_is_vblank(iris->ps2->gs)) { + + // Execute until VBlank + while (!ps2_gs_is_vblank(iris->ps2->gs)) { do_cycle(iris); if (iris->pause) { @@ -490,13 +490,16 @@ SDL_AppResult SDL_AppIterate(void* appstate) { } } + // Draw frame iris::update_window(iris); - - // Break on VBlank - while (!ps2_gs_is_vblank(iris->ps2->gs)) { + + // Execute until vblank is over + while (ps2_gs_is_vblank(iris->ps2->gs)) { do_cycle(iris); if (iris->pause) { + iris::update_window(iris); + return SDL_APP_CONTINUE; } } diff --git a/src/ee/bus.c b/src/ee/bus.c index 8a070c1..5ac75da 100644 --- a/src/ee/bus.c +++ b/src/ee/bus.c @@ -177,11 +177,13 @@ uint64_t ee_bus_read16(void* udata, uint32_t addr) { // MAP_MEM_READ(16, 0x30000000, 0x31FFFFFF, ram, ee_ram); // MAP_MEM_READ(16, 0x1C000000, 0x1C1FFFFF, ram, iop_ram); // MAP_MEM_READ(16, 0x1FC00000, 0x1FFFFFFF, bios, bios); + MAP_REG_READ(16, 0x10008000, 0x1000EFFF, dmac, dmac); + MAP_REG_READ(16, 0x1000F520, 0x1000F5FF, dmac, dmac); MAP_MEM_READ(16, 0x11000000, 0x11007FFF, vu, vu0); MAP_MEM_READ(16, 0x11008000, 0x1100FFFF, vu, vu1); MAP_MEM_READ(16, 0x1E000000, 0x1E3FFFFF, bios, rom1); MAP_MEM_READ(16, 0x1E400000, 0x1E7FFFFF, bios, rom2); - MAP_REG_READ(32, 0x10000000, 0x10001FFF, ee_timers, timers); + MAP_REG_READ(16, 0x10000000, 0x10001FFF, ee_timers, timers); if (addr == 0x1a000010) return 0xffff; @@ -262,7 +264,7 @@ uint64_t ee_bus_read32(void* udata, uint32_t addr) { } break; } - printf("bus: Unhandled 32-bit read from physical address 0x%08x\n", addr); + // printf("bus: Unhandled 32-bit read from physical address 0x%08x\n", addr); if ((addr & 0xffff0000) == 0xfffe0000) exit(1); @@ -369,9 +371,12 @@ void ee_bus_write16(void* udata, uint32_t addr, uint64_t data) { // MAP_MEM_WRITE(16, 0x30000000, 0x31FFFFFF, ram, ee_ram); // MAP_MEM_WRITE(16, 0x1C000000, 0x1C1FFFFF, ram, iop_ram); // MAP_MEM_WRITE(16, 0x1FC00000, 0x1FFFFFFF, bios, bios); + MAP_REG_WRITE(16, 0x10008000, 0x1000EFFF, dmac, dmac); + MAP_REG_WRITE(16, 0x1000F520, 0x1000F5FF, dmac, dmac); MAP_MEM_WRITE(16, 0x11000000, 0x11007FFF, vu, vu0); MAP_MEM_WRITE(16, 0x11008000, 0x1100FFFF, vu, vu1); MAP_REG_WRITE(16, 0x1000F000, 0x1000F01F, intc, intc); + MAP_REG_WRITE(16, 0x10000000, 0x10001FFF, ee_timers, timers); switch (addr) { case 0x1a000008: diff --git a/src/ee/dmac.c b/src/ee/dmac.c index 5779702..7f7dfe9 100644 --- a/src/ee/dmac.c +++ b/src/ee/dmac.c @@ -93,9 +93,9 @@ uint64_t ps2_dmac_read32(struct ps2_dmac* dmac, uint32_t addr) { if (c) { switch (addr & 0xff) { - case 0x00: return c->chcr; - case 0x10: return c->madr; - case 0x20: return c->qwc; + case 0x00: if (c == &dmac->ipu_to) printf("dmac: Read channel %s chcr=%08x\n", dmac_get_channel_name(dmac, addr), c->chcr); return c->chcr; + case 0x10: if (c == &dmac->ipu_to) printf("dmac: Read channel %s madr=%08x\n", dmac_get_channel_name(dmac, addr), c->madr); return c->madr; + case 0x20: if (c == &dmac->ipu_to) printf("dmac: Read channel %s qwc=%08x\n", dmac_get_channel_name(dmac, addr), c->qwc); return c->qwc; case 0x30: return c->tadr; case 0x40: return c->asr0; case 0x50: return c->asr1; @@ -145,6 +145,7 @@ static inline void dmac_process_source_tag(struct ps2_dmac* dmac, struct dmac_ch // ); c->tag.end = 0; + c->qwc = c->tag.qwc; switch (c->tag.id) { case 0: { // REFE tag @@ -228,6 +229,8 @@ static inline void dmac_process_dest_tag(struct ps2_dmac* dmac, struct dmac_chan c->tag.mem = TAG_MEM(tag); c->tag.data = TAG_DATA(tag); + c->qwc = c->tag.qwc; + c->tag.end = dmac->sif0.tag.irq && (dmac->sif0.chcr & 0x80); switch (c->tag.id) { @@ -268,6 +271,8 @@ void dmac_handle_vif0_transfer(struct ps2_dmac* dmac) { // dmac->vif0.tadr // ); + int mode = (dmac->vif0.chcr >> 2) & 3; + for (int i = 0; i < dmac->vif0.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->vif0.madr, 0); @@ -277,7 +282,7 @@ void dmac_handle_vif0_transfer(struct ps2_dmac* dmac) { dmac->vif0.madr += 16; } - if (((dmac->vif0.chcr >> 2) & 7) != 1) { + if (mode == 0) { dmac->vif0.chcr &= ~0x100; dmac->vif0.qwc = 0; @@ -306,7 +311,7 @@ void dmac_handle_vif0_transfer(struct ps2_dmac* dmac) { ee_bus_write32(dmac->bus, 0x10004000, dmac->vif0.tag.data >> 32); } - for (int i = 0; i < dmac->vif0.tag.qwc; i++) { + for (int i = 0; i < dmac->vif0.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->vif0.madr, dmac->vif0.tag.mem); // printf("ee: Sending %016lx%016lx from %08x to VIF0 FIFO\n", @@ -352,6 +357,9 @@ void dmac_handle_vif1_transfer(struct ps2_dmac* dmac) { // dmac->vif1.tadr // ); + int tte = (dmac->vif1.chcr >> 6) & 1; + int mode = (dmac->vif1.chcr >> 2) & 3; + struct sched_event event; event.name = "VIF1 DMA IRQ"; @@ -384,7 +392,7 @@ void dmac_handle_vif1_transfer(struct ps2_dmac* dmac) { dmac->vif1.madr += 16; } - if (((dmac->vif1.chcr >> 2) & 7) != 1) { + if (mode == 0) { sched_schedule(dmac->sched, event); return; @@ -410,7 +418,7 @@ void dmac_handle_vif1_transfer(struct ps2_dmac* dmac) { ee_bus_write32(dmac->bus, 0x10005000, dmac->vif1.tag.data >> 32); } - for (int i = 0; i < dmac->vif1.tag.qwc; i++) { + for (int i = 0; i < dmac->vif1.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->vif1.madr, dmac->vif1.tag.mem); // printf("ee: Sending %016lx%016lx from %08x to VIF1 FIFO\n", @@ -446,6 +454,8 @@ void dmac_send_gif_irq(void* udata, int overshoot) { void dmac_handle_gif_transfer(struct ps2_dmac* dmac) { struct sched_event event; + int mode = (dmac->gif.chcr >> 2) & 3; + event.name = "GIF DMA IRQ"; event.udata = dmac; event.callback = dmac_send_gif_irq; @@ -477,7 +487,7 @@ void dmac_handle_gif_transfer(struct ps2_dmac* dmac) { dmac->gif.madr += 16; } - if (((dmac->gif.chcr >> 2) & 7) != 1) { + if (mode == 0) { return; } @@ -487,9 +497,9 @@ void dmac_handle_gif_transfer(struct ps2_dmac* dmac) { dmac_process_source_tag(dmac, &dmac->gif, tag); - // fprintf(file, "ee: gif tag qwc=%08x madr=%08x tadr=%08x\n", dmac->gif.tag.qwc, dmac->gif.madr, dmac->gif.tadr); + // printf("ee: gif tag qwc=%08x madr=%08x tadr=%08x\n", dmac->gif.tag.qwc, dmac->gif.madr, dmac->gif.tadr); - for (int i = 0; i < dmac->gif.tag.qwc; i++) { + for (int i = 0; i < dmac->gif.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->gif.madr, dmac->gif.tag.mem); // fprintf(file, "ee: Sending %016lx%016lx from %08x to GIF FIFO (chain)\n", @@ -516,16 +526,16 @@ void dmac_handle_ipu_from_transfer(struct ps2_dmac* dmac) { int mode = (dmac->ipu_from.chcr >> 2) & 3; - printf("dmac: ipu_from start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x dreq=%d\n", - dmac->ipu_from.chcr, - dmac->ipu_from.chcr & 1, - (dmac->ipu_from.chcr >> 2) & 3, - !!(dmac->ipu_from.chcr & 0x40), - dmac->ipu_from.madr, - dmac->ipu_from.qwc, - dmac->ipu_from.tadr, - dmac->ipu_from.dreq - ); + // printf("dmac: ipu_from start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x dreq=%d\n", + // dmac->ipu_from.chcr, + // dmac->ipu_from.chcr & 1, + // (dmac->ipu_from.chcr >> 2) & 3, + // !!(dmac->ipu_from.chcr & 0x40), + // dmac->ipu_from.madr, + // dmac->ipu_from.qwc, + // dmac->ipu_from.tadr, + // dmac->ipu_from.dreq + // ); if (mode != 0) { printf("dmac: ipu_from mode %d not supported\n", mode); @@ -550,9 +560,10 @@ void dmac_handle_ipu_from_transfer(struct ps2_dmac* dmac) { dmac->ipu_from.chcr &= ~0x100; dmac->ipu_from.qwc = 0; - printf("dmac: ipu_from channel done\n"); + // printf("dmac: ipu_from channel done\n"); } } + int dmac_transfer_ipu_to_qword(struct ps2_dmac* dmac) { if ((dmac->ipu_to.chcr & 0x100) == 0) { // printf("dmac: ipu_to channel not started\n"); @@ -566,13 +577,13 @@ int dmac_transfer_ipu_to_qword(struct ps2_dmac* dmac) { return 0; } - if (dmac->ipu_to.tag.qwc) { + if (dmac->ipu_to.qwc) { uint128_t q = dmac_read_qword(dmac, dmac->ipu_to.madr, dmac->ipu_to.tag.mem); ee_bus_write128(dmac->bus, 0x10007010, q); dmac->ipu_to.madr += 16; - dmac->ipu_to.tag.qwc--; + dmac->ipu_to.qwc--; return 1; } @@ -585,8 +596,6 @@ int dmac_transfer_ipu_to_qword(struct ps2_dmac* dmac) { dmac->ipu_to.chcr &= ~0x100; dmac->ipu_to.qwc = 0; - exit(1); - return 0; } @@ -594,8 +603,9 @@ int dmac_transfer_ipu_to_qword(struct ps2_dmac* dmac) { dmac_process_source_tag(dmac, &dmac->ipu_to, tag); - // printf("dmac: ipu_to tag qwc=%08lx id=%ld irq=%ld addr=%08lx mem=%ld data=%016lx end=%d tte=%d\n", + // printf("dmac: ipu_to tag tag.qwc=%08lx qwc=%08lx id=%ld irq=%ld addr=%08lx mem=%ld data=%016lx end=%d tte=%d\n", // dmac->ipu_to.tag.qwc, + // dmac->ipu_to.qwc, // dmac->ipu_to.tag.id, // dmac->ipu_to.tag.irq, // dmac->ipu_to.tag.addr, @@ -614,15 +624,15 @@ void dmac_handle_ipu_to_transfer(struct ps2_dmac* dmac) { return; } - printf("dmac: ipu_to start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x\n", - dmac->ipu_to.chcr, - dmac->ipu_to.chcr & 1, - (dmac->ipu_to.chcr >> 2) & 3, - !!(dmac->ipu_to.chcr & 0x40), - dmac->ipu_to.madr, - dmac->ipu_to.qwc, - dmac->ipu_to.tadr - ); + // printf("dmac: ipu_to start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x\n", + // dmac->ipu_to.chcr, + // dmac->ipu_to.chcr & 1, + // (dmac->ipu_to.chcr >> 2) & 3, + // !!(dmac->ipu_to.chcr & 0x40), + // dmac->ipu_to.madr, + // dmac->ipu_to.qwc, + // dmac->ipu_to.tadr + // ); while (dmac_transfer_ipu_to_qword(dmac)) { // Keep transferring until we run out of QWC or DREQ is cleared @@ -685,7 +695,7 @@ void dmac_handle_sif0_transfer(struct ps2_dmac* dmac) { // dmac->sif0.chcr // ); - for (int i = 0; i < dmac->sif0.tag.qwc; i++) { + for (int i = 0; i < dmac->sif0.qwc; i++) { if (ps2_sif0_is_empty(dmac->sif)) { printf("dmac: qwc != 0 FIFO empty\n"); @@ -770,7 +780,7 @@ void dmac_handle_sif1_transfer(struct ps2_dmac* dmac) { // ); // printf("ee: SIF1 tag madr=%08x\n", dmac->sif1.madr); - for (int i = 0; i < dmac->sif1.tag.qwc; i++) { + for (int i = 0; i < dmac->sif1.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->sif1.madr, dmac->sif1.tag.mem); // printf("%08x: ", dmac->sif1.madr); @@ -875,7 +885,7 @@ void dmac_handle_spr_from_transfer(struct ps2_dmac* dmac) { dmac->spr_from.sadr += 0x10; dmac->spr_from.sadr &= 0x3ff0; - dmac->spr_from.tag.qwc = tag.u32[0] & 0xffff; + dmac->spr_from.qwc = tag.u32[0] & 0xffff; dmac->spr_from.tag.id = (tag.u32[0] >> 28) & 0x7; dmac->spr_from.tag.irq = tag.u32[0] & 0x80000000; dmac->spr_from.tag.end = dmac->spr_from.tag.id == 7; @@ -895,7 +905,7 @@ void dmac_handle_spr_from_transfer(struct ps2_dmac* dmac) { // (dmac->spr_from.chcr >> 7) & 1 // ); - for (int i = 0; i < dmac->spr_from.tag.qwc; i++) { + for (int i = 0; i < dmac->spr_from.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->spr_from.sadr, 1); ee_bus_write128(dmac->bus, dmac->spr_from.madr, q); @@ -977,6 +987,12 @@ void dmac_handle_spr_to_transfer(struct ps2_dmac* dmac) { dmac_process_source_tag(dmac, &dmac->spr_to, tag); + if ((dmac->spr_to.chcr >> 6) & 1) { + ps2_ram_write128(dmac->spr, dmac->spr_to.sadr, tag); + + dmac->spr_to.madr += 0x10; + } + // printf("ee: spr_to tag qwc=%08lx madr=%08lx tadr=%08lx id=%ld addr=%08lx mem=%ld data=%016lx end=%d tte=%d\n", // dmac->spr_to.tag.qwc, // dmac->spr_to.madr, @@ -989,7 +1005,7 @@ void dmac_handle_spr_to_transfer(struct ps2_dmac* dmac) { // (dmac->spr_to.chcr >> 7) & 1 // ); - for (int i = 0; i < dmac->spr_to.tag.qwc; i++) { + for (int i = 0; i < dmac->spr_to.qwc; i++) { uint128_t q = dmac_read_qword(dmac, dmac->spr_to.madr, dmac->spr_to.tag.mem); ps2_ram_write128(dmac->spr, dmac->spr_to.sadr, q); @@ -1008,6 +1024,17 @@ void dmac_handle_spr_to_transfer(struct ps2_dmac* dmac) { static inline void dmac_handle_channel_start(struct ps2_dmac* dmac, uint32_t addr) { struct dmac_channel* c = dmac_get_channel(dmac, addr); + if (c == &dmac->ipu_to) + printf("dmac: ipu_to start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x\n", + dmac->ipu_to.chcr, + dmac->ipu_to.chcr & 1, + (dmac->ipu_to.chcr >> 2) & 3, + !!(dmac->ipu_to.chcr & 0x40), + dmac->ipu_to.madr, + dmac->ipu_to.qwc, + dmac->ipu_to.tadr + ); + // printf("ee: %s start data=%08x dir=%d mod=%d tte=%d madr=%08x qwc=%08x tadr=%08x\n", // dmac_get_channel_name(dmac, addr), // c->chcr, @@ -1019,6 +1046,20 @@ static inline void dmac_handle_channel_start(struct ps2_dmac* dmac, uint32_t add // c->tadr // ); + // if (c == &dmac->ipu_to && c->qwc != 0) { + // int mode = (c->chcr >> 2) & 3; + + // if (mode == 1) { + // uint128_t tag; + + // tag.u32[0] = (c->chcr & 0xffff0000) | (c->qwc & 0xffff); + + // dmac_process_source_tag(dmac, c, tag); + // } else { + // c->tag.end = 1; + // } + // } + switch (addr & 0xff00) { case 0x8000: dmac_handle_vif0_transfer(dmac); return; case 0x9000: dmac_handle_vif1_transfer(dmac); return; @@ -1047,7 +1088,21 @@ void ps2_dmac_write32(struct ps2_dmac* dmac, uint32_t addr, uint64_t data) { struct dmac_channel* c = dmac_get_channel(dmac, addr); switch (addr) { - case 0x1000E000: dmac->ctrl = data; return; + case 0x1000E000: { + dmac->ctrl = data; + + int mfifo_drain = (dmac->ctrl >> 2) & 3; + int stall_ctrl = (dmac->ctrl >> 4) & 3; + int stall_drain = (dmac->ctrl >> 6) & 3; + + if (mfifo_drain || stall_ctrl || stall_drain) { + printf("dmac: mfifo_drain=%d stall_ctrl=%d stall_drain=%d\n", + mfifo_drain, stall_ctrl, stall_drain + ); + + // exit(1); + } + } return; case 0x1000E010: dmac_write_stat(dmac, data); return; case 0x1000E020: dmac->pcr = data; dmac_test_cpcond0(dmac); return; case 0x1000E030: dmac->sqwc = data; return; @@ -1060,10 +1115,25 @@ void ps2_dmac_write32(struct ps2_dmac* dmac, uint32_t addr, uint64_t data) { if (c) { switch (addr & 0xff) { case 0x00: { - c->chcr = data; + if (c == &dmac->ipu_to) + printf("dmac: channel %s value=%08x chcr=%08x\n", dmac_get_channel_name(dmac, addr), data, c->chcr); + + // c->chcr = data; + + // if (data & 0x100) { + // dmac_handle_channel_start(dmac, addr); + // } - if (data & 0x100) { - dmac_handle_channel_start(dmac, addr); + // Behavior required for IPU FMVs to work + if ((c->chcr & 0x100) == 0) { + c->chcr = data; + + if (data & 0x100) { + dmac_handle_channel_start(dmac, addr); + } + } else { + printf("dmac: channel %s value=%08x chcr=%08x\n", dmac_get_channel_name(dmac, addr), data, c->chcr); + c->chcr &= (data & 0x100) | 0xfffffeff; } } return; case 0x10: { @@ -1172,4 +1242,16 @@ void ps2_dmac_write8(struct ps2_dmac* dmac, uint32_t addr, uint64_t data) { exit(1); return; +} + +uint64_t ps2_dmac_read16(struct ps2_dmac* dmac, uint32_t addr) { + printf("dmac: 16-bit read from %08x\n", addr); + + exit(1); +} + +void ps2_dmac_write16(struct ps2_dmac* dmac, uint32_t addr, uint64_t data) { + printf("dmac: 16-bit write to %08x (%04x)\n", addr, data & 0xffff); + + exit(1); } \ No newline at end of file diff --git a/src/ee/dmac.h b/src/ee/dmac.h index f9ab359..675a6f0 100644 --- a/src/ee/dmac.h +++ b/src/ee/dmac.h @@ -93,8 +93,10 @@ struct ps2_dmac* ps2_dmac_create(void); void ps2_dmac_init(struct ps2_dmac* dmac, struct ps2_sif* sif, struct ps2_iop_dma* iop_dma, struct ps2_ram* spr, struct ee_state* ee, struct sched_state* sched, struct ee_bus* bus); void ps2_dmac_destroy(struct ps2_dmac* dmac); uint64_t ps2_dmac_read8(struct ps2_dmac* dmac, uint32_t addr); +uint64_t ps2_dmac_read16(struct ps2_dmac* dmac, uint32_t addr); uint64_t ps2_dmac_read32(struct ps2_dmac* dmac, uint32_t addr); void ps2_dmac_write8(struct ps2_dmac* dmac, uint32_t addr, uint64_t data); +void ps2_dmac_write16(struct ps2_dmac* dmac, uint32_t addr, uint64_t data); void ps2_dmac_write32(struct ps2_dmac* dmac, uint32_t addr, uint64_t data); void dmac_handle_vif0_transfer(struct ps2_dmac* dmac); diff --git a/src/ee/ee.c b/src/ee/ee.c index 861ee82..8d651e6 100755 --- a/src/ee/ee.c +++ b/src/ee/ee.c @@ -4,6 +4,7 @@ #include #include #include +#include #ifdef _EE_USE_INTRINSICS #include @@ -15,6 +16,9 @@ #include "ee.h" #include "ee_dis.h" +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) + #ifdef _WIN32 #define SSUBOVF64 __builtin_ssubll_overflow #define SADDOVF64 __builtin_saddll_overflow @@ -208,6 +212,60 @@ static inline void fpu_cvtws(union ee_fpu_reg* d, union ee_fpu_reg* s) { d->u32 = 0x80000000; } +static inline int fpu_check_overflow(struct ee_state* ee, union ee_fpu_reg* reg) { + if ((reg->u32 & ~0x80000000) == 0x7f800000) { + reg->u32 = (reg->u32 & 0x80000000) | 0x7f7fffff; + ee->fcr |= FPU_FLG_O | FPU_FLG_SO; + + return 1; + } + + ee->fcr &= ~FPU_FLG_O; + + return 0; +} + +static inline int fpu_check_underflow(struct ee_state* ee, union ee_fpu_reg* reg) { + if (((reg->u32 & 0x7F800000) == 0) && ((reg->u32 & 0x007FFFFF) != 0)) { + reg->u32 &= 0x80000000; + ee->fcr |= FPU_FLG_U | FPU_FLG_SU; + + return 1; + } + + ee->fcr &= ~FPU_FLG_U; + + return 0; +} + +static inline int fpu_check_overflow_no_flags(struct ee_state* ee, union ee_fpu_reg* reg) { + if ((reg->u32 & ~0x80000000) == 0x7f800000) { + reg->u32 = (reg->u32 & 0x80000000) | 0x7f7fffff; + + return 1; + } + + return 0; +} + +static inline int fpu_check_underflow_no_flags(struct ee_state* ee, union ee_fpu_reg* reg) { + if (((reg->u32 & 0x7F800000) == 0) && ((reg->u32 & 0x007FFFFF) != 0)) { + reg->u32 &= 0x80000000; + + return 1; + } + + return 0; +} + +static inline int fpu_max(int32_t a, int32_t b) { + return (a < 0 && b < 0) ? min(a, b) : max(a, b); +} + +static inline int fpu_min(int32_t a, int32_t b) { + return (a < 0 && b < 0) ? max(a, b) : min(a, b); +} + static inline struct ee_vtlb_entry* ee_search_vtlb(struct ee_state* ee, uint32_t virt) { struct ee_vtlb_entry* entry = NULL; @@ -259,11 +317,11 @@ static inline int ee_translate_virt(struct ee_state* ee, uint32_t virt, uint32_t *phys = virt & ee_bus_region_mask_table[virt >> 29]; - printf("ee: Unhandled virtual address %08x @ cyc=%ld\n", virt, ee->total_cycles); + // printf("ee: Unhandled virtual address %08x @ cyc=%ld\n", virt, ee->total_cycles); - *(int*)0 = 0; + // *(int*)0 = 0; - exit(1); + // exit(1); // To-do: MMU mapping *phys = virt & 0x1fffffff; @@ -483,7 +541,8 @@ void ee_set_cpcond0(struct ee_state* ee, int v) { } static inline void ee_i_abss(struct ee_state* ee) { - EE_FD = fabsf(EE_FS); + ee->f[EE_D_FD].u32 = ee->f[EE_D_FS].u32 & 0x7fffffff; + // EE_FD = fabsf(EE_FS); } static inline void ee_i_add(struct ee_state* ee) { int32_t s = EE_RS; @@ -500,6 +559,11 @@ static inline void ee_i_add(struct ee_state* ee) { } static inline void ee_i_addas(struct ee_state* ee) { ee->a.f = EE_FS + EE_FT; + + if (fpu_check_overflow(ee, &ee->a)) + return; + + fpu_check_underflow(ee, &ee->a); } static inline void ee_i_addi(struct ee_state* ee) { int32_t s = EE_RS; @@ -516,7 +580,14 @@ static inline void ee_i_addiu(struct ee_state* ee) { EE_RT = SE6432(EE_RS32 + SE3216(EE_D_I16)); } static inline void ee_i_adds(struct ee_state* ee) { - EE_FD = EE_FS + EE_FT; + int d = EE_D_FD; + + ee->f[d].f = EE_FS + EE_FT; + + if (fpu_check_overflow(ee, &ee->f[d])) + return; + + fpu_check_underflow(ee, &ee->f[d]); } static inline void ee_i_addu(struct ee_state* ee) { EE_RD = SE6432(EE_RS + EE_RT); @@ -624,47 +695,14 @@ static inline void ee_i_ceq(struct ee_state* ee) { ee->fcr &= ~(1 << 23); } } -static inline void ee_i_cf(struct ee_state* ee) { printf("ee: cf unimplemented\n"); exit(1); } +static inline void ee_i_cf(struct ee_state* ee) { + ee->fcr &= ~(1 << 23); +} static inline void ee_i_cfc1(struct ee_state* ee) { - EE_RT = EE_D_FS ? ee->fcr : 0x2e00; + EE_RT = (EE_D_FS >= 16) ? ee->fcr : 0x2e30; } static inline void ee_i_cfc2(struct ee_state* ee) { - // To-do: Handle FBRST, VPU_STAT, CMSAR1 - int d = EE_D_RD; - - EE_RT = d < 16 ? ee->vu0->vi[d] : ee->vu0->cr[d - 16]; - - if (d == 28) { - EE_RT &= 0x0c0c; - } - - // static const char* regs[] = { - // "Status flag", - // "MAC flag", - // "clipping flag", - // "reserved", - // "R", - // "I", - // "Q", - // "reserved", - // "reserved", - // "reserved", - // "TPC", - // "CMSAR0", - // "FBRST", - // "VPU-STAT", - // "reserved", - // "CMSAR1", - // }; - - // if (d >= 16) - // printf("ee: cfc2 %d (%s) <- %08x\n", d-16, regs[d-16], ee->vu0->cr[d - 16]); - - // if (d >= 16) { - // file = fopen("vu.dump", "a"); - // fprintf(file, "ee: cfc2 %d (%s) <- %08x\n", d-16, regs[d-16], ee->vu0->cr[d - 16]); - // fclose(file); - // } + EE_RT = SE6432(ps2_vu_read_vi(ee->vu0, EE_D_RD)); } static inline void ee_i_cle(struct ee_state* ee) { if (EE_FS <= EE_FT) { @@ -681,7 +719,10 @@ static inline void ee_i_clt(struct ee_state* ee) { } } static inline void ee_i_ctc1(struct ee_state* ee) { - ee->fcr = EE_RT; + if (EE_D_FS < 16) + return; + + ee->fcr = (ee->fcr & ~(0x83c078)) | (EE_RT & 0x83c078); } static inline void ee_i_ctc2(struct ee_state* ee) { // To-do: Handle FBRST, VPU_STAT, CMSAR1 @@ -706,21 +747,7 @@ static inline void ee_i_ctc2(struct ee_state* ee) { "CMSAR1", }; - if (d < 16) { - ee->vu0->vi[d] = EE_RT32; - } else { - if ((d-16) == 0) { - // uint32_t status = ee->vu0->cr[0]; - ee->vu0->cr[0] &= ~0xfc0; - ee->vu0->cr[0] |= EE_RT32 & 0xfc0; - - // printf("prev=%08x curr=%08x val=%08x\n", status, ee->vu0->cr[0], EE_RT32); - } else { - ee->vu0->cr[d - 16] = EE_RT32; - } - - // printf("ee: ctc2 %d (%s) -> %08x\n", d-16, regs[d-16], EE_RT32); - } + ps2_vu_write_vi(ee->vu0, d, EE_RT32); } static inline void ee_i_cvts(struct ee_state* ee) { EE_FD = (float)ee->f[EE_D_FS].s32; @@ -793,7 +820,32 @@ static inline void ee_i_div1(struct ee_state* ee) { } } static inline void ee_i_divs(struct ee_state* ee) { - EE_FD = EE_FS / EE_FT; + int t = EE_D_RT; + int d = EE_D_FD; + int s = EE_D_FS; + + ee->fcr &= ~(FPU_FLG_I | FPU_FLG_D); + + // If both the dividend and divisor are zero, set I/SI, + // else set D/SD + if ((ee->f[t].u32 & 0x7F800000) == 0) { + if ((ee->f[s].u32 & 0x7F800000) == 0) { + ee->fcr |= FPU_FLG_I | FPU_FLG_SI; + } else { + ee->fcr |= FPU_FLG_D | FPU_FLG_SD; + } + + ee->f[d].u32 = ((ee->f[t].u32 ^ ee->f[s].u32) & 0x80000000) | 0x7f7fffff; + + return; + } + + ee->f[d].f = EE_FS / EE_FT; + + if (fpu_check_overflow_no_flags(ee, &ee->f[d])) + return; + + fpu_check_underflow_no_flags(ee, &ee->f[d]); } static inline void ee_i_divu(struct ee_state* ee) { int t = EE_D_RT; @@ -945,6 +997,10 @@ static inline void ee_i_lq(struct ee_state* ee) { ee->r[EE_D_RT] = bus_read128(ee, (EE_RS32 + SE3216(EE_D_I16)) & ~0xf); } static inline void ee_i_lqc2(struct ee_state* ee) { + int d = EE_D_RT; + + if (!d) return; + ee->vu0->vf[EE_D_RT].u128 = bus_read128(ee, (EE_RS32 + SE3216(EE_D_I16)) & ~0xf); } static inline void ee_i_lui(struct ee_state* ee) { @@ -1017,9 +1073,25 @@ static inline void ee_i_madd1(struct ee_state* ee) { } static inline void ee_i_maddas(struct ee_state* ee) { ee->a.f += EE_FS * EE_FT; + + if (fpu_check_overflow(ee, &ee->a)) + return; + + fpu_check_underflow(ee, &ee->a); } static inline void ee_i_madds(struct ee_state* ee) { - EE_FD = ee->a.f + EE_FS * EE_FT; + int t = EE_D_RT; + int d = EE_D_FD; + int s = EE_D_FS; + + float temp = fpu_cvtf(ee->f[s].f) * fpu_cvtf(ee->f[t].f); + + ee->f[d].f = fpu_cvtf(ee->a.f) + fpu_cvtf(temp); + + if (fpu_check_overflow(ee, &ee->f[d])) + return; + + fpu_check_underflow(ee, &ee->f[d]); } static inline void ee_i_maddu(struct ee_state* ee) { uint64_t r = (uint64_t)EE_RS32 * (uint64_t)EE_RT32; @@ -1044,7 +1116,9 @@ static inline void ee_i_maddu1(struct ee_state* ee) { EE_RD = EE_LO1; } static inline void ee_i_maxs(struct ee_state* ee) { - EE_FD = fmaxf(EE_FS, EE_FT); + ee->f[EE_D_FD].u32 = fpu_max(ee->f[EE_D_FS].u32, ee->f[EE_D_RT].u32); + + ee->fcr &= ~(FPU_FLG_O | FPU_FLG_U); } static inline void ee_i_mfc0(struct ee_state* ee) { EE_RT = SE6432(ee->cop0_r[EE_D_RD]); @@ -1068,7 +1142,9 @@ static inline void ee_i_mfsa(struct ee_state* ee) { EE_RD = ee->sa & 0xf; } static inline void ee_i_mins(struct ee_state* ee) { - EE_FD = fminf(EE_FS, EE_FT); + ee->f[EE_D_FD].u32 = fpu_min(ee->f[EE_D_FS].u32, ee->f[EE_D_RT].u32); + + ee->fcr &= ~(FPU_FLG_O | FPU_FLG_U); } static inline void ee_i_movn(struct ee_state* ee) { if (EE_RT) EE_RD = EE_RS; @@ -1081,9 +1157,25 @@ static inline void ee_i_movz(struct ee_state* ee) { } static inline void ee_i_msubas(struct ee_state* ee) { ee->a.f -= EE_FS * EE_FT; + + if (fpu_check_overflow(ee, &ee->a)) + return; + + fpu_check_underflow(ee, &ee->a); } static inline void ee_i_msubs(struct ee_state* ee) { - EE_FD = ee->a.f - (EE_FS * EE_FT); + int t = EE_D_RT; + int d = EE_D_FD; + int s = EE_D_FS; + + float temp = fpu_cvtf(ee->f[s].f) * fpu_cvtf(ee->f[t].f); + + ee->f[d].f = fpu_cvtf(ee->a.f) - fpu_cvtf(temp); + + if (fpu_check_overflow(ee, &ee->f[d])) + return; + + fpu_check_underflow(ee, &ee->f[d]); } static inline void ee_i_mtc0(struct ee_state* ee) { ee->cop0_r[EE_D_RD] = EE_RT32; @@ -1114,9 +1206,21 @@ static inline void ee_i_mtsah(struct ee_state* ee) { } static inline void ee_i_mulas(struct ee_state* ee) { ee->a.f = EE_FS * EE_FT; + + if (fpu_check_overflow(ee, &ee->a)) + return; + + fpu_check_underflow(ee, &ee->a); } static inline void ee_i_muls(struct ee_state* ee) { - EE_FD = EE_FS * EE_FT; + int d = EE_D_FD; + + ee->f[d].f = EE_FS * EE_FT; + + if (fpu_check_overflow(ee, &ee->f[d])) + return; + + fpu_check_underflow(ee, &ee->f[d]); } static inline void ee_i_mult(struct ee_state* ee) { uint64_t r = SE6432(EE_RS32) * SE6432(EE_RT32); @@ -1151,7 +1255,9 @@ static inline void ee_i_multu1(struct ee_state* ee) { EE_RD = EE_LO1; } static inline void ee_i_negs(struct ee_state* ee) { - EE_FD = -EE_FS; + ee->f[EE_D_FD].u32 = ee->f[EE_D_FS].u32 ^ 0x80000000; + + ee->fcr &= ~(FPU_FLG_O | FPU_FLG_U); } static inline void ee_i_nor(struct ee_state* ee) { EE_RD = ~(EE_RS | EE_RT); @@ -2033,7 +2139,20 @@ static inline void ee_i_prot3w(struct ee_state* ee) { ee->r[d].u32[2] = rt.u32[0]; ee->r[d].u32[3] = rt.u32[3]; } -static inline void ee_i_psllh(struct ee_state* ee) { printf("ee: psllh unimplemented\n"); exit(1); } +static inline void ee_i_psllh(struct ee_state* ee) { + int sa = EE_D_SA & 0xf; + int t = EE_D_RT; + int d = EE_D_RD; + + ee->r[d].u16[0] = ee->r[t].u16[0] << sa; + ee->r[d].u16[1] = ee->r[t].u16[1] << sa; + ee->r[d].u16[2] = ee->r[t].u16[2] << sa; + ee->r[d].u16[3] = ee->r[t].u16[3] << sa; + ee->r[d].u16[4] = ee->r[t].u16[4] << sa; + ee->r[d].u16[5] = ee->r[t].u16[5] << sa; + ee->r[d].u16[6] = ee->r[t].u16[6] << sa; + ee->r[d].u16[7] = ee->r[t].u16[7] << sa; +} static inline void ee_i_psllvw(struct ee_state* ee) { printf("ee: psllvw unimplemented\n"); exit(1); } static inline void ee_i_psllw(struct ee_state* ee) { int sa = EE_D_SA; @@ -2045,7 +2164,20 @@ static inline void ee_i_psllw(struct ee_state* ee) { ee->r[d].u32[2] = ee->r[t].u32[2] << sa; ee->r[d].u32[3] = ee->r[t].u32[3] << sa; } -static inline void ee_i_psrah(struct ee_state* ee) { printf("ee: psrah unimplemented\n"); exit(1); } +static inline void ee_i_psrah(struct ee_state* ee) { + int sa = EE_D_SA & 0xf; + int t = EE_D_RT; + int d = EE_D_RD; + + ee->r[d].u16[0] = ((int16_t)ee->r[t].u16[0]) >> sa; + ee->r[d].u16[1] = ((int16_t)ee->r[t].u16[1]) >> sa; + ee->r[d].u16[2] = ((int16_t)ee->r[t].u16[2]) >> sa; + ee->r[d].u16[3] = ((int16_t)ee->r[t].u16[3]) >> sa; + ee->r[d].u16[4] = ((int16_t)ee->r[t].u16[4]) >> sa; + ee->r[d].u16[5] = ((int16_t)ee->r[t].u16[5]) >> sa; + ee->r[d].u16[6] = ((int16_t)ee->r[t].u16[6]) >> sa; + ee->r[d].u16[7] = ((int16_t)ee->r[t].u16[7]) >> sa; +} static inline void ee_i_psravw(struct ee_state* ee) { printf("ee: psravw unimplemented\n"); exit(1); } static inline void ee_i_psraw(struct ee_state* ee) { int sa = EE_D_SA; @@ -2338,10 +2470,33 @@ static inline void ee_i_qmtc2(struct ee_state* ee) { int t = EE_D_RT; int d = EE_D_RD; + if (!d) return; + ee->vu0->vf[d].u128 = ee->r[t]; } static inline void ee_i_rsqrts(struct ee_state* ee) { - EE_FD = EE_FS / sqrtf(EE_FT); + int t = EE_D_RT; + int d = EE_D_FD; + + ee->fcr &= ~(FPU_FLG_I | FPU_FLG_D); + + if ((ee->f[t].u32 & 0x7f800000) == 0) { + ee->fcr |= FPU_FLG_D | FPU_FLG_SD; + ee->f[d].u32 = (ee->f[t].u32 & 0x80000000) | 0x7f7fffff; + + return; + } else if (ee->f[t].u32 & 0x80000000) { + ee->fcr |= FPU_FLG_I | FPU_FLG_SI; + + ee->f[d].f = EE_FS / sqrtf(fabsf(fpu_cvtf(ee->f[t].f))); + } else { + ee->f[d].f = EE_FS / sqrtf(fpu_cvtf(ee->f[t].f)); + } + + if (fpu_check_overflow_no_flags(ee, &ee->f[d])) + return; + + fpu_check_underflow_no_flags(ee, &ee->f[d]); } static inline void ee_i_sb(struct ee_state* ee) { bus_write8(ee, EE_RS32 + SE3216(EE_D_I16), EE_RT); @@ -2403,7 +2558,20 @@ static inline void ee_i_sqc2(struct ee_state* ee) { bus_write128(ee, (EE_RS32 + SE3216(EE_D_I16)) & ~0xf, ee->vu0->vf[EE_D_RT].u128); } static inline void ee_i_sqrts(struct ee_state* ee) { - EE_FD = sqrtf(EE_FT); + int t = EE_D_RT; + int d = EE_D_FD; + + ee->fcr &= ~(FPU_FLG_I | FPU_FLG_D); + + if ((ee->f[t].u32 & 0x7f800000) == 0) { + ee->f[d].u32 = ee->f[t].u32 & 0x80000000; + } else if (ee->f[t].u32 & 0x80000000) { + ee->fcr |= FPU_FLG_I | FPU_FLG_SI; + + ee->f[d].f = sqrtf(fabsf(fpu_cvtf(ee->f[t].f))); + } else { + ee->f[d].f = sqrtf(fpu_cvtf(ee->f[t].f)); + } } static inline void ee_i_sra(struct ee_state* ee) { EE_RD = SE6432(((int32_t)EE_RT32) >> EE_D_SA); @@ -2430,9 +2598,21 @@ static inline void ee_i_sub(struct ee_state* ee) { } static inline void ee_i_subas(struct ee_state* ee) { ee->a.f = EE_FS - EE_FT; + + if (fpu_check_overflow(ee, &ee->a)) + return; + + fpu_check_underflow(ee, &ee->a); } static inline void ee_i_subs(struct ee_state* ee) { - EE_FD = EE_FS - EE_FT; + int d = EE_D_FD; + + ee->f[d].f = EE_FS - EE_FT; + + if (fpu_check_overflow(ee, &ee->f[d])) + return; + + fpu_check_underflow(ee, &ee->f[d]); } static inline void ee_i_subu(struct ee_state* ee) { EE_RD = SE6432(EE_RS - EE_RT); @@ -2666,6 +2846,11 @@ void ee_init(struct ee_state* ee, struct vu_state* vu0, struct vu_state* vu1, st ee->scratchpad = ps2_ram_create(); ps2_ram_init(ee->scratchpad, 0x4000); + + // EE's FPU uses round to zero by default + fesetround(FE_TOWARDZERO); + + ee->fcr = 0x01000001; } static inline void ee_execute(struct ee_state* ee) { @@ -3305,7 +3490,6 @@ void ee_reset(struct ee_state* ee) { ee->cop0_r[i] = 0; ee->a.u32 = 0; - ee->fcr = 0; ee->hi = (uint128_t){ .u64[0] = 0, .u64[1] = 0 }; ee->lo = (uint128_t){ .u64[0] = 0, .u64[1] = 0 }; @@ -3319,6 +3503,10 @@ void ee_reset(struct ee_state* ee) { ee->prid = 0x2e20; ee->pc = EE_VEC_RESET; ee->next_pc = ee->pc + 4; + + fesetround(FE_TOWARDZERO); + + ee->fcr = 0x01000001; } void ee_destroy(struct ee_state* ee) { diff --git a/src/ee/ee.h b/src/ee/ee.h index 8ca7f53..ad204b4 100644 --- a/src/ee/ee.h +++ b/src/ee/ee.h @@ -78,6 +78,16 @@ struct ee_bus_s { #define EE_VEC_COMMON 0x00000180 #define EE_VEC_IRQ 0x00000200 +#define FPU_FLG_C 0x00800000 +#define FPU_FLG_I 0x00020000 +#define FPU_FLG_D 0x00010000 +#define FPU_FLG_O 0x00008000 +#define FPU_FLG_U 0x00004000 +#define FPU_FLG_SI 0x00000040 +#define FPU_FLG_SD 0x00000020 +#define FPU_FLG_SO 0x00000010 +#define FPU_FLG_SU 0x00000008 + /* 1 V0 - Even page valid. When not set, the memory referenced in this entry is not mapped. 2 D0 - Even page dirty. When not set, writes cause an exception. diff --git a/src/ee/gif.c b/src/ee/gif.c index 6392bc7..1ca2be7 100644 --- a/src/ee/gif.c +++ b/src/ee/gif.c @@ -172,14 +172,22 @@ void gif_handle_tag(struct ps2_gif* gif, uint128_t data) { gif->tag.reg = data.u64[1]; gif->tag.index = 0; + if (gif->tag.nregs == 0) + gif->tag.nregs = 16; + switch (gif->tag.fmt) { - case 0: + case 0: { + gif->tag.remaining = gif->tag.nregs * gif->tag.nloop; + gif->tag.qwc = gif->tag.nloop * gif->tag.nregs; + } break; case 1: { gif->tag.remaining = gif->tag.nregs * gif->tag.nloop; + gif->tag.qwc = (gif->tag.nloop * gif->tag.nregs + 1) / 2; } break; case 2: case 3: { gif->tag.remaining = gif->tag.nloop; + gif->tag.qwc = gif->tag.nloop; } break; } @@ -227,8 +235,9 @@ void gif_handle_packed(struct ps2_gif* gif, uint128_t data) { default: printf("gif: PACKED format for reg %d unimplemented\n", r); exit(1); break; } - // Note: This handles odd NREGS*NLOOP case - if (gif->tag.index == gif->tag.remaining) { + gif->tag.qwc--; + + if (gif->tag.qwc == 0) { gif->state = GIF_STATE_RECV_TAG; return; @@ -267,11 +276,13 @@ void gif_handle_reglist(struct ps2_gif* gif, uint128_t data) { if (gif->tag.index == gif->tag.remaining) { gif->state = GIF_STATE_RECV_TAG; - return; + break; } } - if (gif->tag.index == gif->tag.remaining) { + gif->tag.qwc--; + + if (gif->tag.qwc == 0) { gif->state = GIF_STATE_RECV_TAG; return; @@ -282,13 +293,11 @@ void gif_handle_image(struct ps2_gif* gif, uint128_t data) { ps2_gs_write_internal(gif->gs, GS_HWREG, data.u64[0]); ps2_gs_write_internal(gif->gs, GS_HWREG, data.u64[1]); - ++gif->tag.index; + gif->tag.qwc--; - if (gif->tag.index == gif->tag.remaining) { + if (gif->tag.qwc == 0) { gif->state = GIF_STATE_RECV_TAG; } - - return; } void ps2_gif_write128(struct ps2_gif* gif, uint32_t addr, uint128_t data) { @@ -301,7 +310,7 @@ void ps2_gif_write128(struct ps2_gif* gif, uint32_t addr, uint128_t data) { return; } - if (gif->tag.index != gif->tag.remaining) { + if (gif->tag.qwc) { switch (gif->tag.fmt) { case 0: gif_handle_packed(gif, data); return; case 1: gif_handle_reglist(gif, data); return; diff --git a/src/ee/gif.h b/src/ee/gif.h index 3c74b00..2ad63f7 100644 --- a/src/ee/gif.h +++ b/src/ee/gif.h @@ -22,6 +22,7 @@ struct gif_tag { int fmt; int nregs; uint64_t reg; + uint64_t qwc; int index; int remaining; diff --git a/src/ee/timers.c b/src/ee/timers.c index 8252636..711b4d2 100644 --- a/src/ee/timers.c +++ b/src/ee/timers.c @@ -133,4 +133,38 @@ void ee_timer_tick(struct ps2_ee_timers* timers, int timer) { void ps2_ee_timers_tick(struct ps2_ee_timers* timers) { for (int i = 0; i < 4; i++) ee_timer_tick(timers, i); +} + +void ps2_ee_timers_write16(struct ps2_ee_timers* timers, uint32_t addr, uint64_t data) { + int t = (addr >> 11) & 3; + + switch (addr & 0xff) { + case 0x00: timers->timer[t].counter = data & 0xffff; return; + case 0x10: ee_timers_write_mode(timers, data & 0xffff, t); return; + case 0x20: timers->timer[t].compare = data; return; + case 0x30: timers->timer[t].hold = data & 0xffff; return; + } + + printf("ee: timer %d write %08x to %02x\n", t, data, addr & 0xff); + + exit(1); +} + +uint64_t ps2_ee_timers_read16(struct ps2_ee_timers* timers, uint32_t addr) { + int t = (addr >> 11) & 3; + + // printf("ee: timer %d read %08x\n", t, addr & 0xff); + + switch (addr & 0xff) { + case 0x00: return timers->timer[t].counter & 0xffff; + case 0x10: return timers->timer[t].mode & 0xffff; + case 0x20: return timers->timer[t].compare & 0xffff; + case 0x30: return timers->timer[t].hold & 0xffff; + } + + printf("ee: timers read16 %08x\n", addr); + + exit(1); + + return 0; } \ No newline at end of file diff --git a/src/ee/timers.h b/src/ee/timers.h index 19504d2..5988ff5 100644 --- a/src/ee/timers.h +++ b/src/ee/timers.h @@ -28,8 +28,10 @@ struct ps2_ee_timers { struct ps2_ee_timers* ps2_ee_timers_create(void); void ps2_ee_timers_init(struct ps2_ee_timers* timers, struct ps2_intc* intc, struct sched_state* sched); void ps2_ee_timers_destroy(struct ps2_ee_timers* timers); +uint64_t ps2_ee_timers_read16(struct ps2_ee_timers* timers, uint32_t addr); uint64_t ps2_ee_timers_read32(struct ps2_ee_timers* timers, uint32_t addr); void ps2_ee_timers_write32(struct ps2_ee_timers* timers, uint32_t addr, uint64_t data); +void ps2_ee_timers_write16(struct ps2_ee_timers* timers, uint32_t addr, uint64_t data); void ps2_ee_timers_tick(struct ps2_ee_timers* timers); void ps2_ee_timers_handle_hblank(struct ps2_ee_timers* timers); void ps2_ee_timers_handle_vblank_in(struct ps2_ee_timers* timers); diff --git a/src/ee/vif.c b/src/ee/vif.c index 709a235..d6e315d 100644 --- a/src/ee/vif.c +++ b/src/ee/vif.c @@ -269,17 +269,29 @@ static inline void vif_handle_fifo_write(struct ps2_vif* vif, uint32_t data) { vif->shift = 0; } break; case VIF_CMD_DIRECT: { - // printf("vif%d: DIRECT(%04x)\n", vif->id, data & 0xffff); + //printf("vif%d: DIRECT(%04x)\n", vif->id, data & 0xffff); + + int imm = data & 0xffff; + + if (imm == 0) { + imm = 0x10000; + } vif->state = VIF_RECV_DATA; - vif->pending_words = (data & 0xffff) * 4; + vif->pending_words = imm * 4; vif->shift = 0; } break; case VIF_CMD_DIRECTHL: { // printf("vif%d: DIRECTHL(%04x)\n", vif->id, data & 0xffff); + int imm = data & 0xffff; + + if (imm == 0) { + imm = 0x10000; + } + vif->state = VIF_RECV_DATA; - vif->pending_words = (data & 0xffff) * 4; + vif->pending_words = imm * 4; vif->shift = 0; } break; diff --git a/src/ee/vu.c b/src/ee/vu.c index 4c1c1a1..af99456 100644 --- a/src/ee/vu.c +++ b/src/ee/vu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "vu.h" #include "vu_dis.h" @@ -61,12 +62,26 @@ void vu_init(struct vu_state* vu, int id, struct ps2_gif* gif, struct ps2_vif* v vu->vf[0].y = 0.0; vu->vf[0].z = 0.0; vu->vf[0].w = 1.0; + + // VU uses round to zero by default + fesetround(FE_TOWARDZERO); } void vu_destroy(struct vu_state* vu) { free(vu); } +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline uint32_t vu_max(int32_t a, int32_t b) { + return (a < 0 && b < 0) ? min(a, b) : max(a, b); +} + +static inline uint32_t vu_min(int32_t a, int32_t b) { + return (a < 0 && b < 0) ? max(a, b) : min(a, b); +} + static inline float vu_atan(float t) { //In reality, VU1 uses an approximation to derive the result. This is shown here. const static float atan_const[] = { @@ -174,7 +189,7 @@ static inline void vu_set_vf(struct vu_state* vu, int r, int f, float v) { } static inline void vu_set_vfu(struct vu_state* vu, int r, int f, int32_t v) { - if (r) vu->vf[r].i32[f] = v; + if (r) vu->vf[r].s32[f] = v; } static inline void vu_set_vf_x(struct vu_state* vu, int r, float v) { @@ -1532,12 +1547,11 @@ void vu_i_max(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; + if (!d) return; + for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - float ft = vu_vf_i(vu, t, i); - - vu_set_vf(vu, d, i, (fs > ft) ? fs : ft); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], vu->vf[t].s32[i]); } } } @@ -1545,11 +1559,11 @@ void vu_i_maxi(struct vu_state* vu) { int s = VU_UD_S; int d = VU_UD_D; + if (!d) return; + for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs > vu->i.f) ? fs : vu->i.f); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], vu->i.s32); } } } @@ -1558,13 +1572,15 @@ void vu_i_maxx(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_x(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[0]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { float fs = vu_vf_i(vu, s, i); - vu_set_vf(vu, d, i, (fs > bc) ? fs : bc); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], bc); } } } @@ -1573,13 +1589,15 @@ void vu_i_maxy(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_y(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[1]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { float fs = vu_vf_i(vu, s, i); - vu_set_vf(vu, d, i, (fs > bc) ? fs : bc); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], bc); } } } @@ -1588,13 +1606,15 @@ void vu_i_maxz(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_z(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[2]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { float fs = vu_vf_i(vu, s, i); - vu_set_vf(vu, d, i, (fs > bc) ? fs : bc); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], bc); } } } @@ -1603,13 +1623,15 @@ void vu_i_maxw(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_w(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[3]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { float fs = vu_vf_i(vu, s, i); - vu_set_vf(vu, d, i, (fs > bc) ? fs : bc); + vu->vf[d].u32[i] = vu_max(vu->vf[s].s32[i], bc); } } } @@ -1618,12 +1640,11 @@ void vu_i_mini(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; + if (!d) return; + for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - float ft = vu_vf_i(vu, t, i); - - vu_set_vf(vu, d, i, (fs < ft) ? fs : ft); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], vu->vf[t].s32[i]); } } } @@ -1631,11 +1652,11 @@ void vu_i_minii(struct vu_state* vu) { int s = VU_UD_S; int d = VU_UD_D; + if (!d) return; + for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs < vu->i.f) ? fs : vu->i.f); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], vu->i.s32); } } } @@ -1644,13 +1665,13 @@ void vu_i_minix(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_x(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[0]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs < bc) ? fs : bc); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], bc); } } } @@ -1659,13 +1680,13 @@ void vu_i_miniy(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_y(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[1]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs < bc) ? fs : bc); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], bc); } } } @@ -1674,13 +1695,13 @@ void vu_i_miniz(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_z(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[2]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs < bc) ? fs : bc); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], bc); } } } @@ -1689,13 +1710,13 @@ void vu_i_miniw(struct vu_state* vu) { int t = VU_UD_T; int d = VU_UD_D; - float bc = vu_vf_w(vu, t); + if (!d) return; + + int32_t bc = vu->vf[t].s32[3]; for (int i = 0; i < 4; i++) { if (VU_UD_DI(i)) { - float fs = vu_vf_i(vu, s, i); - - vu_set_vf(vu, d, i, (fs < bc) ? fs : bc); + vu->vf[d].u32[i] = vu_min(vu->vf[s].s32[i], bc); } } } @@ -1810,7 +1831,7 @@ void vu_i_itof0(struct vu_state* vu) { int t = VU_UD_T; for (int i = 0; i < 4; i++) { - if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)vu->vf[s].i32[i]); + if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)vu->vf[s].s32[i]); } } void vu_i_itof4(struct vu_state* vu) { @@ -1818,7 +1839,7 @@ void vu_i_itof4(struct vu_state* vu) { int t = VU_UD_T; for (int i = 0; i < 4; i++) { - if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].i32[i]) * 0.0625f)); + if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].s32[i]) * 0.0625f)); } } void vu_i_itof12(struct vu_state* vu) { @@ -1826,7 +1847,7 @@ void vu_i_itof12(struct vu_state* vu) { int t = VU_UD_T; for (int i = 0; i < 4; i++) { - if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].i32[i]) * 0.000244140625f)); + if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].s32[i]) * 0.000244140625f)); } } void vu_i_itof15(struct vu_state* vu) { @@ -1834,7 +1855,7 @@ void vu_i_itof15(struct vu_state* vu) { int t = VU_UD_T; for (int i = 0; i < 4; i++) { - if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].i32[i]) * 0.000030517578125f)); + if (VU_UD_DI(i)) vu_set_vf(vu, t, i, (float)((float)(vu->vf[s].s32[i]) * 0.000030517578125f)); } } void vu_i_clip(struct vu_state* vu) { @@ -1998,6 +2019,10 @@ void vu_i_fceq(struct vu_state* vu) { vu->vi[1] = (vu->clip & 0xffffff) == VU_LD_IMM24; } void vu_i_fcget(struct vu_state* vu) { + int t = VU_LD_T; + + if (!t) return; + vu->vi[VU_LD_T] = vu->clip & 0xfff; } void vu_i_fcor(struct vu_state* vu) { @@ -2010,19 +2035,19 @@ void vu_i_fmand(struct vu_state* vu) { vu_set_vi(vu, VU_LD_T, vu->mac_pipeline[3] & VU_IS); } void vu_i_fmeq(struct vu_state* vu) { - VU_IT = (VU_IS & 0xffff) == (vu->status & 0xffff); + vu_set_vi(vu, VU_LD_T, (VU_IS & 0xffff) == (vu->mac_pipeline[3] & 0xffff)); } void vu_i_fmor(struct vu_state* vu) { - VU_IT = (VU_IS & 0xffff) | (vu->status & 0xffff); + vu_set_vi(vu, VU_LD_T, (VU_IS & 0xffff) | (vu->mac_pipeline[3] & 0xffff)); } void vu_i_fsand(struct vu_state* vu) { - VU_IT = vu->status & VU_LD_IMM12; + vu_set_vi(vu, VU_LD_T, vu->status & VU_LD_IMM12); } void vu_i_fseq(struct vu_state* vu) { - VU_IT = (vu->status & 0xfff) == VU_LD_IMM12; + vu_set_vi(vu, VU_LD_T, (vu->status & 0xfff) == VU_LD_IMM12); } void vu_i_fsor(struct vu_state* vu) { - VU_IT = (vu->status & 0xfff) | VU_LD_IMM12; + vu_set_vi(vu, VU_LD_T, (vu->status & 0xfff) | VU_LD_IMM12); } void vu_i_fsset(struct vu_state* vu) { vu->status &= 0x3f; @@ -2322,16 +2347,19 @@ void vu_i_waitp(struct vu_state* vu) { } void vu_i_waitq(struct vu_state* vu) { // No operation + vu->q_delay = 0; } void vu_i_xgkick(struct vu_state* vu) { uint16_t addr = VU_IS; - int eop = 0; + int eop = 1; do { uint128_t tag = vu_mem_read(vu, addr++); + addr &= 0x7ff; + // printf("tag: addr=%08x %08x %08x %08x %08x\n", addr - 1, tag.u32[3], tag.u32[2], tag.u32[1], tag.u32[0]); ps2_gif_write128(vu->gif, 0, tag); @@ -2341,13 +2369,22 @@ void vu_i_xgkick(struct vu_state* vu) { int nloop = tag.u64[0] & 0x7fff; int flg = (tag.u64[0] >> 58) & 3; int nregs = (tag.u64[0] >> 60) & 0xf; + + if (!nloop) + continue; + + if (!nregs) + nregs = 16; + int qwc = 0; switch (flg) { - case 0: - case 1: { + case 0: { qwc = nregs * nloop; } break; + case 1: { + qwc = (nregs * nloop + 1) / 2; // Round up for odd cases + } break; case 2: case 3: { qwc = nloop; @@ -2360,7 +2397,7 @@ void vu_i_xgkick(struct vu_state* vu) { // eop, // flg, // qwc - // ); + // ); for (int i = 0; i < qwc; i++) { // printf("vu: %08x: %08x %08x %08x %08x\n", @@ -2372,16 +2409,12 @@ void vu_i_xgkick(struct vu_state* vu) { // ); ps2_gif_write128(vu->gif, 0, vu_mem_read(vu, addr++)); + + addr &= 0x7ff; } } while (!eop); } void vu_i_xitop(struct vu_state* vu) { - if (vu->id == 0) { - printf("vu: xitop used in VU0\n"); - - // exit(1); - } - vu_set_vi(vu, VU_LD_T, vu->vif->itop); } void vu_i_xtop(struct vu_state* vu) { @@ -2704,7 +2737,10 @@ static inline void vu_execute_lower(struct vu_state* vu, uint32_t opcode) { } void vu_execute_program(struct vu_state* vu, uint32_t addr) { - // printf("vu: Executing program at %08x (%08x) TOP=%08x\n", addr, addr << 3, vu->vif->vif1_top); + // printf("vu%d: Executing program at %08x (%08x) TOP=%08x\n", vu->id, addr, addr << 3, vu->vif->top); + // Disable VU1 + // if (vu->id == 1) + // return; struct vu_dis_state ds; @@ -2735,37 +2771,44 @@ void vu_execute_program(struct vu_state* vu, uint32_t addr) { ds.addr = tpc; - delayed_e_bit = vu->e_bit; + delayed_e_bit = vu->e_bit != 0; vu->upper = liw >> 32; vu->lower = liw & 0xffffffff; - vu->i_bit = vu->upper & 0x80000000; - vu->e_bit = vu->upper & 0x40000000; - vu->m_bit = vu->upper & 0x20000000; - vu->d_bit = vu->upper & 0x10000000; - vu->t_bit = vu->upper & 0x08000000; + vu->i_bit = (vu->upper & 0x80000000) != 0; + vu->e_bit = (vu->upper & 0x40000000) != 0; + vu->m_bit = (vu->upper & 0x20000000) != 0; + vu->d_bit = (vu->upper & 0x10000000) != 0; + vu->t_bit = (vu->upper & 0x08000000) != 0; + + vu->q_delay--; - // printf("%04x: %08x %08x ", tpc, vu->upper, vu->lower); + vu_update_status(vu); - vu->status = vu->mac_pipeline[3]; + // printf("%04x: %08x %08x %s", tpc, vu->upper, vu->lower, vu->e_bit ? "[e] " : " "); + // vu->status = vu->mac_pipeline[3]; + + vu_execute_upper(vu, vu->upper & 0x7ffffff); + if (vu->i_bit) { // printf("loi %08x\n", vu->lower); // LOI vu->i.u32 = vu->lower; } else { + // char ud[512], ld[512]; + // printf("%-40s%-40s\n", vu_disassemble_upper(ud, vu->upper, &ds), vu_disassemble_lower(ld, vu->lower, &ds)); - vu_execute_upper(vu, liw >> 32); - vu_execute_lower(vu, liw & 0xffffffff); + vu_execute_lower(vu, vu->lower); } vu->mac_pipeline[3] = vu->mac_pipeline[2]; vu->mac_pipeline[2] = vu->mac_pipeline[1]; vu->mac_pipeline[1] = vu->mac_pipeline[0]; vu->mac_pipeline[0] = vu->mac; - + vu->clip_pipeline[3] = vu->clip_pipeline[2]; vu->clip_pipeline[2] = vu->clip_pipeline[1]; vu->clip_pipeline[1] = vu->clip_pipeline[0]; @@ -2781,4 +2824,128 @@ void vu_execute_program(struct vu_state* vu, uint32_t addr) { } } +void ps2_vu_write_vi(struct vu_state* vu, int index, uint32_t value) { + switch (index) { + case 0: return; + case 1: case 2: case 3: + case 4: case 5: case 6: case 7: + case 8: case 9: case 10: case 11: + case 12: case 13: case 14: case 15: { + vu->vi[index] = value & 0xffff; + } break; + + case 16: { + vu->status &= ~0xfc0; + vu->status |= value & 0xfc0; + } break; + + case 17: return; // MAC flag register, read-only + case 18: { + vu->clip = value & 0xffffff; + } break; + + case 19: return; // VU revision register? read-only + + case 20: { + vu->r.u32 = value & 0x7fffff; + } break; + case 21: { + vu->i.u32 = value; + } break; + case 22: { + vu->q.u32 = value; + } break; + case 23: return; + case 24: { + vu->cr[8] = value & 0xc0c; + } break; + case 25: return; + case 26: return; // VU TPC register, read-only + case 27: { + vu->cmsar0 = value & 0xffff; + } break; + case 28: { + // To-do: Handle FBRST + vu->fbrst = value & 0xc0c; + + if (value & 2) { + // Reset VU0 + ps2_vu_reset(vu); + } + + if (value & 0x200) { + // Reset VU1 + ps2_vu_reset(vu->vu1); + } + } break; + case 29: return; // VU VPU-STAT register, read-only + case 30: return; // VU reserved register, read-only + case 31: { + vu->cmsar1 = value & 0xffff; + + vu_execute_program(vu->vu1, vu->cmsar1 >> 3); + } break; + } +} + +uint32_t ps2_vu_read_vi(struct vu_state* vu, int index) { + switch (index) { + case 0: case 1: case 2: case 3: + case 4: case 5: case 6: case 7: + case 8: case 9: case 10: case 11: + case 12: case 13: case 14: case 15: { + return vu->vi[index]; + } break; + + case 19: { // VU revision register + return 0x2e30; + } break; + + default: { + return vu->cr[index - 16]; + } break; + } +} + +void ps2_vu_reset(struct vu_state* vu) { + for (int i = 0; i < 16; i++) + vu->vi[i] = 0; + + for (int i = 0; i < 32; i++) { + vu->vf[i].u32[0] = 0; + vu->vf[i].u32[1] = 0; + vu->vf[i].u32[2] = 0; + vu->vf[i].u32[3] = 0; + } + + vu->r.u32 = 0x3f800000; + vu->i.u32 = 0; + vu->q.u32 = 0; + vu->clip = 0; + vu->status = 0; + vu->fbrst = 0; + vu->cmsar0 = 0; + vu->cmsar1 = 0; + vu->mac = 0; + vu->mac_pipeline[0] = 0; + vu->mac_pipeline[1] = 0; + vu->mac_pipeline[2] = 0; + vu->mac_pipeline[3] = 0; + vu->clip_pipeline[0] = 0; + vu->clip_pipeline[1] = 0; + vu->clip_pipeline[2] = 0; + vu->clip_pipeline[3] = 0; + vu->tpc = 0; + vu->next_tpc = 1; + vu->upper = 0; + vu->lower = 0; + vu->i_bit = 0; + vu->e_bit = 0; + vu->m_bit = 0; + vu->d_bit = 0; + vu->t_bit = 0; + + vu->vf[0].w = 1.0; +} + // #undef printf \ No newline at end of file diff --git a/src/ee/vu.h b/src/ee/vu.h index 8535285..de6a81a 100644 --- a/src/ee/vu.h +++ b/src/ee/vu.h @@ -16,7 +16,7 @@ struct vu_reg { uint128_t u128; uint64_t u64[2]; uint32_t u32[4]; - int32_t i32[4]; + int32_t s32[4]; float f[4]; // Named fields @@ -55,6 +55,18 @@ struct vu_state { uint32_t mac_pipeline[4]; uint32_t clip_pipeline[4]; + union { + uint32_t u32; + uint32_t s32; + float f; + } prev_q; + + int q_delay; + + uint16_t ialu_prev_v; + uint16_t ialu_prev_i; + int ialu_delay; + union { uint32_t u32; float f; @@ -70,14 +82,17 @@ struct vu_state { uint32_t rsv0; union { uint32_t u32; + uint32_t s32; float f; } r; union { uint32_t u32; + uint32_t s32; float f; } i; union { uint32_t u32; + uint32_t s32; float f; } q; uint32_t rsv1; @@ -280,6 +295,9 @@ void ps2_vu_write16(struct vu_state* vu, uint32_t addr, uint64_t data); void ps2_vu_write32(struct vu_state* vu, uint32_t addr, uint64_t data); void ps2_vu_write64(struct vu_state* vu, uint32_t addr, uint64_t data); void ps2_vu_write128(struct vu_state* vu, uint32_t addr, uint128_t data); +void ps2_vu_write_vi(struct vu_state* vu, int index, uint32_t value); +uint32_t ps2_vu_read_vi(struct vu_state* vu, int index); +void ps2_vu_reset(struct vu_state* vu); void vu_cycle(struct vu_state* vu); void vu_execute_program(struct vu_state* vu, uint32_t addr); diff --git a/src/gs/renderer/software_thread.cpp b/src/gs/renderer/software_thread.cpp index e9a3ced..c8a0fe7 100644 --- a/src/gs/renderer/software_thread.cpp +++ b/src/gs/renderer/software_thread.cpp @@ -718,6 +718,10 @@ static inline uint32_t gs_read_fb(struct ps2_gs* gs, int x, int y) { return vram[psmct16_shift[idx] & 0xfffff]; } break; + default: { + // printf("Unsupported PSMT %02x for fb read\n", gs->ctx->fbpsm); + // exit(1); + } break; } return 0; @@ -755,6 +759,10 @@ static inline uint32_t gs_read_dispfb(struct ps2_gs* gs, int x, int y, int dfb) return vram[psmct16_shift[idx] & 0xfffff]; } break; + default: { + printf("Unsupported PSMT %02x for dispfb read\n", dfbpsm); + exit(1); + } break; } return 0; @@ -774,6 +782,10 @@ static inline uint32_t gs_read_zb(struct ps2_gs* gs, int x, int y) { return (data & mask) >> shift; } + default: { + printf("Unsupported PSMT %02x for zb read\n", gs->ctx->zbpsm); + exit(1); + } break; } return 0; @@ -845,6 +857,10 @@ static inline uint32_t gs_read_cb(struct ps2_gs* gs, int i) { return vram[psmct16_shift[idx] & 0xfffff]; } break; + default: { + // printf("Unsupported PSMT %02x for 8-bit cb read\n", gs->ctx->cbpsm); + // exit(1); + } break; } } break; @@ -869,6 +885,10 @@ static inline uint32_t gs_read_cb(struct ps2_gs* gs, int i) { return vram[psmct16_shift[idx] & 0xfffff]; } break; + default: { + // printf("Unsupported PSMT %02x for 4-bit cb read\n", gs->ctx->cbpsm); + // exit(1); + } break; } } break; } @@ -926,6 +946,10 @@ static inline uint32_t gs_to_rgba32(struct ps2_gs* gs, uint32_t c, int fmt) { case GS_PSMT4HH: { return gs_to_rgba32(gs, c, gs->ctx->cbpsm); } break; + default: { + // printf("Unsupported PSMT %02x for to_rgba32\n", fmt); + // exit(1); + } break; } return 0; @@ -953,6 +977,10 @@ static inline uint32_t gs_from_rgba32(struct ps2_gs* gs, uint32_t c, int fmt) { case GS_PSMT4HH: { return gs_from_rgba32(gs, c, gs->ctx->cbpsm); } break; + default: { + // printf("Unsupported PSMT %02x for from_rgba32\n", fmt); + // exit(1); + } break; } return 0; @@ -1013,6 +1041,10 @@ static inline uint32_t gs_read_tb_impl(struct ps2_gs* gs, int u, int v) { return gs_read_cb(gs, data >> 28); } break; + default: { + // printf("Unsupported PSMT %02x for tb read\n", gs->ctx->tbpsm); + // exit(1); + } break; } return 0; @@ -1111,6 +1143,10 @@ static inline void gs_write_fb(struct ps2_gs* gs, int x, int y, uint32_t c) { vram[psmct16_shift[idx]] = f; } break; + default: { + // printf("Unsupported PSMT %02x for fb write\n", gs->ctx->fbpsm); + // exit(1); + } break; } } @@ -1136,6 +1172,10 @@ static inline void gs_write_fb_no_alpha(struct ps2_gs* gs, int x, int y, uint32_ *ptr = (c & 0x7fff) | (*ptr & 0x8000); } break; + default: { + // printf("Unsupported PSMT %02x for fb no alpha write\n", gs->ctx->fbpsm); + // exit(1); + } break; } } @@ -1166,6 +1206,10 @@ static inline void gs_write_zb(struct ps2_gs* gs, int x, int y, uint32_t z) { *(ptr + x + (y * gs->ctx->fbw)) = z; } break; + default: { + // printf("Unsupported PSMT %02x for zb write\n", gs->ctx->zbpsm); + // exit(1); + } break; } } @@ -1551,28 +1595,155 @@ void software_thread_init(void* udata, struct ps2_gs* gs, SDL_Window* window, SD ctx->sampler[1] = SDL_CreateGPUSampler(ctx->device, &linear_sci); } -static inline void software_thread_vram_blit(struct ps2_gs* gs, software_thread_state* ctx) { - // printf("dbp=%x (%x) dbw=%d (%d) dpsm=%02x dsa=(%d,%d) sbp=%x (%x) sbw=%d (%d) spsm=%02x ssa=(%d,%d) rr=(%d,%d) xdir=%d\n", - // ctx->dbp, ctx->dbp, - // ctx->dbw, ctx->dbw, - // ctx->dpsm, - // ctx->dsax, - // ctx->dsay, - // ctx->sbp, ctx->sbp, - // ctx->sbw, ctx->sbw, - // ctx->spsm, - // ctx->ssax, - // ctx->ssay, - // ctx->rrw, - // ctx->rrh, - // ctx->xdir - // ); +static inline uint32_t gs_generic_read(struct ps2_gs* gs, uint32_t bp, uint32_t bw, uint32_t u, uint32_t v) { + switch (gs->ctx->tbpsm) { + case GS_PSMCT32: + return gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff]; + case GS_PSMCT24: + return gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff]; + case GS_PSMCT16: { + uint32_t addr = psmct16_addr(bp, bw, u, v); + uint16_t* vram = (uint16_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 1) * 16); + + return vram[psmct16_shift[idx] & 0xfffff]; + } break; + case GS_PSMCT16S: { + uint32_t addr = psmct16s_addr(bp, bw, u, v); + uint16_t* vram = (uint16_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 1) * 16); + + return vram[psmct16_shift[idx] & 0xfffff]; + } break; + case GS_PSMT8: { + uint32_t addr = psmt8_addr(bp, bw, u, v); + uint8_t* vram = (uint8_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 3) * 16); + + return vram[psmt8_shift[idx] & 0xfffff]; + } break; + case GS_PSMT8H: { + uint32_t data = gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff]; + + return data >> 24; + } break; + case GS_PSMT4: { + uint32_t addr = psmt4_addr(bp, bw, u, v); + + int idx = (u & 31) + ((v & 3) * 32); + int shift = psmt4_shift[idx]; + + uint32_t mask = 0xful << shift; + + return (gs->vram[addr & 0xfffff] & mask) >> shift; + } break; + case GS_PSMT4HL: { + uint32_t data = gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff]; + + return (data >> 24) & 0xf; + } break; + case GS_PSMT4HH: { + uint32_t data = gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff]; + + return data >> 28; + } break; + default: { + // printf("Unsupported PSMT %02x for generic read\n", gs->ctx->tbpsm); + // exit(1); + } break; + } +} - for (int y = 0; y < (int)ctx->rrh; y++) { - uint32_t src = ctx->sbp + ctx->ssax + (ctx->ssay * ctx->rrw) + (y * ctx->rrw); - uint32_t dst = ctx->dbp + ctx->dsax + (ctx->dsay * ctx->rrw) + (y * ctx->rrw); +static inline void gs_generic_write(struct ps2_gs* gs, uint32_t bp, uint32_t bw, uint32_t u, uint32_t v, uint32_t data) { + switch (gs->ctx->tbpsm) { + case GS_PSMCT32: + gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff] = data; + break; + case GS_PSMCT24: + gs->vram[psmct32_addr(bp, bw, u, v) & 0xfffff] = data; + break; + case GS_PSMCT16: { + uint32_t addr = psmct16_addr(bp, bw, u, v); + uint16_t* vram = (uint16_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 1) * 16); - memcpy(gs->vram + dst, gs->vram + src, ctx->rrw * sizeof(uint32_t)); + vram[psmct16_shift[idx] & 0xfffff] = data; + } break; + case GS_PSMCT16S: { + uint32_t addr = psmct16s_addr(bp, bw, u, v); + uint16_t* vram = (uint16_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 1) * 16); + + vram[psmct16_shift[idx] & 0xfffff] = data; + } break; + case GS_PSMT8: { + uint32_t addr = psmt8_addr(bp, bw, u, v); + uint8_t* vram = (uint8_t*)(&gs->vram[addr & 0xfffff]); + + int idx = (u & 15) + ((v & 3) * 16); + + vram[psmt8_shift[idx] & 0xfffff] = data; + } break; + case GS_PSMT8H: { + uint32_t addr = psmct32_addr(bp, bw, u, v) & 0xfffff; + + gs->vram[addr] = (gs->vram[addr] & 0x00ffffff) | (data << 24); + } break; + case GS_PSMT4: { + uint32_t addr = psmt4_addr(bp, bw, u, v) & 0xfffff; + + int idx = (u & 31) + ((v & 3) * 32); + int shift = psmt4_shift[idx]; + + uint32_t mask = 0xful << shift; + + gs->vram[addr] = (gs->vram[addr] & ~mask) | (data << shift); + } break; + case GS_PSMT4HL: { + uint32_t addr = psmct32_addr(bp, bw, u, v) & 0xfffff; + + gs->vram[addr] = (gs->vram[addr] & 0xf0ffffff) | ((data & 0xf) << 24); + } break; + case GS_PSMT4HH: { + uint32_t addr = psmct32_addr(bp, bw, u, v) & 0xfffff; + + gs->vram[addr] = (gs->vram[addr] & 0x0fffffff) | ((data & 0xf) << 28); + } break; + default: { + // printf("Unsupported PSMT %02x for write\n", gs->ctx->tbpsm); + // exit(1); + } break; + } +} + +static inline void software_thread_vram_blit(struct ps2_gs* gs, software_thread_state* ctx) { + printf("dbp=%x (%x) dbw=%d (%d) dpsm=%02x dsa=(%d,%d) sbp=%x (%x) sbw=%d (%d) spsm=%02x ssa=(%d,%d) rr=(%d,%d) xdir=%d\n", + ctx->dbp, ctx->dbp, + ctx->dbw, ctx->dbw, + ctx->dpsm, + ctx->dsax, + ctx->dsay, + ctx->sbp, ctx->sbp, + ctx->sbw, ctx->sbw, + ctx->spsm, + ctx->ssax, + ctx->ssay, + ctx->rrw, + ctx->rrh, + ctx->xdir + ); + + for (int y = 0; y < ctx->rrh; y++) { + for (int x = 0; x < ctx->rrw; x++) { + uint32_t s = gs_generic_read(gs, ctx->sbp, ctx->sbw, ctx->ssax + x, ctx->ssay + y); + + gs_generic_write(gs, ctx->dbp, ctx->dbw, ctx->dsax + x, ctx->dsay + y, s); + } } } @@ -2013,6 +2184,8 @@ void render_sprite(struct ps2_gs* gs, void* udata) { float u = v0.u; float v = v0.v; + float ut = v0.u < v1.u ? v0.u : v1.u; + float vt = v0.v < v1.v ? v0.v : v1.v; for (int y = ymin; y < ymax; y++) { for (int x = xmin; x < xmax; x++) { @@ -2029,8 +2202,8 @@ void render_sprite(struct ps2_gs* gs, void* udata) { int iv; if (gs->fst) { - u = v0.u + (v1.u - v0.u) * tx; - v = v0.v + (v1.v - v0.v) * ty; + u = v0.u + ((int)v1.u - (int)v0.u) * tx; + v = v0.v + ((int)v1.v - (int)v0.v) * ty; iu = u; iv = v; diff --git a/src/iop/spu2.c b/src/iop/spu2.c index c405a81..fd785e2 100644 --- a/src/iop/spu2.c +++ b/src/iop/spu2.c @@ -91,10 +91,10 @@ void ps2_spu2_init(struct ps2_spu2* spu2, struct ps2_iop_dma* dma, struct ps2_io } void spu2_irq(struct ps2_spu2* spu2, int c) { - // if (spu2->spdif_irq & (4 << c)) - // return; + if (spu2->spdif_irq & (8 << c)) + return; - // spu2->spdif_irq |= 4 << c; + spu2->spdif_irq |= 8 << c; // printf("spu2: IRQ fired\n"); @@ -428,7 +428,7 @@ uint64_t ps2_spu2_read16(struct ps2_spu2* spu2, uint32_t addr) { case 0x7ac: return spu2->c[1].in_coef_l; case 0x7ae: return spu2->c[1].in_coef_r; case 0x7C0: return spu2->spdif_out; - // case 0x7C2: return spu2->spdif_irq; + case 0x7C2: return spu2->spdif_irq; case 0x7C6: return spu2->spdif_mode; case 0x7C8: return spu2->spdif_media; case 0x7CA: return spu2->spdif_copy; @@ -614,7 +614,7 @@ void ps2_spu2_write16(struct ps2_spu2* spu2, uint32_t addr, uint64_t data) { case 0x7ac: spu2->c[1].in_coef_l = data; return; case 0x7ae: spu2->c[1].in_coef_r = data; return; case 0x7C0: spu2->spdif_out = data; return; - // case 0x7C2: spu2->spdif_irq = data; return; + case 0x7C2: printf("spdif irq write %04x", data); spu2->spdif_irq = data; return; case 0x7C6: spu2->spdif_mode = data; return; case 0x7C8: spu2->spdif_media = data; return; case 0x7CA: spu2->spdif_copy = data; return;