Skip to content

psyq-obj-parser: Handle comm symbols properly #1907

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions tools/psyq-obj-parser/psyq-obj-parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@
struct Expression;

/* The main parser entry point; will return nullptr on error */
static std::unique_ptr<PsyqLnkFile> parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted);
static std::unique_ptr<PsyqLnkFile> parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted,
bool convertCommToBss);
static std::string readPsyqString(PCSX::IO<PCSX::File> file) { return file->readString(file->byte()); }

/* Our list of sections and symbols will be keyed by their id from the LNK file */
Expand Down Expand Up @@ -164,6 +165,7 @@
EXPORTED,
IMPORTED,
UNINITIALIZED,
COMM,
} symbolType;
uint16_t sectionIndex;
uint32_t offset = 0;
Expand All @@ -172,7 +174,7 @@
ELFIO::Elf_Word elfSym;
uint16_t getKey() { return getLow(); }
uint32_t getOffset(PsyqLnkFile* psyq) const {
if (symbolType == Type::UNINITIALIZED) {
if (symbolType == Type::UNINITIALIZED || symbolType == Type::COMM) {
auto section = psyq->sections.find(sectionIndex);
assert(section != psyq->sections.end());
return section->data.size() + section->zeroes + offset;
Expand Down Expand Up @@ -243,262 +245,267 @@
};

/* The psyq LNK parser code */
std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted) {
std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted,
bool convertCommToBss) {
std::unique_ptr<PsyqLnkFile> ret = std::make_unique<PsyqLnkFile>();
vprint(":: Reading signature.\n");
std::string signature = file->readString(3);
if (signature != "LNK") {
fmt::print(stderr, "Wrong signature: {}\n", signature);
return nullptr;
}
vprint(" --> Signature ok.\n");

vprint(":: Reading version: ");
uint8_t version = file->byte();
vprint("{}\n", version);
if (version != 2) {
fmt::print(stderr, "Unknown version {}\n", version);
return nullptr;
}
uint32_t curFunctionStart = 0;
std::string curFunctionName = "";

vprint(":: Parsing file...\n");
while (!file->eof()) {
uint8_t opcode = file->byte();
vprint(" :: Read opcode {} --> ", opcode);
switch (opcode) {
case (uint8_t)PsyqOpcode::END: {
vprint("EOF\n");
while (!ret->unseenSectionsList.empty()) {
ret->sectionsList.push_back(&*ret->unseenSectionsList.begin());
}
if (sorted) {
ret->sectionsList.clear();
ret->symbolsList.clear();
for (auto& section : ret->sections) {
ret->sectionsList.push_back(&section);
}
for (auto& symbol : ret->symbols) {
ret->symbolsList.push_back(&symbol);
}
}
// Determine bss symbol placement
// This has to be done after parsing the whole psyq object, as bss may be out of order in the file.
// Doing it here ensures that we process symbols in their id order, instead of by psyq object file
// order, if the user requested ordering by id - otherwise, it'll indeed be order of appearance.
for (auto& symbol : ret->symbolsList) {
// Static bss symbols will be represented as a ZEROES opcode instead of UNINITIALIZED.
// This will cause them to have a size of zero, so ignore size zero symbols here.
// Their relocs will resolve to an offset of the local .bss instead, so this causes no issues.
if (symbol.size > 0) {
if (symbol.size > 0 && symbol.symbolType != PsyqLnkFile::Symbol::Type::COMM) {
auto section = ret->sections.find(symbol.sectionIndex);
if (section != ret->sections.end() && section->isBss()) {
auto align = std::min((uint32_t)section->alignment, symbol.size) - 1;
section->uninitializedOffset += align;
section->uninitializedOffset &= ~align;
symbol.offset = section->uninitializedOffset;
section->uninitializedOffset += symbol.size;
}
}
}
return ret;
}
case (uint8_t)PsyqOpcode::BYTES: {
uint16_t size = file->read<uint16_t>();
vprint("Bytes ({:04x})\n", size);
PCSX::Slice slice = file->read(size);
std::string hex = slice.toHexString();
vprint("{}\n", hex.c_str());
auto section = ret->getCurrentSection();
if (!section) {
fmt::print("Section {} not found\n", ret->currentSection);
return nullptr;
}
if (!ret->sectionsList.isLinked(section)) {
ret->sectionsList.push_back(section);
}
section->pointer = section->getFullSize();
if (section->zeroes) {
void* ptr = calloc(section->zeroes, 1);
PCSX::Slice zeroes;
zeroes.acquire(ptr, section->zeroes);
section->zeroes = 0;
section->data.concatenate(zeroes);
}
section->data.concatenate(slice);
break;
}
case (uint8_t)PsyqOpcode::SWITCH: {
uint16_t sectionIndex = file->read<uint16_t>();
vprint("Switch to section {}\n", sectionIndex);
ret->currentSection = sectionIndex;
break;
}
case (uint8_t)PsyqOpcode::ZEROES: {
uint32_t size = file->read<uint32_t>();
vprint("Zeroes ({:04x})\n", size);
auto section = ret->getCurrentSection();
if (!section) {
fmt::print("Section {} not found\n", ret->currentSection);
return nullptr;
}
section->zeroes += size;
break;
}
case (uint8_t)PsyqOpcode::RELOCATION: {
uint8_t relocType = file->read<uint8_t>();
vprint("Relocation {} ", relocType);
switch (relocType) {
case (uint8_t)PsyqRelocType::REL32: {
vprint("(REL32), ");
break;
}
case (uint8_t)PsyqRelocType::REL26: {
vprint("(REL26), ");
break;
}
case (uint8_t)PsyqRelocType::HI16: {
vprint("(HI16), ");
break;
}
case (uint8_t)PsyqRelocType::LO16: {
vprint("(LO16), ");
break;
}
case (uint8_t)PsyqRelocType::GPREL16: {
vprint("(GPREL16), ");
break;
}
case (uint8_t)PsyqRelocType::GPREL16_LE: {
vprint("(GPREL16 LE), ");
break;
}
case (uint8_t)PsyqRelocType::GPREL16_BE: {
vprint("(GPREL16 BE), ");
break;
}
case (uint8_t)PsyqRelocType::HI16_BE: {
vprint("(HI16 BE), ");
break;
}
case (uint8_t)PsyqRelocType::LO16_BE: {
vprint("(LO16 BE), ");
break;
}
case (uint8_t)PsyqRelocType::REL26_BE: {
vprint("(REL26 BE), ");
break;
}
case (uint8_t)PsyqRelocType::REL32_BE: {
vprint("(REL32 BE), ");
break;
}
default: {
fmt::print("Unknown relocation type {}\n", relocType);
return nullptr;
}
}
uint16_t offset = file->read<uint16_t>();
auto section = ret->getCurrentSection();
if (!section) {
fmt::print("Section {} not found\n", ret->currentSection);
return nullptr;
}
vprint("offset {:04x}+{:08x}, expression: \n", offset, section->pointer);
std::unique_ptr<Expression> expression = Expression::parse(file, verbose);
if (!expression) return nullptr;
// Addend will be populated later during expression evaluation
section->relocations.emplace_back(
Relocation{PsyqRelocType(relocType), offset + section->pointer, 0, std::move(expression)});
break;
}
case (uint8_t)PsyqOpcode::EXPORTED_SYMBOL: {
uint16_t symbolIndex = file->read<uint16_t>();
uint16_t sectionIndex = file->read<uint16_t>();
uint32_t offset = file->read<uint32_t>();
std::string name = readPsyqString(file);
vprint("Export: id {}, section {}, offset {:08x}, name {}\n", symbolIndex, sectionIndex, offset, name);
Symbol* symbol = new Symbol();
symbol->symbolType = Symbol::Type::EXPORTED;
symbol->sectionIndex = sectionIndex;
symbol->offset = offset;
symbol->name = name;
ret->symbols.insert(symbolIndex, symbol);
ret->symbolsList.push_back(symbol);
break;
}
case (uint8_t)PsyqOpcode::IMPORTED_SYMBOL: {
uint16_t symbolIndex = file->read<uint16_t>();
std::string name = readPsyqString(file);
vprint("Import: id {}, name {}\n", symbolIndex, name);
Symbol* symbol = new Symbol();
symbol->symbolType = Symbol::Type::IMPORTED;
symbol->name = name;
ret->symbols.insert(symbolIndex, symbol);
ret->symbolsList.push_back(symbol);
break;
}
case (uint8_t)PsyqOpcode::SECTION: {
uint16_t sectionIndex = file->read<uint16_t>();
uint16_t group = file->read<uint16_t>();
uint8_t alignment = file->read<uint8_t>();
std::string name = readPsyqString(file);
vprint("Section: id {}, group {}, alignment {}, name {}\n", sectionIndex, group, alignment, name);
Section* section = new Section();
section->group = group;
section->alignment = alignment;
section->name = name;
ret->sections.insert(sectionIndex, section);
ret->unseenSectionsList.push_back(section);
if ((alignment - 1) & alignment) {
fmt::print(stderr, "Section alignment {} isn't a power of two.\n", alignment);
return nullptr;
}
break;
}
case (uint8_t)PsyqOpcode::LOCAL_SYMBOL: {
uint16_t sectionIndex = file->read<uint16_t>();
uint32_t offset = file->read<uint32_t>();
std::string name = readPsyqString(file);
vprint("Local: section {}, offset {}, name {}\n", sectionIndex, offset, name);
Symbol* symbol = new Symbol();
symbol->symbolType = Symbol::Type::LOCAL;
symbol->sectionIndex = sectionIndex;
symbol->offset = offset;
symbol->name = name;
ret->symbols.insert(--ret->localIndex, symbol);
ret->symbolsList.push_back(symbol);
break;
}
case (uint8_t)PsyqOpcode::FILENAME: {
uint16_t index = file->read<uint16_t>();
std::string name = readPsyqString(file);
vprint("File {}: {}\n", index, name);
break;
}
case (uint8_t)PsyqOpcode::PROGRAMTYPE: {
uint8_t type = file->read<uint8_t>();
vprint("Program type: {}\n", type);
if (type != 7 && type != 9) {
fmt::print(stderr, "Unknown program type {}.\n", type);
return nullptr;
}
if (ret->gotProgramSeven) {
fmt::print(stderr, "Already got program type.\n");
return nullptr;
}
ret->gotProgramSeven = true;
break;
}
case (uint8_t)PsyqOpcode::UNINITIALIZED: {
uint16_t symbolIndex = file->read<uint16_t>();
uint16_t sectionIndex = file->read<uint16_t>();
uint32_t size = file->read<uint32_t>();
std::string name = readPsyqString(file);

Symbol* symbol = new Symbol();
symbol->symbolType = Symbol::Type::UNINITIALIZED;
if (convertCommToBss) {
symbol->symbolType = Symbol::Type::UNINITIALIZED;
} else {
symbol->symbolType = Symbol::Type::COMM;
}

Check warning on line 508 in tools/psyq-obj-parser/psyq-obj-parser.cc

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

PsyqLnkFile::parse increases in cyclomatic complexity from 62 to 64, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.
symbol->sectionIndex = sectionIndex;
symbol->size = size;
symbol->name = name;
Expand Down Expand Up @@ -970,7 +977,9 @@
bool isWeak = false;

fmt::print(" :: Generating symbol {} {} {}\n", name, getOffset(psyq), sectionIndex);
if (symbolType != Type::IMPORTED) {
if (symbolType == Type::COMM) {
elfSectionIndex = ELFIO::SHN_COMMON;
} else if (symbolType != Type::IMPORTED) {

Check warning on line 982 in tools/psyq-obj-parser/psyq-obj-parser.cc

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

PsyqLnkFile::Symbol::generateElfSymbol increases in cyclomatic complexity from 9 to 10, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.
auto section = psyq->sections.find(sectionIndex);
if (section == psyq->sections.end()) {
psyq->setElfConversionError("Couldn't find section index {} for symbol {} ('{}')", sectionIndex, getKey(),
Expand Down Expand Up @@ -1387,7 +1396,8 @@
psyq->setElfConversionError("Couldn't find symbol {} for relocation.", expr->symbolIndex);
return false;
}
if (symbol->symbolType != PsyqLnkFile::Symbol::Type::IMPORTED) {
if (symbol->symbolType != PsyqLnkFile::Symbol::Type::IMPORTED &&
symbol->symbolType != PsyqLnkFile::Symbol::Type::COMM) {

Check warning on line 1400 in tools/psyq-obj-parser/psyq-obj-parser.cc

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ Getting worse: Complex Method

PsyqLnkFile::Relocation::generateElf increases in cyclomatic complexity from 62 to 63, threshold = 9. This function has many conditional statements (e.g. if, for, while), leading to lower code health. Avoid adding more conditionals and code to it without refactoring.
return localSymbolReloc(symbol->sectionIndex, symbol->getOffset(psyq) + addend);
}
if (pass == ElfRelocationPass::PASS1) {
Expand Down Expand Up @@ -1536,6 +1546,7 @@
-o output.o tries to dump the parsed psyq LNK file into an ELF file;
can only work with a single input file.
-b outputs a big-endian ELF file.
-c converts comm symbols into .bss symbols
)",
argv[0]);
return -1;
Expand All @@ -1551,7 +1562,7 @@
fmt::print(stderr, "Unable to open file: {}\n", input);
ret = -2;
} else {
auto psyq = PsyqLnkFile::parse(file, verbose, !!args.get<bool>("s"));
auto psyq = PsyqLnkFile::parse(file, verbose, !!args.get<bool>("s"), !!args.get<bool>("c"));
if (!psyq) {
ret = -3;
} else {
Expand Down