Skip to content

Commit 36f2288

Browse files
committed
psyq-obj-parser: Handle comm symbols properly
Convert .comm symbols into the proper ELF equivalent, instead of converting them to local symbols in the .bss segment. This makes it possible to use the converted PSY-Q libraries to recreate a bit-exact executable of Frogger; without proper comm symbol support, controlling the order of some .bss symbols in the libraries is impossible. Even though this is more correct than before, there are some known users of psyq-obj-parser that rely on the old behaviour, so a command line option is added which can be used to restore the previous behaviour.
1 parent 3b8f0a0 commit 36f2288

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

tools/psyq-obj-parser/psyq-obj-parser.cc

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ struct PsyqLnkFile {
125125
struct Expression;
126126

127127
/* The main parser entry point; will return nullptr on error */
128-
static std::unique_ptr<PsyqLnkFile> parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted);
128+
static std::unique_ptr<PsyqLnkFile> parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted,
129+
bool convertCommToBss);
129130
static std::string readPsyqString(PCSX::IO<PCSX::File> file) { return file->readString(file->byte()); }
130131

131132
/* Our list of sections and symbols will be keyed by their id from the LNK file */
@@ -164,6 +165,7 @@ struct PsyqLnkFile {
164165
EXPORTED,
165166
IMPORTED,
166167
UNINITIALIZED,
168+
COMM,
167169
} symbolType;
168170
uint16_t sectionIndex;
169171
uint32_t offset = 0;
@@ -172,7 +174,7 @@ struct PsyqLnkFile {
172174
ELFIO::Elf_Word elfSym;
173175
uint16_t getKey() { return getLow(); }
174176
uint32_t getOffset(PsyqLnkFile* psyq) const {
175-
if (symbolType == Type::UNINITIALIZED) {
177+
if (symbolType == Type::UNINITIALIZED || symbolType == Type::COMM) {
176178
auto section = psyq->sections.find(sectionIndex);
177179
assert(section != psyq->sections.end());
178180
return section->data.size() + section->zeroes + offset;
@@ -243,7 +245,8 @@ struct PsyqLnkFile {
243245
};
244246

245247
/* The psyq LNK parser code */
246-
std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted) {
248+
std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool verbose, bool sorted,
249+
bool convertCommToBss) {
247250
std::unique_ptr<PsyqLnkFile> ret = std::make_unique<PsyqLnkFile>();
248251
vprint(":: Reading signature.\n");
249252
std::string signature = file->readString(3);
@@ -291,7 +294,7 @@ std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool
291294
// Static bss symbols will be represented as a ZEROES opcode instead of UNINITIALIZED.
292295
// This will cause them to have a size of zero, so ignore size zero symbols here.
293296
// Their relocs will resolve to an offset of the local .bss instead, so this causes no issues.
294-
if (symbol.size > 0) {
297+
if (symbol.size > 0 && symbol.symbolType != PsyqLnkFile::Symbol::Type::COMM) {
295298
auto section = ret->sections.find(symbol.sectionIndex);
296299
if (section != ret->sections.end() && section->isBss()) {
297300
auto align = std::min((uint32_t)section->alignment, symbol.size) - 1;
@@ -498,7 +501,11 @@ std::unique_ptr<PsyqLnkFile> PsyqLnkFile::parse(PCSX::IO<PCSX::File> file, bool
498501
std::string name = readPsyqString(file);
499502

500503
Symbol* symbol = new Symbol();
501-
symbol->symbolType = Symbol::Type::UNINITIALIZED;
504+
if (convertCommToBss) {
505+
symbol->symbolType = Symbol::Type::UNINITIALIZED;
506+
} else {
507+
symbol->symbolType = Symbol::Type::COMM;
508+
}
502509
symbol->sectionIndex = sectionIndex;
503510
symbol->size = size;
504511
symbol->name = name;
@@ -970,7 +977,9 @@ bool PsyqLnkFile::Symbol::generateElfSymbol(PsyqLnkFile* psyq, ELFIO::string_sec
970977
bool isWeak = false;
971978

972979
fmt::print(" :: Generating symbol {} {} {}\n", name, getOffset(psyq), sectionIndex);
973-
if (symbolType != Type::IMPORTED) {
980+
if (symbolType == Type::COMM) {
981+
elfSectionIndex = ELFIO::SHN_COMMON;
982+
} else if (symbolType != Type::IMPORTED) {
974983
auto section = psyq->sections.find(sectionIndex);
975984
if (section == psyq->sections.end()) {
976985
psyq->setElfConversionError("Couldn't find section index {} for symbol {} ('{}')", sectionIndex, getKey(),
@@ -1387,7 +1396,8 @@ bool PsyqLnkFile::Relocation::generateElf(ElfRelocationPass pass, const std::str
13871396
psyq->setElfConversionError("Couldn't find symbol {} for relocation.", expr->symbolIndex);
13881397
return false;
13891398
}
1390-
if (symbol->symbolType != PsyqLnkFile::Symbol::Type::IMPORTED) {
1399+
if (symbol->symbolType != PsyqLnkFile::Symbol::Type::IMPORTED &&
1400+
symbol->symbolType != PsyqLnkFile::Symbol::Type::COMM) {
13911401
return localSymbolReloc(symbol->sectionIndex, symbol->getOffset(psyq) + addend);
13921402
}
13931403
if (pass == ElfRelocationPass::PASS1) {
@@ -1536,6 +1546,7 @@ Usage: {} input.obj [input2.obj...] [-h] [-v] [-d] [-n] [-p prefix] [-o output.o
15361546
-o output.o tries to dump the parsed psyq LNK file into an ELF file;
15371547
can only work with a single input file.
15381548
-b outputs a big-endian ELF file.
1549+
-c converts comm symbols into .bss symbols
15391550
)",
15401551
argv[0]);
15411552
return -1;
@@ -1551,7 +1562,7 @@ Usage: {} input.obj [input2.obj...] [-h] [-v] [-d] [-n] [-p prefix] [-o output.o
15511562
fmt::print(stderr, "Unable to open file: {}\n", input);
15521563
ret = -2;
15531564
} else {
1554-
auto psyq = PsyqLnkFile::parse(file, verbose, !!args.get<bool>("s"));
1565+
auto psyq = PsyqLnkFile::parse(file, verbose, !!args.get<bool>("s"), !!args.get<bool>("c"));
15551566
if (!psyq) {
15561567
ret = -3;
15571568
} else {

0 commit comments

Comments
 (0)