Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Output labels when disassembling code #8897

Merged
merged 13 commits into from
Nov 10, 2014
226 changes: 174 additions & 52 deletions src/disasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,16 @@
//
//===----------------------------------------------------------------------===//

#include <string>
#include <cstdio>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCSymbol.h"

using namespace llvm;

Expand All @@ -42,6 +50,71 @@ class FuncMCView : public MemoryObject {
return 0;
}
};

// Table of branch-target addresses and the MCSymbols used to label them.
//
// Usage is split over two disassembly passes: in pass 0 the caller
// records addresses with insertAddress() and then calls createSymbols();
// in later passes lookupSymbol() returns the label name for an address.
// Symbols are simply named "L<address>". We could easily get more fancy,
// e.g. numbering symbols sequentially or encoding the line number, but
// that doesn't seem necessary.
class SymbolTable {
    typedef std::map<uint64_t, MCSymbol*> TableType;
    TableType Table;       // address -> symbol (NULL until createSymbols())
    std::string TempName;  // backing storage for lookupSymbol()'s result
    int Pass;              // current disassembly pass
public:
    // Start in pass 0 so that getPass() is well defined even when it is
    // called before setPass().
    SymbolTable() : Pass(0) {}

    void setPass(int Pass) { this->Pass = Pass; }
    int getPass() const { return Pass; }

    // Record an address that should receive a label.
    void insertAddress(uint64_t addr);
    // Create an MCSymbol for every recorded address.
    void createSymbols(MCContext &Ctx);
    // Return the label name for addr, or NULL if there is none.
    const char *lookupSymbol(uint64_t addr);
};
// Insert an address
void SymbolTable::insertAddress(uint64_t addr)
{
Table[addr] = NULL;
}
// Walk every recorded address and attach a symbol to it. Each symbol is
// named "L" followed by the address in decimal and is given the address
// as its (constant) variable value.
void SymbolTable::createSymbols(MCContext &Ctx)
{
    TableType::iterator entry = Table.begin();
    const TableType::iterator last = Table.end();
    while (entry != last) {
        const uint64_t address = entry->first;

        std::ostringstream label;
        label << "L" << address;

        MCSymbol *symbol = Ctx.GetOrCreateSymbol(StringRef(label.str()));
        symbol->setVariableValue(MCConstantExpr::Create(address, Ctx));
        entry->second = symbol;

        ++entry;
    }
}
const char *SymbolTable::lookupSymbol(uint64_t addr)
{
std::cout << "lookupSymbol(" << addr << ")\n";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this, that's just leftover debug output. Since the actual disassembled output is cached, cout output appears above, making it easy to miss.

if (!Table.count(addr)) return NULL;
MCSymbol *symb = Table[addr];
TempName = symb->getName().str();
std::cout << " found " << TempName << "\n";
return TempName.c_str();
}
const char *SymbolLookup(void *DisInfo_,
uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName)
{
SymbolTable *DisInfo = (SymbolTable*)DisInfo_;
if (DisInfo->getPass() != 0) {
if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
uint64_t addr = ReferenceValue;
const char *symbolName = DisInfo->lookupSymbol(addr);
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
*ReferenceName = NULL;
return symbolName;
}
}
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
*ReferenceName = NULL;
return NULL;
}
}

#ifndef USE_MCJIT
Expand Down Expand Up @@ -118,12 +191,13 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize,
#else
OwningPtr<MCSubtargetInfo>
STI(TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
OwningPtr<MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
#endif
if (!DisAsm) {
JL_PRINTF(JL_STDERR, "error: no disassembler for target", TripleName.c_str(), "\n");
return;
}
SymbolTable DisInfo;

unsigned OutputAsmVariant = 1;
bool ShowEncoding = false;
Expand All @@ -133,6 +207,8 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize,
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
#else
OwningPtr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
OwningPtr<MCInstrAnalysis>
MCIA(TheTarget->createMCInstrAnalysis(MCII.get()));
#endif
MCInstPrinter* IP =
TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI);
Expand Down Expand Up @@ -165,63 +241,109 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize,
// Make the MemoryObject wrapper
FuncMCView memoryObject(Fptr, Fsize);

uint64_t Size = 0;
uint64_t Index = 0;
uint64_t absAddr = 0;
// Take two passes: In the first pass we record all branch labels,
// in the second we actually perform the output
for (int pass = 0; pass < 2; ++ pass) {

DisInfo.setPass(pass);
if (pass != 0) {
// Switch to symbolic disassembly. We cannot do this
// before the first pass, because this changes branch
// targets from immediate values (constants) to
// expressions, which are not handled correctly by
// MCIA->evaluateBranch. (It should be possible to rewrite
// this routine to handle this case correctly as well.)
// Could add OpInfoLookup here
DisAsm->setupForSymbolicDisassembly
(NULL, SymbolLookup, &DisInfo, &Ctx);
}

// Set up the line info
typedef std::vector<JITEvent_EmittedFunctionDetails::LineStart> LInfoVec;
LInfoVec::iterator lineIter = lineinfo.begin();
LInfoVec::iterator lineEnd = lineinfo.end();
uint64_t Size = 0;
uint64_t Index = 0;
uint64_t absAddr = 0;

uint64_t nextLineAddr = -1;
DISubprogram debugscope;
// Set up the line info
typedef std::vector<JITEvent_EmittedFunctionDetails::LineStart>
LInfoVec;
LInfoVec::iterator lineIter = lineinfo.begin();
LInfoVec::iterator lineEnd = lineinfo.end();

if (lineIter != lineEnd) {
nextLineAddr = (*lineIter).Address;
debugscope = DISubprogram((*lineIter).Loc.getScope(jl_LLVMContext));
uint64_t nextLineAddr = -1;
DISubprogram debugscope;

stream << "Filename: " << debugscope.getFilename() << "\n";
stream << "Source line: " << (*lineIter).Loc.getLine() << "\n";
}
if (lineIter != lineEnd) {
nextLineAddr = (*lineIter).Address;
debugscope = DISubprogram((*lineIter).Loc.getScope(jl_LLVMContext));

if (pass != 0) {
stream << "Filename: " << debugscope.getFilename() << "\n";
stream << "Source line: " << (*lineIter).Loc.getLine() << "\n";
}
}

// Do the disassembly
for (Index = 0, absAddr = (uint64_t)Fptr;
Index < memoryObject.getExtent(); Index += Size, absAddr += Size) {

if (nextLineAddr != (uint64_t)-1 && absAddr == nextLineAddr) {
stream << "Source line: " << (*lineIter).Loc.getLine() << "\n";
nextLineAddr = (*++lineIter).Address;
// Do the disassembly
for (Index = 0, absAddr = (uint64_t)Fptr;
Index < memoryObject.getExtent(); Index += Size, absAddr += Size) {

if (nextLineAddr != (uint64_t)-1 && absAddr == nextLineAddr) {
if (pass != 0)
stream << "Source line: "
<< (*lineIter).Loc.getLine() << "\n";
nextLineAddr = (*++lineIter).Address;
}
if (pass != 0) {
// Uncomment this to output addresses for all instructions
// stream << Index << ": ";
const char *symbolName = DisInfo.lookupSymbol(Index);
if (symbolName)
stream << symbolName << ":";
}

MCInst Inst;

MCDisassembler::DecodeStatus S;
S = DisAsm->getInstruction(Inst, Size, memoryObject, Index,
/*REMOVE*/ nulls(), nulls());
switch (S) {
case MCDisassembler::Fail:
if (pass != 0)
SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]),
SourceMgr::DK_Warning,
"invalid instruction encoding");
if (Size == 0)
Size = 1; // skip illegible bytes
break;

case MCDisassembler::SoftFail:
if (pass != 0)
SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]),
SourceMgr::DK_Warning,
"potentially undefined instruction encoding");
// Fall through

case MCDisassembler::Success:
#ifdef LLVM35
if (pass != 0)
Streamer->EmitInstruction(Inst, *STI);
#else
if (pass == 0) {
// Pass 0: Record all branch targets
if (MCIA->isBranch(Inst)) {
uint64_t addr = MCIA->evaluateBranch(Inst, Index, Size);
if (addr != uint64_t(-1))
DisInfo.insertAddress(addr);
}
} else {
// Pass 1: Output instruction
Streamer->EmitInstruction(Inst);
}
#endif
break;
}
}

MCInst Inst;

MCDisassembler::DecodeStatus S;
S = DisAsm->getInstruction(Inst, Size, memoryObject, Index,
/*REMOVE*/ nulls(), nulls());
switch (S) {
case MCDisassembler::Fail:
SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]),
SourceMgr::DK_Warning,
"invalid instruction encoding");
if (Size == 0)
Size = 1; // skip illegible bytes
break;

case MCDisassembler::SoftFail:
SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]),
SourceMgr::DK_Warning,
"potentially undefined instruction encoding");
// Fall through

case MCDisassembler::Success:
#ifdef LLVM35
Streamer->EmitInstruction(Inst, *STI);
#else
Streamer->EmitInstruction(Inst);
#endif
break;
}
if (pass == 0)
DisInfo.createSymbols(Ctx);
}
#else // MCJIT version
FuncMCView memoryObject(Fptr, Fsize); // MemoryObject wrapper
Expand Down