Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experimental -hash-threshold option to hash very long symbol names. #1445

Merged
merged 1 commit into from
May 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ddmd/globals.d
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ struct Param
// Codegen cl options
bool singleObj;
bool disableRedZone;

uint hashThreshold; // MD5 hash symbols larger than this threshold (0 = no hashing)
}
}

Expand Down
2 changes: 2 additions & 0 deletions ddmd/globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ struct Param
// Codegen cl options
bool singleObj;
bool disableRedZone;

uint32_t hashThreshold; // MD5 hash symbols larger than this threshold (0 = no hashing)
#endif
};

Expand Down
21 changes: 20 additions & 1 deletion ddmd/mtype.d
Original file line number Diff line number Diff line change
Expand Up @@ -2665,12 +2665,31 @@ public:
buf.writeByte(0);
// Allocate buffer on stack, fail over to using malloc()
char[128] namebuf;

// Hash long symbol names
char* name;
if (IN_LLVM && global.params.hashThreshold && (len > global.params.hashThreshold))
{
import std.digest.md;
auto md5hash = md5Of(buf.peekString()[0..len]);
auto hashedname = toHexString(md5hash);
static assert(hashedname.length < namebuf.length-30);
name = namebuf.ptr;
sprintf(name, "_D%lluTypeInfo_%.*s6__initZ", cast(ulong)9 + hashedname.length, hashedname.length, hashedname.ptr);
}
else
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems rather unfortunate. Perhaps we should ditch IN_LLVM for such cases, or replace it by an enum set from the version (so you can do if (IN_LLVM && …). It doesn't seem like we would ever want to try using LDC's front end sources to build DMD…

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. We already have the enum IN_LLVM, so I'll use that. I also think the copying is ugly/stupid.
I will indent the DDMD source, so that we are notified of (perhaps relevant) changes by merge errors.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I'd probably keep the indentation as it is, so that the diff is kept tidy and the LDC-specific part is made painfully obvious when browsing the source. But I guess one could always use diff -w for the former…

{
// else path is DDMD original:

size_t namelen = 19 + len.sizeof * 3 + len + 1;
char* name = namelen <= namebuf.length ? namebuf.ptr : cast(char*)malloc(namelen);
name = namelen <= namebuf.sizeof ? namebuf.ptr : cast(char*)malloc(namelen);
assert(name);
sprintf(name, "_D%lluTypeInfo_%s6__initZ", cast(ulong)9 + len, buf.data);
//printf("%p, deco = %s, name = %s\n", this, deco, name);
assert(strlen(name) < namelen); // don't overflow the buffer

}

size_t off = 0;
static if (!IN_GCC && !IN_LLVM)
{
Expand Down
5 changes: 5 additions & 0 deletions driver/cl_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,11 @@ cl::opt<bool, true>
singleObj("singleobj", cl::desc("Create only a single output object file"),
cl::location(global.params.singleObj));

cl::opt<uint32_t, true> hashThreshold(
"hash-threshold",
cl::desc("hash symbol names longer than this threshold (experimental)"),
cl::location(global.params.hashThreshold), cl::init(0));

cl::opt<bool> linkonceTemplates(
"linkonce-templates",
cl::desc(
Expand Down
4 changes: 2 additions & 2 deletions gen/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "gen/llvm.h"
#include "gen/llvmhelpers.h"
#include "gen/logger.h"
#include "gen/mangling.h"
#include "gen/nested.h"
#include "gen/optimizer.h"
#include "gen/pragma.h"
Expand Down Expand Up @@ -478,8 +479,7 @@ void DtoDeclareFunction(FuncDeclaration *fdecl) {
}

// mangled name
std::string mangledName(mangleExact(fdecl));
mangledName = gABI->mangleForLLVM(mangledName, link);
std::string mangledName = getMangledName(fdecl, link);

// construct function
LLFunctionType *functype = DtoFunctionType(fdecl);
Expand Down
161 changes: 161 additions & 0 deletions gen/mangling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//===-- mangling.cpp ------------------------------------------------------===//
//
// LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Tries to centralize functionality for mangling of symbols.
//
//===----------------------------------------------------------------------===//

#include "gen/mangling.h"

#include "ddmd/declaration.h"
#include "ddmd/dsymbol.h"
#include "ddmd/identifier.h"
#include "ddmd/module.h"
#include "gen/abi.h"
#include "gen/irstate.h"
#include "llvm/Support/MD5.h"

namespace {

// TODO: Disable hashing of symbols that are defined in libdruntime and
// libphobos. This would enable hashing thresholds below the largest symbol in
// libdruntime/phobos.

/// Decides whether the mangled name of an aggregate should be replaced by its
/// hashed form. Hashing is off entirely when the threshold is zero.
bool shouldHashAggrName(llvm::StringRef name) {
  const auto threshold = global.params.hashThreshold;
  if (threshold == 0) {
    return false; // a zero threshold means "never hash"
  }

  // Pad the length by 11 characters to account for the additional D mangling
  // prefix and suffix that get wrapped around the aggregate name.
  const auto paddedLength = name.size() + 11;
  return paddedLength > threshold;
}

/// Computes the MD5 digest of `name` and returns it in the textual form
/// produced by llvm::MD5::stringifyResult.
llvm::SmallString<32> hashName(llvm::StringRef name) {
  llvm::MD5 md5;
  md5.update(name);

  llvm::MD5::MD5Result digest;
  md5.final(digest);

  llvm::SmallString<32> digestText;
  llvm::MD5::stringifyResult(digest, digestText);
  return digestText;
}

/// Produces the hashed replacement for the symbol name `name` of `symb`.
///
/// The MD5 hash of `name` is surrounded by recognizable pieces of the original
/// symbol, each written as "<length><text>". Because of these extra pieces the
/// result can be longer than the input when identifiers are very long and the
/// hash threshold is low.
/// Demangled, the hashed name reads:
///   module.L<line_no>.<hash>.<top aggregate>.<identifier>
std::string hashSymbolName(llvm::StringRef name, Dsymbol *symb) {
  std::string result;

  // Appends one component: its character count followed by its text.
  auto appendComponent = [&result](llvm::StringRef text) {
    result += std::to_string(text.size());
    result += text;
  };

  // Qualified module name: every package first, then the module identifier.
  auto moduleDecl = symb->getModule()->md;
  assert(moduleDecl);
  if (auto pkgs = moduleDecl->packages) {
    for (size_t idx = 0; idx < pkgs->dim; ++idx) {
      appendComponent((*pkgs)[idx]->toChars());
    }
  }
  appendComponent(moduleDecl->id->toChars());

  // Source line number of the symbol, written as "L<line>".
  appendComponent("L" + std::to_string(symb->loc.linnum));

  // MD5 hash of the full original name. The count is fixed at 33 and followed
  // by an underscore, which delimits the count from the hash text.
  auto digest = hashName(name);
  result += "33_";
  result += digest;

  // Identifier of the aggregate this symbol is a member of, if any.
  if (auto parentAggr = symb->isAggregateMember()) {
    appendComponent(parentAggr->ident->toChars());
  }

  // Finally the symbol's own identifier.
  appendComponent(symb->toChars());

  return result;
}
}

/// Returns the symbol name to use for `fdecl` in the LLVM module.
///
/// Starts from the exact D mangling. If the function has D (or default)
/// linkage, hashing is enabled and the mangled name is longer than the hash
/// threshold, the name is replaced by "_D" + hashed name + "Z". The result is
/// always passed through the target ABI's mangleForLLVM.
std::string getMangledName(FuncDeclaration *fdecl, LINK link) {
  std::string symbolName(mangleExact(fdecl));

  // Only D-mangled names are candidates for hashing.
  const bool hasDLinkage = (link == LINKd) || (link == LINKdefault);
  const auto threshold = global.params.hashThreshold;
  const bool hashingEnabled = threshold != 0;

  if (hasDLinkage && hashingEnabled && symbolName.length() > threshold) {
    symbolName = "_D" + hashSymbolName(symbolName, fdecl) + "Z";
  }

  return gABI->mangleForLLVM(symbolName, link);
}

/// Returns the name of the init symbol of `aggrdecl`:
/// "_D" + (hashed or plain) mangled aggregate name + "6__initZ".
std::string getMangledInitSymbolName(AggregateDeclaration *aggrdecl) {
  const std::string aggrName = mangle(aggrdecl);

  std::string symbol("_D");
  // Substitute the hashed form when the aggregate name exceeds the threshold.
  symbol += shouldHashAggrName(aggrName) ? hashSymbolName(aggrName, aggrdecl)
                                         : aggrName;
  symbol += "6__initZ";
  return symbol;
}

/// Returns the name of the vtable symbol of `aggrdecl`:
/// "_D" + (hashed or plain) mangled aggregate name + "6__vtblZ".
std::string getMangledVTableSymbolName(AggregateDeclaration *aggrdecl) {
  const std::string aggrName = mangle(aggrdecl);

  std::string symbol("_D");
  // Substitute the hashed form when the aggregate name exceeds the threshold.
  symbol += shouldHashAggrName(aggrName) ? hashSymbolName(aggrName, aggrdecl)
                                         : aggrName;
  symbol += "6__vtblZ";
  return symbol;
}

/// Returns the name of the class info symbol of `aggrdecl`:
/// "_D" + (hashed or plain) mangled aggregate name, followed by
/// "11__InterfaceZ" for interface declarations and "7__ClassZ" otherwise.
std::string getMangledClassInfoSymbolName(AggregateDeclaration *aggrdecl) {
  const std::string aggrName = mangle(aggrdecl);

  std::string symbol("_D");
  // Substitute the hashed form when the aggregate name exceeds the threshold.
  symbol += shouldHashAggrName(aggrName) ? hashSymbolName(aggrName, aggrdecl)
                                         : aggrName;
  // Interfaces and classes carry different suffixes.
  symbol += aggrdecl->isInterfaceDeclaration() ? "11__InterfaceZ" : "7__ClassZ";
  return symbol;
}
29 changes: 29 additions & 0 deletions gen/mangling.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===-- mangling.h --------------------------------------------------------===//
//
// LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Tries to centralize functionality for mangling of symbols.
//
//===----------------------------------------------------------------------===//

#ifndef LDC_GEN_MANGLING_H
#define LDC_GEN_MANGLING_H

#include <string>
#include "ddmd/globals.h"

class FuncDeclaration;
class AggregateDeclaration;

/// Returns the symbol name for `fdecl` after the target ABI's mangleForLLVM
/// has been applied. For D/default linkage, the D-mangled name is replaced by
/// a hashed name when hashing is enabled (non-zero hash threshold) and the
/// name is longer than the threshold.
std::string getMangledName(FuncDeclaration *fdecl, LINK link);

/// The following functions return the "_D..." symbol names of the data symbols
/// belonging to `aggrdecl`. In each, the aggregate's mangled name is replaced
/// by its hashed form when it exceeds the hash threshold.

/// Name of the init symbol (suffix "6__initZ").
std::string getMangledInitSymbolName(AggregateDeclaration *aggrdecl);
/// Name of the vtable symbol (suffix "6__vtblZ").
std::string getMangledVTableSymbolName(AggregateDeclaration *aggrdecl);
/// Name of the class info symbol (suffix "11__InterfaceZ" for interfaces,
/// "7__ClassZ" otherwise).
std::string getMangledClassInfoSymbolName(AggregateDeclaration *aggrdecl);

#endif // LDC_GEN_MANGLING_H
5 changes: 2 additions & 3 deletions ir/iraggr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "gen/irstate.h"
#include "gen/llvmhelpers.h"
#include "gen/logger.h"
#include "gen/mangling.h"
#include "gen/tollvm.h"
#include "ir/iraggr.h"
#include "irdsymbol.h"
Expand All @@ -39,9 +40,7 @@ LLGlobalVariable *IrAggr::getInitSymbol() {
}

// create the initZ symbol
std::string initname("_D");
initname.append(mangle(aggrdecl));
initname.append("6__initZ");
auto initname = getMangledInitSymbolName(aggrdecl);

init =
getOrCreateGlobal(aggrdecl->loc, gIR->module, init_type, true,
Expand Down
18 changes: 5 additions & 13 deletions ir/irclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "gen/runtime.h"
#include "gen/functions.h"
#include "gen/abi.h"
#include "gen/mangling.h"

#include "ir/iraggr.h"
#include "ir/irfunction.h"
Expand All @@ -45,10 +46,8 @@ LLGlobalVariable *IrAggr::getVtblSymbol() {
return vtbl;
}

// create the initZ symbol
std::string initname("_D");
initname.append(mangle(aggrdecl));
initname.append("6__vtblZ");
// create the vtblZ symbol
auto initname = getMangledVTableSymbolName(aggrdecl);

LLType *vtblTy = stripModifiers(type)->ctype->isClass()->getVtbl();

Expand All @@ -66,15 +65,8 @@ LLGlobalVariable *IrAggr::getClassInfoSymbol() {
return classInfo;
}

// create the initZ symbol
std::string initname("_D");
initname.append(mangle(aggrdecl));

if (aggrdecl->isInterfaceDeclaration()) {
initname.append("11__InterfaceZ");
} else {
initname.append("7__ClassZ");
}
// create the ClassZ / InterfaceZ symbol
std::string initname = getMangledClassInfoSymbolName(aggrdecl);

// The type is also ClassInfo for interfaces – the actual TypeInfo for them
// is a TypeInfo_Interface instance that references __ClassZ in its "base"
Expand Down
49 changes: 49 additions & 0 deletions tests/codegen/hashed_mangling.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Test hashing of symbols above hash threshold

// RUN: %ldc -hash-threshold=90 -g -c -output-ll -of=%t90.ll %s && FileCheck %s --check-prefix HASH90 < %t90.ll
// RUN: %ldc -hash-threshold=90 -run %s

// Don't use Phobos functions in this test, because the test's hash threshold is too low for an unhashed libphobos.

module one.two.three;

// HASH90-DAG: define{{.*}} @externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed
extern (C) int externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed()
{
return 95;
}

auto s(T)(T t)
{
// HASH90-DAG: define{{.*}} @_D3one3two5three8__T1sTiZ1sFNaNbNiNfiZS3one3two5three8__T1sTiZ1sFiZ13__T6ResultTiZ6Result
// HASH90-DAG: define{{.*}} @_D3one3two5three3L1633_699ccf279a146992d539ca3ca16e22e11sZ
// HASH90-DAG: define{{.*}} @_D3one3two5three3L2333_5ee632e10b6f09e8f541a143266bdf226Result3fooZ
struct Result(T)
{
void foo(){}
}
return Result!int();
}

auto klass(T)(T t)
{
class Result(T)
{
// HASH90-DAG: define{{.*}} @_D3one3two5three12__T5klassTiZ5klassFiZ13__T6ResultTiZ6Result3fooMFZv
// HASH90-DAG: define{{.*}} @_D3one3two5three3L3433_46a82aac733d8a4b3588d7fa8937aad66Result3fooZ
void foo(){}
}
return new Result!int();
}

// Executed by the `-run` RUN line: calls each function declared above so the
// test also verifies that the program links and runs with hashed symbol names.
void main()
{
// The extern(C) function keeps its plain name and must return its value.
assert(
externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed() == 95);

// Chained calls nest the template instantiations; presumably this is what
// pushes the mangled names past the threshold of 90 — TODO confirm.
auto x = 1.s.s.s.s;
x.foo;

// Same pattern, but with a class result instead of a struct.
auto y = 1.klass.klass.klass.klass;
y.foo;
}