diff --git a/ddmd/globals.d b/ddmd/globals.d index a71864111a9..780a72f275c 100644 --- a/ddmd/globals.d +++ b/ddmd/globals.d @@ -211,6 +211,8 @@ struct Param // Codegen cl options bool singleObj; bool disableRedZone; + + uint hashThreshold; // MD5 hash symbols larger than this threshold (0 = no hashing) } } diff --git a/ddmd/globals.h b/ddmd/globals.h index c71b828ddfc..9dbbcd3313c 100644 --- a/ddmd/globals.h +++ b/ddmd/globals.h @@ -209,6 +209,8 @@ struct Param // Codegen cl options bool singleObj; bool disableRedZone; + + uint32_t hashThreshold; // MD5 hash symbols larger than this threshold (0 = no hashing) #endif }; diff --git a/ddmd/mtype.d b/ddmd/mtype.d index 19fc5ff1a16..45ef066dee5 100644 --- a/ddmd/mtype.d +++ b/ddmd/mtype.d @@ -2665,12 +2665,31 @@ public: buf.writeByte(0); // Allocate buffer on stack, fail over to using malloc() char[128] namebuf; + + // Hash long symbol names + char* name; + if (IN_LLVM && global.params.hashThreshold && (len > global.params.hashThreshold)) + { + import std.digest.md; + auto md5hash = md5Of(buf.peekString()[0..len]); + auto hashedname = toHexString(md5hash); + static assert(hashedname.length < namebuf.length-30); + name = namebuf.ptr; + sprintf(name, "_D%lluTypeInfo_%.*s6__initZ", cast(ulong)9 + hashedname.length, hashedname.length, hashedname.ptr); + } + else + { + // else path is DDMD original: + size_t namelen = 19 + len.sizeof * 3 + len + 1; - char* name = namelen <= namebuf.length ? namebuf.ptr : cast(char*)malloc(namelen); + name = namelen <= namebuf.sizeof ? 
namebuf.ptr : cast(char*)malloc(namelen); assert(name); sprintf(name, "_D%lluTypeInfo_%s6__initZ", cast(ulong)9 + len, buf.data); //printf("%p, deco = %s, name = %s\n", this, deco, name); assert(strlen(name) < namelen); // don't overflow the buffer + + } + size_t off = 0; static if (!IN_GCC && !IN_LLVM) { diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp index 947c2cad7c1..b2a3c57104e 100644 --- a/driver/cl_options.cpp +++ b/driver/cl_options.cpp @@ -375,6 +375,11 @@ cl::opt<bool, true> singleObj("singleobj", cl::desc("Create only a single output object file"), cl::location(global.params.singleObj)); +cl::opt<uint32_t, true> hashThreshold( + "hash-threshold", + cl::desc("hash symbol names longer than this threshold (experimental)"), + cl::location(global.params.hashThreshold), cl::init(0)); + cl::opt linkonceTemplates( "linkonce-templates", cl::desc( diff --git a/gen/functions.cpp b/gen/functions.cpp index 618dbb75d6c..3c643de704d 100644 --- a/gen/functions.cpp +++ b/gen/functions.cpp @@ -28,6 +28,7 @@ #include "gen/llvm.h" #include "gen/llvmhelpers.h" #include "gen/logger.h" +#include "gen/mangling.h" #include "gen/nested.h" #include "gen/optimizer.h" #include "gen/pragma.h" @@ -478,8 +479,7 @@ void DtoDeclareFunction(FuncDeclaration *fdecl) { } // mangled name - std::string mangledName(mangleExact(fdecl)); - mangledName = gABI->mangleForLLVM(mangledName, link); + std::string mangledName = getMangledName(fdecl, link); // construct function LLFunctionType *functype = DtoFunctionType(fdecl); diff --git a/gen/mangling.cpp b/gen/mangling.cpp new file mode 100644 index 00000000000..c899aee60ba --- /dev/null +++ b/gen/mangling.cpp @@ -0,0 +1,161 @@ +//===-- mangling.cpp ------------------------------------------------------===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. 
+// +//===----------------------------------------------------------------------===// +// +// Tries to centralize functionality for mangling of symbols. +// +//===----------------------------------------------------------------------===// + +#include "gen/mangling.h" + +#include "ddmd/declaration.h" +#include "ddmd/dsymbol.h" +#include "ddmd/identifier.h" +#include "ddmd/module.h" +#include "gen/abi.h" +#include "gen/irstate.h" +#include "llvm/Support/MD5.h" + +namespace { + +// TODO: Disable hashing of symbols that are defined in libdruntime and +// libphobos. This would enable hashing thresholds below the largest symbol in +// libdruntime/phobos. + +bool shouldHashAggrName(llvm::StringRef name) { + /// Add extra chars to the length of aggregate names to account for + /// the additional D mangling suffix and prefix + return (global.params.hashThreshold != 0) && + ((name.size() + 11) > global.params.hashThreshold); +} + +llvm::SmallString<32> hashName(llvm::StringRef name) { + llvm::MD5 hasher; + hasher.update(name); + llvm::MD5::MD5Result result; + hasher.final(result); + llvm::SmallString<32> hashStr; + llvm::MD5::stringifyResult(result, hashStr); + + return hashStr; +} + +/// Hashes the symbol name and prefixes the hash with some recognizable parts of +/// the full symbol name. The prefixing means that the hashed name may be larger +/// than the input when identifiers are very long and the hash threshold is low. +/// Demangled hashed name is: +/// module.L<line_no>.<hash>.<top aggregate>.<identifier> 
+std::string hashSymbolName(llvm::StringRef name, Dsymbol *symb) { + std::string ret; + + // module + { + auto moddecl = symb->getModule()->md; + assert(moddecl); + if (auto packages = moddecl->packages) { + for (size_t i = 0; i < packages->dim; ++i) { + llvm::StringRef str = (*packages)[i]->toChars(); + ret += std::to_string(str.size()); + ret += str; + } + } + llvm::StringRef str = moddecl->id->toChars(); + ret += std::to_string(str.size()); + ret += str; + } + + // source line number + auto lineNo = std::to_string(symb->loc.linnum); + ret += std::to_string(lineNo.size()+1); + ret += 'L'; + ret += lineNo; + + // MD5 hash + auto hashedName = hashName(name); + ret += "33_"; // add underscore to delimit the 33 character count + ret += hashedName; + + // top aggregate + if (auto agg = symb->isAggregateMember()) { + llvm::StringRef topaggr = agg->ident->toChars(); + ret += std::to_string(topaggr.size()); + ret += topaggr; + } + + // identifier + llvm::StringRef identifier = symb->toChars(); + ret += std::to_string(identifier.size()); + ret += identifier; + + return ret; +} +} + +std::string getMangledName(FuncDeclaration *fdecl, LINK link) { + std::string mangledName(mangleExact(fdecl)); + + // Hash the name if necessary + if (((link == LINKd) || (link == LINKdefault)) && + (global.params.hashThreshold != 0) && + (mangledName.length() > global.params.hashThreshold)) { + + auto hashedName = hashSymbolName(mangledName, fdecl); + mangledName = "_D" + hashedName + "Z"; + } + + return gABI->mangleForLLVM(mangledName, link); +} + +std::string getMangledInitSymbolName(AggregateDeclaration *aggrdecl) { + std::string ret = "_D"; + + std::string mangledName = mangle(aggrdecl); + if (shouldHashAggrName(mangledName)) { + ret += hashSymbolName(mangledName, aggrdecl); + } else { + ret += mangledName; + } + + ret += "6__initZ"; + + return ret; +} + +std::string getMangledVTableSymbolName(AggregateDeclaration *aggrdecl) { + std::string ret = "_D"; + + std::string mangledName = 
mangle(aggrdecl); + if (shouldHashAggrName(mangledName)) { + ret += hashSymbolName(mangledName, aggrdecl); + } else { + ret += mangledName; + } + + ret += "6__vtblZ"; + + return ret; +} + +std::string getMangledClassInfoSymbolName(AggregateDeclaration *aggrdecl) { + std::string ret = "_D"; + + std::string mangledName = mangle(aggrdecl); + if (shouldHashAggrName(mangledName)) { + ret += hashSymbolName(mangledName, aggrdecl); + } else { + ret += mangledName; + } + + if (aggrdecl->isInterfaceDeclaration()) { + ret += "11__InterfaceZ"; + } else { + ret += "7__ClassZ"; + } + + return ret; +} diff --git a/gen/mangling.h b/gen/mangling.h new file mode 100644 index 00000000000..bbd8631a537 --- /dev/null +++ b/gen/mangling.h @@ -0,0 +1,29 @@ +//===-- mangling.h --------------------------------------------------------===// +// +// LDC – the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// +// +// Tries to centralize functionality for mangling of symbols. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LDC_GEN_MANGLING_H +#define LDC_GEN_MANGLING_H + +#include <string> +#include "ddmd/globals.h" + +class FuncDeclaration; +class AggregateDeclaration; + +std::string getMangledName(FuncDeclaration *fdecl, LINK link); + +std::string getMangledInitSymbolName(AggregateDeclaration *aggrdecl); +std::string getMangledVTableSymbolName(AggregateDeclaration *aggrdecl); +std::string getMangledClassInfoSymbolName(AggregateDeclaration *aggrdecl); + +#endif // LDC_GEN_MANGLING_H diff --git a/ir/iraggr.cpp b/ir/iraggr.cpp index 201f12bdab3..72de3d7aa8e 100644 --- a/ir/iraggr.cpp +++ b/ir/iraggr.cpp @@ -16,6 +16,7 @@ #include "gen/irstate.h" #include "gen/llvmhelpers.h" #include "gen/logger.h" +#include "gen/mangling.h" #include "gen/tollvm.h" #include "ir/iraggr.h" #include "irdsymbol.h" @@ -39,9 +40,7 @@ LLGlobalVariable *IrAggr::getInitSymbol() { } // create the initZ symbol - std::string initname("_D"); - initname.append(mangle(aggrdecl)); - initname.append("6__initZ"); + auto initname = getMangledInitSymbolName(aggrdecl); init = getOrCreateGlobal(aggrdecl->loc, gIR->module, init_type, true, diff --git a/ir/irclass.cpp b/ir/irclass.cpp index 4b9ebfc6b65..b0db17af4bc 100644 --- a/ir/irclass.cpp +++ b/ir/irclass.cpp @@ -29,6 +29,7 @@ #include "gen/runtime.h" #include "gen/functions.h" #include "gen/abi.h" +#include "gen/mangling.h" #include "ir/iraggr.h" #include "ir/irfunction.h" @@ -45,10 +46,8 @@ LLGlobalVariable *IrAggr::getVtblSymbol() { return vtbl; } - // create the initZ symbol - std::string initname("_D"); - initname.append(mangle(aggrdecl)); - initname.append("6__vtblZ"); + // create the vtblZ symbol + auto initname = getMangledVTableSymbolName(aggrdecl); LLType *vtblTy = stripModifiers(type)->ctype->isClass()->getVtbl(); @@ -66,15 +65,8 @@ LLGlobalVariable *IrAggr::getClassInfoSymbol() { return classInfo; } - // create the initZ symbol - std::string initname("_D"); - 
initname.append(mangle(aggrdecl)); - - if (aggrdecl->isInterfaceDeclaration()) { - initname.append("11__InterfaceZ"); - } else { - initname.append("7__ClassZ"); - } + // create the ClassZ / InterfaceZ symbol + std::string initname = getMangledClassInfoSymbolName(aggrdecl); // The type is also ClassInfo for interfaces – the actual TypeInfo for them // is a TypeInfo_Interface instance that references __ClassZ in its "base" diff --git a/tests/codegen/hashed_mangling.d b/tests/codegen/hashed_mangling.d new file mode 100644 index 00000000000..8d83acc3953 --- /dev/null +++ b/tests/codegen/hashed_mangling.d @@ -0,0 +1,49 @@ +// Test hashing of symbols above hash threshold + +// RUN: %ldc -hash-threshold=90 -g -c -output-ll -of=%t90.ll %s && FileCheck %s --check-prefix HASH90 < %t90.ll +// RUN: %ldc -hash-threshold=90 -run %s + +// Don't use Phobos functions in this test, because the test hashthreshold is too low for an unhashed libphobos. + +module one.two.three; + +// HASH90-DAG: define{{.*}} @externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed +extern (C) int externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed() +{ + return 95; +} + +auto s(T)(T t) +{ + // HASH90-DAG: define{{.*}} @_D3one3two5three8__T1sTiZ1sFNaNbNiNfiZS3one3two5three8__T1sTiZ1sFiZ13__T6ResultTiZ6Result + // HASH90-DAG: define{{.*}} @_D3one3two5three3L1633_699ccf279a146992d539ca3ca16e22e11sZ + // HASH90-DAG: define{{.*}} @_D3one3two5three3L2333_5ee632e10b6f09e8f541a143266bdf226Result3fooZ + struct Result(T) + { + void foo(){} + } + return Result!int(); +} + +auto klass(T)(T t) +{ + class Result(T) + { + // HASH90-DAG: define{{.*}} @_D3one3two5three12__T5klassTiZ5klassFiZ13__T6ResultTiZ6Result3fooMFZv + // HASH90-DAG: define{{.*}} @_D3one3two5three3L3433_46a82aac733d8a4b3588d7fa8937aad66Result3fooZ + void foo(){} + } + return new Result!int(); +} + +void main() +{ + assert( + 
externCfunctions_are_not_hashed_externCfunctions_are_not_hashed_externCfunctions_are_not_hashed() == 95); + + auto x = 1.s.s.s.s; + x.foo; + + auto y = 1.klass.klass.klass.klass; + y.foo; +}