-
-
Notifications
You must be signed in to change notification settings - Fork 267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Varargs fix & ABI refactoring #768
Conversation
This code: import core.vararg;
import core.stdc.stdio;
struct Small { long a; }
struct Big { long a, b; }
void testArguments(...)
{
foreach (ti; _arguments)
printf(" %s [size %d]\n", ti.toString().ptr, ti.tsize);
}
void testArgptr(T)(...)
{
static if (__traits(isFloating, T))
auto format = "%s %f\n";
else
auto format = "%s %d\n";
static if (!is(T == struct))
printf(format.ptr, typeid(T).toString().ptr, va_arg!T(_argptr));
else
printf(format.ptr, typeid(T).toString().ptr, va_arg!T(_argptr).a);
}
void main()
{
testArgptr!int(1);
testArgptr!double(2.0);
testArgptr!float(3.0f);
immutable(Small) small = { 4L };
testArgptr!Small(small);
Big big = { 5L, 6L };
testArgptr!Big(big);
testArguments(1, 2.0, 3.0f, small, big);
} on Win64 yields:
Produced .ll: ; Function Attrs: uwtable
define i32 @_Dmain({ i64, { i64, i8* }* } %unnamed) #0 {
%small = alloca %hello.Small, align 8
%.structliteral = alloca %hello.Small
%big = alloca %hello.Big, align 8
%.structliteral1 = alloca %hello.Big
%copy_for_callee = alloca %hello.Big, align 16
%copy_for_callee2 = alloca %hello.Big, align 16
%1 = load { i64, %object.TypeInfo** }* @._arguments.array
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTiZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %1, i32 1)
%2 = load { i64, %object.TypeInfo** }* @._arguments.array5
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTdZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %2, double 2.000000e+00)
%3 = load { i64, %object.TypeInfo** }* @._arguments.array7
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTfZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %3, float 3.000000e+00)
%4 = getelementptr %hello.Small* %.structliteral, i32 0, i32 0
store i64 4, i64* %4
%5 = load i64* %4
%6 = bitcast %hello.Small* %small to i8*
%7 = bitcast %hello.Small* %.structliteral to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 8, i32 1, i1 false)
%8 = load %hello.Small* %small
%9 = load { i64, %object.TypeInfo** }* @._arguments.array9
%10 = bitcast %hello.Small* %small to i64*
%11 = load i64* %10
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello30__T10testArgptrTS5hello5SmallZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %9, i64 %11)
%12 = getelementptr %hello.Big* %.structliteral1, i32 0, i32 0
store i64 5, i64* %12
%13 = load i64* %12
%14 = getelementptr %hello.Big* %.structliteral1, i32 0, i32 1
store i64 6, i64* %14
%15 = load i64* %14
%16 = bitcast %hello.Big* %big to i8*
%17 = bitcast %hello.Big* %.structliteral1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %16, i8* %17, i64 16, i32 1, i1 false)
%18 = load %hello.Big* %big
%19 = load { i64, %object.TypeInfo** }* @._arguments.array11
%20 = load %hello.Big* %big
store %hello.Big %20, %hello.Big* %copy_for_callee
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello28__T10testArgptrTS5hello3BigZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %19, %hello.Big* noalias nocapture %copy_for_callee)
%21 = load { i64, %object.TypeInfo** }* @._arguments.array13
%22 = bitcast %hello.Small* %small to i64*
%23 = load i64* %22
%24 = load %hello.Big* %big
store %hello.Big %24, %hello.Big* %copy_for_callee2
call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello13testArgumentsFYv({ i64, %object.TypeInfo** } %21, i32 1, double 2.000000e+00, float 3.000000e+00, i64 %23, %hello.Big* noalias nocapture %copy_for_callee2)
ret i32 0
}
...
; Function Attrs: uwtable
define weak_odr void @_D5hello30__T10testArgptrTS5hello5SmallZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %._arguments, ...) #0 {
%_argptr_mem = alloca i8*, align 8
%_arguments_mem = alloca { i64, %object.TypeInfo** }
%format = alloca { i64, i8* }, align 8
%.int_to_composite = alloca %hello.Small, align 8
%1 = alloca %hello.Small
%2 = bitcast i8** %_argptr_mem to i8*
call void @llvm.va_start(i8* %2)
store { i64, %object.TypeInfo** } %._arguments, { i64, %object.TypeInfo** }* %_arguments_mem
store { i64, i8* } { i64 6, i8* getelementptr inbounds ([7 x i8]* @.str17, i32 0, i32 0) }, { i64, i8* }* %format
%3 = load { i64, i8* }* %format
%4 = getelementptr { i64, i8* }* %format, i32 0, i32 1
%.ptr = load i8** %4
%5 = getelementptr %object.TypeInfo* bitcast (%"typeid(Small)"* @_D22TypeInfo_S5hello5Small6__initZ to %object.TypeInfo*), i32 0, i32 0
%6 = load %object.TypeInfo.__vtbl** %5
%"typeid(Small).toString@vtbl" = getelementptr %object.TypeInfo.__vtbl* %6, i32 0, i32 1
%"typeid(Small).toString" = load { i64, i8* } (%object.TypeInfo*)** %"typeid(Small).toString@vtbl", align 8
%tmp = call { i64, i8* } %"typeid(Small).toString"(%object.TypeInfo* bitcast (%"typeid(Small)"* @_D22TypeInfo_S5hello5Small6__initZ to %object.TypeInfo*))
%.ptr1 = extractvalue { i64, i8* } %tmp, 1
%tmp2 = call i64 @_D4core4stdc6stdarg25__T6va_argTS5hello5SmallZ6va_argFNaNbNiKPaZS5hello5Small(i8** %_argptr_mem)
%7 = bitcast %hello.Small* %.int_to_composite to i64*
store i64 %tmp2, i64* %7
%8 = load %hello.Small* %.int_to_composite
store %hello.Small %8, %hello.Small* %1
%9 = getelementptr %hello.Small* %1, i32 0, i32 0
%10 = load i64* %9
%tmp3 = call i32 (i8*, ...)* @printf(i8* %.ptr, i8* %.ptr1, i64 %10)
ret void
}
; Function Attrs: uwtable
define weak_odr void @_D5hello28__T10testArgptrTS5hello3BigZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %._arguments, ...) #0 {
%_argptr_mem = alloca i8*, align 8
%_arguments_mem = alloca { i64, %object.TypeInfo** }
%format = alloca { i64, i8* }, align 8
%.rettmp = alloca %hello.Big, align 8
%1 = bitcast i8** %_argptr_mem to i8*
call void @llvm.va_start(i8* %1)
store { i64, %object.TypeInfo** } %._arguments, { i64, %object.TypeInfo** }* %_arguments_mem
store { i64, i8* } { i64 6, i8* getelementptr inbounds ([7 x i8]* @.str18, i32 0, i32 0) }, { i64, i8* }* %format
%2 = load { i64, i8* }* %format
%3 = getelementptr { i64, i8* }* %format, i32 0, i32 1
%.ptr = load i8** %3
%4 = getelementptr %object.TypeInfo* bitcast (%"typeid(Big)"* @_D20TypeInfo_S5hello3Big6__initZ to %object.TypeInfo*), i32 0, i32 0
%5 = load %object.TypeInfo.__vtbl** %4
%"typeid(Big).toString@vtbl" = getelementptr %object.TypeInfo.__vtbl* %5, i32 0, i32 1
%"typeid(Big).toString" = load { i64, i8* } (%object.TypeInfo*)** %"typeid(Big).toString@vtbl", align 8
%tmp = call { i64, i8* } %"typeid(Big).toString"(%object.TypeInfo* bitcast (%"typeid(Big)"* @_D20TypeInfo_S5hello3Big6__initZ to %object.TypeInfo*))
%.ptr1 = extractvalue { i64, i8* } %tmp, 1
call void @_D4core4stdc6stdarg23__T6va_argTS5hello3BigZ6va_argFNaNbNiKPaZS5hello3Big(%hello.Big* noalias sret %.rettmp, i8** %_argptr_mem)
%6 = getelementptr %hello.Big* %.rettmp, i32 0, i32 0
%7 = load i64* %6
%tmp2 = call i32 (i8*, ...)* @printf(i8* %.ptr, i8* %.ptr1, i64 %7)
ret void
} |
isPassedWithByvalSemantics needs to be adjusted as well I think. |
Yes you are right, reals are incorrectly passed byval and not as doubles! Returning a real strangely enough (correctly) returns a double. |
Also I was in the process of checking struct return. We need to make sure the sret pointer is returned in RAX. |
That is handled by LLVM: Big bla(double x)
{
Big r = { 666, 667 };
return r;
} => _D5hello3blaFdZS5hello3Big:
...
movq %rcx, %rax
movsd %xmm1, 16(%rsp)
movq $666, (%rsp)
movq $667, 8(%rsp)
movups (%rsp), %xmm1
movups %xmm1, (%rcx)
addq $24, %rsp
retq The pointer is passed in RCX and immediately moved to RAX; afterwards, RAX is never touched. |
Ok good. |
fe5e263
to
510a3ad
Compare
Yeah well maybe |
Btw, we should probably call LLVM's va_end() intrinsic for _argptr before returning. I don't know where to put the call though... |
Linux x86 even passes the test suite, that's quite unexpected. ;) |
Our struct passing code doesn't consider PODness either. |
Yes, PODness is probably ignored on Win64 and aggregates such as delegates and dynamic arrays might be passed incorrectly as well. We should probably use a I've added an experimental generic patch for druntime in ldc-developers/druntime#14 (System V still unsupported though). /edit: PODness doesn't seem to be relevant here as the types have already been lowered to C structs. When passing non-trivial types by value, the caller creates a hidden copy, which is initialized with postblit constructor etc. Then that copy is bit-copied to byval_rewrite's |
The latest commit is a bigger one, I'll add some line comments when I find the time. Please help test! |
Very nice but it'd have been better to split this up into individual commits. Those line comments do help but will get lost eventually. |
Yeah I know, I've split it up now. |
@redstar and @klickverbot: did you take a look at this? |
|
||
// EXPLICIT PARAMETERS | ||
|
||
// reverse parameter order |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd want to be sure that DMD is on board with this change before actually going through with it, since it breaks (naked) inline asm. What's the latest status regarding this on the DMD front? Also, I'd be mildly surprised if this actually passes all the Phobos tests without any further changes (e.g. BigInt, …).
Thanks for the comments, David. I completely forgot about inline assembly when it comes to reversing parameter order and returning complex numbers for extern(D). This is of course a major problem, so I'll undo these changes. As to System V varargs ABI: http://llvm.org/docs/LangRef.html#variable-argument-handling-intrinsics seems to suggest the LLVM intrinsics (va_start/va_end/va_copy/va_arg) are able to handle all of that. Are you telling me we still have to compute that va_list struct manually (requiring perfect knowledge of the tedious register assignment)? I can't believe LLVM would have better support for Win64 than *nix x86-64 in this regard. :) And as to System V ABI register assignment: there's an I hope to get LDC to compile in a Linux VM to experiment. |
This block is pretty hacky.
For structs > 8 bytes passed in registers.
Ignore pointer type mismatches as long as both types are pointers; the element type has no effect on the memory layout. The RegCount constructor had to be improved to handle more complex types correctly (e.g., nested structs such as { { i64, i32* } }) as these are not rewritten anymore.
Don't attempt to store past the end of alloca()ted memory. This is important for structs passed in registers whose size is not a power of 2. Fix: alloca()te another sufficiently sized chunk, store into it and then memcpy() the relevant bytes to the target chunk.
Use a map instead of a vector as the attribute index may get as high as 0xFFFFFFFF (LLVM 3.1), apparently for function attributes.
Thanks Kai. I can reproduce the runnable/testabi.d dmd-testsuite issue (see #768 (comment)) with LLVM 3.2 when using -O2 and -O3. A LLVM 3.2 bug isn't likely, is it? ;) Great, now the LLVM 3.4 and 3.5 debug jobs got ldc2 killed while compiling std.algorithm-unittest-debug. I guess that's due to some sort of Travis CI watchdog. The new druntime mini-change hardly affects performance; the move to a std::map in AttrSet in the last commit may have been costly when inserting attributes with decreasing index (when reversing the params for extern(D)), but I haven't checked whether that causes any significant slow-down. |
std.algorithm fails intermittently regardless of your change (OOM, mostly). Don't pay too much attention to it, it's something we need to fix at some point anyway. |
Apparently required for LLVM 3.2.
Alright, the LLVM 3.2 issue is fixed too now. LLVM 3.1-3.5 all green except for std.algorithm kills. |
I triggered Travis-CI to give you the joy of an all-green build. :-) |
@kinke Could you please contact me by mail (kai@redstar.de)? Thanks!!!! |
This fixes the varargs horror (see #702, #172, #73 and ldc-developers/druntime#14) which proved to be an important obstacle for Win64.
In the course of this PR, things got slightly out of hand ;) and I ended up reworking big parts of LDC's ABI system, mainly the System V AMD64 ABI used on all x86_64 platforms except for Windows.