Varargs fix & ABI refactoring #768

kinke · 2014-10-26T15:40:15Z

This fixes the varargs horror (see #702, #172, #73 and ldc-developers/druntime#14) which proved to be an important obstacle for Win64.
In the course of this PR, things got slightly out of hand ;) and I ended up reworking big parts of LDC's ABI system, mainly the System V AMD64 ABI used on all x86_64 platforms except for Windows.

kinke · 2014-10-26T16:38:11Z

This code:

import core.vararg;
import core.stdc.stdio;

struct Small { long a; }
struct Big { long a, b; }

void testArguments(...)
{
    foreach (ti; _arguments)
        printf("  %s [size %d]\n", ti.toString().ptr, ti.tsize);
}

void testArgptr(T)(...)
{
    static if (__traits(isFloating, T))
        auto format = "%s %f\n";
    else
        auto format = "%s %d\n";

    static if (!is(T == struct))
        printf(format.ptr, typeid(T).toString().ptr, va_arg!T(_argptr));
    else
        printf(format.ptr, typeid(T).toString().ptr, va_arg!T(_argptr).a);
}

void main()
{
    testArgptr!int(1);
    testArgptr!double(2.0);
    testArgptr!float(3.0f);

    immutable(Small) small = { 4L };
    testArgptr!Small(small);

    Big big = { 5L, 6L };
    testArgptr!Big(big);

    testArguments(1, 2.0, 3.0f, small, big);
}

on Win64 yields:

int 1
double 2.000000
float 3.000000
hello.Small 4
hello.Big 5
  int [size 4]
  double [size 8]
  float [size 4]
  immutable(hello.Small) [size 8]
  hello.Big [size 16]

Produced .ll:

; Function Attrs: uwtable
define i32 @_Dmain({ i64, { i64, i8* }* } %unnamed) #0 {
  %small = alloca %hello.Small, align 8
  %.structliteral = alloca %hello.Small
  %big = alloca %hello.Big, align 8
  %.structliteral1 = alloca %hello.Big
  %copy_for_callee = alloca %hello.Big, align 16
  %copy_for_callee2 = alloca %hello.Big, align 16
  %1 = load { i64, %object.TypeInfo** }* @._arguments.array
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTiZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %1, i32 1)
  %2 = load { i64, %object.TypeInfo** }* @._arguments.array5
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTdZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %2, double 2.000000e+00)
  %3 = load { i64, %object.TypeInfo** }* @._arguments.array7
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello18__T10testArgptrTfZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %3, float 3.000000e+00)
  %4 = getelementptr %hello.Small* %.structliteral, i32 0, i32 0
  store i64 4, i64* %4
  %5 = load i64* %4
  %6 = bitcast %hello.Small* %small to i8*
  %7 = bitcast %hello.Small* %.structliteral to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 8, i32 1, i1 false)
  %8 = load %hello.Small* %small
  %9 = load { i64, %object.TypeInfo** }* @._arguments.array9
  %10 = bitcast %hello.Small* %small to i64*
  %11 = load i64* %10
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello30__T10testArgptrTS5hello5SmallZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %9, i64 %11)
  %12 = getelementptr %hello.Big* %.structliteral1, i32 0, i32 0
  store i64 5, i64* %12
  %13 = load i64* %12
  %14 = getelementptr %hello.Big* %.structliteral1, i32 0, i32 1
  store i64 6, i64* %14
  %15 = load i64* %14
  %16 = bitcast %hello.Big* %big to i8*
  %17 = bitcast %hello.Big* %.structliteral1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %16, i8* %17, i64 16, i32 1, i1 false)
  %18 = load %hello.Big* %big
  %19 = load { i64, %object.TypeInfo** }* @._arguments.array11
  %20 = load %hello.Big* %big
  store %hello.Big %20, %hello.Big* %copy_for_callee
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello28__T10testArgptrTS5hello3BigZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %19, %hello.Big* noalias nocapture %copy_for_callee)
  %21 = load { i64, %object.TypeInfo** }* @._arguments.array13
  %22 = bitcast %hello.Small* %small to i64*
  %23 = load i64* %22
  %24 = load %hello.Big* %big
  store %hello.Big %24, %hello.Big* %copy_for_callee2
  call void ({ i64, %object.TypeInfo** }, ...)* @_D5hello13testArgumentsFYv({ i64, %object.TypeInfo** } %21, i32 1, double 2.000000e+00, float 3.000000e+00, i64 %23, %hello.Big* noalias nocapture %copy_for_callee2)
  ret i32 0
}

...

; Function Attrs: uwtable
define weak_odr void @_D5hello30__T10testArgptrTS5hello5SmallZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %._arguments, ...) #0 {
  %_argptr_mem = alloca i8*, align 8
  %_arguments_mem = alloca { i64, %object.TypeInfo** }
  %format = alloca { i64, i8* }, align 8
  %.int_to_composite = alloca %hello.Small, align 8
  %1 = alloca %hello.Small
  %2 = bitcast i8** %_argptr_mem to i8*
  call void @llvm.va_start(i8* %2)
  store { i64, %object.TypeInfo** } %._arguments, { i64, %object.TypeInfo** }* %_arguments_mem
  store { i64, i8* } { i64 6, i8* getelementptr inbounds ([7 x i8]* @.str17, i32 0, i32 0) }, { i64, i8* }* %format
  %3 = load { i64, i8* }* %format
  %4 = getelementptr { i64, i8* }* %format, i32 0, i32 1
  %.ptr = load i8** %4
  %5 = getelementptr %object.TypeInfo* bitcast (%"typeid(Small)"* @_D22TypeInfo_S5hello5Small6__initZ to %object.TypeInfo*), i32 0, i32 0
  %6 = load %object.TypeInfo.__vtbl** %5
  %"typeid(Small).toString@vtbl" = getelementptr %object.TypeInfo.__vtbl* %6, i32 0, i32 1
  %"typeid(Small).toString" = load { i64, i8* } (%object.TypeInfo*)** %"typeid(Small).toString@vtbl", align 8
  %tmp = call { i64, i8* } %"typeid(Small).toString"(%object.TypeInfo* bitcast (%"typeid(Small)"* @_D22TypeInfo_S5hello5Small6__initZ to %object.TypeInfo*))
  %.ptr1 = extractvalue { i64, i8* } %tmp, 1
  %tmp2 = call i64 @_D4core4stdc6stdarg25__T6va_argTS5hello5SmallZ6va_argFNaNbNiKPaZS5hello5Small(i8** %_argptr_mem)
  %7 = bitcast %hello.Small* %.int_to_composite to i64*
  store i64 %tmp2, i64* %7
  %8 = load %hello.Small* %.int_to_composite
  store %hello.Small %8, %hello.Small* %1
  %9 = getelementptr %hello.Small* %1, i32 0, i32 0
  %10 = load i64* %9
  %tmp3 = call i32 (i8*, ...)* @printf(i8* %.ptr, i8* %.ptr1, i64 %10)
  ret void
}

; Function Attrs: uwtable
define weak_odr void @_D5hello28__T10testArgptrTS5hello3BigZ10testArgptrFNbYv({ i64, %object.TypeInfo** } %._arguments, ...) #0 {
  %_argptr_mem = alloca i8*, align 8
  %_arguments_mem = alloca { i64, %object.TypeInfo** }
  %format = alloca { i64, i8* }, align 8
  %.rettmp = alloca %hello.Big, align 8
  %1 = bitcast i8** %_argptr_mem to i8*
  call void @llvm.va_start(i8* %1)
  store { i64, %object.TypeInfo** } %._arguments, { i64, %object.TypeInfo** }* %_arguments_mem
  store { i64, i8* } { i64 6, i8* getelementptr inbounds ([7 x i8]* @.str18, i32 0, i32 0) }, { i64, i8* }* %format
  %2 = load { i64, i8* }* %format
  %3 = getelementptr { i64, i8* }* %format, i32 0, i32 1
  %.ptr = load i8** %3
  %4 = getelementptr %object.TypeInfo* bitcast (%"typeid(Big)"* @_D20TypeInfo_S5hello3Big6__initZ to %object.TypeInfo*), i32 0, i32 0
  %5 = load %object.TypeInfo.__vtbl** %4
  %"typeid(Big).toString@vtbl" = getelementptr %object.TypeInfo.__vtbl* %5, i32 0, i32 1
  %"typeid(Big).toString" = load { i64, i8* } (%object.TypeInfo*)** %"typeid(Big).toString@vtbl", align 8
  %tmp = call { i64, i8* } %"typeid(Big).toString"(%object.TypeInfo* bitcast (%"typeid(Big)"* @_D20TypeInfo_S5hello3Big6__initZ to %object.TypeInfo*))
  %.ptr1 = extractvalue { i64, i8* } %tmp, 1
  call void @_D4core4stdc6stdarg23__T6va_argTS5hello3BigZ6va_argFNaNbNiKPaZS5hello3Big(%hello.Big* noalias sret %.rettmp, i8** %_argptr_mem)
  %6 = getelementptr %hello.Big* %.rettmp, i32 0, i32 0
  %7 = load i64* %6
  %tmp2 = call i32 (i8*, ...)* @printf(i8* %.ptr, i8* %.ptr1, i64 %7)
  ret void
}

Trass3r · 2014-10-26T16:44:07Z

isPassedWithByvalSemantics needs to be adjusted as well I think.
real is now 64bit and should be passed in a register according to MS docs. Though I haven't checked what msvc actually does for long double.

kinke · 2014-10-26T16:56:44Z

Yes you are right, reals are incorrectly passed byval and not as doubles! Returning a real strangely enough (correctly) returns a double.
I don't think isPassedWithByvalSemantics() is the right place though - the incoming type shouldn't be Tfloat80 in the first place.

Trass3r · 2014-10-26T17:57:47Z

Also I was in the process of checking struct return. We need to make sure the sret pointer is returned in RAX.

kinke · 2014-10-26T18:13:43Z

That is handled by LLVM:

Big bla(double x)
{
    Big r = { 666, 667 };
    return r;
}

=>

_D5hello3blaFdZS5hello3Big:
...
    movq    %rcx, %rax
    movsd   %xmm1, 16(%rsp)
    movq    $666, (%rsp)
    movq    $667, 8(%rsp)
    movups  (%rsp), %xmm1
    movups  %xmm1, (%rcx)
    addq    $24, %rsp
    retq

The pointer is passed in RCX and immediately moved to RAX; afterwards, RAX is never touched.

Trass3r · 2014-10-26T18:28:02Z

Ok good.
I really wish one could configure ldc to use Intel syntax by default..

kinke · 2014-10-26T19:54:22Z

Yeah well maybe isPassedWithByvalSemantics() is the right place for it - the last commit fixes the real issue.

kinke · 2014-10-26T20:34:23Z

Btw, we should probably call LLVM's va_end() intrinsic for _argptr before returning. I don't know where to put the call though...

kinke · 2014-10-26T21:14:30Z

Linux x86 even passes the test suite, that's quite unexpected. ;)

Trass3r · 2014-10-26T22:36:56Z

Our struct passing code doesn't consider PODness either.
Also see https://issues.dlang.org/show_bug.cgi?id=11740

kinke · 2014-10-27T00:12:42Z

Yes, PODness is probably ignored on Win64 and aggregates such as delegates and dynamic arrays might be passed incorrectly as well. We should probably use a toArgTypes() based solution for each target ABI.

I've added an experimental generic patch for druntime in ldc-developers/druntime#14 (System V still unsupported though).

/edit: PODness doesn't seem to be relevant here as the types have already been lowered to C structs. When passing non-trivial types by value, the caller creates a hidden copy, which is initialized with postblit constructor etc. Then that copy is bit-copied to byval_rewrite's copy_for_callee. The callee gets the pointer to copy_for_callee. The destructor is invoked on copy_for_callee by the callee; the first copy isn't destructed.

kinke · 2014-11-09T05:21:17Z

The latest commit is a bigger one, I'll add some line comments when I find the time. Please help test!

Trass3r · 2014-11-09T10:19:41Z

Very nice but it'd have been better to split this up into individual commits. Those line comments do help but will get lost eventually.

kinke · 2014-11-09T17:04:36Z

Yeah I know, I've split it up now.

kinke · 2014-11-13T22:35:39Z

@redstar and @klickverbot: did you take a look at this?

dnadlinger · 2014-11-13T22:41:26Z

@kinke: My comment somehow seems to have disappeared (probably I never pressed submit), give me a couple of minutes.
@Trass3r: There is -x86-asm-syntax=intel. You could always put that into your ldc2.conf.

dnadlinger · 2014-11-13T22:50:12Z

gen/abi-x86-64.cpp

-
-        // EXPLICIT PARAMETERS
-
-        // reverse parameter order


I'd want to be sure that DMD is on board with this change before actually going through with it, since it breaks (naked) inline asm. What's the latest status regarding this on the DMD front? Also, I'd be mildly surprised if this actually passes all the Phobos tests without any further changes (e.g. BigInt, …).

kinke · 2014-11-14T17:52:12Z

Thanks for the comments, David. I completely forgot about inline assembly when it comes to reversing parameter order and returning complex numbers for extern(D). This is of course a major problem, so I'll undo these changes.

As to System V varargs ABI: http://llvm.org/docs/LangRef.html#variable-argument-handling-intrinsics seems to suggest the LLVM intrinsics (va_start/va_end/va_copy/va_arg) are able to handle all of that. Are you telling me we still have to compute that va_list struct manually (requiring perfect knowledge of the tedious register assignment)? I can't believe LLVM would have better support for Win64 than *nix x86-64 in this regard. :)

And as to System V ABI register assignment: there's an llvm::CallingConv::X86_64_SysV enum. Can that one be used or does LLVM really not support System V out of the box? It seems to have worked for extern(C) so far. Structs/static arrays <= 64 bit and of a power of 2 are rewritten as integers, all others are possibly rewritten using toArgTypes(). I hoped this would allow LLVM to get it right.

I hope to get LDC to compile in a Linux VM to experiment.

This block is pretty hacky.

For structs > 8 bytes passed in registers.

Ignore pointer type mismatches as long as both types are pointers; the element type has no effect on the memory layout. The RegCount constructor had to be improved to handle more complex types correctly (e.g., nested structs such as { { i64, i32* } }) as these are not rewritten anymore.

Don't attempt to store past the end of alloca()ted memory. This is important for structs passed in registers whose size is not a power of 2. Fix: alloca()te another sufficiently sized chunk, store into it and then memcpy() the relevant bytes to the target chunk.

…mentation.

Use a map instead of a vector as the attribute index may get as high as 0xFFFFFFFF (LLVM 3.1), apparently for function attributes.

kinke · 2015-02-23T21:57:50Z

Thanks Kai. I can reproduce the runnable/testabi.d dmd-testsuite issue (see #768 (comment)) with LLVM 3.2 when using -O2 and -O3. A LLVM 3.2 bug isn't likely, is it? ;)

Great, now the LLVM 3.4 and 3.5 debug jobs got ldc2 killed while compiling std.algorithm-unittest-debug. I guess that's due to some sort of Travis CI watchdog. The new druntime mini-change hardly affects performance; the move to a std::map in AttrSet in the last commit may have been costly when inserting attributes with decreasing index (when reversing the params for extern(D)), but I haven't checked whether that causes any significant slow-down.
The 32-bit issues are gone though.

dnadlinger · 2015-02-24T10:28:06Z

std.algorithm fails intermittently regardless of your change (OOM, mostly). Don't pay too much attention to it, it's something we need to fix at some point anyway.

Apparently required for LLVM 3.2.

kinke · 2015-02-24T19:45:51Z

Alright, the LLVM 3.2 issue is fixed too now. LLVM 3.1-3.5 all green except for std.algorithm kills.

redstar · 2015-02-24T21:01:36Z

I triggered Travis-CI to give you the joy of an all-green build. :-)

Varargs fix & ABI refactoring

redstar · 2015-02-25T20:23:42Z

@kinke Could you please contact me by mail (kai@redstar.de)? Thanks!!!!

kinke force-pushed the vararg2 branch 4 times, most recently from fe5e263 to 510a3ad Compare October 26, 2014 19:52

kinke mentioned this pull request Oct 26, 2014

Fix vararg ABI ldc-developers/druntime#14

Merged

kinke force-pushed the vararg2 branch from fc3c5fc to e0ac824 Compare November 9, 2014 05:10

kinke force-pushed the vararg2 branch from e0ac824 to c21ff8a Compare November 9, 2014 05:56

kinke force-pushed the vararg2 branch from c21ff8a to 650b3c2 Compare November 9, 2014 17:02

dnadlinger reviewed Nov 13, 2014
View reviewed changes

kinke added 13 commits February 23, 2015 22:52

Never rewrite varargs passed by value.

76d65d4

varargs fix: don't pass small structs by reference

b81f6ab

This block is pretty hacky.

SysV ABI: make sure all potential padding bytes of a struct are copied.

2663ae3

For structs > 8 bytes passed in registers.

Eliminate dead code.

5a38f9d

Get rid of superfluous loads and stores when rewriting return values.

08c4c07

SysV ABI: fix ExplicitByValRewrite for D structs and static D arrays.

7376a0c

SysV ABI: filter out irrelevant ABI mismatches wrt. to previous imple…

079e60f

…mentation.

Refactor ABI rewrites.

dbe801d

SysV ABI: remove support for VALIDATE_AGAINST_OLD_LDC_VERSION.

8a3a51e

Add required druntime and dmd-testsuite fixes.

7bbcdc1

Fix AttrSet struct for LLVM 3.1 and 3.2.

c3f2e19

Use a map instead of a vector as the attribute index may get as high as 0xFFFFFFFF (LLVM 3.1), apparently for function attributes.

kinke force-pushed the vararg2 branch from 61e1613 to c3f2e19 Compare February 23, 2015 21:54

Rework LLVM attributes abstraction.

92d9ade

kinke force-pushed the vararg2 branch from edb4552 to 92d9ade Compare February 24, 2015 12:42

Do not read from undefined bytes when bit-casting.

b45cc2e

Apparently required for LLVM 3.2.

redstar added a commit that referenced this pull request Feb 24, 2015

Merge pull request #768 from kinke/vararg2

9ec79de

Varargs fix & ABI refactoring

redstar merged commit 9ec79de into ldc-developers:master Feb 24, 2015

redstar mentioned this pull request Feb 25, 2015

[DUB][scid] test crashes #823

Closed

This was referenced Feb 25, 2015

Make the x86_64 System V ABI code use Type::toArgTypes #288

Closed

core.stdc.stdarg doesn't work on 64 bits #73

Closed

ABI transformations are never applied for variadic arguments #172

Closed

Vararg functions does not compile on x86_64 #702

Closed

kinke deleted the vararg2 branch August 24, 2017 19:42

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Varargs fix & ABI refactoring #768

Varargs fix & ABI refactoring #768

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 27, 2014

kinke commented Nov 9, 2014

Trass3r commented Nov 9, 2014

kinke commented Nov 9, 2014

kinke commented Nov 13, 2014

dnadlinger commented Nov 13, 2014

dnadlinger Nov 13, 2014

kinke commented Nov 14, 2014

kinke commented Feb 23, 2015

dnadlinger commented Feb 24, 2015

kinke commented Feb 24, 2015

redstar commented Feb 24, 2015

redstar commented Feb 25, 2015

Varargs fix & ABI refactoring #768

Varargs fix & ABI refactoring #768

Conversation

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

kinke commented Oct 26, 2014

Trass3r commented Oct 26, 2014

kinke commented Oct 27, 2014

kinke commented Nov 9, 2014

Trass3r commented Nov 9, 2014

kinke commented Nov 9, 2014

kinke commented Nov 13, 2014

dnadlinger commented Nov 13, 2014

dnadlinger Nov 13, 2014

Choose a reason for hiding this comment

kinke commented Nov 14, 2014

kinke commented Feb 23, 2015

dnadlinger commented Feb 24, 2015

kinke commented Feb 24, 2015

redstar commented Feb 24, 2015

redstar commented Feb 25, 2015