passing struct by value does not respect x86_64 SysV ABI #443

nicolas-cellier-aka-nice · 2019-11-06T19:13:05Z

The case when a structure passed by value is passed via memory (thru a pointer), via registers, or just stack, same for structure return value is much more complex than what is programmed currently in both FFI and Alien (FFI and Alien currently do not even agree...).

This badly need more test cases and more attention.
See squeak-dev thread [squeak-dev] Alien primFFICall returning struct with 64bit vm
http://lists.squeakfoundation.org/pipermail/squeak-dev/2019-November/204974.html

See draft for ABI, near page 21 and 24 (not very clear)
https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf

nicolas-cellier-aka-nice · 2019-11-06T20:20:25Z

Here is a small example below showing the eightbyte rule in action:

short-short-int fits in a eightbyte ($RAX), but short-int-short does not, it requires 2 eightbytes ($RAX, $RDX) - as if alignment was taken into account.
float-int fits in a eightbyte, so is passed in $RAX (integer dominates)
float-float is passed in $XMM0
double-int requires 2 eightbytes, so is returned in $XMM0,$RAX

We cannot return 3 eightbytes registers (like 2 long, 1 double), so the struct is passed by pointer in $RDI int this case, and $RDI is copied in $RAX on return.

/*
 * test returning struct by value
 * clang --shared -Os -o libalientest.dylib alientest.c
 * objdump --disassemble libalientest.dylib
 */

typedef struct sd2 {double a,b;} sd2;
typedef struct sf2 {float a,b;} sf2;
typedef struct sl2 {long a,b;} sl2;
typedef struct si2 {int a,b;} si2;
typedef struct ss2 {short a,b;} ss2;
typedef struct ssi {short a; int b;} ssi;
typedef struct sfi {float a; int b;} sfi;
typedef struct sdi {double a; int b;} sdi;
typedef struct sf2d {float a,b; double c;} sf2d;
typedef struct sfdf {float a; double b; float c;} sfdf;
typedef struct ss2i {short a,b; int c;} ss2i;
typedef struct ssis {short a; int b; short c;} ssis;
typedef struct ssls {short a; long b; short c;} ssls;
typedef struct sslf {short a; long b; float c;} sslf;
typedef struct sf4 {float a,b,c,d;} sf4;
typedef struct ss4 {short a,b,c,d;} ss4;

sd2  id2 (double a,double b) { sd2 v = {a,b}; return v; }
sf2  if2 (float  a,float  b) { sf2 v = {a,b}; return v; }
sl2  il2 (long   a,long   b) { sl2 v = {a,b}; return v; }
si2  ii2 (int    a,int    b) { si2 v = {a,b}; return v; }
ss2  is2 (short  a,short  b) { ss2 v = {a,b}; return v; }
ssi  isi (short  a,int    b) { ssi v = {a,b}; return v; }
sfi  ifi (float  a,int    b) { sfi v = {a,b}; return v; }
sdi  idi (double a,int    b) { sdi v = {a,b}; return v; }
sf2d if2d(float  a,float  b, double c) { sf2d v = {a,b,c}; return v; }
sfdf ifdf(float  a,double b, float  c) { sfdf v = {a,b,c}; return v; }
ss2i is2i(short  a,short  b, int    c) { ss2i v = {a,b,c}; return v; }
ssis isis(short  a,int    b, short  c) { ssis v = {a,b,c}; return v; }
ssls isls(short  a,long   b, short  c) { ssls v = {a,b,c}; return v; }
sslf islf(short  a,long   b, float  c) { sslf v = {a,b,c}; return v; }
sf4  if4 (float  a,float  b,float   c,float d) { sf4 v = {a,b,c,d}; return v; }
ss4  is4 (short  a,short  b,short   c,short d) { ss4 v = {a,b,c,d}; return v; }

sd2  ed2 () { return id2(1.0 ,2.0 ); }
sf2  ef2 () { return if2(1.0f,2.0f); }
sl2  el2 () { return il2(1L,2L); }
si2  ei2 () { return ii2(1,2); }
ss2  es2 () { return is2(1,2); }
sfi  efi () { return ifi(1.0f,2); }
sdi  edi () { return idi(1.0,2); }
ssi  esi () { return isi(1,2); }
sf2d ef2d() { return if2d(1.0f,2.0f,3.0); }
sfdf efdf() { return ifdf(1.0f,2.0,3.0f); }
ss2i es2i() { return is2i(1,2,3); }
ssis esis() { return isis(1,2,3); }
ssls esls() { return isls(1,2L,3); }
sslf eslf() { return islf(1,2L,3.0f); }
sf4  ef4 () { return if4(1.0f,2.0f,3.0f,4.0f); }
ss4  es4 () { return is4(1,2,3,4); }

which disassemble to:

libalientest.dylib:	file format Mach-O 64-bit x86-64

Disassembly of section __TEXT,__text:
_id2:
     d1f:	55 	pushq	%rbp
     d20:	48 89 e5 	movq	%rsp, %rbp
     d23:	5d 	popq	%rbp
     d24:	c3 	retq

_if2:
     d25:	55 	pushq	%rbp
     d26:	48 89 e5 	movq	%rsp, %rbp
     d29:	66 0f 3a 21 c1 10 	insertps	$16, %xmm1, %xmm0
     d2f:	5d 	popq	%rbp
     d30:	c3 	retq

_il2:
     d31:	55 	pushq	%rbp
     d32:	48 89 e5 	movq	%rsp, %rbp
     d35:	48 89 f8 	movq	%rdi, %rax
     d38:	48 89 f2 	movq	%rsi, %rdx
     d3b:	5d 	popq	%rbp
     d3c:	c3 	retq

_ii2:
     d3d:	55 	pushq	%rbp
     d3e:	48 89 e5 	movq	%rsp, %rbp
     d41:	48 c1 e6 20 	shlq	$32, %rsi
     d45:	89 f8 	movl	%edi, %eax
     d47:	48 09 f0 	orq	%rsi, %rax
     d4a:	5d 	popq	%rbp
     d4b:	c3 	retq

_is2:
     d4c:	55 	pushq	%rbp
     d4d:	48 89 e5 	movq	%rsp, %rbp
     d50:	c1 e6 10 	shll	$16, %esi
     d53:	0f b7 c7 	movzwl	%di, %eax
     d56:	09 f0 	orl	%esi, %eax
     d58:	5d 	popq	%rbp
     d59:	c3 	retq

_isi:
     d5a:	55 	pushq	%rbp
     d5b:	48 89 e5 	movq	%rsp, %rbp
     d5e:	48 c1 e6 20 	shlq	$32, %rsi
     d62:	0f b7 c7 	movzwl	%di, %eax
     d65:	48 09 f0 	orq	%rsi, %rax
     d68:	5d 	popq	%rbp
     d69:	c3 	retq

_ifi:
     d6a:	55 	pushq	%rbp
     d6b:	48 89 e5 	movq	%rsp, %rbp
     d6e:	66 0f 7e c0 	movd	%xmm0, %eax
     d72:	48 c1 e7 20 	shlq	$32, %rdi
     d76:	48 09 f8 	orq	%rdi, %rax
     d79:	5d 	popq	%rbp
     d7a:	c3 	retq

_idi:
     d7b:	55 	pushq	%rbp
     d7c:	48 89 e5 	movq	%rsp, %rbp
     d7f:	89 f8 	movl	%edi, %eax
     d81:	5d 	popq	%rbp
     d82:	c3 	retq

_if2d:
     d83:	55 	pushq	%rbp
     d84:	48 89 e5 	movq	%rsp, %rbp
     d87:	66 0f 3a 21 c1 10 	insertps	$16, %xmm1, %xmm0
     d8d:	0f 28 ca 	movaps	%xmm2, %xmm1
     d90:	5d 	popq	%rbp
     d91:	c3 	retq

_ifdf:
     d92:	55 	pushq	%rbp
     d93:	48 89 e5 	movq	%rsp, %rbp
     d96:	f3 0f 11 07 	movss	%xmm0, (%rdi)
     d9a:	f2 0f 11 4f 08 	movsd	%xmm1, 8(%rdi)
     d9f:	f3 0f 11 57 10 	movss	%xmm2, 16(%rdi)
     da4:	48 89 f8 	movq	%rdi, %rax
     da7:	5d 	popq	%rbp
     da8:	c3 	retq

_is2i:
     da9:	55 	pushq	%rbp
     daa:	48 89 e5 	movq	%rsp, %rbp
     dad:	48 c1 e2 20 	shlq	$32, %rdx
     db1:	0f b7 ce 	movzwl	%si, %ecx
     db4:	48 c1 e1 10 	shlq	$16, %rcx
     db8:	0f b7 c7 	movzwl	%di, %eax
     dbb:	48 09 c8 	orq	%rcx, %rax
     dbe:	48 09 d0 	orq	%rdx, %rax
     dc1:	5d 	popq	%rbp
     dc2:	c3 	retq

_isis:
     dc3:	55 	pushq	%rbp
     dc4:	48 89 e5 	movq	%rsp, %rbp
     dc7:	48 c1 e6 20 	shlq	$32, %rsi
     dcb:	0f b7 c7 	movzwl	%di, %eax
     dce:	48 09 f0 	orq	%rsi, %rax
     dd1:	5d 	popq	%rbp
     dd2:	c3 	retq

_isls:
     dd3:	55 	pushq	%rbp
     dd4:	48 89 e5 	movq	%rsp, %rbp
     dd7:	66 89 37 	movw	%si, (%rdi)
     dda:	48 89 57 08 	movq	%rdx, 8(%rdi)
     dde:	66 89 4f 10 	movw	%cx, 16(%rdi)
     de2:	48 89 f8 	movq	%rdi, %rax
     de5:	5d 	popq	%rbp
     de6:	c3 	retq

_islf:
     de7:	55 	pushq	%rbp
     de8:	48 89 e5 	movq	%rsp, %rbp
     deb:	66 89 37 	movw	%si, (%rdi)
     dee:	48 89 57 08 	movq	%rdx, 8(%rdi)
     df2:	f3 0f 11 47 10 	movss	%xmm0, 16(%rdi)
     df7:	48 89 f8 	movq	%rdi, %rax
     dfa:	5d 	popq	%rbp
     dfb:	c3 	retq

_if4:
     dfc:	55 	pushq	%rbp
     dfd:	48 89 e5 	movq	%rsp, %rbp
     e00:	66 0f 3a 21 c1 10 	insertps	$16, %xmm1, %xmm0
     e06:	66 0f 3a 21 d3 10 	insertps	$16, %xmm3, %xmm2
     e0c:	0f 28 ca 	movaps	%xmm2, %xmm1
     e0f:	5d 	popq	%rbp
     e10:	c3 	retq

_is4:
     e11:	55 	pushq	%rbp
     e12:	48 89 e5 	movq	%rsp, %rbp
     e15:	48 c1 e1 30 	shlq	$48, %rcx
     e19:	0f b7 d2 	movzwl	%dx, %edx
     e1c:	48 c1 e2 20 	shlq	$32, %rdx
     e20:	0f b7 f6 	movzwl	%si, %esi
     e23:	48 c1 e6 10 	shlq	$16, %rsi
     e27:	0f b7 c7 	movzwl	%di, %eax
     e2a:	48 09 f0 	orq	%rsi, %rax
     e2d:	48 09 d0 	orq	%rdx, %rax
     e30:	48 09 c8 	orq	%rcx, %rax
     e33:	5d 	popq	%rbp
     e34:	c3 	retq

_ed2:
     e35:	55 	pushq	%rbp
     e36:	48 89 e5 	movq	%rsp, %rbp
     e39:	f2 0f 10 05 2f 01 00 00 	movsd	303(%rip), %xmm0
     e41:	f2 0f 10 0d 2f 01 00 00 	movsd	303(%rip), %xmm1
     e49:	5d 	popq	%rbp
     e4a:	c3 	retq

_ef2:
     e4b:	55 	pushq	%rbp
     e4c:	48 89 e5 	movq	%rsp, %rbp
     e4f:	0f 28 05 3a 01 00 00 	movaps	314(%rip), %xmm0
     e56:	5d 	popq	%rbp
     e57:	c3 	retq

_el2:
     e58:	55 	pushq	%rbp
     e59:	48 89 e5 	movq	%rsp, %rbp
     e5c:	b8 01 00 00 00 	movl	$1, %eax
     e61:	ba 02 00 00 00 	movl	$2, %edx
     e66:	5d 	popq	%rbp
     e67:	c3 	retq

_ei2:
     e68:	55 	pushq	%rbp
     e69:	48 89 e5 	movq	%rsp, %rbp
     e6c:	48 b8 01 00 00 00 02 00 00 00 	movabsq	$8589934593, %rax
     e76:	5d 	popq	%rbp
     e77:	c3 	retq

_es2:
     e78:	55 	pushq	%rbp
     e79:	48 89 e5 	movq	%rsp, %rbp
     e7c:	b8 01 00 02 00 	movl	$131073, %eax
     e81:	5d 	popq	%rbp
     e82:	c3 	retq

_efi:
     e83:	55 	pushq	%rbp
     e84:	48 89 e5 	movq	%rsp, %rbp
     e87:	48 b8 00 00 80 3f 02 00 00 00 	movabsq	$9655287808, %rax
     e91:	5d 	popq	%rbp
     e92:	c3 	retq

_edi:
     e93:	55 	pushq	%rbp
     e94:	48 89 e5 	movq	%rsp, %rbp
     e97:	f2 0f 10 05 d1 00 00 00 	movsd	209(%rip), %xmm0
     e9f:	b8 02 00 00 00 	movl	$2, %eax
     ea4:	5d 	popq	%rbp
     ea5:	c3 	retq

_esi:
     ea6:	55 	pushq	%rbp
     ea7:	48 89 e5 	movq	%rsp, %rbp
     eaa:	48 b8 01 00 00 00 02 00 00 00 	movabsq	$8589934593, %rax
     eb4:	5d 	popq	%rbp
     eb5:	c3 	retq

_ef2d:
     eb6:	55 	pushq	%rbp
     eb7:	48 89 e5 	movq	%rsp, %rbp
     eba:	0f 28 05 cf 00 00 00 	movaps	207(%rip), %xmm0
     ec1:	f2 0f 10 0d b7 00 00 00 	movsd	183(%rip), %xmm1
     ec9:	5d 	popq	%rbp
     eca:	c3 	retq

_efdf:
     ecb:	55 	pushq	%rbp
     ecc:	48 89 e5 	movq	%rsp, %rbp
     ecf:	c7 07 00 00 80 3f 	movl	$1065353216, (%rdi)
     ed5:	48 b8 00 00 00 00 00 00 00 40 	movabsq	$4611686018427387904, %rax
     edf:	48 89 47 08 	movq	%rax, 8(%rdi)
     ee3:	c7 47 10 00 00 40 40 	movl	$1077936128, 16(%rdi)
     eea:	48 89 f8 	movq	%rdi, %rax
     eed:	5d 	popq	%rbp
     eee:	c3 	retq

_es2i:
     eef:	55 	pushq	%rbp
     ef0:	48 89 e5 	movq	%rsp, %rbp
     ef3:	48 b8 01 00 02 00 03 00 00 00 	movabsq	$12885032961, %rax
     efd:	5d 	popq	%rbp
     efe:	c3 	retq

_esis:
     eff:	55 	pushq	%rbp
     f00:	48 89 e5 	movq	%rsp, %rbp
     f03:	48 b8 01 00 00 00 02 00 00 00 	movabsq	$8589934593, %rax
     f0d:	66 ba 03 00 	movw	$3, %dx
     f11:	5d 	popq	%rbp
     f12:	c3 	retq

_esls:
     f13:	55 	pushq	%rbp
     f14:	48 89 e5 	movq	%rsp, %rbp
     f17:	66 c7 07 01 00 	movw	$1, (%rdi)
     f1c:	48 c7 47 08 02 00 00 00 	movq	$2, 8(%rdi)
     f24:	66 c7 47 10 03 00 	movw	$3, 16(%rdi)
     f2a:	48 89 f8 	movq	%rdi, %rax
     f2d:	5d 	popq	%rbp
     f2e:	c3 	retq

_eslf:
     f2f:	55 	pushq	%rbp
     f30:	48 89 e5 	movq	%rsp, %rbp
     f33:	66 c7 07 01 00 	movw	$1, (%rdi)
     f38:	48 c7 47 08 02 00 00 00 	movq	$2, 8(%rdi)
     f40:	c7 47 10 00 00 40 40 	movl	$1077936128, 16(%rdi)
     f47:	48 89 f8 	movq	%rdi, %rax
     f4a:	5d 	popq	%rbp
     f4b:	c3 	retq

_ef4:
     f4c:	55 	pushq	%rbp
     f4d:	48 89 e5 	movq	%rsp, %rbp
     f50:	0f 28 05 39 00 00 00 	movaps	57(%rip), %xmm0
     f57:	0f 28 0d 42 00 00 00 	movaps	66(%rip), %xmm1
     f5e:	5d 	popq	%rbp
     f5f:	c3 	retq

_es4:
     f60:	55 	pushq	%rbp
     f61:	48 89 e5 	movq	%rsp, %rbp
     f64:	48 b8 01 00 02 00 03 00 04 00 	movabsq	$1125912791875585, %rax
     f6e:	5d 	popq	%rbp
     f6f:	c3 	retq

nicolas-cellier-aka-nice · 2019-11-06T20:57:47Z

For passing parameters by value, my understanding is that we can pass up to 8 eigthbytes individually for each struct (either via register if struct fits on two eightbytes, and if we still have available registers, or via stack...). Only for more than 8 eightbytes, we will pass by MEMORY (a pointer to struct).

Here is a little example:

double adi(sdi x) { return (double) x.a + (double) x.b ; }
double adi_2(sdi x,sdi y) { return adi(x) + adi(y); }
double adi_4(sdi x,sdi y,sdi z,sdi t) { return adi_2(x,y) + adi_2(z,t); }

double aslf(sslf x) { return (double) x.a + (double) x.b + (double) x.c ; }
double aslf_2(sslf x,sslf y) { return aslf(x) + aslf(y); }
double aslf_4(sslf x,sslf y,sslf z,sslf t) { return aslf_2(x,y) + aslf_2(z,t); }

double-int are passed by registers (by value) because fitting on 2 eightbytes
short-long-float are passed via stack (by value) because requiring 3 eightbytes
We should try longer (> 8 eightbytes) and verify that they are passed by pointer...

Correction: struct are limited to 2 eightbytes, the case of 8 eigthbytes only apply to SSE vectors (__m512), but we do not handle such SSE vectors in OpenSmalltalk FFI.

balazskosi · 2019-11-08T10:44:14Z

I've heard that Lua's FFI is wonderful. Maybe we should take a look how did they tackle this problem.

After some quick digging around, here is the code setting up the arguments:
https://github.com/LuaJIT/LuaJIT/blob/v2.1/src/lj_ccall.c#L917

Here are the x64 specific defines used by the setting up code:
https://github.com/LuaJIT/LuaJIT/blob/v2.1/src/lj_ccall.c#L132

They definitely have all the type information. They get it from parsing the C declarations.

Here is an example of the declaration part:
https://github.com/luapower/chipmunk/blob/master/chipmunk_h.lua
And the usage:
https://github.com/luapower/chipmunk/blob/master/chipmunk_demo.lua

krono · 2019-11-08T12:04:16Z

PyPy/Python CFFI is similiarily good:
https://cffi.readthedocs.io/en/latest/

OpenSmalltalk-Bot · 2019-11-08T16:41:18Z

I think I ran into this trying to build FFI to libclang. It passes a lot of structs by value and some calls came back with nothing. My message to vm-dev@lists.squeakfoundation.org on Nov 21, 2017 follows; Apologies if this isn't relevant.

…

I've been trying to track this down for a couple weeks now. I have concluded that structs passed by value to functions on the 64 bit VM are not properly populated. The struct's memory is all zero'd. I found this while trying to work with LibClang and found that functions that fetched code locations from code ranges always returned invalid zero'd locations. After spending some time with lldb I have traced the problem into the native code and found that the argument is not correct. I've carved out the wee bit of clang to reproduce this in a tiny library. The gist of it is below and the entire file is included. Basically the struct passed to the function clang_getRangeStart is zero'd memory regardless of the data I send from the image side. The build command I used on sierra is clang -shared -undefined dynamic_lookup -o microclang.dylib microclang.c I'm stuck. The FFI64 plugin is way beyond my comprehension. // microclang.c typedef struct { const void *ptr_data[2]; unsigned int_data; } CXSourceLocation; /** * \brief Identifies a half-open character range in the source code. * * Use clang_getRangeStart() and clang_getRangeEnd() to retrieve the * starting and end locations from a source range, respectively. */ typedef struct { const void *ptr_data[2]; unsigned begin_int_data; unsigned end_int_data; } CXSourceRange; const char* first = "first_pointer"; const char* second = "second_pointer"; // return a fake range with non zero data CXSourceRange clang_getArbitraryRange() { CXSourceRange range = {0}; range.ptr_data[0] = (void*)first; range.ptr_data[1] = (void*)second; range.begin_int_data = 17; range.end_int_data = 24; return range; } // Actual clang function - range is always zero'd here despite having values in the image CXSourceLocation clang_getRangeStart(CXSourceRange range) { // Special decoding for CXSourceLocations for CXLoadedDiagnostics. if ((uintptr_t)range.ptr_data[0] & 0x1) { CXSourceLocation Result = { { range.ptr_data[0], nullptr }, 0 }; return Result; } CXSourceLocation Result = { { range.ptr_data[0], range.ptr_data[1] }, range.begin_int_data }; return Result; } On Nov 6, 2019, at 12:57 PM, Nicolas Cellier ***@***.***> wrote: For passing parameters by value, my understanding is that we can pass up to 8 eigthbytes individually for each struct (either via register if struct fits on two eightbytes, and if we still have available registers, or via stack...). Only for more than 8 eightbytes, we will pass by MEMORY (a pointer to struct). Here is a little example: double adi(sdi x) { return (double) x.a + (double) x.b ; } double adi_2(sdi x,sdi y) { return adi(x) + adi(y); } double adi_4(sdi x,sdi y,sdi z,sdi t) { return adi_2(x,y) + adi_2(z,t); } double aslf(sslf x) { return (double) x.a + (double) x.b + (double) x.c ; } double aslf_2(sslf x,sslf y) { return aslf(x) + aslf(y); } double aslf_4(sslf x,sslf y,sslf z,sslf t) { return aslf_2(x,y) + aslf_2(z,t); } double-int are passed by registers (by value) because fitting on 2 eightbytes short-long-float are passed via stack (by value) because requiring 3 eightbytes We should try longer (> 8 eightbytes) and verify that they are passed by pointer... — You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub <#443?email_source=notifications&email_token=AIJPEW3KXWHZHIYSFPN56DDQSMVU3A5CNFSM4JJ3EAU2YY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOEDH6XYY#issuecomment-550497251>, or unsubscribe <https://github.com/notifications/unsubscribe-auth/AIJPEW6526SF2E4V5NDITSTQSMVU3ANCNFSM4JJ3EAUQ>.

eliotmiranda · 2019-11-15T19:41:49Z

Hi Nicolas, Hi All,

this is an issue of funding the correct specification. I fond this one:
[http://people.freebsd.org/~obrien/amd64-elf-abi.pdf]
and it states:

The classification of aggregate (structures and arrays) and union types works as follows:
1. If the size of an object is larger than two eightbytes, or in C++, is a non- POD 10 structure or union type, or contains unaligned fields, it has class MEMORY.

This is far simpler than the alternatives. So at least what we have may be correct on FreeBSD ;-)

Can someone find out what version of the standard is operative on linux and macOS?

The spec that splits struct fields across available registers is horribly complex but doable. I did implement that spec for the VisualWorks FFI. We're in a much better position than the VisualWorks FFI because Andreas designed and implemented signature type information correctly in his (our) FFI. In his/our FFI, a function's type specification is associated with the function itself. In VisualWorks, what is associated with a function is a very simplified reduction of types, and rich type information is only available attached to actual parameters, so pass in a C object with the wrong type information in VW and it will be marshaled incorrectly.

So while this is work to do, I'm sure we can do it quite straight-forwardly. The key issue is to determine the right versions of ABI specification to use before we start implementation. As I showed above I took the easy route; I found a version of the spec that had simple semantics and implemented it. Mea culpa.

eliotmiranda · 2019-12-18T02:57:27Z

What frustrates me here is that we also have all the type info and we have our own code which mostly works. So why is no one suggesting looking at our code and fixing it instead of relying on other's code that doesn't suit as well? We can wipe our own chins.

nicolas-cellier-aka-nice · 2020-01-25T08:04:04Z

Follow up: for returning struct by value, we use a fake SixteenByteReturn struct {sqInt a; sqInt b;}
Unfortunately, this only works for fields returned via integer registers ($RAX ...) not via float registers ($XMM0 ...)

We would need 4 different return cases sqInt-sqInt sqInt-double double-sqInt double-double

For passing struct by value, I just corrected a bug in VMMaker slang about floatType & doubleType check of struct members. But it ain't gonna work in all cases.
https://source.squeak.org/VMMaker/VMMaker.oscog-nice.2677.diff

I will also push more FFI tests for some tricky cases.

I have prototyped a method that enumerate the fields and re-compute the alignment, but it does not sound good, this job/information is already done at image side...

These tests are related to #443

nicolas-cellier-aka-nice · 2020-01-26T23:12:20Z

WIP https://source.squeak.org/VMMakerInbox/VMMaker.oscog-nice.2678.diff
Not yet ready for inclusion

UPDATE:
Bug corrected in https://source.squeak.org/VMMakerInbox/VMMaker.oscog-nice.2679.diff
Now all tests pass.
HOWEVER, the generated FFI only works in fast VM because ffiCalloutTo:SpecOnStack:in: does not get inlined by our CCodeGenerator!
(the C compiler inlines it in the fast flavour, but less aggressive optimizations don't).

UPDATE:
Inlining corrected in https://source.squeak.org/VMMakerInbox/VMMaker.oscog-nice.2680.diff

Next stuff: handle union and packed struct.
I can already recognize union by the fact that a member has a size equal to the union.
For packed struct the idea is to recompute the size in FFI and check if different from pre-computed in the compiledSpec...
Note: this only is going to happen for size <= 16, so should not be too costly.

ThreradedFFIPlugin: solve passing/returning struct by value on X64 See #443 On X64/SysV struct up to 16 byte long can be passed by value (& returned) into a pair of 8-byte registers. The problem is to know whether these are integer (RAX RDX) or float (XMM0 XMM1) registers or eventually a mix of... For each 8-byte, we must know if it contains at least an int (in which case we have to use an int register), or exclusively floating points (a pair of float or a double). Previous algorithm did check first two fields, or last two fields which does not correctly cover all cases... For example int-int-float has last two fields int-float, though it will use RAX XMM0. So we have to know about struct layout... Unfortunately, this information is not included into the compiledSpec. The idea here is to reconstruct the information. See #registerTypeForStructSpecs:OfLength: & It's also impossible to cover the exotic alignments like packed structure cases... if we really want to pass that, this will mean passing the alignment information, a more involved change of #compiledSpec (we need up to 16 bits by field to handle that information since our FFI struct are limited to 65535 bytes anyway). For returning a struct, that's the same problem. We have four possible combinations of int-float registers. Consequently, the idea is to analyze #registerType: and switch to appropriate case. I found convenient to pass the ffiRetSpec compiledSpec object thru CalloutState (it's the Smalltalk WordArray object, not a pointer to its firstIndexableField) for performing this analysis... Not sure if this is the best choice. Since we have 4 different SixteenByte types, I have changed value, since it's what will be used to memcpy to allocated ByteArray handle.

ThreadedFFIPlugins: See #443 FFI support for returning of a packed struct by value in X64 SysV On X64/SysV struct up to 16 byte long can be passed by value into a pair of 8-byte registers. The problem is to know whether these are int (RAX RDX) or float (XMM0 XMM1) registers or eventually a mix of... For each 8-byte, we must know if it contains at least an int (in which case we have to use an int register), or exclusively floating points (a pair of float or a double). Previous algorithm did check first two fields, or last two fields which does not correctly cover all cases... For example int-int-float has last two fields int-float, though it will use RAX XMM0. So we have to know about struct layout... Unfortunately, this information is not included into the compiledSpec. The idea here is to reconstruct the information. See #registerTypeForStructSpecs:OfLength: It's also impossible to cover the exotic alignments like packed structure cases... But if we really want to pass that, this will mean passing the alignment information, a more involved change of #compiledSpec (we need up to 16 bits by field to handle that information since our FFI struct are limited to 65535 bytes anyway). For returning a struct, that's the same problem. We have four possible combinations of int-float registers. Consequently, the idea is to analyze the ffiRetSpec compiledSpec object thru CalloutState (it's the Smalltalk WordArray object, not a pointer to its firstIndexableField) for performing this analysis... Not sure if the best choice. Since we have 4 different SixteenByte types, I have changed value, since it's what will be used to memcpy to allocated ByteArray handle. Checking the size of a struct is not the only condition for returning a struct via registers. Some ABI like X64 SysV also mandates that struct fields be properly aligned. Therefore, we cannot just rely on #returnStructInRegisters:. Rename #returnStructInRegisters: -> #canReturnInRegistersStructOfSize: Perform a more thorough analysis during the setup in #ffiCheckReturn:With:in: The ABI will #encodeStructReturnTypeIn: a new callout state. This structReturnType is telling how the struct should be returned - via registers (and which registers) - or via pointer to memory allocated by caller This structReturnType will be used at time of: - allocating the memory in caller - see #ffiCall:ArgArrayOrNil:NumArgs: - dispatching to the correct FFI prototype - see ThreadedX64SysVFFIPlugin>>#ffiCalloutTo:SpecOnStack:in: - copying back the struct contents to ExternalStructure handle (a ByteArray) - see #ffiReturnStruct:ofType:in: Since structReturnType is encoded, it is not necessarily accessed directly, but rather via new implementation of #returnStructInRegisters: whch now takes the calloutState and knows how to decode its structReturnType. Check for unaligned struct and pass them in MEMORY (alloca'd memory passed thru a pointer). Use a new (branchless) formulation for aligning the byteSize to next multiple of fieldAlignment. Encode registryType of invalid unaligned candidate as 2r110, and pass the struct address returned by the foreign function in $RAX register in place of callout limit when stuct is returned by MEMORY. CoInterpreter: eliminate all but one compiler warning. Cogit/Slang: fix several C compiler warnings re the Cogits. Cogit: DUAL_MAPPED_CODE_ZONE (require -DDUAL_MAPPED_CODE_ZONE=1 to enable) Fix denial of write/execute facility on modern Linuxes by dual mapping the code zone in to a read/execute address range for code execution and a read/write address range for code editing. Maintain codeToDataDelta and provide codeXXXAt:put: to write at address + codeToDataDelta to the offset writable address range. Hence DUAL_MAPPED_CODE_ZONE requires a new executbale permissions applyer that will also do the dual mapping, sqMakeMemoryExecutableFrom:To:CodeToDataDelta:. Provide writableMethodFor: as a convenience for obtaining a writable cogMethod. No longer have the fillInXXXHeaderYYY: methods answer anything since they're given the writable header, not the actual header. Cogit: Refactor indexForSelector:in:at: to indexForSelector:in: in the back end so it can be inlined (via a macro). Slang: emit constant for (M << N) and (M - N) - L for constant integers. Fix in slang case statement expansion labels. During expansion in case statements, trees are duplicated and expanded.

nicolas-cellier-aka-nice · 2020-02-01T17:37:46Z

Should be completely fixed by c855035

… [ lastPointerOfWhileSwizzling: ]

…[ lastPointerOfWhileSwizzling: ] KILLED by 1/209 test cases.

…ver with false ] on method [ freeTreeNodesDo: ]

…er with false ] on method [ freeTreeNodesDo: ] KILLED by 28/234 test cases.

… method [ popObjStack: ] 14 test cases.

…method [ popObjStack: ] 14/14 test case are EQUIVALENT

nicolas-cellier-aka-nice added bug plugins labels Nov 6, 2019

syrel mentioned this issue Nov 10, 2019

TFBooleanType>>basicType should use uint8, not ushort pharo-project/threadedFFI-Plugin#16

Merged

nicolas-cellier-aka-nice added a commit that referenced this issue Jan 25, 2020

Provide some more FFI tests for passing struct by value

18e3583

These tests are related to #443

nicolas-cellier-aka-nice closed this as completed Feb 1, 2020

hogoww referenced this issue in hogoww/opensmalltalk-vm Dec 23, 2021

Mutant pharo-project#443, Installing [ Replace #+ with #- ] on method…

bd2fb7e

… [ lastPointerOfWhileSwizzling: ]

hogoww referenced this issue in hogoww/opensmalltalk-vm Dec 23, 2021

Mutant pharo-project#443, Reverting [ Replace #+ with #- ] on method …

25c610d

…[ lastPointerOfWhileSwizzling: ] KILLED by 1/209 test cases.

hogoww referenced this issue in hogoww/opensmalltalk-vm Dec 29, 2021

Mutant pharo-project#443, Installing [ Replace #ifTrue:ifFalse: recei…

7dc70b2

…ver with false ] on method [ freeTreeNodesDo: ]

hogoww referenced this issue in hogoww/opensmalltalk-vm Dec 29, 2021

Mutant pharo-project#443, Reverting [ Replace #ifTrue:ifFalse: receiv…

50f7a34

…er with false ] on method [ freeTreeNodesDo: ] KILLED by 28/234 test cases.

hogoww referenced this issue in hogoww/opensmalltalk-vm Feb 26, 2022

Mutant pharo-project#443, Installing [ Replace #and: with #nand: ] on…

e186858

… method [ popObjStack: ] 14 test cases.

hogoww referenced this issue in hogoww/opensmalltalk-vm Feb 26, 2022

Mutant pharo-project#443, Reverting [ Replace #and: with #nand: ] on …

a478316

…method [ popObjStack: ] 14/14 test case are EQUIVALENT

yorickpeterse mentioned this issue Jan 9, 2025

Generating JSON with --opt=aggressive can result in out of bounds panics in certain cases inko-lang/inko#792

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

passing struct by value does not respect x86_64 SysV ABI #443

passing struct by value does not respect x86_64 SysV ABI #443

nicolas-cellier-aka-nice commented Nov 6, 2019

nicolas-cellier-aka-nice commented Nov 6, 2019 •

edited

Loading

nicolas-cellier-aka-nice commented Nov 6, 2019 •

edited

Loading

balazskosi commented Nov 8, 2019

krono commented Nov 8, 2019

OpenSmalltalk-Bot commented Nov 8, 2019 via email

eliotmiranda commented Nov 15, 2019

eliotmiranda commented Dec 18, 2019

nicolas-cellier-aka-nice commented Jan 25, 2020

nicolas-cellier-aka-nice commented Jan 26, 2020 •

edited

Loading

nicolas-cellier-aka-nice commented Feb 1, 2020

passing struct by value does not respect x86_64 SysV ABI #443

passing struct by value does not respect x86_64 SysV ABI #443

Comments

nicolas-cellier-aka-nice commented Nov 6, 2019

nicolas-cellier-aka-nice commented Nov 6, 2019 • edited Loading

nicolas-cellier-aka-nice commented Nov 6, 2019 • edited Loading

balazskosi commented Nov 8, 2019

krono commented Nov 8, 2019

OpenSmalltalk-Bot commented Nov 8, 2019 via email

eliotmiranda commented Nov 15, 2019

eliotmiranda commented Dec 18, 2019

nicolas-cellier-aka-nice commented Jan 25, 2020

nicolas-cellier-aka-nice commented Jan 26, 2020 • edited Loading

nicolas-cellier-aka-nice commented Feb 1, 2020

nicolas-cellier-aka-nice commented Nov 6, 2019 •

edited

Loading

nicolas-cellier-aka-nice commented Nov 6, 2019 •

edited

Loading

nicolas-cellier-aka-nice commented Jan 26, 2020 •

edited

Loading