Closed as duplicate of #65495
Description
Go version
gotip version devel go1.25-38d146d5 Sun Mar 16 15:46:25 2025 -0700 linux/amd64
Output of go env
in your module/workspace:
AR='ar'
CC='gcc'
CGO_CFLAGS='-O2 -g'
CGO_CPPFLAGS=''
CGO_CXXFLAGS='-O2 -g'
CGO_ENABLED='1'
CGO_FFLAGS='-O2 -g'
CGO_LDFLAGS='-O2 -g'
CXX='g++'
GCCGO='gccgo'
GO111MODULE='on'
GOAMD64='v1'
GOARCH='amd64'
GOAUTH='netrc'
GOBIN=''
GOCACHE='/home/mcyoung/.cache/go-build'
GOCACHEPROG=''
GODEBUG=''
GOENV='/home/mcyoung/.config/go/env'
GOEXE=''
GOEXPERIMENT=''
GOFIPS140='off'
GOFLAGS=''
GOGCCFLAGS='-fPIC -m64 -pthread -Wl,--no-gc-sections -fmessage-length=0 -ffile-prefix-map=/tmp/go-build4068624110=/tmp/go-build -gno-record-gcc-switches'
GOHOSTARCH='amd64'
GOHOSTOS='linux'
GOINSECURE=''
GOMOD='/dev/null'
GOMODCACHE='/home/mcyoung/projects/go/pkg/mod'
GONOPROXY=''
GONOSUMDB=''
GOOS='linux'
GOPATH='/home/mcyoung/projects/go'
GOPRIVATE=''
GOPROXY='https://proxy.golang.org,direct'
GOROOT='/home/mcyoung/sdk/gotip'
GOSUMDB='sum.golang.org'
GOTELEMETRY='local'
GOTELEMETRYDIR='/home/mcyoung/.config/go/telemetry'
GOTMPDIR=''
GOTOOLCHAIN='auto'
GOTOOLDIR='/home/mcyoung/sdk/gotip/pkg/tool/linux_amd64'
GOVCS=''
GOVERSION='devel go1.25-38d146d5 Sun Mar 16 15:46:25 2025 -0700'
GOWORK=''
PKG_CONFIG='pkg-config'
What did you do?
I wrote the following package and generated assembly from it.
package x
// state is the value that X receives and returns by value. It has nine
// fields: one function value followed by eight uint64 words.
type state struct {
// x0 is the function X calls; it takes a state and returns a state.
x0 func(state) state
// x1 through x8 are plain uint64 payload fields. X itself never reads
// them individually; it only passes the whole struct to x0.
x1 uint64
x2 uint64
x3 uint64
x4 uint64
x5 uint64
x6 uint64
x7 uint64
x8 uint64
}
// X calls the function stored in s.x0, passing s itself by value, and
// returns the state that call produces. It does nothing else, so the
// generated code for X is the call plus whatever argument/result
// shuffling the compiler emits around it.
//
//go:nosplit
func X(s state) state {
return s.x0(s)
}
This is an example of threaded code, where all operations are replaced with function calls that consume the whole state and return it by value, so that the state can remain in registers. This is a technique for writing highly optimized parsers, such as the ones in UPB and Protobuf C++.
What did you see happen?
The output was the following assembly.
TEXT command-line-arguments.X(SB), NOSPLIT|ABIInternal, $224-72
PUSHQ BP
MOVQ SP, BP
SUBQ $216, SP
; Spill state into the caller-provided spill region.
MOVQ AX, 232(SP)
MOVQ BX, 240(SP)
MOVQ CX, 248(SP)
MOVQ DI, 256(SP)
MOVQ SI, 264(SP)
MOVQ R8, 272(SP)
MOVQ R9, 280(SP)
MOVQ R10, 288(SP)
MOVQ R11, 296(SP)
; Dead(?) stores to the stack.
MOVUPS X15, 72(SP)
MOVUPS X15, 80(SP)
MOVUPS X15, 96(SP)
MOVUPS X15, 112(SP)
MOVUPS X15, 128(SP)
; Load rdx with the funcval, and r12 with funcval.pc
MOVQ 232(SP), DX
MOVQ (DX), R12
; Re-hydrate state. These loads are dead.
MOVQ 240(SP), BX
MOVQ 248(SP), CX
MOVQ 256(SP), DI
MOVQ 264(SP), SI
MOVQ 272(SP), R8
MOVQ 280(SP), R9
MOVQ 288(SP), R10
MOVQ 296(SP), R11
MOVQ DX, AX ; This register move is also dead.
CALL R12 ; Do the call.
; Spill the results of the call into this function's own frame (144(SP) through 208(SP)).
MOVQ AX, 144(SP)
MOVQ BX, 152(SP)
MOVQ CX, 160(SP)
MOVQ DI, 168(SP)
MOVQ SI, 176(SP)
MOVQ R8, 184(SP)
MOVQ R9, 192(SP)
MOVQ R10, 200(SP)
MOVQ R11, 208(SP)
; Copy the above into our stack frame.
; Stores are dead.
MOVQ 144(SP), R12
MOVQ R12, 72(SP)
MOVUPS 152(SP), X0
MOVUPS X0, 80(SP)
MOVUPS 168(SP), X0
MOVUPS X0, 96(SP)
MOVUPS 184(SP), X0
MOVUPS X0, 112(SP)
MOVUPS 200(SP), X0
MOVUPS X0, 128(SP)
; Re-hydrate the return value. These loads are dead.
MOVQ 72(SP), AX
MOVQ 80(SP), BX
MOVQ 88(SP), CX
MOVQ 96(SP), DI
MOVQ 104(SP), SI
MOVQ 112(SP), R8
MOVQ 120(SP), R9
MOVQ 128(SP), R10
MOVQ 136(SP), R11
; Epilogue.
ADDQ $216, SP
POPQ BP
RET
There are several problems with this code:
- `s` is spilled in two separate places, even though the ABI requires the caller to guarantee spill space for arguments. `state`'s shape is pointer-free except for the `x0 *funcval`, so this can't be aiding stack scanning.
- Virtually all of the loads from the stack are dead: they are essentially `*x = y; y = *x`.
What did you expect to see?
I expected to see approximately the following code.
TEXT command-line-arguments.X(SB), NOSPLIT|ABIInternal, $224-72
MOVQ AX, DX
MOVQ (AX), R12
CALL R12
RET
Of course, this breaks symbolization of arguments/returns in backtraces (and, presumably, debuggers). To my knowledge, there is no way to instruct gc to not perform such spills.