From 3607b526f0760823d6aa310aa69c3c7d98ee8596 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 03:25:13 +0000 Subject: [PATCH] Bump github.com/tetratelabs/wazero from 1.7.2 to 1.8.0 Bumps [github.com/tetratelabs/wazero](https://github.com/tetratelabs/wazero) from 1.7.2 to 1.8.0. - [Release notes](https://github.com/tetratelabs/wazero/releases) - [Commits](https://github.com/tetratelabs/wazero/compare/v1.7.2...v1.8.0) --- updated-dependencies: - dependency-name: github.com/tetratelabs/wazero dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 +- vendor/github.com/tetratelabs/wazero/Makefile | 35 +- .../github.com/tetratelabs/wazero/README.md | 2 +- .../github.com/tetratelabs/wazero/api/wasm.go | 10 +- .../github.com/tetratelabs/wazero/builder.go | 17 +- vendor/github.com/tetratelabs/wazero/cache.go | 7 + .../github.com/tetratelabs/wazero/config.go | 34 +- .../wazero/experimental/checkpoint.go | 13 - .../wazero/experimental/importresolver.go | 19 + .../wazero/experimental/listener.go | 6 - .../experimental/sys/syscall_errno_windows.go | 6 +- .../imports/wasi_snapshot_preview1/fs.go | 21 +- .../imports/wasi_snapshot_preview1/poll.go | 4 +- .../wazero/internal/descriptor/table.go | 9 +- .../internal/engine/interpreter/compiler.go | 78 ++- .../engine/interpreter/interpreter.go | 29 +- .../engine/wazevo/backend/compiler.go | 48 +- .../engine/wazevo/backend/compiler_lower.go | 38 +- .../wazevo/backend/executable_context.go | 219 ------- .../wazevo/backend/isa/amd64/abi_go_call.go | 11 +- .../engine/wazevo/backend/isa/amd64/instr.go | 33 +- .../wazevo/backend/isa/amd64/lower_mem.go | 6 +- .../wazevo/backend/isa/amd64/machine.go | 372 +++++++---- .../isa/amd64/machine_pro_epi_logue.go | 17 +- .../backend/isa/amd64/machine_regalloc.go | 251 +++++++- .../wazevo/backend/isa/amd64/machine_vec.go | 2 +- .../wazevo/backend/isa/amd64/operands.go | 56 +- .../wazevo/backend/isa/amd64/reflect.go | 11 - .../backend/isa/amd64/reflect_tinygo.go | 11 - .../engine/wazevo/backend/isa/amd64/stack.go | 10 +- .../engine/wazevo/backend/isa/arm64/abi.go | 39 +- .../backend/isa/arm64/abi_entry_preamble.go | 29 +- .../wazevo/backend/isa/arm64/abi_go_call.go | 134 ++-- .../engine/wazevo/backend/isa/arm64/instr.go | 465 +++++++------- .../backend/isa/arm64/instr_encoding.go | 138 ++-- .../backend/isa/arm64/lower_constant.go | 6 +- .../wazevo/backend/isa/arm64/lower_instr.go | 409 ++++++------ .../backend/isa/arm64/lower_instr_operands.go | 56 +- .../wazevo/backend/isa/arm64/lower_mem.go | 59 +- .../wazevo/backend/isa/arm64/machine.go | 282 ++++++--- .../isa/arm64/machine_pro_epi_logue.go | 76 ++- .../backend/isa/arm64/machine_regalloc.go | 251 +++++++- .../wazevo/backend/isa/arm64/unwind_stack.go | 12 +- .../internal/engine/wazevo/backend/machine.go | 19 +- .../engine/wazevo/backend/regalloc.go | 319 ---------- .../engine/wazevo/backend/regalloc/api.go | 84 ++- .../wazevo/backend/regalloc/regalloc.go | 593 +++++++++--------- .../engine/wazevo/backend/regalloc/regset.go | 60 +- .../internal/engine/wazevo/backend/vdef.go | 30 +- .../internal/engine/wazevo/call_engine.go | 26 +- .../internal/engine/wazevo/engine_cache.go | 7 + .../engine/wazevo/frontend/frontend.go | 35 +- .../internal/engine/wazevo/frontend/lower.go | 29 +- .../engine/wazevo/frontend/sort_id.go | 2 - .../engine/wazevo/frontend/sort_id_old.go | 17 - .../internal/engine/wazevo/hostmodule.go | 8 +- .../internal/engine/wazevo/module_engine.go | 31 +- .../wazero/internal/engine/wazevo/reflect.go | 11 - .../internal/engine/wazevo/reflect_tinygo.go | 11 - .../internal/engine/wazevo/ssa/basic_block.go | 106 ++-- .../engine/wazevo/ssa/basic_block_sort.go | 2 - .../engine/wazevo/ssa/basic_block_sort_old.go | 24 - .../internal/engine/wazevo/ssa/builder.go | 263 +++++--- .../engine/wazevo/ssa/instructions.go | 47 +- .../wazero/internal/engine/wazevo/ssa/pass.go | 136 ++-- .../engine/wazevo/ssa/pass_blk_layouts.go | 35 +- .../internal/engine/wazevo/ssa/pass_cfg.go | 49 +- .../wazero/internal/engine/wazevo/ssa/type.go | 3 + .../wazero/internal/engine/wazevo/ssa/vs.go | 39 +- .../internal/engine/wazevo/wazevoapi/pool.go | 4 +- .../engine/wazevo/wazevoapi/resetmap.go | 4 +- .../internal/expctxkeys/importresolver.go | 6 + .../wazero/internal/platform/cpuid.go | 5 + .../wazero/internal/platform/cpuid_amd64.go | 36 +- .../internal/platform/cpuid_unsupported.go | 9 +- .../wazero/internal/platform/mmap_unix.go | 2 - .../internal/platform/mmap_unsupported.go | 2 - .../wazero/internal/platform/mmap_windows.go | 2 - .../wazero/internal/platform/mremap_other.go | 23 - .../wazero/internal/platform/mremap_unix.go | 21 - .../wazero/internal/platform/platform.go | 36 -- .../tetratelabs/wazero/internal/sysfs/file.go | 3 - .../wazero/internal/wasm/binary/value.go | 3 +- .../wazero/internal/wasm/engine.go | 3 + .../wazero/internal/wasm/func_validation.go | 16 +- .../wazero/internal/wasm/memory.go | 33 +- .../wazero/internal/wasm/module.go | 7 +- .../tetratelabs/wazero/internal/wasm/store.go | 31 +- .../wazero/internal/wasm/store_module_list.go | 4 +- .../wazero/internal/wasmdebug/dwarf.go | 1 - .../github.com/tetratelabs/wazero/runtime.go | 8 +- .../wazero/sys/stat_unsupported.go | 3 - vendor/modules.txt | 4 +- 94 files changed, 2741 insertions(+), 2848 deletions(-) create mode 100644 vendor/github.com/tetratelabs/wazero/experimental/importresolver.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go create mode 100644 vendor/github.com/tetratelabs/wazero/internal/expctxkeys/importresolver.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go delete mode 100644 vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go diff --git a/go.mod b/go.mod index 5cacb2e521..0fb1cb2b3b 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 - github.com/tetratelabs/wazero v1.7.2 + github.com/tetratelabs/wazero v1.8.0 github.com/things-go/go-socks5 v0.0.5 github.com/ulikunitz/xz v0.5.12 github.com/xlab/treeprint v1.2.0 diff --git a/go.sum b/go.sum index 4db757a329..1bb9176ade 100644 --- a/go.sum +++ b/go.sum @@ -387,8 +387,8 @@ github.com/tc-hib/winres v0.2.1 h1:YDE0FiP0VmtRaDn7+aaChp1KiF4owBiJa5l964l5ujA= github.com/tc-hib/winres v0.2.1/go.mod h1:C/JaNhH3KBvhNKVbvdlDWkbMDO9H4fKKDaN7/07SSuk= github.com/tcnksm/go-httpstat v0.2.0 h1:rP7T5e5U2HfmOBmZzGgGZjBQ5/GluWUylujl0tJ04I0= github.com/tcnksm/go-httpstat v0.2.0/go.mod h1:s3JVJFtQxtBEBC9dwcdTTXS9xFnM3SXAZwPG41aurT8= -github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= -github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= +github.com/tetratelabs/wazero v1.8.0 h1:iEKu0d4c2Pd+QSRieYbnQC9yiFlMS9D+Jr0LsRmcF4g= +github.com/tetratelabs/wazero v1.8.0/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs= github.com/thedevsaddam/gojsonq/v2 v2.5.2 h1:CoMVaYyKFsVj6TjU6APqAhAvC07hTI6IQen8PHzHYY0= github.com/thedevsaddam/gojsonq/v2 v2.5.2/go.mod h1:bv6Xa7kWy82uT0LnXPE2SzGqTj33TAEeR560MdJkiXs= github.com/things-go/go-socks5 v0.0.5 h1:qvKaGcBkfDrUL33SchHN93srAmYGzb4CxSM2DPYufe8= diff --git a/vendor/github.com/tetratelabs/wazero/Makefile b/vendor/github.com/tetratelabs/wazero/Makefile index e5ae8a2619..17e264e01c 100644 --- a/vendor/github.com/tetratelabs/wazero/Makefile +++ b/vendor/github.com/tetratelabs/wazero/Makefile @@ -1,7 +1,7 @@ -gofumpt := mvdan.cc/gofumpt@v0.5.0 +gofumpt := mvdan.cc/gofumpt@v0.6.0 gosimports := github.com/rinchsan/gosimports/cmd/gosimports@v0.3.8 -golangci_lint := github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 +golangci_lint := github.com/golangci/golangci-lint/cmd/golangci-lint@v1.60.0 asmfmt := github.com/klauspost/asmfmt/cmd/asmfmt@v1.3.2 # sync this with netlify.toml! hugo := github.com/gohugoio/hugo@v0.115.2 @@ -20,22 +20,6 @@ main_packages := $(sort $(foreach f,$(dir $(main_sources)),$(if $(findstring ./, go_test_options ?= -timeout 300s -ensureCompilerFastest := -ldflags '-X github.com/tetratelabs/wazero/internal/integration_test/vs.ensureCompilerFastest=true' -.PHONY: bench -bench: - @go build ./internal/integration_test/bench/... - @# Don't use -test.benchmem as it isn't accurate when comparing against CGO libs - @for d in vs/time vs/wasmedge vs/wasmtime ; do \ - cd ./internal/integration_test/$$d ; \ - go test -bench=. . -tags='wasmedge' $(ensureCompilerFastest) ; \ - cd - ;\ - done - -bench_testdata_dir := internal/integration_test/bench/testdata -.PHONY: build.bench -build.bench: - @tinygo build -o $(bench_testdata_dir)/case.wasm -scheduler=none --no-debug -target=wasi $(bench_testdata_dir)/case.go - .PHONY: test.examples test.examples: @go test $(go_test_options) ./examples/... ./imports/assemblyscript/example/... ./imports/emscripten/... ./imports/wasi_snapshot_preview1/example/... @@ -183,7 +167,7 @@ build.spectest.threads: .PHONY: test test: - @go test $(go_test_options) $$(go list ./... | grep -vE '$(spectest_v1_dir)|$(spectest_v2_dir)') + @go test $(go_test_options) ./... @cd internal/version/testdata && go test $(go_test_options) ./... @cd internal/integration_test/fuzz/wazerolib && CGO_ENABLED=0 WASM_BINARY_PATH=testdata/test.wasm go test ./... @@ -194,17 +178,6 @@ coverage: ## Generate test coverage @go test -coverprofile=coverage.txt -covermode=atomic --coverpkg=$(coverpkg) $(main_packages) @go tool cover -func coverage.txt -.PHONY: spectest -spectest: - @$(MAKE) spectest.v1 - @$(MAKE) spectest.v2 - -spectest.v1: - @go test $(go_test_options) $$(go list ./... | grep $(spectest_v1_dir)) - -spectest.v2: - @go test $(go_test_options) $$(go list ./... | grep $(spectest_v2_dir)) - golangci_lint_path := $(shell go env GOPATH)/bin/golangci-lint $(golangci_lint_path): @@ -214,7 +187,7 @@ golangci_lint_goarch ?= $(shell go env GOARCH) .PHONY: lint lint: $(golangci_lint_path) - @GOARCH=$(golangci_lint_goarch) CGO_ENABLED=0 $(golangci_lint_path) run --timeout 5m + @GOARCH=$(golangci_lint_goarch) CGO_ENABLED=0 $(golangci_lint_path) run --timeout 5m -E testableexamples .PHONY: format format: diff --git a/vendor/github.com/tetratelabs/wazero/README.md b/vendor/github.com/tetratelabs/wazero/README.md index 657da29594..f020be99a7 100644 --- a/vendor/github.com/tetratelabs/wazero/README.md +++ b/vendor/github.com/tetratelabs/wazero/README.md @@ -1,6 +1,6 @@ # wazero: the zero dependency WebAssembly runtime for Go developers -[![WebAssembly Core Specification Test](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml/badge.svg)](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml) [![Go Reference](https://pkg.go.dev/badge/github.com/tetratelabs/wazero.svg)](https://pkg.go.dev/github.com/tetratelabs/wazero) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Go Reference](https://pkg.go.dev/badge/github.com/tetratelabs/wazero.svg)](https://pkg.go.dev/github.com/tetratelabs/wazero) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) WebAssembly is a way to safely run code compiled in other languages. Runtimes execute WebAssembly Modules (Wasm), which are most often binaries with a `.wasm` diff --git a/vendor/github.com/tetratelabs/wazero/api/wasm.go b/vendor/github.com/tetratelabs/wazero/api/wasm.go index c66b582fae..d99c1a7569 100644 --- a/vendor/github.com/tetratelabs/wazero/api/wasm.go +++ b/vendor/github.com/tetratelabs/wazero/api/wasm.go @@ -151,9 +151,13 @@ type Module interface { // ExportedFunction returns a function exported from this module or nil if it wasn't. // - // Note: The default wazero.ModuleConfig attempts to invoke `_start`, which - // in rare cases can close the module. When in doubt, check IsClosed prior - // to invoking a function export after instantiation. + // # Notes + // - The default wazero.ModuleConfig attempts to invoke `_start`, which + // in rare cases can close the module. When in doubt, check IsClosed prior + // to invoking a function export after instantiation. + // - The semantics of host functions assumes the existence of an "importing module" because, for example, the host function needs access to + // the memory of the importing module. Therefore, direct use of ExportedFunction is forbidden for host modules. + // Practically speaking, it is usually meaningless to directly call a host function from Go code as it is already somewhere in Go code. ExportedFunction(name string) Function // ExportedFunctionDefinitions returns all the exported function diff --git a/vendor/github.com/tetratelabs/wazero/builder.go b/vendor/github.com/tetratelabs/wazero/builder.go index f64afabdf0..b60a9e0978 100644 --- a/vendor/github.com/tetratelabs/wazero/builder.go +++ b/vendor/github.com/tetratelabs/wazero/builder.go @@ -179,6 +179,9 @@ type HostFunctionBuilder interface { // are deferred until Compile. // - Functions are indexed in order of calls to NewFunctionBuilder as // insertion ordering is needed by ABI such as Emscripten (invoke_*). +// - The semantics of host functions assumes the existence of an "importing module" because, for example, the host function needs access to +// the memory of the importing module. Therefore, direct use of ExportedFunction is forbidden for host modules. +// Practically speaking, it is usually meaningless to directly call a host function from Go code as it is already somewhere in Go code. type HostModuleBuilder interface { // Note: until golang/go#5860, we can't use example tests to embed code in interface godocs. @@ -341,12 +344,24 @@ func (b *hostModuleBuilder) Compile(ctx context.Context) (CompiledModule, error) return c, nil } +// hostModuleInstance is a wrapper around api.Module that prevents calling ExportedFunction. +type hostModuleInstance struct{ api.Module } + +// ExportedFunction implements api.Module ExportedFunction. +func (h hostModuleInstance) ExportedFunction(name string) api.Function { + panic("calling ExportedFunction is forbidden on host modules. See the note on ExportedFunction interface") +} + // Instantiate implements HostModuleBuilder.Instantiate func (b *hostModuleBuilder) Instantiate(ctx context.Context) (api.Module, error) { if compiled, err := b.Compile(ctx); err != nil { return nil, err } else { compiled.(*compiledModule).closeWithModule = true - return b.r.InstantiateModule(ctx, compiled, NewModuleConfig()) + m, err := b.r.InstantiateModule(ctx, compiled, NewModuleConfig()) + if err != nil { + return nil, err + } + return hostModuleInstance{m}, nil } } diff --git a/vendor/github.com/tetratelabs/wazero/cache.go b/vendor/github.com/tetratelabs/wazero/cache.go index 2d1b4e3b9c..83cdb94ef3 100644 --- a/vendor/github.com/tetratelabs/wazero/cache.go +++ b/vendor/github.com/tetratelabs/wazero/cache.go @@ -24,6 +24,13 @@ import ( // All implementations are in wazero. // - Instances of this can be reused across multiple runtimes, if configured // via RuntimeConfig. +// - The cache check happens before the compilation, so if multiple Goroutines are +// trying to compile the same module simultaneously, it is possible that they +// all compile the module. The design here is that the lock isn't held for the action "Compile" +// but only for checking and saving the compiled result. Therefore, we strongly recommend that the embedder +// does the centralized compilation in a single Goroutines (or multiple Goroutines per Wasm binary) to generate cache rather than +// trying to Compile in parallel for a single module. In other words, we always recommend to produce CompiledModule +// share it across multiple Goroutines to avoid trying to compile the same module simultaneously. type CompilationCache interface{ api.Closer } // NewCompilationCache returns a new CompilationCache to be passed to RuntimeConfig. diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go index 819a76df5e..ea7b84f443 100644 --- a/vendor/github.com/tetratelabs/wazero/config.go +++ b/vendor/github.com/tetratelabs/wazero/config.go @@ -148,7 +148,7 @@ type RuntimeConfig interface { // customSections := c.CustomSections() WithCustomSections(bool) RuntimeConfig - // WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances: + // WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances: // // - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel) // - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline) @@ -159,6 +159,8 @@ type RuntimeConfig interface { // entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated // machine codes against async Goroutine preemption" section in RATIONALE.md for detail. // + // Upon the termination of the function executions, api.Module is closed. + // // Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces // interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason, // this is disabled by default. @@ -217,9 +219,18 @@ const ( // part. wazero automatically performs ahead-of-time compilation as needed when // Runtime.CompileModule is invoked. // -// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not -// support compiler. Use NewRuntimeConfig to safely detect and fallback to -// NewRuntimeConfigInterpreter if needed. +// # Warning +// +// - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not +// support compiler. Use NewRuntimeConfig to safely detect and fallback to +// NewRuntimeConfigInterpreter if needed. +// +// - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack +// by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux, +// before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack +// for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine. +// Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow. +// https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36 func NewRuntimeConfigCompiler() RuntimeConfig { ret := engineLessConfig.clone() ret.engineKind = engineKindCompiler @@ -484,7 +495,20 @@ type ModuleConfig interface { WithFSConfig(FSConfig) ModuleConfig // WithName configures the module name. Defaults to what was decoded from - // the name section. Empty string ("") clears any name. + // the name section. Duplicate names are not allowed in a single Runtime. + // + // Calling this with the empty string "" makes the module anonymous. + // That is useful when you want to instantiate the same CompiledModule multiple times like below: + // + // for i := 0; i < N; i++ { + // // Instantiate a new Wasm module from the already compiled `compiledWasm` anonymously without a name. + // instance, err := r.InstantiateModule(ctx, compiledWasm, wazero.NewModuleConfig().WithName("")) + // // .... + // } + // + // See the `concurrent-instantiation` example for a complete usage. + // + // Non-empty named modules are available for other modules to import by name. WithName(string) ModuleConfig // WithStartFunctions configures the functions to call after the module is diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go index 443c5a294f..c75db615e6 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go @@ -21,13 +21,6 @@ type Snapshotter interface { Snapshot() Snapshot } -// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. -// The context.Context passed to a exported function invocation should have this key set -// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. -// -// Deprecated: use WithSnapshotter to enable snapshots. -type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey - // WithSnapshotter enables snapshots. // Passing the returned context to a exported function invocation enables snapshots, // and allows host functions to retrieve the Snapshotter using GetSnapshotter. @@ -35,12 +28,6 @@ func WithSnapshotter(ctx context.Context) context.Context { return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{}) } -// SnapshotterKey is a context key to access a Snapshotter from a host function. -// It is only present if EnableSnapshotter was set in the function invocation context. -// -// Deprecated: use GetSnapshotter to get the snapshotter. -type SnapshotterKey = expctxkeys.SnapshotterKey - // GetSnapshotter gets the Snapshotter from a host function. // It is only present if WithSnapshotter was called with the function invocation context. func GetSnapshotter(ctx context.Context) Snapshotter { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/importresolver.go b/vendor/github.com/tetratelabs/wazero/experimental/importresolver.go new file mode 100644 index 0000000000..36c0e22b15 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/importresolver.go @@ -0,0 +1,19 @@ +package experimental + +import ( + "context" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/expctxkeys" +) + +// ImportResolver is an experimental func type that, if set, +// will be used as the first step in resolving imports. +// See issue 2294. +// If the import name is not found, it should return nil. +type ImportResolver func(name string) api.Module + +// WithImportResolver returns a new context with the given ImportResolver. +func WithImportResolver(ctx context.Context, resolver ImportResolver) context.Context { + return context.WithValue(ctx, expctxkeys.ImportResolverKey{}, resolver) +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go index b2ba1fe834..55fc6b668e 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/listener.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go @@ -24,12 +24,6 @@ type StackIterator interface { ProgramCounter() ProgramCounter } -// FunctionListenerFactoryKey is a context.Context Value key. -// Its associated value should be a FunctionListenerFactory. -// -// Deprecated: use WithFunctionListenerFactory to enable snapshots. -type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey - // WithFunctionListenerFactory registers a FunctionListenerFactory // with the context. func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go index 761a1f9dc2..5ebc1780f4 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go @@ -23,6 +23,10 @@ const ( // instead of syscall.ENOTDIR _ERROR_DIRECTORY = syscall.Errno(0x10B) + // _ERROR_NOT_A_REPARSE_POINT is a Windows error returned by os.Readlink + // instead of syscall.EINVAL + _ERROR_NOT_A_REPARSE_POINT = syscall.Errno(0x1126) + // _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select // when a given handle is not a socket. _ERROR_INVALID_SOCKET = syscall.Errno(0x2736) @@ -51,7 +55,7 @@ func errorToErrno(err error) Errno { return EBADF case syscall.ERROR_PRIVILEGE_NOT_HELD: return EPERM - case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME: + case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME, _ERROR_NOT_A_REPARSE_POINT: return EINVAL } errno, _ := syscallToErrno(err) diff --git a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go index 384036a275..150f75cc16 100644 --- a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go +++ b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go @@ -1596,6 +1596,10 @@ func pathOpenFn(_ context.Context, mod api.Module, params []uint64) experimental return errno } + if pathLen == 0 { + return experimentalsys.EINVAL + } + fileOpenFlags := openFlags(dirflags, oflags, fdflags, rights) isDir := fileOpenFlags&experimentalsys.O_DIRECTORY != 0 @@ -1704,7 +1708,6 @@ func openFlags(dirflags, oflags, fdflags uint16, rights uint32) (openFlags exper } if oflags&wasip1.O_DIRECTORY != 0 { openFlags |= experimentalsys.O_DIRECTORY - return // Early return for directories as the rest of flags doesn't make sense for it. } else if oflags&wasip1.O_EXCL != 0 { openFlags |= experimentalsys.O_EXCL } @@ -1951,25 +1954,19 @@ func pathSymlinkFn(_ context.Context, mod api.Module, params []uint64) experimen return experimentalsys.EFAULT } - newPathBuf, ok := mem.Read(newPath, newPathLen) - if !ok { - return experimentalsys.EFAULT + _, newPathName, errno := atPath(fsc, mod.Memory(), fd, newPath, newPathLen) + if errno != 0 { + return errno } return dir.FS.Symlink( // Do not join old path since it's only resolved when dereference the link created here. // And the dereference result depends on the opening directory's file descriptor at that point. - bufToStr(oldPathBuf), - path.Join(dir.Name, bufToStr(newPathBuf)), + unsafe.String(&oldPathBuf[0], int(oldPathLen)), + newPathName, ) } -// bufToStr converts the given byte slice as string unsafely. -func bufToStr(buf []byte) string { - // TODO: use unsafe.String after flooring Go 1.20. - return *(*string)(unsafe.Pointer(&buf)) -} - // pathUnlinkFile is the WASI function named PathUnlinkFileName which unlinks a // file. // diff --git a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/poll.go b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/poll.go index d09f30245b..4f96af2df3 100644 --- a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/poll.go +++ b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/poll.go @@ -68,9 +68,7 @@ func pollOneoffFn(_ context.Context, mod api.Module, params []uint64) sys.Errno } outBuf, ok := mem.Read(out, nsubscriptions*32) // zero-out all buffer before writing - for i := range outBuf { - outBuf[i] = 0 - } + clear(outBuf) if !ok { return sys.EFAULT diff --git a/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go b/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go index 542958bc7e..03761e6ec4 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go +++ b/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go @@ -154,11 +154,6 @@ func (t *Table[Key, Item]) Range(f func(Key, Item) bool) { // Reset clears the content of the table. func (t *Table[Key, Item]) Reset() { - for i := range t.masks { - t.masks[i] = 0 - } - var zero Item - for i := range t.items { - t.items[i] = zero - } + clear(t.masks) + clear(t.items) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go index 56dfac6206..4e20e4b2cb 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go @@ -26,11 +26,14 @@ const ( type ( controlFrame struct { frameID uint32 - // originalStackLen holds the number of values on the stack + // originalStackLenWithoutParam holds the number of values on the stack // when Start executing this control frame minus params for the block. originalStackLenWithoutParam int - blockType *wasm.FunctionType - kind controlFrameKind + // originalStackLenWithoutParamUint64 is almost the same as originalStackLenWithoutParam + // except that it holds the number of values on the stack in uint64. + originalStackLenWithoutParamUint64 int + blockType *wasm.FunctionType + kind controlFrameKind } controlFrames struct{ frames []controlFrame } ) @@ -157,9 +160,11 @@ type compiler struct { enabledFeatures api.CoreFeatures callFrameStackSizeInUint64 int stack []unsignedType - currentFrameID uint32 - controlFrames controlFrames - unreachableState struct { + // stackLenInUint64 is the length of the stack in uint64. + stackLenInUint64 int + currentFrameID uint32 + controlFrames controlFrames + unreachableState struct { on bool depth int } @@ -341,6 +346,7 @@ func (c *compiler) Next() (*compilationResult, error) { c.pc = 0 c.currentOpPC = 0 c.currentFrameID = 0 + c.stackLenInUint64 = 0 c.unreachableState.on, c.unreachableState.depth = false, 0 if err := c.compile(sig, code.Body, code.LocalTypes, code.BodyOffsetInCodeSection); err != nil { @@ -449,10 +455,11 @@ operatorSwitch: // Create a new frame -- entering this block. frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), - kind: controlFrameKindBlockWithoutContinuationLabel, - blockType: bt, + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, + kind: controlFrameKindBlockWithoutContinuationLabel, + blockType: bt, } c.controlFrames.push(frame) @@ -473,10 +480,11 @@ operatorSwitch: // Create a new frame -- entering loop. frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), - kind: controlFrameKindLoop, - blockType: bt, + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, + kind: controlFrameKindLoop, + blockType: bt, } c.controlFrames.push(frame) @@ -515,8 +523,9 @@ operatorSwitch: // Create a new frame -- entering if. frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, // Note this will be set to controlFrameKindIfWithElse // when else opcode found later. kind: controlFrameKindIfWithoutElse, @@ -543,7 +552,7 @@ operatorSwitch: // If it is currently in unreachable, and the non-nested if, // reset the stack so we can correctly handle the else block. top := c.controlFrames.top() - c.stack = c.stack[:top.originalStackLenWithoutParam] + c.stackSwitchAt(top) top.kind = controlFrameKindIfWithElse // Re-push the parameters to the if block so that else block can use them. @@ -572,7 +581,7 @@ operatorSwitch: // Reset the stack manipulated by the then block, and re-push the block param types to the stack. - c.stack = c.stack[:frame.originalStackLenWithoutParam] + c.stackSwitchAt(frame) for _, t := range frame.blockType.Params { c.stackPush(wasmValueTypeTounsignedType(t)) } @@ -601,7 +610,7 @@ operatorSwitch: return nil } - c.stack = c.stack[:frame.originalStackLenWithoutParam] + c.stackSwitchAt(frame) for _, t := range frame.blockType.Results { c.stackPush(wasmValueTypeTounsignedType(t)) } @@ -628,7 +637,7 @@ operatorSwitch: // We need to reset the stack so that // the values pushed inside the block. dropOp := newOperationDrop(c.getFrameDropRange(frame, true)) - c.stack = c.stack[:frame.originalStackLenWithoutParam] + c.stackSwitchAt(frame) // Push the result types onto the stack. for _, t := range frame.blockType.Results { @@ -3505,6 +3514,11 @@ func (c *compiler) stackPeek() (ret unsignedType) { return } +func (c *compiler) stackSwitchAt(frame *controlFrame) { + c.stack = c.stack[:frame.originalStackLenWithoutParam] + c.stackLenInUint64 = frame.originalStackLenWithoutParamUint64 +} + func (c *compiler) stackPop() (ret unsignedType) { // No need to check stack bound // as we can assume that all the operations @@ -3512,11 +3526,13 @@ func (c *compiler) stackPop() (ret unsignedType) { // at module validation phase. ret = c.stack[len(c.stack)-1] c.stack = c.stack[:len(c.stack)-1] + c.stackLenInUint64 -= 1 + int(unsignedTypeV128&ret>>2) return } func (c *compiler) stackPush(ts unsignedType) { c.stack = append(c.stack, ts) + c.stackLenInUint64 += 1 + int(unsignedTypeV128&ts>>2) } // emit adds the operations into the result. @@ -3565,7 +3581,7 @@ func (c *compiler) emitDefaultValue(t wasm.ValueType) { // of the n-th local. func (c *compiler) localDepth(index wasm.Index) int { height := c.localIndexToStackHeightInUint64[index] - return c.stackLenInUint64(len(c.stack)) - 1 - int(height) + return c.stackLenInUint64 - 1 - height } func (c *compiler) localType(index wasm.Index) (t wasm.ValueType) { @@ -3592,14 +3608,7 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR } else { start = frame.blockType.ResultNumInUint64 } - var end int - if frame.kind == controlFrameKindFunction { - // On the function return, we eliminate all the contents on the stack - // including locals (existing below of frame.originalStackLen) - end = c.stackLenInUint64(len(c.stack)) - 1 - } else { - end = c.stackLenInUint64(len(c.stack)) - 1 - c.stackLenInUint64(frame.originalStackLenWithoutParam) - } + end := c.stackLenInUint64 - 1 - frame.originalStackLenWithoutParamUint64 if start <= end { return inclusiveRange{Start: int32(start), End: int32(end)} } else { @@ -3607,17 +3616,6 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR } } -func (c *compiler) stackLenInUint64(ceil int) (ret int) { - for i := 0; i < ceil; i++ { - if c.stack[i] == unsignedTypeV128 { - ret += 2 - } else { - ret++ - } - } - return -} - func (c *compiler) readMemoryArg(tag string) (memoryArg, error) { c.result.UsesMemory = true alignment, num, err := leb128.LoadUint32(c.body[c.pc+1:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index a89ddc4573..ee0b453ca0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -98,6 +98,9 @@ func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) { // OwnsGlobals implements the same method as documented on wasm.ModuleEngine. func (e *moduleEngine) OwnsGlobals() bool { return false } +// MemoryGrown implements wasm.ModuleEngine. +func (e *moduleEngine) MemoryGrown() {} + // callEngine holds context per moduleEngine.Call, and shared across all the // function calls originating from the same moduleEngine.Call execution. // @@ -3898,14 +3901,9 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance case operationKindV128Dot: x2Hi, x2Lo := ce.popValue(), ce.popValue() x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue( - uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) | - (uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32), - ) - ce.pushValue( - uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) | - (uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32), - ) + lo, hi := v128Dot(x1Hi, x1Lo, x2Hi, x2Lo) + ce.pushValue(lo) + ce.pushValue(hi) frame.pc++ case operationKindV128ITruncSatFromF: hi, lo := ce.popValue(), ce.popValue() @@ -4581,3 +4579,18 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen] } } + +// v128Dot performs a dot product of two 64-bit vectors. +// Note: for some reason (which I suspect is due to a bug in Go compiler's regalloc), +// inlining this function causes a bug which happens **only when** we run with -race AND arm64 AND Go 1.22. +func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) { + r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0)) + r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16)) + r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32)) + r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48)) + r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0)) + r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16)) + r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32)) + r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48)) + return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32), uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go index 59bbfe02d2..62d3650152 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go @@ -69,7 +69,7 @@ type Compiler interface { AllocateVReg(typ ssa.Type) regalloc.VReg // ValueDefinition returns the definition of the given value. - ValueDefinition(ssa.Value) *SSAValueDefinition + ValueDefinition(ssa.Value) SSAValueDefinition // VRegOf returns the virtual register of the given ssa.Value. VRegOf(value ssa.Value) regalloc.VReg @@ -79,13 +79,13 @@ type Compiler interface { // MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID, // and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group. - MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool + MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool // MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode, // this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid. // // Note: caller should be careful to avoid excessive allocation on opcodes slice. - MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode + MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode // AddRelocationInfo appends the relocation information for the function reference at the current buffer offset. AddRelocationInfo(funcRef ssa.FuncRef) @@ -126,10 +126,7 @@ type compiler struct { nextVRegID regalloc.VRegID // ssaValueToVRegs maps ssa.ValueID to regalloc.VReg. ssaValueToVRegs [] /* VRegID to */ regalloc.VReg - // ssaValueDefinitions maps ssa.ValueID to its definition. - ssaValueDefinitions []SSAValueDefinition - // ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts(). - ssaValueRefCounts []int + ssaValuesInfo []ssa.ValueInfo // returnVRegs is the list of virtual registers that store the return values. returnVRegs []regalloc.VReg varEdges [][2]regalloc.VReg @@ -206,15 +203,10 @@ func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) { // assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder. func (c *compiler) assignVirtualRegisters() { builder := c.ssaBuilder - refCounts := builder.ValueRefCounts() - c.ssaValueRefCounts = refCounts + c.ssaValuesInfo = builder.ValuesInfo() - need := len(refCounts) - if need >= len(c.ssaValueToVRegs) { - c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...) - } - if need >= len(c.ssaValueDefinitions) { - c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...) + if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 { + c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...) } for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { @@ -225,40 +217,26 @@ func (c *compiler) assignVirtualRegisters() { typ := p.Type() vreg := c.AllocateVReg(typ) c.ssaValueToVRegs[pid] = vreg - c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg} c.ssaTypeOfVRegID[vreg.ID()] = p.Type() } // Assigns each value to a virtual register produced by instructions. for cur := blk.Root(); cur != nil; cur = cur.Next() { r, rs := cur.Returns() - var N int if r.Valid() { id := r.ID() ssaTyp := r.Type() typ := r.Type() vReg := c.AllocateVReg(typ) c.ssaValueToVRegs[id] = vReg - c.ssaValueDefinitions[id] = SSAValueDefinition{ - Instr: cur, - N: 0, - RefCount: refCounts[id], - } c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp - N++ } for _, r := range rs { id := r.ID() ssaTyp := r.Type() vReg := c.AllocateVReg(ssaTyp) c.ssaValueToVRegs[id] = vReg - c.ssaValueDefinitions[id] = SSAValueDefinition{ - Instr: cur, - N: N, - RefCount: refCounts[id], - } c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp - N++ } } } @@ -299,8 +277,12 @@ func (c *compiler) Init() { } // ValueDefinition implements Compiler.ValueDefinition. -func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition { - return &c.ssaValueDefinitions[value.ID()] +func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition { + return SSAValueDefinition{ + V: value, + Instr: c.ssaBuilder.InstructionOfValue(value), + RefCount: c.ssaValuesInfo[value.ID()].RefCount, + } } // VRegOf implements Compiler.VRegOf. @@ -319,7 +301,7 @@ func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type { } // MatchInstr implements Compiler.MatchInstr. -func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool { +func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool { instr := def.Instr return def.IsFromInstr() && instr.Opcode() == opcode && @@ -328,7 +310,7 @@ func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool { } // MatchInstrOneOf implements Compiler.MatchInstrOneOf. -func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { +func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { instr := def.Instr if !def.IsFromInstr() { return ssa.OpcodeInvalid diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go index 80e65668ad..735cfa3d35 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go @@ -9,7 +9,7 @@ import ( func (c *compiler) Lower() { c.assignVirtualRegisters() c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature())) - c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax()) + c.mach.StartLoweringFunction(c.ssaBuilder.BlockIDMax()) c.lowerBlocks() } @@ -20,12 +20,11 @@ func (c *compiler) lowerBlocks() { c.lowerBlock(blk) } - ectx := c.mach.ExecutableContext() // After lowering all blocks, we need to link adjacent blocks to layout one single instruction list. var prev ssa.BasicBlock for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() { if prev != nil { - ectx.LinkAdjacentBlocks(prev, next) + c.mach.LinkAdjacentBlocks(prev, next) } prev = next } @@ -33,8 +32,7 @@ func (c *compiler) lowerBlocks() { func (c *compiler) lowerBlock(blk ssa.BasicBlock) { mach := c.mach - ectx := mach.ExecutableContext() - ectx.StartBlock(blk) + mach.StartBlock(blk) // We traverse the instructions in reverse order because we might want to lower multiple // instructions together. @@ -76,7 +74,7 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) { default: mach.LowerInstr(cur) } - ectx.FlushPendingInstructions() + mach.FlushPendingInstructions() } // Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg. @@ -84,7 +82,7 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) { c.lowerFunctionArguments(blk) } - ectx.EndBlock() + mach.EndBlock() } // lowerBranches is called right after StartBlock and before any LowerInstr call if @@ -93,23 +91,24 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) { // // See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock. func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { - ectx := c.mach.ExecutableContext() + mach := c.mach c.setCurrentGroupID(br0.GroupID()) c.mach.LowerSingleBranch(br0) - ectx.FlushPendingInstructions() + mach.FlushPendingInstructions() if br1 != nil { c.setCurrentGroupID(br1.GroupID()) c.mach.LowerConditionalBranch(br1) - ectx.FlushPendingInstructions() + mach.FlushPendingInstructions() } if br0.Opcode() == ssa.OpcodeJump { - _, args, target := br0.BranchData() + _, args, targetBlockID := br0.BranchData() argExists := len(args) != 0 if argExists && br1 != nil { panic("BUG: critical edge split failed") } + target := c.ssaBuilder.BasicBlock(targetBlockID) if argExists && target.ReturnBlock() { if len(args) > 0 { c.mach.LowerReturns(args) @@ -118,24 +117,25 @@ func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { c.lowerBlockArguments(args, target) } } - ectx.FlushPendingInstructions() + mach.FlushPendingInstructions() } func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) { - ectx := c.mach.ExecutableContext() + mach := c.mach c.tmpVals = c.tmpVals[:0] + data := c.ssaBuilder.ValuesInfo() for i := 0; i < entry.Params(); i++ { p := entry.Param(i) - if c.ssaValueRefCounts[p.ID()] > 0 { + if data[p.ID()].RefCount > 0 { c.tmpVals = append(c.tmpVals, p) } else { // If the argument is not used, we can just pass an invalid value. c.tmpVals = append(c.tmpVals, ssa.ValueInvalid) } } - c.mach.LowerParams(c.tmpVals) - ectx.FlushPendingInstructions() + mach.LowerParams(c.tmpVals) + mach.FlushPendingInstructions() } // lowerBlockArguments lowers how to pass arguments to the given successor block. @@ -152,12 +152,12 @@ func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) { src := args[i] dstReg := c.VRegOf(dst) - srcDef := c.ssaValueDefinitions[src.ID()] - if srcDef.IsFromInstr() && srcDef.Instr.Constant() { + srcInstr := c.ssaBuilder.InstructionOfValue(src) + if srcInstr != nil && srcInstr.Constant() { c.constEdges = append(c.constEdges, struct { cInst *ssa.Instruction dst regalloc.VReg - }{cInst: srcDef.Instr, dst: dstReg}) + }{cInst: srcInstr, dst: dstReg}) } else { srcReg := c.VRegOf(src) // Even when the src=dst, insert the move so that we can keep such registers keep-alive. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go deleted file mode 100644 index 81c6a6b62e..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go +++ /dev/null @@ -1,219 +0,0 @@ -package backend - -import ( - "fmt" - "math" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ExecutableContext interface { - // StartLoweringFunction is called when the lowering of the given function is started. - // maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function. - StartLoweringFunction(maximumBlockID ssa.BasicBlockID) - - // LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. - LinkAdjacentBlocks(prev, next ssa.BasicBlock) - - // StartBlock is called when the compilation of the given block is started. - // The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with - // ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. - StartBlock(ssa.BasicBlock) - - // EndBlock is called when the compilation of the current block is finished. - EndBlock() - - // FlushPendingInstructions flushes the pending instructions to the buffer. - // This will be called after the lowering of each SSA Instruction. - FlushPendingInstructions() -} - -type ExecutableContextT[Instr any] struct { - CurrentSSABlk ssa.BasicBlock - - // InstrPool is the InstructionPool of instructions. - InstructionPool wazevoapi.Pool[Instr] - asNop func(*Instr) - setNext func(*Instr, *Instr) - setPrev func(*Instr, *Instr) - - // RootInstr is the root instruction of the executable. - RootInstr *Instr - labelPositionPool wazevoapi.Pool[LabelPosition[Instr]] - NextLabel Label - // LabelPositions maps a label to the instructions of the region which the label represents. - LabelPositions map[Label]*LabelPosition[Instr] - OrderedBlockLabels []*LabelPosition[Instr] - - // PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. - PerBlockHead, PerBlockEnd *Instr - // PendingInstructions are the instructions which are not yet emitted into the instruction list. - PendingInstructions []*Instr - - // SsaBlockIDToLabels maps an SSA block ID to the label. - SsaBlockIDToLabels []Label -} - -func NewExecutableContextT[Instr any]( - resetInstruction func(*Instr), - setNext func(*Instr, *Instr), - setPrev func(*Instr, *Instr), - asNop func(*Instr), -) *ExecutableContextT[Instr] { - return &ExecutableContextT[Instr]{ - InstructionPool: wazevoapi.NewPool[Instr](resetInstruction), - asNop: asNop, - setNext: setNext, - setPrev: setPrev, - labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), - LabelPositions: make(map[Label]*LabelPosition[Instr]), - NextLabel: LabelInvalid, - } -} - -func resetLabelPosition[T any](l *LabelPosition[T]) { - *l = LabelPosition[T]{} -} - -// StartLoweringFunction implements ExecutableContext. -func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) { - imax := int(max) - if len(e.SsaBlockIDToLabels) <= imax { - // Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration. - e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...) - } -} - -func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) { - e.CurrentSSABlk = blk - - l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] - if l == LabelInvalid { - l = e.AllocateLabel() - e.SsaBlockIDToLabels[blk.ID()] = l - } - - end := e.allocateNop0() - e.PerBlockHead, e.PerBlockEnd = end, end - - labelPos, ok := e.LabelPositions[l] - if !ok { - labelPos = e.AllocateLabelPosition(l) - e.LabelPositions[l] = labelPos - } - e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos) - labelPos.Begin, labelPos.End = end, end - labelPos.SB = blk -} - -// EndBlock implements ExecutableContext. -func (e *ExecutableContextT[T]) EndBlock() { - // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. - e.insertAtPerBlockHead(e.allocateNop0()) - - l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] - e.LabelPositions[l].Begin = e.PerBlockHead - - if e.CurrentSSABlk.EntryBlock() { - e.RootInstr = e.PerBlockHead - } -} - -func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) { - if e.PerBlockHead == nil { - e.PerBlockHead = i - e.PerBlockEnd = i - return - } - e.setNext(i, e.PerBlockHead) - e.setPrev(e.PerBlockHead, i) - e.PerBlockHead = i -} - -// FlushPendingInstructions implements ExecutableContext. -func (e *ExecutableContextT[T]) FlushPendingInstructions() { - l := len(e.PendingInstructions) - if l == 0 { - return - } - for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. - e.insertAtPerBlockHead(e.PendingInstructions[i]) - } - e.PendingInstructions = e.PendingInstructions[:0] -} - -func (e *ExecutableContextT[T]) Reset() { - e.labelPositionPool.Reset() - e.InstructionPool.Reset() - for l := Label(0); l <= e.NextLabel; l++ { - delete(e.LabelPositions, l) - } - e.PendingInstructions = e.PendingInstructions[:0] - e.OrderedBlockLabels = e.OrderedBlockLabels[:0] - e.RootInstr = nil - e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0] - e.PerBlockHead, e.PerBlockEnd = nil, nil - e.NextLabel = LabelInvalid -} - -// AllocateLabel allocates an unused label. -func (e *ExecutableContextT[T]) AllocateLabel() Label { - e.NextLabel++ - return e.NextLabel -} - -func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] { - l := e.labelPositionPool.Allocate() - l.L = la - return l -} - -func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { - if blk.ReturnBlock() { - return LabelReturn - } - l := e.SsaBlockIDToLabels[blk.ID()] - if l == LabelInvalid { - l = e.AllocateLabel() - e.SsaBlockIDToLabels[blk.ID()] = l - } - return l -} - -func (e *ExecutableContextT[T]) allocateNop0() *T { - i := e.InstructionPool.Allocate() - e.asNop(i) - return i -} - -// LinkAdjacentBlocks implements backend.Machine. -func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { - prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)] - nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)] - e.setNext(prevLabelPos.End, nextLabelPos.Begin) -} - -// LabelPosition represents the regions of the generated code which the label represents. -type LabelPosition[Instr any] struct { - SB ssa.BasicBlock - L Label - Begin, End *Instr - BinaryOffset int64 -} - -// Label represents a position in the generated code which is either -// a real instruction or the constant InstructionPool (e.g. jump tables). -// -// This is exactly the same as the traditional "label" in assembly code. -type Label uint32 - -const ( - LabelInvalid Label = 0 - LabelReturn Label = math.MaxUint32 -) - -// String implements backend.Machine. -func (l Label) String() string { - return fmt.Sprintf("L%d", l) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go index 751050aff0..96f035e582 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedVRegs = []regalloc.VReg{ // CompileGoFunctionTrampoline implements backend.Machine. func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - ectx := m.ectx argBegin := 1 // Skips exec context by default. if needModuleContextPtr { argBegin++ @@ -25,7 +24,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * m.currentABI = abi cur := m.allocateNop() - ectx.RootInstr = cur + m.rootInstr = cur // Execution context is always the first argument. execCtrPtr := raxVReg @@ -272,7 +271,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.revertRBPRSP(cur) linkInstr(cur, m.allocateInstr().asRet()) - m.encodeWithoutSSA(ectx.RootInstr) + m.encodeWithoutSSA(m.rootInstr) return m.c.Buf() } @@ -347,10 +346,8 @@ var stackGrowSaveVRegs = []regalloc.VReg{ // CompileStackGrowCallSequence implements backend.Machine. func (m *machine) CompileStackGrowCallSequence() []byte { - ectx := m.ectx - cur := m.allocateNop() - ectx.RootInstr = cur + m.rootInstr = cur cur = m.setupRBPRSP(cur) @@ -379,7 +376,7 @@ func (m *machine) CompileStackGrowCallSequence() []byte { cur = m.revertRBPRSP(cur) linkInstr(cur, m.allocateInstr().asRet()) - m.encodeWithoutSSA(ectx.RootInstr) + m.encodeWithoutSSA(m.rootInstr) return m.c.Buf() } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go index d27e79c0e5..6a3e58f51f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -17,16 +17,6 @@ type instruction struct { kind instructionKind } -// Next implements regalloc.Instr. -func (i *instruction) Next() regalloc.Instr { - return i.next -} - -// Prev implements regalloc.Instr. -func (i *instruction) Prev() regalloc.Instr { - return i.prev -} - // IsCall implements regalloc.Instr. func (i *instruction) IsCall() bool { return i.kind == call } @@ -36,9 +26,6 @@ func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect } // IsReturn implements regalloc.Instr. func (i *instruction) IsReturn() bool { return i.kind == ret } -// AddedBeforeRegAlloc implements regalloc.Instr. -func (i *instruction) AddedBeforeRegAlloc() bool { return i.addedBeforeRegAlloc } - // String implements regalloc.Instr. func (i *instruction) String() string { switch i.kind { @@ -651,26 +638,14 @@ func resetInstruction(i *instruction) { *i = instruction{} } -func setNext(i *instruction, next *instruction) { - i.next = next -} - -func setPrev(i *instruction, prev *instruction) { - i.prev = prev -} - -func asNop(i *instruction) { - i.kind = nop0 -} - -func (i *instruction) asNop0WithLabel(label backend.Label) *instruction { //nolint +func (i *instruction) asNop0WithLabel(label label) *instruction { //nolint i.kind = nop0 i.u1 = uint64(label) return i } -func (i *instruction) nop0Label() backend.Label { - return backend.Label(i.u1) +func (i *instruction) nop0Label() label { + return label(i.u1) } type instructionKind byte @@ -1161,7 +1136,7 @@ func (i *instruction) asJmp(target operand) *instruction { return i } -func (i *instruction) jmpLabel() backend.Label { +func (i *instruction) jmpLabel() label { switch i.kind { case jmp, jmpIf, lea, xmmUnaryRmR: return i.op1.label() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go index bee673d25c..befe8c6436 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go @@ -130,9 +130,9 @@ func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode { } } -func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend { - if x.IsFromBlockParam() { - return addend{x.BlkParamVReg, 0, 0} +func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend { + if !x.IsFromInstr() { + return addend{m.c.VRegOf(x.V), 0, 0} } // Ensure the addend is not referenced in multiple places; we will discard nested Iadds. op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 310ad2203a..aeeb6b6454 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -16,18 +16,13 @@ import ( // NewBackend returns a new backend for arm64. func NewBackend() backend.Machine { - ectx := backend.NewExecutableContextT[instruction]( - resetInstruction, - setNext, - setPrev, - asNop, - ) - return &machine{ - ectx: ectx, + m := &machine{ cpuFeatures: platform.CpuFeatures, - regAlloc: regalloc.NewAllocator(regInfo), + regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), spillSlots: map[regalloc.VRegID]int64{}, amodePool: wazevoapi.NewPool[amode](nil), + labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), + instrPool: wazevoapi.NewPool[instruction](resetInstruction), constSwizzleMaskConstIndex: -1, constSqmulRoundSatIndex: -1, constI8x16SHLMaskTableIndex: -1, @@ -41,23 +36,46 @@ func NewBackend() backend.Machine { constExtAddPairwiseI16x8uMask1Index: -1, constExtAddPairwiseI16x8uMask2Index: -1, } + m.regAllocFn.m = m + return m } type ( // machine implements backend.Machine for amd64. machine struct { c backend.Compiler - ectx *backend.ExecutableContextT[instruction] stackBoundsCheckDisabled bool + instrPool wazevoapi.Pool[instruction] amodePool wazevoapi.Pool[amode] cpuFeatures platform.CpuFeatureFlags - regAlloc regalloc.Allocator - regAllocFn *backend.RegAllocFunction[*instruction, *machine] + regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] + regAllocFn regAllocFn regAllocStarted bool + // labelPositionPool is the pool of labelPosition. The id is the label where + // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. + labelPositionPool wazevoapi.IDedPool[labelPosition] + // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID + // so that we can have an identical label for the SSA block ID, which is useful for debugging. + nextLabel label + // rootInstr is the first instruction of the function. + rootInstr *instruction + // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. + currentLabelPos *labelPosition + // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. + orderedSSABlockLabelPos []*labelPosition + // returnLabelPos is the labelPosition for the return block. + returnLabelPos labelPosition + // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. + perBlockHead, perBlockEnd *instruction + // pendingInstructions are the instructions which are not yet emitted into the instruction list. + pendingInstructions []*instruction + // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. + maxSSABlockID label + spillSlotSize int64 spillSlots map[regalloc.VRegID]int64 currentABI *backend.FunctionABI @@ -67,8 +85,11 @@ type ( labelResolutionPends []labelResolutionPend + // jmpTableTargets holds the labels of the jump table targets. jmpTableTargets [][]uint32 - consts []_const + // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. + jmpTableTargetsNext int + consts []_const constSwizzleMaskConstIndex, constSqmulRoundSatIndex, constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, @@ -79,9 +100,10 @@ type ( } _const struct { - lo, hi uint64 - _var []byte - label *labelPosition + lo, hi uint64 + _var []byte + label label + labelPos *labelPosition } labelResolutionPend struct { @@ -90,22 +112,73 @@ type ( // imm32Offset is the offset of the last 4 bytes of the instruction. imm32Offset int64 } +) - labelPosition = backend.LabelPosition[instruction] +type ( + // label represents a position in the generated code which is either + // a real instruction or the constant InstructionPool (e.g. jump tables). + // + // This is exactly the same as the traditional "label" in assembly code. + label uint32 + + // labelPosition represents the regions of the generated code which the label represents. + // This implements regalloc.Block. + labelPosition struct { + // sb is not nil if this corresponds to a ssa.BasicBlock. + sb ssa.BasicBlock + // cur is used to walk through the instructions in the block during the register allocation. + cur, + // begin and end are the first and last instructions of the block. + begin, end *instruction + // binaryOffset is the offset in the binary where the label is located. + binaryOffset int64 + } ) -func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) backend.Label { +// String implements backend.Machine. +func (l label) String() string { + return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { + *l = labelPosition{} +} + +const labelReturn = math.MaxUint32 + +func ssaBlockLabel(sb ssa.BasicBlock) label { + if sb.ReturnBlock() { + return labelReturn + } + return label(sb.ID()) +} + +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { + if sb.ReturnBlock() { + m.returnLabelPos.sb = sb + return &m.returnLabelPos + } + + l := ssaBlockLabel(sb) + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.sb = sb + return pos +} + +func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) label { index := *i if index == -1 { - label := m.allocateLabel() + l, pos := m.allocateLabel() index = len(m.consts) m.consts = append(m.consts, _const{ - _var: _var, - label: label, + _var: _var, + label: l, + labelPos: pos, }) *i = index } - return m.consts[index].label.L + return m.consts[index].label } // Reset implements backend.Machine. @@ -120,18 +193,20 @@ func (m *machine) Reset() { } m.stackBoundsCheckDisabled = false - m.ectx.Reset() - - m.regAllocFn.Reset() m.regAlloc.Reset() + m.labelPositionPool.Reset() + m.instrPool.Reset() m.regAllocStarted = false m.clobberedRegs = m.clobberedRegs[:0] m.spillSlotSize = 0 m.maxRequiredStackSizeForCalls = 0 + m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil + m.pendingInstructions = m.pendingInstructions[:0] + m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] m.amodePool.Reset() - m.jmpTableTargets = m.jmpTableTargets[:0] + m.jmpTableTargetsNext = 0 m.constSwizzleMaskConstIndex = -1 m.constSqmulRoundSatIndex = -1 m.constI8x16SHLMaskTableIndex = -1 @@ -146,8 +221,63 @@ func (m *machine) Reset() { m.constExtAddPairwiseI16x8uMask2Index = -1 } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { return m.ectx } +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { + m.maxSSABlockID = label(maxBlockID) + m.nextLabel = label(maxBlockID) + 1 +} + +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { + prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) + prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { + m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) + labelPos := m.currentLabelPos + end := m.allocateNop() + m.perBlockHead, m.perBlockEnd = end, end + labelPos.begin, labelPos.end = end, end + m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { + // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. + m.insertAtPerBlockHead(m.allocateNop()) + + m.currentLabelPos.begin = m.perBlockHead + + if m.currentLabelPos.sb.EntryBlock() { + m.rootInstr = m.perBlockHead + } +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { + if m.perBlockHead == nil { + m.perBlockHead = i + m.perBlockEnd = i + return + } + + i.next = m.perBlockHead + m.perBlockHead.prev = i + m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { + l := len(m.pendingInstructions) + if l == 0 { + return + } + for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. + m.insertAtPerBlockHead(m.pendingInstructions[i]) + } + m.pendingInstructions = m.pendingInstructions[:0] +} // DisableStackCheck implements backend.Machine. func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } @@ -155,23 +285,17 @@ func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } // SetCompiler implements backend.Machine. func (m *machine) SetCompiler(c backend.Compiler) { m.c = c - m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, c.SSABuilder(), c) + m.regAllocFn.ssaB = c.SSABuilder() } // SetCurrentABI implements backend.Machine. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { - m.currentABI = abi -} +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { m.currentABI = abi } // RegAlloc implements backend.Machine. func (m *machine) RegAlloc() { rf := m.regAllocFn - for _, pos := range m.ectx.OrderedBlockLabels { - rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) - } - m.regAllocStarted = true - m.regAlloc.DoAllocation(rf) + m.regAlloc.DoAllocation(&rf) // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 } @@ -184,49 +308,54 @@ func (m *machine) InsertReturn() { // LowerSingleBranch implements backend.Machine. func (m *machine) LowerSingleBranch(b *ssa.Instruction) { - ectx := m.ectx switch b.Opcode() { case ssa.OpcodeJump: - _, _, targetBlk := b.BranchData() + _, _, targetBlkID := b.BranchData() if b.IsFallthroughJump() { return } jmp := m.allocateInstr() - target := ectx.GetOrAllocateSSABlockLabel(targetBlk) - if target == backend.LabelReturn { + target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) + if target == labelReturn { jmp.asRet() } else { jmp.asJmp(newOperandLabel(target)) } m.insert(jmp) case ssa.OpcodeBrTable: - index, target := b.BrTableData() - m.lowerBrTable(index, target) + index, targetBlkIDs := b.BrTableData() + m.lowerBrTable(index, targetBlkIDs) default: panic("BUG: unexpected branch opcode" + b.Opcode().String()) } } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { - // TODO: reuse the slice! - labels := make([]uint32, len(targets)) - for j, target := range targets { - labels[j] = uint32(m.ectx.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { + if m.jmpTableTargetsNext == len(m.jmpTableTargets) { + m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) + } + + index = m.jmpTableTargetsNext + m.jmpTableTargetsNext++ + m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] + for _, targetBlockID := range targets.View() { + target := m.c.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) + m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(ssaBlockLabel(target))) } - index = len(m.jmpTableTargets) - m.jmpTableTargets = append(m.jmpTableTargets, labels) return } var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} -func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { +func (m *machine) lowerBrTable(index ssa.Value, targets ssa.Values) { _v := m.getOperand_Reg(m.c.ValueDefinition(index)) v := m.copyToTmp(_v.reg()) + targetCount := len(targets.View()) + // First, we need to do the bounds check. maxIndex := m.c.AllocateVReg(ssa.TypeI32) - m.lowerIconst(maxIndex, uint64(len(targets)-1), false) + m.lowerIconst(maxIndex, uint64(targetCount-1), false) cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false) m.insert(cmp) @@ -255,23 +384,22 @@ func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { jmpTable := m.allocateInstr() targetSliceIndex := m.addJmpTableTarget(targets) - jmpTable.asJmpTableSequence(targetSliceIndex, len(targets)) + jmpTable.asJmpTableSequence(targetSliceIndex, targetCount) m.insert(jmpTable) } // LowerConditionalBranch implements backend.Machine. func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - exctx := m.ectx - cval, args, targetBlk := b.BranchData() + cval, args, targetBlkID := b.BranchData() if len(args) > 0 { panic(fmt.Sprintf( "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - exctx.CurrentSSABlk, - targetBlk, + m.currentLabelPos.sb, + targetBlkID, )) } - target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) cvalDef := m.c.ValueDefinition(cval) switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { @@ -1272,9 +1400,9 @@ func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) { } load := m.allocateInstr() - constLabel := m.allocateLabel() - m.consts = append(m.consts, _const{label: constLabel, lo: lo, hi: hi}) - load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(constLabel.L)), dst) + l, pos := m.allocateLabel() + m.consts = append(m.consts, _const{label: l, labelPos: pos, lo: lo, hi: hi}) + load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(l)), dst) m.insert(load) } @@ -1473,21 +1601,24 @@ func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value, jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l)) } -func (m *machine) tryLowerBandToFlag(x, y *backend.SSAValueDefinition) (ok bool) { - var target *backend.SSAValueDefinition +func (m *machine) tryLowerBandToFlag(x, y backend.SSAValueDefinition) (ok bool) { + var target backend.SSAValueDefinition + var got bool if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 { if m.c.MatchInstr(y, ssa.OpcodeBand) { target = y + got = true } } if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 { if m.c.MatchInstr(x, ssa.OpcodeBand) { target = x + got = true } } - if target == nil { + if !got { return false } @@ -1522,7 +1653,7 @@ func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) ( return } -func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel backend.Label) { +func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel label) { exitCodeReg := rbpVReg saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) @@ -1819,9 +1950,9 @@ func (m *machine) lowerCall(si *ssa.Instruction) { // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, stackSlotSize int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) { arg := &a.Args[argIndex] - if def != nil && def.IsFromInstr() { + if def.IsFromInstr() { // Constant instructions are inlined. if inst := def.Instr; inst.Constant() { m.insertLoadConstant(inst, reg) @@ -1904,23 +2035,20 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { // Format implements backend.Machine. func (m *machine) Format() string { - ectx := m.ectx - begins := map[*instruction]backend.Label{} - for l, pos := range ectx.LabelPositions { - begins[pos.Begin] = l - } - - irBlocks := map[backend.Label]ssa.BasicBlockID{} - for i, l := range ectx.SsaBlockIDToLabels { - irBlocks[l] = ssa.BasicBlockID(i) + begins := map[*instruction]label{} + for l := label(0); l < m.nextLabel; l++ { + pos := m.labelPositionPool.Get(int(l)) + if pos != nil { + begins[pos.begin] = l + } } var lines []string - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { if l, ok := begins[cur]; ok { var labelStr string - if blkID, ok := irBlocks[l]; ok { - labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + if l <= m.maxSSABlockID { + labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, l) } else { labelStr = fmt.Sprintf("%s:", l) } @@ -1933,9 +2061,9 @@ func (m *machine) Format() string { } for _, vc := range m.consts { if vc._var == nil { - lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label.L, vc.lo, vc.hi)) + lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label, vc.lo, vc.hi)) } else { - lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label.L, vc._var)) + lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label, vc._var)) } } return "\n" + strings.Join(lines, "\n") + "\n" @@ -1943,15 +2071,14 @@ func (m *machine) Format() string { func (m *machine) encodeWithoutSSA(root *instruction) { m.labelResolutionPends = m.labelResolutionPends[:0] - ectx := m.ectx - bufPtr := m.c.BufPtr() for cur := root; cur != nil; cur = cur.next { offset := int64(len(*bufPtr)) if cur.kind == nop0 { l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { - pos.BinaryOffset = offset + pos := m.labelPositionPool.Get(int(l)) + if pos != nil { + pos.binaryOffset = offset } } @@ -1968,7 +2095,7 @@ func (m *machine) encodeWithoutSSA(root *instruction) { switch p.instr.kind { case jmp, jmpIf, lea: target := p.instr.jmpLabel() - targetOffset := ectx.LabelPositions[target].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset imm32Offset := p.imm32Offset jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) @@ -1980,33 +2107,33 @@ func (m *machine) encodeWithoutSSA(root *instruction) { // Encode implements backend.Machine Encode. func (m *machine) Encode(ctx context.Context) (err error) { - ectx := m.ectx bufPtr := m.c.BufPtr() var fn string var fnIndex int - var labelToSSABlockID map[backend.Label]ssa.BasicBlockID + var labelPosToLabel map[*labelPosition]label if wazevoapi.PerfMapEnabled { fn = wazevoapi.GetCurrentFunctionName(ctx) - labelToSSABlockID = make(map[backend.Label]ssa.BasicBlockID) - for i, l := range ectx.SsaBlockIDToLabels { - labelToSSABlockID[l] = ssa.BasicBlockID(i) + labelPosToLabel = make(map[*labelPosition]label) + for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { + pos := m.labelPositionPool.Get(i) + labelPosToLabel[pos] = label(i) } fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) } m.labelResolutionPends = m.labelResolutionPends[:0] - for _, pos := range ectx.OrderedBlockLabels { + for _, pos := range m.orderedSSABlockLabelPos { offset := int64(len(*bufPtr)) - pos.BinaryOffset = offset - for cur := pos.Begin; cur != pos.End.next; cur = cur.next { + pos.binaryOffset = offset + for cur := pos.begin; cur != pos.end.next; cur = cur.next { offset := int64(len(*bufPtr)) switch cur.kind { case nop0: l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { - pos.BinaryOffset = offset + if pos := m.labelPositionPool.Get(int(l)); pos != nil { + pos.binaryOffset = offset } case sourceOffsetInfo: m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) @@ -2021,22 +2148,16 @@ func (m *machine) Encode(ctx context.Context) (err error) { } if wazevoapi.PerfMapEnabled { - l := pos.L - var labelStr string - if blkID, ok := labelToSSABlockID[l]; ok { - labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) - } else { - labelStr = l.String() - } + l := labelPosToLabel[pos] size := int64(len(*bufPtr)) - offset - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, l)) } } for i := range m.consts { offset := int64(len(*bufPtr)) vc := &m.consts[i] - vc.label.BinaryOffset = offset + vc.labelPos.binaryOffset = offset if vc._var == nil { lo, hi := vc.lo, vc.hi m.c.Emit8Bytes(lo) @@ -2054,7 +2175,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { switch p.instr.kind { case jmp, jmpIf, lea, xmmUnaryRmR: target := p.instr.jmpLabel() - targetOffset := ectx.LabelPositions[target].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset imm32Offset := p.imm32Offset jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) @@ -2063,7 +2184,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { // Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes. targets := m.jmpTableTargets[p.instr.u1] for i, l := range targets { - targetOffset := ectx.LabelPositions[backend.Label(l)].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(l)).binaryOffset jmpOffset := targetOffset - tableBegin binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset)) } @@ -2092,7 +2213,7 @@ func (m *machine) ResolveRelocations(refToBinaryOffset []int, binary []byte, rel // CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } -func (m *machine) lowerIcmpToFlag(xd, yd *backend.SSAValueDefinition, _64 bool) { +func (m *machine) lowerIcmpToFlag(xd, yd backend.SSAValueDefinition, _64 bool) { x := m.getOperand_Reg(xd) y := m.getOperand_Mem_Imm32_Reg(yd) cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) @@ -2135,7 +2256,7 @@ func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and boo // allocateInstr allocates an instruction. func (m *machine) allocateInstr() *instruction { - instr := m.ectx.InstructionPool.Allocate() + instr := m.instrPool.Allocate() if !m.regAllocStarted { instr.addedBeforeRegAlloc = true } @@ -2149,25 +2270,22 @@ func (m *machine) allocateNop() *instruction { } func (m *machine) insert(i *instruction) { - ectx := m.ectx - ectx.PendingInstructions = append(ectx.PendingInstructions, i) + m.pendingInstructions = append(m.pendingInstructions, i) } -func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nolint - pos := m.allocateLabel() - l = pos.L +func (m *machine) allocateBrTarget() (nop *instruction, l label) { //nolint + l, pos := m.allocateLabel() nop = m.allocateInstr() nop.asNop0WithLabel(l) - pos.Begin, pos.End = nop, nop + pos.begin, pos.end = nop, nop return } -func (m *machine) allocateLabel() *labelPosition { - ectx := m.ectx - l := ectx.AllocateLabel() - pos := ectx.AllocateLabelPosition(l) - ectx.LabelPositions[l] = pos - return pos +func (m *machine) allocateLabel() (label, *labelPosition) { + l := m.nextLabel + pos := m.labelPositionPool.GetOrAllocate(int(l)) + m.nextLabel++ + return l, pos } func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { @@ -3181,22 +3299,22 @@ func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) { } } - xmaskLabel := m.allocateLabel() - m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xmaskLabel}) - ymaskLabel := m.allocateLabel() - m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: ymaskLabel}) + xl, xmaskPos := m.allocateLabel() + m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xl, labelPos: xmaskPos}) + yl, ymaskPos := m.allocateLabel() + m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: yl, labelPos: ymaskPos}) xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg()) // Apply mask to X. tmp := m.c.AllocateVReg(ssa.TypeV128) - loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xmaskLabel.L)), tmp) + loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xl)), tmp) m.insert(loadMaskLo) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX)) // Apply mask to Y. - loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(ymaskLabel.L)), tmp) + loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(yl)), tmp) m.insert(loadMaskHi) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go index 8fa974c661..e53729860d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -12,7 +12,7 @@ func (m *machine) PostRegAlloc() { } func (m *machine) setupPrologue() { - cur := m.ectx.RootInstr + cur := m.rootInstr prevInitInst := cur.next // At this point, we have the stack layout as follows: @@ -130,14 +130,13 @@ func (m *machine) setupPrologue() { // 3. Inserts the dec/inc RSP instruction right before/after the call instruction. // 4. Lowering that is supposed to be done after regalloc. func (m *machine) postRegAlloc() { - ectx := m.ectx - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch k := cur.kind; k { case ret: m.setupEpilogueAfter(cur.prev) continue case fcvtToSintSequence, fcvtToUintSequence: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] if k == fcvtToSintSequence { m.lowerFcvtToSintSequenceAfterRegalloc(cur) } else { @@ -146,29 +145,29 @@ func (m *machine) postRegAlloc() { prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) continue case xmmCMov: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerXmmCmovAfterRegAlloc(cur) prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) continue case idivRemSequence: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerIDivRemSequenceAfterRegAlloc(cur) prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go index 0bb28ee9e7..de9dcc9444 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go @@ -1,13 +1,226 @@ package amd64 import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +// regAllocFn implements regalloc.Function. +type regAllocFn struct { + ssaB ssa.Builder + m *machine + loopNestingForestRoots []ssa.BasicBlock + blockIter int +} + +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 + return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { + if f.blockIter < 0 { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter-- + return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = 0 + return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { + if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter++ + return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { + f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { + f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() + return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { + root := f.loopNestingForestRoots[i] + pos := f.m.getOrAllocateSSABlockLabelPosition(root) + return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { + sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { + sb := f.ssaB.Idom(blk.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { + f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { + childSB := pos.sb.LoopNestingForestChildren()[i] + return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { + succSB := pos.sb.Succ(i) + if succSB.ReturnBlock() { + return nil + } + return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { + predSB := pos.sb.Pred(i) + return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { + c := f.m.c + *regs = (*regs)[:0] + for i := 0; i < pos.sb.Params(); i++ { + v := c.VRegOf(pos.sb.Param(i)) + *regs = append(*regs, v) + } + return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { + return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { + ret := pos.begin + pos.cur = ret + return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { + for { + if pos.cur == pos.end { + return nil + } + instr := pos.cur.next + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { + pos.cur = pos.end + return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { + for { + if pos.cur == pos.begin { + return nil + } + instr := pos.cur.prev + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { + return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { + return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { typ := src.RegType() if typ != dst.RegType() { panic("BUG: src and dst must have the same type") @@ -26,8 +239,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { linkInstr(cur, prevNext) } -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -61,8 +273,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft return linkInstr(cur, prevNext) } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -98,13 +309,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af return linkInstr(cur, prevNext) } -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { - m.clobberedRegs = append(m.clobberedRegs[:0], regs...) -} - -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { if x1.RegType() == regalloc.RegTypeInt { prevNext := cur.next xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) @@ -113,25 +318,24 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { } else { if tmp.Valid() { prevNext := cur.next - m.InsertMoveBefore(tmp, x1, prevNext) - m.InsertMoveBefore(x1, x2, prevNext) - m.InsertMoveBefore(x2, tmp, prevNext) + m.insertMoveBefore(tmp, x1, prevNext) + m.insertMoveBefore(x1, x2, prevNext) + m.insertMoveBefore(x2, tmp, prevNext) } else { prevNext := cur.next r2 := x2.RealReg() // Temporarily spill x1 to stack. - cur = m.InsertStoreRegisterAt(x1, cur, true).prev + cur = m.insertStoreRegisterAt(x1, cur, true).prev // Then move x2 to x1. cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1)) linkInstr(cur, prevNext) // Then reload the original value on x1 from stack to r2. - m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) } } } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction { cur := end for cur.kind == nop0 { cur = cur.prev @@ -146,8 +350,3 @@ func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { return end } } - -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { - return m.ectx.SsaBlockIDToLabels[id] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go index 539a8b754b..8d514d8576 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go @@ -127,7 +127,7 @@ func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) { tmpX := m.copyToTmp(xx.reg()) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp)) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX)) m.copyTo(tmpX, m.c.VRegOf(ret)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go index c6fcb86731..7879756833 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go @@ -59,7 +59,7 @@ func (o *operand) format(_64 bool) string { case operandKindImm32: return fmt.Sprintf("$%d", int32(o.imm32())) case operandKindLabel: - return backend.Label(o.imm32()).String() + return label(o.imm32()).String() default: panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind)) } @@ -85,22 +85,22 @@ func (o *operand) imm32() uint32 { return uint32(o.data) } -func (o *operand) label() backend.Label { +func (o *operand) label() label { switch o.kind { case operandKindLabel: - return backend.Label(o.data) + return label(o.data) case operandKindMem: mem := o.addressMode() if mem.kind() != amodeRipRel { panic("BUG: invalid label") } - return backend.Label(mem.imm32) + return label(mem.imm32) default: panic("BUG: invalid operand kind") } } -func newOperandLabel(label backend.Label) operand { +func newOperandLabel(label label) operand { return operand{kind: operandKindLabel, data: uint64(label)} } @@ -221,7 +221,7 @@ func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, s return ret } -func (m *machine) newAmodeRipRel(label backend.Label) *amode { +func (m *machine) newAmodeRipRel(label label) *amode { ret := m.amodePool.Allocate() *ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)} return ret @@ -246,18 +246,18 @@ func (a *amode) String() string { "%d(%s,%s,%d)", int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) case amodeRipRel: - return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32)) + return fmt.Sprintf("%s(%%rip)", label(a.imm32)) default: panic("BUG: invalid amode kind") } } -func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } - if def.SSAValue().Type() == ssa.TypeV128 { + if def.V.Type() == ssa.TypeV128 { // SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment. return m.getOperand_Reg(def) } @@ -272,9 +272,9 @@ func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operan return m.getOperand_Reg(def) } -func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } if m.c.MatchInstr(def, ssa.OpcodeLoad) { @@ -287,9 +287,9 @@ func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op return m.getOperand_Imm32_Reg(def) } -func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } instr := def.Instr @@ -323,24 +323,14 @@ func asImm32(val uint64, allowSignExt bool) (uint32, bool) { return u32val, true } -func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) { +func (m *machine) getOperand_Reg(def backend.SSAValueDefinition) (op operand) { var v regalloc.VReg - if def.IsFromBlockParam() { - v = def.BlkParamVReg + if instr := def.Instr; instr != nil && instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(instr) + instr.MarkLowered() } else { - instr := def.Instr - if instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(instr) - instr.MarkLowered() - } else { - if n := def.N; n == 0 { - v = m.c.VRegOf(instr.Return()) - } else { - _, rs := instr.Returns() - v = m.c.VRegOf(rs[n-1]) - } - } + v = m.c.VRegOf(def.V) } return newOperandReg(v) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go deleted file mode 100644 index 5219837e35..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { - s.Len = int(limit) - s.Cap = int(limit) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go deleted file mode 100644 index df4cf46ec5..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { - s.Len = limit - s.Len = limit -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go index 05ba5f027e..ef823bdbdc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go @@ -9,12 +9,14 @@ import ( ) func stackView(rbp, top uintptr) []byte { + l := int(top - rbp) var stackBuf []byte { - // TODO: use unsafe.Slice after floor version is set to Go 1.20. + //nolint:staticcheck hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) hdr.Data = rbp - setSliceLimits(hdr, top-rbp) + hdr.Len = l + hdr.Cap = l } return stackBuf } @@ -72,9 +74,9 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { // | SizeInBytes | // +-----------------+ <---- stackPointerBeforeGoCall // (low address) - data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8) + data := unsafe.Add(unsafe.Pointer(stackPointerBeforeGoCall), 8) size := *stackPointerBeforeGoCall / 8 - return unsafe.Slice((*uint64)(data), int(size)) + return unsafe.Slice((*uint64)(data), size) } func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 6615471c6a..d1eaa7cd4f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) { bits := arg.Type.Bits() // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} load := m.allocateInstr() switch arg.Type { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(reg), amode, bits) + load.asULoad(reg, amode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - load.asFpuLoad(operandNR(reg), amode, bits) + load.asFpuLoad(reg, amode, bits) default: panic("BUG") } @@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} store := m.allocateInstr() store.asStore(operandNR(reg), amode, bits) m.insert(store) @@ -180,9 +182,9 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, slotBegin int64) { arg := &a.Args[argIndex] - if def != nil && def.IsFromInstr() { + if def.IsFromInstr() { // Constant instructions are inlined. if inst := def.Instr; inst.Constant() { val := inst.Return() @@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i ldr := m.allocateInstr() switch r.Type { case ssa.TypeI32, ssa.TypeI64: - ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asULoad(reg, amode, r.Type.Bits()) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asFpuLoad(reg, amode, r.Type.Bits()) default: panic("BUG") } @@ -225,25 +227,24 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i } } -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { + m.pendingInstructions = m.pendingInstructions[:0] mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur, mode } -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode { if rn.RegType() != regalloc.RegTypeInt { panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) } - var amode addressMode + amode := m.amodePool.Allocate() if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} } else { var indexReg regalloc.VReg if allowTmpRegUse { @@ -253,7 +254,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg indexReg = m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(indexReg, offset) } - amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} } return amode } @@ -315,7 +316,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) + alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true) m.insert(alu) } else { m.lowerConstantI64(tmpRegVReg, diff) @@ -326,7 +327,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) + alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true) m.insert(alu) } } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go index 7a9cceb332..f8b5d97ac7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo } else { postIndexImm = 8 } - loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} + loadMode := m.amodePool.Allocate() + *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} instr := m.allocateInstr() switch typ { case ssa.TypeI32: - instr.asULoad(loadTargetReg, loadMode, 32) + instr.asULoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeI64: - instr.asULoad(loadTargetReg, loadMode, 64) + instr.asULoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeF32: - instr.asFpuLoad(loadTargetReg, loadMode, 32) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeF64: - instr.asFpuLoad(loadTargetReg, loadMode, 64) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeV128: - instr.asFpuLoad(loadTargetReg, loadMode, 128) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128) } cur = linkInstr(cur, instr) if isStackArg { - var storeMode addressMode + var storeMode *addressMode cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) toStack := m.allocateInstr() toStack.asStore(loadTargetReg, storeMode, bits) @@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg } if isStackArg { - var loadMode addressMode + var loadMode *addressMode cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) toReg := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - toReg.asULoad(storeTargetReg, loadMode, bits) + toReg.asULoad(storeTargetReg.reg(), loadMode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - toReg.asFpuLoad(storeTargetReg, loadMode, bits) + toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits) default: panic("TODO?") } cur = linkInstr(cur, toReg) } - mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} instr := m.allocateInstr() instr.asStore(storeTargetReg, mode, bits) cur = linkInstr(cur, instr) @@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { instr := m.allocateInstr() - mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} if store { instr.asStore(operandNR(d), mode, 64) } else { - instr.asULoad(operandNR(d), mode, 64) + instr.asULoad(d, mode, 64) } return linkInstr(prev, instr) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 466b1f9609..06f8a4a053 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedRegistersSorted = []regalloc.VReg{ // CompileGoFunctionTrampoline implements backend.Machine. func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - exct := m.executableContext argBegin := 1 // Skips exec context by default. if needModuleContextPtr { argBegin++ @@ -26,7 +25,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur := m.allocateInstr() cur.asNop0() - exct.RootInstr = cur + m.rootInstr = cur // Execution context is always the first argument. execCtrPtr := x0VReg @@ -87,7 +86,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // Module context is always the second argument. moduleCtrPtr := x1VReg store := m.allocateInstr() - amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} store.asStore(operandNR(moduleCtrPtr), amode, 64) cur = linkInstr(cur, store) } @@ -120,11 +120,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * } else { sizeInBits = 64 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindPostIndex, - rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8), - }, sizeInBits) + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} + store.asStore(operandNR(v), amode, sizeInBits) cur = linkInstr(cur, store) } @@ -139,7 +137,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * frameSizeReg = xzrVReg sliceSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, -16, true) + _amode := addressModePreOrPostIndex(m, spVReg, -16, true) storeP := m.allocateInstr() storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) cur = linkInstr(cur, storeP) @@ -165,8 +163,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) ldr := m.allocateInstr() // And load the return address. - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) + ldr.asULoad(lrVReg, amode, 64) cur = linkInstr(cur, ldr) originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. @@ -183,23 +181,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * r := &abi.Rets[i] if r.Kind == backend.ABIArgKindReg { loadIntoReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asULoad(r.Reg, mode, 32) case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asULoad(r.Reg, mode, 64) case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asFpuLoad(r.Reg, mode, 32) case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asFpuLoad(r.Reg, mode, 64) case ssa.TypeV128: mode.imm = 16 - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) + loadIntoReg.asFpuLoad(r.Reg, mode, 128) default: panic("TODO") } @@ -208,28 +207,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // First we need to load the value to a temporary just like ^^. intTmp, floatTmp := x11VReg, v11VReg loadIntoTmpReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} var resultReg regalloc.VReg switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) + loadIntoTmpReg.asULoad(intTmp, mode, 32) resultReg = intTmp case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) + loadIntoTmpReg.asULoad(intTmp, mode, 64) resultReg = intTmp case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32) resultReg = floatTmp case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64) resultReg = floatTmp case ssa.TypeV128: mode.imm = 16 - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128) resultReg = floatTmp default: panic("TODO") @@ -243,7 +243,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * ret.asRet() linkInstr(cur, ret) - m.encode(m.executableContext.RootInstr) + m.encode(m.rootInstr) return m.compiler.Buf() } @@ -258,12 +258,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal case regalloc.RegTypeFloat: sizeInBits = 128 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + store.asStore(operandNR(v), mode, sizeInBits) store.prev = cur cur.next = store cur = store @@ -276,7 +277,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() for _, v := range regs { load := m.allocateInstr() - var as func(dst operand, amode addressMode, sizeInBits byte) + var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) var sizeInBits byte switch v.RegType() { case regalloc.RegTypeInt: @@ -286,12 +287,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re as = load.asFpuLoad sizeInBits = 128 } - as(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + as(v, mode, sizeInBits) cur = linkInstr(cur, load) offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. } @@ -299,20 +301,18 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re } func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerConstantI64(dst, v) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur } func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerConstantI32(dst, v) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur @@ -324,11 +324,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode // Set the exit status on the execution context. setExistStatus := m.allocateInstr() - setExistStatus.asStore(operandNR(constReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} + setExistStatus.asStore(operandNR(constReg), mode, 32) cur = linkInstr(cur, setExistStatus) return cur } @@ -340,12 +338,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { cur = linkInstr(cur, adr) storeReturnAddr := m.allocateInstr() - storeReturnAddr.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, storeReturnAddr) // Exit the execution. @@ -364,11 +363,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe cur = linkInstr(cur, movSp) strSp := m.allocateInstr() - strSp.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSp.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, strSp) return cur } @@ -376,27 +376,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { load := m.allocateInstr() var result regalloc.VReg - mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} switch arg.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 32) + load.asULoad(intVReg, mode, 32) result = intVReg case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 64) + load.asULoad(intVReg, mode, 64) result = intVReg case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 32) + load.asFpuLoad(floatVReg, mode, 32) result = floatVReg case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 64) + load.asFpuLoad(floatVReg, mode, 64) result = floatVReg case ssa.TypeV128: mode.imm = 16 - load.asFpuLoad(operandNR(floatVReg), mode, 128) + load.asFpuLoad(floatVReg, mode, 128) result = floatVReg default: panic("TODO") @@ -408,7 +409,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { store := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} var sizeInBits byte switch result.Type { case ssa.TypeI32, ssa.TypeF32: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 8aabc5997b..1f563428aa 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -3,10 +3,12 @@ package arm64 import ( "fmt" "math" + "unsafe" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) type ( @@ -22,9 +24,9 @@ type ( // TODO: optimize the layout later once the impl settles. instruction struct { prev, next *instruction - u1, u2, u3 uint64 - rd, rm, rn, ra operand - amode addressMode + u1, u2 uint64 + rd regalloc.VReg + rm, rn operand kind instructionKind addedBeforeRegAlloc bool } @@ -34,18 +36,6 @@ type ( instructionKind byte ) -func asNop0(i *instruction) { - i.kind = nop0 -} - -func setNext(i, next *instruction) { - i.next = next -} - -func setPrev(i, prev *instruction) { - i.prev = prev -} - // IsCall implements regalloc.Instr IsCall. func (i *instruction) IsCall() bool { return i.kind == call @@ -61,21 +51,6 @@ func (i *instruction) IsReturn() bool { return i.kind == ret } -// Next implements regalloc.Instr Next. -func (i *instruction) Next() regalloc.Instr { - return i.next -} - -// Prev implements regalloc.Instr Prev. -func (i *instruction) Prev() regalloc.Instr { - return i.prev -} - -// AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. -func (i *instruction) AddedBeforeRegAlloc() bool { - return i.addedBeforeRegAlloc -} - type defKind byte const ( @@ -174,7 +149,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { switch defKinds[i.kind] { case defKindNone: case defKindRD: - *regs = append(*regs, i.rd.nr()) + *regs = append(*regs, i.rd) case defKindCall: _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) for i := byte(0); i < retIntRealRegs; i++ { @@ -194,7 +169,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) { switch defKinds[i.kind] { case defKindNone: case defKindRD: - i.rd = i.rd.assignReg(reg) + i.rd = reg case defKindCall: panic("BUG: call instructions shouldn't be assigned") default: @@ -329,7 +304,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { if rm := i.rm.reg(); rm.Valid() { *regs = append(*regs, rm) } - if ra := i.ra.reg(); ra.Valid() { + if ra := regalloc.VReg(i.u2); ra.Valid() { *regs = append(*regs, ra) } case useKindRNRN1RM: @@ -341,18 +316,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { *regs = append(*regs, rm) } case useKindAMode: - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindRNAMode: *regs = append(*regs, i.rn.reg()) - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindCond: @@ -374,7 +351,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { case useKindRDRewrite: *regs = append(*regs, i.rn.reg()) *regs = append(*regs, i.rm.reg()) - *regs = append(*regs, i.rd.reg()) + *regs = append(*regs, i.rd) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } @@ -408,8 +385,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if rd := i.rd.reg(); rd.Valid() { - i.rd = i.rd.assignReg(reg) + if rd := i.rd; rd.Valid() { + i.rd = reg } } case useKindRNRN1RM: @@ -435,32 +412,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if ra := i.ra.reg(); ra.Valid() { - i.ra = i.ra.assignReg(reg) + if ra := regalloc.VReg(i.u2); ra.Valid() { + i.u2 = uint64(reg) } } case useKindAMode: if index == 0 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } } case useKindRNAMode: if index == 0 { i.rn = i.rn.assignReg(reg) } else if index == 1 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } else { panic("BUG") } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } else { panic("BUG") } @@ -503,35 +484,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef { } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movZ - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movK - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movN - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -553,21 +534,21 @@ func (i *instruction) asRet() { i.kind = ret } -func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = storeP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = loadP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = store8 @@ -589,10 +570,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { i.kind = fpuStore128 } i.rn = src - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = sLoad8 @@ -604,10 +585,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { panic("BUG") } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = uLoad8 @@ -619,10 +600,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { i.kind = uLoad64 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 32: i.kind = fpuLoad32 @@ -632,10 +613,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) i.kind = fpuLoad128 } i.rd = dst - i.amode = amode + i.setAmode(amode) +} + +func (i *instruction) getAmode() *addressMode { + return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) } -func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { +func (i *instruction) setAmode(a *addressMode) { + i.u1 = uint64(uintptr(unsafe.Pointer(a))) +} + +func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) { // NOTE: currently only has support for no-offset loads, though it is suspicious that // we would need to support offset load (that is only available for post-index). i.kind = vecLoad1R @@ -646,32 +635,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { i.kind = cSet - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(c) if mask { i.u2 = 1 } } -func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = cSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = fpuCSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } @@ -691,7 +680,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar } func (i *instruction) brTableSequenceOffsetsResolved() { - i.u3 = 1 // indicate that the offsets are resolved, for debugging. + i.rm.data = 1 // indicate that the offsets are resolved, for debugging. } func (i *instruction) brLabel() label { @@ -701,7 +690,7 @@ func (i *instruction) brLabel() label { // brOffsetResolved is called when the target label is resolved. func (i *instruction) brOffsetResolve(offset int64) { i.u2 = uint64(offset) - i.u3 = 1 // indicate that the offset is resolved, for debugging. + i.rm.data = 1 // indicate that the offset is resolved, for debugging. } func (i *instruction) brOffset() int64 { @@ -714,7 +703,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) { i.u1 = c.asUint64() i.u2 = uint64(target) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -728,17 +717,17 @@ func (i *instruction) condBrLabel() label { // condBrOffsetResolve is called when the target label is resolved. func (i *instruction) condBrOffsetResolve(offset int64) { - i.rd.data = uint64(offset) - i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. + i.rn.data = uint64(offset) + i.rn.data2 = 1 // indicate that the offset is resolved, for debugging. } // condBrOffsetResolved returns true if condBrOffsetResolve is already called. func (i *instruction) condBrOffsetResolved() bool { - return i.rd.data2 == 1 + return i.rn.data2 == 1 } func (i *instruction) condBrOffset() int64 { - return int64(i.rd.data) + return int64(i.rn.data) } func (i *instruction) condBrCond() cond { @@ -746,33 +735,33 @@ func (i *instruction) condBrCond() cond { } func (i *instruction) condBr64bit() bool { - return i.u3 == 1 + return i.u2&(1<<32) != 0 } func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst32 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst64 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { i.kind = loadFpuConst128 i.u1 = lo i.u2 = hi - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { i.kind = fpuCmp i.rn, i.rm = rn, rm if is64bit { - i.u3 = 1 + i.u1 = 1 } } @@ -783,12 +772,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i i.u1 = uint64(c) i.u2 = uint64(flag) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR @@ -804,22 +793,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { +func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) { i.kind = aluRRRR i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra + i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra) if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } // asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. @@ -831,17 +820,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { i.kind = aluRRBitmaskImm i.u1 = uint64(aluOp) - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u2 = imm if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } @@ -852,76 +841,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) { func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { i.kind = movFromFPSR - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { i.kind = bitRR - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(bitOp) if is64bit { i.u2 = 1 } } -func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { i.kind = fpuRRR i.u1 = uint64(op) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { +func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) { i.kind = fpuRR i.u1 = uint64(op) i.rd, i.rn = rd, rn if dst64bit { - i.u3 = 1 + i.u2 = 1 } } func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { i.kind = extend - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(fromBits) i.u2 = uint64(toBits) if signed { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asMove32(rd, rn regalloc.VReg) { i.kind = mov32 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { i.kind = mov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { i.kind = fpuMov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { i.kind = fpuMov128 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } -func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = movToVec i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { +func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) { if signed { i.kind = movFromVecSigned } else { @@ -932,48 +921,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecDup i.u1 = uint64(arr) i.rn, i.rd = rn, rd } -func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = vecDupElement i.u1 = uint64(arr) i.rn, i.rd = rn, rd i.u2 = uint64(index) } -func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { +func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) { i.kind = vecExtract i.u1 = uint64(arr) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(index) } -func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { +func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { i.kind = vecMovElement i.u1 = uint64(arr) - i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) + i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32 i.rn, i.rd = rn, rd } -func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecMisc i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecLanes i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecShiftImm i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd @@ -981,7 +970,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange return i } -func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { switch nregs { case 0, 1: i.kind = vecTbl @@ -1000,14 +989,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen i.u2 = uint64(arr) } -func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecPermute i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } -func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecRRR i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1017,7 +1006,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. // IMPORTANT: the destination register must be already defined before this instruction. -func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecRRRRewrite i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1033,8 +1022,8 @@ func (i *instruction) IsCopy() bool { // String implements fmt.Stringer. func (i *instruction) String() (str string) { - is64SizeBitToSize := func(u3 uint64) byte { - if u3 == 0 { + is64SizeBitToSize := func(v uint64) byte { + if v == 0 { return 32 } return 64 @@ -1049,46 +1038,46 @@ func (i *instruction) String() (str string) { str = "nop0" } case aluRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1 >> 32) str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size)) case aluRRImm12: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRBitmaskImm: - size := is64SizeBitToSize(i.u3) - rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) + size := is64SizeBitToSize(i.u1 >> 32) + rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size) if size == 32 { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) } else { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) } case aluRRImmShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %#x", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.shiftImm(), ) case aluRRRShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size), ) case aluRRRExtend: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), // Regardless of the source size, the register is formatted in 32-bit. i.rm.format(32), @@ -1097,57 +1086,57 @@ func (i *instruction) String() (str string) { size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s", bitOp(i.u1), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), ) case uLoad8: - str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad8: - str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad16: - str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad16: - str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad32: - str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case store8: - str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) + str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8)) case store16: - str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) + str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16)) case store32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32)) case store64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case storeP64: str = fmt.Sprintf("stp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case loadP64: str = fmt.Sprintf("ldp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case mov64: str = fmt.Sprintf("mov %s, %s", - formatVRegSized(i.rd.nr(), 64), + formatVRegSized(i.rd, 64), formatVRegSized(i.rn.nr(), 64)) case mov32: - str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32)) case movZ: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movN: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movK: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case extend: fromBits, toBits := byte(i.u1), byte(i.u2) var signedStr string - if i.u3 == 1 { + if i.u2>>32 == 1 { signedStr = "s" } else { signedStr = "u" @@ -1161,39 +1150,39 @@ func (i *instruction) String() (str string) { case 32: fromStr = "w" } - str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32)) case cSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("csel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), ) case cSet: if i.u2 != 0 { - str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } else { - str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } case cCmpImm: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", formatVRegSized(i.rn.nr(), size), i.rm.data, i.u2&0b1111, condFlag(i.u1)) case fpuMov64: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement8B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) case fpuMov128: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement16B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) case fpuMovFromVec: panic("TODO") case fpuRR: - dstSz := is64SizeBitToSize(i.u3) + dstSz := is64SizeBitToSize(i.u2) srcSz := dstSz op := fpuUniOp(i.u1) switch op { @@ -1203,38 +1192,38 @@ func (i *instruction) String() (str string) { srcSz = 64 } str = fmt.Sprintf("%s %s, %s", op.String(), - formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) + formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz)) case fpuRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuRRI: panic("TODO") case fpuRRRR: panic("TODO") case fpuCmp: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1) str = fmt.Sprintf("fcmp %s, %s", formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case fpuStore32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64)) case fpuLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case fpuStore64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case fpuLoad128: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64)) case fpuStore128: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64)) case loadFpuConst32: - str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) + str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1))) case loadFpuConst64: - str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) + str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1)) case loadFpuConst128: str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", - formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) + formatVRegSized(i.rd, 128), i.u1, i.u2) case fpuToInt: var op, src, dst string if signed := i.u1 == 1; signed { @@ -1242,15 +1231,15 @@ func (i *instruction) String() (str string) { } else { op = "fcvtzu" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) } else { src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegSized(i.rd.nr(), 64) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegSized(i.rd, 64) } else { - dst = formatVRegSized(i.rd.nr(), 32) + dst = formatVRegSized(i.rd, 32) } str = fmt.Sprintf("%s %s, %s", op, dst, src) @@ -1261,21 +1250,21 @@ func (i *instruction) String() (str string) { } else { op = "ucvtf" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegSized(i.rn.nr(), 64) } else { src = formatVRegSized(i.rn.nr(), 32) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegWidthVec(i.rd, vecArrangementD) } else { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) + dst = formatVRegWidthVec(i.rd, vecArrangementS) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case fpuCSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("fcsel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), @@ -1291,7 +1280,7 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) + str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) case movFromVec, movFromVecSigned: var size byte var opcode string @@ -1315,23 +1304,23 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) + str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) case vecDup: str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64), ) case vecDupElement: arr := vecArrangement(i.u1) str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), ) case vecDupFromFpu: panic("TODO") case vecExtract: str = fmt.Sprintf("ext %s, %s, %s, #%d", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), uint32(i.u2), @@ -1340,15 +1329,15 @@ func (i *instruction) String() (str string) { panic("TODO") case vecMovElement: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), - formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)), + formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)), ) case vecMiscNarrow: panic("TODO") case vecRRR, vecRRRRewrite: str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), ) @@ -1356,12 +1345,12 @@ func (i *instruction) String() (str string) { vop := vecOp(i.u1) if vop == vecOpCmeq0 { str = fmt.Sprintf("cmeq %s, %s, #0", - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } else { str = fmt.Sprintf("%s %s, %s", vop, - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } case vecLanes: @@ -1379,24 +1368,24 @@ func (i *instruction) String() (str string) { } str = fmt.Sprintf("%s %s, %s", vecOp(i.u1), - formatVRegWidthVec(i.rd.nr(), destArr), + formatVRegWidthVec(i.rd, destArr), formatVRegVec(i.rn.nr(), arr, vecIndexNone)) case vecShiftImm: arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, #%d", vecOp(i.u1), - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), i.rm.shiftImm()) case vecTbl: arr := vecArrangement(i.u2) str = fmt.Sprintf("tbl %s, { %s }, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case vecTbl2: arr := vecArrangement(i.u2) - rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() + rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr() rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) str = fmt.Sprintf("tbl %s, { %s, %s }, %s", formatVRegVec(rd, arr, vecIndexNone), @@ -1407,13 +1396,13 @@ func (i *instruction) String() (str string) { arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case movToFPSR: str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) case movFromFPSR: - str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) + str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64)) case call: str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) case callInd: @@ -1422,15 +1411,15 @@ func (i *instruction) String() (str string) { str = "ret" case br: target := label(i.u1) - if i.u3 != 0 { + if i.rm.data != 0 { str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) } else { str = fmt.Sprintf("b %s", target.String()) } case condBr: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) c := cond(i.u1) - target := label(i.u2) + target := label(i.u2 & 0xffffffff) switch c.kind() { case condKindRegisterZero: if !i.condBrOffsetResolved() { @@ -1456,7 +1445,7 @@ func (i *instruction) String() (str string) { } } case adr: - str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) + str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1)) case brTableSequence: targetIndex := i.u1 str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) @@ -1473,7 +1462,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicCas: m := "casal" size := byte(32) @@ -1485,7 +1474,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) case atomicLoad: m := "ldar" size := byte(32) @@ -1497,7 +1486,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicStore: m := "stlr" size := byte(32) @@ -1517,9 +1506,9 @@ func (i *instruction) String() (str string) { case emitSourceOffsetInfo: str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) case vecLoad1R: - str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) case loadConstBlockArg: - str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) + str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1) default: panic(i.kind) } @@ -1528,26 +1517,26 @@ func (i *instruction) String() (str string) { func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { i.kind = adr - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(offset) } -func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) { i.kind = atomicRmw - i.rd, i.rn, i.rm = rt, rn, rs + i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs) i.u1 = uint64(op) i.u2 = size } -func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) { i.kind = atomicCas - i.rm, i.rn, i.rd = rt, rn, rs + i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs i.u2 = size } -func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { +func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) { i.kind = atomicLoad - i.rn, i.rd = rn, rt + i.rn, i.rd = operandNR(rn), rt i.u2 = size } @@ -1755,12 +1744,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V i.kind = loadConstBlockArg i.u1 = v i.u2 = uint64(typ) - i.rd = operandNR(dst) + i.rd = dst return i } func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { - return i.u1, ssa.Type(i.u2), i.rd.nr() + return i.u1, ssa.Type(i.u2), i.rd } func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { @@ -1778,7 +1767,7 @@ func (i *instruction) asUDF() *instruction { return i } -func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { +func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) { i.kind = fpuToInt i.rn = rn i.rd = rd @@ -1789,11 +1778,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } -func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { +func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) { i.kind = intToFpu i.rn = rn i.rd = rd @@ -1804,7 +1793,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } @@ -1817,7 +1806,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { // aluOp determines the type of ALU operation. Instructions whose kind is one of // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend // would use this type. -type aluOp int +type aluOp uint32 func (a aluOp) String() string { switch a { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 227a964741..21be9b71e7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -12,7 +12,7 @@ import ( // Encode implements backend.Machine Encode. func (m *machine) Encode(ctx context.Context) error { m.resolveRelativeAddresses(ctx) - m.encode(m.executableContext.RootInstr) + m.encode(m.rootInstr) if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize { return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize) } @@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) { case callInd: c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode())) case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode())) case vecLoad1R: c.Emit4Bytes(encodeVecLoad1R( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1))) case condBr: @@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) { panic("BUG") } case movN: - c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movZ: - c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movK: - c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case mov32: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) case mov64: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() toIsSp := to == sp fromIsSp := from == sp c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) case loadP64, storeP64: rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] - amode := i.amode + amode := i.getAmode() rn := regNumberInEncoding[amode.rn.RealReg()] var pre bool switch amode.kind { @@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) case loadFpuConst32: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { encodeLoadFpuConst32(c, rd, i.u1) } case loadFpuConst64: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { - encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) + encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1) } case loadFpuConst128: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] lo, hi := i.u1, i.u2 if lo == 0 && hi == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) @@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) { case aluRRRR: c.Emit4Bytes(encodeAluRRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.ra.realReg()], - uint32(i.u3), + regNumberInEncoding[regalloc.VReg(i.u2).RealReg()], + uint32(i.u1>>32), )) case aluRRImmShift: c.Emit4Bytes(encodeAluRRImm( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), - uint32(i.u3), + uint32(i.u2>>32), )) case aluRRR: rn := i.rn.realReg() c.Emit4Bytes(encodeAluRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[rn], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2>>32 == 1, rn == sp, )) case aluRRRExtend: rm, exo, to := i.rm.er() c.Emit4Bytes(encodeAluRRRExtend( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[rm.RealReg()], exo, @@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) { r, amt, sop := i.rm.sr() c.Emit4Bytes(encodeAluRRRShift( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[r.RealReg()], uint32(amt), sop, - i.u3 == 1, + i.u2>>32 == 1, )) case aluRRBitmaskImm: c.Emit4Bytes(encodeAluBitmaskImmediate( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], i.u2, - i.u3 == 1, + i.u1>>32 == 1, )) case bitRR: c.Emit4Bytes(encodeBitRR( bitOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2)), ) @@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) { imm12, shift := i.rm.imm12() c.Emit4Bytes(encodeAluRRImm12( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], imm12, shift, - i.u3 == 1, + i.u2>>32 == 1, )) case fpuRRR: c.Emit4Bytes(encodeFpuRRR( fpuBinOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2 == 1, )) case fpuMov64, fpuMov128: // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var q uint32 if kind == fpuMov128 { @@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) case cSet: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] cf := condFlag(i.u1) if i.u2 == 1 { // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- @@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) } case extend: - c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) + c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()])) case fpuCmp: // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] var ftype uint32 - if i.u3 == 1 { + if i.u1 == 1 { ftype = 0b01 // double precision. } c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) @@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0) } case adr: - c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) + c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1))) case cSel: c.Emit4Bytes(encodeConditionalSelect( kind, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case fpuCSel: c.Emit4Bytes(encodeFpuCSel( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case movToVec: c.Emit4Bytes(encodeMoveToVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), )) case movFromVec, movFromVecSigned: c.Emit4Bytes(encodeMoveFromVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), @@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) { )) case vecDup: c.Emit4Bytes(encodeVecDup( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)))) case vecDupElement: c.Emit4Bytes(encodeVecDupElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2))) case vecExtract: c.Emit4Bytes(encodeVecExtract( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u1)), @@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) { case vecPermute: c.Emit4Bytes(encodeVecPermute( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u2)))) case vecMovElement: c.Emit4Bytes(encodeVecMovElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1), - uint32(i.u2), uint32(i.u3), + uint32(i.u2), uint32(i.u2>>32), )) case vecMisc: c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecLanes: c.Emit4Bytes(encodeVecLanes( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecShiftImm: c.Emit4Bytes(encodeVecShiftImm( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), vecArrangement(i.u2), @@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) { case vecTbl: c.Emit4Bytes(encodeVecTbl( 1, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) { case vecTbl2: c.Emit4Bytes(encodeVecTbl( 2, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) { case fpuRR: c.Emit4Bytes(encodeFloatDataOneSource( fpuUniOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], - i.u3 == 1, + i.u2 == 1, )) case vecRRR: if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { @@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) { case vecRRRRewrite: c.Emit4Bytes(encodeVecRRR( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2), )) case cCmpImm: // Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en - sf := uint32(i.u3 & 0b1) + sf := uint32((i.u2 >> 32) & 0b1) nzcv := uint32(i.u2 & 0b1111) cond := uint32(condFlag(i.u1)) imm := uint32(i.rm.data & 0b11111) @@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) { sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, ) case movFromFPSR: - rt := regNumberInEncoding[i.rd.realReg()] + rt := regNumberInEncoding[i.rd.RealReg()] c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) case movToFPSR: rt := regNumberInEncoding[i.rn.realReg()] @@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(encodeAtomicRmw( atomicRmwOp(i.u1), regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), )) case atomicCas: c.Emit4Bytes(encodeAtomicCas( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rm.realReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), @@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) { case atomicLoad: c.Emit4Bytes(encodeAtomicLoadStore( regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], uint32(i.u2), 1, )) @@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en func encodeCnvBetweenFloatInt(i *instruction) uint32 { - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var opcode uint32 @@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b00 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b010 } else { @@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b11 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b000 @@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) { // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en // // "shift" must have been divided by 16 at this point. -func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) { +func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) { ret = rd ret |= uint32(imm&0xffff) << 5 - ret |= (uint32(shift)) << 21 + ret |= (shift) << 21 ret |= 0b100101 << 23 ret |= opc << 29 - ret |= uint32(_64bit) << 31 + ret |= _64bit << 31 return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go index 698b382d46..6c6824fb0a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go @@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) { func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVZ(dst, v, uint64(shift), dst64) + instr.asMOVZ(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVK(dst, v, uint64(shift), dst64) + instr.asMOVK(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVN(dst, v, uint64(shift), dst64) + instr.asMOVN(dst, v, uint32(shift), dst64) m.insert(instr) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 2bb234e8c1..f9df356c0e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -17,19 +17,18 @@ import ( // LowerSingleBranch implements backend.Machine. func (m *machine) LowerSingleBranch(br *ssa.Instruction) { - ectx := m.executableContext switch br.Opcode() { case ssa.OpcodeJump: - _, _, targetBlk := br.BranchData() + _, _, targetBlkID := br.BranchData() if br.IsFallthroughJump() { return } b := m.allocateInstr() - target := ectx.GetOrAllocateSSABlockLabel(targetBlk) - if target == labelReturn { + targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) + if targetBlk.ReturnBlock() { b.asRet() } else { - b.asBr(target) + b.asBr(ssaBlockLabel(targetBlk)) } m.insert(b) case ssa.OpcodeBrTable: @@ -40,7 +39,8 @@ func (m *machine) LowerSingleBranch(br *ssa.Instruction) { } func (m *machine) lowerBrTable(i *ssa.Instruction) { - index, targets := i.BrTableData() + index, targetBlockIDs := i.BrTableData() + targetBlockCount := len(targetBlockIDs.View()) indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone) // Firstly, we have to do the bounds check of the index, and @@ -50,35 +50,35 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { // subs wzr, index, maxIndexReg // csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg. maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) - m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) + m.lowerConstantI32(maxIndexReg, int32(targetBlockCount-1)) subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) + subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false) m.insert(subs) csel := m.allocateInstr() adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) - csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) + csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false) m.insert(csel) brSequence := m.allocateInstr() - tableIndex := m.addJmpTableTarget(targets) - brSequence.asBrTableSequence(adjustedIndex, tableIndex, len(targets)) + tableIndex := m.addJmpTableTarget(targetBlockIDs) + brSequence.asBrTableSequence(adjustedIndex, tableIndex, targetBlockCount) m.insert(brSequence) } // LowerConditionalBranch implements backend.Machine. func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - exctx := m.executableContext - cval, args, targetBlk := b.BranchData() + cval, args, targetBlkID := b.BranchData() if len(args) > 0 { panic(fmt.Sprintf( "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - exctx.CurrentSSABlk, - targetBlk, + m.currentLabelPos.sb, + targetBlkID, )) } - target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) + target := ssaBlockLabel(targetBlk) cvalDef := m.compiler.ValueDefinition(cval) switch { @@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerSelectVec(rc, rn, rm, rd) } else { m.lowerSelect(c, x, y, instr.Return()) @@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) @@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) @@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFcvtFromUint: x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFdemote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false) m.insert(cnt) case ssa.OpcodeFpromote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true) m.insert(cnt) @@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv) case ssa.OpcodeSrem, ssa.OpcodeUrem: x, y, ctx := instr.Arg3() ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) + rd := m.compiler.VRegOf(instr.Return()) + m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) case ssa.OpcodeVconst: result := m.compiler.VRegOf(instr.Return()) lo, hi := instr.VconstData() @@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B) m.insert(ins) case ssa.OpcodeVbxor: @@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) // creg is overwritten by BSL, so we need to move it to the result register before the instruction // in case when it is used somewhere else. mov := m.allocateInstr() - mov.asFpuMov128(tmp.nr(), creg.nr()) + mov.asFpuMov128(tmp, creg.nr()) m.insert(mov) ins := m.allocateInstr() @@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { mov2 := m.allocateInstr() rd := m.compiler.VRegOf(instr.Return()) - mov2.asFpuMov128(rd, tmp.nr()) + mov2.asFpuMov128(rd, tmp) m.insert(mov2) case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue: x, lane := instr.ArgWithLane() @@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr = ssaLaneToArrangement(lane) } rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVcheckTrue(op, rm, rd, arr) case ssa.OpcodeVhighBits: x, lane := instr.ArgWithLane() rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) m.lowerVhighBits(rm, rd, arr) case ssa.OpcodeVIadd: @@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { panic("unsupported lane " + lane.String()) } - widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr) - widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr) - addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr) + widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr) + widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr) + addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr) m.insert(widenLo) m.insert(widenHi) m.insert(addp) @@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVIMul(rd, rn, rm, arr) case ssa.OpcodeVIabs: m.lowerVecMisc(vecOpAbs, instr) @@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVShift(op, rd, rn, rm, arr) case ssa.OpcodeVSqrt: m.lowerVecMisc(vecOpFsqrt, instr) @@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat) case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint: x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint) case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) var arr vecArrangement switch lane { @@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) loQxtn := m.allocateInstr() hiQxtn := m.allocateInstr() @@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { m.insert(hiQxtn) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmp.nr()) + mov.asFpuMov128(rd, tmp) m.insert(mov) case ssa.OpcodeFvpromoteLow: x, lane := instr.ArgWithLane() @@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeFvdemote: @@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeExtractlane: x, index, signed, lane := instr.ExtractlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) mov := m.allocateInstr() switch lane { @@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, index, lane := instr.InsertlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + rd := m.compiler.VRegOf(instr.Return()) + tmpReg := m.compiler.AllocateVReg(ssa.TypeV128) // Initially mov rn to tmp. mov1 := m.allocateInstr() - mov1.asFpuMov128(tmpReg.nr(), rn.nr()) + mov1.asFpuMov128(tmpReg, rn.nr()) m.insert(mov1) // movToVec and vecMovElement do not clear the remaining bits to zero, @@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { // Finally mov tmp to rd. mov3 := m.allocateInstr() - mov3.asFpuMov128(rd.nr(), tmpReg.nr()) + mov3.asFpuMov128(rd, tmpReg) m.insert(mov3) case ssa.OpcodeSwizzle: x, y, lane := instr.Arg2WithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, lane1, lane2 := instr.ShuffleData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerShuffle(rd, rn, rm, lane1, lane2) case ssa.OpcodeSplat: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) dup := m.allocateInstr() switch lane { @@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S)) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr())) + rd := m.compiler.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr())) case ssa.OpcodeLoadSplat: ptr, offset, lane := instr.LoadSplatData() @@ -791,10 +791,10 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { default: panic("TODO: lowering " + op.String()) } - m.executableContext.FlushPendingInstructions() + m.FlushPendingInstructions() } -func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { +func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { // `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30. vReg, wReg := v29VReg, v30VReg @@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { m.insert(tbl2) } -func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { var modulo byte switch arr { case vecArrangement16B: @@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem if op != ssa.OpcodeVIshl { // Negate the amount to make this as right shift. neg := m.allocateInstr() - neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true) + neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true) m.insert(neg) } // Copy the shift amount into a vector register as sshl/ushl requires it to be there. dup := m.allocateInstr() - dup.asVecDup(vtmp, rtmp, arr) + dup.asVecDup(vtmp.nr(), rtmp, arr) m.insert(dup) if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { @@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem } } -func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) { tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) // Special case VallTrue for i64x2. @@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem // cset dst, eq ins := m.allocateInstr() - ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D) + ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D) m.insert(ins) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D) + addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D) m.insert(addp) fcmp := m.allocateInstr() @@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(fcmp) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, eq) + cset.asCSet(rd, false, eq) m.insert(cset) return @@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem ins := m.allocateInstr() if op == ssa.OpcodeVanyTrue { // umaxp v4?.16b, v2?.16b, v2?.16b - ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B) + ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B) } else { // uminv d4?, v2?.4s - ins.asVecLanes(vecOpUminv, tmp, rm, arr) + ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr) } m.insert(ins) @@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(movv) fc := m.allocateInstr() - fc.asCCmpImm(rd, uint64(0), al, 0, true) + fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true) m.insert(fc) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, ne) + cset.asCSet(rd, false, ne) m.insert(cset) } -func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) { r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) @@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v1[i] = 0xff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B) m.insert(sshr) // Load the bit mask into r0. @@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) // Lane-wise logical AND with the bit mask, meaning that we have @@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Below, we use the following notation: // wi := (1 << i) if vi<0, 0 otherwise. and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B) m.insert(and) // Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have // v0[i] = w(i+8) if i < 8, w(i-8) otherwise. ext := m.allocateInstr() - ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8)) + ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8)) m.insert(ext) // v = [w0, w8, ..., w7, w15] zip1 := m.allocateInstr() - zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B) + zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B) m.insert(zip1) // v.h[0] = w0 + ... + w15 addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) // Extract the v.h[0] as the result. @@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H) m.insert(sshr) // Load the bit mask into r0. @@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3 // = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7 and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) movfv := m.allocateInstr() @@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffffffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S) m.insert(sshr) // Load the bit mask into r0. @@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1] // = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3] and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S) m.insert(addv) movfv := m.allocateInstr() @@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Move the higher 64-bit int into r0. movv1 := m.allocateInstr() - movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false) + movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false) m.insert(movv1) // Move the sign bit into the least significant bit. lsr1 := m.allocateInstr() - lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true) + lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true) m.insert(lsr1) lsr2 := m.allocateInstr() - lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true) + lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true) m.insert(lsr2) // rd = (r0<<1) | rd lsl := m.allocateInstr() - lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false) + lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false) m.insert(lsl) default: panic("Unsupported " + arr.String()) @@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(op, rd, rn, arr) m.insert(ins) } @@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(ret)) + rd := m.compiler.VRegOf(ret) ins.asVecRRR(op, rd, rn, rm, arr) m.insert(ins) } -func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) { if arr != vecArrangement2D { mul := m.allocateInstr() mul.asVecRRR(vecOpMul, rd, rn, rm, arr) m.insert(mul) } else { - tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp1 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp3 := m.compiler.AllocateVReg(ssa.TypeV128) - tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmpRes := m.compiler.AllocateVReg(ssa.TypeV128) // Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696 rev64 := m.allocateInstr() @@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(rev64) mul := m.allocateInstr() - mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S) + mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S) m.insert(mul) xtn1 := m.allocateInstr() @@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(xtn1) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S) + addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S) m.insert(addp) xtn2 := m.allocateInstr() @@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { // In short, in UMLAL instruction, the result register is also one of the source register, and // the value on the result register is significant. shll := m.allocateInstr() - shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S) + shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S) m.insert(shll) umlal := m.allocateInstr() - umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S) + umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S) m.insert(umlal) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmpRes.nr()) + mov.asFpuMov128(rd, tmpRes) m.insert(mov) } } @@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { // BSL modifies the destination register, so we need to use a temporary register so that // the actual definition of the destination register happens *after* the BSL instruction. // That way, we can force the spill instruction to be inserted after the BSL instruction. - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) fcmgt := m.allocateInstr() if max { @@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { res := operandNR(m.compiler.VRegOf(instr.Return())) mov2 := m.allocateInstr() - mov2.asFpuMov128(res.nr(), tmp.nr()) + mov2.asFpuMov128(res.nr(), tmp) m.insert(mov2) } -func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { - div.asALU(aluOpSDiv, rd, rn, rm, _64bit) + div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit) } else { - div.asALU(aluOpUDiv, rd, rn, rm, _64bit) + div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit) } m.insert(div) @@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // rd = rn-rd*rm by MSUB instruction. msub := m.allocateInstr() - msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit) + msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit) m.insert(msub) } -func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { @@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1" minusOneCheck := m.allocateInstr() // Sets eq condition if rm == -1. - minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit) + minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit) m.insert(minusOneCheck) ccmp := m.allocateInstr() @@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c func (m *machine) lowerFcopysign(x, y, ret ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmpI, tmpF operand + var tmpI, tmpF regalloc.VReg _64 := x.Type() == ssa.TypeF64 if _64 { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF64) + tmpI = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF32) + tmpI = m.compiler.AllocateVReg(ssa.TypeI32) } rd := m.compiler.VRegOf(ret) - m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64) + m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64) } -func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) { +func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) { // This is exactly the same code emitted by GCC for "__builtin_copysign": // // mov x0, -9223372036854775808 @@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool setMSB := m.allocateInstr() if _64bit { - m.lowerConstantI64(tmpI.nr(), math.MinInt64) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0)) + m.lowerConstantI64(tmpI, math.MinInt64) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0)) } else { - m.lowerConstantI32(tmpI.nr(), math.MinInt32) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0)) + m.lowerConstantI32(tmpI, math.MinInt32) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0)) } m.insert(setMSB) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + tmpReg := m.compiler.AllocateVReg(ssa.TypeF64) mov := m.allocateInstr() - mov.asFpuMov64(tmpReg.nr(), rn.nr()) + mov.asFpuMov64(tmpReg, rn.nr()) m.insert(mov) vbit := m.allocateInstr() - vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B) + vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B) m.insert(vbit) movDst := m.allocateInstr() - movDst.asFpuMov64(rd.nr(), tmpReg.nr()) + movDst.asFpuMov64(rd, tmpReg) m.insert(movDst) } @@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { v, dstType := instr.BitcastData() srcType := v.Type() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) srcInt := srcType.IsInt() dstInt := dstType.IsInt() switch { @@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone) - rd := operandNR(m.compiler.VRegOf(out)) + rd := m.compiler.VRegOf(out) neg := m.allocateInstr() neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64) m.insert(neg) } -func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { +func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { if !nonTrapping { // First of all, we have to clear the FPU flags. flagClear := m.allocateInstr() @@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 // Check if the conversion was undefined by comparing the status with 1. // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true) + alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true) m.insert(alu) // If it is not undefined, we can return the result. @@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 } } -func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) { +func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) { cvt := m.allocateInstr() cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit) m.insert(cvt) @@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64) m.insert(instr) } @@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) { case !add && yNegated: // rn+rm = x-(-y) = x-y aop = aluOpAdd } - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64) m.insert(alu) @@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext) alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit) + alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit) m.insert(alu) cset := m.allocateInstr() @@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { } } -func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) @@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool if arr == vecArrangement2D { narrow := m.allocateInstr() if signed { - narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S) } else { - narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S) } m.insert(narrow) } } -func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpScvtf, rd, rn, arr) @@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) { x, amount := si.Arg2() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) @@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) - var rd operand + var rd regalloc.VReg if ignoreResult { - rd = operandNR(xzrVReg) + rd = xzrVReg } else { - rd = operandNR(m.compiler.VRegOf(si.Return())) + rd = m.compiler.VRegOf(si.Return()) } _64 := x.Type().Bits() == 64 @@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult c := instr.ConstantVal() if isBitMaskImmediate(c, _64) { // Constant bit wise operations can be lowered to a single instruction. - alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64) + alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64) m.insert(alu) return } @@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - rd := operandNR(m.compiler.VRegOf(r)) + rd := m.compiler.VRegOf(r) // Encode rotl as neg + rotr: neg is a sub against the zero-reg. m.lowerRotlImpl(rd, rn, rm, tmp, _64) } -func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) { +func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) { // Encode rotl as neg + rotr: neg is a sub against the zero-reg. neg := m.allocateInstr() neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit) m.insert(neg) alu := m.allocateInstr() - alu.asALU(aluOpRotR, rd, rn, tmp, is64bit) + alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit) m.insert(alu) } @@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) @@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) { // TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg. mul := m.allocateInstr() - mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64) + mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64) m.insert(mul) } @@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) { // mov x5, v0.d[0] ;; finally we mov the result back to a GPR // - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) ins := m.allocateInstr() - ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0)) + ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0)) m.insert(ins) rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) cnt := m.allocateInstr() - cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B) + cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B) m.insert(cnt) rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) uaddlv := m.allocateInstr() - uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B) + uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B) m.insert(uaddlv) mov := m.allocateInstr() @@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true) setExitCode := m.allocateInstr() - setExitCode.asStore(operandNR(tmpReg1), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), + } + setExitCode.asStore(operandNR(tmpReg1), mode, 32) // In order to unwind the stack, we also need to push the current stack pointer: tmp2 := m.compiler.AllocateVReg(ssa.TypeI64) movSpToTmp := m.allocateInstr() movSpToTmp.asMove64(tmp2, spVReg) strSpToExecCtx := m.allocateInstr() - strSpToExecCtx.asStore(operandNR(tmp2), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode2 := m.amodePool.Allocate() + *mode2 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64) // Also the address of this exit. tmp3 := m.compiler.AllocateVReg(ssa.TypeI64) currentAddrToTmp := m.allocateInstr() currentAddrToTmp.asAdr(tmp3, 0) storeCurrentAddrToExecCtx := m.allocateInstr() - storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode3 := m.amodePool.Allocate() + *mode3 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64) exitSeq := m.allocateInstr() exitSeq.asExitSequence(execCtxVReg) @@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, rm, x.Type().Bits() == 64, @@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, operandNR(xzrVReg), c.Type().Bits() == 64, @@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) switch x.Type() { case ssa.TypeI32, ssa.TypeI64: // csel rd, rn, rm, cc @@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { } } -func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { +func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) { // First check if `rc` is zero or not. checkZero := m.allocateInstr() - checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false) + checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false) m.insert(checkZero) // Then use CSETM to set all bits to one if `rc` is zero. @@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { m.insert(cset) // Then move the bits to the result vector register. - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) dup := m.allocateInstr() dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D) m.insert(dup) @@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { // Finally, move the result to the destination register. mov2 := m.allocateInstr() - mov2.asFpuMov128(rd.nr(), tmp2.nr()) + mov2.asFpuMov128(rd, tmp2) m.insert(mov2) } @@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) { addr, val := si.Arg2() addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) rs := m.getOperand_NR(valDef, extModeNone) _64 := si.Return().Type().Bits() == 64 - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64) + m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64) } -func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) { +func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) { switch { case negateArg: neg := m.allocateInstr() - neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit) + neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(neg) case flipArg: flip := m.allocateInstr() - flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit) + flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(flip) default: tmp = rs @@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) { rn := m.getOperand_NR(addrDef, extModeNone) rt := m.getOperand_NR(replDef, extModeNone) rs := m.getOperand_NR(expDef, extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type())) + tmp := m.compiler.AllocateVReg(si.Return().Type()) _64 := si.Return().Type().Bits() == 64 // rs is overwritten by CAS, so we need to move it to the result register before the instruction // in case when it is used somewhere else. mov := m.allocateInstr() if _64 { - mov.asMove64(tmp.nr(), rs.nr()) + mov.asMove64(tmp, rs.nr()) } else { - mov.asMove32(tmp.nr(), rs.nr()) + mov.asMove32(tmp, rs.nr()) } m.insert(mov) - m.lowerAtomicCasImpl(rn, tmp, rt, size) + m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size) mov2 := m.allocateInstr() rd := m.compiler.VRegOf(si.Return()) if _64 { - mov2.asMove64(rd, tmp.nr()) + mov2.asMove64(rd, tmp) } else { - mov2.asMove32(rd, tmp.nr()) + mov2.asMove32(rd, tmp) } m.insert(mov2) } -func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) { +func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) { cas := m.allocateInstr() cas.asAtomicCas(rn, rs, rt, size) m.insert(cas) @@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) { addrDef := m.compiler.ValueDefinition(addr) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) - m.lowerAtomicLoadImpl(rn, rt, size) + m.lowerAtomicLoadImpl(rn.nr(), rt, size) } -func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) { +func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) { ld := m.allocateInstr() ld.asAtomicLoad(rn, rt, size) m.insert(ld) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go index d9fbf1789b..7a398c3d09 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go @@ -162,9 +162,9 @@ func (o operand) assignReg(v regalloc.VReg) operand { // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). // If the operand can be expressed as operandKindImm12, `mode` is ignored. -func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_Imm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } instr := def.Instr @@ -179,9 +179,9 @@ func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mod // getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value. // If the immediate value is negated, the second return value is true, otherwise always false. -func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg), false +func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)), false } instr := def.Instr @@ -193,7 +193,7 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef } signExtended := int64(c) - if def.SSAValue().Type().Bits() == 32 { + if def.V.Type().Bits() == 32 { signExtended = (signExtended << 32) >> 32 } negatedWithoutSign := -signExtended @@ -208,9 +208,9 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef // ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { @@ -251,9 +251,9 @@ func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extM // ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { @@ -273,9 +273,9 @@ func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode } // getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). -func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ShiftImm_NR(def backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } instr := def.Instr @@ -289,28 +289,18 @@ func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode e // ensureValueNR returns an operand of operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { +func (m *machine) getOperand_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { var v regalloc.VReg - if def.IsFromBlockParam() { - v = def.BlkParamVReg + if def.IsFromInstr() && def.Instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(def.Instr) + def.Instr.MarkLowered() } else { - instr := def.Instr - if instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(instr) - instr.MarkLowered() - } else { - if n := def.N; n == 0 { - v = m.compiler.VRegOf(instr.Return()) - } else { - _, rs := instr.Returns() - v = m.compiler.VRegOf(rs[n-1]) - } - } + v = m.compiler.VRegOf(def.V) } r := v - switch inBits := def.SSAValue().Type().Bits(); { + switch inBits := def.V.Type().Bits(); { case mode == extModeNone: case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32): case inBits == 32 && mode == extModeZeroExtend64: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index 4842eaa382..fd0760d723 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -24,6 +24,14 @@ type ( addressModeKind byte ) +func resetAddressMode(a *addressMode) { + a.kind = 0 + a.rn = 0 + a.rm = 0 + a.extOp = 0 + a.imm = 0 +} + const ( // addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended, // and then scaled by bits(type)/8. @@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) { return } -func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode { +func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode { if !offsetFitsInAddressModeKindRegSignedImm9(imm) { panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm)) } + mode := m.amodePool.Allocate() if preIndex { - return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} + *mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} } else { - return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} + *mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} } + return mode } func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool { @@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret amode := m.lowerToAddressMode(ptr, offset, size) load := m.allocateInstr() if signed { - load.asSLoad(operandNR(ret), amode, size) + load.asSLoad(ret, amode, size) } else { - load.asULoad(operandNR(ret), amode, size) + load.asULoad(ret, amode, size) } m.insert(load) } @@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa. load := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(dst), amode, typ.Bits()) + load.asULoad(dst, amode, typ.Bits()) case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(operandNR(dst), amode, typ.Bits()) + load.asFpuLoad(dst, amode, typ.Bits()) case ssa.TypeV128: - load.asFpuLoad(operandNR(dst), amode, 128) + load.asFpuLoad(dst, amode, 128) default: panic("TODO") } @@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, m.lowerConstantI64(offsetReg, int64(offset)) addedBase := m.addReg64ToReg64(base, offsetReg) - rd := operandNR(m.compiler.VRegOf(ret)) + rd := m.compiler.VRegOf(ret) ld1r := m.allocateInstr() ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane)) @@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) { } // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. -func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) { +func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) { // TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and // addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed // to support more efficient address resolution. @@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte // During the construction, this might emit additional instructions. // // Extracted as a separate function for easy testing. -func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) { +func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) { + amode = m.amodePool.Allocate() switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); { case a64sExist && a32sExist: var base regalloc.VReg base = a64s.Dequeue() var a32 addend32 a32 = a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} offset = 0 case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} offset = 0 case a64sExist: var base regalloc.VReg base = a64s.Dequeue() if !a64s.Empty() { index := a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } case a32sExist: base32 := a32s.Dequeue() @@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], if !a32s.Empty() { index := a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } default: // Only static offsets. tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(tmpReg, offset) - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} offset = 0 } @@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() if imm12Op, ok := asImm12Operand(uint64(c)); ok { - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true) } else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { - alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true) } else { tmp := m.compiler.AllocateVReg(ssa.TypeI64) m.load64bitConst(c, tmp) - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true) + alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true) } m.insert(alu) return @@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true) m.insert(alu) return } @@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true) m.insert(alu) return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index b435d9ba96..00e6b238f9 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -3,6 +3,7 @@ package arm64 import ( "context" "fmt" + "math" "strings" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" @@ -14,12 +15,35 @@ import ( type ( // machine implements backend.Machine. machine struct { - compiler backend.Compiler - executableContext *backend.ExecutableContextT[instruction] - currentABI *backend.FunctionABI - - regAlloc regalloc.Allocator - regAllocFn *backend.RegAllocFunction[*instruction, *machine] + compiler backend.Compiler + currentABI *backend.FunctionABI + instrPool wazevoapi.Pool[instruction] + // labelPositionPool is the pool of labelPosition. The id is the label where + // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. + labelPositionPool wazevoapi.IDedPool[labelPosition] + + // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID + // so that we can have an identical label for the SSA block ID, which is useful for debugging. + nextLabel label + // rootInstr is the first instruction of the function. + rootInstr *instruction + // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. + currentLabelPos *labelPosition + // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. + orderedSSABlockLabelPos []*labelPosition + // returnLabelPos is the labelPosition for the return block. + returnLabelPos labelPosition + // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. + perBlockHead, perBlockEnd *instruction + // pendingInstructions are the instructions which are not yet emitted into the instruction list. + pendingInstructions []*instruction + // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. + maxSSABlockID label + + regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] + regAllocFn regAllocFn + + amodePool wazevoapi.Pool[addressMode] // addendsWorkQueue is used during address lowering, defined here for reuse. addendsWorkQueue wazevoapi.Queue[ssa.Value] @@ -33,6 +57,8 @@ type ( // jmpTableTargets holds the labels of the jump table targets. jmpTableTargets [][]uint32 + // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. + jmpTableTargetsNext int // spillSlotSize is the size of the stack slot in bytes used for spilling registers. // During the execution of the function, the stack looks like: @@ -89,44 +115,132 @@ type ( nextLabel label offset int64 } +) - labelPosition = backend.LabelPosition[instruction] - label = backend.Label +type ( + // label represents a position in the generated code which is either + // a real instruction or the constant InstructionPool (e.g. jump tables). + // + // This is exactly the same as the traditional "label" in assembly code. + label uint32 + + // labelPosition represents the regions of the generated code which the label represents. + // This implements regalloc.Block. + labelPosition struct { + // sb is not nil if this corresponds to a ssa.BasicBlock. + sb ssa.BasicBlock + // cur is used to walk through the instructions in the block during the register allocation. + cur, + // begin and end are the first and last instructions of the block. + begin, end *instruction + // binaryOffset is the offset in the binary where the label is located. + binaryOffset int64 + } ) const ( - labelReturn = backend.LabelReturn - labelInvalid = backend.LabelInvalid + labelReturn label = math.MaxUint32 + labelInvalid = labelReturn - 1 ) +// String implements backend.Machine. +func (l label) String() string { + return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { + *l = labelPosition{} +} + // NewBackend returns a new backend for arm64. func NewBackend() backend.Machine { m := &machine{ spillSlots: make(map[regalloc.VRegID]int64), - executableContext: newExecutableContext(), - regAlloc: regalloc.NewAllocator(regInfo), + regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), + amodePool: wazevoapi.NewPool[addressMode](resetAddressMode), + instrPool: wazevoapi.NewPool[instruction](resetInstruction), + labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), } + m.regAllocFn.m = m return m } -func newExecutableContext() *backend.ExecutableContextT[instruction] { - return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0) +func ssaBlockLabel(sb ssa.BasicBlock) label { + if sb.ReturnBlock() { + return labelReturn + } + return label(sb.ID()) +} + +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { + if sb.ReturnBlock() { + m.returnLabelPos.sb = sb + return &m.returnLabelPos + } + + l := ssaBlockLabel(sb) + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.sb = sb + return pos } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { - return m.executableContext +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { + prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) + prevPos.end.next = nextPos.begin } -// RegAlloc implements backend.Machine Function. -func (m *machine) RegAlloc() { - rf := m.regAllocFn - for _, pos := range m.executableContext.OrderedBlockLabels { - rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { + m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) + labelPos := m.currentLabelPos + end := m.allocateNop() + m.perBlockHead, m.perBlockEnd = end, end + labelPos.begin, labelPos.end = end, end + m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { + // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. + m.insertAtPerBlockHead(m.allocateNop()) + + m.currentLabelPos.begin = m.perBlockHead + + if m.currentLabelPos.sb.EntryBlock() { + m.rootInstr = m.perBlockHead } +} +func (m *machine) insertAtPerBlockHead(i *instruction) { + if m.perBlockHead == nil { + m.perBlockHead = i + m.perBlockEnd = i + return + } + + i.next = m.perBlockHead + m.perBlockHead.prev = i + m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { + l := len(m.pendingInstructions) + if l == 0 { + return + } + for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. + m.insertAtPerBlockHead(m.pendingInstructions[i]) + } + m.pendingInstructions = m.pendingInstructions[:0] +} + +// RegAlloc implements backend.Machine Function. +func (m *machine) RegAlloc() { m.regAllocStarted = true - m.regAlloc.DoAllocation(rf) + m.regAlloc.DoAllocation(&m.regAllocFn) // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 } @@ -143,12 +257,22 @@ func (m *machine) Reset() { m.clobberedRegs = m.clobberedRegs[:0] m.regAllocStarted = false m.regAlloc.Reset() - m.regAllocFn.Reset() m.spillSlotSize = 0 m.unresolvedAddressModes = m.unresolvedAddressModes[:0] m.maxRequiredStackSizeForCalls = 0 - m.executableContext.Reset() - m.jmpTableTargets = m.jmpTableTargets[:0] + m.jmpTableTargetsNext = 0 + m.amodePool.Reset() + m.instrPool.Reset() + m.labelPositionPool.Reset() + m.pendingInstructions = m.pendingInstructions[:0] + m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil + m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] +} + +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { + m.maxSSABlockID = label(maxBlockID) + m.nextLabel = label(maxBlockID) + 1 } // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -164,12 +288,11 @@ func (m *machine) DisableStackCheck() { // SetCompiler implements backend.Machine. func (m *machine) SetCompiler(ctx backend.Compiler) { m.compiler = ctx - m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx) + m.regAllocFn.ssaB = ctx.SSABuilder() } func (m *machine) insert(i *instruction) { - ectx := m.executableContext - ectx.PendingInstructions = append(ectx.PendingInstructions, i) + m.pendingInstructions = append(m.pendingInstructions, i) } func (m *machine) insertBrTargetLabel() label { @@ -179,19 +302,18 @@ func (m *machine) insertBrTargetLabel() label { } func (m *machine) allocateBrTarget() (nop *instruction, l label) { - ectx := m.executableContext - l = ectx.AllocateLabel() + l = m.nextLabel + m.nextLabel++ nop = m.allocateInstr() nop.asNop0WithLabel(l) - pos := ectx.AllocateLabelPosition(l) - pos.Begin, pos.End = nop, nop - ectx.LabelPositions[l] = pos + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.begin, pos.end = nop, nop return } // allocateInstr allocates an instruction. func (m *machine) allocateInstr() *instruction { - instr := m.executableContext.InstructionPool.Allocate() + instr := m.instrPool.Allocate() if !m.regAllocStarted { instr.addedBeforeRegAlloc = true } @@ -209,7 +331,7 @@ func (m *machine) allocateNop() *instruction { } func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { - amode := &i.amode + amode := i.getAmode() switch amode.kind { case addressModeKindResultStackSpace: amode.imm += ret0offset @@ -248,7 +370,6 @@ func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruc // resolveRelativeAddresses resolves the relative addresses before encoding. func (m *machine) resolveRelativeAddresses(ctx context.Context) { - ectx := m.executableContext for { if len(m.unresolvedAddressModes) > 0 { arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() @@ -262,35 +383,36 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { var fn string var fnIndex int - var labelToSSABlockID map[label]ssa.BasicBlockID + var labelPosToLabel map[*labelPosition]label if wazevoapi.PerfMapEnabled { - fn = wazevoapi.GetCurrentFunctionName(ctx) - labelToSSABlockID = make(map[label]ssa.BasicBlockID) - for i, l := range ectx.SsaBlockIDToLabels { - labelToSSABlockID[l] = ssa.BasicBlockID(i) + labelPosToLabel = make(map[*labelPosition]label) + for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { + labelPosToLabel[m.labelPositionPool.Get(i)] = label(i) } + + fn = wazevoapi.GetCurrentFunctionName(ctx) fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) } // Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label. var offset int64 - for i, pos := range ectx.OrderedBlockLabels { - pos.BinaryOffset = offset + for i, pos := range m.orderedSSABlockLabelPos { + pos.binaryOffset = offset var size int64 - for cur := pos.Begin; ; cur = cur.next { + for cur := pos.begin; ; cur = cur.next { switch cur.kind { case nop0: l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { - pos.BinaryOffset = offset + size + if pos := m.labelPositionPool.Get(int(l)); pos != nil { + pos.binaryOffset = offset + size } case condBr: if !cur.condBrOffsetResolved() { var nextLabel label - if i < len(ectx.OrderedBlockLabels)-1 { + if i < len(m.orderedSSABlockLabelPos)-1 { // Note: this is only used when the block ends with fallthrough, // therefore can be safely assumed that the next block exists when it's needed. - nextLabel = ectx.OrderedBlockLabels[i+1].L + nextLabel = ssaBlockLabel(m.orderedSSABlockLabelPos[i+1].sb) } m.condBrRelocs = append(m.condBrRelocs, condBrReloc{ cbr: cur, currentLabelPos: pos, offset: offset + size, @@ -299,21 +421,14 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { } } size += cur.size() - if cur == pos.End { + if cur == pos.end { break } } if wazevoapi.PerfMapEnabled { if size > 0 { - l := pos.L - var labelStr string - if blkID, ok := labelToSSABlockID[l]; ok { - labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) - } else { - labelStr = l.String() - } - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelPosToLabel[pos])) } } offset += size @@ -327,7 +442,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { offset := reloc.offset target := cbr.condBrLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - offset if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { // This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, @@ -348,11 +463,11 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { } var currentOffset int64 - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch cur.kind { case br: target := cur.brLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - currentOffset divided := diff >> 2 if divided < minSignedInt26 || divided > maxSignedInt26 { @@ -363,7 +478,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { case condBr: if !cur.condBrOffsetResolved() { target := cur.condBrLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - currentOffset if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") @@ -375,7 +490,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { targets := m.jmpTableTargets[tableIndex] for i := range targets { l := label(targets[i]) - offsetOfTarget := ectx.LabelPositions[l].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(l)).binaryOffset diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin) targets[i] = uint32(diff) } @@ -396,7 +511,7 @@ const ( ) func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { - cur := currentBlk.End + cur := currentBlk.end originalTarget := cbr.condBrLabel() endNext := cur.next @@ -419,30 +534,27 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk * cur = linkInstr(cur, br) // Update the end of the current block. - currentBlk.End = cur + currentBlk.end = cur linkInstr(cur, endNext) } // Format implements backend.Machine. func (m *machine) Format() string { - ectx := m.executableContext begins := map[*instruction]label{} - for l, pos := range ectx.LabelPositions { - begins[pos.Begin] = l - } - - irBlocks := map[label]ssa.BasicBlockID{} - for i, l := range ectx.SsaBlockIDToLabels { - irBlocks[l] = ssa.BasicBlockID(i) + for l := label(0); l < m.nextLabel; l++ { + pos := m.labelPositionPool.Get(int(l)) + if pos != nil { + begins[pos.begin] = l + } } var lines []string - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { if l, ok := begins[cur]; ok { var labelStr string - if blkID, ok := irBlocks[l]; ok { - labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + if l <= m.maxSSABlockID { + labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, int(l)) } else { labelStr = fmt.Sprintf("%s:", l) } @@ -503,13 +615,17 @@ func (m *machine) frameSize() int64 { return s } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { - // TODO: reuse the slice! - labels := make([]uint32, len(targets)) - for j, target := range targets { - labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { + if m.jmpTableTargetsNext == len(m.jmpTableTargets) { + m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) + } + + index = m.jmpTableTargetsNext + m.jmpTableTargetsNext++ + m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] + for _, targetBlockID := range targets.View() { + target := m.compiler.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) + m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(target.ID())) } - index = len(m.jmpTableTargets) - m.jmpTableTargets = append(m.jmpTableTargets, labels) return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index 466fac4640..c646a8fab0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -15,9 +15,7 @@ func (m *machine) PostRegAlloc() { // setupPrologue initializes the prologue of the function. func (m *machine) setupPrologue() { - ectx := m.executableContext - - cur := ectx.RootInstr + cur := m.rootInstr prevInitInst := cur.next // @@ -70,7 +68,7 @@ func (m *machine) setupPrologue() { // +-----------------+ <----- SP // (low address) // - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -159,7 +157,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc sizeOfArgRetReg = tmpRegVReg subSp := m.allocateInstr() - subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true) + subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true) cur = linkInstr(cur, subSp) } else { sizeOfArgRetReg = xzrVReg @@ -168,7 +166,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc // Saves the return address (lr) and the size_of_arg_ret below the SP. // size_of_arg_ret is used for stack unwinding. pstr := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */) + amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */) pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode) cur = linkInstr(cur, pstr) return cur @@ -182,7 +180,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { } else { frameSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -196,24 +194,23 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { // 1. Removes the redundant copy instruction. // 2. Inserts the epilogue. func (m *machine) postRegAlloc() { - ectx := m.executableContext - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch cur.kind { case ret: m.setupEpilogueAfter(cur.prev) case loadConstBlockArg: lc := cur next := lc.next - m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerLoadConstantBlockArgAfterRegAlloc(lc) - for _, instr := range m.executableContext.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) - m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] default: // Removes the redundant copy instruction. - if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() { + if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { prev, next := cur.prev, cur.next // Remove the copy instruction. prev.next = next @@ -286,16 +283,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { for i := range m.clobberedRegs { vr := m.clobberedRegs[l-i] // reverse order to restore. load := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, + amode := addressModePreOrPostIndex(m, spVReg, 16, // stack pointer must be 16-byte aligned. false, // Increment after store. ) // TODO: pair loads to reduce the number of instructions. switch regTypeToRegisterSizeInBits(vr.RegType()) { case 64: // save int reg. - load.asULoad(operandNR(vr), amode, 64) + load.asULoad(vr, amode, 64) case 128: // save vector reg. - load.asFpuLoad(operandNR(vr), amode, 128) + load.asFpuLoad(vr, amode, 128) } cur = linkInstr(cur, load) } @@ -317,8 +314,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { // SP----> +-----------------+ ldr := m.allocateInstr() - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + ldr.asULoad(lrVReg, + addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) cur = linkInstr(cur, ldr) if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { @@ -351,14 +348,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok { // sub tmp, sp, #requiredStackSize sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true) cur = linkInstr(cur, sub) } else { // This case, we first load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) // Then subtract it. sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) cur = linkInstr(cur, sub) } @@ -366,16 +363,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // ldr tmp2, [executionContext #StackBottomPtr] ldr := m.allocateInstr() - ldr.asULoad(operandNR(tmp2), addressMode{ + amode := m.amodePool.Allocate() + *amode = addressMode{ kind: addressModeKindRegUnsignedImm12, rn: x0VReg, // execution context is always the first argument. imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), - }, 64) + } + ldr.asULoad(tmp2, amode, 64) cur = linkInstr(cur, ldr) // subs xzr, tmp, tmp2 subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true) + subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true) cur = linkInstr(cur, subs) // b.ge #imm @@ -388,22 +387,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // First load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) setRequiredStackSize := m.allocateInstr() - setRequiredStackSize.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), - }, 64) + amode := m.amodePool.Allocate() + *amode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), + } + setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64) cur = linkInstr(cur, setRequiredStackSize) } ldrAddress := m.allocateInstr() - ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{ + amode2 := m.amodePool.Allocate() + *amode2 = addressMode{ kind: addressModeKindRegUnsignedImm12, rn: x0VReg, // execution context is always the first argument imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(), - }, 64) + } + ldrAddress.asULoad(tmpRegVReg, amode2, 64) cur = linkInstr(cur, ldrAddress) // Then jumps to the stack grow call sequence's address, meaning @@ -427,11 +429,9 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // CompileStackGrowCallSequence implements backend.Machine. func (m *machine) CompileStackGrowCallSequence() []byte { - ectx := m.executableContext - cur := m.allocateInstr() cur.asNop0() - ectx.RootInstr = cur + m.rootInstr = cur // Save the callee saved and argument registers. cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) @@ -453,16 +453,14 @@ func (m *machine) CompileStackGrowCallSequence() []byte { ret.asRet() linkInstr(cur, ret) - m.encode(ectx.RootInstr) + m.encode(m.rootInstr) return m.compiler.Buf() } func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { - ectx := m.executableContext - - ectx.PendingInstructions = ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.insertAddOrSubStackPointer(rd, diff, add) - for _, inserted := range ectx.PendingInstructions { + for _, inserted := range m.pendingInstructions { cur = linkInstr(cur, inserted) } return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index 1c8793b73d..f2ed53ae55 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -3,18 +3,226 @@ package arm64 // This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine. import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { - m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +// regAllocFn implements regalloc.Function. +type regAllocFn struct { + ssaB ssa.Builder + m *machine + loopNestingForestRoots []ssa.BasicBlock + blockIter int } -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 + return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { + if f.blockIter < 0 { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter-- + return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = 0 + return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { + if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter++ + return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { + f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { + f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() + return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { + root := f.loopNestingForestRoots[i] + pos := f.m.getOrAllocateSSABlockLabelPosition(root) + return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { + sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { + sb := f.ssaB.Idom(blk.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { + f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { + childSB := pos.sb.LoopNestingForestChildren()[i] + return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { + succSB := pos.sb.Succ(i) + if succSB.ReturnBlock() { + return nil + } + return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { + predSB := pos.sb.Pred(i) + return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { + c := f.m.compiler + *regs = (*regs)[:0] + for i := 0; i < pos.sb.Params(); i++ { + v := c.VRegOf(pos.sb.Param(i)) + *regs = append(*regs, v) + } + return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { + return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { + ret := pos.begin + pos.cur = ret + return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { + for { + if pos.cur == pos.end { + return nil + } + instr := pos.cur.next + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { + pos.cur = pos.end + return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { + for { + if pos.cur == pos.begin { + return nil + } + instr := pos.cur.prev + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { + return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { + return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { prevNext := cur.next var mov1, mov2, mov3 *instruction if x1.RegType() == regalloc.RegTypeInt { @@ -32,12 +240,12 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { if !tmp.Valid() { r2 := x2.RealReg() // Temporarily spill x1 to stack. - cur = m.InsertStoreRegisterAt(x1, cur, true).prev + cur = m.insertStoreRegisterAt(x1, cur, true).prev // Then move x2 to x1. cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2)) linkInstr(cur, prevNext) // Then reload the original value on x1 from stack to r2. - m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) } else { mov1 = m.allocateInstr().asFpuMov128(tmp, x1) mov2 = m.allocateInstr().asFpuMov128(x1, x2) @@ -50,8 +258,7 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { } } -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { typ := src.RegType() if typ != dst.RegType() { panic("BUG: src and dst must have the same type") @@ -70,13 +277,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { linkInstr(cur, prevNext) } -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { - return m.executableContext.SsaBlockIDToLabels[id] -} - -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -91,7 +292,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft } offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode addressMode + var amode *addressMode cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) store := m.allocateInstr() store.asStore(operandNR(v), amode, typ.Bits()) @@ -100,8 +301,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft return linkInstr(cur, prevNext) } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -116,16 +316,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af } offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode addressMode + var amode *addressMode cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) load := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(v), amode, typ.Bits()) + load.asULoad(v, amode, typ.Bits()) case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(operandNR(v), amode, typ.Bits()) + load.asFpuLoad(v, amode, typ.Bits()) case ssa.TypeV128: - load.asFpuLoad(operandNR(v), amode, 128) + load.asFpuLoad(v, amode, 128) default: panic("TODO") } @@ -134,8 +334,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af return linkInstr(cur, prevNext) } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction { cur := end for cur.kind == nop0 { cur = cur.prev diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go index edb0e36e33..a72b86f6bf 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go @@ -14,7 +14,7 @@ func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr { var stackBuf []byte { - // TODO: use unsafe.Slice after floor version is set to Go 1.20. + //nolint:staticcheck hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) hdr.Data = sp hdr.Len = l @@ -78,13 +78,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { // +-----------------+ <---- stackPointerBeforeGoCall // (low address) ptr := unsafe.Pointer(stackPointerBeforeGoCall) + data := (*uint64)(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). size := *(*uint64)(unsafe.Add(ptr, 8)) - var view []uint64 - { - sh := (*reflect.SliceHeader)(unsafe.Pointer(&view)) - sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). - sh.Len = int(size) - sh.Cap = int(size) - } - return view + return unsafe.Slice(data, size) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go index 54ce89e468..9044a9e4bc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go @@ -11,7 +11,24 @@ import ( type ( // Machine is a backend for a specific ISA machine. Machine interface { - ExecutableContext() ExecutableContext + // StartLoweringFunction is called when the compilation of the given function is started. + // The maxBlockID is the maximum ssa.BasicBlockID in the function. + StartLoweringFunction(maxBlockID ssa.BasicBlockID) + + // LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. + LinkAdjacentBlocks(prev, next ssa.BasicBlock) + + // StartBlock is called when the compilation of the given block is started. + // The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with + // ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. + StartBlock(ssa.BasicBlock) + + // EndBlock is called when the compilation of the current block is finished. + EndBlock() + + // FlushPendingInstructions flushes the pending instructions to the buffer. + // This will be called after the lowering of each SSA Instruction. + FlushPendingInstructions() // DisableStackCheck disables the stack check for the current compilation for debugging/testing. DisableStackCheck() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go deleted file mode 100644 index 3f36c84e57..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go +++ /dev/null @@ -1,319 +0,0 @@ -package backend - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction. -type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface { - // InsertMoveBefore inserts the move instruction from src to dst before the given instruction. - InsertMoveBefore(dst, src regalloc.VReg, instr I) - // InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction. - // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. - InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I - // InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction. - // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. - InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I - // ClobberedRegisters is called when the register allocation is done and the clobbered registers are known. - ClobberedRegisters(regs []regalloc.VReg) - // Swap swaps the two virtual registers after the given instruction. - Swap(cur I, x1, x2, tmp regalloc.VReg) - // LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details. - LastInstrForInsertion(begin, end I) I - // SSABlockLabel returns the label of the given ssa.BasicBlockID. - SSABlockLabel(id ssa.BasicBlockID) Label -} - -type ( - // RegAllocFunction implements regalloc.Function. - RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { - m m - ssb ssa.Builder - c Compiler - // iter is the iterator for reversePostOrderBlocks - iter int - reversePostOrderBlocks []RegAllocBlock[I, m] - // labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks. - labelToRegAllocBlockIndex map[Label]int - loopNestingForestRoots []ssa.BasicBlock - } - - // RegAllocBlock implements regalloc.Block. - RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { - // f is the function this instruction belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses(). - f *RegAllocFunction[I, m] - sb ssa.BasicBlock - l Label - begin, end I - loopNestingForestChildren []ssa.BasicBlock - cur I - id int - cachedLastInstrForInsertion I - } -) - -// NewRegAllocFunction returns a new RegAllocFunction. -func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] { - return &RegAllocFunction[I, M]{ - m: m, - ssb: ssb, - c: c, - labelToRegAllocBlockIndex: make(map[Label]int), - } -} - -// AddBlock adds a new block to the function. -func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) { - i := len(f.reversePostOrderBlocks) - f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{ - f: f, - sb: sb, - l: l, - begin: begin, - end: end, - id: int(sb.ID()), - }) - f.labelToRegAllocBlockIndex[l] = i -} - -// Reset resets the function for the next compilation. -func (f *RegAllocFunction[I, M]) Reset() { - f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0] - f.iter = 0 -} - -// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter. -func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { - m := f.m - m.InsertStoreRegisterAt(v, instr.(I), true) -} - -// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore. -func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { - m := f.m - m.InsertReloadRegisterAt(v, instr.(I), false) -} - -// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter. -func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { - m := f.m - m.InsertReloadRegisterAt(v, instr.(I), true) -} - -// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore. -func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { - m := f.m - m.InsertStoreRegisterAt(v, instr.(I), false) -} - -// ClobberedRegisters implements regalloc.Function ClobberedRegisters. -func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) { - f.m.ClobberedRegisters(regs) -} - -// SwapBefore implements regalloc.Function SwapBefore. -func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) { - f.m.Swap(instr.Prev().(I), x1, x2, tmp) -} - -// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin. -func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block { - f.iter = len(f.reversePostOrderBlocks) - 1 - return f.PostOrderBlockIteratorNext() -} - -// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext. -func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block { - if f.iter < 0 { - return nil - } - b := &f.reversePostOrderBlocks[f.iter] - f.iter-- - return b -} - -// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin. -func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block { - f.iter = 0 - return f.ReversePostOrderBlockIteratorNext() -} - -// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext. -func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block { - if f.iter >= len(f.reversePostOrderBlocks) { - return nil - } - b := &f.reversePostOrderBlocks[f.iter] - f.iter++ - return b -} - -// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots. -func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int { - f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots() - return len(f.loopNestingForestRoots) -} - -// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot. -func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block { - blk := f.loopNestingForestRoots[i] - l := f.m.SSABlockLabel(blk.ID()) - index := f.labelToRegAllocBlockIndex[l] - return &f.reversePostOrderBlocks[index] -} - -// InsertMoveBefore implements regalloc.Function InsertMoveBefore. -func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) { - f.m.InsertMoveBefore(dst, src, instr.(I)) -} - -// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor. -func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block { - ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb) - l := f.m.SSABlockLabel(ret.ID()) - index := f.labelToRegAllocBlockIndex[l] - return &f.reversePostOrderBlocks[index] -} - -// Idom implements regalloc.Function Idom. -func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block { - builder := f.ssb - idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb) - if idom == nil { - panic("BUG: idom must not be nil") - } - l := f.m.SSABlockLabel(idom.ID()) - index := f.labelToRegAllocBlockIndex[l] - return &f.reversePostOrderBlocks[index] -} - -// ID implements regalloc.Block. -func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) } - -// BlockParams implements regalloc.Block. -func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg { - c := r.f.c - *regs = (*regs)[:0] - for i := 0; i < r.sb.Params(); i++ { - v := c.VRegOf(r.sb.Param(i)) - *regs = append(*regs, v) - } - return *regs -} - -// InstrIteratorBegin implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr { - r.cur = r.begin - return r.cur -} - -// InstrIteratorNext implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr { - for { - if r.cur == r.end { - return nil - } - instr := r.cur.Next() - r.cur = instr.(I) - if instr == nil { - return nil - } else if instr.AddedBeforeRegAlloc() { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// InstrRevIteratorBegin implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr { - r.cur = r.end - return r.cur -} - -// InstrRevIteratorNext implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr { - for { - if r.cur == r.begin { - return nil - } - instr := r.cur.Prev() - r.cur = instr.(I) - if instr == nil { - return nil - } else if instr.AddedBeforeRegAlloc() { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// FirstInstr implements regalloc.Block. -func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr { - return r.begin -} - -// EndInstr implements regalloc.Block. -func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr { - return r.end -} - -// LastInstrForInsertion implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr { - var nil I - if r.cachedLastInstrForInsertion == nil { - r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end) - } - return r.cachedLastInstrForInsertion -} - -// Preds implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() } - -// Pred implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block { - sb := r.sb - pred := sb.Pred(i) - l := r.f.m.SSABlockLabel(pred.ID()) - index := r.f.labelToRegAllocBlockIndex[l] - return &r.f.reversePostOrderBlocks[index] -} - -// Entry implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() } - -// Succs implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Succs() int { - return r.sb.Succs() -} - -// Succ implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block { - sb := r.sb - succ := sb.Succ(i) - if succ.ReturnBlock() { - return nil - } - l := r.f.m.SSABlockLabel(succ.ID()) - index := r.f.labelToRegAllocBlockIndex[l] - return &r.f.reversePostOrderBlocks[index] -} - -// LoopHeader implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LoopHeader() bool { - return r.sb.LoopHeader() -} - -// LoopNestingForestChildren implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int { - r.loopNestingForestChildren = r.sb.LoopNestingForestChildren() - return len(r.loopNestingForestChildren) -} - -// LoopNestingForestChild implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block { - blk := r.loopNestingForestChildren[i] - l := r.f.m.SSABlockLabel(blk.ID()) - index := r.f.labelToRegAllocBlockIndex[l] - return &r.f.reversePostOrderBlocks[index] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go index 23157b4782..5d15bd9dc1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go @@ -4,104 +4,100 @@ import "fmt" // These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register // allocators to work on any ISA. -// -// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode -// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged -// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html type ( // Function is the top-level interface to do register allocation, which corresponds to a CFG containing // Blocks(s). - Function interface { + // + // I is the type of the instruction, and B is the type of the basic block. + Function[I Instr, B Block[I]] interface { // PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG. // In other words, the last blocks in the CFG will be returned first. - PostOrderBlockIteratorBegin() Block + PostOrderBlockIteratorBegin() B // PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG. - PostOrderBlockIteratorNext() Block + PostOrderBlockIteratorNext() B // ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG. // In other words, the first blocks in the CFG will be returned first. - ReversePostOrderBlockIteratorBegin() Block + ReversePostOrderBlockIteratorBegin() B // ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG. - ReversePostOrderBlockIteratorNext() Block + ReversePostOrderBlockIteratorNext() B // ClobberedRegisters tell the clobbered registers by this function. ClobberedRegisters([]VReg) // LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function. LoopNestingForestRoots() int // LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function. - LoopNestingForestRoot(i int) Block + LoopNestingForestRoot(i int) B // LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree. - LowestCommonAncestor(blk1, blk2 Block) Block + LowestCommonAncestor(blk1, blk2 B) B // Idom returns the immediate dominator of the given block. - Idom(blk Block) Block + Idom(blk B) B + + // LoopNestingForestChild returns the i-th child of the block in the loop nesting forest. + LoopNestingForestChild(b B, i int) B + // Pred returns the i-th predecessor of the block in the CFG. + Pred(b B, i int) B + // Succ returns the i-th successor of the block in the CFG. + Succ(b B, i int) B + // BlockParams returns the virtual registers used as the parameters of this block. + BlockParams(B, *[]VReg) []VReg // Followings are for rewriting the function. - // SwapAtEndOfBlock swaps the two virtual registers at the end of the given block. - SwapBefore(x1, x2, tmp VReg, instr Instr) + // SwapBefore swaps the two virtual registers at the end of the given block. + SwapBefore(x1, x2, tmp VReg, instr I) // StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register. - StoreRegisterBefore(v VReg, instr Instr) + StoreRegisterBefore(v VReg, instr I) // StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register. - StoreRegisterAfter(v VReg, instr Instr) + StoreRegisterAfter(v VReg, instr I) // ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register. - ReloadRegisterBefore(v VReg, instr Instr) + ReloadRegisterBefore(v VReg, instr I) // ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register. - ReloadRegisterAfter(v VReg, instr Instr) + ReloadRegisterAfter(v VReg, instr I) // InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers. - InsertMoveBefore(dst, src VReg, instr Instr) + InsertMoveBefore(dst, src VReg, instr I) } // Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s). - Block interface { + // Right now, this corresponds to a ssa.BasicBlock lowered to the machine level. + Block[I Instr] interface { + comparable // ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG. ID() int32 - // BlockParams returns the virtual registers used as the parameters of this block. - BlockParams(*[]VReg) []VReg // InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped. // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. - InstrIteratorBegin() Instr + InstrIteratorBegin() I // InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped. // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. - InstrIteratorNext() Instr + InstrIteratorNext() I // InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order. - InstrRevIteratorBegin() Instr + InstrRevIteratorBegin() I // InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order. - InstrRevIteratorNext() Instr + InstrRevIteratorNext() I // FirstInstr returns the fist instruction in this block where instructions will be inserted after it. - FirstInstr() Instr - // EndInstr returns the end instruction in this block. - EndInstr() Instr + FirstInstr() I // LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it. // Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges. // At the time of register allocation, all the critical edges are already split, so there is no need // to worry about the case where branching instruction has multiple successors. // Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns // the unconditional branch, not the nop. In other words it is either nop or unconditional branch. - LastInstrForInsertion() Instr + LastInstrForInsertion() I // Preds returns the number of predecessors of this block in the CFG. Preds() int - // Pred returns the i-th predecessor of this block in the CFG. - Pred(i int) Block // Entry returns true if the block is for the entry block. Entry() bool // Succs returns the number of successors of this block in the CFG. Succs() int - // Succ returns the i-th successor of this block in the CFG. - Succ(i int) Block // LoopHeader returns true if this block is a loop header. LoopHeader() bool // LoopNestingForestChildren returns the number of children of this block in the loop nesting forest. LoopNestingForestChildren() int - // LoopNestingForestChild returns the i-th child of this block in the loop nesting forest. - LoopNestingForestChild(i int) Block } // Instr is an instruction in a block, abstracting away the underlying ISA. Instr interface { + comparable fmt.Stringer - // Next returns the next instruction in the same block. - Next() Instr - // Prev returns the previous instruction in the same block. - Prev() Instr // Defs returns the virtual registers defined by this instruction. Defs(*[]VReg) []VReg // Uses returns the virtual registers used by this instruction. @@ -124,13 +120,5 @@ type ( IsIndirectCall() bool // IsReturn returns true if this instruction is a return instruction. IsReturn() bool - // AddedBeforeRegAlloc returns true if this instruction is added before register allocation. - AddedBeforeRegAlloc() bool - } - - // InstrConstraint is an interface for arch-specific instruction constraints. - InstrConstraint interface { - comparable - Instr } ) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go index b4450d56fb..a5857f4f26 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go @@ -18,13 +18,13 @@ import ( ) // NewAllocator returns a new Allocator. -func NewAllocator(allocatableRegs *RegisterInfo) Allocator { - a := Allocator{ +func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] { + a := Allocator[I, B, F]{ regInfo: allocatableRegs, - phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList), - blockStates: wazevoapi.NewIDedPool[blockState](resetBlockState), + phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]), + blockStates: wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]), } - a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState) + a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F]) a.state.reset() for _, regs := range allocatableRegs.AllocatableRegisters { for _, r := range regs { @@ -49,33 +49,39 @@ type ( } // Allocator is a register allocator. - Allocator struct { + Allocator[I Instr, B Block[I], F Function[I, B]] struct { // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator. regInfo *RegisterInfo // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA. allocatableSet RegSet allocatedCalleeSavedRegs []VReg vs []VReg - vs2 []VRegID - phiDefInstListPool wazevoapi.Pool[phiDefInstList] + ss []*vrState[I, B, F] + copies []_copy[I, B, F] + phiDefInstListPool wazevoapi.Pool[phiDefInstList[I]] // Followings are re-used during various places. - blks []Block - reals []RealReg - currentOccupants regInUseSet + blks []B + reals []RealReg // Following two fields are updated while iterating the blocks in the reverse postorder. - state state - blockStates wazevoapi.IDedPool[blockState] + state state[I, B, F] + blockStates wazevoapi.IDedPool[blockState[I, B, F]] + } + + // _copy represents a source and destination pair of a copy instruction. + _copy[I Instr, B Block[I], F Function[I, B]] struct { + src *vrState[I, B, F] + dstID VRegID } // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg. programCounter int32 - state struct { + state[I Instr, B Block[I], F Function[I, B]] struct { argRealRegs []VReg - regsInUse regInUseSet - vrStates wazevoapi.IDedPool[vrState] + regsInUse regInUseSet[I, B, F] + vrStates wazevoapi.IDedPool[vrState[I, B, F]] currentBlockID int32 @@ -83,30 +89,30 @@ type ( allocatedRegSet RegSet } - blockState struct { + blockState[I Instr, B Block[I], F Function[I, B]] struct { // liveIns is a list of VReg that are live at the beginning of the block. - liveIns []VRegID + liveIns []*vrState[I, B, F] // seen is true if the block is visited during the liveness analysis. seen bool // visited is true if the block is visited during the allocation phase. visited bool startFromPredIndex int // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. - startRegs regInUseSet + startRegs regInUseSet[I, B, F] // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. - endRegs regInUseSet + endRegs regInUseSet[I, B, F] } - vrState struct { + vrState[I Instr, B Block[I], f Function[I, B]] struct { v VReg r RealReg // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. - defInstr Instr + defInstr I // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. - defBlk Block + defBlk B // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that // reloads this value. This is used to determine the spill location. Only valid if spilled=true. - lca Block + lca B // lastUse is the program counter of the last use of this value. This changes while iterating the block, and // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID. lastUse programCounter @@ -121,14 +127,14 @@ type ( desiredLoc desiredLoc // phiDefInstList is a list of instructions that defines this phi value. // This is used to determine the spill location, and only valid if isPhi=true. - *phiDefInstList + *phiDefInstList[I] } // phiDefInstList is a linked list of instructions that defines a phi value. - phiDefInstList struct { - instr Instr + phiDefInstList[I Instr] struct { + instr I v VReg - next *phiDefInstList + next *phiDefInstList[I] } // desiredLoc represents a desired location for a VReg. @@ -160,13 +166,14 @@ func (d desiredLoc) stack() bool { return d&3 == desiredLoc(desiredLocKindStack) } -func resetPhiDefInstList(l *phiDefInstList) { - l.instr = nil +func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) { + var nilInstr I + l.instr = nilInstr l.next = nil l.v = VRegInvalid } -func (s *state) dump(info *RegisterInfo) { //nolint:unused +func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused fmt.Println("\t\tstate:") fmt.Println("\t\t\targRealRegs:", s.argRealRegs) fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) @@ -185,7 +192,7 @@ func (s *state) dump(info *RegisterInfo) { //nolint:unused fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", ")) } -func (s *state) reset() { +func (s *state[I, B, F]) reset() { s.argRealRegs = s.argRealRegs[:0] s.vrStates.Reset() s.allocatedRegSet = RegSet(0) @@ -193,79 +200,74 @@ func (s *state) reset() { s.currentBlockID = -1 } -func (s *state) setVRegState(v VReg, r RealReg) { - id := int(v.ID()) - st := s.vrStates.GetOrAllocate(id) - st.r = r - st.v = v -} - -func resetVrState(vs *vrState) { +func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) { vs.v = VRegInvalid vs.r = RealRegInvalid - vs.defInstr = nil - vs.defBlk = nil + var nilInstr I + vs.defInstr = nilInstr + var nilBlk B + vs.defBlk = nilBlk vs.spilled = false vs.lastUse = -1 vs.lastUseUpdatedAtBlockID = -1 - vs.lca = nil + vs.lca = nilBlk vs.isPhi = false vs.phiDefInstList = nil vs.desiredLoc = desiredLocUnspecified } -func (s *state) getVRegState(v VRegID) *vrState { - return s.vrStates.GetOrAllocate(int(v)) +func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] { + st := s.vrStates.GetOrAllocate(int(v.ID())) + if st.v == VRegInvalid { + st.v = v + } + return st } -func (s *state) useRealReg(r RealReg, v VReg) { - if s.regsInUse.has(r) { - panic("BUG: useRealReg: the given real register is already used") - } - s.regsInUse.add(r, v) - s.setVRegState(v, r) +func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] { + return s.vrStates.Get(int(v)) +} + +func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) { + s.regsInUse.add(r, vr) + vr.r = r s.allocatedRegSet = s.allocatedRegSet.add(r) } -func (s *state) releaseRealReg(r RealReg) { +func (s *state[I, B, F]) releaseRealReg(r RealReg) { current := s.regsInUse.get(r) - if current.Valid() { + if current != nil { s.regsInUse.remove(r) - s.setVRegState(current, RealRegInvalid) + current.r = RealRegInvalid } } // recordReload records that the given VReg is reloaded in the given block. // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. -func (vs *vrState) recordReload(f Function, blk Block) { +func (vs *vrState[I, B, F]) recordReload(f F, blk B) { vs.spilled = true - if vs.lca == nil { + var nilBlk B + if lca := vs.lca; lca == nilBlk { if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID()) } vs.lca = blk - } else { + } else if lca != blk { if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID()) } - vs.lca = f.LowestCommonAncestor(vs.lca, blk) + vs.lca = f.LowestCommonAncestor(lca, blk) if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("updated lca=%d\n", vs.lca.ID()) } } } -func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { +func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { r = RealRegInvalid // First, check if the preferredMask has any allocatable register. if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) { - for _, candidateReal := range allocatable { - // TODO: we should ensure the preferred register is in the allocatable set in the first place, - // but right now, just in case, we check it here. - if candidateReal == preferred { - return preferred - } - } + return preferred } var lastUseAt programCounter @@ -276,7 +278,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb } using := s.regsInUse.get(candidateReal) - if using == VRegInvalid { + if using == nil { // This is not used at this point. return candidateReal } @@ -285,17 +287,17 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb // For example, if the register is used as an argument register, and it might be // spilled and not reloaded when it ends up being used as a temporary to pass // stack based argument. - if using.IsRealReg() { + if using.v.IsRealReg() { continue } isPreferred := candidateReal == preferred // last == -1 means the value won't be used anymore. - if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { + if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { lastUseAt = last r = candidateReal - spillVReg = using + spillVReg = using.v if isPreferred { break } @@ -313,7 +315,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb return r } -func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { +func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { for _, r := range allocatable { if !s.regsInUse.has(r) && !forbiddenMask.has(r) { return r @@ -322,22 +324,20 @@ func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) Rea return RealRegInvalid } -func (s *state) resetAt(bs *blockState) { - s.regsInUse.range_(func(_ RealReg, vr VReg) { - s.setVRegState(vr, RealRegInvalid) +func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) { + s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) { + vs.r = RealRegInvalid }) s.regsInUse.reset() - bs.endRegs.range_(func(r RealReg, v VReg) { - id := int(v.ID()) - st := s.vrStates.GetOrAllocate(id) - if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn { - s.regsInUse.add(r, v) - s.setVRegState(v, r) + bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) { + if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn { + s.regsInUse.add(r, vs) + vs.r = r } }) } -func resetBlockState(b *blockState) { +func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) { b.seen = false b.visited = false b.endRegs.reset() @@ -346,7 +346,7 @@ func resetBlockState(b *blockState) { b.liveIns = b.liveIns[:0] } -func (b *blockState) dump(a *RegisterInfo) { +func (b *blockState[I, B, F]) dump(a *RegisterInfo) { fmt.Println("\t\tblockState:") fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a)) fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) @@ -355,13 +355,13 @@ func (b *blockState) dump(a *RegisterInfo) { } // DoAllocation performs register allocation on the given Function. -func (a *Allocator) DoAllocation(f Function) { +func (a *Allocator[I, B, F]) DoAllocation(f F) { a.livenessAnalysis(f) a.alloc(f) a.determineCalleeSavedRealRegs(f) } -func (a *Allocator) determineCalleeSavedRealRegs(f Function) { +func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) { a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0] a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) { if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) { @@ -371,17 +371,17 @@ func (a *Allocator) determineCalleeSavedRealRegs(f Function) { f.ClobberedRegisters(a.allocatedCalleeSavedRegs) } -func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState { +func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] { return a.blockStates.GetOrAllocate(int(blockID)) } // phiBlk returns the block that defines the given phi value, nil otherwise. -func (s *state) phiBlk(v VRegID) Block { - vs := s.getVRegState(v) +func (vs *vrState[I, B, F]) phiBlk() B { if vs.isPhi { return vs.defBlk } - return nil + var nilBlk B + return nilBlk } const ( @@ -391,31 +391,35 @@ const ( // liveAnalysis constructs Allocator.blockLivenessData. // The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. -func (a *Allocator) livenessAnalysis(f Function) { +func (a *Allocator[I, B, F]) livenessAnalysis(f F) { s := &a.state - for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter. + for i := VRegID(0); i < vRegIDReservedForRealNum; i++ { + s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i))) + } + + var nilBlk B + var nilInstr I + for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() { // We should gather phi value data. - for _, p := range blk.BlockParams(&a.vs) { - vs := s.getVRegState(p.ID()) + for _, p := range f.BlockParams(blk, &a.vs) { + vs := s.getOrAllocateVRegState(p) vs.isPhi = true vs.defBlk = blk } - } - for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { blkID := blk.ID() info := a.getOrAllocateBlockState(blkID) - a.vs2 = a.vs2[:0] + a.ss = a.ss[:0] const ( flagDeleted = false flagLive = true ) ns := blk.Succs() for i := 0; i < ns; i++ { - succ := blk.Succ(i) - if succ == nil { + succ := f.Succ(blk, i) + if succ == nilBlk { continue } @@ -425,39 +429,39 @@ func (a *Allocator) livenessAnalysis(f Function) { continue } - for _, v := range succInfo.liveIns { - if s.phiBlk(v) != succ { - st := s.getVRegState(v) + for _, st := range succInfo.liveIns { + if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple // We use .spilled field to store the flag. st.spilled = flagLive - a.vs2 = append(a.vs2, v) + a.ss = append(a.ss, st) } } } - for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { + for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() { var use, def VReg + var defIsPhi bool for _, def = range instr.Defs(&a.vs) { if !def.IsRealReg() { - id := def.ID() - st := s.getVRegState(id) - // We use .spilled field to store the flag. + st := s.getOrAllocateVRegState(def) + defIsPhi = st.isPhi + // Note: We use .spilled field to store the flag. st.spilled = flagDeleted - a.vs2 = append(a.vs2, id) } } for _, use = range instr.Uses(&a.vs) { if !use.IsRealReg() { - id := use.ID() - st := s.getVRegState(id) - // We use .spilled field to store the flag. - st.spilled = flagLive - a.vs2 = append(a.vs2, id) + st := s.getOrAllocateVRegState(use) + // Note: We use .spilled field to store the flag. + if st.spilled != flagLive { //nolint:gosimple + st.spilled = flagLive + a.ss = append(a.ss, st) + } } } - if def.Valid() && s.phiBlk(def.ID()) != nil { + if defIsPhi { if use.Valid() && use.IsRealReg() { // If the destination is a phi value, and the source is a real register, this is the beginning of the function. a.state.argRealRegs = append(a.state.argRealRegs, use) @@ -465,11 +469,10 @@ func (a *Allocator) livenessAnalysis(f Function) { } } - for _, v := range a.vs2 { - st := s.getVRegState(v) + for _, st := range a.ss { // We use .spilled field to store the flag. if st.spilled == flagLive { //nolint:gosimple - info.liveIns = append(info.liveIns, v) + info.liveIns = append(info.liveIns, st) st.spilled = false } } @@ -480,51 +483,48 @@ func (a *Allocator) livenessAnalysis(f Function) { nrs := f.LoopNestingForestRoots() for i := 0; i < nrs; i++ { root := f.LoopNestingForestRoot(i) - a.loopTreeDFS(root) + a.loopTreeDFS(f, root) } } // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. -func (a *Allocator) loopTreeDFS(entry Block) { +func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) { a.blks = a.blks[:0] a.blks = append(a.blks, entry) - s := &a.state for len(a.blks) > 0 { tail := len(a.blks) - 1 loop := a.blks[tail] a.blks = a.blks[:tail] - a.vs2 = a.vs2[:0] + a.ss = a.ss[:0] const ( flagDone = false flagPending = true ) info := a.getOrAllocateBlockState(loop.ID()) - for _, v := range info.liveIns { - if s.phiBlk(v) != loop { - a.vs2 = append(a.vs2, v) - st := s.getVRegState(v) + for _, st := range info.liveIns { + if st.phiBlk() != loop { + a.ss = append(a.ss, st) // We use .spilled field to store the flag. st.spilled = flagPending } } - var siblingAddedView []VRegID + var siblingAddedView []*vrState[I, B, F] cn := loop.LoopNestingForestChildren() for i := 0; i < cn; i++ { - child := loop.LoopNestingForestChild(i) + child := f.LoopNestingForestChild(loop, i) childID := child.ID() childInfo := a.getOrAllocateBlockState(childID) if i == 0 { begin := len(childInfo.liveIns) - for _, v := range a.vs2 { - st := s.getVRegState(v) + for _, st := range a.ss { // We use .spilled field to store the flag. if st.spilled == flagPending { //nolint:gosimple st.spilled = flagDone // TODO: deduplicate, though I don't think it has much impact. - childInfo.liveIns = append(childInfo.liveIns, v) + childInfo.liveIns = append(childInfo.liveIns, st) } } siblingAddedView = childInfo.liveIns[begin:] @@ -540,8 +540,7 @@ func (a *Allocator) loopTreeDFS(entry Block) { if cn == 0 { // If there's no forest child, we haven't cleared the .spilled field at this point. - for _, v := range a.vs2 { - st := s.getVRegState(v) + for _, st := range a.ss { st.spilled = false } } @@ -558,37 +557,36 @@ func (a *Allocator) loopTreeDFS(entry Block) { // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value. // // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. -func (a *Allocator) alloc(f Function) { +func (a *Allocator[I, B, F]) alloc(f F) { // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). - for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { + var nilBlk B + for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() { if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("========== allocating blk%d ========\n", blk.ID()) } if blk.Entry() { - a.finalizeStartReg(blk) + a.finalizeStartReg(f, blk) } a.allocBlock(f, blk) } // After the allocation, we all know the start and end state of each block. So we can fix the merge states. - for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { + for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() { a.fixMergeState(f, blk) } // Finally, we insert the spill instructions as we know all the places where the reloads happen. a.scheduleSpills(f) } -func (a *Allocator) updateLiveInVRState(liveness *blockState) { +func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) { currentBlockID := a.state.currentBlockID - for _, v := range liveness.liveIns { - vs := a.state.getVRegState(v) + for _, vs := range liveness.liveIns { vs.lastUse = programCounterLiveIn vs.lastUseUpdatedAtBlockID = currentBlockID } } -func (a *Allocator) finalizeStartReg(blk Block) { +func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) { bID := blk.ID() - liveness := a.getOrAllocateBlockState(bID) s := &a.state currentBlkState := a.getOrAllocateBlockState(bID) if currentBlkState.startFromPredIndex > -1 { @@ -596,20 +594,20 @@ func (a *Allocator) finalizeStartReg(blk Block) { } s.currentBlockID = bID - a.updateLiveInVRState(liveness) + a.updateLiveInVRState(currentBlkState) preds := blk.Preds() - var predState *blockState + var predState *blockState[I, B, F] switch preds { case 0: // This is the entry block. case 1: - predID := blk.Pred(0).ID() + predID := f.Pred(blk, 0).ID() predState = a.getOrAllocateBlockState(predID) currentBlkState.startFromPredIndex = 0 default: // TODO: there should be some better heuristic to choose the predecessor. for i := 0; i < preds; i++ { - predID := blk.Pred(i).ID() + predID := f.Pred(blk, i).ID() if _predState := a.getOrAllocateBlockState(predID); _predState.visited { predState = _predState currentBlkState.startFromPredIndex = i @@ -622,18 +620,18 @@ func (a *Allocator) finalizeStartReg(blk Block) { panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID())) } for _, u := range s.argRealRegs { - s.useRealReg(u.RealReg(), u) + s.useRealReg(u.RealReg(), s.getVRegState(u.ID())) } currentBlkState.startFromPredIndex = 0 - } else if predState != nil { + } else { if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", - bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) + bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) } s.resetAt(predState) } - s.regsInUse.range_(func(allocated RealReg, v VReg) { + s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.startRegs.add(allocated, v) }) if wazevoapi.RegAllocLoggingEnabled { @@ -641,7 +639,7 @@ func (a *Allocator) finalizeStartReg(blk Block) { } } -func (a *Allocator) allocBlock(f Function, blk Block) { +func (a *Allocator[I, B, F]) allocBlock(f F, blk B) { bID := blk.ID() s := &a.state currentBlkState := a.getOrAllocateBlockState(bID) @@ -652,36 +650,34 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } // Clears the previous state. - s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) { - s.setVRegState(vr, RealRegInvalid) - }) + s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid }) s.regsInUse.reset() // Then set the start state. - currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { - s.useRealReg(allocatedRealReg, vr) - }) + currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) }) - desiredUpdated := a.vs2[:0] + desiredUpdated := a.ss[:0] // Update the last use of each VReg. + a.copies = a.copies[:0] // Stores the copy instructions. var pc programCounter - for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { - var use, def VReg - for _, use = range instr.Uses(&a.vs) { + var nilInstr I + for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() { + var useState *vrState[I, B, F] + for _, use := range instr.Uses(&a.vs) { + useState = s.getVRegState(use.ID()) if !use.IsRealReg() { - s.getVRegState(use.ID()).lastUse = pc + useState.lastUse = pc } } if instr.IsCopy() { - def = instr.Defs(&a.vs)[0] + def := instr.Defs(&a.vs)[0] + a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()}) r := def.RealReg() if r != RealRegInvalid { - useID := use.ID() - vs := s.getVRegState(useID) - if !vs.isPhi { // TODO: no idea why do we need this. - vs.desiredLoc = newDesiredLocReg(r) - desiredUpdated = append(desiredUpdated, useID) + if !useState.isPhi { // TODO: no idea why do we need this. + useState.desiredLoc = newDesiredLocReg(r) + desiredUpdated = append(desiredUpdated, useState) } } } @@ -690,18 +686,18 @@ func (a *Allocator) allocBlock(f Function, blk Block) { // Mark all live-out values by checking live-in of the successors. // While doing so, we also update the desired register values. - var succ Block + var succ B + var nilBlk B for i, ns := 0, blk.Succs(); i < ns; i++ { - succ = blk.Succ(i) - if succ == nil { + succ = f.Succ(blk, i) + if succ == nilBlk { continue } succID := succ.ID() succState := a.getOrAllocateBlockState(succID) - for _, v := range succState.liveIns { - if s.phiBlk(v) != succ { - st := s.getVRegState(v) + for _, st := range succState.liveIns { + if st.phiBlk() != succ { st.lastUse = programCounterLiveOut } } @@ -710,43 +706,33 @@ func (a *Allocator) allocBlock(f Function, blk Block) { if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo)) } - succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { - vs := s.getVRegState(vr.ID()) + succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) { vs.desiredLoc = newDesiredLocReg(allocatedRealReg) - desiredUpdated = append(desiredUpdated, vr.ID()) + desiredUpdated = append(desiredUpdated, vs) }) - for _, p := range succ.BlockParams(&a.vs) { + for _, p := range f.BlockParams(succ, &a.vs) { vs := s.getVRegState(p.ID()) if vs.desiredLoc.realReg() == RealRegInvalid { vs.desiredLoc = desiredLocStack - desiredUpdated = append(desiredUpdated, p.ID()) + desiredUpdated = append(desiredUpdated, vs) } } } } // Propagate the desired register values from the end of the block to the beginning. - for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { - if instr.IsCopy() { - def := instr.Defs(&a.vs)[0] - defState := s.getVRegState(def.ID()) - desired := defState.desiredLoc.realReg() - if desired == RealRegInvalid { - continue - } - - use := instr.Uses(&a.vs)[0] - useID := use.ID() - useState := s.getVRegState(useID) - if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified { - useState.desiredLoc = newDesiredLocReg(desired) - desiredUpdated = append(desiredUpdated, useID) - } + for _, instr := range a.copies { + defState := s.getVRegState(instr.dstID) + desired := defState.desiredLoc.realReg() + useState := instr.src + if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified { + useState.desiredLoc = newDesiredLocReg(desired) + desiredUpdated = append(desiredUpdated, useState) } } pc = 0 - for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { + for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() { if wazevoapi.RegAllocLoggingEnabled { fmt.Println(instr) } @@ -755,7 +741,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) { killSet := a.reals[:0] // Gather the set of registers that will be used in the current instruction. - for _, use := range instr.Uses(&a.vs) { + uses := instr.Uses(&a.vs) + for _, use := range uses { if use.IsRealReg() { r := use.RealReg() currentUsedSet = currentUsedSet.add(r) @@ -770,19 +757,19 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } } - for i, use := range instr.Uses(&a.vs) { + for i, use := range uses { if !use.IsRealReg() { vs := s.getVRegState(use.ID()) killed := vs.lastUse == pc r := vs.r if r == RealRegInvalid { - r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, + r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, // Prefer the desired register if it's available. vs.desiredLoc.realReg()) vs.recordReload(f, blk) f.ReloadRegisterBefore(use.SetRealReg(r), instr) - s.useRealReg(r, use) + s.useRealReg(r, vs) } if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) @@ -799,10 +786,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } isIndirect := instr.IsIndirectCall() - call := instr.IsCall() || isIndirect - if call { + if instr.IsCall() || isIndirect { addr := RealRegInvalid - if instr.IsIndirectCall() { + if isIndirect { addr = a.vs[0].RealReg() } a.releaseCallerSavedRegs(addr) @@ -814,8 +800,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) { a.reals = killSet defs := instr.Defs(&a.vs) - switch { - case len(defs) > 1: + switch len(defs) { + default: // Some instructions define multiple values on real registers. // E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx. // @@ -830,20 +816,21 @@ func (a *Allocator) allocBlock(f Function, blk Block) { if s.regsInUse.has(r) { s.releaseRealReg(r) } - s.useRealReg(r, def) + s.useRealReg(r, s.getVRegState(def.ID())) } - case len(defs) == 1: + case 0: + case 1: def := defs[0] + vState := s.getVRegState(def.ID()) if def.IsRealReg() { r := def.RealReg() if a.allocatableSet.has(r) { if s.regsInUse.has(r) { s.releaseRealReg(r) } - s.useRealReg(r, def) + s.useRealReg(r, vState) } } else { - vState := s.getVRegState(def.ID()) r := vState.r if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid { @@ -864,7 +851,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } r = desired s.releaseRealReg(r) - s.useRealReg(r, def) + s.useRealReg(r, vState) } } } @@ -880,9 +867,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } if r == RealRegInvalid { typ := def.RegType() - r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) + r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) } - s.useRealReg(r, def) + s.useRealReg(r, vState) } dr := def.SetRealReg(r) instr.AssignDef(dr) @@ -915,9 +902,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) { pc++ } - s.regsInUse.range_(func(allocated RealReg, v VReg) { - currentBlkState.endRegs.add(allocated, v) - }) + s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) }) currentBlkState.visited = true if wazevoapi.RegAllocLoggingEnabled { @@ -925,32 +910,30 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } // Reset the desired end location. - for _, v := range desiredUpdated { - vs := s.getVRegState(v) + for _, vs := range desiredUpdated { vs.desiredLoc = desiredLocUnspecified } - a.vs2 = desiredUpdated[:0] + a.ss = desiredUpdated[:0] for i := 0; i < blk.Succs(); i++ { - succ := blk.Succ(i) - if succ == nil { + succ := f.Succ(blk, i) + if succ == nilBlk { continue } // If the successor is not visited yet, finalize the start state. - a.finalizeStartReg(succ) + a.finalizeStartReg(f, succ) } } -func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { +func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) { s := &a.state - for i := 0; i < 64; i++ { - allocated := RealReg(i) + for allocated := RealReg(0); allocated < 64; allocated++ { if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. continue } - if v := s.regsInUse.get(allocated); v.Valid() { - if v.IsRealReg() { + if vs := s.regsInUse.get(allocated); vs != nil { + if vs.v.IsRealReg() { continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg. } if !a.regInfo.CallerSavedRegisters.has(allocated) { @@ -962,7 +945,7 @@ func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { } } -func (a *Allocator) fixMergeState(f Function, blk Block) { +func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) { preds := blk.Preds() if preds <= 1 { return @@ -974,11 +957,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) { bID := blk.ID() blkSt := a.getOrAllocateBlockState(bID) desiredOccupants := &blkSt.startRegs - aliveOnRegVRegs := make(map[VReg]RealReg) - for i := 0; i < 64; i++ { - r := RealReg(i) - if v := blkSt.startRegs.get(r); v.Valid() { - aliveOnRegVRegs[v] = r + var desiredOccupantsSet RegSet + for i, v := range desiredOccupants { + if v != nil { + desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i)) } } @@ -987,151 +969,146 @@ func (a *Allocator) fixMergeState(f Function, blk Block) { } s.currentBlockID = bID - a.updateLiveInVRState(a.getOrAllocateBlockState(bID)) + a.updateLiveInVRState(blkSt) - currentOccupants := &a.currentOccupants for i := 0; i < preds; i++ { - currentOccupants.reset() if i == blkSt.startFromPredIndex { continue } - currentOccupantsRev := make(map[VReg]RealReg) - pred := blk.Pred(i) + pred := f.Pred(blk, i) predSt := a.getOrAllocateBlockState(pred.ID()) - for ii := 0; ii < 64; ii++ { - r := RealReg(ii) - if v := predSt.endRegs.get(r); v.Valid() { - if _, ok := aliveOnRegVRegs[v]; !ok { - continue - } - currentOccupants.add(r, v) - currentOccupantsRev[v] = r - } - } s.resetAt(predSt) // Finds the free registers if any. intTmp, floatTmp := VRegInvalid, VRegInvalid if intFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, + a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet, ); intFree != RealRegInvalid { intTmp = FromRealReg(intFree, RegTypeInt) } if floatFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, + a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet, ); floatFree != RealRegInvalid { floatTmp = FromRealReg(floatFree, RegTypeFloat) } - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) - } - - for ii := 0; ii < 64; ii++ { - r := RealReg(ii) + for r := RealReg(0); r < 64; r++ { desiredVReg := desiredOccupants.get(r) - if !desiredVReg.Valid() { + if desiredVReg == nil { continue } - currentVReg := currentOccupants.get(r) - if desiredVReg.ID() == currentVReg.ID() { + currentVReg := s.regsInUse.get(r) + if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() { continue } - typ := desiredVReg.RegType() + typ := desiredVReg.v.RegType() var tmpRealReg VReg if typ == RegTypeInt { tmpRealReg = intTmp } else { tmpRealReg = floatTmp } - a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) + a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ) } } } -func (a *Allocator) reconcileEdge(f Function, +// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`. +// +// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor. +// - desiredVReg is the desired VReg value that should be on the register `r`. +// - freeReg is the temporary register that can be used to swap the values, which may or may not be used. +// - typ is the register type of the `r`. +func (a *Allocator[I, B, F]) reconcileEdge(f F, r RealReg, - pred Block, - currentOccupants *regInUseSet, - currentOccupantsRev map[VReg]RealReg, - currentVReg, desiredVReg VReg, + pred B, + currentState, desiredState *vrState[I, B, F], freeReg VReg, typ RegType, ) { + desiredVReg := desiredState.v + currentVReg := VRegInvalid + if currentState != nil { + currentVReg = currentState.v + } + // There are four cases to consider: + // 1. currentVReg is valid, but desiredVReg is on the stack. + // 2. Both currentVReg and desiredVReg are valid. + // 3. Desired is on a different register than `r` and currentReg is not valid. + // 4. Desired is on the stack and currentReg is not valid. + s := &a.state if currentVReg.Valid() { - // Both are on reg. - er, ok := currentOccupantsRev[desiredVReg] - if !ok { + er := desiredState.r + if er == RealRegInvalid { + // Case 1: currentVReg is valid, but desiredVReg is on the stack. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - // This case is that the desired value is on the stack, but currentVReg is on the target register. - // We need to move the current value to the stack, and reload the desired value. + // We need to move the current value to the stack, and reload the desired value into the register. // TODO: we can do better here. f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) - delete(currentOccupantsRev, currentVReg) + s.releaseRealReg(r) - s.getVRegState(desiredVReg.ID()).recordReload(f, pred) + desiredState.recordReload(f, pred) f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) - currentOccupants.add(r, desiredVReg) - currentOccupantsRev[desiredVReg] = r + s.useRealReg(r, desiredState) return - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + } else { + // Case 2: Both currentVReg and desiredVReg are valid. + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + ) + } + // This case, we need to swap the values between the current and desired values. + f.SwapBefore( + currentVReg.SetRealReg(r), + desiredVReg.SetRealReg(er), + freeReg, + pred.LastInstrForInsertion(), ) - } - f.SwapBefore( - currentVReg.SetRealReg(r), - desiredVReg.SetRealReg(er), - freeReg, - pred.LastInstrForInsertion(), - ) - s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) - currentOccupantsRev[desiredVReg] = r - currentOccupantsRev[currentVReg] = er - currentOccupants.add(r, desiredVReg) - currentOccupants.add(er, currentVReg) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) + s.releaseRealReg(r) + s.releaseRealReg(er) + s.useRealReg(r, desiredState) + s.useRealReg(er, currentState) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + } } } else { - // Desired is on reg, but currently the target register is not used. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { + if currentReg := desiredState.r; currentReg != RealRegInvalid { + // Case 3: Desired is on a different register than `r` and currentReg is not valid. + // We simply need to move the desired value to the register. f.InsertMoveBefore( FromRealReg(r, typ), desiredVReg.SetRealReg(currentReg), pred.LastInstrForInsertion(), ) - currentOccupants.remove(currentReg) + s.releaseRealReg(currentReg) } else { - s.getVRegState(desiredVReg.ID()).recordReload(f, pred) + // Case 4: Both currentVReg and desiredVReg are not valid. + // We simply need to reload the desired value into the register. + desiredState.recordReload(f, pred) f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) } - currentOccupantsRev[desiredVReg] = r - currentOccupants.add(r, desiredVReg) - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) + s.useRealReg(r, desiredState) } } -func (a *Allocator) scheduleSpills(f Function) { +func (a *Allocator[I, B, F]) scheduleSpills(f F) { states := a.state.vrStates for i := 0; i <= states.MaxIDEncountered(); i++ { vs := states.Get(i) @@ -1144,7 +1121,7 @@ func (a *Allocator) scheduleSpills(f Function) { } } -func (a *Allocator) scheduleSpill(f Function, vs *vrState) { +func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) { v := vs.v // If the value is the phi value, we need to insert a spill after each phi definition. if vs.isPhi { @@ -1157,10 +1134,11 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) { pos := vs.lca definingBlk := vs.defBlk r := RealRegInvalid - if definingBlk == nil { + var nilBlk B + if definingBlk == nilBlk { panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) } - if pos == nil { + if pos == nilBlk { panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) } @@ -1169,9 +1147,8 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) { } for pos != definingBlk { st := a.getOrAllocateBlockState(pos.ID()) - for ii := 0; ii < 64; ii++ { - rr := RealReg(ii) - if st.startRegs.get(rr) == v { + for rr := RealReg(0); rr < 64; rr++ { + if vs := st.startRegs.get(rr); vs != nil && vs.v == v { r = rr // Already in the register, so we can place the spill at the beginning of the block. break @@ -1204,7 +1181,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) { } // Reset resets the allocator's internal state so that it can be reused. -func (a *Allocator) Reset() { +func (a *Allocator[I, B, F]) Reset() { a.state.reset() a.blockStates.Reset() a.phiDefInstListPool.Reset() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go index e9bf60661c..ce84c9c0cd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go @@ -46,63 +46,51 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) { } } -type regInUseSet struct { - set RegSet - vrs [64]VReg +type regInUseSet[I Instr, B Block[I], F Function[I, B]] [64]*vrState[I, B, F] + +func newRegInUseSet[I Instr, B Block[I], F Function[I, B]]() regInUseSet[I, B, F] { + var ret regInUseSet[I, B, F] + ret.reset() + return ret } -func (rs *regInUseSet) reset() { - rs.set = 0 - for i := range rs.vrs { - rs.vrs[i] = VRegInvalid - } +func (rs *regInUseSet[I, B, F]) reset() { + clear(rs[:]) } -func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused +func (rs *regInUseSet[I, B, F]) format(info *RegisterInfo) string { //nolint:unused var ret []string - for i := 0; i < 64; i++ { - if rs.set&(1<v%d)", info.RealRegName(RealReg(i)), vr.ID())) + for i, vr := range rs { + if vr != nil { + ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.v.ID())) } } return strings.Join(ret, ", ") } -func (rs *regInUseSet) has(r RealReg) bool { - if r >= 64 { - return false - } - return rs.set&(1<= 64 { - return VRegInvalid - } - return rs.vrs[r] +func (rs *regInUseSet[I, B, F]) get(r RealReg) *vrState[I, B, F] { + return rs[r] } -func (rs *regInUseSet) remove(r RealReg) { - if r >= 64 { - return - } - rs.set &= ^(1 << uint(r)) - rs.vrs[r] = VRegInvalid +func (rs *regInUseSet[I, B, F]) remove(r RealReg) { + rs[r] = nil } -func (rs *regInUseSet) add(r RealReg, vr VReg) { +func (rs *regInUseSet[I, B, F]) add(r RealReg, vr *vrState[I, B, F]) { if r >= 64 { return } - rs.set |= 1 << uint(r) - rs.vrs[r] = vr + rs[r] = vr } -func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) { - for i := 0; i < 64; i++ { - if rs.set&(1< 0 { + //nolint:staticcheck sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners)) binary.LittleEndian.PutUint64(ret[8:], uint64(sliceHeader.Data)) binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len)) @@ -33,6 +34,7 @@ func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionList func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module { var opaqueViewOverSlice []byte + //nolint:staticcheck sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) sh.Data = opaqueBegin sh.Len = 32 @@ -42,6 +44,7 @@ func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module { func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener { var opaqueViewOverSlice []byte + //nolint:staticcheck sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) sh.Data = opaqueBegin sh.Len = 32 @@ -51,9 +54,11 @@ func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.Func l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:]) c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:]) var ret []experimental.FunctionListener + //nolint:staticcheck sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret)) sh.Data = uintptr(b) - setSliceLimits(sh, uintptr(l), uintptr(c)) + sh.Len = int(l) + sh.Cap = int(c) return ret } @@ -62,6 +67,7 @@ func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T { ptr := opaqueBegin + offset var opaqueViewOverFunction []byte + //nolint:staticcheck sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction)) sh.Data = ptr sh.Len = 16 diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go index ba8f546c0d..efa1b9bbaa 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go @@ -86,16 +86,6 @@ func newAlignedOpaque(size int) moduleContextOpaque { return buf } -func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) { - s := uint64(len(mem.Buffer)) - var b uint64 - if len(mem.Buffer) > 0 { - b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) - } - binary.LittleEndian.PutUint64(opaque[offset:], b) - binary.LittleEndian.PutUint64(opaque[offset+8:], s) -} - func (m *moduleEngine) setupOpaque() { inst := m.module offsets := &m.parent.offsets @@ -106,7 +96,7 @@ func (m *moduleEngine) setupOpaque() { ) if lm := offsets.LocalMemoryBegin; lm >= 0 { - putLocalMemory(opaque, lm, inst.MemoryInstance) + m.putLocalMemory() } // Note: imported memory is resolved in ResolveImportedFunction. @@ -227,6 +217,25 @@ func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) { // OwnsGlobals implements the same method as documented on wasm.ModuleEngine. func (m *moduleEngine) OwnsGlobals() bool { return true } +// MemoryGrown implements wasm.ModuleEngine. +func (m *moduleEngine) MemoryGrown() { + m.putLocalMemory() +} + +// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer. +func (m *moduleEngine) putLocalMemory() { + mem := m.module.MemoryInstance + offset := m.parent.offsets.LocalMemoryBegin + + s := uint64(len(mem.Buffer)) + var b uint64 + if len(mem.Buffer) > 0 { + b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) + } + binary.LittleEndian.PutUint64(m.opaque[offset:], b) + binary.LittleEndian.PutUint64(m.opaque[offset+8:], s) +} + // ResolveImportedFunction implements wasm.ModuleEngine. func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go deleted file mode 100644 index 6a03fc65c7..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package wazevo - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { - s.Len = int(l) - s.Cap = int(c) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go deleted file mode 100644 index eda3e706ac..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package wazevo - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { - s.Len = l - s.Cap = c -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go index 10b6b4b62b..cf7f14d3b1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -34,9 +34,6 @@ type BasicBlock interface { // The returned Value is the definition of the param in this block. Param(i int) Value - // InsertInstruction inserts an instruction that implements Value into the tail of this block. - InsertInstruction(raw *Instruction) - // Root returns the root instruction of this block. Root() *Instruction @@ -49,21 +46,12 @@ type BasicBlock interface { // ReturnBlock returns ture if this block represents the function return. ReturnBlock() bool - // FormatHeader returns the debug string of this block, not including instruction. - FormatHeader(b Builder) string - // Valid is true if this block is still valid even after optimizations. Valid() bool // Sealed is true if this block has been sealed. Sealed() bool - // BeginPredIterator returns the first predecessor of this block. - BeginPredIterator() BasicBlock - - // NextPredIterator returns the next predecessor of this block. - NextPredIterator() BasicBlock - // Preds returns the number of predecessors of this block. Preds() int @@ -88,10 +76,11 @@ type ( basicBlock struct { id BasicBlockID rootInstr, currentInstr *Instruction - params []blockParam - predIter int - preds []basicBlockPredecessorInfo - success []*basicBlock + // params are Values that represent parameters to a basicBlock. + // Each parameter can be considered as an output of PHI instruction in traditional SSA. + params Values + preds []basicBlockPredecessorInfo + success []*basicBlock // singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. singlePred *basicBlock // lastDefinitions maps Variable to its last definition in this block. @@ -116,11 +105,14 @@ type ( // loopNestingForestChildren holds the children of this block in the loop nesting forest. // Non-empty if and only if this block is a loop header (i.e. loopHeader=true) - loopNestingForestChildren []BasicBlock + loopNestingForestChildren wazevoapi.VarLength[BasicBlock] // reversePostOrder is used to sort all the blocks in the function in reverse post order. // This is used in builder.LayoutBlocks. - reversePostOrder int + reversePostOrder int32 + + // visited is used during various traversals. + visited int32 // child and sibling are the ones in the dominator tree. child, sibling *basicBlock @@ -128,15 +120,6 @@ type ( // BasicBlockID is the unique ID of a basicBlock. BasicBlockID uint32 - // blockParam implements Value and represents a parameter to a basicBlock. - blockParam struct { - // value is the Value that corresponds to the parameter in this block, - // and can be considered as an output of PHI instruction in traditional SSA. - value Value - // typ is the type of the parameter. - typ Type - } - unknownValue struct { // variable is the variable that this unknownValue represents. variable Variable @@ -145,6 +128,9 @@ type ( } ) +// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren. +var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]() + const basicBlockIDReturnBlock = 0xffffffff // Name implements BasicBlock.Name. @@ -190,24 +176,23 @@ func (bb *basicBlock) ReturnBlock() bool { // AddParam implements BasicBlock.AddParam. func (bb *basicBlock) AddParam(b Builder, typ Type) Value { paramValue := b.allocateValue(typ) - bb.params = append(bb.params, blockParam{typ: typ, value: paramValue}) + bb.params = bb.params.Append(&b.(*builder).varLengthPool, paramValue) return paramValue } // addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(typ Type, value Value) { - bb.params = append(bb.params, blockParam{typ: typ, value: value}) +func (bb *basicBlock) addParamOn(b *builder, value Value) { + bb.params = bb.params.Append(&b.varLengthPool, value) } // Params implements BasicBlock.Params. func (bb *basicBlock) Params() int { - return len(bb.params) + return len(bb.params.View()) } // Param implements BasicBlock.Param. func (bb *basicBlock) Param(i int) Value { - p := &bb.params[i] - return p.value + return bb.params.View()[i] } // Valid implements BasicBlock.Valid. @@ -220,8 +205,8 @@ func (bb *basicBlock) Sealed() bool { return bb.sealed } -// InsertInstruction implements BasicBlock.InsertInstruction. -func (bb *basicBlock) InsertInstruction(next *Instruction) { +// insertInstruction implements BasicBlock.InsertInstruction. +func (bb *basicBlock) insertInstruction(b *builder, next *Instruction) { current := bb.currentInstr if current != nil { current.next = next @@ -233,12 +218,12 @@ func (bb *basicBlock) InsertInstruction(next *Instruction) { switch next.opcode { case OpcodeJump, OpcodeBrz, OpcodeBrnz: - target := next.blk.(*basicBlock) - target.addPred(bb, next) + target := BasicBlockID(next.rValue) + b.basicBlock(target).addPred(bb, next) case OpcodeBrTable: - for _, _target := range next.targets { - target := _target.(*basicBlock) - target.addPred(bb, next) + for _, _target := range next.rValues.View() { + target := BasicBlockID(_target) + b.basicBlock(target).addPred(bb, next) } } } @@ -248,22 +233,6 @@ func (bb *basicBlock) NumPreds() int { return len(bb.preds) } -// BeginPredIterator implements BasicBlock.BeginPredIterator. -func (bb *basicBlock) BeginPredIterator() BasicBlock { - bb.predIter = 0 - return bb.NextPredIterator() -} - -// NextPredIterator implements BasicBlock.NextPredIterator. -func (bb *basicBlock) NextPredIterator() BasicBlock { - if bb.predIter >= len(bb.preds) { - return nil - } - pred := bb.preds[bb.predIter].blk - bb.predIter++ - return pred -} - // Preds implements BasicBlock.Preds. func (bb *basicBlock) Preds() int { return len(bb.preds) @@ -296,7 +265,7 @@ func (bb *basicBlock) Tail() *Instruction { // reset resets the basicBlock to its initial state so that it can be reused for another function. func resetBasicBlock(bb *basicBlock) { - bb.params = bb.params[:0] + bb.params = ValuesNil bb.rootInstr, bb.currentInstr = nil, nil bb.preds = bb.preds[:0] bb.success = bb.success[:0] @@ -305,7 +274,8 @@ func resetBasicBlock(bb *basicBlock) { bb.unknownValues = bb.unknownValues[:0] bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions) bb.reversePostOrder = -1 - bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0] + bb.visited = 0 + bb.loopNestingForestChildren = basicBlockVarLengthNil bb.loopHeader = false bb.sibling = nil bb.child = nil @@ -335,11 +305,11 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { pred.success = append(pred.success, bb) } -// FormatHeader implements BasicBlock.FormatHeader. -func (bb *basicBlock) FormatHeader(b Builder) string { - ps := make([]string, len(bb.params)) - for i, p := range bb.params { - ps[i] = p.value.formatWithType(b) +// formatHeader returns the string representation of the header of the basicBlock. +func (bb *basicBlock) formatHeader(b Builder) string { + ps := make([]string, len(bb.params.View())) + for i, p := range bb.params.View() { + ps[i] = p.formatWithType(b) } if len(bb.preds) > 0 { @@ -366,7 +336,9 @@ func (bb *basicBlock) validate(b *builder) { if len(bb.preds) > 0 { for _, pred := range bb.preds { if pred.branch.opcode != OpcodeBrTable { - if target := pred.branch.blk; target != bb { + blockID := int(pred.branch.rValue) + target := b.basicBlocksPool.View(blockID) + if target != bb { panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s", pred.branch.Format(b), bb.Name(), target.Name())) } @@ -376,14 +348,14 @@ func (bb *basicBlock) validate(b *builder) { if bb.ReturnBlock() { exp = len(b.currentSignature.Results) } else { - exp = len(bb.params) + exp = len(bb.params.View()) } if len(pred.branch.vs.View()) != exp { panic(fmt.Sprintf( "BUG: len(argument at %s) != len(params at %s): %d != %d: %s", pred.blk.Name(), bb.Name(), - len(pred.branch.vs.View()), len(bb.params), pred.branch.Format(b), + len(pred.branch.vs.View()), len(bb.params.View()), pred.branch.Format(b), )) } @@ -398,7 +370,7 @@ func (bb *basicBlock) String() string { // LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren. func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { - return bb.loopNestingForestChildren + return bb.loopNestingForestChildren.View() } // LoopHeader implements BasicBlock.LoopHeader. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go index e1471edc37..fb98298f7f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go @@ -1,5 +1,3 @@ -//go:build go1.21 - package ssa import ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go deleted file mode 100644 index 9dc881dae7..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build !go1.21 - -// TODO: delete after the floor Go version is 1.21 - -package ssa - -import "sort" - -func sortBlocks(blocks []*basicBlock) { - sort.SliceStable(blocks, func(i, j int) bool { - iBlk, jBlk := blocks[i], blocks[j] - if jBlk.ReturnBlock() { - return true - } - if iBlk.ReturnBlock() { - return false - } - iRoot, jRoot := iBlk.rootInstr, jBlk.rootInstr - if iRoot == nil || jRoot == nil { // For testing. - return true - } - return iBlk.rootInstr.id < jBlk.rootInstr.id - }) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go index 1fc84d2eaf..43dd7d2928 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -54,9 +54,6 @@ type Builder interface { // MustFindValue searches the latest definition of the given Variable and returns the result. MustFindValue(variable Variable) Value - // MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock. - MustFindValueInBlk(variable Variable, blk BasicBlock) Value - // FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock. // If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid. FindValueInLinearPath(variable Variable) Value @@ -97,9 +94,9 @@ type Builder interface { // Returns nil if there's no unseen BasicBlock. BlockIteratorNext() BasicBlock - // ValueRefCounts returns the map of ValueID to its reference count. - // The returned slice must not be modified. - ValueRefCounts() []int + // ValuesInfo returns the data per Value used to lower the SSA in backend. + // This is indexed by ValueID. + ValuesInfo() []ValueInfo // BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order. // This is available after RunPasses is run. @@ -127,21 +124,29 @@ type Builder interface { // Idom returns the immediate dominator of the given BasicBlock. Idom(blk BasicBlock) BasicBlock + // VarLengthPool returns the VarLengthPool of Value. VarLengthPool() *wazevoapi.VarLengthPool[Value] + + // InsertZeroValue inserts a zero value constant instruction of the given type. + InsertZeroValue(t Type) + + // BasicBlock returns the BasicBlock of the given ID. + BasicBlock(id BasicBlockID) BasicBlock + + // InstructionOfValue returns the Instruction that produces the given Value or nil if the Value is not produced by any Instruction. + InstructionOfValue(v Value) *Instruction } // NewBuilder returns a new Builder implementation. func NewBuilder() Builder { return &builder{ - instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), - basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), - varLengthPool: wazevoapi.NewVarLengthPool[Value](), - valueAnnotations: make(map[ValueID]string), - signatures: make(map[SignatureID]*Signature), - blkVisited: make(map[*basicBlock]int), - valueIDAliases: make(map[ValueID]Value), - redundantParameterIndexToValue: make(map[int]Value), - returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, + instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), + basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), + varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](), + varLengthPool: wazevoapi.NewVarLengthPool[Value](), + valueAnnotations: make(map[ValueID]string), + signatures: make(map[SignatureID]*Signature), + returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, } } @@ -158,36 +163,32 @@ type builder struct { currentBB *basicBlock returnBlk *basicBlock - // variables track the types for Variable with the index regarded Variable. - variables []Type // nextValueID is used by builder.AllocateValue. nextValueID ValueID // nextVariable is used by builder.AllocateVariable. nextVariable Variable - valueIDAliases map[ValueID]Value + // valueAnnotations contains the annotations for each Value, only used for debugging. valueAnnotations map[ValueID]string - // valueRefCounts is used to lower the SSA in backend, and will be calculated - // by the last SSA-level optimization pass. - valueRefCounts []int + // valuesInfo contains the data per Value used to lower the SSA in backend. This is indexed by ValueID. + valuesInfo []ValueInfo // dominators stores the immediate dominator of each BasicBlock. // The index is blockID of the BasicBlock. dominators []*basicBlock sparseTree dominatorSparseTree + varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock] + // loopNestingForestRoots are the roots of the loop nesting forest. loopNestingForestRoots []BasicBlock // The followings are used for optimization passes/deterministic compilation. - instStack []*Instruction - blkVisited map[*basicBlock]int - valueIDToInstruction []*Instruction - blkStack []*basicBlock - blkStack2 []*basicBlock - ints []int - redundantParameterIndexToValue map[int]Value + instStack []*Instruction + blkStack []*basicBlock + blkStack2 []*basicBlock + redundantParams []redundantParam // blockIterCur is used to implement blockIteratorBegin and blockIteratorNext. blockIterCur int @@ -200,6 +201,60 @@ type builder struct { donePostBlockLayoutPasses bool currentSourceOffset SourceOffset + + // zeros are the zero value constants for each type. + zeros [typeEnd]Value +} + +// ValueInfo contains the data per Value used to lower the SSA in backend. +type ValueInfo struct { + // RefCount is the reference count of the Value. + RefCount uint32 + alias Value +} + +// redundantParam is a pair of the index of the redundant parameter and the Value. +// This is used to eliminate the redundant parameters in the optimization pass. +type redundantParam struct { + // index is the index of the redundant parameter in the basicBlock. + index int + // uniqueValue is the Value which is passed to the redundant parameter. + uniqueValue Value +} + +// BasicBlock implements Builder.BasicBlock. +func (b *builder) BasicBlock(id BasicBlockID) BasicBlock { + return b.basicBlock(id) +} + +func (b *builder) basicBlock(id BasicBlockID) *basicBlock { + if id == basicBlockIDReturnBlock { + return b.returnBlk + } + return b.basicBlocksPool.View(int(id)) +} + +// InsertZeroValue implements Builder.InsertZeroValue. +func (b *builder) InsertZeroValue(t Type) { + if b.zeros[t].Valid() { + return + } + zeroInst := b.AllocateInstruction() + switch t { + case TypeI32: + zeroInst.AsIconst32(0) + case TypeI64: + zeroInst.AsIconst64(0) + case TypeF32: + zeroInst.AsF32const(0) + case TypeF64: + zeroInst.AsF64const(0) + case TypeV128: + zeroInst.AsVconst(0, 0) + default: + panic("TODO: " + t.String()) + } + b.zeros[t] = zeroInst.Insert(b).Return() } func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { @@ -215,10 +270,12 @@ func (b *builder) ReturnBlock() BasicBlock { func (b *builder) Init(s *Signature) { b.nextVariable = 0 b.currentSignature = s + b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid} resetBasicBlock(b.returnBlk) b.instructionsPool.Reset() b.basicBlocksPool.Reset() b.varLengthPool.Reset() + b.varLengthBasicBlockPool.Reset() b.donePreBlockLayoutPasses = false b.doneBlockLayout = false b.donePostBlockLayoutPasses = false @@ -226,31 +283,20 @@ func (b *builder) Init(s *Signature) { sig.used = false } - b.ints = b.ints[:0] + b.redundantParams = b.redundantParams[:0] b.blkStack = b.blkStack[:0] b.blkStack2 = b.blkStack2[:0] b.dominators = b.dominators[:0] b.loopNestingForestRoots = b.loopNestingForestRoots[:0] - - for i := 0; i < b.basicBlocksPool.Allocated(); i++ { - blk := b.basicBlocksPool.View(i) - delete(b.blkVisited, blk) - } b.basicBlocksPool.Reset() for v := ValueID(0); v < b.nextValueID; v++ { delete(b.valueAnnotations, v) - delete(b.valueIDAliases, v) - b.valueRefCounts[v] = 0 - b.valueIDToInstruction[v] = nil + b.valuesInfo[v] = ValueInfo{alias: ValueInvalid} } b.nextValueID = 0 b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] b.doneBlockLayout = false - for i := range b.valueRefCounts { - b.valueRefCounts[i] = 0 - } - b.currentSourceOffset = sourceOffsetUnknown } @@ -330,7 +376,7 @@ func (b *builder) Idom(blk BasicBlock) BasicBlock { // InsertInstruction implements Builder.InsertInstruction. func (b *builder) InsertInstruction(instr *Instruction) { - b.currentBB.InsertInstruction(instr) + b.currentBB.insertInstruction(b, instr) if l := b.currentSourceOffset; l.Valid() { // Emit the source offset info only when the instruction has side effect because @@ -352,7 +398,7 @@ func (b *builder) InsertInstruction(instr *Instruction) { } r1 := b.allocateValue(t1) - instr.rValue = r1 + instr.rValue = r1.setInstructionID(instr.id) tsl := len(ts) if tsl == 0 { @@ -361,20 +407,14 @@ func (b *builder) InsertInstruction(instr *Instruction) { rValues := b.varLengthPool.Allocate(tsl) for i := 0; i < tsl; i++ { - rValues = rValues.Append(&b.varLengthPool, b.allocateValue(ts[i])) + rn := b.allocateValue(ts[i]) + rValues = rValues.Append(&b.varLengthPool, rn.setInstructionID(instr.id)) } instr.rValues = rValues } // DefineVariable implements Builder.DefineVariable. func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) { - if b.variables[variable].invalid() { - panic("BUG: trying to define variable " + variable.String() + " but is not declared yet") - } - - if b.variables[variable] != value.Type() { - panic(fmt.Sprintf("BUG: inconsistent type for variable %d: expected %s but got %s", variable, b.variables[variable], value.Type())) - } bb := block.(*basicBlock) bb.lastDefinitions[variable] = value } @@ -401,20 +441,9 @@ func (b *builder) EntryBlock() BasicBlock { // DeclareVariable implements Builder.DeclareVariable. func (b *builder) DeclareVariable(typ Type) Variable { - v := b.allocateVariable() - iv := int(v) - if l := len(b.variables); l <= iv { - b.variables = append(b.variables, make([]Type, 2*(l+1))...) - } - b.variables[v] = typ - return v -} - -// allocateVariable allocates a new variable. -func (b *builder) allocateVariable() (ret Variable) { - ret = b.nextVariable + v := b.nextVariable b.nextVariable++ - return + return v.setType(typ) } // allocateValue implements Builder.AllocateValue. @@ -448,15 +477,9 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu return ValueInvalid } -func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value { - typ := b.definedVariableType(variable) - return b.findValue(typ, variable, blk.(*basicBlock)) -} - // MustFindValue implements Builder.MustFindValue. func (b *builder) MustFindValue(variable Variable) Value { - typ := b.definedVariableType(variable) - return b.findValue(typ, variable, b.currentBB) + return b.findValue(variable.getType(), variable, b.currentBB) } // findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in @@ -482,6 +505,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value value: value, }) return value + } else if blk.EntryBlock() { + // If this is the entry block, we reach the uninitialized variable which has zero value. + return b.zeros[variable.getType()] } if pred := blk.singlePred; pred != nil { @@ -495,21 +521,41 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value // If this block has multiple predecessors, we have to gather the definitions, // and treat them as an argument to this block. // - // The first thing is to define a new parameter to this block which may or may not be redundant, but - // later we eliminate trivial params in an optimization pass. This must be done before finding the - // definitions in the predecessors so that we can break the cycle. - paramValue := blk.AddParam(b, typ) - b.DefineVariable(variable, paramValue, blk) - - // After the new param is added, we have to manipulate the original branching instructions - // in predecessors so that they would pass the definition of `variable` as the argument to - // the newly added PHI. + // But before that, we have to check if the possible definitions are the same Value. + tmpValue := b.allocateValue(typ) + // Break the cycle by defining the variable with the tmpValue. + b.DefineVariable(variable, tmpValue, blk) + // Check all the predecessors if they have the same definition. + uniqueValue := ValueInvalid for i := range blk.preds { - pred := &blk.preds[i] - value := b.findValue(typ, variable, pred.blk) - pred.branch.addArgumentBranchInst(b, value) + predValue := b.findValue(typ, variable, blk.preds[i].blk) + if uniqueValue == ValueInvalid { + uniqueValue = predValue + } else if uniqueValue != predValue { + uniqueValue = ValueInvalid + break + } + } + + if uniqueValue != ValueInvalid { + // If all the predecessors have the same definition, we can use that value. + b.alias(tmpValue, uniqueValue) + return uniqueValue + } else { + // Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but + // later we eliminate trivial params in an optimization pass. This must be done before finding the + // definitions in the predecessors so that we can break the cycle. + blk.addParamOn(b, tmpValue) + // After the new param is added, we have to manipulate the original branching instructions + // in predecessors so that they would pass the definition of `variable` as the argument to + // the newly added PHI. + for i := range blk.preds { + pred := &blk.preds[i] + value := b.findValue(typ, variable, pred.blk) + pred.branch.addArgumentBranchInst(b, value) + } + return tmpValue } - return paramValue } // Seal implements Builder.Seal. @@ -522,8 +568,8 @@ func (b *builder) Seal(raw BasicBlock) { for _, v := range blk.unknownValues { variable, phiValue := v.variable, v.value - typ := b.definedVariableType(variable) - blk.addParamOn(typ, phiValue) + typ := variable.getType() + blk.addParamOn(b, phiValue) for i := range blk.preds { pred := &blk.preds[i] predValue := b.findValue(typ, variable, pred.blk) @@ -535,15 +581,6 @@ func (b *builder) Seal(raw BasicBlock) { } } -// definedVariableType returns the type of the given variable. If the variable is not defined yet, it panics. -func (b *builder) definedVariableType(variable Variable) Type { - typ := b.variables[variable] - if typ.invalid() { - panic(fmt.Sprintf("%s is not defined yet", variable)) - } - return typ -} - // Format implements Builder.Format. func (b *builder) Format() string { str := strings.Builder{} @@ -566,7 +603,7 @@ func (b *builder) Format() string { } for bb := iterBegin(); bb != nil; bb = iterNext() { str.WriteByte('\n') - str.WriteString(bb.FormatHeader(b)) + str.WriteString(bb.formatHeader(b)) str.WriteByte('\n') for cur := bb.Root(); cur != nil; cur = cur.Next() { @@ -645,15 +682,24 @@ func (b *builder) blockIteratorReversePostOrderNext() *basicBlock { } } -// ValueRefCounts implements Builder.ValueRefCounts. -func (b *builder) ValueRefCounts() []int { - return b.valueRefCounts +// ValuesInfo implements Builder.ValuesInfo. +func (b *builder) ValuesInfo() []ValueInfo { + return b.valuesInfo } // alias records the alias of the given values. The alias(es) will be // eliminated in the optimization pass via resolveArgumentAlias. func (b *builder) alias(dst, src Value) { - b.valueIDAliases[dst.ID()] = src + did := int(dst.ID()) + if did >= len(b.valuesInfo) { + l := did + 1 - len(b.valuesInfo) + b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) + view := b.valuesInfo[len(b.valuesInfo)-l:] + for i := range view { + view[i].alias = ValueInvalid + } + } + b.valuesInfo[did].alias = src } // resolveArgumentAlias resolves the alias of the arguments of the given instruction. @@ -678,10 +724,13 @@ func (b *builder) resolveArgumentAlias(instr *Instruction) { // resolveAlias resolves the alias of the given value. func (b *builder) resolveAlias(v Value) Value { + info := b.valuesInfo + l := ValueID(len(info)) // Some aliases are chained, so we need to resolve them recursively. for { - if src, ok := b.valueIDAliases[v.ID()]; ok { - v = src + vid := v.ID() + if vid < l && info[vid].alias.Valid() { + v = info[vid].alias } else { break } @@ -729,3 +778,13 @@ func (b *builder) LoopNestingForestRoots() []BasicBlock { func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock { return b.sparseTree.findLCA(blk1.ID(), blk2.ID()) } + +// InstructionOfValue returns the instruction that produces the given Value, or nil +// if the Value is not produced by any instruction. +func (b *builder) InstructionOfValue(v Value) *Instruction { + instrID := v.instructionID() + if instrID <= 0 { + return nil + } + return b.instructionsPool.View(instrID - 1) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go index 3e3482efc4..9a3d1da6e9 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go @@ -25,11 +25,13 @@ type Instruction struct { v3 Value vs Values typ Type - blk BasicBlock - targets []BasicBlock prev, next *Instruction - rValue Value + // rValue is the (first) return value of this instruction. + // For branching instructions except for OpcodeBrTable, they hold BlockID to jump cast to Value. + rValue Value + // rValues are the rest of the return values of this instruction. + // For OpcodeBrTable, it holds the list of BlockID to jump cast to Value. rValues Values gid InstructionGroupID sourceOffset SourceOffset @@ -105,6 +107,9 @@ type InstructionGroupID uint32 // Returns Value(s) produced by this instruction if any. // The `first` is the first return value, and `rest` is the rest of the values. func (i *Instruction) Returns() (first Value, rest []Value) { + if i.IsBranching() { + return ValueInvalid, nil + } return i.rValue, i.rValues.View() } @@ -2077,7 +2082,7 @@ func (i *Instruction) InvertBrx() { } // BranchData returns the branch data for this instruction necessary for backends. -func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlock) { +func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlockID) { switch i.opcode { case OpcodeJump: condVal = ValueInvalid @@ -2087,17 +2092,17 @@ func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target Bas panic("BUG") } blockArgs = i.vs.View() - target = i.blk + target = BasicBlockID(i.rValue) return } // BrTableData returns the branch table data for this instruction necessary for backends. -func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) { +func (i *Instruction) BrTableData() (index Value, targets Values) { if i.opcode != OpcodeBrTable { panic("BUG: BrTableData only available for OpcodeBrTable") } index = i.v - targets = i.targets + targets = i.rValues return } @@ -2105,7 +2110,7 @@ func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) { func (i *Instruction) AsJump(vs Values, target BasicBlock) *Instruction { i.opcode = OpcodeJump i.vs = vs - i.blk = target + i.rValue = Value(target.ID()) return i } @@ -2130,7 +2135,7 @@ func (i *Instruction) AsBrz(v Value, args Values, target BasicBlock) { i.opcode = OpcodeBrz i.v = v i.vs = args - i.blk = target + i.rValue = Value(target.ID()) } // AsBrnz initializes this instruction as a branch-if-not-zero instruction with OpcodeBrnz. @@ -2138,15 +2143,16 @@ func (i *Instruction) AsBrnz(v Value, args Values, target BasicBlock) *Instructi i.opcode = OpcodeBrnz i.v = v i.vs = args - i.blk = target + i.rValue = Value(target.ID()) return i } // AsBrTable initializes this instruction as a branch-table instruction with OpcodeBrTable. -func (i *Instruction) AsBrTable(index Value, targets []BasicBlock) { +// targets is a list of basic block IDs cast to Values. +func (i *Instruction) AsBrTable(index Value, targets Values) { i.opcode = OpcodeBrTable i.v = index - i.targets = targets + i.rValues = targets } // AsCall initializes this instruction as a call instruction with OpcodeCall. @@ -2531,7 +2537,8 @@ func (i *Instruction) Format(b Builder) string { if i.IsFallthroughJump() { vs[0] = " fallthrough" } else { - vs[0] = " " + i.blk.(*basicBlock).Name() + blockId := BasicBlockID(i.rValue) + vs[0] = " " + b.BasicBlock(blockId).Name() } for idx := range view { vs[idx+1] = view[idx].Format(b) @@ -2542,7 +2549,8 @@ func (i *Instruction) Format(b Builder) string { view := i.vs.View() vs := make([]string, len(view)+2) vs[0] = " " + i.v.Format(b) - vs[1] = i.blk.(*basicBlock).Name() + blockId := BasicBlockID(i.rValue) + vs[1] = b.BasicBlock(blockId).Name() for idx := range view { vs[idx+2] = view[idx].Format(b) } @@ -2551,8 +2559,8 @@ func (i *Instruction) Format(b Builder) string { // `BrTable index, [label1, label2, ... labelN]` instSuffix = fmt.Sprintf(" %s", i.v.Format(b)) instSuffix += ", [" - for i, target := range i.targets { - blk := target.(*basicBlock) + for i, target := range i.rValues.View() { + blk := b.BasicBlock(BasicBlockID(target)) if i == 0 { instSuffix += blk.Name() } else { @@ -2621,11 +2629,12 @@ func (i *Instruction) Format(b Builder) string { instr := i.opcode.String() + instSuffix var rvs []string - if rv := i.rValue; rv.Valid() { - rvs = append(rvs, rv.formatWithType(b)) + r1, rs := i.Returns() + if r1.Valid() { + rvs = append(rvs, r1.formatWithType(b)) } - for _, v := range i.rValues.View() { + for _, v := range rs { rvs = append(rvs, v.formatWithType(b)) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go index a2e986cd15..b9763791dd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -22,9 +22,9 @@ func (b *builder) RunPasses() { func (b *builder) runPreBlockLayoutPasses() { passSortSuccessors(b) passDeadBlockEliminationOpt(b) - passRedundantPhiEliminationOpt(b) // The result of passCalculateImmediateDominators will be used by various passes below. passCalculateImmediateDominators(b) + passRedundantPhiEliminationOpt(b) passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() { // passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so. func passDeadBlockEliminationOpt(b *builder) { entryBlk := b.entryBlk() - b.clearBlkVisited() b.blkStack = append(b.blkStack, entryBlk) for len(b.blkStack) > 0 { reachableBlk := b.blkStack[len(b.blkStack)-1] b.blkStack = b.blkStack[:len(b.blkStack)-1] - b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass. + reachableBlk.visited = 1 if !reachableBlk.sealed && !reachableBlk.ReturnBlock() { panic(fmt.Sprintf("%s is not sealed", reachableBlk)) @@ -94,7 +93,7 @@ func passDeadBlockEliminationOpt(b *builder) { } for _, succ := range reachableBlk.success { - if _, ok := b.blkVisited[succ]; ok { + if succ.visited == 1 { continue } b.blkStack = append(b.blkStack, succ) @@ -102,15 +101,18 @@ func passDeadBlockEliminationOpt(b *builder) { } for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - if _, ok := b.blkVisited[blk]; !ok { + if blk.visited != 1 { blk.invalid = true } + blk.visited = 0 } } // passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). +// This requires the reverse post-order traversal to be calculated before calling this function, +// hence passCalculateImmediateDominators must be called before this. func passRedundantPhiEliminationOpt(b *builder) { - redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. + redundantParams := b.redundantParams[:0] // reuse the slice from previous iterations. // TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the // same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact @@ -118,15 +120,19 @@ func passRedundantPhiEliminationOpt(b *builder) { // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. + // -- Note -- + // Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when + // running on the reverse post-order. It might be possible to optimize this further by using the dominator tree. for { changed := false - _ = b.blockIteratorBegin() // skip entry block! + _ = b.blockIteratorReversePostOrderBegin() // skip entry block! // Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! - for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() { - paramNum := len(blk.params) + for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() { + params := blk.params.View() + paramNum := len(params) for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - phiValue := blk.params[paramIndex].value + phiValue := params[paramIndex] redundant := true nonSelfReferencingValue := ValueInvalid @@ -157,55 +163,58 @@ func passRedundantPhiEliminationOpt(b *builder) { } if redundant { - b.redundantParameterIndexToValue[paramIndex] = nonSelfReferencingValue - redundantParameterIndexes = append(redundantParameterIndexes, paramIndex) + redundantParams = append(redundantParams, redundantParam{ + index: paramIndex, uniqueValue: nonSelfReferencingValue, + }) } } - if len(b.redundantParameterIndexToValue) == 0 { + if len(redundantParams) == 0 { continue } changed = true // Remove the redundant PHIs from the argument list of branching instructions. for predIndex := range blk.preds { - var cur int + redundantParamsCur, predParamCur := 0, 0 predBlk := blk.preds[predIndex] branchInst := predBlk.branch view := branchInst.vs.View() for argIndex, value := range view { - if _, ok := b.redundantParameterIndexToValue[argIndex]; !ok { - view[cur] = value - cur++ + if len(redundantParams) == redundantParamsCur || + redundantParams[redundantParamsCur].index != argIndex { + view[predParamCur] = value + predParamCur++ + } else { + redundantParamsCur++ } } - branchInst.vs.Cut(cur) + branchInst.vs.Cut(predParamCur) } // Still need to have the definition of the value of the PHI (previously as the parameter). - for _, redundantParamIndex := range redundantParameterIndexes { - phiValue := blk.params[redundantParamIndex].value - onlyValue := b.redundantParameterIndexToValue[redundantParamIndex] + for i := range redundantParams { + redundantValue := &redundantParams[i] + phiValue := params[redundantValue.index] // Create an alias in this block from the only phi argument to the phi value. - b.alias(phiValue, onlyValue) + b.alias(phiValue, redundantValue.uniqueValue) } // Finally, Remove the param from the blk. - var cur int + paramsCur, redundantParamsCur := 0, 0 for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - param := blk.params[paramIndex] - if _, ok := b.redundantParameterIndexToValue[paramIndex]; !ok { - blk.params[cur] = param - cur++ + param := params[paramIndex] + if len(redundantParams) == redundantParamsCur || redundantParams[redundantParamsCur].index != paramIndex { + params[paramsCur] = param + paramsCur++ + } else { + redundantParamsCur++ } } - blk.params = blk.params[:cur] + blk.params.Cut(paramsCur) // Clears the map for the next iteration. - for _, paramIndex := range redundantParameterIndexes { - delete(b.redundantParameterIndexToValue, paramIndex) - } - redundantParameterIndexes = redundantParameterIndexes[:0] + redundantParams = redundantParams[:0] } if !changed { @@ -214,7 +223,7 @@ func passRedundantPhiEliminationOpt(b *builder) { } // Reuse the slice for the future passes. - b.ints = redundantParameterIndexes + b.redundantParams = redundantParams } // passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and @@ -226,11 +235,13 @@ func passRedundantPhiEliminationOpt(b *builder) { // TODO: the algorithm here might not be efficient. Get back to this later. func passDeadCodeEliminationOpt(b *builder) { nvid := int(b.nextValueID) - if nvid >= len(b.valueRefCounts) { - b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) - } - if nvid >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + if nvid >= len(b.valuesInfo) { + l := nvid - len(b.valuesInfo) + 1 + b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) + view := b.valuesInfo[len(b.valuesInfo)-l:] + for i := range view { + view[i].alias = ValueInvalid + } } // First, we gather all the instructions with side effects. @@ -250,14 +261,6 @@ func passDeadCodeEliminationOpt(b *builder) { // The strict side effect should create different instruction groups. gid++ } - - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } } } @@ -278,28 +281,28 @@ func passDeadCodeEliminationOpt(b *builder) { v1, v2, v3, vs := live.Args() if v1.Valid() { - producingInst := b.valueIDToInstruction[v1.ID()] + producingInst := b.InstructionOfValue(v1) if producingInst != nil { liveInstructions = append(liveInstructions, producingInst) } } if v2.Valid() { - producingInst := b.valueIDToInstruction[v2.ID()] + producingInst := b.InstructionOfValue(v2) if producingInst != nil { liveInstructions = append(liveInstructions, producingInst) } } if v3.Valid() { - producingInst := b.valueIDToInstruction[v3.ID()] + producingInst := b.InstructionOfValue(v3) if producingInst != nil { liveInstructions = append(liveInstructions, producingInst) } } for _, v := range vs { - producingInst := b.valueIDToInstruction[v.ID()] + producingInst := b.InstructionOfValue(v) if producingInst != nil { liveInstructions = append(liveInstructions, producingInst) } @@ -347,46 +350,19 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) { if wazevoapi.SSALoggingEnabled { fmt.Printf("v%d referenced from %v\n", id, from.Format(b)) } - b.valueRefCounts[id]++ -} - -// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places. -func (b *builder) clearBlkVisited() { - b.blkStack2 = b.blkStack2[:0] - for key := range b.blkVisited { - b.blkStack2 = append(b.blkStack2, key) - } - for _, blk := range b.blkStack2 { - delete(b.blkVisited, blk) - } - b.blkStack2 = b.blkStack2[:0] + info := &b.valuesInfo[id] + info.RefCount++ } // passNopInstElimination eliminates the instructions which is essentially a no-op. func passNopInstElimination(b *builder) { - if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) - } - - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } - } - } - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { switch cur.Opcode() { // TODO: add more logics here. case OpcodeIshl, OpcodeSshr, OpcodeUshr: x, amount := cur.Arg2() - definingInst := b.valueIDToInstruction[amount.ID()] + definingInst := b.InstructionOfValue(amount) if definingInst == nil { // If there's no defining instruction, that means the amount is coming from the parameter. continue diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go index 9068180a0b..0118e8b2e5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -23,8 +23,6 @@ import ( // // This heuristic is done in maybeInvertBranches function. func passLayoutBlocks(b *builder) { - b.clearBlkVisited() - // We might end up splitting critical edges which adds more basic blocks, // so we store the currently existing basic blocks in nonSplitBlocks temporarily. // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. @@ -35,7 +33,7 @@ func passLayoutBlocks(b *builder) { } nonSplitBlocks = append(nonSplitBlocks, blk) if i != len(b.reversePostOrderedBasicBlocks)-1 { - _ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1]) + _ = maybeInvertBranches(b, blk, b.reversePostOrderedBasicBlocks[i+1]) } } @@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) { for _, blk := range nonSplitBlocks { for i := range blk.preds { pred := blk.preds[i].blk - if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { + if pred.visited == 1 || !pred.Valid() { continue } else if pred.reversePostOrder < blk.reversePostOrder { // This means the edge is critical, and this pred is the trampoline and yet to be inserted. // Split edge trampolines must come before the destination in reverse post-order. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) - b.blkVisited[pred] = 0 // mark as inserted, the value is not used. + pred.visited = 1 // mark as inserted. } } // Now that we've already added all the potential trampoline blocks incoming to this block, // we can add this block itself. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) - b.blkVisited[blk] = 0 // mark as inserted, the value is not used. + blk.visited = 1 // mark as inserted. if len(blk.success) < 2 { // There won't be critical edge originating from this block. @@ -113,10 +111,10 @@ func passLayoutBlocks(b *builder) { } fallthroughBranch := blk.currentInstr - if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { + if fallthroughBranch.opcode == OpcodeJump && BasicBlockID(fallthroughBranch.rValue) == trampoline.id { // This can be lowered as fallthrough at the end of the block. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } else { uninsertedTrampolines = append(uninsertedTrampolines, trampoline) } @@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) { if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. // This means the critical edge was backward, so we insert after the current block immediately. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } // If the target is forward, we can wait to insert until the target is inserted. } uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. @@ -142,8 +140,8 @@ func passLayoutBlocks(b *builder) { if wazevoapi.SSAValidationEnabled { for _, trampoline := range trampolines { - if _, ok := b.blkVisited[trampoline]; !ok { - panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) + if trampoline.visited != 1 { + panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b)) } trampoline.validate(b) } @@ -159,7 +157,7 @@ func (b *builder) markFallthroughJumps() { for i, blk := range b.reversePostOrderedBasicBlocks { if i < l { cur := blk.currentInstr - if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] { + if cur.opcode == OpcodeJump && BasicBlockID(cur.rValue) == b.reversePostOrderedBasicBlocks[i+1].id { cur.AsFallthroughJump() } } @@ -170,7 +168,7 @@ func (b *builder) markFallthroughJumps() { // nextInRPO is the next block in the reverse post-order. // // Returns true if the branch is inverted for testing purpose. -func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool { +func maybeInvertBranches(b *builder, now *basicBlock, nextInRPO *basicBlock) bool { fallthroughBranch := now.currentInstr if fallthroughBranch.opcode == OpcodeBrTable { return false @@ -189,7 +187,8 @@ func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool { // So this block has two branches (a conditional branch followed by an unconditional branch) at the end. // We can invert the condition of the branch if it makes the fallthrough more likely. - fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock) + fallthroughTarget := b.basicBlock(BasicBlockID(fallthroughBranch.rValue)) + condTarget := b.basicBlock(BasicBlockID(condBranch.rValue)) if fallthroughTarget.loopHeader { // First, if the tail's target is loopHeader, we don't need to do anything here, @@ -233,8 +232,8 @@ invert: } condBranch.InvertBrx() - condBranch.blk = fallthroughTarget - fallthroughBranch.blk = condTarget + condBranch.rValue = Value(fallthroughTarget.ID()) + fallthroughBranch.rValue = Value(condTarget.ID()) if wazevoapi.SSALoggingEnabled { fmt.Printf("inverting branches at %d->%d and %d->%d\n", now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID()) @@ -277,7 +276,7 @@ func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlock // Replace originalBranch with the newBranch. newBranch := b.AllocateInstruction() newBranch.opcode = originalBranch.opcode - newBranch.blk = trampoline + newBranch.rValue = Value(trampoline.ID()) switch originalBranch.opcode { case OpcodeJump: case OpcodeBrz, OpcodeBrnz: @@ -305,7 +304,7 @@ func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlock trampoline.validate(b) } - if len(trampoline.params) > 0 { + if len(trampoline.params.View()) > 0 { panic("trampoline should not have params") } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go index 50cb9c4750..e8288c4bd3 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go @@ -15,10 +15,6 @@ import ( // At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag. func passCalculateImmediateDominators(b *builder) { reversePostOrder := b.reversePostOrderedBasicBlocks[:0] - exploreStack := b.blkStack[:0] - b.clearBlkVisited() - - entryBlk := b.entryBlk() // Store the reverse postorder from the entrypoint into reversePostOrder slice. // This calculation of reverse postorder is not described in the paper, @@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) { // which is a reasonable assumption as long as SSA Builder is properly used. // // First we push blocks in postorder iteratively visit successors of the entry block. - exploreStack = append(exploreStack, entryBlk) + entryBlk := b.entryBlk() + exploreStack := append(b.blkStack[:0], entryBlk) + // These flags are used to track the state of the block in the DFS traversal. + // We temporarily use the reversePostOrder field to store the state. const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 - b.blkVisited[entryBlk] = visitStateSeen + entryBlk.visited = visitStateSeen for len(exploreStack) > 0 { tail := len(exploreStack) - 1 blk := exploreStack[tail] exploreStack = exploreStack[:tail] - switch b.blkVisited[blk] { + switch blk.visited { case visitStateUnseen: // This is likely a bug in the frontend. panic("BUG: unsupported CFG") @@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) { if succ.ReturnBlock() || succ.invalid { continue } - if b.blkVisited[succ] == visitStateUnseen { - b.blkVisited[succ] = visitStateSeen + if succ.visited == visitStateUnseen { + succ.visited = visitStateSeen exploreStack = append(exploreStack, succ) } } // Finally, we could pop this block once we pop all of its successors. - b.blkVisited[blk] = visitStateDone + blk.visited = visitStateDone case visitStateDone: // Note: at this point we push blk in postorder despite its name. reversePostOrder = append(reversePostOrder, blk) + default: + panic("BUG") } } // At this point, reversePostOrder has postorder actually, so we reverse it. @@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) { } for i, blk := range reversePostOrder { - blk.reversePostOrder = i + blk.reversePostOrder = int32(i) } // Reuse the dominators slice if possible from the previous computation of function. @@ -180,7 +181,7 @@ func passBuildLoopNestingForest(b *builder) { b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk) } else if n == ent { } else if n.loopHeader { - n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk) + n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk) } } @@ -193,7 +194,7 @@ func passBuildLoopNestingForest(b *builder) { func printLoopNestingForest(root *basicBlock, depth int) { fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) - for _, child := range root.loopNestingForestChildren { + for _, child := range root.loopNestingForestChildren.View() { fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID()) if child.LoopHeader() { printLoopNestingForest(child.(*basicBlock), depth+2) @@ -202,10 +203,10 @@ func printLoopNestingForest(root *basicBlock, depth int) { } type dominatorSparseTree struct { - time int + time int32 euler []*basicBlock - first, depth []int - table [][]int + first, depth []int32 + table [][]int32 } // passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree. @@ -232,11 +233,11 @@ func passBuildDominatorTree(b *builder) { n := b.basicBlocksPool.Allocated() st := &b.sparseTree st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) - st.first = append(st.first[:0], make([]int, n)...) + st.first = append(st.first[:0], make([]int32, n)...) for i := range st.first { st.first[i] = -1 } - st.depth = append(st.depth[:0], make([]int, 2*n-1)...) + st.depth = append(st.depth[:0], make([]int32, 2*n-1)...) st.time = 0 // Start building the sparse tree. @@ -244,9 +245,9 @@ func passBuildDominatorTree(b *builder) { st.buildSparseTable() } -func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) { +func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) { if wazevoapi.SSALoggingEnabled { - fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID()) + fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID()) } dt.euler[dt.time] = node dt.depth[dt.time] = height @@ -270,13 +271,13 @@ func (dt *dominatorSparseTree) buildSparseTable() { table := dt.table if n >= len(table) { - table = append(table, make([][]int, n+1)...) + table = append(table, make([][]int32, n-len(table)+1)...) } for i := range table { if len(table[i]) < k { - table[i] = append(table[i], make([]int, k)...) + table[i] = append(table[i], make([]int32, k-len(table[i]))...) } - table[i][0] = i + table[i][0] = int32(i) } for j := 1; 1<= 1<<28 { + panic(fmt.Sprintf("Too large variable: %d", v)) + } + return Variable(typ)<<28 | v +} + +func (v Variable) getType() Type { + return Type(v >> 28) } // Value represents an SSA value with a type information. The relationship with Variable is 1: N (including 0), // that means there might be multiple Variable(s) for a Value. // -// Higher 32-bit is used to store Type for this value. +// 32 to 59-bit is used to store the unique identifier of the Instruction that generates this value if any. +// 60 to 63-bit is used to store Type for this value. type Value uint64 // ValueID is the lower 32bit of Value, which is the pure identifier of Value without type info. @@ -33,7 +47,7 @@ type ValueID uint32 const ( valueIDInvalid ValueID = math.MaxUint32 - ValueInvalid Value = Value(valueIDInvalid) + ValueInvalid = Value(valueIDInvalid) ) // Format creates a debug string for this Value using the data stored in Builder. @@ -54,7 +68,7 @@ func (v Value) formatWithType(b Builder) (ret string) { if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs. if bd := b.(*builder); bd.donePostBlockLayoutPasses { id := v.ID() - ret += fmt.Sprintf("(ref=%d)", bd.valueRefCounts[id]) + ret += fmt.Sprintf("(ref=%d)", bd.valuesInfo[id].RefCount) } } return ret @@ -67,7 +81,7 @@ func (v Value) Valid() bool { // Type returns the Type of this value. func (v Value) Type() Type { - return Type(v >> 32) + return Type(v >> 60) } // ID returns the valueID of this value. @@ -77,7 +91,20 @@ func (v Value) ID() ValueID { // setType sets a type to this Value and returns the updated Value. func (v Value) setType(typ Type) Value { - return v | Value(typ)<<32 + return v | Value(typ)<<60 +} + +// setInstructionID sets an Instruction.id to this Value and returns the updated Value. +func (v Value) setInstructionID(id int) Value { + if id < 0 || uint(id) >= 1<<28 { + panic(fmt.Sprintf("Too large instruction ID: %d", id)) + } + return v | Value(id)<<32 +} + +// instructionID() returns the Instruction.id of this Value. +func (v Value) instructionID() int { + return int(v>>32) & 0x0fffffff } // Values is a slice of Value. Use this instead of []Value to reuse the underlying memory. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go index 3149fdc9e1..313e34f9ae 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go @@ -69,7 +69,7 @@ type IDedPool[T any] struct { // NewIDedPool returns a new IDedPool. func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] { - return IDedPool[T]{pool: NewPool[T](resetFn)} + return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1} } // GetOrAllocate returns the T with the given id. @@ -97,7 +97,7 @@ func (p *IDedPool[T]) Get(id int) *T { // Reset resets the pool. func (p *IDedPool[T]) Reset() { p.pool.Reset() - for i := range p.idToItems { + for i := 0; i <= p.maxIDEncountered; i++ { p.idToItems[i] = nil } p.maxIDEncountered = -1 diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go index 7177fbb4bf..3fc7aa143d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go @@ -5,9 +5,7 @@ func ResetMap[K comparable, V any](m map[K]V) map[K]V { if m == nil { m = make(map[K]V) } else { - for v := range m { - delete(m, v) - } + clear(m) } return m } diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/importresolver.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/importresolver.go new file mode 100644 index 0000000000..af52cc80eb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/importresolver.go @@ -0,0 +1,6 @@ +package expctxkeys + +// ImportResolverKey is a context.Context Value key. +// Its associated value should be an ImportResolver. +// See issue 2294. +type ImportResolverKey struct{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go index 25d7d3fdca..0dc6ec19ce 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go @@ -6,6 +6,9 @@ type CpuFeatureFlags interface { Has(cpuFeature CpuFeature) bool // HasExtra returns true when the specified extraFlag (represented as uint64) is supported HasExtra(cpuFeature CpuFeature) bool + // Raw returns the raw bitset that represents CPU features used by wazero. This can be used for cache keying. + // For now, we only use four features, so uint64 is enough. + Raw() uint64 } type CpuFeature uint64 @@ -17,9 +20,11 @@ const ( CpuFeatureAmd64SSE4_1 CpuFeature = 1 << 19 // CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities on amd64 CpuFeatureAmd64SSE4_2 CpuFeature = 1 << 20 + // Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw. ) const ( // CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64 CpuExtraFeatureAmd64ABM CpuFeature = 1 << 5 + // Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw. ) diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go index 8c9f1a9f34..fbdb539366 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go @@ -2,10 +2,10 @@ package platform -// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods -var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags() +// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods. +var CpuFeatures = loadCpuFeatureFlags() -// cpuFeatureFlags implements CpuFeatureFlags interface +// cpuFeatureFlags implements CpuFeatureFlags interface. type cpuFeatureFlags struct { flags uint64 extraFlags uint64 @@ -15,13 +15,13 @@ type cpuFeatureFlags struct { // implemented in impl_amd64.s func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) -// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap +// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap. func cpuidAsBitmap(arg1, arg2 uint32) uint64 { _ /* eax */, _ /* ebx */, ecx, edx := cpuid(arg1, arg2) return (uint64(edx) << 32) | uint64(ecx) } -// loadStandardRange load flags from the standard range, panics otherwise +// loadStandardRange load flags from the standard range, panics otherwise. func loadStandardRange(id uint32) uint64 { // ensure that the id is in the valid range, returned by cpuid(0,0) maxRange, _, _, _ := cpuid(0, 0) @@ -31,7 +31,7 @@ func loadStandardRange(id uint32) uint64 { return cpuidAsBitmap(id, 0) } -// loadStandardRange load flags from the extended range, panics otherwise +// loadStandardRange load flags from the extended range, panics otherwise. func loadExtendedRange(id uint32) uint64 { // ensure that the id is in the valid range, returned by cpuid(0x80000000,0) maxRange, _, _, _ := cpuid(0x80000000, 0) @@ -48,12 +48,32 @@ func loadCpuFeatureFlags() CpuFeatureFlags { } } -// Has implements the same method on the CpuFeatureFlags interface +// Has implements the same method on the CpuFeatureFlags interface. func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return (f.flags & uint64(cpuFeature)) != 0 } -// HasExtra implements the same method on the CpuFeatureFlags interface +// HasExtra implements the same method on the CpuFeatureFlags interface. func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return (f.extraFlags & uint64(cpuFeature)) != 0 } + +// Raw implements the same method on the CpuFeatureFlags interface. +func (f *cpuFeatureFlags) Raw() uint64 { + // Below, we only set the first 4 bits for the features we care about, + // instead of setting all the unnecessary bits obtained from the CPUID instruction. + var ret uint64 + if f.Has(CpuFeatureAmd64SSE3) { + ret = 1 << 0 + } + if f.Has(CpuFeatureAmd64SSE4_1) { + ret |= 1 << 1 + } + if f.Has(CpuFeatureAmd64SSE4_2) { + ret |= 1 << 2 + } + if f.HasExtra(CpuExtraFeatureAmd64ABM) { + ret |= 1 << 3 + } + return ret +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go index 8ae826d367..291bcea65f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go @@ -4,11 +4,14 @@ package platform var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{} -// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms +// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms. type cpuFeatureFlags struct{} -// Has implements the same method on the CpuFeatureFlags interface +// Has implements the same method on the CpuFeatureFlags interface. func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false } -// HasExtra implements the same method on the CpuFeatureFlags interface +// HasExtra implements the same method on the CpuFeatureFlags interface. func (c *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return false } + +// Raw implements the same method on the CpuFeatureFlags interface. +func (c *cpuFeatureFlags) Raw() uint64 { return 0 } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go index a61996d58b..b0519003b7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go @@ -12,8 +12,6 @@ const ( mmapProtARM64 = syscall.PROT_READ | syscall.PROT_WRITE ) -const MmapSupported = true - func munmapCodeSegment(code []byte) error { return syscall.Munmap(code) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go index 27833db377..079aa643f4 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go @@ -9,8 +9,6 @@ import ( var errUnsupported = fmt.Errorf("mmap unsupported on GOOS=%s. Use interpreter instead.", runtime.GOOS) -const MmapSupported = false - func munmapCodeSegment(code []byte) error { panic(errUnsupported) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go index 69fcb6d6b6..03a254d4a6 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go @@ -21,8 +21,6 @@ const ( windows_PAGE_EXECUTE_READWRITE uintptr = 0x00000040 ) -const MmapSupported = true - func munmapCodeSegment(code []byte) error { return freeMemory(code) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go deleted file mode 100644 index 5cba99fb25..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go +++ /dev/null @@ -1,23 +0,0 @@ -//go:build !(darwin || linux || freebsd) || tinygo - -package platform - -func remapCodeSegmentAMD64(code []byte, size int) ([]byte, error) { - b, err := mmapCodeSegmentAMD64(size) - if err != nil { - return nil, err - } - copy(b, code) - mustMunmapCodeSegment(code) - return b, nil -} - -func remapCodeSegmentARM64(code []byte, size int) ([]byte, error) { - b, err := mmapCodeSegmentARM64(size) - if err != nil { - return nil, err - } - copy(b, code) - mustMunmapCodeSegment(code) - return b, nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go deleted file mode 100644 index 8f42d44fd7..0000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go +++ /dev/null @@ -1,21 +0,0 @@ -//go:build (darwin || linux || freebsd) && !tinygo - -package platform - -func remapCodeSegmentAMD64(code []byte, size int) ([]byte, error) { - return remapCodeSegment(code, size, mmapProtAMD64) -} - -func remapCodeSegmentARM64(code []byte, size int) ([]byte, error) { - return remapCodeSegment(code, size, mmapProtARM64) -} - -func remapCodeSegment(code []byte, size, prot int) ([]byte, error) { - b, err := mmapCodeSegment(size, prot) - if err != nil { - return nil, err - } - copy(b, code) - mustMunmapCodeSegment(code) - return b, nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go b/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go index c6dc0f857b..a275562406 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go @@ -36,28 +36,6 @@ func MmapCodeSegment(size int) ([]byte, error) { } } -// RemapCodeSegment reallocates the memory mapping of an existing code segment -// to increase its size. The previous code mapping is unmapped and must not be -// reused after the function returns. -// -// This is similar to mremap(2) on linux, and emulated on platforms which do not -// have this syscall. -// -// See https://man7.org/linux/man-pages/man2/mremap.2.html -func RemapCodeSegment(code []byte, size int) ([]byte, error) { - if size < len(code) { - panic("BUG: RemapCodeSegment with size less than code") - } - if code == nil { - return MmapCodeSegment(size) - } - if runtime.GOARCH == "amd64" { - return remapCodeSegmentAMD64(code, size) - } else { - return remapCodeSegmentARM64(code, size) - } -} - // MunmapCodeSegment unmaps the given memory region. func MunmapCodeSegment(code []byte) error { if len(code) == 0 { @@ -65,17 +43,3 @@ func MunmapCodeSegment(code []byte) error { } return munmapCodeSegment(code) } - -// mustMunmapCodeSegment panics instead of returning an error to the -// application. -// -// # Why panic? -// -// It is less disruptive to the application to leak the previous block if it -// could be unmapped than to leak the new block and return an error. -// Realistically, either scenarios are pretty hard to debug, so we panic. -func mustMunmapCodeSegment(code []byte) { - if err := munmapCodeSegment(code); err != nil { - panic(err) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go index 9a77205bb5..fdbf1fde0d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go @@ -38,9 +38,6 @@ func NewStdioFile(stdin bool, f fs.File) (fsapi.File, error) { } func OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (*os.File, experimentalsys.Errno) { - if flag&experimentalsys.O_DIRECTORY != 0 && flag&(experimentalsys.O_WRONLY|experimentalsys.O_RDWR) != 0 { - return nil, experimentalsys.EISDIR // invalid to open a directory writeable - } return openFile(path, flag, perm) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go index 755ee5ea3e..dfc4417edc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go @@ -54,7 +54,6 @@ func decodeUTF8(r *bytes.Reader, contextFormat string, contextArgs ...interface{ return "", 0, fmt.Errorf("%s is not valid UTF-8", fmt.Sprintf(contextFormat, contextArgs...)) } - // TODO: use unsafe.String after flooring Go 1.20. - ret := *(*string)(unsafe.Pointer(&buf)) + ret := unsafe.String(&buf[0], int(size)) return ret, size + uint32(sizeOfSize), nil } diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go index 58a4582178..61a342ef23 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go @@ -69,4 +69,7 @@ type ModuleEngine interface { // FunctionInstanceReference returns Reference for the given Index for a FunctionInstance. The returned values are used by // the initialization via ElementSegment. FunctionInstanceReference(funcIndex Index) Reference + + // MemoryGrown notifies the engine that the memory has grown. + MemoryGrown() } diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go index 8da6890765..6044892289 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go @@ -67,11 +67,6 @@ func (m *Module) validateFunctionWithMaxStackValues( declaredFunctionIndexes map[Index]struct{}, br *bytes.Reader, ) error { - nonStaticLocals := make(map[Index]struct{}) - if len(m.NonStaticLocals) > 0 { - m.NonStaticLocals[idx] = nonStaticLocals - } - functionType := &m.TypeSection[m.FunctionSection[idx]] code := &m.CodeSection[idx] body := code.Body @@ -357,7 +352,6 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalSetName, index, l) } - nonStaticLocals[index] = struct{}{} var expType ValueType if index < inputLen { expType = functionType.Params[index] @@ -373,7 +367,6 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalTeeName, index, l) } - nonStaticLocals[index] = struct{}{} var expType ValueType if index < inputLen { expType = functionType.Params[index] @@ -458,14 +451,14 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("read immediate: %w", err) } - list := make([]uint32, nl) + sts.ls = sts.ls[:0] for i := uint32(0); i < nl; i++ { l, n, err := leb128.DecodeUint32(br) if err != nil { return fmt.Errorf("read immediate: %w", err) } num += n - list[i] = l + sts.ls = append(sts.ls, l) } ln, n, err := leb128.DecodeUint32(br) if err != nil { @@ -518,7 +511,7 @@ func (m *Module) validateFunctionWithMaxStackValues( } } - for _, l := range list { + for _, l := range sts.ls { if int(l) >= len(controlBlockStack.stack) { return fmt.Errorf("invalid l param given for %s", OpcodeBrTableName) } @@ -2010,6 +2003,8 @@ var vecSplatValueTypes = [...]ValueType{ type stacks struct { vs valueTypeStack cs controlBlockStack + // ls is the label slice that is reused for each br_table instruction. + ls []uint32 } func (sts *stacks) reset(functionType *FunctionType) { @@ -2019,6 +2014,7 @@ func (sts *stacks) reset(functionType *FunctionType) { sts.vs.maximumStackPointer = 0 sts.cs.stack = sts.cs.stack[:0] sts.cs.stack = append(sts.cs.stack, controlBlock{blockType: functionType}) + sts.ls = sts.ls[:0] } type controlBlockStack struct { diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go index 5cc5012dae..8e072fd127 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go @@ -52,18 +52,22 @@ type MemoryInstance struct { definition api.MemoryDefinition // Mux is used in interpreter mode to prevent overlapping calls to atomic instructions, - // introduced with WebAssembly threads proposal. + // introduced with WebAssembly threads proposal, and in compiler mode to make memory modifications + // within Grow non-racy for the Go race detector. Mux sync.Mutex // waiters implements atomic wait and notify. It is implemented similarly to golang.org/x/sync/semaphore, // with a fixed weight of 1 and no spurious notifications. waiters sync.Map + // ownerModuleEngine is the module engine that owns this memory instance. + ownerModuleEngine ModuleEngine + expBuffer experimental.LinearMemory } // NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory. -func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance { +func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator, moduleEngine ModuleEngine) *MemoryInstance { minBytes := MemoryPagesToBytesNum(memSec.Min) capBytes := MemoryPagesToBytesNum(memSec.Cap) maxBytes := MemoryPagesToBytesNum(memSec.Max) @@ -89,12 +93,13 @@ func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) * buffer = make([]byte, minBytes, capBytes) } return &MemoryInstance{ - Buffer: buffer, - Min: memSec.Min, - Cap: memoryBytesNumToPages(uint64(cap(buffer))), - Max: memSec.Max, - Shared: memSec.IsShared, - expBuffer: expBuffer, + Buffer: buffer, + Min: memSec.Min, + Cap: memoryBytesNumToPages(uint64(cap(buffer))), + Max: memSec.Max, + Shared: memSec.IsShared, + expBuffer: expBuffer, + ownerModuleEngine: moduleEngine, } } @@ -223,6 +228,11 @@ func MemoryPagesToBytesNum(pages uint32) (bytesNum uint64) { // Grow implements the same method as documented on api.Memory. func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { + if m.Shared { + m.Mux.Lock() + defer m.Mux.Unlock() + } + currentPages := m.Pages() if delta == 0 { return currentPages, true @@ -247,14 +257,12 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { m.Buffer = buffer m.Cap = newPages } - return currentPages, true } else if newPages > m.Cap { // grow the memory. if m.Shared { panic("shared memory cannot be grown, this is a bug in wazero") } m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...) m.Cap = newPages - return currentPages, true } else { // We already have the capacity we need. if m.Shared { // We assume grow is called under a guest lock. @@ -264,8 +272,9 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { } else { m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)] } - return currentPages, true } + m.ownerModuleEngine.MemoryGrown() + return currentPages, true } // Pages implements the same method as documented on api.Memory. @@ -296,6 +305,7 @@ func PagesToUnitOfBytes(pages uint32) string { // Uses atomic write to update the length of a slice. func atomicStoreLengthAndCap(slice *[]byte, length uintptr, cap uintptr) { + //nolint:staticcheck slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(slice)) capPtr := (*uintptr)(unsafe.Pointer(&slicePtr.Cap)) atomic.StoreUintptr(capPtr, cap) @@ -305,6 +315,7 @@ func atomicStoreLengthAndCap(slice *[]byte, length uintptr, cap uintptr) { // Uses atomic write to update the length of a slice. func atomicStoreLength(slice *[]byte, length uintptr) { + //nolint:staticcheck slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(slice)) lenPtr := (*uintptr)(unsafe.Pointer(&slicePtr.Len)) atomic.StoreUintptr(lenPtr, length) diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go index 68573b918e..8369ad9ed6 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go @@ -185,9 +185,6 @@ type Module struct { // as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm // specification: https://github.com/WebAssembly/debugging/issues/1 DWARFLines *wasmdebug.DWARFLines - - // NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee. - NonStaticLocals []map[Index]struct{} } // ModuleID represents sha256 hash value uniquely assigned to Module. @@ -366,8 +363,6 @@ func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions [ br := bytes.NewReader(nil) // Also, we reuse the stacks across multiple function validations to reduce allocations. vs := &stacks{} - // Non-static locals are gathered during validation and used in the down-stream compilation. - m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection)) for idx, typeIndex := range m.FunctionSection { if typeIndex >= typeCount { return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex) @@ -655,7 +650,7 @@ func paramNames(localNames IndirectNameMap, funcIdx uint32, paramLen int) []stri func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) { memSec := module.MemorySection if memSec != nil { - m.MemoryInstance = NewMemoryInstance(memSec, allocator) + m.MemoryInstance = NewMemoryInstance(memSec, allocator, m.Engine) m.MemoryInstance.definition = &module.MemoryDefinitionSection[0] } } diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go index 1db661e853..dda6e5b635 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go @@ -3,6 +3,7 @@ package wasm import ( "context" "encoding/binary" + "errors" "fmt" "sync" "sync/atomic" @@ -352,7 +353,7 @@ func (s *Store) instantiate( return nil, err } - if err = m.resolveImports(module); err != nil { + if err = m.resolveImports(ctx, module); err != nil { return nil, err } @@ -410,12 +411,22 @@ func (s *Store) instantiate( return } -func (m *ModuleInstance) resolveImports(module *Module) (err error) { +func (m *ModuleInstance) resolveImports(ctx context.Context, module *Module) (err error) { + // Check if ctx contains an ImportResolver. + resolveImport, _ := ctx.Value(expctxkeys.ImportResolverKey{}).(experimental.ImportResolver) + for moduleName, imports := range module.ImportPerModule { var importedModule *ModuleInstance - importedModule, err = m.s.module(moduleName) - if err != nil { - return err + if resolveImport != nil { + if v := resolveImport(moduleName); v != nil { + importedModule = v.(*ModuleInstance) + } + } + if importedModule == nil { + importedModule, err = m.s.module(moduleName) + if err != nil { + return err + } } for _, i := range imports { @@ -649,20 +660,20 @@ func (s *Store) GetFunctionTypeID(t *FunctionType) (FunctionTypeID, error) { } // CloseWithExitCode implements the same method as documented on wazero.Runtime. -func (s *Store) CloseWithExitCode(ctx context.Context, exitCode uint32) (err error) { +func (s *Store) CloseWithExitCode(ctx context.Context, exitCode uint32) error { s.mux.Lock() defer s.mux.Unlock() // Close modules in reverse initialization order. + var errs []error for m := s.moduleList; m != nil; m = m.next { // If closing this module errs, proceed anyway to close the others. - if e := m.closeWithExitCode(ctx, exitCode); e != nil && err == nil { - // TODO: use multiple errors handling in Go 1.20. - err = e // first error + if err := m.closeWithExitCode(ctx, exitCode); err != nil { + errs = append(errs, err) } } s.moduleList = nil s.nameToModule = nil s.nameToModuleCap = 0 s.typeIDs = nil - return + return errors.Join(errs...) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go index 17c63e38e6..ede3047deb 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go @@ -3,8 +3,6 @@ package wasm import ( "errors" "fmt" - - "github.com/tetratelabs/wazero/api" ) // deleteModule makes the moduleName available for instantiation again. @@ -88,7 +86,7 @@ func (s *Store) registerModule(m *ModuleInstance) error { } // Module implements wazero.Runtime Module -func (s *Store) Module(moduleName string) api.Module { +func (s *Store) Module(moduleName string) *ModuleInstance { m, err := s.module(moduleName) if err != nil { return nil diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go index 3b0d3a7a62..50ba7b2b3f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go @@ -171,7 +171,6 @@ entry: // Advance the line reader for the found position. lineReader.Seek(ln.pos) err = lineReader.Next(&le) - if err != nil { // If we reach this block, that means there's a bug in the []line creation logic above. panic("BUG: stored dwarf.LineReaderPos is invalid") diff --git a/vendor/github.com/tetratelabs/wazero/runtime.go b/vendor/github.com/tetratelabs/wazero/runtime.go index d1f0a1a310..34742289eb 100644 --- a/vendor/github.com/tetratelabs/wazero/runtime.go +++ b/vendor/github.com/tetratelabs/wazero/runtime.go @@ -197,7 +197,13 @@ func (r *runtime) Module(moduleName string) api.Module { if len(moduleName) == 0 { return nil } - return r.store.Module(moduleName) + m := r.store.Module(moduleName) + if m == nil { + return nil + } else if m.Source.IsHostModule { + return hostModuleInstance{m} + } + return m } // CompileModule implements Runtime.CompileModule diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go b/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go index 583c2adb04..cc37012cff 100644 --- a/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go +++ b/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go @@ -7,9 +7,6 @@ import "io/fs" // sysParseable is only used here as we define "supported" as being able to // parse `info.Sys()`. The above `go:build` constraints exclude 32-bit until // that's requested. -// -// TODO: When Go 1.21 is out, use the "unix" build constraint (as 1.21 makes -// our floor Go version 1.19. const sysParseable = false func statFromFileInfo(info fs.FileInfo) Stat_t { diff --git a/vendor/modules.txt b/vendor/modules.txt index a1dafd3957..d30f7645b0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -704,8 +704,8 @@ github.com/tailscale/wireguard-go/tun # github.com/tcnksm/go-httpstat v0.2.0 ## explicit github.com/tcnksm/go-httpstat -# github.com/tetratelabs/wazero v1.7.2 -## explicit; go 1.20 +# github.com/tetratelabs/wazero v1.8.0 +## explicit; go 1.21 github.com/tetratelabs/wazero github.com/tetratelabs/wazero/api github.com/tetratelabs/wazero/experimental