From e37f481778c778ccf006b7d0e7a5c56fcef108fd Mon Sep 17 00:00:00 2001
From: Pavol Loffay
Date: Tue, 15 Oct 2024 14:20:11 +0200
Subject: [PATCH] Support Tempo on IBM s390x (#4175)
* Support Tempo on IBM s390x
Signed-off-by: Pavol Loffay
* Fix
Signed-off-by: Pavol Loffay
* Fix
Signed-off-by: Pavol Loffay
* Fix
Signed-off-by: Pavol Loffay
* update serverless go.sum
Signed-off-by: Joe Elliott
---------
Signed-off-by: Pavol Loffay
Signed-off-by: Joe Elliott
Co-authored-by: Joe Elliott
---
CHANGELOG.md | 1 +
cmd/tempo-serverless/cloud-run/go.mod | 11 +-
cmd/tempo-serverless/cloud-run/go.sum | 24 +-
cmd/tempo-serverless/lambda/go.mod | 11 +-
cmd/tempo-serverless/lambda/go.sum | 24 +-
go.mod | 11 +-
go.sum | 24 +-
.../github.com/andybalholm/brotli/encoder.go | 9 +
.../andybalholm/brotli/matchfinder/emitter.go | 11 -
.../andybalholm/brotli/matchfinder/m4.go | 43 +-
.../klauspost/compress/.goreleaser.yml | 6 +-
.../github.com/klauspost/compress/README.md | 29 +-
.../klauspost/compress/flate/deflate.go | 2 +-
.../klauspost/compress/flate/inflate.go | 74 +-
.../klauspost/compress/fse/decompress.go | 2 +-
.../klauspost/compress/gzhttp/compress.go | 80 +-
.../compress/gzhttp/compress_go119.go | 9 -
.../compress/gzhttp/compress_go120.go | 9 -
.../klauspost/compress/gzhttp/transport.go | 19 +-
.../klauspost/compress/huff0/decompress.go | 4 +-
.../klauspost/compress/s2/encode.go | 25 +-
.../klauspost/compress/s2/encode_amd64.go | 201 +-
.../klauspost/compress/s2/encode_go.go | 4 +-
.../compress/s2/encodeblock_amd64.go | 44 +-
.../klauspost/compress/s2/encodeblock_amd64.s | 21920 ++++++++--------
.../klauspost/compress/s2/writer.go | 31 +-
.../klauspost/compress/zstd/blockdec.go | 4 +-
.../klauspost/compress/zstd/enc_better.go | 32 +-
.../klauspost/compress/zstd/enc_dfast.go | 16 +-
.../klauspost/compress/zstd/encoder.go | 45 +-
.../klauspost/compress/zstd/framedec.go | 4 +-
.../klauspost/compress/zstd/seqdec_amd64.go | 4 +-
.../klauspost/compress/zstd/seqdec_amd64.s | 8 +-
.../klauspost/compress/zstd/zstd.go | 4 +
.../mattn/go-runewidth/runewidth_table.go | 323 +-
.../parquet-go/parquet-go/allocator.go | 10 +-
.../github.com/parquet-go/parquet-go/array.go | 13 +-
.../github.com/parquet-go/parquet-go/bloom.go | 25 +-
.../parquet-go/parquet-go/bloom/filter.go | 9 +-
.../parquet-go/parquet-go/column.go | 4 +-
.../parquet-go/parquet-go/column_buffer.go | 77 +-
.../parquet-go/column_buffer_amd64.go | 2 +-
.../parquet-go/parquet-go/column_index_be.go | 854 +
.../{column_index.go => column_index_le.go} | 69 +-
.../parquet-go/parquet-go/convert.go | 13 +-
.../parquet-go/parquet-go/deprecated/int96.go | 16 -
.../parquet-go/parquet-go/dictionary.go | 59 +-
.../parquet-go/parquet-go/dictionary_amd64.go | 18 +-
.../parquet-go/dictionary_purego.go | 7 +-
.../bytestreamsplit/bytestreamsplit.go | 12 +-
.../bytestreamsplit/bytestreamsplit_purego.go | 8 +-
.../encoding/delta/binary_packed.go | 12 +-
.../encoding/delta/binary_packed_amd64.go | 4 +-
.../parquet-go/encoding/delta/delta.go | 4 +-
.../parquet-go/encoding/plain/plain.go | 114 +-
.../parquet-go/encoding/rle/dictionary.go | 4 +-
.../parquet-go/parquet-go/encoding/rle/rle.go | 33 +-
.../parquet-go/encoding/thrift}/LICENSE | 0
.../parquet-go}/encoding/thrift/binary.go | 6 +-
.../parquet-go}/encoding/thrift/compact.go | 4 +-
.../parquet-go}/encoding/thrift/debug.go | 0
.../parquet-go}/encoding/thrift/decode.go | 0
.../parquet-go}/encoding/thrift/encode.go | 0
.../parquet-go}/encoding/thrift/error.go | 0
.../parquet-go}/encoding/thrift/protocol.go | 0
.../parquet-go}/encoding/thrift/struct.go | 0
.../parquet-go}/encoding/thrift/thrift.go | 0
.../parquet-go}/encoding/thrift/unsafe.go | 4 -
.../parquet-go/parquet-go/encoding/values.go | 93 +-
.../github.com/parquet-go/parquet-go/file.go | 71 +-
.../parquet-go/hashprobe/hashprobe.go | 14 +-
.../internal/bitpack/unpack_int32_amd64.go | 2 +-
.../internal/bitpack/unpack_int32_purego.go | 18 +-
.../internal/bitpack/unpack_int64_amd64.go | 2 +-
.../internal/bitpack/unpack_int64_purego.go | 22 +-
.../internal/unsafecast/unsafecast.go | 144 +-
.../github.com/parquet-go/parquet-go/node.go | 7 +
.../github.com/parquet-go/parquet-go/order.go | 4 +-
.../parquet-go/parquet-go/order_purego.go | 170 +-
.../parquet-go/parquet-go/page_values.go | 14 +-
.../parquet-go/parquet-go/sparse/array.go | 72 +-
.../parquet-go/parquet-go/sparse/gather.go | 10 +-
.../github.com/parquet-go/parquet-go/type.go | 7 +-
.../github.com/parquet-go/parquet-go/value.go | 15 +-
.../parquet-go/parquet-go/writer.go | 2 +-
vendor/golang.org/x/sys/LICENSE | 4 +-
vendor/golang.org/x/sys/cpu/cpu.go | 21 +
vendor/golang.org/x/sys/cpu/cpu_arm64.go | 12 +
.../golang.org/x/sys/cpu/cpu_linux_arm64.go | 5 +
.../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +-
.../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 +
vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +-
vendor/golang.org/x/sys/unix/README.md | 2 +-
vendor/golang.org/x/sys/unix/mkerrors.sh | 6 +-
vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +-
.../golang.org/x/sys/unix/syscall_darwin.go | 49 +
vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 +
vendor/golang.org/x/sys/unix/syscall_linux.go | 64 +-
.../x/sys/unix/syscall_linux_arm64.go | 2 +
.../x/sys/unix/syscall_linux_loong64.go | 2 +
.../x/sys/unix/syscall_linux_riscv64.go | 2 +
.../golang.org/x/sys/unix/syscall_openbsd.go | 1 +
.../golang.org/x/sys/unix/vgetrandom_linux.go | 13 +
.../x/sys/unix/vgetrandom_unsupported.go | 11 +
.../x/sys/unix/zerrors_darwin_amd64.go | 12 +
.../x/sys/unix/zerrors_darwin_arm64.go | 12 +
vendor/golang.org/x/sys/unix/zerrors_linux.go | 51 +-
.../x/sys/unix/zerrors_linux_386.go | 7 +
.../x/sys/unix/zerrors_linux_amd64.go | 7 +
.../x/sys/unix/zerrors_linux_arm.go | 7 +
.../x/sys/unix/zerrors_linux_arm64.go | 7 +
.../x/sys/unix/zerrors_linux_loong64.go | 7 +
.../x/sys/unix/zerrors_linux_mips.go | 7 +
.../x/sys/unix/zerrors_linux_mips64.go | 7 +
.../x/sys/unix/zerrors_linux_mips64le.go | 7 +
.../x/sys/unix/zerrors_linux_mipsle.go | 7 +
.../x/sys/unix/zerrors_linux_ppc.go | 7 +
.../x/sys/unix/zerrors_linux_ppc64.go | 7 +
.../x/sys/unix/zerrors_linux_ppc64le.go | 7 +
.../x/sys/unix/zerrors_linux_riscv64.go | 7 +
.../x/sys/unix/zerrors_linux_s390x.go | 7 +
.../x/sys/unix/zerrors_linux_sparc64.go | 7 +
.../x/sys/unix/zerrors_zos_s390x.go | 2 +
.../x/sys/unix/zsyscall_darwin_amd64.go | 68 +
.../x/sys/unix/zsyscall_darwin_amd64.s | 15 +
.../x/sys/unix/zsyscall_darwin_arm64.go | 68 +
.../x/sys/unix/zsyscall_darwin_arm64.s | 15 +
.../golang.org/x/sys/unix/zsyscall_linux.go | 33 +-
.../x/sys/unix/zsyscall_openbsd_386.go | 24 +
.../x/sys/unix/zsyscall_openbsd_386.s | 5 +
.../x/sys/unix/zsyscall_openbsd_amd64.go | 24 +
.../x/sys/unix/zsyscall_openbsd_amd64.s | 5 +
.../x/sys/unix/zsyscall_openbsd_arm.go | 24 +
.../x/sys/unix/zsyscall_openbsd_arm.s | 5 +
.../x/sys/unix/zsyscall_openbsd_arm64.go | 24 +
.../x/sys/unix/zsyscall_openbsd_arm64.s | 5 +
.../x/sys/unix/zsyscall_openbsd_mips64.go | 24 +
.../x/sys/unix/zsyscall_openbsd_mips64.s | 5 +
.../x/sys/unix/zsyscall_openbsd_ppc64.go | 24 +
.../x/sys/unix/zsyscall_openbsd_ppc64.s | 6 +
.../x/sys/unix/zsyscall_openbsd_riscv64.go | 24 +
.../x/sys/unix/zsyscall_openbsd_riscv64.s | 5 +
.../x/sys/unix/zsysnum_linux_386.go | 1 +
.../x/sys/unix/zsysnum_linux_amd64.go | 2 +
.../x/sys/unix/zsysnum_linux_arm.go | 1 +
.../x/sys/unix/zsysnum_linux_arm64.go | 3 +-
.../x/sys/unix/zsysnum_linux_loong64.go | 3 +
.../x/sys/unix/zsysnum_linux_mips.go | 1 +
.../x/sys/unix/zsysnum_linux_mips64.go | 1 +
.../x/sys/unix/zsysnum_linux_mips64le.go | 1 +
.../x/sys/unix/zsysnum_linux_mipsle.go | 1 +
.../x/sys/unix/zsysnum_linux_ppc.go | 1 +
.../x/sys/unix/zsysnum_linux_ppc64.go | 1 +
.../x/sys/unix/zsysnum_linux_ppc64le.go | 1 +
.../x/sys/unix/zsysnum_linux_riscv64.go | 3 +-
.../x/sys/unix/zsysnum_linux_s390x.go | 1 +
.../x/sys/unix/zsysnum_linux_sparc64.go | 1 +
.../x/sys/unix/ztypes_darwin_amd64.go | 13 +
.../x/sys/unix/ztypes_darwin_arm64.go | 13 +
.../x/sys/unix/ztypes_freebsd_386.go | 1 +
.../x/sys/unix/ztypes_freebsd_amd64.go | 1 +
.../x/sys/unix/ztypes_freebsd_arm.go | 1 +
.../x/sys/unix/ztypes_freebsd_arm64.go | 1 +
.../x/sys/unix/ztypes_freebsd_riscv64.go | 1 +
vendor/golang.org/x/sys/unix/ztypes_linux.go | 96 +-
.../x/sys/unix/ztypes_linux_riscv64.go | 33 +
.../golang.org/x/sys/windows/dll_windows.go | 2 +-
.../x/sys/windows/security_windows.go | 2 +-
.../x/sys/windows/syscall_windows.go | 16 +-
.../golang.org/x/sys/windows/types_windows.go | 72 +-
.../x/sys/windows/zsyscall_windows.go | 87 +-
vendor/modules.txt | 18 +-
172 files changed, 14354 insertions(+), 12073 deletions(-)
delete mode 100644 vendor/github.com/klauspost/compress/gzhttp/compress_go119.go
delete mode 100644 vendor/github.com/klauspost/compress/gzhttp/compress_go120.go
create mode 100644 vendor/github.com/parquet-go/parquet-go/column_index_be.go
rename vendor/github.com/parquet-go/parquet-go/{column_index.go => column_index_le.go} (90%)
rename vendor/github.com/{segmentio/encoding => parquet-go/parquet-go/encoding/thrift}/LICENSE (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/binary.go (98%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/compact.go (98%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/debug.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/decode.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/encode.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/error.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/protocol.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/struct.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/thrift.go (100%)
rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/unsafe.go (85%)
create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go
create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d0500edbfe..0ff3e8df725 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
## main / unreleased
+* [ENHANCEMENT] Support Tempo on IBM s390x [#4175](https://github.com/grafana/tempo/pull/4175) (@pavolloffay)
* [ENHANCEMENT] Changed log level from INFO to DEBUG for the TempoDB Find operation using traceId to reduce excessive/unwanted logs in log search. [#4179](https://github.com/grafana/tempo/pull/4179) (@Aki0x137)
* [ENHANCEMENT] tempo-query: separate tls settings for server and client [#4177](https://github.com/grafana/tempo/pull/4177) (@frzifus)
* [ENHANCEMENT] Pushdown collection of results from generators in the querier [#4119](https://github.com/grafana/tempo/pull/4119) (@electron0zero)
diff --git a/cmd/tempo-serverless/cloud-run/go.mod b/cmd/tempo-serverless/cloud-run/go.mod
index f8bd78a3d37..e48440c67d1 100644
--- a/cmd/tempo-serverless/cloud-run/go.mod
+++ b/cmd/tempo-serverless/cloud-run/go.mod
@@ -20,7 +20,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect
- github.com/andybalholm/brotli v1.1.0 // indirect
+ github.com/andybalholm/brotli v1.1.1 // indirect
github.com/apache/thrift v0.20.0 // indirect
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/beorn7/perks v1.0.1 // indirect
@@ -59,11 +59,11 @@ require (
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
- github.com/klauspost/compress v1.17.9 // indirect
+ github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
- github.com/mattn/go-runewidth v0.0.15 // indirect
+ github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/miekg/dns v1.1.61 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/minio-go/v7 v7.0.70 // indirect
@@ -78,7 +78,7 @@ require (
github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect
github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
- github.com/parquet-go/parquet-go v0.23.0 // indirect
+ github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pires/go-proxyproto v0.7.0 // indirect
@@ -94,7 +94,6 @@ require (
github.com/rs/xid v1.5.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
- github.com/segmentio/encoding v0.4.0 // indirect
github.com/sercand/kuberesolver/v5 v5.1.1 // indirect
github.com/sony/gobreaker v0.4.1 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
@@ -126,7 +125,7 @@ require (
golang.org/x/net v0.27.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.7.0 // indirect
- golang.org/x/sys v0.22.0 // indirect
+ golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.23.0 // indirect
diff --git a/cmd/tempo-serverless/cloud-run/go.sum b/cmd/tempo-serverless/cloud-run/go.sum
index 06d0d648896..11625dca355 100644
--- a/cmd/tempo-serverless/cloud-run/go.sum
+++ b/cmd/tempo-serverless/cloud-run/go.sum
@@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn
github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI=
github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M=
github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88=
-github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
-github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI=
github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
@@ -170,8 +170,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
-github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
@@ -188,8 +188,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
-github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
@@ -230,8 +230,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
-github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk=
-github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw=
github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
@@ -269,8 +269,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke
github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
-github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8=
-github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI=
github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY=
github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ=
github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ=
@@ -313,6 +311,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA=
github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ=
@@ -388,8 +388,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
-golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
diff --git a/cmd/tempo-serverless/lambda/go.mod b/cmd/tempo-serverless/lambda/go.mod
index 2463d03d31c..3eb799d9ff8 100644
--- a/cmd/tempo-serverless/lambda/go.mod
+++ b/cmd/tempo-serverless/lambda/go.mod
@@ -22,7 +22,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect
- github.com/andybalholm/brotli v1.1.0 // indirect
+ github.com/andybalholm/brotli v1.1.1 // indirect
github.com/apache/thrift v0.20.0 // indirect
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/beorn7/perks v1.0.1 // indirect
@@ -62,11 +62,11 @@ require (
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
- github.com/klauspost/compress v1.17.9 // indirect
+ github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
- github.com/mattn/go-runewidth v0.0.15 // indirect
+ github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/miekg/dns v1.1.61 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/minio-go/v7 v7.0.70 // indirect
@@ -81,7 +81,7 @@ require (
github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect
github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
- github.com/parquet-go/parquet-go v0.23.0 // indirect
+ github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pires/go-proxyproto v0.7.0 // indirect
@@ -98,7 +98,6 @@ require (
github.com/rs/xid v1.5.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
- github.com/segmentio/encoding v0.4.0 // indirect
github.com/sercand/kuberesolver/v5 v5.1.1 // indirect
github.com/sony/gobreaker v0.4.1 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
@@ -130,7 +129,7 @@ require (
golang.org/x/net v0.27.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.7.0 // indirect
- golang.org/x/sys v0.22.0 // indirect
+ golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.23.0 // indirect
diff --git a/cmd/tempo-serverless/lambda/go.sum b/cmd/tempo-serverless/lambda/go.sum
index 80a339939e6..70937341c86 100644
--- a/cmd/tempo-serverless/lambda/go.sum
+++ b/cmd/tempo-serverless/lambda/go.sum
@@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn
github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI=
github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M=
github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88=
-github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
-github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI=
github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8=
github.com/aws/aws-lambda-go v1.28.0 h1:fZiik1PZqW2IyAN4rj+Y0UBaO1IDFlsNo9Zz/XnArK4=
@@ -174,8 +174,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
-github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
@@ -192,8 +192,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
-github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
@@ -234,8 +234,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
-github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk=
-github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw=
github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
@@ -274,8 +274,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke
github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
-github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8=
-github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI=
github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY=
github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
@@ -321,6 +319,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA=
github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ=
@@ -396,8 +396,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
-golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
diff --git a/go.mod b/go.mod
index 9e4c1f6095c..3d8fadad039 100644
--- a/go.mod
+++ b/go.mod
@@ -33,7 +33,7 @@ require (
github.com/jedib0t/go-pretty/v6 v6.2.4
github.com/json-iterator/go v1.1.12
github.com/jsternberg/zap-logfmt v1.2.0
- github.com/klauspost/compress v1.17.9
+ github.com/klauspost/compress v1.17.11
github.com/minio/minio-go/v7 v7.0.70
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c
github.com/olekukonko/tablewriter v0.0.5
@@ -98,7 +98,7 @@ require (
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.102.0
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.102.0
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.102.0
- github.com/parquet-go/parquet-go v0.23.0
+ github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe
github.com/stoewer/parquet-cli v0.0.7
go.opentelemetry.io/collector/config/configgrpc v0.102.1
go.opentelemetry.io/collector/config/confighttp v0.102.1
@@ -133,7 +133,7 @@ require (
github.com/alecthomas/participle/v2 v2.1.1 // indirect
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect
- github.com/andybalholm/brotli v1.1.0 // indirect
+ github.com/andybalholm/brotli v1.1.1 // indirect
github.com/apache/thrift v0.20.0 // indirect
github.com/armon/go-metrics v0.4.1 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
@@ -216,7 +216,7 @@ require (
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
- github.com/mattn/go-runewidth v0.0.15 // indirect
+ github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/miekg/dns v1.1.61 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
@@ -260,7 +260,6 @@ require (
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
- github.com/segmentio/encoding v0.4.0 // indirect
github.com/sercand/kuberesolver/v5 v5.1.1 // indirect
github.com/shirou/gopsutil/v3 v3.24.4 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
@@ -321,7 +320,7 @@ require (
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
golang.org/x/mod v0.19.0 // indirect
- golang.org/x/sys v0.22.0 // indirect
+ golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/tools v0.23.0 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
diff --git a/go.sum b/go.sum
index 972730dd25a..0bed50c515b 100644
--- a/go.sum
+++ b/go.sum
@@ -93,8 +93,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a h1:HbKu58rmZp
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc=
github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M=
github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88=
-github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
-github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI=
github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8=
@@ -566,8 +566,8 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
-github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
@@ -616,8 +616,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
-github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
@@ -731,8 +731,8 @@ github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7s
github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c=
github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI=
github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c=
-github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk=
-github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg=
+github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
@@ -825,8 +825,6 @@ github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 h1:BkTk4gynLjguayxrYxZoMZjBnA
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
-github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8=
-github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI=
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e h1:uO75wNGioszjmIzcY/tvdDYKRLVvzggtAmmJkn9j4GQ=
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e/go.mod h1:tm/wZFQ8e24NYaBGIlnO2WGCAi67re4HHuOm0sftE/M=
github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY=
@@ -906,6 +904,8 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -1258,8 +1258,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
-golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
diff --git a/vendor/github.com/andybalholm/brotli/encoder.go b/vendor/github.com/andybalholm/brotli/encoder.go
index 650d1e42b49..1928382596e 100644
--- a/vendor/github.com/andybalholm/brotli/encoder.go
+++ b/vendor/github.com/andybalholm/brotli/encoder.go
@@ -21,6 +21,15 @@ func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, la
e.wroteHeader = true
}
+ if len(src) == 0 {
+ if lastBlock {
+ e.bw.writeBits(2, 3) // islast + isempty
+ e.bw.jumpToByteBoundary()
+ return e.bw.dst
+ }
+ return dst
+ }
+
var literalHisto [256]uint32
var commandHisto [704]uint32
var distanceHisto [64]uint32
diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go
index 37ed8e13340..507d1cae64c 100644
--- a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go
+++ b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go
@@ -32,14 +32,3 @@ func (e *matchEmitter) emit(m absoluteMatch) {
})
e.NextEmit = m.End
}
-
-// trim shortens m if it extends past maxEnd. Then if the length is at least
-// minLength, the match is emitted.
-func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
- if m.End > maxEnd {
- m.End = maxEnd
- }
- if m.End-m.Start >= minLength {
- e.emit(m)
- }
-}
diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go
index 5b2acba2e14..818947255df 100644
--- a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go
+++ b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go
@@ -56,7 +56,7 @@ func (q *M4) Reset() {
}
func (q *M4) score(m absoluteMatch) int {
- return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost
+ return (m.End-m.Start)*256 + (bits.LeadingZeros32(uint32(m.Start-m.Match))-32)*q.DistanceBitCost
}
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
@@ -112,7 +112,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
- e.trim(matches[1], matches[0].Start, q.MinLength)
+ if matches[1].End > matches[0].Start {
+ matches[1].End = matches[0].Start
+ }
+ if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
+ e.emit(matches[1])
+ }
}
e.emit(matches[0])
matches = [3]absoluteMatch{}
@@ -139,12 +144,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
// Look for a match.
var currentMatch absoluteMatch
- if i-candidate != matches[0].Start-matches[0].Match {
- if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
- m := extendMatch2(src, i, candidate, e.NextEmit)
- if m.End-m.Start > q.MinLength {
- currentMatch = m
- }
+ if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
+ m := extendMatch2(src, i, candidate, e.NextEmit)
+ if m.End-m.Start > q.MinLength && q.score(m) > 0 {
+ currentMatch = m
}
}
@@ -157,12 +160,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if candidate <= 0 || i-candidate > q.MaxDistance {
break
}
- if i-candidate != matches[0].Start-matches[0].Match {
- if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
- m := extendMatch2(src, i, candidate, e.NextEmit)
- if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
- currentMatch = m
- }
+ if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
+ m := extendMatch2(src, i, candidate, e.NextEmit)
+ if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
+ currentMatch = m
}
}
}
@@ -217,14 +218,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
- e.trim(matches[2], matches[1].Start, q.MinLength)
+ if matches[2].End > matches[1].Start {
+ matches[2].End = matches[1].Start
+ }
+ if matches[2].End-matches[2].Start >= q.MinLength && q.score(matches[2]) > 0 {
+ e.emit(matches[2])
+ }
matches[2] = absoluteMatch{}
}
}
// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
- e.trim(matches[1], matches[0].Start, q.MinLength)
+ if matches[1].End > matches[0].Start {
+ matches[1].End = matches[0].Start
+ }
+ if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
+ e.emit(matches[1])
+ }
}
if matches[0] != (absoluteMatch{}) {
e.emit(matches[0])
diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml
index a22953805c6..4528059ca68 100644
--- a/vendor/github.com/klauspost/compress/.goreleaser.yml
+++ b/vendor/github.com/klauspost/compress/.goreleaser.yml
@@ -1,5 +1,5 @@
-# This is an example goreleaser.yaml file with some sane defaults.
-# Make sure to check the documentation at http://goreleaser.com
+version: 2
+
before:
hooks:
- ./gen.sh
@@ -99,7 +99,7 @@ archives:
checksum:
name_template: 'checksums.txt'
snapshot:
- name_template: "{{ .Tag }}-next"
+ version_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
index 05c7359e481..de264c85a5a 100644
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -16,6 +16,27 @@ This package provides various compression algorithms.
# changelog
+* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10)
+ * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978
+ * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002
+ * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982
+ * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007
+ * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996
+
+* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9)
+ * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949
+ * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963
+ * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971
+ * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951
+
+* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8)
+ * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885
+ * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938
+
+* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7)
+ * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927
+ * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930
+
* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
@@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp
* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
- * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
+ * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
@@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp
* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
* Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651
* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
- * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657
+ * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657
* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
@@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when
* s2: Fix binaries.
* Feb 25, 2021 (v1.11.8)
- * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended.
+ * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended.
* s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315)
* s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322)
* zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314)
@@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when
* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
* Feb 19, 2016: Handle small payloads faster in level 1-3.
* Feb 19, 2016: Added faster level 2 + 3 compression modes.
-* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5.
+* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5.
* Feb 14, 2016: Snappy: Merge upstream changes.
* Feb 14, 2016: Snappy: Fix aggressive skipping.
* Feb 14, 2016: Snappy: Update benchmark.
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 66d1657d2c6..af53fb860cc 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) {
}
switch d.compressionLevel.chain {
case 0:
- // level was NoCompression or ConstantCompresssion.
+ // level was NoCompression or ConstantCompression.
d.windowEnd = 0
default:
s := d.state
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 2f410d64f5a..0d7b437f1c6 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -298,6 +298,14 @@ const (
huffmanGenericReader
)
+// flushMode tells decompressor when to return data
+type flushMode uint8
+
+const (
+ syncFlush flushMode = iota // return data after sync flush block
+ partialFlush // return data after each block
+)
+
// Decompress state.
type decompressor struct {
// Input source.
@@ -332,6 +340,8 @@ type decompressor struct {
nb uint
final bool
+
+ flushMode flushMode
}
func (f *decompressor) nextBlock() {
@@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() {
}
if n == 0 {
- f.toRead = f.dict.readFlush()
+ if f.flushMode == syncFlush {
+ f.toRead = f.dict.readFlush()
+ }
+
f.finishBlock()
return
}
@@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
+
f.err = io.EOF
+ } else if f.flushMode == partialFlush && f.dict.availRead() > 0 {
+ f.toRead = f.dict.readFlush()
}
+
f.step = nextBlock
}
@@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
return nil
}
-// NewReader returns a new ReadCloser that can be used
-// to read the uncompressed version of r.
-// If r does not also implement io.ByteReader,
-// the decompressor may read more data than necessary from r.
-// It is the caller's responsibility to call Close on the ReadCloser
-// when finished reading.
-//
-// The ReadCloser returned by NewReader also implements Resetter.
-func NewReader(r io.Reader) io.ReadCloser {
+type ReaderOpt func(*decompressor)
+
+// WithPartialBlock tells decompressor to return after each block,
+// so it can read data written with partial flush
+func WithPartialBlock() ReaderOpt {
+ return func(f *decompressor) {
+ f.flushMode = partialFlush
+ }
+}
+
+// WithDict initializes the reader with a preset dictionary
+func WithDict(dict []byte) ReaderOpt {
+ return func(f *decompressor) {
+ f.dict.init(maxMatchOffset, dict)
+ }
+}
+
+// NewReaderOpts returns new reader with provided options
+func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
@@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser {
f.codebits = new([numCodes]int)
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
+
+ for _, opt := range opts {
+ opt(&f)
+ }
+
return &f
}
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser
+// when finished reading.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReader(r io.Reader) io.ReadCloser {
+ return NewReaderOpts(r)
+}
+
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
@@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser {
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
- fixedHuffmanDecoderInit()
-
- var f decompressor
- f.r = makeReader(r)
- f.bits = new([maxNumLit + maxNumDist]int)
- f.codebits = new([numCodes]int)
- f.step = nextBlock
- f.dict.init(maxMatchOffset, dict)
- return &f
+ return NewReaderOpts(r, WithDict(dict))
}
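The vendored flate changes above add NewReaderOpts, WithPartialBlock, and WithDict alongside the existing NewReader. A minimal sketch (not part of the patch) of how a caller might use the new partial-flush mode; the stdin/stdout plumbing is an assumption for illustration only:

package main

import (
	"io"
	"os"

	"github.com/klauspost/compress/flate"
)

func main() {
	// Assumed input: a raw deflate stream on stdin that was written with
	// partial flushes rather than sync flushes.
	r := flate.NewReaderOpts(os.Stdin, flate.WithPartialBlock())
	defer r.Close()
	// In partial-flush mode the decompressor returns data after each block,
	// so the reader is not starved waiting for a sync-flush marker.
	if _, err := io.Copy(os.Stdout, r); err != nil {
		os.Exit(1)
	}
}

NewReader(r) now simply delegates to NewReaderOpts(r) with no options, and NewReaderDict(r, dict) to NewReaderOpts(r, WithDict(dict)), so existing callers are unaffected.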
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
index cc05d0f7ea9..0c7dd4ffef9 100644
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -15,7 +15,7 @@ const (
// It is possible, but by no way guaranteed that corrupt data will
// return an error.
// It is up to the caller to verify integrity of the returned data.
-// Use a predefined Scrach to set maximum acceptable output size.
+// Use a predefined Scratch to set maximum acceptable output size.
func Decompress(b []byte, s *Scratch) ([]byte, error) {
s, err := s.prepare(b)
if err != nil {
diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress.go b/vendor/github.com/klauspost/compress/gzhttp/compress.go
index 289ae3e2ee8..52e3077ec4e 100644
--- a/vendor/github.com/klauspost/compress/gzhttp/compress.go
+++ b/vendor/github.com/klauspost/compress/gzhttp/compress.go
@@ -131,15 +131,15 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if cl >= w.minSize || len(w.buf) >= w.minSize {
- // If a Content-Type wasn't specified, infer it from the current buffer.
- if ct == "" {
+ // If a Content-Type wasn't specified, infer it from the current buffer when the response has a body.
+ if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 {
ct = http.DetectContentType(w.buf)
- }
- // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
- // Set the header only if the key does not exist
- if _, ok := hdr[contentType]; w.setContentType && !ok {
- hdr.Set(contentType, ct)
+ // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
+ // Set the header only if the key does not exist
+ if _, ok := hdr[contentType]; w.setContentType && !ok {
+ hdr.Set(contentType, ct)
+ }
}
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
@@ -306,7 +306,7 @@ func (w *GzipResponseWriter) startPlain() error {
func (w *GzipResponseWriter) WriteHeader(code int) {
// Handle informational headers
// This is gated to not forward 1xx responses on builds prior to go1.20.
- if shouldWrite1xxResponses() && code >= 100 && code <= 199 {
+ if code >= 100 && code <= 199 {
w.ResponseWriter.WriteHeader(code)
return
}
@@ -324,6 +324,20 @@ func (w *GzipResponseWriter) init() {
w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
}
+// bodyAllowedForStatus reports whether a given response status code
+// permits a body. See RFC 7230, section 3.3.
+func bodyAllowedForStatus(status int) bool {
+ switch {
+ case status >= 100 && status <= 199:
+ return false
+ case status == 204:
+ return false
+ case status == 304:
+ return false
+ }
+ return true
+}
+
// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
func (w *GzipResponseWriter) Close() error {
if w.ignore {
@@ -335,7 +349,9 @@ func (w *GzipResponseWriter) Close() error {
ce = w.Header().Get(contentEncoding)
cr = w.Header().Get(contentRange)
)
- if ct == "" {
+
+ // Detects the response content-type when it does not exist and the response has a body.
+ if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 {
ct = http.DetectContentType(w.buf)
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
@@ -379,7 +395,8 @@ func (w *GzipResponseWriter) Flush() {
cr = w.Header().Get(contentRange)
)
- if ct == "" {
+ // Detects the response content-type when it does not exist and the response has a body.
+ if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 {
ct = http.DetectContentType(w.buf)
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
@@ -464,6 +481,11 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
return func(h http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Add(vary, acceptEncoding)
+ if c.allowCompressedRequests && contentGzip(r) {
+ r.Header.Del(contentEncoding)
+ r.Body = &gzipReader{body: r.Body}
+ }
+
if acceptsGzip(r) {
gw := grwPool.Get().(*GzipResponseWriter)
*gw = GzipResponseWriter{
@@ -536,17 +558,18 @@ func (pct parsedContentType) equals(mediaType string, params map[string]string)
// Used for functional configuration.
type config struct {
- minSize int
- level int
- writer writer.GzipWriterFactory
- contentTypes func(ct string) bool
- keepAcceptRanges bool
- setContentType bool
- suffixETag string
- dropETag bool
- jitterBuffer int
- randomJitter string
- sha256Jitter bool
+ minSize int
+ level int
+ writer writer.GzipWriterFactory
+ contentTypes func(ct string) bool
+ keepAcceptRanges bool
+ setContentType bool
+ suffixETag string
+ dropETag bool
+ jitterBuffer int
+ randomJitter string
+ sha256Jitter bool
+ allowCompressedRequests bool
}
func (c *config) validate() error {
@@ -579,6 +602,15 @@ func MinSize(size int) option {
}
}
+// AllowCompressedRequests will enable or disable RFC 7694 compressed requests.
+// By default this is Disabled.
+// See https://datatracker.ietf.org/doc/html/rfc7694
+func AllowCompressedRequests(b bool) option {
+ return func(c *config) {
+ c.allowCompressedRequests = b
+ }
+}
+
// CompressionLevel sets the compression level
func CompressionLevel(level int) option {
return func(c *config) {
@@ -752,6 +784,12 @@ func RandomJitter(n, buffer int, paranoid bool) option {
}
}
+// contentGzip returns true if the given HTTP request indicates that its body is gzip-compressed.
+func contentGzip(r *http.Request) bool {
+ // See more detail in `acceptsGzip`
+ return r.Method != http.MethodHead && r.Body != nil && parseEncodingGzip(r.Header.Get(contentEncoding)) > 0
+}
+
// acceptsGzip returns true if the given HTTP request indicates that it will
// accept a gzipped response.
func acceptsGzip(r *http.Request) bool {
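
To show how the new option is wired in, a minimal sketch using the NewWrapper, MinSize, and AllowCompressedRequests APIs visible in the hunks above; the handler body and port are illustrative only.

package main

import (
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	wrap, err := gzhttp.NewWrapper(
		gzhttp.MinSize(1024),                 // compress responses >= 1 KiB
		gzhttp.AllowCompressedRequests(true), // opt in to RFC 7694 request bodies
	)
	if err != nil {
		log.Fatal(err)
	}

	echo := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// With the option enabled, a gzip-encoded request body arrives here
		// already wrapped for transparent decompression.
		body, _ := io.ReadAll(r.Body)
		w.Write(body)
	})

	log.Fatal(http.ListenAndServe(":8080", wrap(echo)))
}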
diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go
deleted file mode 100644
index 97fc25acbc9..00000000000
--- a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go
+++ /dev/null
@@ -1,9 +0,0 @@
-//go:build !go1.20
-// +build !go1.20
-
-package gzhttp
-
-// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes.
-func shouldWrite1xxResponses() bool {
- return false
-}
diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go
deleted file mode 100644
index 2b65f67c795..00000000000
--- a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go
+++ /dev/null
@@ -1,9 +0,0 @@
-//go:build go1.20
-// +build go1.20
-
-package gzhttp
-
-// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes.
-func shouldWrite1xxResponses() bool {
- return true
-}
diff --git a/vendor/github.com/klauspost/compress/gzhttp/transport.go b/vendor/github.com/klauspost/compress/gzhttp/transport.go
index 623aea2ed8a..3914a06e013 100644
--- a/vendor/github.com/klauspost/compress/gzhttp/transport.go
+++ b/vendor/github.com/klauspost/compress/gzhttp/transport.go
@@ -61,10 +61,21 @@ func TransportCustomEval(fn func(header http.Header) bool) transportOption {
}
}
+// TransportAlwaysDecompress will always decompress the response,
+// regardless of whether we requested it or not.
+// Default is false, which will pass compressed data through
+// if we did not request compression.
+func TransportAlwaysDecompress(enabled bool) transportOption {
+ return func(c *gzRoundtripper) {
+ c.alwaysDecomp = enabled
+ }
+}
+
type gzRoundtripper struct {
parent http.RoundTripper
acceptEncoding string
withZstd, withGzip bool
+ alwaysDecomp bool
customEval func(header http.Header) bool
}
@@ -90,15 +101,19 @@ func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) {
}
resp, err := g.parent.RoundTrip(req)
- if err != nil || !requestedComp {
+ if err != nil {
return resp, err
}
- decompress := false
+ decompress := g.alwaysDecomp
if g.customEval != nil {
if !g.customEval(resp.Header) {
return resp, nil
}
decompress = true
+ } else {
+ if !requestedComp && !g.alwaysDecomp {
+ return resp, nil
+ }
}
// Decompress
if (decompress || g.withGzip) && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
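
A small sketch of the new transport option; it assumes the package's Transport constructor takes a parent http.RoundTripper plus transportOption values (only TransportAlwaysDecompress is shown in this hunk), so consider the wiring illustrative.

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	// TransportAlwaysDecompress asks the round tripper to decompress a
	// gzip response body even when this client did not request compression.
	client := &http.Client{
		Transport: gzhttp.Transport(http.DefaultTransport,
			gzhttp.TransportAlwaysDecompress(true)),
	}

	resp, err := client.Get("https://example.com/")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(body))
}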
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 54bd08b25c0..0f56b02d747 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) {
errs++
}
if errs > 0 {
- fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+ fmt.Fprintf(w, "%d errors in base, stopping\n", errs)
continue
}
// Ensure that all combinations are covered.
@@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) {
errs++
}
if errs > 20 {
- fmt.Fprintf(w, "%d errros, stopping\n", errs)
+ fmt.Fprintf(w, "%d errors, stopping\n", errs)
break
}
}
diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go
index 0c9088adfee..20b802270a7 100644
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@@ -9,6 +9,9 @@ import (
"encoding/binary"
"math"
"math/bits"
+ "sync"
+
+ "github.com/klauspost/compress/internal/race"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
@@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte {
return dst[:d]
}
+var estblockPool [2]sync.Pool
+
// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
@@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) {
return -1
}
if len(src) <= 1024 {
- d = calcBlockSizeSmall(src)
+ const sz, pool = 2048, 0
+ tmp, ok := estblockPool[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer estblockPool[pool].Put(tmp)
+
+ d = calcBlockSizeSmall(src, tmp)
} else {
- d = calcBlockSize(src)
+ const sz, pool = 32768, 1
+ tmp, ok := estblockPool[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer estblockPool[pool].Put(tmp)
+
+ d = calcBlockSize(src, tmp)
}
if d == 0 {
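
The hunk above repeats one idiom: fetch a pointer to a fixed-size array from a sync.Pool, fall back to allocating it, and return it with defer so the encoding kernels get a zero-allocation scratch table. A standalone sketch of that idiom follows; withScratch and the 2048-byte size are hypothetical and not part of the s2 package.

package main

import (
	"fmt"
	"sync"
)

// scratchPool holds *[2048]byte values so hot paths avoid a fresh
// allocation per call, mirroring estblockPool above.
var scratchPool sync.Pool

func withScratch(fn func(tmp *[2048]byte) int) int {
	tmp, ok := scratchPool.Get().(*[2048]byte)
	if !ok {
		tmp = &[2048]byte{} // pool empty: allocate once, then recycle
	}
	defer scratchPool.Put(tmp)
	return fn(tmp)
}

func main() {
	n := withScratch(func(tmp *[2048]byte) int {
		// A real caller would pass tmp to a calcBlockSizeSmall-style kernel.
		return len(tmp)
	})
	fmt.Println(n) // 2048
}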
diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
index 4f45206a4ef..7aadd255fe3 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
@@ -3,10 +3,16 @@
package s2
-import "github.com/klauspost/compress/internal/race"
+import (
+ "sync"
+
+ "github.com/klauspost/compress/internal/race"
+)
const hasAmd64Asm = true
+var encPools [4]sync.Pool
+
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
@@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) {
)
if len(src) >= 4<<20 {
- return encodeBlockAsm(dst, src)
+ const sz, pool = 65536, 0
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeBlockAsm(dst, src, tmp)
}
if len(src) >= limit12B {
- return encodeBlockAsm4MB(dst, src)
+ const sz, pool = 65536, 0
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeBlockAsm4MB(dst, src, tmp)
}
if len(src) >= limit10B {
- return encodeBlockAsm12B(dst, src)
+ const sz, pool = 16384, 1
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeBlockAsm12B(dst, src, tmp)
}
if len(src) >= limit8B {
- return encodeBlockAsm10B(dst, src)
+ const sz, pool = 4096, 2
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeBlockAsm10B(dst, src, tmp)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
- return encodeBlockAsm8B(dst, src)
+ const sz, pool = 1024, 3
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeBlockAsm8B(dst, src, tmp)
}
+var encBetterPools [5]sync.Pool
+
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
@@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) {
)
if len(src) > 4<<20 {
- return encodeBetterBlockAsm(dst, src)
+ const sz, pool = 589824, 0
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeBetterBlockAsm(dst, src, tmp)
}
if len(src) >= limit12B {
- return encodeBetterBlockAsm4MB(dst, src)
+ const sz, pool = 589824, 0
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+
+ return encodeBetterBlockAsm4MB(dst, src, tmp)
}
if len(src) >= limit10B {
- return encodeBetterBlockAsm12B(dst, src)
+ const sz, pool = 81920, 0
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+
+ return encodeBetterBlockAsm12B(dst, src, tmp)
}
if len(src) >= limit8B {
- return encodeBetterBlockAsm10B(dst, src)
+ const sz, pool = 20480, 1
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeBetterBlockAsm10B(dst, src, tmp)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
- return encodeBetterBlockAsm8B(dst, src)
+
+ const sz, pool = 5120, 2
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeBetterBlockAsm8B(dst, src, tmp)
}
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
@@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
// Use 8 bit table when less than...
limit8B = 512
)
- if len(src) >= 64<<10 {
- return encodeSnappyBlockAsm(dst, src)
+ if len(src) > 65536 {
+ const sz, pool = 65536, 0
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeSnappyBlockAsm(dst, src, tmp)
}
if len(src) >= limit12B {
- return encodeSnappyBlockAsm64K(dst, src)
+ const sz, pool = 65536, 0
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeSnappyBlockAsm64K(dst, src, tmp)
}
if len(src) >= limit10B {
- return encodeSnappyBlockAsm12B(dst, src)
+ const sz, pool = 16384, 1
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeSnappyBlockAsm12B(dst, src, tmp)
}
if len(src) >= limit8B {
- return encodeSnappyBlockAsm10B(dst, src)
+ const sz, pool = 4096, 2
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeSnappyBlockAsm10B(dst, src, tmp)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
- return encodeSnappyBlockAsm8B(dst, src)
+ const sz, pool = 1024, 3
+ tmp, ok := encPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encPools[pool].Put(tmp)
+ return encodeSnappyBlockAsm8B(dst, src, tmp)
}
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
@@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) {
// Use 8 bit table when less than...
limit8B = 512
)
- if len(src) >= 64<<10 {
- return encodeSnappyBetterBlockAsm(dst, src)
+ if len(src) > 65536 {
+ const sz, pool = 589824, 0
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeSnappyBetterBlockAsm(dst, src, tmp)
}
+
if len(src) >= limit12B {
- return encodeSnappyBetterBlockAsm64K(dst, src)
+ const sz, pool = 294912, 4
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+
+ return encodeSnappyBetterBlockAsm64K(dst, src, tmp)
}
if len(src) >= limit10B {
- return encodeSnappyBetterBlockAsm12B(dst, src)
+ const sz, pool = 81920, 0
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+
+ return encodeSnappyBetterBlockAsm12B(dst, src, tmp)
}
if len(src) >= limit8B {
- return encodeSnappyBetterBlockAsm10B(dst, src)
+ const sz, pool = 20480, 1
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeSnappyBetterBlockAsm10B(dst, src, tmp)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
- return encodeSnappyBetterBlockAsm8B(dst, src)
+
+ const sz, pool = 5120, 2
+ tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+ if !ok {
+ tmp = &[sz]byte{}
+ }
+ race.WriteSlice(tmp[:])
+ defer encBetterPools[pool].Put(tmp)
+ return encodeSnappyBetterBlockAsm8B(dst, src, tmp)
}
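
All of the pooled tmp tables above stay internal to the block encoders; callers keep using the exported block API. A minimal sketch, assuming the usual s2.EncodeBetter/s2.Decode signatures:

package main

import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	src := []byte("the quick brown fox jumps over the lazy dog, the quick brown fox")

	// EncodeBetter routes through encodeBlockBetter on amd64; the pooled
	// scratch tables are an internal detail of that call.
	comp := s2.EncodeBetter(nil, src)

	dec, err := s2.Decode(nil, comp)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(comp), string(dec) == string(src))
}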
diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go
index 6b393c34d37..dd1c973ca51 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int {
}
// input must be > inputMargin
-func calcBlockSize(src []byte) (d int) {
+func calcBlockSize(src []byte, _ *[32768]byte) (d int) {
// Initialize the hash table.
const (
tableBits = 13
@@ -503,7 +503,7 @@ emitRemainder:
}
// length must be > inputMargin.
-func calcBlockSizeSmall(src []byte) (d int) {
+func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) {
// Initialize the hash table.
const (
tableBits = 9
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
index 297e41501ba..f43aa815435 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
@@ -11,154 +11,154 @@ func _dummy_()
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBlockAsm(dst []byte, src []byte) int
+func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4194304 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBlockAsm4MB(dst []byte, src []byte) int
+func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBlockAsm12B(dst []byte, src []byte) int
+func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBlockAsm10B(dst []byte, src []byte) int
+func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBlockAsm8B(dst []byte, src []byte) int
+func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBetterBlockAsm(dst []byte, src []byte) int
+func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4194304 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
+func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBetterBlockAsm12B(dst []byte, src []byte) int
+func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBetterBlockAsm10B(dst []byte, src []byte) int
+func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeBetterBlockAsm8B(dst []byte, src []byte) int
+func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBlockAsm(dst []byte, src []byte) int
+func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 65535 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
+func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 65535 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
// calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func calcBlockSize(src []byte) int
+func calcBlockSize(src []byte, tmp *[32768]byte) int
// calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 1024 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
-func calcBlockSizeSmall(src []byte) int
+func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
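
The declarations above all follow the //go:noescape forward-declaration pattern: a Go signature with no body, implemented in the .s file below, where the noescape promise is what lets the pooled tmp pointers stay off the heap. A self-contained sketch with a hypothetical function (not part of s2) showing both halves:

// add_amd64.go
//go:build amd64

package sum

// addAsm is implemented in add_amd64.s. The //go:noescape directive tells
// the compiler the assembly never leaks the tmp pointer, so callers may
// keep it stack-allocated or in a sync.Pool, as the s2 encoders do.
//
//go:noescape
func addAsm(a, b int64, tmp *[16]byte) int64

// add_amd64.s
#include "textflag.h"

// func addAsm(a, b int64, tmp *[16]byte) int64
TEXT ·addAsm(SB), NOSPLIT, $0-32
	MOVQ a+0(FP), AX
	ADDQ b+8(FP), AX
	MOVQ AX, ret+24(FP)
	RET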
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
index 2ff5b334017..df9be687be7 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
@@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0
#endif
RET
-// func encodeBlockAsm(dst []byte, src []byte) int
+// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBlockAsm
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x10, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x10, R11
+ IMULQ R9, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm
- LEAL 1(CX), SI
- MOVL 12(SP), DI
- MOVL SI, BX
- SUBL 16(SP), BX
+ LEAL 1(DX), DI
+ MOVL 12(SP), R8
+ MOVL DI, SI
+ SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm
repeat_extend_back_loop_encodeBlockAsm:
- CMPL SI, DI
+ CMPL DI, R8
JBE repeat_extend_back_end_encodeBlockAsm
- MOVB -1(DX)(BX*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(SI*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm
- LEAL -1(SI), SI
- DECL BX
+ LEAL -1(DI), DI
+ DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm
repeat_extend_back_end_encodeBlockAsm:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 5(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 5(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBlockAsm:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeBlockAsm
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBlockAsm
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_repeat_emit_encodeBlockAsm
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_repeat_emit_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_repeat_emit_encodeBlockAsm
four_bytes_repeat_emit_encodeBlockAsm:
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_repeat_emit_encodeBlockAsm
three_bytes_repeat_emit_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeBlockAsm
two_bytes_repeat_emit_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeBlockAsm
JMP memmove_long_repeat_emit_encodeBlockAsm
one_byte_repeat_emit_encodeBlockAsm:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBlockAsm
memmove_long_repeat_emit_encodeBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R12
+ SHRQ $0x05, R12
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R13
+ SUBQ R11, R13
+ DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R10)(R13*1), R11
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R11
ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
+ DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
+ MOVOU -32(R10)(R13*1), X4
+ MOVOU -16(R10)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBlockAsm:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R9
+ SUBL DX, R9
+ LEAQ (BX)(DX*1), R10
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_repeat_extend_encodeBlockAsm:
- CMPL R8, $0x10
+ CMPL R9, $0x10
JB matchlen_match8_repeat_extend_encodeBlockAsm
- MOVQ (R9)(R11*1), R10
- MOVQ 8(R9)(R11*1), R12
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ MOVQ 8(R10)(R12*1), R13
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
- XORQ 8(BX)(R11*1), R12
+ XORQ 8(SI)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm
- LEAL -16(R8), R8
- LEAL 16(R11), R11
+ LEAL -16(R9), R9
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm
matchlen_bsf_16repeat_extend_encodeBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_match8_repeat_extend_encodeBlockAsm:
- CMPL R8, $0x08
+ CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
- LEAL -8(R8), R8
- LEAL 8(R11), R11
+ LEAL -8(R9), R9
+ LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBlockAsm
matchlen_bsf_8_repeat_extend_encodeBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_match4_repeat_extend_encodeBlockAsm:
- CMPL R8, $0x04
+ CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm
- MOVL (R9)(R11*1), R10
- CMPL (BX)(R11*1), R10
+ MOVL (R10)(R12*1), R11
+ CMPL (SI)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBlockAsm
- LEAL -4(R8), R8
- LEAL 4(R11), R11
+ LEAL -4(R9), R9
+ LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBlockAsm:
- CMPL R8, $0x01
+ CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm
JB repeat_extend_forward_end_encodeBlockAsm
- MOVW (R9)(R11*1), R10
- CMPW (BX)(R11*1), R10
+ MOVW (R10)(R12*1), R11
+ CMPW (SI)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBlockAsm
- LEAL 2(R11), R11
- SUBL $0x02, R8
+ LEAL 2(R12), R12
+ SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBlockAsm
matchlen_match1_repeat_extend_encodeBlockAsm:
- MOVB (R9)(R11*1), R10
- CMPB (BX)(R11*1), R10
+ MOVB (R10)(R12*1), R11
+ CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
repeat_extend_forward_end_encodeBlockAsm:
- ADDL R11, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
- TESTL DI, DI
+ ADDL R12, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
+ TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm
// emitRepeat
emit_repeat_again_match_repeat_encodeBlockAsm:
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_match_repeat_encodeBlockAsm
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_match_repeat_encodeBlockAsm
cant_repeat_two_offset_match_repeat_encodeBlockAsm:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_match_repeat_encodeBlockAsm
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_match_repeat_encodeBlockAsm
- CMPL BX, $0x0100ffff
+ CMPL SI, $0x0100ffff
JB repeat_five_match_repeat_encodeBlockAsm
- LEAL -16842747(BX), BX
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(SI), SI
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_repeat_encodeBlockAsm
repeat_five_match_repeat_encodeBlockAsm:
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_match_repeat_encodeBlockAsm:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_match_repeat_encodeBlockAsm:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_match_repeat_encodeBlockAsm:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_match_repeat_encodeBlockAsm:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_as_copy_encodeBlockAsm:
// emitCopy
- CMPL SI, $0x00010000
+ CMPL DI, $0x00010000
JB two_byte_offset_repeat_as_copy_encodeBlockAsm
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(BX), BX
- ADDQ $0x05, AX
- CMPL BX, $0x04
+ MOVB $0xff, (CX)
+ MOVL DI, 1(CX)
+ LEAL -64(SI), SI
+ ADDQ $0x05, CX
+ CMPL SI, $0x04
JB four_bytes_remain_repeat_as_copy_encodeBlockAsm
// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL BX, $0x0100ffff
+ CMPL SI, $0x0100ffff
JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
- LEAL -16842747(BX), BX
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(SI), SI
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
- TESTL BX, BX
+ TESTL SI, SI
JZ repeat_end_emit_encodeBlockAsm
- XORL DI, DI
- LEAL -1(DI)(BX*4), BX
- MOVB BL, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
+ XORL R8, R8
+ LEAL -1(R8)(SI*4), SI
+ MOVB SI, (CX)
+ MOVL DI, 1(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm
two_byte_offset_repeat_as_copy_encodeBlockAsm:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE long_offset_short_repeat_as_copy_encodeBlockAsm
- MOVL $0x00000001, DI
- LEAL 16(DI), DI
- MOVB SI, 1(AX)
- MOVL SI, R8
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, BX
+ MOVL $0x00000001, R8
+ LEAL 16(R8), R8
+ MOVB DI, 1(CX)
+ MOVL DI, R9
+ SHRL $0x08, R9
+ SHLL $0x05, R9
+ ORL R9, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, SI
// emitRepeat
- LEAL -4(BX), BX
+ LEAL -4(SI), SI
JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
- CMPL BX, $0x0100ffff
+ CMPL SI, $0x0100ffff
JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
- LEAL -16842747(BX), BX
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(SI), SI
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
long_offset_short_repeat_as_copy_encodeBlockAsm:
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL BX, $0x0100ffff
+ CMPL SI, $0x0100ffff
JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
- LEAL -16842747(BX), BX
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(SI), SI
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm
emit_copy_three_repeat_as_copy_encodeBlockAsm:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeBlockAsm:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm
no_repeat_found_encodeBlockAsm:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm
candidate3_match_encodeBlockAsm:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm
candidate2_match_encodeBlockAsm:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeBlockAsm:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm
match_extend_back_loop_encodeBlockAsm:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBlockAsm
JMP match_extend_back_loop_encodeBlockAsm
match_extend_back_end_encodeBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 5(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 5(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBlockAsm:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeBlockAsm
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB three_bytes_match_emit_encodeBlockAsm
- CMPL DI, $0x01000000
+ CMPL R8, $0x01000000
JB four_bytes_match_emit_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL R8, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_encodeBlockAsm
four_bytes_match_emit_encodeBlockAsm:
- MOVL DI, R9
- SHRL $0x10, R9
- MOVB $0xf8, (AX)
- MOVW DI, 1(AX)
- MOVB R9, 3(AX)
- ADDQ $0x04, AX
+ MOVL R8, R10
+ SHRL $0x10, R10
+ MOVB $0xf8, (CX)
+ MOVW R8, 1(CX)
+ MOVB R10, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeBlockAsm
three_bytes_match_emit_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBlockAsm
two_bytes_match_emit_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeBlockAsm
JMP memmove_long_match_emit_encodeBlockAsm
one_byte_match_emit_encodeBlockAsm:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBlockAsm:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeBlockAsm
memmove_long_match_emit_encodeBlockAsm:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeBlockAsm:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeBlockAsm
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeBlockAsm
matchlen_bsf_16match_nolit_encodeBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeBlockAsm
matchlen_match8_match_nolit_encodeBlockAsm:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeBlockAsm
matchlen_bsf_8_match_nolit_encodeBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm
matchlen_match4_match_nolit_encodeBlockAsm:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeBlockAsm
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeBlockAsm:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm
JB match_nolit_end_encodeBlockAsm
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeBlockAsm
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm
matchlen_match1_match_nolit_encodeBlockAsm:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeBlockAsm:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB two_byte_offset_match_nolit_encodeBlockAsm
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE four_bytes_remain_match_nolit_encodeBlockAsm
- MOVB $0xff, (AX)
- MOVL BX, 1(AX)
- LEAL -64(R9), R9
- ADDQ $0x05, AX
- CMPL R9, $0x04
+ MOVB $0xff, (CX)
+ MOVL SI, 1(CX)
+ LEAL -64(R10), R10
+ ADDQ $0x05, CX
+ CMPL R10, $0x04
JB four_bytes_remain_match_nolit_encodeBlockAsm
// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm_emit_copy
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm_emit_copy
- CMPL R9, $0x0100ffff
+ CMPL R10, $0x0100ffff
JB repeat_five_match_nolit_encodeBlockAsm_emit_copy
- LEAL -16842747(R9), R9
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R10), R10
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
repeat_five_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_four_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_nolit_encodeBlockAsm_emit_copy:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
four_bytes_remain_match_nolit_encodeBlockAsm:
- TESTL R9, R9
+ TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeBlockAsm
- XORL SI, SI
- LEAL -1(SI)(R9*4), R9
- MOVB R9, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ XORL DI, DI
+ LEAL -1(DI)(R10*4), R10
+ MOVB R10, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
two_byte_offset_match_nolit_encodeBlockAsm:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeBlockAsm
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE long_offset_short_match_nolit_encodeBlockAsm
- MOVL $0x00000001, SI
- LEAL 16(SI), SI
- MOVB BL, 1(AX)
- MOVL BX, DI
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R9
+ MOVL $0x00000001, DI
+ LEAL 16(DI), DI
+ MOVB SI, 1(CX)
+ MOVL SI, R8
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R10
// emitRepeat
- LEAL -4(R9), R9
+ LEAL -4(R10), R10
JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
- CMPL R9, $0x0100ffff
+ CMPL R10, $0x0100ffff
JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
- LEAL -16842747(R9), R9
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R10), R10
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
long_offset_short_match_nolit_encodeBlockAsm:
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL R9, $0x0100ffff
+ CMPL R10, $0x0100ffff
JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
- LEAL -16842747(R9), R9
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R10), R10
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
two_byte_offset_short_match_nolit_encodeBlockAsm:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeBlockAsm
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeBlockAsm
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy_three_match_nolit_encodeBlockAsm:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm:
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x10, DI
- IMULQ R8, DI
- SHRQ $0x32, DI
- SHLQ $0x10, BX
- IMULQ R8, BX
- SHRQ $0x32, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x10, R8
+ IMULQ R9, R8
+ SHRQ $0x32, R8
+ SHLQ $0x10, SI
+ IMULQ R9, SI
+ SHRQ $0x32, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm
- INCL CX
+ INCL DX
JMP search_loop_encodeBlockAsm
emit_remainder_encodeBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 5(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBlockAsm
@@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm:
JB three_bytes_emit_remainder_encodeBlockAsm
CMPL DX, $0x01000000
JB four_bytes_emit_remainder_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL DX, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_emit_remainder_encodeBlockAsm
four_bytes_emit_remainder_encodeBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeBlockAsm
three_bytes_emit_remainder_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBlockAsm
two_bytes_emit_remainder_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBlockAsm
JMP memmove_long_emit_remainder_encodeBlockAsm
one_byte_emit_remainder_encodeBlockAsm:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm:
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBlockAsm
memmove_long_emit_remainder_encodeBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
@@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBlockAsm4MB(dst []byte, src []byte) int
+// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm4MB(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm4MB(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm4MB:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBlockAsm4MB
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm4MB:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm4MB
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x10, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x10, R11
+ IMULQ R9, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm4MB
- LEAL 1(CX), SI
- MOVL 12(SP), DI
- MOVL SI, BX
- SUBL 16(SP), BX
+ LEAL 1(DX), DI
+ MOVL 12(SP), R8
+ MOVL DI, SI
+ SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm4MB
repeat_extend_back_loop_encodeBlockAsm4MB:
- CMPL SI, DI
+ CMPL DI, R8
JBE repeat_extend_back_end_encodeBlockAsm4MB
- MOVB -1(DX)(BX*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(SI*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm4MB
- LEAL -1(SI), SI
- DECL BX
+ LEAL -1(DI), DI
+ DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm4MB
repeat_extend_back_end_encodeBlockAsm4MB:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 4(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 4(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBlockAsm4MB:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeBlockAsm4MB
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBlockAsm4MB
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_repeat_emit_encodeBlockAsm4MB
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
three_bytes_repeat_emit_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
two_bytes_repeat_emit_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeBlockAsm4MB
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
one_byte_repeat_emit_encodeBlockAsm4MB:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB
memmove_long_repeat_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R12
+ SHRQ $0x05, R12
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R13
+ SUBQ R11, R13
+ DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R10)(R13*1), R11
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R11
ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
+ DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
+ MOVOU -32(R10)(R13*1), X4
+ MOVOU -16(R10)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBlockAsm4MB:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R9
+ SUBL DX, R9
+ LEAQ (BX)(DX*1), R10
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
- CMPL R8, $0x10
+ CMPL R9, $0x10
JB matchlen_match8_repeat_extend_encodeBlockAsm4MB
- MOVQ (R9)(R11*1), R10
- MOVQ 8(R9)(R11*1), R12
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ MOVQ 8(R10)(R12*1), R13
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
- XORQ 8(BX)(R11*1), R12
+ XORQ 8(SI)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
- LEAL -16(R8), R8
- LEAL 16(R11), R11
+ LEAL -16(R9), R9
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm4MB
matchlen_match8_repeat_extend_encodeBlockAsm4MB:
- CMPL R8, $0x08
+ CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm4MB
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
- LEAL -8(R8), R8
- LEAL 8(R11), R11
+ LEAL -8(R9), R9
+ LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB
matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm4MB
matchlen_match4_repeat_extend_encodeBlockAsm4MB:
- CMPL R8, $0x04
+ CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm4MB
- MOVL (R9)(R11*1), R10
- CMPL (BX)(R11*1), R10
+ MOVL (R10)(R12*1), R11
+ CMPL (SI)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB
- LEAL -4(R8), R8
- LEAL 4(R11), R11
+ LEAL -4(R9), R9
+ LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBlockAsm4MB:
- CMPL R8, $0x01
+ CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm4MB
JB repeat_extend_forward_end_encodeBlockAsm4MB
- MOVW (R9)(R11*1), R10
- CMPW (BX)(R11*1), R10
+ MOVW (R10)(R12*1), R11
+ CMPW (SI)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB
- LEAL 2(R11), R11
- SUBL $0x02, R8
+ LEAL 2(R12), R12
+ SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBlockAsm4MB
matchlen_match1_repeat_extend_encodeBlockAsm4MB:
- MOVB (R9)(R11*1), R10
- CMPB (BX)(R11*1), R10
+ MOVB (R10)(R12*1), R11
+ CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm4MB
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
repeat_extend_forward_end_encodeBlockAsm4MB:
- ADDL R11, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
- TESTL DI, DI
+ ADDL R12, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
+ TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm4MB
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_match_repeat_encodeBlockAsm4MB
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_match_repeat_encodeBlockAsm4MB
cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_match_repeat_encodeBlockAsm4MB
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_match_repeat_encodeBlockAsm4MB
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_match_repeat_encodeBlockAsm4MB:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_match_repeat_encodeBlockAsm4MB:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_match_repeat_encodeBlockAsm4MB:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_match_repeat_encodeBlockAsm4MB:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_as_copy_encodeBlockAsm4MB:
// emitCopy
- CMPL SI, $0x00010000
+ CMPL DI, $0x00010000
JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(BX), BX
- ADDQ $0x05, AX
- CMPL BX, $0x04
+ MOVB $0xff, (CX)
+ MOVL DI, 1(CX)
+ LEAL -64(SI), SI
+ ADDQ $0x05, CX
+ CMPL SI, $0x04
JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
- TESTL BX, BX
+ TESTL SI, SI
JZ repeat_end_emit_encodeBlockAsm4MB
- XORL DI, DI
- LEAL -1(DI)(BX*4), BX
- MOVB BL, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
+ XORL R8, R8
+ LEAL -1(R8)(SI*4), SI
+ MOVB SI, (CX)
+ MOVL DI, 1(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm4MB
two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB
- MOVL $0x00000001, DI
- LEAL 16(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, BX
+ MOVL $0x00000001, R8
+ LEAL 16(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, SI
// emitRepeat
- LEAL -4(BX), BX
+ LEAL -4(SI), SI
JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL BX, $0x00010100
+ CMPL SI, $0x00010100
JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- LEAL -65536(BX), BX
- MOVL BX, SI
- MOVW $0x001d, (AX)
- MOVW BX, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(SI), SI
+ MOVL SI, DI
+ MOVW $0x001d, (CX)
+ MOVW SI, 2(CX)
+ SARL $0x10, DI
+ MOVB DI, 4(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm4MB
emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeBlockAsm4MB:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm4MB
no_repeat_found_encodeBlockAsm4MB:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm4MB
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm4MB
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm4MB
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm4MB
candidate3_match_encodeBlockAsm4MB:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm4MB
candidate2_match_encodeBlockAsm4MB:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeBlockAsm4MB:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm4MB
match_extend_back_loop_encodeBlockAsm4MB:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm4MB
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm4MB
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBlockAsm4MB
JMP match_extend_back_loop_encodeBlockAsm4MB
match_extend_back_end_encodeBlockAsm4MB:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 4(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 4(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBlockAsm4MB:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeBlockAsm4MB
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm4MB
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB three_bytes_match_emit_encodeBlockAsm4MB
- MOVL DI, R9
- SHRL $0x10, R9
- MOVB $0xf8, (AX)
- MOVW DI, 1(AX)
- MOVB R9, 3(AX)
- ADDQ $0x04, AX
+ MOVL R8, R10
+ SHRL $0x10, R10
+ MOVB $0xf8, (CX)
+ MOVW R8, 1(CX)
+ MOVB R10, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeBlockAsm4MB
three_bytes_match_emit_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBlockAsm4MB
two_bytes_match_emit_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeBlockAsm4MB
JMP memmove_long_match_emit_encodeBlockAsm4MB
one_byte_match_emit_encodeBlockAsm4MB:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm4MB:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeBlockAsm4MB
memmove_long_match_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeBlockAsm4MB:
match_nolit_loop_encodeBlockAsm4MB:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeBlockAsm4MB
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeBlockAsm4MB
matchlen_match8_match_nolit_encodeBlockAsm4MB:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm4MB
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeBlockAsm4MB
matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm4MB
matchlen_match4_match_nolit_encodeBlockAsm4MB:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm4MB
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeBlockAsm4MB
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeBlockAsm4MB:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm4MB
JB match_nolit_end_encodeBlockAsm4MB
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeBlockAsm4MB
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm4MB
matchlen_match1_match_nolit_encodeBlockAsm4MB:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm4MB
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeBlockAsm4MB:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB two_byte_offset_match_nolit_encodeBlockAsm4MB
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL BX, 1(AX)
- LEAL -64(R9), R9
- ADDQ $0x05, AX
- CMPL R9, $0x04
+ MOVB $0xff, (CX)
+ MOVL SI, 1(CX)
+ LEAL -64(R10), R10
+ ADDQ $0x05, CX
+ CMPL R10, $0x04
JB four_bytes_remain_match_nolit_encodeBlockAsm4MB
// emitRepeat
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
four_bytes_remain_match_nolit_encodeBlockAsm4MB:
- TESTL R9, R9
+ TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
- XORL SI, SI
- LEAL -1(SI)(R9*4), R9
- MOVB R9, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ XORL DI, DI
+ LEAL -1(DI)(R10*4), R10
+ MOVB R10, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
two_byte_offset_match_nolit_encodeBlockAsm4MB:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE long_offset_short_match_nolit_encodeBlockAsm4MB
- MOVL $0x00000001, SI
- LEAL 16(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R9
-
+ MOVL $0x00000001, DI
+ LEAL 16(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R10
+
// emitRepeat
- LEAL -4(R9), R9
+ LEAL -4(R10), R10
JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
long_offset_short_match_nolit_encodeBlockAsm4MB:
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL R9, $0x00010100
+ CMPL R10, $0x00010100
JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
- LEAL -65536(R9), R9
- MOVL R9, BX
- MOVW $0x001d, (AX)
- MOVW R9, 2(AX)
- SARL $0x10, BX
- MOVB BL, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R10), R10
+ MOVL R10, SI
+ MOVW $0x001d, (CX)
+ MOVW R10, 2(CX)
+ SARL $0x10, SI
+ MOVB SI, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeBlockAsm4MB
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeBlockAsm4MB
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
emit_copy_three_match_nolit_encodeBlockAsm4MB:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm4MB:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm4MB
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm4MB:
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x10, DI
- IMULQ R8, DI
- SHRQ $0x32, DI
- SHLQ $0x10, BX
- IMULQ R8, BX
- SHRQ $0x32, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x10, R8
+ IMULQ R9, R8
+ SHRQ $0x32, R8
+ SHLQ $0x10, SI
+ IMULQ R9, SI
+ SHRQ $0x32, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm4MB
- INCL CX
+ INCL DX
JMP search_loop_encodeBlockAsm4MB
emit_remainder_encodeBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 4(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 4(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBlockAsm4MB
@@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB:
JB three_bytes_emit_remainder_encodeBlockAsm4MB
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
three_bytes_emit_remainder_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
two_bytes_emit_remainder_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBlockAsm4MB
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
one_byte_emit_remainder_encodeBlockAsm4MB:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB:
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB
memmove_long_emit_remainder_encodeBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
MOVOU (SI), X4
@@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm4MB:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
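The pattern above carries through the rest of the file: each encoder now reports its result at ret+56(FP) instead of ret+48(FP) because the Go signature gained a trailing scratch-table pointer, and the hash table is addressed through that pointer (held in AX) rather than through a large local stack frame. The header of the next routine, just below, spells the new shape out: encodeBlockAsm12B takes dst, src and a *[16384]byte tmp, and its frame shrinks from 16408 bytes to 24. A caller therefore has to supply the table itself; the sketch below is only an illustration of how a Go-side wrapper might do that with a pooled buffer (the wrapper and pool names are invented for this note, not taken from the package):

	// Sketch only: encodeBlockAsm12B here is a stand-in for the generated assembly
	// routine whose new signature appears in the comment following this note:
	//   func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
	package main

	import (
		"fmt"
		"sync"
	)

	// Stand-in for the assembly implementation; the real routine uses tmp as its
	// hash table instead of reserving ~16 KiB on its own stack frame.
	func encodeBlockAsm12B(dst, src []byte, tmp *[16384]byte) int {
		_ = tmp
		return 0 // the real routine returns the number of bytes written to dst
	}

	// Hypothetical pool that amortizes the scratch-table allocation across calls.
	var tmpPool12B = sync.Pool{New: func() any { return new([16384]byte) }}

	func encodeBlock12B(dst, src []byte) int {
		tmp := tmpPool12B.Get().(*[16384]byte)
		defer tmpPool12B.Put(tmp)
		return encodeBlockAsm12B(dst, src, tmp)
	}

	func main() {
		dst := make([]byte, 64)
		fmt.Println(encodeBlock12B(dst, []byte("example input")))
	}
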
-// func encodeBlockAsm12B(dst []byte, src []byte) int
+// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm12B(SB), $16408-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000080, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm12B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000080, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBlockAsm12B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm12B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm12B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x000000cf1bbcdcbb, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x18, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x000000cf1bbcdcbb, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x18, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x34, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x18, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x18, R11
+ IMULQ R9, R11
+ SHRQ $0x34, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x18, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm12B
- LEAL 1(CX), SI
- MOVL 12(SP), DI
- MOVL SI, BX
- SUBL 16(SP), BX
+ LEAL 1(DX), DI
+ MOVL 12(SP), R8
+ MOVL DI, SI
+ SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm12B
repeat_extend_back_loop_encodeBlockAsm12B:
- CMPL SI, DI
+ CMPL DI, R8
JBE repeat_extend_back_end_encodeBlockAsm12B
- MOVB -1(DX)(BX*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(SI*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm12B
- LEAL -1(SI), SI
- DECL BX
+ LEAL -1(DI), DI
+ DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm12B
repeat_extend_back_end_encodeBlockAsm12B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBlockAsm12B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeBlockAsm12B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBlockAsm12B
JB three_bytes_repeat_emit_encodeBlockAsm12B
three_bytes_repeat_emit_encodeBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeBlockAsm12B
two_bytes_repeat_emit_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeBlockAsm12B
JMP memmove_long_repeat_emit_encodeBlockAsm12B
one_byte_repeat_emit_encodeBlockAsm12B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm12B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
memmove_long_repeat_emit_encodeBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R12
+ SHRQ $0x05, R12
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R13
+ SUBQ R11, R13
+ DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R10)(R13*1), R11
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R11
ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
+ DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
+ MOVOU -32(R10)(R13*1), X4
+ MOVOU -16(R10)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBlockAsm12B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R9
+ SUBL DX, R9
+ LEAQ (BX)(DX*1), R10
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
- CMPL R8, $0x10
+ CMPL R9, $0x10
JB matchlen_match8_repeat_extend_encodeBlockAsm12B
- MOVQ (R9)(R11*1), R10
- MOVQ 8(R9)(R11*1), R12
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ MOVQ 8(R10)(R12*1), R13
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
- XORQ 8(BX)(R11*1), R12
+ XORQ 8(SI)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B
- LEAL -16(R8), R8
- LEAL 16(R11), R11
+ LEAL -16(R9), R9
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm12B
matchlen_match8_repeat_extend_encodeBlockAsm12B:
- CMPL R8, $0x08
+ CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm12B
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
- LEAL -8(R8), R8
- LEAL 8(R11), R11
+ LEAL -8(R9), R9
+ LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBlockAsm12B
matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm12B
matchlen_match4_repeat_extend_encodeBlockAsm12B:
- CMPL R8, $0x04
+ CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm12B
- MOVL (R9)(R11*1), R10
- CMPL (BX)(R11*1), R10
+ MOVL (R10)(R12*1), R11
+ CMPL (SI)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBlockAsm12B
- LEAL -4(R8), R8
- LEAL 4(R11), R11
+ LEAL -4(R9), R9
+ LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBlockAsm12B:
- CMPL R8, $0x01
+ CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm12B
JB repeat_extend_forward_end_encodeBlockAsm12B
- MOVW (R9)(R11*1), R10
- CMPW (BX)(R11*1), R10
+ MOVW (R10)(R12*1), R11
+ CMPW (SI)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBlockAsm12B
- LEAL 2(R11), R11
- SUBL $0x02, R8
+ LEAL 2(R12), R12
+ SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBlockAsm12B
matchlen_match1_repeat_extend_encodeBlockAsm12B:
- MOVB (R9)(R11*1), R10
- CMPB (BX)(R11*1), R10
+ MOVB (R10)(R12*1), R11
+ CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm12B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
repeat_extend_forward_end_encodeBlockAsm12B:
- ADDL R11, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
- TESTL DI, DI
+ ADDL R12, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
+ TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm12B
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_match_repeat_encodeBlockAsm12B
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_match_repeat_encodeBlockAsm12B
cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_match_repeat_encodeBlockAsm12B
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_three_match_repeat_encodeBlockAsm12B:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_match_repeat_encodeBlockAsm12B:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_offset_match_repeat_encodeBlockAsm12B:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_as_copy_encodeBlockAsm12B:
// emitCopy
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B
- MOVL $0x00000001, DI
- LEAL 16(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, BX
+ MOVL $0x00000001, R8
+ LEAL 16(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, SI
// emitRepeat
- LEAL -4(BX), BX
+ LEAL -4(SI), SI
JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
long_offset_short_repeat_as_copy_encodeBlockAsm12B:
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm12B
emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeBlockAsm12B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm12B
no_repeat_found_encodeBlockAsm12B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm12B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm12B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm12B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm12B
candidate3_match_encodeBlockAsm12B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm12B
candidate2_match_encodeBlockAsm12B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeBlockAsm12B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm12B
match_extend_back_loop_encodeBlockAsm12B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm12B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm12B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBlockAsm12B
JMP match_extend_back_loop_encodeBlockAsm12B
match_extend_back_end_encodeBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBlockAsm12B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeBlockAsm12B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm12B
JB three_bytes_match_emit_encodeBlockAsm12B
three_bytes_match_emit_encodeBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBlockAsm12B
two_bytes_match_emit_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeBlockAsm12B
JMP memmove_long_match_emit_encodeBlockAsm12B
one_byte_match_emit_encodeBlockAsm12B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBlockAsm12B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm12B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeBlockAsm12B
memmove_long_match_emit_encodeBlockAsm12B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeBlockAsm12B:
match_nolit_loop_encodeBlockAsm12B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeBlockAsm12B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B
matchlen_bsf_16match_nolit_encodeBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeBlockAsm12B
matchlen_match8_match_nolit_encodeBlockAsm12B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm12B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeBlockAsm12B
matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm12B
matchlen_match4_match_nolit_encodeBlockAsm12B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm12B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeBlockAsm12B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeBlockAsm12B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm12B
JB match_nolit_end_encodeBlockAsm12B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeBlockAsm12B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm12B
matchlen_match1_match_nolit_encodeBlockAsm12B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm12B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeBlockAsm12B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE long_offset_short_match_nolit_encodeBlockAsm12B
- MOVL $0x00000001, SI
- LEAL 16(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R9
+ MOVL $0x00000001, DI
+ LEAL 16(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R10
// emitRepeat
- LEAL -4(R9), R9
+ LEAL -4(R10), R10
JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
long_offset_short_match_nolit_encodeBlockAsm12B:
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
-
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
+ JMP match_nolit_emitcopy_end_encodeBlockAsm12B
+
two_byte_offset_short_match_nolit_encodeBlockAsm12B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeBlockAsm12B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeBlockAsm12B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
emit_copy_three_match_nolit_encodeBlockAsm12B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm12B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm12B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm12B:
- MOVQ $0x000000cf1bbcdcbb, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x18, DI
- IMULQ R8, DI
- SHRQ $0x34, DI
- SHLQ $0x18, BX
- IMULQ R8, BX
- SHRQ $0x34, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x000000cf1bbcdcbb, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x18, R8
+ IMULQ R9, R8
+ SHRQ $0x34, R8
+ SHLQ $0x18, SI
+ IMULQ R9, SI
+ SHRQ $0x34, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm12B
- INCL CX
+ INCL DX
JMP search_loop_encodeBlockAsm12B
emit_remainder_encodeBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBlockAsm12B
@@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B:
JB three_bytes_emit_remainder_encodeBlockAsm12B
three_bytes_emit_remainder_encodeBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBlockAsm12B
two_bytes_emit_remainder_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBlockAsm12B
JMP memmove_long_emit_remainder_encodeBlockAsm12B
one_byte_emit_remainder_encodeBlockAsm12B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B:
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm12B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
memmove_long_emit_remainder_encodeBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
@@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
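The matchlen_* blocks in the routine that ends here (and in the other encoders) all use the same trick, unchanged by this patch apart from register renumbering: load eight bytes from each side, XOR them, and if the result is non-zero, count its trailing zero bits (TZCNTQ under GOAMD64_v3, BSFQ otherwise) and shift right by three to turn the bit position of the first difference into a byte count. A minimal Go rendering of that idea, for orientation only (the helper below is illustrative, not the package's own matchLen):

	package main

	import (
		"encoding/binary"
		"fmt"
		"math/bits"
	)

	// matchLen mirrors the idea behind the matchlen_* blocks: compare 8 bytes at
	// a time with XOR; a non-zero result pinpoints the first differing byte via
	// its trailing zero bits (loads are little-endian, so low bits come first).
	func matchLen(a, b []byte) int {
		n := 0
		for len(a) >= 8 && len(b) >= 8 {
			x := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
			if x != 0 {
				return n + bits.TrailingZeros64(x)>>3
			}
			n += 8
			a, b = a[8:], b[8:]
		}
		for i := 0; i < len(a) && i < len(b) && a[i] == b[i]; i++ {
			n++
		}
		return n
	}

	func main() {
		fmt.Println(matchLen([]byte("abcdefghXY"), []byte("abcdefghZZ"))) // prints 8
	}
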
-// func encodeBlockAsm10B(dst []byte, src []byte) int
+// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm10B(SB), $4120-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000020, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm10B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000020, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBlockAsm10B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm10B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm10B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x9e3779b1, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x20, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x36, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x20, R11
+ IMULQ R9, R11
+ SHRQ $0x36, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x20, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm10B
- LEAL 1(CX), SI
- MOVL 12(SP), DI
- MOVL SI, BX
- SUBL 16(SP), BX
+ LEAL 1(DX), DI
+ MOVL 12(SP), R8
+ MOVL DI, SI
+ SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm10B
repeat_extend_back_loop_encodeBlockAsm10B:
- CMPL SI, DI
+ CMPL DI, R8
JBE repeat_extend_back_end_encodeBlockAsm10B
- MOVB -1(DX)(BX*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(SI*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm10B
- LEAL -1(SI), SI
- DECL BX
+ LEAL -1(DI), DI
+ DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm10B
repeat_extend_back_end_encodeBlockAsm10B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBlockAsm10B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeBlockAsm10B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBlockAsm10B
JB three_bytes_repeat_emit_encodeBlockAsm10B
three_bytes_repeat_emit_encodeBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeBlockAsm10B
two_bytes_repeat_emit_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeBlockAsm10B
JMP memmove_long_repeat_emit_encodeBlockAsm10B
one_byte_repeat_emit_encodeBlockAsm10B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm10B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBlockAsm10B
memmove_long_repeat_emit_encodeBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R12
+ SHRQ $0x05, R12
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R13
+ SUBQ R11, R13
+ DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R10)(R13*1), R11
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R11
ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
+ DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
+ MOVOU -32(R10)(R13*1), X4
+ MOVOU -16(R10)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBlockAsm10B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R9
+ SUBL DX, R9
+ LEAQ (BX)(DX*1), R10
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
- CMPL R8, $0x10
+ CMPL R9, $0x10
JB matchlen_match8_repeat_extend_encodeBlockAsm10B
- MOVQ (R9)(R11*1), R10
- MOVQ 8(R9)(R11*1), R12
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ MOVQ 8(R10)(R12*1), R13
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
- XORQ 8(BX)(R11*1), R12
+ XORQ 8(SI)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B
- LEAL -16(R8), R8
- LEAL 16(R11), R11
+ LEAL -16(R9), R9
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm10B
matchlen_match8_repeat_extend_encodeBlockAsm10B:
- CMPL R8, $0x08
+ CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm10B
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
- LEAL -8(R8), R8
- LEAL 8(R11), R11
+ LEAL -8(R9), R9
+ LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBlockAsm10B
matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm10B
matchlen_match4_repeat_extend_encodeBlockAsm10B:
- CMPL R8, $0x04
+ CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm10B
- MOVL (R9)(R11*1), R10
- CMPL (BX)(R11*1), R10
+ MOVL (R10)(R12*1), R11
+ CMPL (SI)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBlockAsm10B
- LEAL -4(R8), R8
- LEAL 4(R11), R11
+ LEAL -4(R9), R9
+ LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBlockAsm10B:
- CMPL R8, $0x01
+ CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm10B
JB repeat_extend_forward_end_encodeBlockAsm10B
- MOVW (R9)(R11*1), R10
- CMPW (BX)(R11*1), R10
+ MOVW (R10)(R12*1), R11
+ CMPW (SI)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBlockAsm10B
- LEAL 2(R11), R11
- SUBL $0x02, R8
+ LEAL 2(R12), R12
+ SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBlockAsm10B
matchlen_match1_repeat_extend_encodeBlockAsm10B:
- MOVB (R9)(R11*1), R10
- CMPB (BX)(R11*1), R10
+ MOVB (R10)(R12*1), R11
+ CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm10B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
repeat_extend_forward_end_encodeBlockAsm10B:
- ADDL R11, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
- TESTL DI, DI
+ ADDL R12, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
+ TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm10B
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_match_repeat_encodeBlockAsm10B
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_match_repeat_encodeBlockAsm10B
cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_match_repeat_encodeBlockAsm10B
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_three_match_repeat_encodeBlockAsm10B:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_match_repeat_encodeBlockAsm10B:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_offset_match_repeat_encodeBlockAsm10B:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_as_copy_encodeBlockAsm10B:
// emitCopy
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B
- MOVL $0x00000001, DI
- LEAL 16(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, BX
+ MOVL $0x00000001, R8
+ LEAL 16(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, SI
// emitRepeat
- LEAL -4(BX), BX
+ LEAL -4(SI), SI
JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
long_offset_short_repeat_as_copy_encodeBlockAsm10B:
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
// emitRepeat
- MOVL BX, DI
- LEAL -4(BX), BX
- CMPL DI, $0x08
+ MOVL SI, R8
+ LEAL -4(SI), SI
+ CMPL R8, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- CMPL DI, $0x0c
+ CMPL R8, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm10B
emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeBlockAsm10B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm10B
no_repeat_found_encodeBlockAsm10B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm10B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm10B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm10B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm10B
candidate3_match_encodeBlockAsm10B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm10B
candidate2_match_encodeBlockAsm10B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeBlockAsm10B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm10B
match_extend_back_loop_encodeBlockAsm10B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm10B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm10B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBlockAsm10B
JMP match_extend_back_loop_encodeBlockAsm10B
match_extend_back_end_encodeBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBlockAsm10B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeBlockAsm10B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm10B
JB three_bytes_match_emit_encodeBlockAsm10B
three_bytes_match_emit_encodeBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBlockAsm10B
two_bytes_match_emit_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeBlockAsm10B
JMP memmove_long_match_emit_encodeBlockAsm10B
one_byte_match_emit_encodeBlockAsm10B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBlockAsm10B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm10B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeBlockAsm10B
memmove_long_match_emit_encodeBlockAsm10B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeBlockAsm10B:
match_nolit_loop_encodeBlockAsm10B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeBlockAsm10B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B
matchlen_bsf_16match_nolit_encodeBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeBlockAsm10B
matchlen_match8_match_nolit_encodeBlockAsm10B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm10B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeBlockAsm10B
matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm10B
matchlen_match4_match_nolit_encodeBlockAsm10B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm10B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeBlockAsm10B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeBlockAsm10B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm10B
JB match_nolit_end_encodeBlockAsm10B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeBlockAsm10B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm10B
matchlen_match1_match_nolit_encodeBlockAsm10B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm10B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeBlockAsm10B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE long_offset_short_match_nolit_encodeBlockAsm10B
- MOVL $0x00000001, SI
- LEAL 16(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R9
+ MOVL $0x00000001, DI
+ LEAL 16(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R10
// emitRepeat
- LEAL -4(R9), R9
+ LEAL -4(R10), R10
JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
long_offset_short_match_nolit_encodeBlockAsm10B:
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R9, SI
- LEAL -4(R9), R9
- CMPL SI, $0x08
+ MOVL R10, DI
+ LEAL -4(R10), R10
+ CMPL DI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
two_byte_offset_short_match_nolit_encodeBlockAsm10B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeBlockAsm10B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeBlockAsm10B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
emit_copy_three_match_nolit_encodeBlockAsm10B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm10B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm10B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm10B:
- MOVQ $0x9e3779b1, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x20, DI
- IMULQ R8, DI
- SHRQ $0x36, DI
- SHLQ $0x20, BX
- IMULQ R8, BX
- SHRQ $0x36, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x20, R8
+ IMULQ R9, R8
+ SHRQ $0x36, R8
+ SHLQ $0x20, SI
+ IMULQ R9, SI
+ SHRQ $0x36, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm10B
- INCL CX
+ INCL DX
JMP search_loop_encodeBlockAsm10B
emit_remainder_encodeBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBlockAsm10B
@@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B:
JB three_bytes_emit_remainder_encodeBlockAsm10B
three_bytes_emit_remainder_encodeBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBlockAsm10B
two_bytes_emit_remainder_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBlockAsm10B
JMP memmove_long_emit_remainder_encodeBlockAsm10B
one_byte_emit_remainder_encodeBlockAsm10B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B:
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm10B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBlockAsm10B
memmove_long_emit_remainder_encodeBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
@@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBlockAsm8B(dst []byte, src []byte) int
+// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm8B(SB), $1048-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000008, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm8B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000008, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBlockAsm8B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeBlockAsm8B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x04, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x04, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBlockAsm8B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x9e3779b1, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x38, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x20, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x38, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x38, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
- JNE no_repeat_found_encodeBlockAsm8B
- LEAL 1(CX), SI
- MOVL 12(SP), DI
- MOVL SI, BX
- SUBL 16(SP), BX
+ SHLQ $0x20, R11
+ IMULQ R9, R11
+ SHRQ $0x38, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x20, R10
+ IMULQ R9, R10
+ SHRQ $0x38, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
+ JNE no_repeat_found_encodeBlockAsm8B
+ LEAL 1(DX), DI
+ MOVL 12(SP), R8
+ MOVL DI, SI
+ SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm8B
repeat_extend_back_loop_encodeBlockAsm8B:
- CMPL SI, DI
+ CMPL DI, R8
JBE repeat_extend_back_end_encodeBlockAsm8B
- MOVB -1(DX)(BX*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(SI*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeBlockAsm8B
- LEAL -1(SI), SI
- DECL BX
+ LEAL -1(DI), DI
+ DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm8B
repeat_extend_back_end_encodeBlockAsm8B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeBlockAsm8B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeBlockAsm8B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeBlockAsm8B
JB three_bytes_repeat_emit_encodeBlockAsm8B
three_bytes_repeat_emit_encodeBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeBlockAsm8B
two_bytes_repeat_emit_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeBlockAsm8B
JMP memmove_long_repeat_emit_encodeBlockAsm8B
one_byte_repeat_emit_encodeBlockAsm8B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm8B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeBlockAsm8B
memmove_long_repeat_emit_encodeBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R12
+ SHRQ $0x05, R12
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R13
+ SUBQ R11, R13
+ DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R10)(R13*1), R11
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R11
ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
+ DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
+ MOVOU -32(R10)(R13*1), X4
+ MOVOU -16(R10)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeBlockAsm8B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R9
+ SUBL DX, R9
+ LEAQ (BX)(DX*1), R10
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
- CMPL R8, $0x10
+ CMPL R9, $0x10
JB matchlen_match8_repeat_extend_encodeBlockAsm8B
- MOVQ (R9)(R11*1), R10
- MOVQ 8(R9)(R11*1), R12
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ MOVQ 8(R10)(R12*1), R13
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
- XORQ 8(BX)(R11*1), R12
+ XORQ 8(SI)(R12*1), R13
JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B
- LEAL -16(R8), R8
- LEAL 16(R11), R11
+ LEAL -16(R9), R9
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm8B
matchlen_match8_repeat_extend_encodeBlockAsm8B:
- CMPL R8, $0x08
+ CMPL R9, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm8B
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
+ MOVQ (R10)(R12*1), R11
+ XORQ (SI)(R12*1), R11
JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
- LEAL -8(R8), R8
- LEAL 8(R11), R11
+ LEAL -8(R9), R9
+ LEAL 8(R12), R12
JMP matchlen_match4_repeat_extend_encodeBlockAsm8B
matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm8B
matchlen_match4_repeat_extend_encodeBlockAsm8B:
- CMPL R8, $0x04
+ CMPL R9, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm8B
- MOVL (R9)(R11*1), R10
- CMPL (BX)(R11*1), R10
+ MOVL (R10)(R12*1), R11
+ CMPL (SI)(R12*1), R11
JNE matchlen_match2_repeat_extend_encodeBlockAsm8B
- LEAL -4(R8), R8
- LEAL 4(R11), R11
+ LEAL -4(R9), R9
+ LEAL 4(R12), R12
matchlen_match2_repeat_extend_encodeBlockAsm8B:
- CMPL R8, $0x01
+ CMPL R9, $0x01
JE matchlen_match1_repeat_extend_encodeBlockAsm8B
JB repeat_extend_forward_end_encodeBlockAsm8B
- MOVW (R9)(R11*1), R10
- CMPW (BX)(R11*1), R10
+ MOVW (R10)(R12*1), R11
+ CMPW (SI)(R12*1), R11
JNE matchlen_match1_repeat_extend_encodeBlockAsm8B
- LEAL 2(R11), R11
- SUBL $0x02, R8
+ LEAL 2(R12), R12
+ SUBL $0x02, R9
JZ repeat_extend_forward_end_encodeBlockAsm8B
matchlen_match1_repeat_extend_encodeBlockAsm8B:
- MOVB (R9)(R11*1), R10
- CMPB (BX)(R11*1), R10
+ MOVB (R10)(R12*1), R11
+ CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm8B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
repeat_extend_forward_end_encodeBlockAsm8B:
- ADDL R11, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
- TESTL DI, DI
+ ADDL R12, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
+ TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm8B
// emitRepeat
- MOVL BX, SI
- LEAL -4(BX), BX
- CMPL SI, $0x08
+ MOVL SI, DI
+ LEAL -4(SI), SI
+ CMPL DI, $0x08
JBE repeat_two_match_repeat_encodeBlockAsm8B
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_match_repeat_encodeBlockAsm8B
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_three_match_repeat_encodeBlockAsm8B:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_two_match_repeat_encodeBlockAsm8B:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_as_copy_encodeBlockAsm8B:
// emitCopy
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B
- MOVL $0x00000001, DI
- LEAL 16(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, BX
+ MOVL $0x00000001, R8
+ LEAL 16(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, SI
// emitRepeat
- LEAL -4(BX), BX
+ LEAL -4(SI), SI
JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
- MOVL BX, SI
- LEAL -4(BX), BX
- CMPL SI, $0x08
+ MOVL SI, DI
+ LEAL -4(SI), SI
+ CMPL DI, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
long_offset_short_repeat_as_copy_encodeBlockAsm8B:
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
// emitRepeat
- MOVL BX, SI
- LEAL -4(BX), BX
- CMPL SI, $0x08
+ MOVL SI, DI
+ LEAL -4(SI), SI
+ CMPL DI, $0x08
JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
- CMPL SI, $0x0c
+ CMPL DI, $0x0c
JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- CMPL BX, $0x00000104
+ CMPL SI, $0x00000104
JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
- LEAL -256(BX), BX
- MOVW $0x0019, (AX)
- MOVW BX, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(SI), SI
+ MOVW $0x0019, (CX)
+ MOVW SI, 2(CX)
+ ADDQ $0x04, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- LEAL -4(BX), BX
- MOVW $0x0015, (AX)
- MOVB BL, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(SI), SI
+ MOVW $0x0015, (CX)
+ MOVB SI, 2(CX)
+ ADDQ $0x03, CX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- SHLL $0x02, BX
- ORL $0x01, BX
- MOVW BX, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, SI
+ ORL $0x01, SI
+ MOVW SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
- XORQ DI, DI
- LEAL 1(DI)(BX*4), BX
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ XORQ R8, R8
+ LEAL 1(R8)(SI*4), SI
+ MOVB DI, 1(CX)
+ SARL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeBlockAsm8B
emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeBlockAsm8B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeBlockAsm8B
no_repeat_found_encodeBlockAsm8B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm8B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm8B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm8B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBlockAsm8B
candidate3_match_encodeBlockAsm8B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeBlockAsm8B
candidate2_match_encodeBlockAsm8B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeBlockAsm8B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm8B
match_extend_back_loop_encodeBlockAsm8B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBlockAsm8B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBlockAsm8B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBlockAsm8B
JMP match_extend_back_loop_encodeBlockAsm8B
match_extend_back_end_encodeBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBlockAsm8B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm8B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeBlockAsm8B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeBlockAsm8B
JB three_bytes_match_emit_encodeBlockAsm8B
three_bytes_match_emit_encodeBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBlockAsm8B
two_bytes_match_emit_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeBlockAsm8B
JMP memmove_long_match_emit_encodeBlockAsm8B
one_byte_match_emit_encodeBlockAsm8B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBlockAsm8B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm8B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeBlockAsm8B
memmove_long_match_emit_encodeBlockAsm8B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
- ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
+ ADDQ $0x20, R12
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeBlockAsm8B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B
matchlen_bsf_16match_nolit_encodeBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeBlockAsm8B
matchlen_match8_match_nolit_encodeBlockAsm8B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm8B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeBlockAsm8B
matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm8B
matchlen_match4_match_nolit_encodeBlockAsm8B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm8B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeBlockAsm8B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeBlockAsm8B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeBlockAsm8B
JB match_nolit_end_encodeBlockAsm8B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeBlockAsm8B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeBlockAsm8B
matchlen_match1_match_nolit_encodeBlockAsm8B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm8B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeBlockAsm8B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE long_offset_short_match_nolit_encodeBlockAsm8B
- MOVL $0x00000001, SI
- LEAL 16(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R9
+ MOVL $0x00000001, DI
+ LEAL 16(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R10
// emitRepeat
- LEAL -4(R9), R9
+ LEAL -4(R10), R10
JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
- MOVL R9, BX
- LEAL -4(R9), R9
- CMPL BX, $0x08
+ MOVL R10, SI
+ LEAL -4(R10), R10
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
long_offset_short_match_nolit_encodeBlockAsm8B:
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R9, BX
- LEAL -4(R9), R9
- CMPL BX, $0x08
+ MOVL R10, SI
+ LEAL -4(R10), R10
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
- CMPL R9, $0x00000104
+ CMPL R10, $0x00000104
JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
- LEAL -256(R9), R9
- MOVW $0x0019, (AX)
- MOVW R9, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R10), R10
+ MOVW $0x0019, (CX)
+ MOVW R10, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
- LEAL -4(R9), R9
- MOVW $0x0015, (AX)
- MOVB R9, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R10), R10
+ MOVW $0x0015, (CX)
+ MOVB R10, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
- SHLL $0x02, R9
- ORL $0x01, R9
- MOVW R9, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R10
+ ORL $0x01, R10
+ MOVW R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
- XORQ SI, SI
- LEAL 1(SI)(R9*4), R9
- MOVB BL, 1(AX)
- SARL $0x08, BX
- SHLL $0x05, BX
- ORL BX, R9
- MOVB R9, (AX)
- ADDQ $0x02, AX
+ XORQ DI, DI
+ LEAL 1(DI)(R10*4), R10
+ MOVB SI, 1(CX)
+ SARL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, R10
+ MOVB R10, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
two_byte_offset_short_match_nolit_encodeBlockAsm8B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeBlockAsm8B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
emit_copy_three_match_nolit_encodeBlockAsm8B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeBlockAsm8B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBlockAsm8B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBlockAsm8B:
- MOVQ $0x9e3779b1, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x20, DI
- IMULQ R8, DI
- SHRQ $0x38, DI
- SHLQ $0x20, BX
- IMULQ R8, BX
- SHRQ $0x38, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x20, R8
+ IMULQ R9, R8
+ SHRQ $0x38, R8
+ SHLQ $0x20, SI
+ IMULQ R9, SI
+ SHRQ $0x38, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm8B
- INCL CX
+ INCL DX
JMP search_loop_encodeBlockAsm8B
emit_remainder_encodeBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBlockAsm8B
@@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B:
JB three_bytes_emit_remainder_encodeBlockAsm8B
three_bytes_emit_remainder_encodeBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBlockAsm8B
two_bytes_emit_remainder_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBlockAsm8B
JMP memmove_long_emit_remainder_encodeBlockAsm8B
one_byte_emit_remainder_encodeBlockAsm8B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B:
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm8B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBlockAsm8B
memmove_long_emit_remainder_encodeBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
@@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBetterBlockAsm(dst []byte, src []byte) int
+// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm(SB), $589848-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00001200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00001200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBetterBlockAsm
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -6(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeBetterBlockAsm:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x07, BX
- CMPL BX, $0x63
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x07, SI
+ CMPL SI, $0x63
JBE check_maxskip_ok_encodeBetterBlockAsm
- LEAL 100(CX), BX
+ LEAL 100(DX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm
check_maxskip_ok_encodeBetterBlockAsm:
- LEAL 1(CX)(BX*1), BX
+ LEAL 1(DX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm:
- CMPL BX, 8(SP)
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 524312(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 524312(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x00cf1bbcdcbfa563, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 524288(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 524288(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeBetterBlockAsm
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeBetterBlockAsm
no_short_found_encodeBetterBlockAsm:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBetterBlockAsm
candidateS_match_encodeBetterBlockAsm:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeBetterBlockAsm:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm
match_extend_back_loop_encodeBetterBlockAsm:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBetterBlockAsm
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBetterBlockAsm
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm
JMP match_extend_back_loop_encodeBetterBlockAsm
match_extend_back_end_encodeBetterBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 5(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 5(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeBetterBlockAsm
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeBetterBlockAsm
matchlen_match8_match_nolit_encodeBetterBlockAsm:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm
matchlen_match4_match_nolit_encodeBetterBlockAsm:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeBetterBlockAsm:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm
JB match_nolit_end_encodeBetterBlockAsm
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeBetterBlockAsm
matchlen_match1_match_nolit_encodeBetterBlockAsm:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeBetterBlockAsm:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL 16(SP), DI
+ CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm
- CMPL R11, $0x01
+ CMPL R12, $0x01
JA match_length_ok_encodeBetterBlockAsm
- CMPL DI, $0x0000ffff
+ CMPL R8, $0x0000ffff
JBE match_length_ok_encodeBetterBlockAsm
- MOVL 20(SP), CX
- INCL CX
+ MOVL 20(SP), DX
+ INCL DX
JMP search_loop_encodeBetterBlockAsm
match_length_ok_encodeBetterBlockAsm:
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeBetterBlockAsm
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_match_emit_encodeBetterBlockAsm
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_match_emit_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm
four_bytes_match_emit_encodeBetterBlockAsm:
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm
three_bytes_match_emit_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm
two_bytes_match_emit_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeBetterBlockAsm
JMP memmove_long_match_emit_encodeBetterBlockAsm
one_byte_match_emit_encodeBetterBlockAsm:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm
memmove_long_match_emit_encodeBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeBetterBlockAsm:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB two_byte_offset_match_nolit_encodeBetterBlockAsm
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(R11), R11
- ADDQ $0x05, AX
- CMPL R11, $0x04
+ MOVB $0xff, (CX)
+ MOVL R8, 1(CX)
+ LEAL -64(R12), R12
+ ADDQ $0x05, CX
+ CMPL R12, $0x04
JB four_bytes_remain_match_nolit_encodeBetterBlockAsm
// emitRepeat
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL R11, $0x0100ffff
+ CMPL R12, $0x0100ffff
JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
- LEAL -16842747(R11), R11
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R12), R12
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
four_bytes_remain_match_nolit_encodeBetterBlockAsm:
- TESTL R11, R11
+ TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm
- XORL BX, BX
- LEAL -1(BX)(R11*4), R11
- MOVB R11, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
+ XORL SI, SI
+ LEAL -1(SI)(R12*4), R12
+ MOVB R12, (CX)
+ MOVL R8, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
two_byte_offset_match_nolit_encodeBetterBlockAsm:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE long_offset_short_match_nolit_encodeBetterBlockAsm
- MOVL $0x00000001, BX
- LEAL 16(BX), BX
- MOVB DI, 1(AX)
- MOVL DI, R8
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R11
+ MOVL $0x00000001, SI
+ LEAL 16(SI), SI
+ MOVB R8, 1(CX)
+ MOVL R8, R9
+ SHRL $0x08, R9
+ SHLL $0x05, R9
+ ORL R9, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R12
// emitRepeat
- LEAL -4(R11), R11
+ LEAL -4(R12), R12
JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
- CMPL R11, $0x0100ffff
+ CMPL R12, $0x0100ffff
JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
- LEAL -16842747(R11), R11
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R12), R12
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
long_offset_short_match_nolit_encodeBetterBlockAsm:
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
// emitRepeat
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL R11, $0x0100ffff
+ CMPL R12, $0x0100ffff
JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
- LEAL -16842747(R11), R11
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R12), R12
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy_three_match_nolit_encodeBetterBlockAsm:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
match_is_repeat_encodeBetterBlockAsm:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_repeat_encodeBetterBlockAsm
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_match_emit_repeat_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
four_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
three_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
two_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_repeat_encodeBetterBlockAsm
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
one_byte_match_emit_repeat_encodeBetterBlockAsm:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_repeat_encodeBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
memmove_long_match_emit_repeat_encodeBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitRepeat
emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm
- CMPL R11, $0x0100ffff
+ CMPL R12, $0x0100ffff
JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm
- LEAL -16842747(R11), R11
- MOVL $0xfffb001d, (AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
+ LEAL -16842747(R12), R12
+ MOVL $0xfffb001d, (CX)
+ MOVB $0xff, 4(CX)
+ ADDQ $0x05, CX
JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm
- CMPQ AX, (SP)
- JB match_nolit_dst_ok_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm:
- MOVQ $0x00cf1bbcdcbfa563, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x32, R10
- SHLQ $0x08, R11
- IMULQ BX, R11
- SHRQ $0x2f, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x32, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 524312(SP)(R10*4)
- MOVL R13, 524312(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ CMPQ CX, (SP)
+ JB match_nolit_dst_ok_encodeBetterBlockAsm
+ MOVQ $0x00000000, ret+56(FP)
+ RET
+
+match_nolit_dst_ok_encodeBetterBlockAsm:
+ MOVQ $0x00cf1bbcdcbfa563, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x08, R10
+ IMULQ SI, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x32, R11
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 524288(AX)(R11*4)
+ MOVL R14, 524288(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeBetterBlockAsm:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeBetterBlockAsm
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x08, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x2f, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x08, R11
+ IMULQ SI, R11
+ SHRQ $0x2f, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeBetterBlockAsm
emit_remainder_encodeBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 5(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBetterBlockAsm
@@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm:
JB three_bytes_emit_remainder_encodeBetterBlockAsm
CMPL DX, $0x01000000
JB four_bytes_emit_remainder_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL DX, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
four_bytes_emit_remainder_encodeBetterBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
three_bytes_emit_remainder_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
two_bytes_emit_remainder_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBetterBlockAsm
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
one_byte_emit_remainder_encodeBetterBlockAsm:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm:
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
memmove_long_emit_remainder_encodeBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
@@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
+// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00001200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm4MB(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00001200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm4MB:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBetterBlockAsm4MB
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -6(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeBetterBlockAsm4MB:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x07, BX
- CMPL BX, $0x63
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x07, SI
+ CMPL SI, $0x63
JBE check_maxskip_ok_encodeBetterBlockAsm4MB
- LEAL 100(CX), BX
+ LEAL 100(DX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm4MB
check_maxskip_ok_encodeBetterBlockAsm4MB:
- LEAL 1(CX)(BX*1), BX
+ LEAL 1(DX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm4MB:
- CMPL BX, 8(SP)
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm4MB
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 524312(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 524312(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x00cf1bbcdcbfa563, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 524288(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 524288(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm4MB
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeBetterBlockAsm4MB
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeBetterBlockAsm4MB
no_short_found_encodeBetterBlockAsm4MB:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm4MB
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBetterBlockAsm4MB
candidateS_match_encodeBetterBlockAsm4MB:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm4MB
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeBetterBlockAsm4MB:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm4MB
match_extend_back_loop_encodeBetterBlockAsm4MB:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBetterBlockAsm4MB
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBetterBlockAsm4MB
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm4MB
JMP match_extend_back_loop_encodeBetterBlockAsm4MB
match_extend_back_end_encodeBetterBlockAsm4MB:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 4(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 4(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm4MB:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeBetterBlockAsm4MB
matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm4MB
matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
JB match_nolit_end_encodeBetterBlockAsm4MB
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeBetterBlockAsm4MB
matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm4MB
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeBetterBlockAsm4MB:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL 16(SP), DI
+ CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm4MB
- CMPL R11, $0x01
+ CMPL R12, $0x01
JA match_length_ok_encodeBetterBlockAsm4MB
- CMPL DI, $0x0000ffff
+ CMPL R8, $0x0000ffff
JBE match_length_ok_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
- INCL CX
+ MOVL 20(SP), DX
+ INCL DX
JMP search_loop_encodeBetterBlockAsm4MB
match_length_ok_encodeBetterBlockAsm4MB:
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeBetterBlockAsm4MB
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm4MB
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_match_emit_encodeBetterBlockAsm4MB
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
three_bytes_match_emit_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
two_bytes_match_emit_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeBetterBlockAsm4MB
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
one_byte_match_emit_encodeBetterBlockAsm4MB:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB
memmove_long_match_emit_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(R11), R11
- ADDQ $0x05, AX
- CMPL R11, $0x04
+ MOVB $0xff, (CX)
+ MOVL R8, 1(CX)
+ LEAL -64(R12), R12
+ ADDQ $0x05, CX
+ CMPL R12, $0x04
JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
- TESTL R11, R11
+ TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
- XORL BX, BX
- LEAL -1(BX)(R11*4), R11
- MOVB R11, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
+ XORL SI, SI
+ LEAL -1(SI)(R12*4), R12
+ MOVB R12, (CX)
+ MOVL R8, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB
- MOVL $0x00000001, BX
- LEAL 16(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R11
+ MOVL $0x00000001, SI
+ LEAL 16(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R12
// emitRepeat
- LEAL -4(R11), R11
+ LEAL -4(R12), R12
JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
match_is_repeat_encodeBetterBlockAsm4MB:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL R11, $0x00010100
+ CMPL R12, $0x00010100
JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
- LEAL -65536(R11), R11
- MOVL R11, DI
- MOVW $0x001d, (AX)
- MOVW R11, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
+ LEAL -65536(R12), R12
+ MOVL R12, R8
+ MOVW $0x001d, (CX)
+ MOVW R12, 2(CX)
+ SARL $0x10, R8
+ MOVB R8, 4(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm4MB
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm4MB:
- MOVQ $0x00cf1bbcdcbfa563, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x32, R10
- SHLQ $0x08, R11
- IMULQ BX, R11
- SHRQ $0x2f, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x32, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 524312(SP)(R10*4)
- MOVL R13, 524312(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x00cf1bbcdcbfa563, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x08, R10
+ IMULQ SI, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x32, R11
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 524288(AX)(R11*4)
+ MOVL R14, 524288(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeBetterBlockAsm4MB:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeBetterBlockAsm4MB
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x08, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x2f, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x08, R11
+ IMULQ SI, R11
+ SHRQ $0x2f, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeBetterBlockAsm4MB
emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 4(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 4(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBetterBlockAsm4MB
@@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB:
JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBetterBlockAsm4MB
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
one_byte_emit_remainder_encodeBetterBlockAsm4MB:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBetterBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB:
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
MOVOU (SI), X4
@@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
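
The structural change running through these hunks is that the generated encodeBetterBlock* routines no longer reserve their hash tables on their own stack frame: the frame for encodeBetterBlockAsm12B below shrinks from $81944-56 to $24-64, the table stores move from 24(SP)/65560(SP) offsets to (AX)/65536(AX) off the new tmp pointer, and the result slot shifts from ret+48(FP) to ret+56(FP) because the argument area grows by one pointer. A minimal caller-side sketch of that pattern, assuming the caller pools and reuses the scratch buffer; the pool and helper names here are illustrative, not the library's actual API, and `encode` merely stands in for the assembly routine whose declaration appears in the diff below.

    package main

    import (
        "fmt"
        "sync"
    )

    // scratchPool reuses the 80 KiB table that the patched assembly now expects
    // from its caller via the new `tmp *[81920]byte` parameter.
    var scratchPool = sync.Pool{New: func() any { return new([81920]byte) }}

    // encodeWithScratch shows only the caller-side shape: fetch a scratch table,
    // hand it to the encoder, and return it to the pool afterwards.
    func encodeWithScratch(dst, src []byte, encode func(dst, src []byte, tmp *[81920]byte) int) int {
        tmp := scratchPool.Get().(*[81920]byte)
        defer scratchPool.Put(tmp)
        return encode(dst, src, tmp)
    }

    func main() {
        // Stand-in for the real assembly routine, just to make the sketch self-contained.
        stub := func(dst, src []byte, _ *[81920]byte) int { return copy(dst, src) }
        fmt.Println(encodeWithScratch(make([]byte, 8), []byte("example!"), stub)) // prints 8
    }
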
-// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000280, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm12B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000280, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBetterBlockAsm12B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -6(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeBetterBlockAsm12B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm12B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x34, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 65560(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 65560(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x34, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 65536(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 65536(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm12B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeBetterBlockAsm12B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeBetterBlockAsm12B
no_short_found_encodeBetterBlockAsm12B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm12B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm12B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBetterBlockAsm12B
candidateS_match_encodeBetterBlockAsm12B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm12B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeBetterBlockAsm12B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm12B
match_extend_back_loop_encodeBetterBlockAsm12B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBetterBlockAsm12B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBetterBlockAsm12B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm12B
JMP match_extend_back_loop_encodeBetterBlockAsm12B
match_extend_back_end_encodeBetterBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm12B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeBetterBlockAsm12B
matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm12B
matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B
JB match_nolit_end_encodeBetterBlockAsm12B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeBetterBlockAsm12B
matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm12B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeBetterBlockAsm12B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL 16(SP), DI
+ CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm12B
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeBetterBlockAsm12B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm12B
JB three_bytes_match_emit_encodeBetterBlockAsm12B
three_bytes_match_emit_encodeBetterBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm12B
two_bytes_match_emit_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeBetterBlockAsm12B
JMP memmove_long_match_emit_encodeBetterBlockAsm12B
one_byte_match_emit_encodeBetterBlockAsm12B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B
memmove_long_match_emit_encodeBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeBetterBlockAsm12B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B
- MOVL $0x00000001, BX
- LEAL 16(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R11
+ MOVL $0x00000001, SI
+ LEAL 16(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R12
// emitRepeat
- LEAL -4(R11), R11
+ LEAL -4(R12), R12
JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
long_offset_short_match_nolit_encodeBetterBlockAsm12B:
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
match_is_repeat_encodeBetterBlockAsm12B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_repeat_encodeBetterBlockAsm12B
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_repeat_encodeBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm12B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm12B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x32, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x34, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x34, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 65560(SP)(R10*4)
- MOVL R13, 65560(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x32, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x34, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 65536(AX)(R11*4)
+ MOVL R14, 65536(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeBetterBlockAsm12B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeBetterBlockAsm12B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x32, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x32, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeBetterBlockAsm12B
emit_remainder_encodeBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBetterBlockAsm12B
@@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B:
JB three_bytes_emit_remainder_encodeBetterBlockAsm12B
three_bytes_emit_remainder_encodeBetterBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
two_bytes_emit_remainder_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBetterBlockAsm12B
JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
one_byte_emit_remainder_encodeBetterBlockAsm12B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B:
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
memmove_long_emit_remainder_encodeBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
@@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x000000a0, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm10B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x000000a0, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBetterBlockAsm10B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -6(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeBetterBlockAsm10B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm10B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x36, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 16408(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 16408(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x36, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 16384(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 16384(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm10B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeBetterBlockAsm10B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeBetterBlockAsm10B
no_short_found_encodeBetterBlockAsm10B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm10B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm10B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBetterBlockAsm10B
candidateS_match_encodeBetterBlockAsm10B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm10B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeBetterBlockAsm10B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm10B
match_extend_back_loop_encodeBetterBlockAsm10B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBetterBlockAsm10B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBetterBlockAsm10B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm10B
JMP match_extend_back_loop_encodeBetterBlockAsm10B
match_extend_back_end_encodeBetterBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm10B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeBetterBlockAsm10B
matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm10B
matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B
JB match_nolit_end_encodeBetterBlockAsm10B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeBetterBlockAsm10B
matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm10B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeBetterBlockAsm10B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL 16(SP), DI
+ CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm10B
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeBetterBlockAsm10B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm10B
JB three_bytes_match_emit_encodeBetterBlockAsm10B
three_bytes_match_emit_encodeBetterBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm10B
two_bytes_match_emit_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeBetterBlockAsm10B
JMP memmove_long_match_emit_encodeBetterBlockAsm10B
one_byte_match_emit_encodeBetterBlockAsm10B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B
memmove_long_match_emit_encodeBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeBetterBlockAsm10B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B
- MOVL $0x00000001, BX
- LEAL 16(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R11
+ MOVL $0x00000001, SI
+ LEAL 16(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R12
// emitRepeat
- LEAL -4(R11), R11
+ LEAL -4(R12), R12
JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
long_offset_short_match_nolit_encodeBetterBlockAsm10B:
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
match_is_repeat_encodeBetterBlockAsm10B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_repeat_encodeBetterBlockAsm10B
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_repeat_encodeBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm10B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm10B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x34, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x36, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x36, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 16408(SP)(R10*4)
- MOVL R13, 16408(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x34, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x36, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 16384(AX)(R11*4)
+ MOVL R14, 16384(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeBetterBlockAsm10B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeBetterBlockAsm10B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x34, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x34, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x34, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeBetterBlockAsm10B
emit_remainder_encodeBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBetterBlockAsm10B
@@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B:
JB three_bytes_emit_remainder_encodeBetterBlockAsm10B
three_bytes_emit_remainder_encodeBetterBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
two_bytes_emit_remainder_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBetterBlockAsm10B
JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
one_byte_emit_remainder_encodeBetterBlockAsm10B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B:
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
memmove_long_emit_remainder_encodeBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
@@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000028, CX
- LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm8B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000028, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeBetterBlockAsm8B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -6(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeBetterBlockAsm8B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x04, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x04, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm8B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x38, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 4120(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 4120(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x38, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 4096(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 4096(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm8B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeBetterBlockAsm8B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeBetterBlockAsm8B
no_short_found_encodeBetterBlockAsm8B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeBetterBlockAsm8B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeBetterBlockAsm8B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeBetterBlockAsm8B
candidateS_match_encodeBetterBlockAsm8B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm8B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeBetterBlockAsm8B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm8B
match_extend_back_loop_encodeBetterBlockAsm8B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeBetterBlockAsm8B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeBetterBlockAsm8B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm8B
JMP match_extend_back_loop_encodeBetterBlockAsm8B
match_extend_back_end_encodeBetterBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeBetterBlockAsm8B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeBetterBlockAsm8B
matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B
matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm8B
matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B
JB match_nolit_end_encodeBetterBlockAsm8B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeBetterBlockAsm8B
matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm8B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeBetterBlockAsm8B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL 16(SP), DI
+ CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm8B
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeBetterBlockAsm8B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeBetterBlockAsm8B
JB three_bytes_match_emit_encodeBetterBlockAsm8B
three_bytes_match_emit_encodeBetterBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeBetterBlockAsm8B
two_bytes_match_emit_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeBetterBlockAsm8B
JMP memmove_long_match_emit_encodeBetterBlockAsm8B
one_byte_match_emit_encodeBetterBlockAsm8B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeBetterBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x04
+ CMPQ R9, $0x04
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
+ MOVL (R10), R11
+ MOVL R11, (CX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
+ MOVL (R10), R11
+ MOVL -4(R10)(R9*1), R10
+ MOVL R11, (CX)
+ MOVL R10, -4(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B
memmove_long_match_emit_encodeBetterBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeBetterBlockAsm8B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B
- MOVL $0x00000001, BX
- LEAL 16(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
- SUBL $0x08, R11
+ MOVL $0x00000001, SI
+ LEAL 16(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
+ SUBL $0x08, R12
// emitRepeat
- LEAL -4(R11), R11
+ LEAL -4(R12), R12
JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
long_offset_short_match_nolit_encodeBetterBlockAsm8B:
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
+ JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
match_is_repeat_encodeBetterBlockAsm8B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_repeat_encodeBetterBlockAsm8B
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_repeat_encodeBetterBlockAsm8B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x04
+ CMPQ R8, $0x04
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
- MOVL (R8), R9
- MOVL R9, (AX)
+ MOVL (R9), R10
+ MOVL R10, (CX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (R8), R9
- MOVL -4(R8)(DI*1), R8
- MOVL R9, (AX)
- MOVL R8, -4(AX)(DI*1)
+ MOVL (R9), R10
+ MOVL -4(R9)(R8*1), R9
+ MOVL R10, (CX)
+ MOVL R9, -4(CX)(R8*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R12
- SUBQ R9, R12
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R13
+ SUBQ R10, R13
+ DECQ R11
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R8)(R12*1), R9
- LEAQ -32(AX)(R12*1), R13
+ LEAQ -32(R9)(R13*1), R10
+ LEAQ -32(CX)(R13*1), R14
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R14)
+ MOVOA X5, 16(R14)
+ ADDQ $0x20, R14
+ ADDQ $0x20, R10
ADDQ $0x20, R13
- ADDQ $0x20, R9
- ADDQ $0x20, R12
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R8)(R12*1), X4
- MOVOU -16(R8)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ DI, R12
+ MOVOU -32(R9)(R13*1), X4
+ MOVOU -16(R9)(R13*1), X5
+ MOVOA X4, -32(CX)(R13*1)
+ MOVOA X5, -16(CX)(R13*1)
+ ADDQ $0x20, R13
+ CMPQ R8, R13
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitRepeat
- MOVL R11, BX
- LEAL -4(R11), R11
- CMPL BX, $0x08
+ MOVL R12, SI
+ LEAL -4(R12), R12
+ CMPL SI, $0x08
JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
- CMPL BX, $0x0c
+ CMPL SI, $0x0c
JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
- CMPL R11, $0x00000104
+ CMPL R12, $0x00000104
JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
- LEAL -256(R11), R11
- MOVW $0x0019, (AX)
- MOVW R11, 2(AX)
- ADDQ $0x04, AX
+ LEAL -256(R12), R12
+ MOVW $0x0019, (CX)
+ MOVW R12, 2(CX)
+ ADDQ $0x04, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
- LEAL -4(R11), R11
- MOVW $0x0015, (AX)
- MOVB R11, 2(AX)
- ADDQ $0x03, AX
+ LEAL -4(R12), R12
+ MOVW $0x0015, (CX)
+ MOVB R12, 2(CX)
+ ADDQ $0x03, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
- SHLL $0x02, R11
- ORL $0x01, R11
- MOVW R11, (AX)
- ADDQ $0x02, AX
+ SHLL $0x02, R12
+ ORL $0x01, R12
+ MOVW R12, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- XORQ BX, BX
- LEAL 1(BX)(R11*4), R11
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, R11
- MOVB R11, (AX)
- ADDQ $0x02, AX
+ XORQ SI, SI
+ LEAL 1(SI)(R12*4), R12
+ MOVB R8, 1(CX)
+ SARL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, R12
+ MOVB R12, (CX)
+ ADDQ $0x02, CX
match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeBetterBlockAsm8B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm8B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x36, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x38, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x38, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 4120(SP)(R10*4)
- MOVL R13, 4120(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x36, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x38, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 4096(AX)(R11*4)
+ MOVL R14, 4096(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeBetterBlockAsm8B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeBetterBlockAsm8B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x36, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x36, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x36, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeBetterBlockAsm8B
emit_remainder_encodeBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeBetterBlockAsm8B
@@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B:
JB three_bytes_emit_remainder_encodeBetterBlockAsm8B
three_bytes_emit_remainder_encodeBetterBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
two_bytes_emit_remainder_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeBetterBlockAsm8B
JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
one_byte_emit_remainder_encodeBetterBlockAsm8B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B:
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
memmove_long_emit_remainder_encodeBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
@@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBlockAsm(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBlockAsm
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBlockAsm:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x10, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x10, R11
+ IMULQ R9, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm
repeat_extend_back_loop_encodeSnappyBlockAsm:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_encodeSnappyBlockAsm
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeSnappyBlockAsm
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm
repeat_extend_back_end_encodeSnappyBlockAsm:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 5(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 5(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeSnappyBlockAsm:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeSnappyBlockAsm
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeSnappyBlockAsm
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_repeat_emit_encodeSnappyBlockAsm
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_repeat_emit_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
four_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVL BX, R9
- SHRL $0x10, R9
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R9, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R10
+ SHRL $0x10, R10
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R10, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
three_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
two_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeSnappyBlockAsm
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
one_byte_repeat_emit_encodeSnappyBlockAsm:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
- MOVQ (R8), R9
- MOVQ R9, (AX)
+ MOVQ (R9), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm
memmove_long_repeat_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R8)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(R9)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R8)(R11*1), X4
- MOVOU -16(R8)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ DI, R11
+ MOVOU -32(R9)(R12*1), X4
+ MOVOU -16(R9)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm
matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm
matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm
JB repeat_extend_forward_end_encodeSnappyBlockAsm
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm
matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_encodeSnappyBlockAsm:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
- CMPL SI, $0x00010000
+ CMPL DI, $0x00010000
JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(BX), BX
- ADDQ $0x05, AX
- CMPL BX, $0x04
+ MOVB $0xff, (CX)
+ MOVL DI, 1(CX)
+ LEAL -64(SI), SI
+ ADDQ $0x05, CX
+ CMPL SI, $0x04
JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
- TESTL BX, BX
+ TESTL SI, SI
JZ repeat_end_emit_encodeSnappyBlockAsm
- XORL DI, DI
- LEAL -1(DI)(BX*4), BX
- MOVB BL, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
+ XORL R8, R8
+ LEAL -1(R8)(SI*4), SI
+ MOVB SI, (CX)
+ MOVL DI, 1(CX)
+ ADDQ $0x05, CX
JMP repeat_end_emit_encodeSnappyBlockAsm
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeSnappyBlockAsm
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeSnappyBlockAsm:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm
no_repeat_found_encodeSnappyBlockAsm:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBlockAsm
candidate3_match_encodeSnappyBlockAsm:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeSnappyBlockAsm
candidate2_match_encodeSnappyBlockAsm:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBlockAsm:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm
match_extend_back_loop_encodeSnappyBlockAsm:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBlockAsm
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBlockAsm
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm
JMP match_extend_back_loop_encodeSnappyBlockAsm
match_extend_back_end_encodeSnappyBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 5(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 5(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeSnappyBlockAsm
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeSnappyBlockAsm
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB three_bytes_match_emit_encodeSnappyBlockAsm
- CMPL DI, $0x01000000
+ CMPL R8, $0x01000000
JB four_bytes_match_emit_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL R8, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
four_bytes_match_emit_encodeSnappyBlockAsm:
- MOVL DI, R9
- SHRL $0x10, R9
- MOVB $0xf8, (AX)
- MOVW DI, 1(AX)
- MOVB R9, 3(AX)
- ADDQ $0x04, AX
+ MOVL R8, R10
+ SHRL $0x10, R10
+ MOVB $0xf8, (CX)
+ MOVW R8, 1(CX)
+ MOVB R10, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
three_bytes_match_emit_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
two_bytes_match_emit_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeSnappyBlockAsm
JMP memmove_long_match_emit_encodeSnappyBlockAsm
one_byte_match_emit_encodeSnappyBlockAsm:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm
memmove_long_match_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeSnappyBlockAsm:
match_nolit_loop_encodeSnappyBlockAsm:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBlockAsm
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm
matchlen_match8_match_nolit_encodeSnappyBlockAsm:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm
matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm
matchlen_match4_match_nolit_encodeSnappyBlockAsm:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeSnappyBlockAsm:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBlockAsm
JB match_nolit_end_encodeSnappyBlockAsm
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeSnappyBlockAsm
matchlen_match1_match_nolit_encodeSnappyBlockAsm:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeSnappyBlockAsm:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB two_byte_offset_match_nolit_encodeSnappyBlockAsm
four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
- MOVB $0xff, (AX)
- MOVL BX, 1(AX)
- LEAL -64(R9), R9
- ADDQ $0x05, AX
- CMPL R9, $0x04
+ MOVB $0xff, (CX)
+ MOVL SI, 1(CX)
+ LEAL -64(R10), R10
+ ADDQ $0x05, CX
+ CMPL R10, $0x04
JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm
JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
- TESTL R9, R9
+ TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
- XORL SI, SI
- LEAL -1(SI)(R9*4), R9
- MOVB R9, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ XORL DI, DI
+ LEAL -1(DI)(R10*4), R10
+ MOVB R10, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
two_byte_offset_match_nolit_encodeSnappyBlockAsm:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
emit_copy_three_match_nolit_encodeSnappyBlockAsm:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBlockAsm:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm:
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x10, DI
- IMULQ R8, DI
- SHRQ $0x32, DI
- SHLQ $0x10, BX
- IMULQ R8, BX
- SHRQ $0x32, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x10, R8
+ IMULQ R9, R8
+ SHRQ $0x32, R8
+ SHLQ $0x10, SI
+ IMULQ R9, SI
+ SHRQ $0x32, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm
- INCL CX
+ INCL DX
JMP search_loop_encodeSnappyBlockAsm
emit_remainder_encodeSnappyBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 5(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBlockAsm
@@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm:
JB three_bytes_emit_remainder_encodeSnappyBlockAsm
CMPL DX, $0x01000000
JB four_bytes_emit_remainder_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL DX, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
four_bytes_emit_remainder_encodeSnappyBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
three_bytes_emit_remainder_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
two_bytes_emit_remainder_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBlockAsm
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
one_byte_emit_remainder_encodeSnappyBlockAsm:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm
memmove_long_emit_remainder_encodeSnappyBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
@@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm64K(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm64K:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBlockAsm64K
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBlockAsm64K:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm64K
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x10, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x10, R11
+ IMULQ R9, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm64K
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm64K
repeat_extend_back_loop_encodeSnappyBlockAsm64K:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_encodeSnappyBlockAsm64K
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeSnappyBlockAsm64K
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K
repeat_extend_back_end_encodeSnappyBlockAsm64K:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeSnappyBlockAsm64K:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeSnappyBlockAsm64K
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K
JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K
three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeSnappyBlockAsm64K
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
one_byte_repeat_emit_encodeSnappyBlockAsm64K:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
- MOVQ (R8), R9
- MOVQ R9, (AX)
+ MOVQ (R9), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(R8)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(R9)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(R8)(R11*1), X4
- MOVOU -16(R8)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ DI, R11
+ MOVOU -32(R9)(R12*1), X4
+ MOVOU -16(R9)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
JB repeat_extend_forward_end_encodeSnappyBlockAsm64K
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K
matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_encodeSnappyBlockAsm64K:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeSnappyBlockAsm64K
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeSnappyBlockAsm64K:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm64K
no_repeat_found_encodeSnappyBlockAsm64K:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm64K
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm64K
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm64K
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBlockAsm64K
candidate3_match_encodeSnappyBlockAsm64K:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeSnappyBlockAsm64K
candidate2_match_encodeSnappyBlockAsm64K:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBlockAsm64K:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm64K
match_extend_back_loop_encodeSnappyBlockAsm64K:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBlockAsm64K
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBlockAsm64K
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm64K
JMP match_extend_back_loop_encodeSnappyBlockAsm64K
match_extend_back_end_encodeSnappyBlockAsm64K:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm64K:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeSnappyBlockAsm64K
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeSnappyBlockAsm64K
JB three_bytes_match_emit_encodeSnappyBlockAsm64K
three_bytes_match_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
two_bytes_match_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeSnappyBlockAsm64K
JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
one_byte_match_emit_encodeSnappyBlockAsm64K:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K
memmove_long_match_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
match_nolit_loop_encodeSnappyBlockAsm64K:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm64K
matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm64K
matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
JB match_nolit_end_encodeSnappyBlockAsm64K
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeSnappyBlockAsm64K
matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm64K
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeSnappyBlockAsm64K:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm64K
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm64K:
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x10, DI
- IMULQ R8, DI
- SHRQ $0x32, DI
- SHLQ $0x10, BX
- IMULQ R8, BX
- SHRQ $0x32, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x10, R8
+ IMULQ R9, R8
+ SHRQ $0x32, R8
+ SHLQ $0x10, SI
+ IMULQ R9, SI
+ SHRQ $0x32, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm64K
- INCL CX
+ INCL DX
JMP search_loop_encodeSnappyBlockAsm64K
emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm64K:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBlockAsm64K
@@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K:
JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K
three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBlockAsm64K
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
one_byte_emit_remainder_encodeSnappyBlockAsm64K:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBlockAsm64K:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
@@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000080, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm12B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000080, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBlockAsm12B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBlockAsm12B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm12B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x000000cf1bbcdcbb, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x18, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x000000cf1bbcdcbb, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x18, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x34, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x18, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x18, R11
+ IMULQ R9, R11
+ SHRQ $0x34, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x18, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm12B
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm12B
repeat_extend_back_loop_encodeSnappyBlockAsm12B:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_encodeSnappyBlockAsm12B
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B
repeat_extend_back_end_encodeSnappyBlockAsm12B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeSnappyBlockAsm12B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeSnappyBlockAsm12B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B
JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B
three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeSnappyBlockAsm12B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
one_byte_repeat_emit_encodeSnappyBlockAsm12B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
- MOVQ (R8), R9
- MOVQ R9, (AX)
+ MOVQ (R9), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R8)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(R9)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R8)(R11*1), X4
- MOVOU -16(R8)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ DI, R11
+ MOVOU -32(R9)(R12*1), X4
+ MOVOU -16(R9)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
JB repeat_extend_forward_end_encodeSnappyBlockAsm12B
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B
matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_encodeSnappyBlockAsm12B:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeSnappyBlockAsm12B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeSnappyBlockAsm12B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm12B
no_repeat_found_encodeSnappyBlockAsm12B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm12B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm12B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm12B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBlockAsm12B
candidate3_match_encodeSnappyBlockAsm12B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeSnappyBlockAsm12B
candidate2_match_encodeSnappyBlockAsm12B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBlockAsm12B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm12B
match_extend_back_loop_encodeSnappyBlockAsm12B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBlockAsm12B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBlockAsm12B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm12B
JMP match_extend_back_loop_encodeSnappyBlockAsm12B
match_extend_back_end_encodeSnappyBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm12B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeSnappyBlockAsm12B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeSnappyBlockAsm12B
JB three_bytes_match_emit_encodeSnappyBlockAsm12B
three_bytes_match_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
two_bytes_match_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeSnappyBlockAsm12B
JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
one_byte_match_emit_encodeSnappyBlockAsm12B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B
memmove_long_match_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
match_nolit_loop_encodeSnappyBlockAsm12B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm12B
matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm12B
matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
JB match_nolit_end_encodeSnappyBlockAsm12B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeSnappyBlockAsm12B
matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm12B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeSnappyBlockAsm12B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm12B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm12B:
- MOVQ $0x000000cf1bbcdcbb, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x18, DI
- IMULQ R8, DI
- SHRQ $0x34, DI
- SHLQ $0x18, BX
- IMULQ R8, BX
- SHRQ $0x34, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x000000cf1bbcdcbb, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x18, R8
+ IMULQ R9, R8
+ SHRQ $0x34, R8
+ SHLQ $0x18, SI
+ IMULQ R9, SI
+ SHRQ $0x34, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm12B
- INCL CX
+ INCL DX
JMP search_loop_encodeSnappyBlockAsm12B
emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBlockAsm12B
@@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B:
JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B
three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBlockAsm12B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
one_byte_emit_remainder_encodeSnappyBlockAsm12B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
@@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000020, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm10B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000020, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBlockAsm10B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBlockAsm10B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm10B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x9e3779b1, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x20, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x36, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x20, R11
+ IMULQ R9, R11
+ SHRQ $0x36, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x20, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm10B
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm10B
repeat_extend_back_loop_encodeSnappyBlockAsm10B:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_encodeSnappyBlockAsm10B
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B
repeat_extend_back_end_encodeSnappyBlockAsm10B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeSnappyBlockAsm10B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeSnappyBlockAsm10B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B
JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B
three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeSnappyBlockAsm10B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
one_byte_repeat_emit_encodeSnappyBlockAsm10B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
- MOVQ (R8), R9
- MOVQ R9, (AX)
+ MOVQ (R9), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R8)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(R9)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R8)(R11*1), X4
- MOVOU -16(R8)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ DI, R11
+ MOVOU -32(R9)(R12*1), X4
+ MOVOU -16(R9)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
JB repeat_extend_forward_end_encodeSnappyBlockAsm10B
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B
matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_encodeSnappyBlockAsm10B:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeSnappyBlockAsm10B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeSnappyBlockAsm10B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm10B
no_repeat_found_encodeSnappyBlockAsm10B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm10B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm10B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm10B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBlockAsm10B
candidate3_match_encodeSnappyBlockAsm10B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeSnappyBlockAsm10B
candidate2_match_encodeSnappyBlockAsm10B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBlockAsm10B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm10B
match_extend_back_loop_encodeSnappyBlockAsm10B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBlockAsm10B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBlockAsm10B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm10B
JMP match_extend_back_loop_encodeSnappyBlockAsm10B
match_extend_back_end_encodeSnappyBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm10B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeSnappyBlockAsm10B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeSnappyBlockAsm10B
JB three_bytes_match_emit_encodeSnappyBlockAsm10B
three_bytes_match_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
two_bytes_match_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeSnappyBlockAsm10B
JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
one_byte_match_emit_encodeSnappyBlockAsm10B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B
memmove_long_match_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
match_nolit_loop_encodeSnappyBlockAsm10B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm10B
matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm10B
matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
JB match_nolit_end_encodeSnappyBlockAsm10B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeSnappyBlockAsm10B
matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm10B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeSnappyBlockAsm10B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm10B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm10B:
- MOVQ $0x9e3779b1, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x20, DI
- IMULQ R8, DI
- SHRQ $0x36, DI
- SHLQ $0x20, BX
- IMULQ R8, BX
- SHRQ $0x36, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x20, R8
+ IMULQ R9, R8
+ SHRQ $0x36, R8
+ SHLQ $0x20, SI
+ IMULQ R9, SI
+ SHRQ $0x36, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm10B
- INCL CX
+ INCL DX
JMP search_loop_encodeSnappyBlockAsm10B
emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBlockAsm10B
@@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B:
JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B
three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBlockAsm10B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
one_byte_emit_remainder_encodeSnappyBlockAsm10B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
@@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000008, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm8B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000008, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBlockAsm8B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBlockAsm8B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x04, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x04, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm8B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x9e3779b1, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x38, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x20, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x38, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x38, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x20, R11
+ IMULQ R9, R11
+ SHRQ $0x38, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x20, R10
+ IMULQ R9, R10
+ SHRQ $0x38, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm8B
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm8B
repeat_extend_back_loop_encodeSnappyBlockAsm8B:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_encodeSnappyBlockAsm8B
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B
repeat_extend_back_end_encodeSnappyBlockAsm8B:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
repeat_dst_size_check_encodeSnappyBlockAsm8B:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_encodeSnappyBlockAsm8B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B
JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B
three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_encodeSnappyBlockAsm8B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
one_byte_repeat_emit_encodeSnappyBlockAsm8B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_repeat_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveShort
- CMPQ DI, $0x08
+ CMPQ R8, $0x08
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
- CMPQ DI, $0x10
+ CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
- CMPQ DI, $0x20
+ CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
- MOVQ (R8), R9
- MOVQ R9, (AX)
+ MOVQ (R9), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (R8), R9
- MOVQ -8(R8)(DI*1), R8
- MOVQ R9, (AX)
- MOVQ R8, -8(AX)(DI*1)
+ MOVQ (R9), R10
+ MOVQ -8(R9)(R8*1), R9
+ MOVQ R10, (CX)
+ MOVQ R9, -8(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (R8), X0
- MOVOU -16(R8)(DI*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU -16(R9)(R8*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(DI*1), BX
+ LEAQ (CX)(R8*1), SI
// genMemMoveLong
- MOVOU (R8), X0
- MOVOU 16(R8), X1
- MOVOU -32(R8)(DI*1), X2
- MOVOU -16(R8)(DI*1), X3
- MOVQ DI, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (R9), X0
+ MOVOU 16(R9), X1
+ MOVOU -32(R9)(R8*1), X2
+ MOVOU -16(R9)(R8*1), X3
+ MOVQ R8, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R8)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(R9)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R8)(R11*1), X4
- MOVOU -16(R8)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ DI, R11
+ MOVOU -32(R9)(R12*1), X4
+ MOVOU -16(R9)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DI*1)
- MOVOU X3, -16(AX)(DI*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R8*1)
+ MOVOU X3, -16(CX)(R8*1)
+ MOVQ SI, CX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
JB repeat_extend_forward_end_encodeSnappyBlockAsm8B
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B
matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_encodeSnappyBlockAsm8B:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW DI, 1(CX)
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
- LEAL -15(DI), DI
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, DI
- MOVB DI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(R8), R8
+ MOVB DI, 1(CX)
+ SHRL $0x08, DI
+ SHLL $0x05, DI
+ ORL DI, R8
+ MOVB R8, (CX)
+ ADDQ $0x02, CX
JMP repeat_end_emit_encodeSnappyBlockAsm8B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
- LEAL -2(DI), DI
- MOVB DI, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(R8), R8
+ MOVB R8, (CX)
+ MOVW DI, 1(CX)
+ ADDQ $0x03, CX
repeat_end_emit_encodeSnappyBlockAsm8B:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm8B
no_repeat_found_encodeSnappyBlockAsm8B:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm8B
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm8B
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm8B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBlockAsm8B
candidate3_match_encodeSnappyBlockAsm8B:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_encodeSnappyBlockAsm8B
candidate2_match_encodeSnappyBlockAsm8B:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBlockAsm8B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm8B
match_extend_back_loop_encodeSnappyBlockAsm8B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBlockAsm8B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBlockAsm8B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm8B
JMP match_extend_back_loop_encodeSnappyBlockAsm8B
match_extend_back_end_encodeSnappyBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm8B:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), DI
- CMPL DI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), R8
+ CMPL R8, $0x3c
JB one_byte_match_emit_encodeSnappyBlockAsm8B
- CMPL DI, $0x00000100
+ CMPL R8, $0x00000100
JB two_bytes_match_emit_encodeSnappyBlockAsm8B
JB three_bytes_match_emit_encodeSnappyBlockAsm8B
three_bytes_match_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
two_bytes_match_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DI, 1(AX)
- ADDQ $0x02, AX
- CMPL DI, $0x40
+ MOVB $0xf0, (CX)
+ MOVB R8, 1(CX)
+ ADDQ $0x02, CX
+ CMPL R8, $0x40
JB memmove_match_emit_encodeSnappyBlockAsm8B
JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
one_byte_match_emit_encodeSnappyBlockAsm8B:
- SHLB $0x02, DI
- MOVB DI, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, R8
+ MOVB R8, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
- MOVQ (SI), R9
- MOVQ R9, (AX)
+ MOVQ (DI), R10
+ MOVQ R10, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (SI), R9
- MOVQ -8(SI)(R8*1), SI
- MOVQ R9, (AX)
- MOVQ SI, -8(AX)(R8*1)
+ MOVQ (DI), R10
+ MOVQ -8(DI)(R9*1), DI
+ MOVQ R10, (CX)
+ MOVQ DI, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (SI), X0
- MOVOU -16(SI)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU -16(DI)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
- MOVQ DI, AX
+ MOVQ R8, CX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B
memmove_long_match_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R8*1), DI
+ LEAQ (CX)(R9*1), R8
// genMemMoveLong
- MOVOU (SI), X0
- MOVOU 16(SI), X1
- MOVOU -32(SI)(R8*1), X2
- MOVOU -16(SI)(R8*1), X3
- MOVQ R8, R10
- SHRQ $0x05, R10
- MOVQ AX, R9
- ANDL $0x0000001f, R9
- MOVQ $0x00000040, R11
- SUBQ R9, R11
- DECQ R10
+ MOVOU (DI), X0
+ MOVOU 16(DI), X1
+ MOVOU -32(DI)(R9*1), X2
+ MOVOU -16(DI)(R9*1), X3
+ MOVQ R9, R11
+ SHRQ $0x05, R11
+ MOVQ CX, R10
+ ANDL $0x0000001f, R10
+ MOVQ $0x00000040, R12
+ SUBQ R10, R12
+ DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(SI)(R11*1), R9
- LEAQ -32(AX)(R11*1), R12
+ LEAQ -32(DI)(R12*1), R10
+ LEAQ -32(CX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
- MOVOU (R9), X4
- MOVOU 16(R9), X5
- MOVOA X4, (R12)
- MOVOA X5, 16(R12)
+ MOVOU (R10), X4
+ MOVOU 16(R10), X5
+ MOVOA X4, (R13)
+ MOVOA X5, 16(R13)
+ ADDQ $0x20, R13
+ ADDQ $0x20, R10
ADDQ $0x20, R12
- ADDQ $0x20, R9
- ADDQ $0x20, R11
- DECQ R10
+ DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(SI)(R11*1), X4
- MOVOU -16(SI)(R11*1), X5
- MOVOA X4, -32(AX)(R11*1)
- MOVOA X5, -16(AX)(R11*1)
- ADDQ $0x20, R11
- CMPQ R8, R11
+ MOVOU -32(DI)(R12*1), X4
+ MOVOU -16(DI)(R12*1), X5
+ MOVOA X4, -32(CX)(R12*1)
+ MOVOA X5, -16(CX)(R12*1)
+ ADDQ $0x20, R12
+ CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ DI, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ R8, CX
emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm8B
matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm8B
matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
JB match_nolit_end_encodeSnappyBlockAsm8B
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_encodeSnappyBlockAsm8B
matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm8B
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_encodeSnappyBlockAsm8B:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
- MOVB $0xee, (AX)
- MOVW BX, 1(AX)
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW SI, 1(CX)
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
- LEAL -15(SI), SI
- MOVB BL, 1(AX)
- SHRL $0x08, BX
- SHLL $0x05, BX
- ORL BX, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
+ LEAL -15(DI), DI
+ MOVB SI, 1(CX)
+ SHRL $0x08, SI
+ SHLL $0x05, SI
+ ORL SI, DI
+ MOVB DI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
- LEAL -2(SI), SI
- MOVB SI, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(DI), DI
+ MOVB DI, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBlockAsm8B
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm8B:
- MOVQ $0x9e3779b1, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x20, DI
- IMULQ R8, DI
- SHRQ $0x38, DI
- SHLQ $0x20, BX
- IMULQ R8, BX
- SHRQ $0x38, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x20, R8
+ IMULQ R9, R8
+ SHRQ $0x38, R8
+ SHLQ $0x20, SI
+ IMULQ R9, SI
+ SHRQ $0x38, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm8B
- INCL CX
+ INCL DX
JMP search_loop_encodeSnappyBlockAsm8B
emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBlockAsm8B
@@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B:
JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B
three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBlockAsm8B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
one_byte_emit_remainder_encodeSnappyBlockAsm8B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
@@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00001200, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00001200, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBetterBlockAsm
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBetterBlockAsm:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x07, BX
- CMPL BX, $0x63
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x07, SI
+ CMPL SI, $0x63
JBE check_maxskip_ok_encodeSnappyBetterBlockAsm
- LEAL 100(CX), BX
+ LEAL 100(DX), SI
JMP check_maxskip_cont_encodeSnappyBetterBlockAsm
check_maxskip_ok_encodeSnappyBetterBlockAsm:
- LEAL 1(CX)(BX*1), BX
+ LEAL 1(DX)(SI*1), SI
check_maxskip_cont_encodeSnappyBetterBlockAsm:
- CMPL BX, 8(SP)
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 524312(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 524312(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x00cf1bbcdcbfa563, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x32, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 524288(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 524288(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeSnappyBetterBlockAsm
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeSnappyBetterBlockAsm
no_short_found_encodeSnappyBetterBlockAsm:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBetterBlockAsm
candidateS_match_encodeSnappyBetterBlockAsm:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x2f, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm
match_extend_back_loop_encodeSnappyBetterBlockAsm:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBetterBlockAsm
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBetterBlockAsm
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm
match_extend_back_end_encodeSnappyBetterBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 5(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 5(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
JB match_nolit_end_encodeSnappyBetterBlockAsm
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeSnappyBetterBlockAsm:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- CMPL R11, $0x01
+ CMPL R12, $0x01
JA match_length_ok_encodeSnappyBetterBlockAsm
- CMPL DI, $0x0000ffff
+ CMPL R8, $0x0000ffff
JBE match_length_ok_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
- INCL CX
+ MOVL 20(SP), DX
+ INCL DX
JMP search_loop_encodeSnappyBetterBlockAsm
match_length_ok_encodeSnappyBetterBlockAsm:
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeSnappyBetterBlockAsm
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeSnappyBetterBlockAsm
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_match_emit_encodeSnappyBetterBlockAsm
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_match_emit_encodeSnappyBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL BX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL SI, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
four_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVL BX, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW BX, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
+ MOVL SI, R11
+ SHRL $0x10, R11
+ MOVB $0xf8, (CX)
+ MOVW SI, 1(CX)
+ MOVB R11, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
three_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
two_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeSnappyBetterBlockAsm
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
one_byte_match_emit_encodeSnappyBetterBlockAsm:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
memmove_long_match_emit_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
- CMPL DI, $0x00010000
+ CMPL R8, $0x00010000
JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(R11), R11
- ADDQ $0x05, AX
- CMPL R11, $0x04
+ MOVB $0xff, (CX)
+ MOVL R8, 1(CX)
+ LEAL -64(R12), R12
+ ADDQ $0x05, CX
+ CMPL R12, $0x04
JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
- TESTL R11, R11
+ TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
- XORL BX, BX
- LEAL -1(BX)(R11*4), R11
- MOVB R11, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
+ XORL SI, SI
+ LEAL -1(SI)(R12*4), R12
+ MOVB R12, (CX)
+ MOVL R8, 1(CX)
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
- MOVQ $0x00cf1bbcdcbfa563, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x32, R10
- SHLQ $0x08, R11
- IMULQ BX, R11
- SHRQ $0x2f, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x32, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 524312(SP)(R10*4)
- MOVL R13, 524312(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x00cf1bbcdcbfa563, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x08, R10
+ IMULQ SI, R10
+ SHRQ $0x2f, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x32, R11
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 524288(AX)(R11*4)
+ MOVL R14, 524288(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeSnappyBetterBlockAsm:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeSnappyBetterBlockAsm
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x2f, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x08, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x2f, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x08, R11
+ IMULQ SI, R11
+ SHRQ $0x2f, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeSnappyBetterBlockAsm
emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 5(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm
@@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm:
JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
CMPL DX, $0x01000000
JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
+ MOVB $0xfc, (CX)
+ MOVL DX, 1(CX)
+ ADDQ $0x05, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
+ MOVB $0xf8, (CX)
+ MOVW DX, 1(CX)
+ MOVB BL, 3(CX)
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBetterBlockAsm
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
@@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000900, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm64K:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBetterBlockAsm64K
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBetterBlockAsm64K:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x07, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x07, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm64K
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x30, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 262168(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 262168(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x00cf1bbcdcbfa563, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x30, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x33, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 262144(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 262144(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeSnappyBetterBlockAsm64K
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeSnappyBetterBlockAsm64K
no_short_found_encodeSnappyBetterBlockAsm64K:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBetterBlockAsm64K
candidateS_match_encodeSnappyBetterBlockAsm64K:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x08, R9
- IMULQ R8, R9
- SHRQ $0x30, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x08, R10
+ IMULQ R9, R10
+ SHRQ $0x30, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm64K:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K
match_extend_back_end_encodeSnappyBetterBlockAsm64K:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm64K:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
JB match_nolit_end_encodeSnappyBetterBlockAsm64K
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm64K
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeSnappyBetterBlockAsm64K:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeSnappyBetterBlockAsm64K
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+
+emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm64K
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
- MOVQ $0x00cf1bbcdcbfa563, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x30, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x32, R10
- SHLQ $0x08, R11
- IMULQ BX, R11
- SHRQ $0x30, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x32, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 262168(SP)(R10*4)
- MOVL R13, 262168(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x00cf1bbcdcbfa563, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x08, R10
+ IMULQ SI, R10
+ SHRQ $0x30, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x33, R11
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x30, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x33, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 262144(AX)(R11*4)
+ MOVL R14, 262144(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeSnappyBetterBlockAsm64K:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeSnappyBetterBlockAsm64K
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x08, R9
- IMULQ BX, R9
- SHRQ $0x30, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x08, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x30, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x08, R11
+ IMULQ SI, R11
+ SHRQ $0x30, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeSnappyBetterBlockAsm64K
emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
@@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
@@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000280, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000280, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBetterBlockAsm12B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBetterBlockAsm12B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x06, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x06, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm12B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x34, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 65560(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 65560(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x34, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 65536(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 65536(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeSnappyBetterBlockAsm12B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeSnappyBetterBlockAsm12B
no_short_found_encodeSnappyBetterBlockAsm12B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBetterBlockAsm12B
candidateS_match_encodeSnappyBetterBlockAsm12B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x32, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x32, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm12B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B
match_extend_back_end_encodeSnappyBetterBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm12B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
JB match_nolit_end_encodeSnappyBetterBlockAsm12B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm12B
matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm12B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeSnappyBetterBlockAsm12B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B
three_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeSnappyBetterBlockAsm12B
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm12B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x32, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x34, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x34, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 65560(SP)(R10*4)
- MOVL R13, 65560(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x32, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x34, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 65536(AX)(R11*4)
+ MOVL R14, 65536(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeSnappyBetterBlockAsm12B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeSnappyBetterBlockAsm12B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x32, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x32, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x32, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeSnappyBetterBlockAsm12B
emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
@@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
+ JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
+
+emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
@@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x000000a0, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x000000a0, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBetterBlockAsm10B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBetterBlockAsm10B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm10B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x36, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 16408(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 16408(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x36, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 16384(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 16384(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeSnappyBetterBlockAsm10B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeSnappyBetterBlockAsm10B
no_short_found_encodeSnappyBetterBlockAsm10B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBetterBlockAsm10B
candidateS_match_encodeSnappyBetterBlockAsm10B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x34, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x34, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm10B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B
match_extend_back_end_encodeSnappyBetterBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm10B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
JB match_nolit_end_encodeSnappyBetterBlockAsm10B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm10B
matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm10B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeSnappyBetterBlockAsm10B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B
three_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeSnappyBetterBlockAsm10B
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
- CMPL DI, $0x00000800
+ CMPL R8, $0x00000800
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm10B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x34, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x36, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x36, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 16408(SP)(R10*4)
- MOVL R13, 16408(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x34, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x36, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 16384(AX)(R11*4)
+ MOVL R14, 16384(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeSnappyBetterBlockAsm10B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeSnappyBetterBlockAsm10B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x34, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x34, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x34, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeSnappyBetterBlockAsm10B
emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
@@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
@@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000028, CX
- LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64
+ MOVQ tmp+48(FP), AX
+ MOVQ dst_base+0(FP), CX
+ MOVQ $0x00000028, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_encodeSnappyBetterBlockAsm8B
MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
+ MOVQ src_len+32(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
+ MOVQ src_base+24(FP), BX
search_loop_encodeSnappyBetterBlockAsm8B:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x04, BX
- LEAL 1(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x04, SI
+ LEAL 1(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm8B
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ $0x9e3779b1, BX
- MOVQ SI, R9
- MOVQ SI, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- SHLQ $0x20, R10
- IMULQ BX, R10
- SHRQ $0x38, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 4120(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- MOVL CX, 4120(SP)(R10*4)
- MOVQ (DX)(BX*1), R9
- MOVQ (DX)(DI*1), R10
- CMPQ R9, SI
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ $0x9e3779b1, SI
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ SHLQ $0x20, R11
+ IMULQ SI, R11
+ SHRQ $0x38, R11
+ MOVL (AX)(R10*4), SI
+ MOVL 4096(AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ MOVL DX, 4096(AX)(R11*4)
+ MOVQ (BX)(SI*1), R10
+ MOVQ (BX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- CMPQ R10, SI
+ CMPQ R11, DI
JNE no_short_found_encodeSnappyBetterBlockAsm8B
- MOVL DI, BX
+ MOVL R8, SI
JMP candidate_match_encodeSnappyBetterBlockAsm8B
no_short_found_encodeSnappyBetterBlockAsm8B:
- CMPL R9, SI
+ CMPL R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- CMPL R10, SI
+ CMPL R11, DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_encodeSnappyBetterBlockAsm8B
candidateS_match_encodeSnappyBetterBlockAsm8B:
- SHRQ $0x08, SI
- MOVQ SI, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x36, R9
- MOVL 24(SP)(R9*4), BX
- INCL CX
- MOVL CX, 24(SP)(R9*4)
- CMPL (DX)(BX*1), SI
+ SHRQ $0x08, DI
+ MOVQ DI, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x36, R10
+ MOVL (AX)(R10*4), SI
+ INCL DX
+ MOVL DX, (AX)(R10*4)
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- DECL CX
- MOVL DI, BX
+ DECL DX
+ MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm8B:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B
match_extend_back_end_encodeSnappyBetterBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm8B:
- MOVL CX, SI
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), R9
+ MOVL DX, DI
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+32(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), R10
// matchLen
- XORL R11, R11
+ XORL R12, R12
matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVQ (R8)(R11*1), R10
- MOVQ 8(R8)(R11*1), R12
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ MOVQ 8(R9)(R12*1), R13
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
- XORQ 8(R9)(R11*1), R12
+ XORQ 8(R10)(R12*1), R13
JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
- LEAL -16(DI), DI
- LEAL 16(R11), R11
+ LEAL -16(R8), R8
+ LEAL 16(R12), R12
JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R12, R12
+ TZCNTQ R13, R13
#else
- BSFQ R12, R12
+ BSFQ R13, R13
#endif
- SARQ $0x03, R12
- LEAL 8(R11)(R12*1), R11
+ SARQ $0x03, R13
+ LEAL 8(R12)(R13*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
+ MOVQ (R9)(R12*1), R11
+ XORQ (R10)(R12*1), R11
JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
- LEAL -8(DI), DI
- LEAL 8(R11), R11
+ LEAL -8(R8), R8
+ LEAL 8(R12), R12
JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
+ SARQ $0x03, R11
+ LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVL (R8)(R11*1), R10
- CMPL (R9)(R11*1), R10
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
- LEAL -4(DI), DI
- LEAL 4(R11), R11
+ MOVL (R9)(R12*1), R11
+ CMPL (R10)(R12*1), R11
+ JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
+ LEAL -4(R8), R8
+ LEAL 4(R12), R12
matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
JB match_nolit_end_encodeSnappyBetterBlockAsm8B
- MOVW (R8)(R11*1), R10
- CMPW (R9)(R11*1), R10
+ MOVW (R9)(R12*1), R11
+ CMPW (R10)(R12*1), R11
JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
- LEAL 2(R11), R11
- SUBL $0x02, DI
+ LEAL 2(R12), R12
+ SUBL $0x02, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm8B
matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
- MOVB (R8)(R11*1), R10
- CMPB (R9)(R11*1), R10
+ MOVB (R9)(R12*1), R11
+ CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm8B
- LEAL 1(R11), R11
+ LEAL 1(R12), R12
match_nolit_end_encodeSnappyBetterBlockAsm8B:
- MOVL CX, DI
- SUBL BX, DI
+ MOVL DX, R8
+ SUBL SI, R8
// Check if repeat
- MOVL DI, 16(SP)
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL R8, 16(SP)
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R9
- SUBL BX, R8
- LEAL -1(R8), BX
- CMPL BX, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R10
+ SUBL SI, R9
+ LEAL -1(R9), SI
+ CMPL SI, $0x3c
JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B
three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW BX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW SI, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB BL, 1(AX)
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ MOVB $0xf0, (CX)
+ MOVB SI, 1(CX)
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_match_emit_encodeSnappyBetterBlockAsm8B
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
- SHLB $0x02, BL
- MOVB BL, (AX)
- ADDQ $0x01, AX
+ SHLB $0x02, SI
+ MOVB SI, (CX)
+ ADDQ $0x01, CX
memmove_match_emit_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveShort
- CMPQ R8, $0x08
+ CMPQ R9, $0x08
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
- CMPQ R8, $0x10
+ CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
+ CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
+ MOVQ (R10), R11
+ MOVQ R11, (CX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
+ MOVQ (R10), R11
+ MOVQ -8(R10)(R9*1), R10
+ MOVQ R11, (CX)
+ MOVQ R10, -8(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU -16(R10)(R9*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
- MOVQ BX, AX
+ MOVQ SI, CX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(R8*1), BX
+ LEAQ (CX)(R9*1), SI
// genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R12
- SHRQ $0x05, R12
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R12
+ MOVOU (R10), X0
+ MOVOU 16(R10), X1
+ MOVOU -32(R10)(R9*1), X2
+ MOVOU -16(R10)(R9*1), X3
+ MOVQ R9, R13
+ SHRQ $0x05, R13
+ MOVQ CX, R11
+ ANDL $0x0000001f, R11
+ MOVQ $0x00000040, R14
+ SUBQ R11, R14
+ DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
+ LEAQ -32(R10)(R14*1), R11
+ LEAQ -32(CX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
+ MOVOU (R11), X4
+ MOVOU 16(R11), X5
+ MOVOA X4, (R15)
+ MOVOA X5, 16(R15)
+ ADDQ $0x20, R15
+ ADDQ $0x20, R11
ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R12
+ DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
+ MOVOU -32(R10)(R14*1), X4
+ MOVOU -16(R10)(R14*1), X5
+ MOVOA X4, -32(CX)(R14*1)
+ MOVOA X5, -16(CX)(R14*1)
+ ADDQ $0x20, R14
+ CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ BX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(R9*1)
+ MOVOU X3, -16(CX)(R9*1)
+ MOVQ SI, CX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
- ADDL R11, CX
- ADDL $0x04, R11
- MOVL CX, 12(SP)
+ ADDL R12, DX
+ ADDL $0x04, R12
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R11, $0x40
+ CMPL R12, $0x40
JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(R11), R11
- ADDQ $0x03, AX
+ MOVB $0xee, (CX)
+ MOVW R8, 1(CX)
+ LEAL -60(R12), R12
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
- MOVL R11, BX
- SHLL $0x02, BX
- CMPL R11, $0x0c
+ MOVL R12, SI
+ SHLL $0x02, SI
+ CMPL R12, $0x0c
JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
- LEAL -15(BX), BX
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, BX
- MOVB BL, (AX)
- ADDQ $0x02, AX
+ LEAL -15(SI), SI
+ MOVB R8, 1(CX)
+ SHRL $0x08, R8
+ SHLL $0x05, R8
+ ORL R8, SI
+ MOVB SI, (CX)
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
- LEAL -2(BX), BX
- MOVB BL, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
+ LEAL -2(SI), SI
+ MOVB SI, (CX)
+ MOVW R8, 1(CX)
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_encodeSnappyBetterBlockAsm8B
- CMPQ AX, (SP)
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
- MOVQ $0x0000cf1bbcdcbf9b, BX
- MOVQ $0x9e3779b1, DI
- LEAQ 1(SI), SI
- LEAQ -2(CX), R8
- MOVQ (DX)(SI*1), R9
- MOVQ 1(DX)(SI*1), R10
- MOVQ (DX)(R8*1), R11
- MOVQ 1(DX)(R8*1), R12
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x36, R9
- SHLQ $0x20, R10
- IMULQ DI, R10
- SHRQ $0x38, R10
- SHLQ $0x10, R11
- IMULQ BX, R11
- SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ DI, R12
- SHRQ $0x38, R12
- LEAQ 1(SI), DI
- LEAQ 1(R8), R13
- MOVL SI, 24(SP)(R9*4)
- MOVL R8, 24(SP)(R11*4)
- MOVL DI, 4120(SP)(R10*4)
- MOVL R13, 4120(SP)(R12*4)
- LEAQ 1(R8)(SI*1), DI
- SHRQ $0x01, DI
- ADDQ $0x01, SI
- SUBQ $0x01, R8
+ MOVQ $0x0000cf1bbcdcbf9b, SI
+ MOVQ $0x9e3779b1, R8
+ LEAQ 1(DI), DI
+ LEAQ -2(DX), R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ 1(BX)(DI*1), R11
+ MOVQ (BX)(R9*1), R12
+ MOVQ 1(BX)(R9*1), R13
+ SHLQ $0x10, R10
+ IMULQ SI, R10
+ SHRQ $0x36, R10
+ SHLQ $0x20, R11
+ IMULQ R8, R11
+ SHRQ $0x38, R11
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
+ MOVL DI, (AX)(R10*4)
+ MOVL R9, (AX)(R12*4)
+ MOVL R8, 4096(AX)(R11*4)
+ MOVL R14, 4096(AX)(R13*4)
+ LEAQ 1(R9)(DI*1), R8
+ SHRQ $0x01, R8
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
index_loop_encodeSnappyBetterBlockAsm8B:
- CMPQ DI, R8
+ CMPQ R8, R9
JAE search_loop_encodeSnappyBetterBlockAsm8B
- MOVQ (DX)(SI*1), R9
- MOVQ (DX)(DI*1), R10
- SHLQ $0x10, R9
- IMULQ BX, R9
- SHRQ $0x36, R9
+ MOVQ (BX)(DI*1), R10
+ MOVQ (BX)(R8*1), R11
SHLQ $0x10, R10
- IMULQ BX, R10
+ IMULQ SI, R10
SHRQ $0x36, R10
- MOVL SI, 24(SP)(R9*4)
- MOVL DI, 24(SP)(R10*4)
- ADDQ $0x02, SI
+ SHLQ $0x10, R11
+ IMULQ SI, R11
+ SHRQ $0x36, R11
+ MOVL DI, (AX)(R10*4)
+ MOVL R8, (AX)(R11*4)
ADDQ $0x02, DI
+ ADDQ $0x02, R8
JMP index_loop_encodeSnappyBetterBlockAsm8B
emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+32(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
+ MOVQ $0x00000000, ret+56(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+32(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
@@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
+ MOVB $0xf4, (CX)
+ MOVW DX, 1(CX)
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
+ MOVB $0xf0, (CX)
+ MOVB DL, 1(CX)
+ ADDQ $0x02, CX
CMPL DX, $0x40
JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
+ MOVB DL, (CX)
+ ADDQ $0x01, CX
memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
@@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(BX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(BX*1)
+ MOVB (AX), SI
+ MOVB -1(AX)(BX*1), AL
+ MOVB SI, (CX)
+ MOVB AL, -1(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
+ MOVW (AX), SI
+ MOVB 2(AX), AL
+ MOVW SI, (CX)
+ MOVB AL, 2(CX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
+ MOVL (AX), SI
+ MOVL -4(AX)(BX*1), AX
+ MOVL SI, (CX)
+ MOVL AX, -4(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
+ MOVQ (AX), SI
+ MOVQ -8(AX)(BX*1), AX
+ MOVQ SI, (CX)
+ MOVQ AX, -8(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU -16(AX)(BX*1), X1
+ MOVOU X0, (CX)
+ MOVOU X1, -16(CX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ DX, AX
+ MOVQ DX, CX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
+ LEAQ (CX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU -32(AX)(BX*1), X2
+ MOVOU -16(AX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
- MOVQ AX, SI
+ MOVQ CX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
+ LEAQ -32(AX)(R8*1), SI
+ LEAQ -32(CX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
@@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
+ MOVOU -32(AX)(R8*1), X4
+ MOVOU -16(AX)(R8*1), X5
+ MOVOA X4, -32(CX)(R8*1)
+ MOVOA X5, -16(CX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
+ MOVOU X0, (CX)
+ MOVOU X1, 16(CX)
+ MOVOU X2, -32(CX)(BX*1)
+ MOVOU X3, -16(CX)(BX*1)
+ MOVQ DX, CX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
+ MOVQ dst_base+0(FP), AX
+ SUBQ AX, CX
+ MOVQ CX, ret+56(FP)
RET
-// func calcBlockSize(src []byte) int
+// func calcBlockSize(src []byte, tmp *[32768]byte) int
// Requires: BMI, SSE2
-TEXT ·calcBlockSize(SB), $32792-32
- XORQ AX, AX
- MOVQ $0x00000100, CX
- LEAQ 24(SP), DX
+TEXT ·calcBlockSize(SB), $24-40
+ MOVQ tmp+24(FP), AX
+ XORQ CX, CX
+ MOVQ $0x00000100, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_calcBlockSize:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_calcBlockSize
MOVL $0x00000000, 12(SP)
- MOVQ src_len+8(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+0(FP), DX
+ MOVQ src_len+8(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+0(FP), BX
search_loop_calcBlockSize:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x05, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x05, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_calcBlockSize
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x33, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x10, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x33, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x10, R9
- IMULQ R8, R9
- SHRQ $0x33, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x10, R11
+ IMULQ R9, R11
+ SHRQ $0x33, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x10, R10
+ IMULQ R9, R10
+ SHRQ $0x33, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_calcBlockSize
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_calcBlockSize
repeat_extend_back_loop_calcBlockSize:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_calcBlockSize
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_calcBlockSize
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_calcBlockSize
repeat_extend_back_end_calcBlockSize:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 5(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 5(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_calcBlockSize
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
repeat_dst_size_check_calcBlockSize:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_calcBlockSize
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_calcBlockSize
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_calcBlockSize
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB three_bytes_repeat_emit_calcBlockSize
- CMPL BX, $0x01000000
+ CMPL SI, $0x01000000
JB four_bytes_repeat_emit_calcBlockSize
- ADDQ $0x05, AX
+ ADDQ $0x05, CX
JMP memmove_long_repeat_emit_calcBlockSize
four_bytes_repeat_emit_calcBlockSize:
- ADDQ $0x04, AX
+ ADDQ $0x04, CX
JMP memmove_long_repeat_emit_calcBlockSize
three_bytes_repeat_emit_calcBlockSize:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_calcBlockSize
two_bytes_repeat_emit_calcBlockSize:
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_calcBlockSize
JMP memmove_long_repeat_emit_calcBlockSize
one_byte_repeat_emit_calcBlockSize:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_repeat_emit_calcBlockSize:
- LEAQ (AX)(DI*1), AX
+ LEAQ (CX)(R8*1), CX
JMP emit_literal_done_repeat_emit_calcBlockSize
memmove_long_repeat_emit_calcBlockSize:
- LEAQ (AX)(DI*1), AX
+ LEAQ (CX)(R8*1), CX
emit_literal_done_repeat_emit_calcBlockSize:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+8(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+8(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_calcBlockSize:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_calcBlockSize
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_calcBlockSize
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_calcBlockSize
matchlen_bsf_16repeat_extend_calcBlockSize:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_calcBlockSize
matchlen_match8_repeat_extend_calcBlockSize:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_calcBlockSize
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_calcBlockSize
matchlen_bsf_8_repeat_extend_calcBlockSize:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_calcBlockSize
matchlen_match4_repeat_extend_calcBlockSize:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_calcBlockSize
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_calcBlockSize
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_calcBlockSize:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_calcBlockSize
JB repeat_extend_forward_end_calcBlockSize
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_calcBlockSize
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_calcBlockSize
matchlen_match1_repeat_extend_calcBlockSize:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_calcBlockSize
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_calcBlockSize:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
- CMPL SI, $0x00010000
+ CMPL DI, $0x00010000
JB two_byte_offset_repeat_as_copy_calcBlockSize
four_bytes_loop_back_repeat_as_copy_calcBlockSize:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE four_bytes_remain_repeat_as_copy_calcBlockSize
- LEAL -64(BX), BX
- ADDQ $0x05, AX
- CMPL BX, $0x04
+ LEAL -64(SI), SI
+ ADDQ $0x05, CX
+ CMPL SI, $0x04
JB four_bytes_remain_repeat_as_copy_calcBlockSize
JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize
four_bytes_remain_repeat_as_copy_calcBlockSize:
- TESTL BX, BX
+ TESTL SI, SI
JZ repeat_end_emit_calcBlockSize
- XORL BX, BX
- ADDQ $0x05, AX
+ XORL SI, SI
+ ADDQ $0x05, CX
JMP repeat_end_emit_calcBlockSize
two_byte_offset_repeat_as_copy_calcBlockSize:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_calcBlockSize
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_calcBlockSize
two_byte_offset_short_repeat_as_copy_calcBlockSize:
- MOVL BX, DI
- SHLL $0x02, DI
- CMPL BX, $0x0c
+ MOVL SI, R8
+ SHLL $0x02, R8
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_calcBlockSize
- CMPL SI, $0x00000800
+ CMPL DI, $0x00000800
JAE emit_copy_three_repeat_as_copy_calcBlockSize
- ADDQ $0x02, AX
+ ADDQ $0x02, CX
JMP repeat_end_emit_calcBlockSize
emit_copy_three_repeat_as_copy_calcBlockSize:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
repeat_end_emit_calcBlockSize:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_calcBlockSize
no_repeat_found_calcBlockSize:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_calcBlockSize
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_calcBlockSize
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_calcBlockSize
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_calcBlockSize
candidate3_match_calcBlockSize:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_calcBlockSize
candidate2_match_calcBlockSize:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_calcBlockSize:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_calcBlockSize
match_extend_back_loop_calcBlockSize:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_calcBlockSize
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_calcBlockSize
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_calcBlockSize
JMP match_extend_back_loop_calcBlockSize
match_extend_back_end_calcBlockSize:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 5(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 5(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_calcBlockSize
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
match_dst_size_check_calcBlockSize:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_calcBlockSize
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), DI
+ CMPL DI, $0x3c
JB one_byte_match_emit_calcBlockSize
- CMPL SI, $0x00000100
+ CMPL DI, $0x00000100
JB two_bytes_match_emit_calcBlockSize
- CMPL SI, $0x00010000
+ CMPL DI, $0x00010000
JB three_bytes_match_emit_calcBlockSize
- CMPL SI, $0x01000000
+ CMPL DI, $0x01000000
JB four_bytes_match_emit_calcBlockSize
- ADDQ $0x05, AX
+ ADDQ $0x05, CX
JMP memmove_long_match_emit_calcBlockSize
four_bytes_match_emit_calcBlockSize:
- ADDQ $0x04, AX
+ ADDQ $0x04, CX
JMP memmove_long_match_emit_calcBlockSize
three_bytes_match_emit_calcBlockSize:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_calcBlockSize
two_bytes_match_emit_calcBlockSize:
- ADDQ $0x02, AX
- CMPL SI, $0x40
+ ADDQ $0x02, CX
+ CMPL DI, $0x40
JB memmove_match_emit_calcBlockSize
JMP memmove_long_match_emit_calcBlockSize
one_byte_match_emit_calcBlockSize:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_match_emit_calcBlockSize:
- LEAQ (AX)(R8*1), AX
+ LEAQ (CX)(R9*1), CX
JMP emit_literal_done_match_emit_calcBlockSize
memmove_long_match_emit_calcBlockSize:
- LEAQ (AX)(R8*1), AX
+ LEAQ (CX)(R9*1), CX
emit_literal_done_match_emit_calcBlockSize:
match_nolit_loop_calcBlockSize:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+8(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+8(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_calcBlockSize:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_calcBlockSize
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_calcBlockSize
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_calcBlockSize
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_calcBlockSize
matchlen_bsf_16match_nolit_calcBlockSize:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_calcBlockSize
matchlen_match8_match_nolit_calcBlockSize:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_calcBlockSize
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_calcBlockSize
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_calcBlockSize
matchlen_bsf_8_match_nolit_calcBlockSize:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_calcBlockSize
matchlen_match4_match_nolit_calcBlockSize:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_calcBlockSize
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_calcBlockSize
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_calcBlockSize:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_calcBlockSize
JB match_nolit_end_calcBlockSize
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_calcBlockSize
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_calcBlockSize
matchlen_match1_match_nolit_calcBlockSize:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_calcBlockSize
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_calcBlockSize:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
- CMPL BX, $0x00010000
+ CMPL SI, $0x00010000
JB two_byte_offset_match_nolit_calcBlockSize
four_bytes_loop_back_match_nolit_calcBlockSize:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE four_bytes_remain_match_nolit_calcBlockSize
- LEAL -64(R9), R9
- ADDQ $0x05, AX
- CMPL R9, $0x04
+ LEAL -64(R10), R10
+ ADDQ $0x05, CX
+ CMPL R10, $0x04
JB four_bytes_remain_match_nolit_calcBlockSize
JMP four_bytes_loop_back_match_nolit_calcBlockSize
four_bytes_remain_match_nolit_calcBlockSize:
- TESTL R9, R9
+ TESTL R10, R10
JZ match_nolit_emitcopy_end_calcBlockSize
- XORL BX, BX
- ADDQ $0x05, AX
+ XORL SI, SI
+ ADDQ $0x05, CX
JMP match_nolit_emitcopy_end_calcBlockSize
two_byte_offset_match_nolit_calcBlockSize:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_calcBlockSize
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_calcBlockSize
two_byte_offset_short_match_nolit_calcBlockSize:
- MOVL R9, SI
- SHLL $0x02, SI
- CMPL R9, $0x0c
+ MOVL R10, DI
+ SHLL $0x02, DI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_calcBlockSize
- CMPL BX, $0x00000800
+ CMPL SI, $0x00000800
JAE emit_copy_three_match_nolit_calcBlockSize
- ADDQ $0x02, AX
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_calcBlockSize
emit_copy_three_match_nolit_calcBlockSize:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_calcBlockSize:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_calcBlockSize
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_calcBlockSize
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
match_nolit_dst_ok_calcBlockSize:
- MOVQ $0x0000cf1bbcdcbf9b, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x10, DI
- IMULQ R8, DI
- SHRQ $0x33, DI
- SHLQ $0x10, BX
- IMULQ R8, BX
- SHRQ $0x33, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x0000cf1bbcdcbf9b, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x10, R8
+ IMULQ R9, R8
+ SHRQ $0x33, R8
+ SHLQ $0x10, SI
+ IMULQ R9, SI
+ SHRQ $0x33, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_calcBlockSize
- INCL CX
+ INCL DX
JMP search_loop_calcBlockSize
emit_remainder_calcBlockSize:
- MOVQ src_len+8(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+8(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 5(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_calcBlockSize
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
emit_remainder_ok_calcBlockSize:
- MOVQ src_len+8(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+8(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_calcBlockSize
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), CX
- CMPL CX, $0x3c
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
+ LEAL -1(SI), AX
+ CMPL AX, $0x3c
JB one_byte_emit_remainder_calcBlockSize
- CMPL CX, $0x00000100
+ CMPL AX, $0x00000100
JB two_bytes_emit_remainder_calcBlockSize
- CMPL CX, $0x00010000
+ CMPL AX, $0x00010000
JB three_bytes_emit_remainder_calcBlockSize
- CMPL CX, $0x01000000
+ CMPL AX, $0x01000000
JB four_bytes_emit_remainder_calcBlockSize
- ADDQ $0x05, AX
+ ADDQ $0x05, CX
JMP memmove_long_emit_remainder_calcBlockSize
four_bytes_emit_remainder_calcBlockSize:
- ADDQ $0x04, AX
+ ADDQ $0x04, CX
JMP memmove_long_emit_remainder_calcBlockSize
three_bytes_emit_remainder_calcBlockSize:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_calcBlockSize
two_bytes_emit_remainder_calcBlockSize:
- ADDQ $0x02, AX
- CMPL CX, $0x40
+ ADDQ $0x02, CX
+ CMPL AX, $0x40
JB memmove_emit_remainder_calcBlockSize
JMP memmove_long_emit_remainder_calcBlockSize
one_byte_emit_remainder_calcBlockSize:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_emit_remainder_calcBlockSize:
- LEAQ (AX)(SI*1), AX
+ LEAQ (CX)(SI*1), AX
+ MOVQ AX, CX
JMP emit_literal_done_emit_remainder_calcBlockSize
memmove_long_emit_remainder_calcBlockSize:
- LEAQ (AX)(SI*1), AX
+ LEAQ (CX)(SI*1), AX
+ MOVQ AX, CX
emit_literal_done_emit_remainder_calcBlockSize:
- MOVQ AX, ret+24(FP)
+ MOVQ CX, ret+32(FP)
RET
-// func calcBlockSizeSmall(src []byte) int
+// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
// Requires: BMI, SSE2
-TEXT ·calcBlockSizeSmall(SB), $2072-32
- XORQ AX, AX
- MOVQ $0x00000010, CX
- LEAQ 24(SP), DX
+TEXT ·calcBlockSizeSmall(SB), $24-40
+ MOVQ tmp+24(FP), AX
+ XORQ CX, CX
+ MOVQ $0x00000010, DX
+ MOVQ AX, BX
PXOR X0, X0
zero_loop_calcBlockSizeSmall:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
+ MOVOU X0, (BX)
+ MOVOU X0, 16(BX)
+ MOVOU X0, 32(BX)
+ MOVOU X0, 48(BX)
+ MOVOU X0, 64(BX)
+ MOVOU X0, 80(BX)
+ MOVOU X0, 96(BX)
+ MOVOU X0, 112(BX)
+ ADDQ $0x80, BX
+ DECQ DX
JNZ zero_loop_calcBlockSizeSmall
MOVL $0x00000000, 12(SP)
- MOVQ src_len+8(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), BX
- MOVL BX, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+0(FP), DX
+ MOVQ src_len+8(FP), DX
+ LEAQ -9(DX), BX
+ LEAQ -8(DX), SI
+ MOVL SI, 8(SP)
+ SHRQ $0x05, DX
+ SUBL DX, BX
+ LEAQ (CX)(BX*1), BX
+ MOVQ BX, (SP)
+ MOVL $0x00000001, DX
+ MOVL DX, 16(SP)
+ MOVQ src_base+0(FP), BX
search_loop_calcBlockSizeSmall:
- MOVL CX, BX
- SUBL 12(SP), BX
- SHRL $0x04, BX
- LEAL 4(CX)(BX*1), BX
- CMPL BX, 8(SP)
+ MOVL DX, SI
+ SUBL 12(SP), SI
+ SHRL $0x04, SI
+ LEAL 4(DX)(SI*1), SI
+ CMPL SI, 8(SP)
JAE emit_remainder_calcBlockSizeSmall
- MOVQ (DX)(CX*1), SI
- MOVL BX, 20(SP)
- MOVQ $0x9e3779b1, R8
- MOVQ SI, R9
- MOVQ SI, R10
- SHRQ $0x08, R10
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x37, R9
+ MOVQ (BX)(DX*1), DI
+ MOVL SI, 20(SP)
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R10
+ MOVQ DI, R11
+ SHRQ $0x08, R11
SHLQ $0x20, R10
- IMULQ R8, R10
+ IMULQ R9, R10
SHRQ $0x37, R10
- MOVL 24(SP)(R9*4), BX
- MOVL 24(SP)(R10*4), DI
- MOVL CX, 24(SP)(R9*4)
- LEAL 1(CX), R9
- MOVL R9, 24(SP)(R10*4)
- MOVQ SI, R9
- SHRQ $0x10, R9
- SHLQ $0x20, R9
- IMULQ R8, R9
- SHRQ $0x37, R9
- MOVL CX, R8
- SUBL 16(SP), R8
- MOVL 1(DX)(R8*1), R10
- MOVQ SI, R8
- SHRQ $0x08, R8
- CMPL R8, R10
+ SHLQ $0x20, R11
+ IMULQ R9, R11
+ SHRQ $0x37, R11
+ MOVL (AX)(R10*4), SI
+ MOVL (AX)(R11*4), R8
+ MOVL DX, (AX)(R10*4)
+ LEAL 1(DX), R10
+ MOVL R10, (AX)(R11*4)
+ MOVQ DI, R10
+ SHRQ $0x10, R10
+ SHLQ $0x20, R10
+ IMULQ R9, R10
+ SHRQ $0x37, R10
+ MOVL DX, R9
+ SUBL 16(SP), R9
+ MOVL 1(BX)(R9*1), R11
+ MOVQ DI, R9
+ SHRQ $0x08, R9
+ CMPL R9, R11
JNE no_repeat_found_calcBlockSizeSmall
- LEAL 1(CX), SI
- MOVL 12(SP), BX
- MOVL SI, DI
- SUBL 16(SP), DI
+ LEAL 1(DX), DI
+ MOVL 12(SP), SI
+ MOVL DI, R8
+ SUBL 16(SP), R8
JZ repeat_extend_back_end_calcBlockSizeSmall
repeat_extend_back_loop_calcBlockSizeSmall:
- CMPL SI, BX
+ CMPL DI, SI
JBE repeat_extend_back_end_calcBlockSizeSmall
- MOVB -1(DX)(DI*1), R8
- MOVB -1(DX)(SI*1), R9
- CMPB R8, R9
+ MOVB -1(BX)(R8*1), R9
+ MOVB -1(BX)(DI*1), R10
+ CMPB R9, R10
JNE repeat_extend_back_end_calcBlockSizeSmall
- LEAL -1(SI), SI
- DECL DI
+ LEAL -1(DI), DI
+ DECL R8
JNZ repeat_extend_back_loop_calcBlockSizeSmall
repeat_extend_back_end_calcBlockSizeSmall:
- MOVL SI, BX
- SUBL 12(SP), BX
- LEAQ 3(AX)(BX*1), BX
- CMPQ BX, (SP)
+ MOVL DI, SI
+ SUBL 12(SP), SI
+ LEAQ 3(CX)(SI*1), SI
+ CMPQ SI, (SP)
JB repeat_dst_size_check_calcBlockSizeSmall
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
repeat_dst_size_check_calcBlockSizeSmall:
- MOVL 12(SP), BX
- CMPL BX, SI
+ MOVL 12(SP), SI
+ CMPL SI, DI
JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall
- MOVL SI, DI
- MOVL SI, 12(SP)
- LEAQ (DX)(BX*1), R8
- SUBL BX, DI
- LEAL -1(DI), BX
- CMPL BX, $0x3c
+ MOVL DI, R8
+ MOVL DI, 12(SP)
+ LEAQ (BX)(SI*1), R9
+ SUBL SI, R8
+ LEAL -1(R8), SI
+ CMPL SI, $0x3c
JB one_byte_repeat_emit_calcBlockSizeSmall
- CMPL BX, $0x00000100
+ CMPL SI, $0x00000100
JB two_bytes_repeat_emit_calcBlockSizeSmall
JB three_bytes_repeat_emit_calcBlockSizeSmall
three_bytes_repeat_emit_calcBlockSizeSmall:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_repeat_emit_calcBlockSizeSmall
two_bytes_repeat_emit_calcBlockSizeSmall:
- ADDQ $0x02, AX
- CMPL BX, $0x40
+ ADDQ $0x02, CX
+ CMPL SI, $0x40
JB memmove_repeat_emit_calcBlockSizeSmall
JMP memmove_long_repeat_emit_calcBlockSizeSmall
one_byte_repeat_emit_calcBlockSizeSmall:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_repeat_emit_calcBlockSizeSmall:
- LEAQ (AX)(DI*1), AX
+ LEAQ (CX)(R8*1), CX
JMP emit_literal_done_repeat_emit_calcBlockSizeSmall
memmove_long_repeat_emit_calcBlockSizeSmall:
- LEAQ (AX)(DI*1), AX
+ LEAQ (CX)(R8*1), CX
emit_literal_done_repeat_emit_calcBlockSizeSmall:
- ADDL $0x05, CX
- MOVL CX, BX
- SUBL 16(SP), BX
- MOVQ src_len+8(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(BX*1), BX
+ ADDL $0x05, DX
+ MOVL DX, SI
+ SUBL 16(SP), SI
+ MOVQ src_len+8(FP), R8
+ SUBL DX, R8
+ LEAQ (BX)(DX*1), R9
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R10, R10
+ XORL R11, R11
matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
- CMPL DI, $0x10
+ CMPL R8, $0x10
JB matchlen_match8_repeat_extend_calcBlockSizeSmall
- MOVQ (R8)(R10*1), R9
- MOVQ 8(R8)(R10*1), R11
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
- XORQ 8(BX)(R10*1), R11
+ XORQ 8(SI)(R11*1), R12
JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall
- LEAL -16(DI), DI
- LEAL 16(R10), R10
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
#ifdef GOAMD64_v3
- TZCNTQ R11, R11
+ TZCNTQ R12, R12
#else
- BSFQ R11, R11
+ BSFQ R12, R12
#endif
- SARQ $0x03, R11
- LEAL 8(R10)(R11*1), R10
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
JMP repeat_extend_forward_end_calcBlockSizeSmall
matchlen_match8_repeat_extend_calcBlockSizeSmall:
- CMPL DI, $0x08
+ CMPL R8, $0x08
JB matchlen_match4_repeat_extend_calcBlockSizeSmall
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
+ MOVQ (R9)(R11*1), R10
+ XORQ (SI)(R11*1), R10
JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
- LEAL -8(DI), DI
- LEAL 8(R10), R10
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
JMP matchlen_match4_repeat_extend_calcBlockSizeSmall
matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
#ifdef GOAMD64_v3
- TZCNTQ R9, R9
+ TZCNTQ R10, R10
#else
- BSFQ R9, R9
+ BSFQ R10, R10
#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
+ SARQ $0x03, R10
+ LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_calcBlockSizeSmall
matchlen_match4_repeat_extend_calcBlockSizeSmall:
- CMPL DI, $0x04
+ CMPL R8, $0x04
JB matchlen_match2_repeat_extend_calcBlockSizeSmall
- MOVL (R8)(R10*1), R9
- CMPL (BX)(R10*1), R9
+ MOVL (R9)(R11*1), R10
+ CMPL (SI)(R11*1), R10
JNE matchlen_match2_repeat_extend_calcBlockSizeSmall
- LEAL -4(DI), DI
- LEAL 4(R10), R10
+ LEAL -4(R8), R8
+ LEAL 4(R11), R11
matchlen_match2_repeat_extend_calcBlockSizeSmall:
- CMPL DI, $0x01
+ CMPL R8, $0x01
JE matchlen_match1_repeat_extend_calcBlockSizeSmall
JB repeat_extend_forward_end_calcBlockSizeSmall
- MOVW (R8)(R10*1), R9
- CMPW (BX)(R10*1), R9
+ MOVW (R9)(R11*1), R10
+ CMPW (SI)(R11*1), R10
JNE matchlen_match1_repeat_extend_calcBlockSizeSmall
- LEAL 2(R10), R10
- SUBL $0x02, DI
+ LEAL 2(R11), R11
+ SUBL $0x02, R8
JZ repeat_extend_forward_end_calcBlockSizeSmall
matchlen_match1_repeat_extend_calcBlockSizeSmall:
- MOVB (R8)(R10*1), R9
- CMPB (BX)(R10*1), R9
+ MOVB (R9)(R11*1), R10
+ CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_calcBlockSizeSmall
- LEAL 1(R10), R10
+ LEAL 1(R11), R11
repeat_extend_forward_end_calcBlockSizeSmall:
- ADDL R10, CX
- MOVL CX, BX
- SUBL SI, BX
- MOVL 16(SP), SI
+ ADDL R11, DX
+ MOVL DX, SI
+ SUBL DI, SI
+ MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
- CMPL BX, $0x40
+ CMPL SI, $0x40
JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
- LEAL -60(BX), BX
- ADDQ $0x03, AX
+ LEAL -60(SI), SI
+ ADDQ $0x03, CX
JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall
two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
- MOVL BX, SI
- SHLL $0x02, SI
- CMPL BX, $0x0c
+ MOVL SI, DI
+ SHLL $0x02, DI
+ CMPL SI, $0x0c
JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall
- ADDQ $0x02, AX
+ ADDQ $0x02, CX
JMP repeat_end_emit_calcBlockSizeSmall
emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
repeat_end_emit_calcBlockSizeSmall:
- MOVL CX, 12(SP)
+ MOVL DX, 12(SP)
JMP search_loop_calcBlockSizeSmall
no_repeat_found_calcBlockSizeSmall:
- CMPL (DX)(BX*1), SI
+ CMPL (BX)(SI*1), DI
JEQ candidate_match_calcBlockSizeSmall
- SHRQ $0x08, SI
- MOVL 24(SP)(R9*4), BX
- LEAL 2(CX), R8
- CMPL (DX)(DI*1), SI
+ SHRQ $0x08, DI
+ MOVL (AX)(R10*4), SI
+ LEAL 2(DX), R9
+ CMPL (BX)(R8*1), DI
JEQ candidate2_match_calcBlockSizeSmall
- MOVL R8, 24(SP)(R9*4)
- SHRQ $0x08, SI
- CMPL (DX)(BX*1), SI
+ MOVL R9, (AX)(R10*4)
+ SHRQ $0x08, DI
+ CMPL (BX)(SI*1), DI
JEQ candidate3_match_calcBlockSizeSmall
- MOVL 20(SP), CX
+ MOVL 20(SP), DX
JMP search_loop_calcBlockSizeSmall
candidate3_match_calcBlockSizeSmall:
- ADDL $0x02, CX
+ ADDL $0x02, DX
JMP candidate_match_calcBlockSizeSmall
candidate2_match_calcBlockSizeSmall:
- MOVL R8, 24(SP)(R9*4)
- INCL CX
- MOVL DI, BX
+ MOVL R9, (AX)(R10*4)
+ INCL DX
+ MOVL R8, SI
candidate_match_calcBlockSizeSmall:
- MOVL 12(SP), SI
- TESTL BX, BX
+ MOVL 12(SP), DI
+ TESTL SI, SI
JZ match_extend_back_end_calcBlockSizeSmall
match_extend_back_loop_calcBlockSizeSmall:
- CMPL CX, SI
+ CMPL DX, DI
JBE match_extend_back_end_calcBlockSizeSmall
- MOVB -1(DX)(BX*1), DI
- MOVB -1(DX)(CX*1), R8
- CMPB DI, R8
+ MOVB -1(BX)(SI*1), R8
+ MOVB -1(BX)(DX*1), R9
+ CMPB R8, R9
JNE match_extend_back_end_calcBlockSizeSmall
- LEAL -1(CX), CX
- DECL BX
+ LEAL -1(DX), DX
+ DECL SI
JZ match_extend_back_end_calcBlockSizeSmall
JMP match_extend_back_loop_calcBlockSizeSmall
match_extend_back_end_calcBlockSizeSmall:
- MOVL CX, SI
- SUBL 12(SP), SI
- LEAQ 3(AX)(SI*1), SI
- CMPQ SI, (SP)
+ MOVL DX, DI
+ SUBL 12(SP), DI
+ LEAQ 3(CX)(DI*1), DI
+ CMPQ DI, (SP)
JB match_dst_size_check_calcBlockSizeSmall
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
match_dst_size_check_calcBlockSizeSmall:
- MOVL CX, SI
- MOVL 12(SP), DI
- CMPL DI, SI
+ MOVL DX, DI
+ MOVL 12(SP), R8
+ CMPL R8, DI
JEQ emit_literal_done_match_emit_calcBlockSizeSmall
- MOVL SI, R8
- MOVL SI, 12(SP)
- LEAQ (DX)(DI*1), SI
- SUBL DI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
+ MOVL DI, R9
+ MOVL DI, 12(SP)
+ LEAQ (BX)(R8*1), DI
+ SUBL R8, R9
+ LEAL -1(R9), DI
+ CMPL DI, $0x3c
JB one_byte_match_emit_calcBlockSizeSmall
- CMPL SI, $0x00000100
+ CMPL DI, $0x00000100
JB two_bytes_match_emit_calcBlockSizeSmall
JB three_bytes_match_emit_calcBlockSizeSmall
three_bytes_match_emit_calcBlockSizeSmall:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_match_emit_calcBlockSizeSmall
two_bytes_match_emit_calcBlockSizeSmall:
- ADDQ $0x02, AX
- CMPL SI, $0x40
+ ADDQ $0x02, CX
+ CMPL DI, $0x40
JB memmove_match_emit_calcBlockSizeSmall
JMP memmove_long_match_emit_calcBlockSizeSmall
one_byte_match_emit_calcBlockSizeSmall:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_match_emit_calcBlockSizeSmall:
- LEAQ (AX)(R8*1), AX
+ LEAQ (CX)(R9*1), CX
JMP emit_literal_done_match_emit_calcBlockSizeSmall
memmove_long_match_emit_calcBlockSizeSmall:
- LEAQ (AX)(R8*1), AX
+ LEAQ (CX)(R9*1), CX
emit_literal_done_match_emit_calcBlockSizeSmall:
match_nolit_loop_calcBlockSizeSmall:
- MOVL CX, SI
- SUBL BX, SI
- MOVL SI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, BX
- MOVQ src_len+8(FP), SI
- SUBL CX, SI
- LEAQ (DX)(CX*1), DI
- LEAQ (DX)(BX*1), BX
+ MOVL DX, DI
+ SUBL SI, DI
+ MOVL DI, 16(SP)
+ ADDL $0x04, DX
+ ADDL $0x04, SI
+ MOVQ src_len+8(FP), DI
+ SUBL DX, DI
+ LEAQ (BX)(DX*1), R8
+ LEAQ (BX)(SI*1), SI
// matchLen
- XORL R9, R9
+ XORL R10, R10
matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
- CMPL SI, $0x10
+ CMPL DI, $0x10
JB matchlen_match8_match_nolit_calcBlockSizeSmall
- MOVQ (DI)(R9*1), R8
- MOVQ 8(DI)(R9*1), R10
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
- XORQ 8(BX)(R9*1), R10
+ XORQ 8(SI)(R10*1), R11
JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall
- LEAL -16(SI), SI
- LEAL 16(R9), R9
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall
matchlen_bsf_16match_nolit_calcBlockSizeSmall:
#ifdef GOAMD64_v3
- TZCNTQ R10, R10
+ TZCNTQ R11, R11
#else
- BSFQ R10, R10
+ BSFQ R11, R11
#endif
- SARQ $0x03, R10
- LEAL 8(R9)(R10*1), R9
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
JMP match_nolit_end_calcBlockSizeSmall
matchlen_match8_match_nolit_calcBlockSizeSmall:
- CMPL SI, $0x08
+ CMPL DI, $0x08
JB matchlen_match4_match_nolit_calcBlockSizeSmall
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
+ MOVQ (R8)(R10*1), R9
+ XORQ (SI)(R10*1), R9
JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
- LEAL -8(SI), SI
- LEAL 8(R9), R9
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
JMP matchlen_match4_match_nolit_calcBlockSizeSmall
matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
#ifdef GOAMD64_v3
- TZCNTQ R8, R8
+ TZCNTQ R9, R9
#else
- BSFQ R8, R8
+ BSFQ R9, R9
#endif
- SARQ $0x03, R8
- LEAL (R9)(R8*1), R9
+ SARQ $0x03, R9
+ LEAL (R10)(R9*1), R10
JMP match_nolit_end_calcBlockSizeSmall
matchlen_match4_match_nolit_calcBlockSizeSmall:
- CMPL SI, $0x04
+ CMPL DI, $0x04
JB matchlen_match2_match_nolit_calcBlockSizeSmall
- MOVL (DI)(R9*1), R8
- CMPL (BX)(R9*1), R8
+ MOVL (R8)(R10*1), R9
+ CMPL (SI)(R10*1), R9
JNE matchlen_match2_match_nolit_calcBlockSizeSmall
- LEAL -4(SI), SI
- LEAL 4(R9), R9
+ LEAL -4(DI), DI
+ LEAL 4(R10), R10
matchlen_match2_match_nolit_calcBlockSizeSmall:
- CMPL SI, $0x01
+ CMPL DI, $0x01
JE matchlen_match1_match_nolit_calcBlockSizeSmall
JB match_nolit_end_calcBlockSizeSmall
- MOVW (DI)(R9*1), R8
- CMPW (BX)(R9*1), R8
+ MOVW (R8)(R10*1), R9
+ CMPW (SI)(R10*1), R9
JNE matchlen_match1_match_nolit_calcBlockSizeSmall
- LEAL 2(R9), R9
- SUBL $0x02, SI
+ LEAL 2(R10), R10
+ SUBL $0x02, DI
JZ match_nolit_end_calcBlockSizeSmall
matchlen_match1_match_nolit_calcBlockSizeSmall:
- MOVB (DI)(R9*1), R8
- CMPB (BX)(R9*1), R8
+ MOVB (R8)(R10*1), R9
+ CMPB (SI)(R10*1), R9
JNE match_nolit_end_calcBlockSizeSmall
- LEAL 1(R9), R9
+ LEAL 1(R10), R10
match_nolit_end_calcBlockSizeSmall:
- ADDL R9, CX
- MOVL 16(SP), BX
- ADDL $0x04, R9
- MOVL CX, 12(SP)
+ ADDL R10, DX
+ MOVL 16(SP), SI
+ ADDL $0x04, R10
+ MOVL DX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_calcBlockSizeSmall:
- CMPL R9, $0x40
+ CMPL R10, $0x40
JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall
- LEAL -60(R9), R9
- ADDQ $0x03, AX
+ LEAL -60(R10), R10
+ ADDQ $0x03, CX
JMP two_byte_offset_match_nolit_calcBlockSizeSmall
two_byte_offset_short_match_nolit_calcBlockSizeSmall:
- MOVL R9, BX
- SHLL $0x02, BX
- CMPL R9, $0x0c
+ MOVL R10, SI
+ SHLL $0x02, SI
+ CMPL R10, $0x0c
JAE emit_copy_three_match_nolit_calcBlockSizeSmall
- ADDQ $0x02, AX
+ ADDQ $0x02, CX
JMP match_nolit_emitcopy_end_calcBlockSizeSmall
emit_copy_three_match_nolit_calcBlockSizeSmall:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
match_nolit_emitcopy_end_calcBlockSizeSmall:
- CMPL CX, 8(SP)
+ CMPL DX, 8(SP)
JAE emit_remainder_calcBlockSizeSmall
- MOVQ -2(DX)(CX*1), SI
- CMPQ AX, (SP)
+ MOVQ -2(BX)(DX*1), DI
+ CMPQ CX, (SP)
JB match_nolit_dst_ok_calcBlockSizeSmall
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
match_nolit_dst_ok_calcBlockSizeSmall:
- MOVQ $0x9e3779b1, R8
- MOVQ SI, DI
- SHRQ $0x10, SI
- MOVQ SI, BX
- SHLQ $0x20, DI
- IMULQ R8, DI
- SHRQ $0x37, DI
- SHLQ $0x20, BX
- IMULQ R8, BX
- SHRQ $0x37, BX
- LEAL -2(CX), R8
- LEAQ 24(SP)(BX*4), R9
- MOVL (R9), BX
- MOVL R8, 24(SP)(DI*4)
- MOVL CX, (R9)
- CMPL (DX)(BX*1), SI
+ MOVQ $0x9e3779b1, R9
+ MOVQ DI, R8
+ SHRQ $0x10, DI
+ MOVQ DI, SI
+ SHLQ $0x20, R8
+ IMULQ R9, R8
+ SHRQ $0x37, R8
+ SHLQ $0x20, SI
+ IMULQ R9, SI
+ SHRQ $0x37, SI
+ LEAL -2(DX), R9
+ LEAQ (AX)(SI*4), R10
+ MOVL (R10), SI
+ MOVL R9, (AX)(R8*4)
+ MOVL DX, (R10)
+ CMPL (BX)(SI*1), DI
JEQ match_nolit_loop_calcBlockSizeSmall
- INCL CX
+ INCL DX
JMP search_loop_calcBlockSizeSmall
emit_remainder_calcBlockSizeSmall:
- MOVQ src_len+8(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
+ MOVQ src_len+8(FP), AX
+ SUBL 12(SP), AX
+ LEAQ 3(CX)(AX*1), AX
+ CMPQ AX, (SP)
JB emit_remainder_ok_calcBlockSizeSmall
- MOVQ $0x00000000, ret+24(FP)
+ MOVQ $0x00000000, ret+32(FP)
RET
emit_remainder_ok_calcBlockSizeSmall:
- MOVQ src_len+8(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
+ MOVQ src_len+8(FP), AX
+ MOVL 12(SP), DX
+ CMPL DX, AX
JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), CX
- CMPL CX, $0x3c
+ MOVL AX, SI
+ MOVL AX, 12(SP)
+ LEAQ (BX)(DX*1), AX
+ SUBL DX, SI
+ LEAL -1(SI), AX
+ CMPL AX, $0x3c
JB one_byte_emit_remainder_calcBlockSizeSmall
- CMPL CX, $0x00000100
+ CMPL AX, $0x00000100
JB two_bytes_emit_remainder_calcBlockSizeSmall
JB three_bytes_emit_remainder_calcBlockSizeSmall
three_bytes_emit_remainder_calcBlockSizeSmall:
- ADDQ $0x03, AX
+ ADDQ $0x03, CX
JMP memmove_long_emit_remainder_calcBlockSizeSmall
two_bytes_emit_remainder_calcBlockSizeSmall:
- ADDQ $0x02, AX
- CMPL CX, $0x40
+ ADDQ $0x02, CX
+ CMPL AX, $0x40
JB memmove_emit_remainder_calcBlockSizeSmall
JMP memmove_long_emit_remainder_calcBlockSizeSmall
one_byte_emit_remainder_calcBlockSizeSmall:
- ADDQ $0x01, AX
+ ADDQ $0x01, CX
memmove_emit_remainder_calcBlockSizeSmall:
- LEAQ (AX)(SI*1), AX
+ LEAQ (CX)(SI*1), AX
+ MOVQ AX, CX
JMP emit_literal_done_emit_remainder_calcBlockSizeSmall
memmove_long_emit_remainder_calcBlockSizeSmall:
- LEAQ (AX)(SI*1), AX
+ LEAQ (CX)(SI*1), AX
+ MOVQ AX, CX
emit_literal_done_emit_remainder_calcBlockSizeSmall:
- MOVQ AX, ret+24(FP)
+ MOVQ CX, ret+32(FP)
RET
// func emitLiteral(dst []byte, lit []byte) int
@@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
- LEAQ -10(AX)(CX*1), CX
+ LEAQ -8(AX)(CX*1), CX
XORQ DI, DI
lz4_s2_loop:
@@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
- LEAQ -10(AX)(CX*1), CX
+ LEAQ -8(AX)(CX*1), CX
XORQ DI, DI
lz4s_s2_loop:
@@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
- LEAQ -10(AX)(CX*1), CX
+ LEAQ -8(AX)(CX*1), CX
lz4_snappy_loop:
CMPQ DX, BX
@@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
- LEAQ -10(AX)(CX*1), CX
+ LEAQ -8(AX)(CX*1), CX
lz4s_snappy_loop:
CMPQ DX, BX
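
The calcBlockSize and calcBlockSizeSmall routines above no longer reserve their hash tables on the stack; they now take a caller-provided scratch table (tmp *[32768]byte and tmp *[2048]byte respectively), which is why the TEXT frame sizes shrink and every table access goes through the tmp pointer held in AX. The matching Go-side declarations are not shown in this hunk, so the stubs below are only a hedged sketch of what they are expected to look like (the //go:noescape directives are an assumption):

    //go:noescape
    func calcBlockSize(src []byte, tmp *[32768]byte) int

    //go:noescape
    func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
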
diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go
index 0a46f2b984f..fd15078f7df 100644
--- a/vendor/github.com/klauspost/compress/s2/writer.go
+++ b/vendor/github.com/klauspost/compress/s2/writer.go
@@ -83,11 +83,14 @@ type Writer struct {
snappy bool
flushOnWrite bool
appendIndex bool
+ bufferCB func([]byte)
level uint8
}
type result struct {
b []byte
+ // return when writing
+ ret []byte
// Uncompressed start offset
startOffset int64
}
@@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) {
for write := range toWrite {
// Wait for the data to be available.
input := <-write
+ if input.ret != nil && w.bufferCB != nil {
+ w.bufferCB(input.ret)
+ input.ret = nil
+ }
in := input.b
if len(in) > 0 {
if w.err(nil) == nil {
@@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
// but the input buffer cannot be written to by the caller
// until Flush or Close has been called when concurrency != 1.
//
-// If you cannot control that, use the regular Write function.
+// Use the WriterBufferDone to receive a callback when the buffer is done
+// Processing.
//
// Note that input is not buffered.
// This means that each write will result in discrete blocks being created.
@@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
}
if w.concurrency == 1 {
_, err := w.writeSync(buf)
+ if w.bufferCB != nil {
+ w.bufferCB(buf)
+ }
return err
}
@@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
}
}
-
+ orgBuf := buf
for len(buf) > 0 {
// Cut input.
uncompressed := buf
@@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
startOffset: w.uncompWritten,
}
w.uncompWritten += int64(len(uncompressed))
+ if len(buf) == 0 && w.bufferCB != nil {
+ res.ret = orgBuf
+ }
go func() {
race.ReadSlice(uncompressed)
@@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption {
}
// WriterBestCompression will enable better compression.
-// EncodeBetter compresses better than Encode but typically with a
+// EncodeBest compresses better than Encode but typically with a
// big speed decrease on compression.
func WriterBestCompression() WriterOption {
return func(w *Writer) error {
@@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption {
}
}
+// WriterBufferDone will perform a callback when EncodeBuffer has finished
+// writing a buffer to the output and the buffer can safely be reused.
+// If the buffer was split into several blocks, it will be sent after the last block.
+// Callbacks will not be done concurrently.
+func WriterBufferDone(fn func(b []byte)) WriterOption {
+ return func(w *Writer) error {
+ w.bufferCB = fn
+ return nil
+ }
+}
+
// WriterBlockSize allows to override the default block size.
// Blocks will be this size or smaller.
// Minimum size is 4KB and maximum size is 4MB.
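
The new WriterBufferDone option added above is meant to be combined with EncodeBuffer: the callback fires once the last block of a passed-in buffer has been handed to the output, at which point the buffer may safely be reused. A minimal usage sketch, illustrative only and not part of this patch (the buffer pool and sizes are assumptions):

    package main

    import (
        "bytes"
        "log"

        "github.com/klauspost/compress/s2"
    )

    func main() {
        // Simple free-list of reusable input buffers.
        pool := make(chan []byte, 4)
        for i := 0; i < cap(pool); i++ {
            pool <- make([]byte, 64<<10)
        }

        var out bytes.Buffer
        w := s2.NewWriter(&out, s2.WriterBufferDone(func(b []byte) {
            // The writer is done with b; hand it back for reuse.
            pool <- b[:cap(b)]
        }))

        buf := <-pool
        n := copy(buf, "example payload")
        if err := w.EncodeBuffer(buf[:n]); err != nil {
            log.Fatal(err)
        }
        if err := w.Close(); err != nil {
            log.Fatal(err)
        }
    }
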
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 03744fbc765..9c28840c3bd 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
printf("RLE set to 0x%x, code: %v", symb, v)
}
case compModeFSE:
- println("Reading table for", tableIndex(i))
+ if debugDecoder {
+ println("Reading table for", tableIndex(i))
+ }
if seq.fse == nil || seq.fse.preDefined {
seq.fse = fseDecoderPool.Get().(*fseDecoder)
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index a4f5bf91fc6..84a79fde767 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -179,9 +179,9 @@ encodeLoop:
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -210,12 +210,12 @@ encodeLoop:
// Index match start+1 (long) -> s - 1
index0 := s + repOff
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -241,9 +241,9 @@ encodeLoop:
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
- lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -270,11 +270,11 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff2
+ s += length + repOff2
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -708,9 +708,9 @@ encodeLoop:
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -738,12 +738,12 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -772,9 +772,9 @@ encodeLoop:
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
- lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -801,11 +801,11 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff2
+ s += length + repOff2
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index a154c18f741..d36be7bd8c2 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -138,9 +138,9 @@ encodeLoop:
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -166,11 +166,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -798,9 +798,9 @@ encodeLoop:
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -826,11 +826,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff
+ s += length + repOff
nextEmit = s
if s >= sLimit {
if debugEncoder {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 72af7ef0fe0..8f8223cd3a6 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -6,6 +6,7 @@ package zstd
import (
"crypto/rand"
+ "errors"
"fmt"
"io"
"math"
@@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) {
// and write CRC if requested.
func (e *Encoder) Write(p []byte) (n int, err error) {
s := &e.state
+ if s.eofWritten {
+ return 0, ErrEncoderClosed
+ }
for len(p) > 0 {
if len(p)+len(s.filling) < e.o.blockSize {
if e.o.crc {
@@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error {
return nil
}
if final && len(s.filling) > 0 {
- s.current = e.EncodeAll(s.filling, s.current[:0])
+ s.current = e.encodeAll(s.encoder, s.filling, s.current[:0])
var n2 int
n2, s.err = s.w.Write(s.current)
if s.err != nil {
@@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error {
s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
s.nInput += int64(len(s.current))
s.wg.Add(1)
+ if final {
+ s.eofWritten = true
+ }
go func(src []byte) {
if debugEncoder {
println("Adding block,", len(src), "bytes, final:", final)
@@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error {
blk := enc.Block()
enc.Encode(blk, src)
blk.last = final
- if final {
- s.eofWritten = true
- }
// Wait for pending writes.
s.wWg.Wait()
if s.writeErr != nil {
@@ -401,12 +405,20 @@ func (e *Encoder) Flush() error {
if len(s.filling) > 0 {
err := e.nextBlock(false)
if err != nil {
+ // Ignore Flush after Close.
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return err
}
}
s.wg.Wait()
s.wWg.Wait()
if s.err != nil {
+ // Ignore Flush after Close.
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return s.err
}
return s.writeErr
@@ -422,6 +434,9 @@ func (e *Encoder) Close() error {
}
err := e.nextBlock(true)
if err != nil {
+ if errors.Is(s.err, ErrEncoderClosed) {
+ return nil
+ }
return err
}
if s.frameContentSize > 0 {
@@ -459,6 +474,11 @@ func (e *Encoder) Close() error {
}
_, s.err = s.w.Write(frame)
}
+ if s.err == nil {
+ s.err = ErrEncoderClosed
+ return nil
+ }
+
return s.err
}
@@ -469,6 +489,15 @@ func (e *Encoder) Close() error {
// Data compressed with EncodeAll can be decoded with the Decoder,
// using either a stream or DecodeAll.
func (e *Encoder) EncodeAll(src, dst []byte) []byte {
+ e.init.Do(e.initialize)
+ enc := <-e.encoders
+ defer func() {
+ e.encoders <- enc
+ }()
+ return e.encodeAll(enc, src, dst)
+}
+
+func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte {
if len(src) == 0 {
if e.o.fullZero {
// Add frame header.
@@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
return dst
}
- e.init.Do(e.initialize)
- enc := <-e.encoders
- defer func() {
- // Release encoder reference to last block.
- // If a non-single block is needed the encoder will reset again.
- e.encoders <- enc
- }()
+
// Use single segments when above minimum window and below window size.
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
if e.o.single != nil {
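
Together with the new ErrEncoderClosed sentinel (added in zstd.go below), the Write/Flush/Close changes above make use-after-Close explicit: Write after Close fails with ErrEncoderClosed, while Flush (and a repeated Close) after Close are silently ignored. A small behavioural sketch, illustrative only and not part of this patch:

    package main

    import (
        "bytes"
        "errors"
        "fmt"

        "github.com/klauspost/compress/zstd"
    )

    func main() {
        var out bytes.Buffer
        enc, err := zstd.NewWriter(&out)
        if err != nil {
            panic(err)
        }
        if _, err := enc.Write([]byte("payload")); err != nil {
            panic(err)
        }
        if err := enc.Close(); err != nil {
            panic(err)
        }

        // Writing after Close now reports the dedicated sentinel error.
        _, err = enc.Write([]byte("more"))
        fmt.Println(errors.Is(err, zstd.ErrEncoderClosed)) // true

        // Flush after Close is treated as a no-op rather than an error.
        fmt.Println(enc.Flush() == nil) // true
    }
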
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 53e160f7e5a..e47af66e7c9 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error {
}
return err
}
- printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+ if debugDecoder {
+ printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+ }
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
index 8adabd82877..c59f17e07ad 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
default:
- return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
+ return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
}
s.seqSize += ctx.litRemain
@@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
return io.ErrUnexpectedEOF
}
- return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
+ return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
}
if ctx.litRemain < 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 5b06174b898..f5591fa1e86 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -1814,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ 40(SP), AX
ADDQ AX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R10, 32(SP)
// outBase += outPosition
@@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ 40(SP), CX
ADDQ CX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R9, 32(SP)
// outBase += outPosition
@@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ 40(SP), AX
ADDQ AX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R10, 32(SP)
// outBase += outPosition
@@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ 40(SP), CX
ADDQ CX, 48(SP)
- // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ // Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
ADDQ R9, 32(SP)
// outBase += outPosition
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 4be7cc73671..066bef2a4f0 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -88,6 +88,10 @@ var (
// Close has been called.
ErrDecoderClosed = errors.New("decoder used after Close")
+ // ErrEncoderClosed will be returned if the Encoder was used after
+ // Close has been called.
+ ErrEncoderClosed = errors.New("encoder used after Close")
+
// ErrDecoderNilInput is returned when a nil Reader was provided
// and an operation other than Reset/DecodeAll/Close was attempted.
ErrDecoderNilInput = errors.New("nil input provided as reader")
diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_table.go b/vendor/github.com/mattn/go-runewidth/runewidth_table.go
index e5d890c266f..ad025ad5296 100644
--- a/vendor/github.com/mattn/go-runewidth/runewidth_table.go
+++ b/vendor/github.com/mattn/go-runewidth/runewidth_table.go
@@ -4,20 +4,21 @@ package runewidth
var combining = table{
{0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3},
- {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01},
- {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0},
- {0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF},
+ {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0CF3, 0x0CF3},
+ {0x0D00, 0x0D01}, {0x135D, 0x135F}, {0x1A7F, 0x1A7F},
+ {0x1AB0, 0x1ACE}, {0x1B6B, 0x1B73}, {0x1DC0, 0x1DFF},
{0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1},
{0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A},
- {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11300, 0x11301},
- {0x1133B, 0x1133C}, {0x11366, 0x1136C}, {0x11370, 0x11374},
- {0x16AF0, 0x16AF4}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172},
+ {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85},
+ {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11366, 0x1136C},
+ {0x11370, 0x11374}, {0x16AF0, 0x16AF4}, {0x1CF00, 0x1CF2D},
+ {0x1CF30, 0x1CF46}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172},
{0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD},
{0x1D242, 0x1D244}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},
{0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
- {0x1E8D0, 0x1E8D6},
+ {0x1E08F, 0x1E08F}, {0x1E8D0, 0x1E8D6},
}
var doublewidth = table{
@@ -33,33 +34,34 @@ var doublewidth = table{
{0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
{0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
- {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
- {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
- {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
- {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF},
- {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
- {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19},
- {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B},
- {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4},
- {0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5},
- {0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152},
- {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004},
- {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
- {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
- {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320},
- {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393},
- {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0},
- {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440},
- {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E},
- {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596},
- {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5},
- {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7},
- {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
- {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F978},
- {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA74},
- {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8},
- {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6},
- {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
+ {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x303E},
+ {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3105, 0x312F},
+ {0x3131, 0x318E}, {0x3190, 0x31E3}, {0x31EF, 0x321E},
+ {0x3220, 0x3247}, {0x3250, 0x4DBF}, {0x4E00, 0xA48C},
+ {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3},
+ {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60},
+ {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
+ {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
+ {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152},
+ {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB},
+ {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E},
+ {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B},
+ {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265},
+ {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C},
+ {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3},
+ {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E},
+ {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D},
+ {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A},
+ {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F},
+ {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2},
+ {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF}, {0x1F6EB, 0x1F6EC},
+ {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
+ {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F9FF},
+ {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD},
+ {0x1FABF, 0x1FAC5}, {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8},
+ {0x1FAF0, 0x1FAF8}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
}
var ambiguous = table{
@@ -154,43 +156,43 @@ var neutral = table{
{0x0402, 0x040F}, {0x0450, 0x0450}, {0x0452, 0x052F},
{0x0531, 0x0556}, {0x0559, 0x058A}, {0x058D, 0x058F},
{0x0591, 0x05C7}, {0x05D0, 0x05EA}, {0x05EF, 0x05F4},
- {0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A},
- {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
- {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
- {0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7},
- {0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
- {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
- {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
- {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
- {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
- {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
- {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
- {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
- {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
- {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
- {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
- {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
- {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
- {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
- {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
- {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
- {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
- {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
- {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
- {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
- {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
- {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
- {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
- {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
- {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
- {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
- {0x0C3D, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
- {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C60, 0x0C63},
+ {0x0600, 0x070D}, {0x070F, 0x074A}, {0x074D, 0x07B1},
+ {0x07C0, 0x07FA}, {0x07FD, 0x082D}, {0x0830, 0x083E},
+ {0x0840, 0x085B}, {0x085E, 0x085E}, {0x0860, 0x086A},
+ {0x0870, 0x088E}, {0x0890, 0x0891}, {0x0898, 0x0983},
+ {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
+ {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9},
+ {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CE},
+ {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
+ {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, {0x0A05, 0x0A0A},
+ {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
+ {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
+ {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, {0x0A47, 0x0A48},
+ {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, {0x0A59, 0x0A5C},
+ {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76}, {0x0A81, 0x0A83},
+ {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
+ {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9},
+ {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
+ {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, {0x0AE6, 0x0AF1},
+ {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
+ {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
+ {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B3C, 0x0B44},
+ {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B55, 0x0B57},
+ {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, {0x0B66, 0x0B77},
+ {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90},
+ {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C},
+ {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
+ {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8},
+ {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, {0x0BD7, 0x0BD7},
+ {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, {0x0C0E, 0x0C10},
+ {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3C, 0x0C44},
+ {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
+ {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D}, {0x0C60, 0x0C63},
{0x0C66, 0x0C6F}, {0x0C77, 0x0C8C}, {0x0C8E, 0x0C90},
{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
{0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
- {0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3},
- {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D0C},
+ {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE}, {0x0CE0, 0x0CE3},
+ {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF3}, {0x0D00, 0x0D0C},
{0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D46, 0x0D48},
{0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D66, 0x0D7F},
{0x0D81, 0x0D83}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1},
@@ -200,7 +202,7 @@ var neutral = table{
{0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E81, 0x0E82},
{0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3},
{0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EC0, 0x0EC4},
- {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9},
+ {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECE}, {0x0ED0, 0x0ED9},
{0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C},
{0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC},
{0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7},
@@ -212,20 +214,19 @@ var neutral = table{
{0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
{0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
{0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
- {0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736},
- {0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770},
- {0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9},
- {0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819},
- {0x1820, 0x1878}, {0x1880, 0x18AA}, {0x18B0, 0x18F5},
- {0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B},
- {0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974},
- {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA},
- {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C},
- {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD},
- {0x1AB0, 0x1AC0}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C},
- {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49},
- {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7},
- {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1DFB, 0x1F15},
+ {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753},
+ {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773},
+ {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9},
+ {0x1800, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA},
+ {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B},
+ {0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D},
+ {0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9},
+ {0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E},
+ {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99},
+ {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE}, {0x1B00, 0x1B4C},
+ {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37},
+ {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA},
+ {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1F15},
{0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
{0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B},
{0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
@@ -237,7 +238,7 @@ var neutral = table{
{0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064},
{0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080},
{0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8},
- {0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20D0, 0x20F0},
+ {0x20AA, 0x20AB}, {0x20AD, 0x20C0}, {0x20D0, 0x20F0},
{0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108},
{0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120},
{0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152},
@@ -275,15 +276,15 @@ var neutral = table{
{0x2780, 0x2794}, {0x2798, 0x27AF}, {0x27B1, 0x27BE},
{0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x2987, 0x2B1A},
{0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B5A, 0x2B73},
- {0x2B76, 0x2B95}, {0x2B97, 0x2C2E}, {0x2C30, 0x2C5E},
- {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2D27, 0x2D27},
- {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D70},
- {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE},
- {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6},
- {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE},
- {0x2DE0, 0x2E52}, {0x303F, 0x303F}, {0x4DC0, 0x4DFF},
- {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA700, 0xA7BF},
- {0xA7C2, 0xA7CA}, {0xA7F5, 0xA82C}, {0xA830, 0xA839},
+ {0x2B76, 0x2B95}, {0x2B97, 0x2CF3}, {0x2CF9, 0x2D25},
+ {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67},
+ {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6},
+ {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE},
+ {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6},
+ {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D}, {0x303F, 0x303F},
+ {0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
+ {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3},
+ {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839},
{0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
{0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD},
{0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
@@ -294,8 +295,8 @@ var neutral = table{
{0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF},
{0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36},
{0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41},
- {0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F},
- {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD},
+ {0xFB43, 0xFB44}, {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F},
+ {0xFD92, 0xFDC7}, {0xFDCF, 0xFDCF}, {0xFDF0, 0xFDFF},
{0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC},
{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B},
{0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D},
@@ -307,44 +308,48 @@ var neutral = table{
{0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5},
{0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3},
{0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563},
- {0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755},
- {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808},
- {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C},
- {0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF},
- {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B},
- {0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7},
- {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06},
- {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35},
- {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A50, 0x10A58},
- {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6},
- {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72},
- {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF},
- {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
- {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10E60, 0x10E7E},
- {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1},
- {0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10FB0, 0x10FCB},
- {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F},
- {0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8},
- {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
- {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
- {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286},
- {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
- {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
- {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
- {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
- {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
- {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
- {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
- {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
- {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
- {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
- {0x11680, 0x116B8}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
- {0x1171D, 0x1172B}, {0x11730, 0x1173F}, {0x11800, 0x1183B},
- {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
- {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
- {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
- {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
- {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AC0, 0x11AF8},
+ {0x1056F, 0x1057A}, {0x1057C, 0x1058A}, {0x1058C, 0x10592},
+ {0x10594, 0x10595}, {0x10597, 0x105A1}, {0x105A3, 0x105B1},
+ {0x105B3, 0x105B9}, {0x105BB, 0x105BC}, {0x10600, 0x10736},
+ {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10780, 0x10785},
+ {0x10787, 0x107B0}, {0x107B2, 0x107BA}, {0x10800, 0x10805},
+ {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838},
+ {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E},
+ {0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5},
+ {0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F},
+ {0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03},
+ {0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17},
+ {0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48},
+ {0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6},
+ {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55},
+ {0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C},
+ {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2},
+ {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39},
+ {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD},
+ {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27}, {0x10F30, 0x10F59},
+ {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB}, {0x10FE0, 0x10FF6},
+ {0x11000, 0x1104D}, {0x11052, 0x11075}, {0x1107F, 0x110C2},
+ {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, {0x110F0, 0x110F9},
+ {0x11100, 0x11134}, {0x11136, 0x11147}, {0x11150, 0x11176},
+ {0x11180, 0x111DF}, {0x111E1, 0x111F4}, {0x11200, 0x11211},
+ {0x11213, 0x11241}, {0x11280, 0x11286}, {0x11288, 0x11288},
+ {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A9},
+ {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303},
+ {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328},
+ {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
+ {0x1133B, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D},
+ {0x11350, 0x11350}, {0x11357, 0x11357}, {0x1135D, 0x11363},
+ {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11400, 0x1145B},
+ {0x1145D, 0x11461}, {0x11480, 0x114C7}, {0x114D0, 0x114D9},
+ {0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644},
+ {0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B9},
+ {0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B},
+ {0x11730, 0x11746}, {0x11800, 0x1183B}, {0x118A0, 0x118F2},
+ {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913},
+ {0x11915, 0x11916}, {0x11918, 0x11935}, {0x11937, 0x11938},
+ {0x1193B, 0x11946}, {0x11950, 0x11959}, {0x119A0, 0x119A7},
+ {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, {0x11A00, 0x11A47},
+ {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8}, {0x11B00, 0x11B09},
{0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
{0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
{0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
@@ -352,30 +357,36 @@ var neutral = table{
{0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
{0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
{0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
+ {0x11F00, 0x11F10}, {0x11F12, 0x11F3A}, {0x11F3E, 0x11F59},
{0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
{0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
- {0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646},
+ {0x12F90, 0x12FF2}, {0x13000, 0x13455}, {0x14400, 0x14646},
{0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
- {0x16A6E, 0x16A6F}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5},
- {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61},
- {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A},
- {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F},
- {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88},
- {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3}, {0x1D000, 0x1D0F5},
- {0x1D100, 0x1D126}, {0x1D129, 0x1D1E8}, {0x1D200, 0x1D245},
- {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356}, {0x1D360, 0x1D378},
- {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F},
- {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC},
- {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3},
- {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
- {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E},
- {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550},
- {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B},
- {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},
- {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
- {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D},
- {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E2C0, 0x1E2F9},
- {0x1E2FF, 0x1E2FF}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6},
+ {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED},
+ {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59},
+ {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F},
+ {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87},
+ {0x16F8F, 0x16F9F}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
+ {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3},
+ {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3},
+ {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D1EA},
+ {0x1D200, 0x1D245}, {0x1D2C0, 0x1D2D3}, {0x1D2E0, 0x1D2F3},
+ {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454},
+ {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2},
+ {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},
+ {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505},
+ {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C},
+ {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544},
+ {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5},
+ {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F},
+ {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A},
+ {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
+ {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E030, 0x1E06D},
+ {0x1E08F, 0x1E08F}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D},
+ {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE},
+ {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E4D0, 0x1E4F9},
+ {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE},
+ {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6},
{0x1E900, 0x1E94B}, {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F},
{0x1EC71, 0x1ECB4}, {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03},
{0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24},
@@ -400,8 +411,8 @@ var neutral = table{
{0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594},
{0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F},
{0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4},
- {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773},
- {0x1F780, 0x1F7D8}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847},
+ {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F776},
+ {0x1F77B, 0x1F7D9}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847},
{0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD},
{0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F90B}, {0x1F93B, 0x1F93B},
{0x1F946, 0x1F946}, {0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D},
diff --git a/vendor/github.com/parquet-go/parquet-go/allocator.go b/vendor/github.com/parquet-go/parquet-go/allocator.go
index 0cf2df03197..693ee5a2471 100644
--- a/vendor/github.com/parquet-go/parquet-go/allocator.go
+++ b/vendor/github.com/parquet-go/parquet-go/allocator.go
@@ -1,6 +1,10 @@
package parquet
-import "github.com/parquet-go/parquet-go/internal/unsafecast"
+import (
+ "unsafe"
+
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
+)
type allocator struct{ buffer []byte }
@@ -31,7 +35,7 @@ func (a *allocator) copyBytes(v []byte) []byte {
func (a *allocator) copyString(v string) string {
b := a.makeBytes(len(v))
copy(b, v)
- return unsafecast.BytesToString(b)
+ return unsafecast.String(b)
}
func (a *allocator) reset() {
@@ -54,7 +58,7 @@ func (a *rowAllocator) capture(row Row) {
for i, v := range row {
switch v.Kind() {
case ByteArray, FixedLenByteArray:
- row[i].ptr = unsafecast.AddressOfBytes(a.copyBytes(v.byteArray()))
+ row[i].ptr = unsafe.SliceData(a.copyBytes(v.byteArray()))
}
}
}
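For reference, a minimal sketch of the zero-copy conversion that the generic unsafecast.String helper presumably performs (the helper itself is not shown in this patch); unsafe.SliceData and unsafe.String are the Go 1.20 standard-library primitives the code above switches to:

package main

import (
	"fmt"
	"unsafe"
)

// bytesToString reinterprets b as a string without copying. The caller must
// not mutate b afterwards, because the string shares its backing memory.
func bytesToString(b []byte) string {
	return unsafe.String(unsafe.SliceData(b), len(b))
}

func main() {
	b := []byte("hello")
	fmt.Println(bytesToString(b)) // hello
}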
diff --git a/vendor/github.com/parquet-go/parquet-go/array.go b/vendor/github.com/parquet-go/parquet-go/array.go
index 48df9715495..774e6f85ff0 100644
--- a/vendor/github.com/parquet-go/parquet-go/array.go
+++ b/vendor/github.com/parquet-go/parquet-go/array.go
@@ -3,23 +3,22 @@ package parquet
import (
"unsafe"
- "github.com/parquet-go/parquet-go/internal/unsafecast"
"github.com/parquet-go/parquet-go/sparse"
)
func makeArrayValue(values []Value, offset uintptr) sparse.Array {
- ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values))
+ ptr := sliceData(values)
return sparse.UnsafeArray(unsafe.Add(ptr, offset), len(values), unsafe.Sizeof(Value{}))
}
func makeArrayString(values []string) sparse.Array {
str := ""
- ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values))
+ ptr := sliceData(values)
return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof(str))
}
func makeArrayBE128(values []*[16]byte) sparse.Array {
- ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values))
+ ptr := sliceData(values)
return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof((*[16]byte)(nil)))
}
@@ -29,7 +28,7 @@ func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.Array {
func makeArrayOf[T any](s []T) sparse.Array {
var model T
- return makeArray(unsafecast.PointerOf(s), len(s), unsafe.Sizeof(model))
+ return makeArray(sliceData(s), len(s), unsafe.Sizeof(model))
}
func makeSlice[T any](a sparse.Array) []T {
@@ -40,6 +39,10 @@ func slice[T any](p unsafe.Pointer, n int) []T {
return unsafe.Slice((*T)(p), n)
}
+func sliceData[T any](s []T) unsafe.Pointer {
+ return unsafe.Pointer(unsafe.SliceData(s))
+}
+
type sliceHeader struct {
base unsafe.Pointer
len int
diff --git a/vendor/github.com/parquet-go/parquet-go/bloom.go b/vendor/github.com/parquet-go/parquet-go/bloom.go
index 734ac00a9d8..69d54c71712 100644
--- a/vendor/github.com/parquet-go/parquet-go/bloom.go
+++ b/vendor/github.com/parquet-go/parquet-go/bloom.go
@@ -1,6 +1,7 @@
package parquet
import (
+ "encoding/binary"
"io"
"github.com/parquet-go/parquet-go/bloom"
@@ -9,6 +10,7 @@ import (
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/format"
"github.com/parquet-go/parquet-go/internal/unsafecast"
+ "golang.org/x/sys/cpu"
)
// BloomFilter is an interface allowing applications to test whether a key
@@ -162,27 +164,38 @@ func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error)
}
func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
- splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Int32ToUint32(src))
+ splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src))
return dst, nil
}
func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
- splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Int64ToUint64(src))
+ splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint64](src))
return dst, nil
}
func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) {
- splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), deprecated.Int96ToBytes(src), 12)
+ if cpu.IsBigEndian {
+ srcLen := len(src)
+ buf := make([]byte, srcLen*12)
+ for idx := range srcLen {
+ binary.LittleEndian.PutUint32(buf[(idx*12):4+(idx*12)], uint32(src[idx][0]))
+ binary.LittleEndian.PutUint32(buf[4+(idx*12):8+(idx*12)], uint32(src[idx][1]))
+ binary.LittleEndian.PutUint32(buf[8+(idx*12):12+(idx*12)], uint32(src[idx][2]))
+ }
+ splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), buf, 12)
+ } else {
+ splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[byte](src), 12)
+ }
return dst, nil
}
func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
- splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Float32ToUint32(src))
+ splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src))
return dst, nil
}
func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
- splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Float64ToUint64(src))
+ splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint64](src))
return dst, nil
}
@@ -210,7 +223,7 @@ func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint
func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
filter := bloom.MakeSplitBlockFilter(dst)
if size == 16 {
- splitBlockEncodeUint128(filter, unsafecast.BytesToUint128(src))
+ splitBlockEncodeUint128(filter, unsafecast.Slice[[16]byte](src))
} else {
splitBlockEncodeFixedLenByteArray(filter, src, size)
}
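The EncodeInt96 change above goes through encoding/binary on big-endian hosts so the bloom filter hashes the same little-endian byte layout that the unsafecast path yields on little-endian hosts. A self-contained sketch of that per-value layout, assuming deprecated.Int96 is a [3]uint32 with the least-significant limb first:

package main

import (
	"encoding/binary"
	"fmt"
)

// int96LEBytes writes the three 32-bit limbs of an INT96 value into a
// 12-byte buffer in little-endian order, the layout EncodeInt96 passes to
// splitBlockEncodeFixedLenByteArray.
func int96LEBytes(v [3]uint32) [12]byte {
	var buf [12]byte
	binary.LittleEndian.PutUint32(buf[0:4], v[0])
	binary.LittleEndian.PutUint32(buf[4:8], v[1])
	binary.LittleEndian.PutUint32(buf[8:12], v[2])
	return buf
}

func main() {
	fmt.Printf("%x\n", int96LEBytes([3]uint32{1, 2, 3}))
	// prints: 010000000200000003000000
}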
diff --git a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go
index 655d815a460..11cc255a1c1 100644
--- a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go
+++ b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go
@@ -3,7 +3,8 @@ package bloom
import (
"io"
"sync"
- "unsafe"
+
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
)
// Filter is an interface representing read-only bloom filters where programs
@@ -21,9 +22,7 @@ type SplitBlockFilter []Block
// MakeSplitBlockFilter constructs a SplitBlockFilter value from the data byte
// slice.
func MakeSplitBlockFilter(data []byte) SplitBlockFilter {
- p := *(*unsafe.Pointer)(unsafe.Pointer(&data))
- n := len(data) / BlockSize
- return unsafe.Slice((*Block)(p), n)
+ return unsafecast.Slice[Block](data)
}
// NumSplitBlocksOf returns the number of blocks in a filter intended to hold
@@ -64,7 +63,7 @@ func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f, x) }
// The returned slice shares the memory of f. The method is intended to be used
// to serialize the bloom filter to a storage medium.
func (f SplitBlockFilter) Bytes() []byte {
- return unsafe.Slice(*(**byte)(unsafe.Pointer(&f)), len(f)*BlockSize)
+ return unsafecast.Slice[byte](f)
}
// CheckSplitBlock is similar to bloom.SplitBlockFilter.Check but reads the
diff --git a/vendor/github.com/parquet-go/parquet-go/column.go b/vendor/github.com/parquet-go/parquet-go/column.go
index 0320a6df6c4..51f2d20a097 100644
--- a/vendor/github.com/parquet-go/parquet-go/column.go
+++ b/vendor/github.com/parquet-go/parquet-go/column.go
@@ -352,6 +352,8 @@ func (cl *columnLoader) open(file *File, path []string) (*Column, error) {
c.typ = &groupType{}
if lt := c.schema.LogicalType; lt != nil && lt.Map != nil {
c.typ = &mapType{}
+ } else if lt != nil && lt.List != nil {
+ c.typ = &listType{}
}
c.columns = make([]*Column, numChildren)
@@ -691,7 +693,7 @@ func (c *Column) decodeDataPage(header DataPageHeader, numValues int, repetition
if pageType.Kind() == ByteArray && !isDictionaryEncoding(pageEncoding) {
obuf = buffers.get(4 * (numValues + 1))
defer obuf.unref()
- pageOffsets = unsafecast.BytesToUint32(obuf.data)
+ pageOffsets = unsafecast.Slice[uint32](obuf.data)
}
values := pageType.NewValues(pageValues, pageOffsets)
diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer.go b/vendor/github.com/parquet-go/parquet-go/column_buffer.go
index bc40cc3e17f..8435cb2977c 100644
--- a/vendor/github.com/parquet-go/parquet-go/column_buffer.go
+++ b/vendor/github.com/parquet-go/parquet-go/column_buffer.go
@@ -16,8 +16,12 @@ import (
"github.com/parquet-go/parquet-go/internal/bitpack"
"github.com/parquet-go/parquet-go/internal/unsafecast"
"github.com/parquet-go/parquet-go/sparse"
+ "golang.org/x/sys/cpu"
)
+const offsetOfU64 = unsafe.Offsetof(Value{}.u64)
+const offsetOfPtr = unsafe.Offsetof(Value{}.ptr)
+
// ColumnBuffer is an interface representing columns of a row group.
//
// ColumnBuffer implements sort.Interface as a way to support reordering the
@@ -103,6 +107,29 @@ func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitio
}, nil
}
+// On a big-endian system, the least-significant byte of a Value's u64 (8-byte) field is stored
+// at byte offset 7, and the low-order four bytes start at byte offset 4. A boolean/byte value
+// kept in the u64 field therefore lives at offset 7, while an int32/uint32/float value lives at
+// offset 4.
+// getOffset returns that extra offset for the column buffer and dictionary types whose values
+// are narrower than 8 bytes: 7 for 1-byte values and 4 for 4-byte values. On little-endian
+// systems (and for 8-byte values) the payload starts at the beginning of the field, so the
+// offset is 0.
+func getOffset(colDict interface{}) uintptr {
+ var offset uintptr = 0
+
+ if cpu.IsBigEndian {
+ switch colDict.(type) {
+ case booleanColumnBuffer, booleanDictionary:
+ offset = 7
+
+ case int32ColumnBuffer, uint32ColumnBuffer, floatColumnBuffer, int32Dictionary, floatDictionary, uint32Dictionary:
+ offset = 4
+ }
+ }
+ return offset
+}
+
type nullableColumnIndex struct {
ColumnIndex
maxDefinitionLevel byte
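As an illustration of the offsets described in the getOffset comment above (this snippet is not part of the patch), the low-order byte of a uint64 sits at byte offset 7 on a big-endian host and at offset 0 on a little-endian one, which is why getOffset adds 7 for 1-byte values and 4 for 4-byte values:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	// A 32-bit payload held in a 64-bit field, as a Value's u64 field
	// holds int32/uint32/float column values.
	var u uint64 = 0x11223344
	b := (*[8]byte)(unsafe.Pointer(&u))
	// Big-endian hosts:    00 00 00 00 11 22 33 44  (payload starts at offset 4)
	// Little-endian hosts: 44 33 22 11 00 00 00 00  (payload starts at offset 0)
	fmt.Printf("% x\n", b[:])
}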
@@ -828,8 +855,8 @@ func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) {
}
func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ offset := getOffset(*col)
+ col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{})
return len(values), nil
}
@@ -958,7 +985,7 @@ func (col *int32ColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 4) != 0 {
return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToInt32(b)...)
+ col.values = append(col.values, unsafecast.Slice[int32](b)...)
return len(b), nil
}
@@ -968,8 +995,8 @@ func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) {
}
func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ offset := getOffset(*col)
+ col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{})
return len(values), nil
}
@@ -1057,7 +1084,7 @@ func (col *int64ColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 8) != 0 {
return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToInt64(b)...)
+ col.values = append(col.values, unsafecast.Slice[int64](b)...)
return len(b), nil
}
@@ -1067,8 +1094,7 @@ func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) {
}
func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{})
return len(values), nil
}
@@ -1155,7 +1181,7 @@ func (col *int96ColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 12) != 0 {
return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b))
}
- col.values = append(col.values, deprecated.BytesToInt96(b)...)
+ col.values = append(col.values, unsafecast.Slice[deprecated.Int96](b)...)
return len(b), nil
}
@@ -1252,7 +1278,7 @@ func (col *floatColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 4) != 0 {
return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToFloat32(b)...)
+ col.values = append(col.values, unsafecast.Slice[float32](b)...)
return len(b), nil
}
@@ -1262,8 +1288,8 @@ func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) {
}
func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ offset := getOffset(*col)
+ col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{})
return len(values), nil
}
@@ -1350,7 +1376,7 @@ func (col *doubleColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 8) != 0 {
return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToFloat64(b)...)
+ col.values = append(col.values, unsafecast.Slice[float64](b)...)
return len(b), nil
}
@@ -1360,8 +1386,7 @@ func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) {
}
func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{})
return len(values), nil
}
@@ -1505,7 +1530,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i
baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths))
err = plain.RangeByteArray(values, func(value []byte) error {
- col.append(unsafecast.BytesToString(value))
+ col.append(unsafecast.String(value))
return nil
})
@@ -1515,8 +1540,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i
}
func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{})
+ col.writeValues(makeArrayValue(values, offsetOfPtr), columnLevels{})
return len(values), nil
}
@@ -1742,7 +1766,7 @@ func (col *uint32ColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 4) != 0 {
return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToUint32(b)...)
+ col.values = append(col.values, unsafecast.Slice[uint32](b)...)
return len(b), nil
}
@@ -1752,8 +1776,8 @@ func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) {
}
func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ offset := getOffset(*col)
+ col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{})
return len(values), nil
}
@@ -1840,7 +1864,7 @@ func (col *uint64ColumnBuffer) Write(b []byte) (int, error) {
if (len(b) % 8) != 0 {
return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
}
- col.values = append(col.values, unsafecast.BytesToUint64(b)...)
+ col.values = append(col.values, unsafecast.Slice[uint64](b)...)
return len(b), nil
}
@@ -1850,8 +1874,7 @@ func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) {
}
func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) {
- var model Value
- col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
+ col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{})
return len(values), nil
}
@@ -2356,8 +2379,8 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo
mapKey.SetIterKey(it)
mapValue.SetIterValue(it)
- k := makeArray(unsafecast.PointerOfValue(mapKey), 1, keySize)
- v := makeArray(unsafecast.PointerOfValue(mapValue), 1, valueSize)
+ k := makeArray(reflectValueData(mapKey), 1, keySize)
+ v := makeArray(reflectValueData(mapValue), 1, valueSize)
if err := writeKeyValues(columns, k, v, elemLevels); err != nil {
return err
@@ -2440,7 +2463,7 @@ func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeR
val = t.UnixNano()
}
- a := makeArray(unsafecast.PointerOfValue(reflect.ValueOf(val)), 1, elemSize)
+ a := makeArray(reflectValueData(reflect.ValueOf(val)), 1, elemSize)
if err := writeRows(columns, a, levels); err != nil {
return err
}
diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go
index 9f41875f2db..45717269965 100644
--- a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go
+++ b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go
@@ -10,7 +10,7 @@ import (
)
func broadcastValueInt32(dst []int32, src int8) {
- bytealg.Broadcast(unsafecast.Int32ToBytes(dst), byte(src))
+ bytealg.Broadcast(unsafecast.Slice[byte](dst), byte(src))
}
//go:noescape
diff --git a/vendor/github.com/parquet-go/parquet-go/column_index_be.go b/vendor/github.com/parquet-go/parquet-go/column_index_be.go
new file mode 100644
index 00000000000..f3ea2e7bdfb
--- /dev/null
+++ b/vendor/github.com/parquet-go/parquet-go/column_index_be.go
@@ -0,0 +1,854 @@
+// This file is compiled only on big-endian CPU architectures.
+
+//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
+
+package parquet
+
+import (
+ "encoding/binary"
+ "github.com/parquet-go/parquet-go/deprecated"
+ "github.com/parquet-go/parquet-go/encoding/plain"
+ "github.com/parquet-go/parquet-go/format"
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
+ "math"
+)
+
+type ColumnIndex interface {
+ // NumPages returns the number of pages in the column index.
+ NumPages() int
+
+ // Returns the number of null values in the page at the given index.
+ NullCount(int) int64
+
+ // Tells whether the page at the given index contains null values only.
+ NullPage(int) bool
+
+ // MinValue and MaxValue return the min/max bounds for the page at the given
+ // index in the column.
+ MinValue(int) Value
+ MaxValue(int) Value
+
+ // IsAscending returns true if the column index min/max values are sorted
+ // in ascending order (based on the ordering rules of the column's logical
+ // type).
+ IsAscending() bool
+
+ // IsDescending returns true if the column index min/max values are sorted
+ // in descending order (based on the ordering rules of the column's logical
+ // type).
+ IsDescending() bool
+}
+
+// NewColumnIndex constructs a ColumnIndex instance from the given parquet format column index.
+// The kind argument configures the type of values returned by the MinValue and MaxValue methods.
+func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex {
+ return &formatColumnIndex{
+ kind: kind,
+ index: index,
+ }
+}
+
+type formatColumnIndex struct {
+ kind Kind
+ index *format.ColumnIndex
+}
+
+func (f *formatColumnIndex) NumPages() int {
+ return len(f.index.MinValues)
+}
+
+func (f *formatColumnIndex) NullCount(i int) int64 {
+ if len(f.index.NullCounts) > 0 {
+ return f.index.NullCounts[i]
+ }
+ return 0
+}
+
+func (f *formatColumnIndex) NullPage(i int) bool {
+ return len(f.index.NullPages) > 0 && f.index.NullPages[i]
+}
+
+func (f *formatColumnIndex) MinValue(i int) Value {
+ if f.NullPage(i) {
+ return Value{}
+ }
+ return f.kind.Value(f.index.MinValues[i])
+}
+
+func (f *formatColumnIndex) MaxValue(i int) Value {
+ if f.NullPage(i) {
+ return Value{}
+ }
+ return f.kind.Value(f.index.MaxValues[i])
+}
+
+func (f *formatColumnIndex) IsAscending() bool {
+ return f.index.BoundaryOrder == format.Ascending
+}
+
+func (f *formatColumnIndex) IsDescending() bool {
+ return f.index.BoundaryOrder == format.Descending
+}
+
+type fileColumnIndex struct{ chunk *fileColumnChunk }
+
+func (i fileColumnIndex) NumPages() int {
+ return len(i.columnIndex().NullPages)
+}
+
+func (i fileColumnIndex) NullCount(j int) int64 {
+ index := i.columnIndex()
+ if len(index.NullCounts) > 0 {
+ return index.NullCounts[j]
+ }
+ return 0
+}
+
+func (i fileColumnIndex) NullPage(j int) bool {
+ return isNullPage(j, i.columnIndex())
+}
+
+func (i fileColumnIndex) MinValue(j int) Value {
+ index := i.columnIndex()
+ if isNullPage(j, index) {
+ return Value{}
+ }
+ return i.makeValue(index.MinValues[j])
+}
+
+func (i fileColumnIndex) MaxValue(j int) Value {
+ index := i.columnIndex()
+ if isNullPage(j, index) {
+ return Value{}
+ }
+ return i.makeValue(index.MaxValues[j])
+}
+
+func (i fileColumnIndex) IsAscending() bool {
+ return i.columnIndex().BoundaryOrder == format.Ascending
+}
+
+func (i fileColumnIndex) IsDescending() bool {
+ return i.columnIndex().BoundaryOrder == format.Descending
+}
+
+func (i *fileColumnIndex) makeValue(b []byte) Value {
+ return i.chunk.column.typ.Kind().Value(b)
+}
+
+func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() }
+func isNullPage(j int, index *format.ColumnIndex) bool {
+ return len(index.NullPages) > 0 && index.NullPages[j]
+}
+
+type emptyColumnIndex struct{}
+
+func (emptyColumnIndex) NumPages() int { return 0 }
+func (emptyColumnIndex) NullCount(int) int64 { return 0 }
+func (emptyColumnIndex) NullPage(int) bool { return false }
+func (emptyColumnIndex) MinValue(int) Value { return Value{} }
+func (emptyColumnIndex) MaxValue(int) Value { return Value{} }
+func (emptyColumnIndex) IsAscending() bool { return false }
+func (emptyColumnIndex) IsDescending() bool { return false }
+
+type booleanColumnIndex struct{ page *booleanPage }
+
+func (i booleanColumnIndex) NumPages() int { return 1 }
+func (i booleanColumnIndex) NullCount(int) int64 { return 0 }
+func (i booleanColumnIndex) NullPage(int) bool { return false }
+func (i booleanColumnIndex) MinValue(int) Value { return makeValueBoolean(i.page.min()) }
+func (i booleanColumnIndex) MaxValue(int) Value { return makeValueBoolean(i.page.max()) }
+func (i booleanColumnIndex) IsAscending() bool { return false }
+func (i booleanColumnIndex) IsDescending() bool { return false }
+
+type int32ColumnIndex struct{ page *int32Page }
+
+func (i int32ColumnIndex) NumPages() int { return 1 }
+func (i int32ColumnIndex) NullCount(int) int64 { return 0 }
+func (i int32ColumnIndex) NullPage(int) bool { return false }
+func (i int32ColumnIndex) MinValue(int) Value { return makeValueInt32(i.page.min()) }
+func (i int32ColumnIndex) MaxValue(int) Value { return makeValueInt32(i.page.max()) }
+func (i int32ColumnIndex) IsAscending() bool { return false }
+func (i int32ColumnIndex) IsDescending() bool { return false }
+
+type int64ColumnIndex struct{ page *int64Page }
+
+func (i int64ColumnIndex) NumPages() int { return 1 }
+func (i int64ColumnIndex) NullCount(int) int64 { return 0 }
+func (i int64ColumnIndex) NullPage(int) bool { return false }
+func (i int64ColumnIndex) MinValue(int) Value { return makeValueInt64(i.page.min()) }
+func (i int64ColumnIndex) MaxValue(int) Value { return makeValueInt64(i.page.max()) }
+func (i int64ColumnIndex) IsAscending() bool { return false }
+func (i int64ColumnIndex) IsDescending() bool { return false }
+
+type int96ColumnIndex struct{ page *int96Page }
+
+func (i int96ColumnIndex) NumPages() int { return 1 }
+func (i int96ColumnIndex) NullCount(int) int64 { return 0 }
+func (i int96ColumnIndex) NullPage(int) bool { return false }
+func (i int96ColumnIndex) MinValue(int) Value { return makeValueInt96(i.page.min()) }
+func (i int96ColumnIndex) MaxValue(int) Value { return makeValueInt96(i.page.max()) }
+func (i int96ColumnIndex) IsAscending() bool { return false }
+func (i int96ColumnIndex) IsDescending() bool { return false }
+
+type floatColumnIndex struct{ page *floatPage }
+
+func (i floatColumnIndex) NumPages() int { return 1 }
+func (i floatColumnIndex) NullCount(int) int64 { return 0 }
+func (i floatColumnIndex) NullPage(int) bool { return false }
+func (i floatColumnIndex) MinValue(int) Value { return makeValueFloat(i.page.min()) }
+func (i floatColumnIndex) MaxValue(int) Value { return makeValueFloat(i.page.max()) }
+func (i floatColumnIndex) IsAscending() bool { return false }
+func (i floatColumnIndex) IsDescending() bool { return false }
+
+type doubleColumnIndex struct{ page *doublePage }
+
+func (i doubleColumnIndex) NumPages() int { return 1 }
+func (i doubleColumnIndex) NullCount(int) int64 { return 0 }
+func (i doubleColumnIndex) NullPage(int) bool { return false }
+func (i doubleColumnIndex) MinValue(int) Value { return makeValueDouble(i.page.min()) }
+func (i doubleColumnIndex) MaxValue(int) Value { return makeValueDouble(i.page.max()) }
+func (i doubleColumnIndex) IsAscending() bool { return false }
+func (i doubleColumnIndex) IsDescending() bool { return false }
+
+type byteArrayColumnIndex struct{ page *byteArrayPage }
+
+func (i byteArrayColumnIndex) NumPages() int { return 1 }
+func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 }
+func (i byteArrayColumnIndex) NullPage(int) bool { return false }
+func (i byteArrayColumnIndex) MinValue(int) Value { return makeValueBytes(ByteArray, i.page.min()) }
+func (i byteArrayColumnIndex) MaxValue(int) Value { return makeValueBytes(ByteArray, i.page.max()) }
+func (i byteArrayColumnIndex) IsAscending() bool { return false }
+func (i byteArrayColumnIndex) IsDescending() bool { return false }
+
+type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage }
+
+func (i fixedLenByteArrayColumnIndex) NumPages() int { return 1 }
+func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 }
+func (i fixedLenByteArrayColumnIndex) NullPage(int) bool { return false }
+func (i fixedLenByteArrayColumnIndex) MinValue(int) Value {
+ return makeValueBytes(FixedLenByteArray, i.page.min())
+}
+func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value {
+ return makeValueBytes(FixedLenByteArray, i.page.max())
+}
+func (i fixedLenByteArrayColumnIndex) IsAscending() bool { return false }
+func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false }
+
+type uint32ColumnIndex struct{ page *uint32Page }
+
+func (i uint32ColumnIndex) NumPages() int { return 1 }
+func (i uint32ColumnIndex) NullCount(int) int64 { return 0 }
+func (i uint32ColumnIndex) NullPage(int) bool { return false }
+func (i uint32ColumnIndex) MinValue(int) Value { return makeValueUint32(i.page.min()) }
+func (i uint32ColumnIndex) MaxValue(int) Value { return makeValueUint32(i.page.max()) }
+func (i uint32ColumnIndex) IsAscending() bool { return false }
+func (i uint32ColumnIndex) IsDescending() bool { return false }
+
+type uint64ColumnIndex struct{ page *uint64Page }
+
+func (i uint64ColumnIndex) NumPages() int { return 1 }
+func (i uint64ColumnIndex) NullCount(int) int64 { return 0 }
+func (i uint64ColumnIndex) NullPage(int) bool { return false }
+func (i uint64ColumnIndex) MinValue(int) Value { return makeValueUint64(i.page.min()) }
+func (i uint64ColumnIndex) MaxValue(int) Value { return makeValueUint64(i.page.max()) }
+func (i uint64ColumnIndex) IsAscending() bool { return false }
+func (i uint64ColumnIndex) IsDescending() bool { return false }
+
+type be128ColumnIndex struct{ page *be128Page }
+
+func (i be128ColumnIndex) NumPages() int { return 1 }
+func (i be128ColumnIndex) NullCount(int) int64 { return 0 }
+func (i be128ColumnIndex) NullPage(int) bool { return false }
+func (i be128ColumnIndex) MinValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.min()) }
+func (i be128ColumnIndex) MaxValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.max()) }
+func (i be128ColumnIndex) IsAscending() bool { return false }
+func (i be128ColumnIndex) IsDescending() bool { return false }
+
+// The ColumnIndexer interface is implemented by types that support generating
+// parquet column indexes.
+//
+// The package does not export any types that implement this interface; programs
+// must call NewColumnIndexer on a Type instance to construct column indexers.
+type ColumnIndexer interface {
+ // Resets the column indexer state.
+ Reset()
+
+ // Add a page to the column indexer.
+ IndexPage(numValues, numNulls int64, min, max Value)
+
+ // Generates a format.ColumnIndex value from the current state of the
+ // column indexer.
+ //
+ // The returned value may reference internal buffers, in which case the
+ // values remain valid until the next call to IndexPage or Reset on the
+ // column indexer.
+ ColumnIndex() format.ColumnIndex
+}
+
+type baseColumnIndexer struct {
+ nullPages []bool
+ nullCounts []int64
+}
+
+func (i *baseColumnIndexer) reset() {
+ i.nullPages = i.nullPages[:0]
+ i.nullCounts = i.nullCounts[:0]
+}
+
+func (i *baseColumnIndexer) observe(numValues, numNulls int64) {
+ i.nullPages = append(i.nullPages, numValues == numNulls)
+ i.nullCounts = append(i.nullCounts, numNulls)
+}
+
+func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex {
+ nullPages := make([]bool, len(i.nullPages))
+ copy(nullPages, i.nullPages)
+ nullCounts := make([]int64, len(i.nullCounts))
+ copy(nullCounts, i.nullCounts)
+ return format.ColumnIndex{
+ NullPages: nullPages,
+ NullCounts: nullCounts,
+ MinValues: minValues,
+ MaxValues: maxValues,
+ BoundaryOrder: boundaryOrderOf(minOrder, maxOrder),
+ }
+}
+
+type booleanColumnIndexer struct {
+ baseColumnIndexer
+ minValues []bool
+ maxValues []bool
+}
+
+func newBooleanColumnIndexer() *booleanColumnIndexer {
+ return new(booleanColumnIndexer)
+}
+
+func (i *booleanColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.boolean())
+ i.maxValues = append(i.maxValues, max.boolean())
+}
+
+func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex {
+ return i.columnIndex(
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1),
+ orderOfBool(i.minValues),
+ orderOfBool(i.maxValues),
+ )
+}
+
+type int32ColumnIndexer struct {
+ baseColumnIndexer
+ minValues []int32
+ maxValues []int32
+}
+
+func newInt32ColumnIndexer() *int32ColumnIndexer {
+ return new(int32ColumnIndexer)
+}
+
+func (i *int32ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.int32())
+ i.maxValues = append(i.maxValues, max.int32())
+}
+
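+// reverseInt32MinMaxValues encodes the int32 page bounds as little-endian
+// bytes, so the serialized column index carries the same byte order on
+// big-endian hosts as on little-endian ones.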
+func reverseInt32MinMaxValues(mLen int, mVal []int32) []byte {
+ buf := make([]byte, mLen*4)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k]))
+ idx += 4
+ }
+ return buf
+}
+
+func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseInt32MinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseInt32MinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 4),
+ splitFixedLenByteArrays(byteMax, 4),
+ orderOfInt32(i.minValues),
+ orderOfInt32(i.maxValues),
+ )
+}
+
+type int64ColumnIndexer struct {
+ baseColumnIndexer
+ minValues []int64
+ maxValues []int64
+}
+
+func newInt64ColumnIndexer() *int64ColumnIndexer {
+ return new(int64ColumnIndexer)
+}
+
+func (i *int64ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.int64())
+ i.maxValues = append(i.maxValues, max.int64())
+}
+
+func reverseInt64MinMaxValues(mLen int, mVal []int64) []byte {
+ buf := make([]byte, mLen*8)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint64(buf[idx:(8+idx)], uint64(mVal[k]))
+ idx += 8
+ }
+ return buf
+}
+
+func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseInt64MinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseInt64MinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 8),
+ splitFixedLenByteArrays(byteMax, 8),
+ orderOfInt64(i.minValues),
+ orderOfInt64(i.maxValues),
+ )
+}
+
+type int96ColumnIndexer struct {
+ baseColumnIndexer
+ minValues []deprecated.Int96
+ maxValues []deprecated.Int96
+}
+
+func newInt96ColumnIndexer() *int96ColumnIndexer {
+ return new(int96ColumnIndexer)
+}
+
+func (i *int96ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.Int96())
+ i.maxValues = append(i.maxValues, max.Int96())
+}
+
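+// reverseInt96MinMaxValues encodes each Int96 bound as three little-endian
+// uint32 limbs (12 bytes per value), matching the layout produced on
+// little-endian hosts.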
+func reverseInt96MinMaxValues(mLen int, mVal []deprecated.Int96) []byte {
+ buf := make([]byte, mLen*12)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k][0]))
+ binary.LittleEndian.PutUint32(buf[(4+idx):(8+idx)], uint32(mVal[k][1]))
+ binary.LittleEndian.PutUint32(buf[(8+idx):(12+idx)], uint32(mVal[k][2]))
+ idx += 12
+ }
+ return buf
+}
+
+func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseInt96MinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseInt96MinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 12),
+ splitFixedLenByteArrays(byteMax, 12),
+ deprecated.OrderOfInt96(i.minValues),
+ deprecated.OrderOfInt96(i.maxValues),
+ )
+}
+
+type floatColumnIndexer struct {
+ baseColumnIndexer
+ minValues []float32
+ maxValues []float32
+}
+
+func newFloatColumnIndexer() *floatColumnIndexer {
+ return new(floatColumnIndexer)
+}
+
+func (i *floatColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.float())
+ i.maxValues = append(i.maxValues, max.float())
+}
+
+func reverseFloatMinMaxValues(mLen int, mVal []float32) []byte {
+ buf := make([]byte, mLen*4)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint32(buf[idx:(4+idx)], math.Float32bits(mVal[k]))
+ idx += 4
+ }
+ return buf
+}
+
+func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseFloatMinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseFloatMinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 4),
+ splitFixedLenByteArrays(byteMax, 4),
+ orderOfFloat32(i.minValues),
+ orderOfFloat32(i.maxValues),
+ )
+}
+
+type doubleColumnIndexer struct {
+ baseColumnIndexer
+ minValues []float64
+ maxValues []float64
+}
+
+func newDoubleColumnIndexer() *doubleColumnIndexer {
+ return new(doubleColumnIndexer)
+}
+
+func (i *doubleColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.double())
+ i.maxValues = append(i.maxValues, max.double())
+}
+
+func reverseDoubleMinMaxValues(mLen int, mVal []float64) []byte {
+ buf := make([]byte, mLen*8)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint64(buf[idx:(8+idx)], math.Float64bits(mVal[k]))
+ idx += 8
+ }
+ return buf
+}
+
+func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseDoubleMinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseDoubleMinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 8),
+ splitFixedLenByteArrays(byteMax, 8),
+ orderOfFloat64(i.minValues),
+ orderOfFloat64(i.maxValues),
+ )
+}
+
+type byteArrayColumnIndexer struct {
+ baseColumnIndexer
+ sizeLimit int
+ minValues []byte
+ maxValues []byte
+}
+
+func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer {
+ return &byteArrayColumnIndexer{sizeLimit: sizeLimit}
+}
+
+func (i *byteArrayColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = plain.AppendByteArray(i.minValues, min.byteArray())
+ i.maxValues = plain.AppendByteArray(i.maxValues, max.byteArray())
+}
+
+func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
+ minValues := splitByteArrays(i.minValues)
+ maxValues := splitByteArrays(i.maxValues)
+ if sizeLimit := i.sizeLimit; sizeLimit > 0 {
+ for i, v := range minValues {
+ minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit)
+ }
+ for i, v := range maxValues {
+ maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit)
+ }
+ }
+ return i.columnIndex(
+ minValues,
+ maxValues,
+ orderOfBytes(minValues),
+ orderOfBytes(maxValues),
+ )
+}
+
+type fixedLenByteArrayColumnIndexer struct {
+ baseColumnIndexer
+ size int
+ sizeLimit int
+ minValues []byte
+ maxValues []byte
+}
+
+func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer {
+ return &fixedLenByteArrayColumnIndexer{
+ size: size,
+ sizeLimit: sizeLimit,
+ }
+}
+
+func (i *fixedLenByteArrayColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.byteArray()...)
+ i.maxValues = append(i.maxValues, max.byteArray()...)
+}
+
+func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
+ minValues := splitFixedLenByteArrays(i.minValues, i.size)
+ maxValues := splitFixedLenByteArrays(i.maxValues, i.size)
+ if sizeLimit := i.sizeLimit; sizeLimit > 0 {
+ for i, v := range minValues {
+ minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit)
+ }
+ for i, v := range maxValues {
+ maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit)
+ }
+ }
+ return i.columnIndex(
+ minValues,
+ maxValues,
+ orderOfBytes(minValues),
+ orderOfBytes(maxValues),
+ )
+}
+
+type uint32ColumnIndexer struct {
+ baseColumnIndexer
+ minValues []uint32
+ maxValues []uint32
+}
+
+func newUint32ColumnIndexer() *uint32ColumnIndexer {
+ return new(uint32ColumnIndexer)
+}
+
+func (i *uint32ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.uint32())
+ i.maxValues = append(i.maxValues, max.uint32())
+}
+
+func reverseUint32MinMaxValues(mLen int, mVal []uint32) []byte {
+ buf := make([]byte, mLen*4)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint32(buf[idx:(4+idx)], mVal[k])
+ idx += 4
+ }
+ return buf
+}
+
+func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseUint32MinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseUint32MinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 4),
+ splitFixedLenByteArrays(byteMax, 4),
+ orderOfUint32(i.minValues),
+ orderOfUint32(i.maxValues),
+ )
+}
+
+type uint64ColumnIndexer struct {
+ baseColumnIndexer
+ minValues []uint64
+ maxValues []uint64
+}
+
+func newUint64ColumnIndexer() *uint64ColumnIndexer {
+ return new(uint64ColumnIndexer)
+}
+
+func (i *uint64ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ i.minValues = append(i.minValues, min.uint64())
+ i.maxValues = append(i.maxValues, max.uint64())
+}
+
+func reverseUint64MinMaxValues(mLen int, mVal []uint64) []byte {
+ buf := make([]byte, mLen*8)
+ idx := 0
+ for k := range mLen {
+ binary.LittleEndian.PutUint64(buf[idx:(8+idx)], mVal[k])
+ idx += 8
+ }
+ return buf
+}
+
+func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ byteMin := reverseUint64MinMaxValues(len(i.minValues), i.minValues)
+ byteMax := reverseUint64MinMaxValues(len(i.maxValues), i.maxValues)
+
+ return i.columnIndex(
+ splitFixedLenByteArrays(byteMin, 8),
+ splitFixedLenByteArrays(byteMax, 8),
+ orderOfUint64(i.minValues),
+ orderOfUint64(i.maxValues),
+ )
+}
+
+type be128ColumnIndexer struct {
+ baseColumnIndexer
+ minValues [][16]byte
+ maxValues [][16]byte
+}
+
+func newBE128ColumnIndexer() *be128ColumnIndexer {
+ return new(be128ColumnIndexer)
+}
+
+func (i *be128ColumnIndexer) Reset() {
+ i.reset()
+ i.minValues = i.minValues[:0]
+ i.maxValues = i.maxValues[:0]
+}
+
+func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
+ i.observe(numValues, numNulls)
+ if !min.IsNull() {
+ i.minValues = append(i.minValues, *(*[16]byte)(min.byteArray()))
+ }
+ if !max.IsNull() {
+ i.maxValues = append(i.maxValues, *(*[16]byte)(max.byteArray()))
+ }
+}
+
+func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex {
+ minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16)
+ maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16)
+ return i.columnIndex(
+ minValues,
+ maxValues,
+ orderOfBytes(minValues),
+ orderOfBytes(maxValues),
+ )
+}
+
+func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte {
+ if len(value) > sizeLimit {
+ value = value[:sizeLimit]
+ }
+ return value
+}
+
+// truncateLargeMaxByteArrayValue truncates the given byte array to the given size limit.
+// If the byte array is truncated, the truncated prefix is incremented by 1 in place so
+// that it remains an upper bound for the original value.
+func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte {
+ if len(value) > sizeLimit {
+ value = value[:sizeLimit]
+ incrementByteArrayInplace(value)
+ }
+ return value
+}
+
+// incrementByteArrayInplace increments the given byte array by 1, in place.
+// Reference: https://github.com/apache/parquet-java/blob/master/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java#L124
+func incrementByteArrayInplace(value []byte) {
+ for i := len(value) - 1; i >= 0; i-- {
+ value[i]++
+ if value[i] != 0 { // did not overflow (an overflow would wrap 0xFF -> 0x00)
+ return
+ }
+ }
+ // Fully overflowed, so restore all to 0xFF
+ for i := range value {
+ value[i] = 0xFF
+ }
+}
+
+func splitByteArrays(data []byte) [][]byte {
+ length := 0
+ plain.RangeByteArray(data, func([]byte) error {
+ length++
+ return nil
+ })
+ buffer := make([]byte, 0, len(data)-(4*length))
+ values := make([][]byte, 0, length)
+ plain.RangeByteArray(data, func(value []byte) error {
+ offset := len(buffer)
+ buffer = append(buffer, value...)
+ values = append(values, buffer[offset:])
+ return nil
+ })
+ return values
+}
+
+func splitFixedLenByteArrays(data []byte, size int) [][]byte {
+ data = copyBytes(data)
+ values := make([][]byte, len(data)/size)
+ for i := range values {
+ j := (i + 0) * size
+ k := (i + 1) * size
+ values[i] = data[j:k:k]
+ }
+ return values
+}
+
+func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder {
+ if minOrder == maxOrder {
+ switch {
+ case minOrder > 0:
+ return format.Ascending
+ case minOrder < 0:
+ return format.Descending
+ }
+ }
+ return format.Unordered
+}
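
The reverse*MinMaxValues helpers above serialize min/max statistics through encoding/binary so that the column index always carries little-endian bytes, the byte order Parquet uses for fixed-width values, regardless of the host CPU. A minimal standalone sketch of that pattern follows; the helper name littleEndianUint32Bytes is hypothetical and only mirrors reverseUint32MinMaxValues above.

package main

import (
	"encoding/binary"
	"fmt"
)

// littleEndianUint32Bytes writes each value in little-endian byte order,
// independent of the byte order of the host CPU.
func littleEndianUint32Bytes(values []uint32) []byte {
	buf := make([]byte, 4*len(values))
	for k, v := range values {
		binary.LittleEndian.PutUint32(buf[4*k:], v)
	}
	return buf
}

func main() {
	fmt.Printf("%% x\n", littleEndianUint32Bytes([]uint32{1, 256}))
	// 01 00 00 00 00 01 00 00 on both little- and big-endian hosts
}
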
diff --git a/vendor/github.com/parquet-go/parquet-go/column_index.go b/vendor/github.com/parquet-go/parquet-go/column_index_le.go
similarity index 90%
rename from vendor/github.com/parquet-go/parquet-go/column_index.go
rename to vendor/github.com/parquet-go/parquet-go/column_index_le.go
index 1252248b6ce..4d8fec4511a 100644
--- a/vendor/github.com/parquet-go/parquet-go/column_index.go
+++ b/vendor/github.com/parquet-go/parquet-go/column_index_le.go
@@ -1,3 +1,7 @@
+// This file is compiled only on little-endian CPU architectures.
+
+//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
+
package parquet
import (
@@ -87,46 +91,55 @@ func (f *formatColumnIndex) IsDescending() bool {
type fileColumnIndex struct{ chunk *fileColumnChunk }
func (i fileColumnIndex) NumPages() int {
- return len(i.chunk.columnIndex.NullPages)
+ return len(i.columnIndex().NullPages)
}
func (i fileColumnIndex) NullCount(j int) int64 {
- if len(i.chunk.columnIndex.NullCounts) > 0 {
- return i.chunk.columnIndex.NullCounts[j]
+ index := i.columnIndex()
+ if len(index.NullCounts) > 0 {
+ return index.NullCounts[j]
}
return 0
}
func (i fileColumnIndex) NullPage(j int) bool {
- return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j]
+ return isNullPage(j, i.columnIndex())
}
func (i fileColumnIndex) MinValue(j int) Value {
- if i.NullPage(j) {
+ index := i.columnIndex()
+ if isNullPage(j, index) {
return Value{}
}
- return i.makeValue(i.chunk.columnIndex.MinValues[j])
+ return i.makeValue(index.MinValues[j])
}
func (i fileColumnIndex) MaxValue(j int) Value {
- if i.NullPage(j) {
+ index := i.columnIndex()
+ if isNullPage(j, index) {
return Value{}
}
- return i.makeValue(i.chunk.columnIndex.MaxValues[j])
+ return i.makeValue(index.MaxValues[j])
}
func (i fileColumnIndex) IsAscending() bool {
- return i.chunk.columnIndex.BoundaryOrder == format.Ascending
+ return i.columnIndex().BoundaryOrder == format.Ascending
}
func (i fileColumnIndex) IsDescending() bool {
- return i.chunk.columnIndex.BoundaryOrder == format.Descending
+ return i.columnIndex().BoundaryOrder == format.Descending
}
func (i *fileColumnIndex) makeValue(b []byte) Value {
return i.chunk.column.typ.Kind().Value(b)
}
+func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() }
+
+func isNullPage(j int, index *format.ColumnIndex) bool {
+ return len(index.NullPages) > 0 && index.NullPages[j]
+}
+
type emptyColumnIndex struct{}
func (emptyColumnIndex) NumPages() int { return 0 }
@@ -325,8 +338,8 @@ func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Val
func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1),
- splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1),
orderOfBool(i.minValues),
orderOfBool(i.maxValues),
)
@@ -356,8 +369,8 @@ func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value
func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4),
- splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4),
orderOfInt32(i.minValues),
orderOfInt32(i.maxValues),
)
@@ -387,8 +400,8 @@ func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value
func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8),
- splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8),
orderOfInt64(i.minValues),
orderOfInt64(i.maxValues),
)
@@ -418,8 +431,8 @@ func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value
func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12),
- splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 12),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 12),
deprecated.OrderOfInt96(i.minValues),
deprecated.OrderOfInt96(i.maxValues),
)
@@ -449,8 +462,8 @@ func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value
func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4),
- splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4),
orderOfFloat32(i.minValues),
orderOfFloat32(i.maxValues),
)
@@ -480,8 +493,8 @@ func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu
func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8),
- splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8),
orderOfFloat64(i.minValues),
orderOfFloat64(i.maxValues),
)
@@ -599,8 +612,8 @@ func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu
func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4),
- splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4),
orderOfUint32(i.minValues),
orderOfUint32(i.maxValues),
)
@@ -630,8 +643,8 @@ func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu
func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex {
return i.columnIndex(
- splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8),
- splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8),
+ splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8),
orderOfUint64(i.minValues),
orderOfUint64(i.maxValues),
)
@@ -664,8 +677,8 @@ func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value
}
func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex {
- minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16)
- maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16)
+ minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16)
+ maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16)
return i.columnIndex(
minValues,
maxValues,
diff --git a/vendor/github.com/parquet-go/parquet-go/convert.go b/vendor/github.com/parquet-go/parquet-go/convert.go
index 8b315739c02..96a49a81f63 100644
--- a/vendor/github.com/parquet-go/parquet-go/convert.go
+++ b/vendor/github.com/parquet-go/parquet-go/convert.go
@@ -11,9 +11,12 @@ import (
"sync"
"time"
+ "golang.org/x/sys/cpu"
+
"github.com/parquet-go/parquet-go/deprecated"
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/format"
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
)
// ConvertError is an error type returned by calls to Convert when the conversion
@@ -912,7 +915,15 @@ func convertStringToInt96(v Value) (Value, error) {
b := i.Bytes()
c := make([]byte, 12)
copy(c, b)
- i96 := deprecated.BytesToInt96(c)
+ if cpu.IsBigEndian {
+ bufLen := len(c)
+ for idx := 0; idx < bufLen; idx = idx + 4 {
+ for m, n := (idx + 0), (idx + 3); m < n; m, n = m+1, n-1 {
+ c[m], c[n] = c[n], c[m]
+ }
+ }
+ }
+ i96 := unsafecast.Slice[deprecated.Int96](c)
return v.convertToInt96(i96[0]), nil
}
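
The convertStringToInt96 hunk above swaps each 4-byte group before casting the buffer to deprecated.Int96, assuming Int96 is backed by uint32 words whose in-memory layout differs between big- and little-endian hosts. A standalone sketch of that swap, with a hypothetical swapWords helper:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

// swapWords reverses each 4-byte group in place, converting between the
// big-endian and little-endian layouts of the underlying uint32 words.
func swapWords(c []byte) {
	for idx := 0; idx+4 <= len(c); idx += 4 {
		for m, n := idx, idx+3; m < n; m, n = m+1, n-1 {
			c[m], c[n] = c[n], c[m]
		}
	}
}

func main() {
	b := []byte{0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3}
	if cpu.IsBigEndian {
		fmt.Println("big-endian host: swapping before the unsafe cast")
	}
	swapWords(b)
	fmt.Printf("%% x\n", b) // 01 00 00 00 02 00 00 00 03 00 00 00
}
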
diff --git a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go
index 1bed7a5d7b5..fc6d40648e8 100644
--- a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go
+++ b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go
@@ -3,7 +3,6 @@ package deprecated
import (
"math/big"
"math/bits"
- "unsafe"
)
// Int96 is an implementation of the deprecated INT96 parquet type.
@@ -98,21 +97,6 @@ func (i Int96) Len() int {
}
}
-// Int96ToBytes converts the slice of Int96 values to a slice of bytes sharing
-// the same backing array.
-func Int96ToBytes(data []Int96) []byte {
- return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 12*len(data))
-}
-
-// BytesToInt96 converts the byte slice passed as argument to a slice of Int96
-// sharing the same backing array.
-//
-// When the number of bytes in the input is not a multiple of 12, the function
-// truncates it in the returned slice.
-func BytesToInt96(data []byte) []Int96 {
- return unsafe.Slice(*(**Int96)(unsafe.Pointer(&data)), len(data)/12)
-}
-
func MaxLenInt96(data []Int96) int {
max := 0
for i := range data {
diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary.go b/vendor/github.com/parquet-go/parquet-go/dictionary.go
index 3bf2d97f0da..9dce0ff6514 100644
--- a/vendor/github.com/parquet-go/parquet-go/dictionary.go
+++ b/vendor/github.com/parquet-go/parquet-go/dictionary.go
@@ -29,7 +29,7 @@ const (
//
// This constant is used to determine a useful chunk size depending on the
// size of values being inserted in dictionaries. More values of small size
- // can fit in CPU caches, so the inserts can operation on larger chunks.
+ // can fit in CPU caches, so the inserts can operate on larger chunks.
insertsTargetCacheFootprint = 8192
)
@@ -140,8 +140,8 @@ func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i)
func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) }
func (d *booleanDictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.insert(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) {
@@ -171,7 +171,7 @@ func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) {
func (d *booleanDictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(false)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) {
@@ -238,8 +238,8 @@ func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *int32Dictionary) index(i int32) int32 { return d.values[i] }
func (d *int32Dictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.insert(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *int32Dictionary) init(indexes []int32) {
@@ -291,7 +291,8 @@ func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) {
func (d *int32Dictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) {
@@ -338,8 +339,7 @@ func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *int64Dictionary) index(i int32) int64 { return d.values[i] }
func (d *int64Dictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.insert(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *int64Dictionary) init(indexes []int32) {
@@ -378,7 +378,7 @@ func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) {
func (d *int64Dictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) {
@@ -520,8 +520,8 @@ func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *floatDictionary) index(i int32) float32 { return d.values[i] }
func (d *floatDictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.insert(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *floatDictionary) init(indexes []int32) {
@@ -560,7 +560,8 @@ func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) {
func (d *floatDictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) {
@@ -607,8 +608,7 @@ func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *doubleDictionary) index(i int32) float64 { return d.values[i] }
func (d *doubleDictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.insert(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *doubleDictionary) init(indexes []int32) {
@@ -647,7 +647,7 @@ func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) {
func (d *doubleDictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) {
@@ -706,8 +706,7 @@ func (d *byteArrayDictionary) Len() int { return d.len() }
func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(int(i))) }
func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr)))
+ d.insert(indexes, makeArrayValue(values, offsetOfPtr))
}
func (d *byteArrayDictionary) init() {
@@ -745,13 +744,13 @@ func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) {
func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValueString("")
memsetValues(values, model)
- d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr)))
+ d.lookupString(indexes, makeArrayValue(values, offsetOfPtr))
}
func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) {
if len(indexes) > 0 {
base := d.index(int(indexes[0]))
- minValue := unsafecast.BytesToString(base)
+ minValue := unsafecast.String(base)
maxValue := minValue
values := [64]string{}
@@ -864,13 +863,13 @@ func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, v
func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValueString("")
memsetValues(values, model)
- d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr)))
+ d.lookupString(indexes, makeArrayValue(values, offsetOfPtr))
}
func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) {
if len(indexes) > 0 {
base := d.index(indexes[0])
- minValue := unsafecast.BytesToString(base)
+ minValue := unsafecast.String(base)
maxValue := minValue
values := [64]string{}
@@ -931,8 +930,8 @@ func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] }
func (d *uint32Dictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.insert(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *uint32Dictionary) init(indexes []int32) {
@@ -971,7 +970,8 @@ func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) {
func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ offset := getOffset(*d)
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset))
}
func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) {
@@ -1018,8 +1018,7 @@ func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i))
func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] }
func (d *uint64Dictionary) Insert(indexes []int32, values []Value) {
- model := Value{}
- d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.insert(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *uint64Dictionary) init(indexes []int32) {
@@ -1058,7 +1057,7 @@ func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) {
func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValue(0)
memsetValues(values, model)
- d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
+ d.lookup(indexes, makeArrayValue(values, offsetOfU64))
}
func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) {
@@ -1176,7 +1175,7 @@ func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) {
func (d *be128Dictionary) Lookup(indexes []int32, values []Value) {
model := d.makeValueString("")
memsetValues(values, model)
- d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr)))
+ d.lookupString(indexes, makeArrayValue(values, offsetOfPtr))
}
func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) {
@@ -1242,7 +1241,7 @@ func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, data e
copy(tmp, values)
values = tmp
} else {
- clear := values[len(values) : len(values)+size]
+ clear := values[len(values):size]
for i := range clear {
clear[i] = 0
}
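
The dictionary hunks above add an extra offset (getOffset plus offsetOfU64) when building sparse arrays over Value.u64. A plausible reading, stated here as an assumption rather than a fact from the patch, is that a 32-bit payload stored in a 64-bit field starts at byte offset 0 on little-endian hosts but at byte offset 4 on big-endian hosts. The sketch below only demonstrates that layout difference:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	// The low-order 32 bits of a uint64 occupy the first four bytes on a
	// little-endian host and the last four bytes on a big-endian host.
	var x uint64 = 0x11223344
	b := unsafe.Slice((*byte)(unsafe.Pointer(&x)), 8)
	fmt.Printf("%% x\n", b) // little-endian: 44 33 22 11 00 00 00 00
}
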
diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go
index be7154ddbde..387e65f7592 100644
--- a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go
+++ b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go
@@ -47,25 +47,25 @@ func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, indexes []in
func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- dict := unsafecast.Int32ToUint32(d.values)
+ dict := unsafecast.Slice[uint32](d.values)
dictionaryLookup32(dict, indexes, rows).check()
}
func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- dict := unsafecast.Int64ToUint64(d.values)
+ dict := unsafecast.Slice[uint64](d.values)
dictionaryLookup64(dict, indexes, rows).check()
}
func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- dict := unsafecast.Float32ToUint32(d.values)
+ dict := unsafecast.Slice[uint32](d.values)
dictionaryLookup32(dict, indexes, rows).check()
}
func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- dict := unsafecast.Float64ToUint64(d.values)
+ dict := unsafecast.Slice[uint64](d.values)
dictionaryLookup64(dict, indexes, rows).check()
}
@@ -83,8 +83,7 @@ func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
//
//dictionaryLookupByteArrayString(d.offsets, d.values, indexes, rows).check()
for i, j := range indexes {
- v := d.index(int(j))
- *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
+ *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j)))
}
}
@@ -92,8 +91,7 @@ func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.
checkLookupIndexBounds(indexes, rows)
//dictionaryLookupFixedLenByteArrayString(d.data, d.size, indexes, rows).check()
for i, j := range indexes {
- v := d.index(j)
- *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
+ *(*string)(rows.Index(i)) = unsafecast.String(d.index(j))
}
}
@@ -109,7 +107,7 @@ func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) {
func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- //dict := unsafecast.Uint128ToBytes(d.values)
+ //dict := unsafecast.Slice[byte](d.values)
//dictionaryLookupFixedLenByteArrayString(dict, 16, indexes, rows).check()
s := "0123456789ABCDEF"
for i, j := range indexes {
@@ -120,7 +118,7 @@ func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) {
func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
- //dict := unsafecast.Uint128ToBytes(d.values)
+ //dict := unsafecast.Slice[byte](d.values)
//dictionaryLookupFixedLenByteArrayPointer(dict, 16, indexes, rows).check()
for i, j := range indexes {
*(**[16]byte)(rows.Index(i)) = d.index(j)
diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go
index a586897fdab..4893415250f 100644
--- a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go
+++ b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go
@@ -5,6 +5,7 @@ package parquet
import (
"unsafe"
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
"github.com/parquet-go/parquet-go/sparse"
)
@@ -39,16 +40,14 @@ func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) {
func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
for i, j := range indexes {
- v := d.index(int(j))
- *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
+ *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j)))
}
}
func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
checkLookupIndexBounds(indexes, rows)
for i, j := range indexes {
- v := d.index(j)
- *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
+ *(*string)(rows.Index(i)) = unsafecast.String(d.index(j))
}
}
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go
index f3a66441aa2..23b0202d7b0 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go
@@ -22,13 +22,13 @@ func (e *Encoding) Encoding() format.Encoding {
func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
dst = resize(dst, 4*len(src))
- encodeFloat(dst, unsafecast.Float32ToBytes(src))
+ encodeFloat(dst, unsafecast.Slice[byte](src))
return dst, nil
}
func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
dst = resize(dst, 8*len(src))
- encodeDouble(dst, unsafecast.Float64ToBytes(src))
+ encodeDouble(dst, unsafecast.Slice[byte](src))
return dst, nil
}
@@ -36,18 +36,18 @@ func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) {
if (len(src) % 4) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src))
}
- buf := resize(unsafecast.Float32ToBytes(dst), len(src))
+ buf := resize(unsafecast.Slice[byte](dst), len(src))
decodeFloat(buf, src)
- return unsafecast.BytesToFloat32(buf), nil
+ return unsafecast.Slice[float32](buf), nil
}
func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) {
if (len(src) % 8) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src))
}
- buf := resize(unsafecast.Float64ToBytes(dst), len(src))
+ buf := resize(unsafecast.Slice[byte](dst), len(src))
decodeDouble(buf, src)
- return unsafecast.BytesToFloat64(buf), nil
+ return unsafecast.Slice[float64](buf), nil
}
func resize(buf []byte, size int) []byte {
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go
index 556fb8cce37..6f5bf15c795 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go
@@ -11,7 +11,7 @@ func encodeFloat(dst, src []byte) {
b2 := dst[2*n : 3*n]
b3 := dst[3*n : 4*n]
- for i, v := range unsafecast.BytesToUint32(src) {
+ for i, v := range unsafecast.Slice[uint32](src) {
b0[i] = byte(v >> 0)
b1[i] = byte(v >> 8)
b2[i] = byte(v >> 16)
@@ -30,7 +30,7 @@ func encodeDouble(dst, src []byte) {
b6 := dst[6*n : 7*n]
b7 := dst[7*n : 8*n]
- for i, v := range unsafecast.BytesToUint64(src) {
+ for i, v := range unsafecast.Slice[uint64](src) {
b0[i] = byte(v >> 0)
b1[i] = byte(v >> 8)
b2[i] = byte(v >> 16)
@@ -49,7 +49,7 @@ func decodeFloat(dst, src []byte) {
b2 := src[2*n : 3*n]
b3 := src[3*n : 4*n]
- dst32 := unsafecast.BytesToUint32(dst)
+ dst32 := unsafecast.Slice[uint32](dst)
for i := range dst32 {
dst32[i] = uint32(b0[i]) |
uint32(b1[i])<<8 |
@@ -69,7 +69,7 @@ func decodeDouble(dst, src []byte) {
b6 := src[6*n : 7*n]
b7 := src[7*n : 8*n]
- dst64 := unsafecast.BytesToUint64(dst)
+ dst64 := unsafecast.Slice[uint64](dst)
for i := range dst64 {
dst64[i] = uint64(b0[i]) |
uint64(b1[i])<<8 |
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go
index 36704974fad..cf9d4cfc9ec 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go
@@ -34,15 +34,15 @@ func (e *BinaryPackedEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, err
}
func (e *BinaryPackedEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
- buf := unsafecast.Int32ToBytes(dst)
+ buf := unsafecast.Slice[byte](dst)
buf, _, err := decodeInt32(buf[:0], src)
- return unsafecast.BytesToInt32(buf), e.wrap(err)
+ return unsafecast.Slice[int32](buf), e.wrap(err)
}
func (e *BinaryPackedEncoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
- buf := unsafecast.Int64ToBytes(dst)
+ buf := unsafecast.Slice[byte](dst)
buf, _, err := decodeInt64(buf[:0], src)
- return unsafecast.BytesToInt64(buf), e.wrap(err)
+ return unsafecast.Slice[int64](buf), e.wrap(err)
}
func (e *BinaryPackedEncoding) wrap(err error) error {
@@ -290,7 +290,7 @@ func decodeInt32(dst, src []byte) ([]byte, []byte, error) {
writeOffset := len(dst)
dst = resize(dst, len(dst)+4*totalValues)
- out := unsafecast.BytesToInt32(dst)
+ out := unsafecast.Slice[int32](dst)
out[writeOffset] = int32(firstValue)
writeOffset++
totalValues--
@@ -354,7 +354,7 @@ func decodeInt64(dst, src []byte) ([]byte, []byte, error) {
writeOffset := len(dst)
dst = resize(dst, len(dst)+8*totalValues)
- out := unsafecast.BytesToInt64(dst)
+ out := unsafecast.Slice[int64](dst)
out[writeOffset] = firstValue
writeOffset++
totalValues--
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go
index 5da4c0e933d..11a5a538b1f 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go
@@ -230,7 +230,7 @@ func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) {
case hasAVX2 && bitWidth <= 31:
decodeMiniBlockInt32x27to31bitsAVX2(dst, src, bitWidth)
case bitWidth == 32:
- copy(dst, unsafecast.Uint32ToInt32(src))
+ copy(dst, unsafecast.Slice[int32](src))
default:
decodeMiniBlockInt32Default(dst, src, bitWidth)
}
@@ -249,7 +249,7 @@ func decodeMiniBlockInt64Default(dst []int64, src []uint32, bitWidth uint)
func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) {
switch {
case bitWidth == 64:
- copy(dst, unsafecast.Uint32ToInt64(src))
+ copy(dst, unsafecast.Slice[int64](src))
default:
decodeMiniBlockInt64Default(dst, src, bitWidth)
}
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go
index 7b330eac89b..5fe27797754 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go
@@ -20,8 +20,8 @@ func (buf *int32Buffer) resize(size int) {
}
func (buf *int32Buffer) decode(src []byte) ([]byte, error) {
- values, remain, err := decodeInt32(unsafecast.Int32ToBytes(buf.values[:0]), src)
- buf.values = unsafecast.BytesToInt32(values)
+ values, remain, err := decodeInt32(unsafecast.Slice[byte](buf.values[:0]), src)
+ buf.values = unsafecast.Slice[int32](values)
return remain, err
}
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go
index a107e61d028..8a58af5c39d 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go
@@ -6,6 +6,7 @@ package plain
import (
"encoding/binary"
"fmt"
+ "golang.org/x/sys/cpu"
"io"
"math"
@@ -37,23 +38,67 @@ func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) {
}
func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
- return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := len(src)
+ byteEnc := make([]byte, (srcLen * 4))
+ idx := 0
+ for k := range srcLen {
+ binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k]))
+ idx += 4
+ }
+ return append(dst[:0], (byteEnc)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[byte](src)...), nil
+ }
}
func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
- return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := len(src)
+ byteEnc := make([]byte, (srcLen * 8))
+ idx := 0
+ for k := range srcLen {
+ binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k]))
+ idx += 8
+ }
+ return append(dst[:0], (byteEnc)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[byte](src)...), nil
+ }
}
func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) {
- return append(dst[:0], deprecated.Int96ToBytes(src)...), nil
+ return append(dst[:0], unsafecast.Slice[byte](src)...), nil
}
func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
- return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := len(src)
+ byteEnc := make([]byte, (srcLen * 4))
+ idx := 0
+ for k := range srcLen {
+ binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k]))
+ idx += 4
+ }
+ return append(dst[:0], (byteEnc)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[byte](src)...), nil
+ }
}
func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
- return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := len(src)
+ byteEnc := make([]byte, (srcLen * 8))
+ idx := 0
+ for k := range srcLen {
+ binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k]))
+ idx += 8
+ }
+ return append(dst[:0], (byteEnc)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[byte](src)...), nil
+ }
}
func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) {
@@ -86,35 +131,84 @@ func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
if (len(src) % 4) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src))
}
- return append(dst[:0], unsafecast.BytesToInt32(src)...), nil
+
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 4)
+ byteDec := make([]int32, srcLen)
+ idx := 0
+ for k := range srcLen {
+ byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))
+ idx += 4
+ }
+ return append(dst[:0], (byteDec)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[int32](src)...), nil
+ }
}
func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
if (len(src) % 8) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src))
}
- return append(dst[:0], unsafecast.BytesToInt64(src)...), nil
+
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 8)
+ byteDec := make([]int64, srcLen)
+ idx := 0
+ for k := range srcLen {
+ byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))
+ idx += 8
+ }
+
+ return append(dst[:0], (byteDec)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[int64](src)...), nil
+ }
}
func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) {
if (len(src) % 12) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src))
}
- return append(dst[:0], deprecated.BytesToInt96(src)...), nil
+ return append(dst[:0], unsafecast.Slice[deprecated.Int96](src)...), nil
}
func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) {
if (len(src) % 4) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src))
}
- return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 4)
+ byteDec := make([]float32, srcLen)
+ idx := 0
+ for k := range srcLen {
+ byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])))
+ idx += 4
+ }
+
+ return append(dst[:0], (byteDec)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[float32](src)...), nil
+ }
}
func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) {
if (len(src) % 8) != 0 {
return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src))
}
- return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 8)
+ byteDec := make([]float64, srcLen)
+ idx := 0
+ for k := range srcLen {
+ byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])))
+ idx += 8
+ }
+
+ return append(dst[:0], (byteDec)...), nil
+ } else {
+ return append(dst[:0], unsafecast.Slice[float64](src)...), nil
+ }
}
func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) {
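
The PLAIN encoder and decoder above branch on cpu.IsBigEndian: little-endian hosts keep the zero-copy unsafecast path, while big-endian hosts convert through encoding/binary. A minimal round-trip sketch of the INT32 conversion path, using hypothetical helper names:

package main

import (
	"encoding/binary"
	"fmt"
)

// encodeInt32LE mirrors the big-endian branch of EncodeInt32: values are
// always written little-endian, as the PLAIN encoding requires.
func encodeInt32LE(src []int32) []byte {
	out := make([]byte, 4*len(src))
	for k, v := range src {
		binary.LittleEndian.PutUint32(out[4*k:], uint32(v))
	}
	return out
}

// decodeInt32LE mirrors the big-endian branch of DecodeInt32.
func decodeInt32LE(src []byte) []int32 {
	out := make([]int32, len(src)/4)
	for k := range out {
		out[k] = int32(binary.LittleEndian.Uint32(src[4*k:]))
	}
	return out
}

func main() {
	enc := encodeInt32LE([]int32{-1, 42})
	fmt.Printf("%% x -> %v\n", enc, decodeInt32LE(enc)) // ff ff ff ff 2a 00 00 00 -> [-1 42]
}
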
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go
index 763172de5b3..8304afc0188 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go
@@ -31,9 +31,9 @@ func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, erro
if len(src) == 0 {
return dst[:0], nil
}
- buf := unsafecast.Int32ToBytes(dst)
+ buf := unsafecast.Slice[byte](dst)
buf, err := decodeInt32(buf[:0], src[1:], uint(src[0]))
- return unsafecast.BytesToInt32(buf), e.wrap(err)
+ return unsafecast.Slice[int32](buf), e.wrap(err)
}
func (e *DictionaryEncoding) wrap(err error) error {
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go
index 4b63ed6d42b..82ddaa1b512 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go
@@ -11,6 +11,8 @@ import (
"io"
"unsafe"
+ "golang.org/x/sys/cpu"
+
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/format"
"github.com/parquet-go/parquet-go/internal/bitpack"
@@ -83,9 +85,9 @@ func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) {
}
func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
- buf := unsafecast.Int32ToBytes(dst)
+ buf := unsafecast.Slice[byte](dst)
buf, err := decodeInt32(buf[:0], src, uint(e.BitWidth))
- return unsafecast.BytesToInt32(buf), e.wrap(err)
+ return unsafecast.Slice[int32](buf), e.wrap(err)
}
func (e *Encoding) wrap(err error) error {
@@ -151,7 +153,17 @@ func encodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) {
}
if len(src) >= 8 {
- words := unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8)
+ words := unsafecast.Slice[uint64](src)
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 8)
+ idx := 0
+ for k := range srcLen {
+ words[k] = binary.LittleEndian.Uint64((src)[idx:(8 + idx)])
+ idx += 8
+ }
+ } else {
+ words = unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8)
+ }
for i := 0; i < len(words); {
j := i
@@ -196,14 +208,14 @@ func encodeInt32(dst []byte, src []int32, bitWidth uint) ([]byte, error) {
return dst, errEncodeInvalidBitWidth("INT32", bitWidth)
}
if bitWidth == 0 {
- if !isZero(unsafecast.Int32ToBytes(src)) {
+ if !isZero(unsafecast.Slice[byte](src)) {
return dst, errEncodeInvalidBitWidth("INT32", bitWidth)
}
return appendUvarint(dst, uint64(len(src))<<1), nil
}
if len(src) >= 8 {
- words := unsafe.Slice((*[8]int32)(unsafe.Pointer(&src[0])), len(src)/8)
+ words := unsafecast.Slice[[8]int32](src)
for i := 0; i < len(words); {
j := i
@@ -373,7 +385,7 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) {
in = buf
}
- out := unsafecast.BytesToInt32(dst[offset:])
+ out := unsafecast.Slice[int32](dst[offset:])
bitpack.UnpackInt32(out, in, bitWidth)
i += length
} else {
@@ -385,6 +397,13 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) {
bits := [4]byte{}
copy(bits[:], src[i:j])
+
+ // Swap the bytes in the "bits" array to handle big-endian architectures.
+ if cpu.IsBigEndian {
+ for m, n := 0, 3; m < n; m, n = m+1, n-1 {
+ bits[m], bits[n] = bits[n], bits[m]
+ }
+ }
dst = appendRepeat(dst, bits[:], count)
i = j
}
@@ -500,7 +519,7 @@ func grow(buf []byte, size int) []byte {
}
func encodeInt32BitpackDefault(dst []byte, src [][8]int32, bitWidth uint) int {
- bits := unsafe.Slice((*int32)(unsafe.Pointer(&src[0])), len(src)*8)
+ bits := unsafecast.Slice[int32](src)
bitpack.PackInt32(dst, bits, bitWidth)
return bitpack.ByteCount(uint(len(src)*8) * bitWidth)
}
diff --git a/vendor/github.com/segmentio/encoding/LICENSE b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE
similarity index 100%
rename from vendor/github.com/segmentio/encoding/LICENSE
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE
diff --git a/vendor/github.com/segmentio/encoding/thrift/binary.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go
similarity index 98%
rename from vendor/github.com/segmentio/encoding/thrift/binary.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go
index 18d95d9abf9..73f15b03afb 100644
--- a/vendor/github.com/segmentio/encoding/thrift/binary.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go
@@ -7,6 +7,8 @@ import (
"fmt"
"io"
"math"
+
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
)
// BinaryProtocol is a Protocol implementation for the binary thrift protocol.
@@ -96,7 +98,7 @@ func (r *binaryReader) ReadBytes() ([]byte, error) {
func (r *binaryReader) ReadString() (string, error) {
b, err := r.ReadBytes()
- return unsafeBytesToString(b), err
+ return unsafecast.String(b), err
}
func (r *binaryReader) ReadLength() (int, error) {
@@ -126,7 +128,7 @@ func (r *binaryReader) ReadMessage() (Message, error) {
if err != nil {
return m, dontExpectEOF(err)
}
- m.Name = unsafeBytesToString(s)
+ m.Name = unsafecast.String(s)
t, err := r.ReadInt8()
if err != nil {
diff --git a/vendor/github.com/segmentio/encoding/thrift/compact.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go
similarity index 98%
rename from vendor/github.com/segmentio/encoding/thrift/compact.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go
index 6a286572a73..7bca5771deb 100644
--- a/vendor/github.com/segmentio/encoding/thrift/compact.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go
@@ -7,6 +7,8 @@ import (
"fmt"
"io"
"math"
+
+ "github.com/parquet-go/parquet-go/internal/unsafecast"
)
// CompactProtocol is a Protocol implementation for the compact thrift protocol.
@@ -77,7 +79,7 @@ func (r *compactReader) ReadBytes() ([]byte, error) {
func (r *compactReader) ReadString() (string, error) {
b, err := r.ReadBytes()
- return unsafeBytesToString(b), err
+ return unsafecast.String(b), err
}
func (r *compactReader) ReadLength() (int, error) {
diff --git a/vendor/github.com/segmentio/encoding/thrift/debug.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/debug.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/decode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/decode.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/encode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/encode.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/error.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/error.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/protocol.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/protocol.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/struct.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/struct.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/thrift.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go
similarity index 100%
rename from vendor/github.com/segmentio/encoding/thrift/thrift.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go
diff --git a/vendor/github.com/segmentio/encoding/thrift/unsafe.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go
similarity index 85%
rename from vendor/github.com/segmentio/encoding/thrift/unsafe.go
rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go
index 9572b40ef0a..b27c6489d8d 100644
--- a/vendor/github.com/segmentio/encoding/thrift/unsafe.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go
@@ -18,7 +18,3 @@ func makeTypeID(t reflect.Type) typeID {
ptr: (*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1],
}
}
-
-func unsafeBytesToString(b []byte) string {
- return *(*string)(unsafe.Pointer(&b))
-}
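
Throughout this change, the typed converters (Int32ToBytes, BytesToFloat64, unsafeBytesToString, and so on) are replaced by generic helpers such as unsafecast.Slice[T] and unsafecast.String. The standalone sketch below shows one plausible shape of such a generic reinterpret-cast; it is an assumption about the internal package, not the vendored implementation.

package main

import (
	"fmt"
	"unsafe"
)

// sliceCast reinterprets the memory backing src as a slice of To, keeping the
// total byte length constant. No data is copied and no byte order conversion
// happens, which is exactly why the big-endian paths above avoid it.
func sliceCast[To, From any](src []From) []To {
	var from From
	var to To
	n := uintptr(len(src)) * unsafe.Sizeof(from) / unsafe.Sizeof(to)
	return unsafe.Slice((*To)(unsafe.Pointer(unsafe.SliceData(src))), n)
}

func main() {
	u := []uint32{0x01020304}
	fmt.Printf("%% x\n", sliceCast[byte](u)) // 04 03 02 01 on little-endian hosts
}
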
diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/values.go b/vendor/github.com/parquet-go/parquet-go/encoding/values.go
index a53a7b9f896..41ab0a23e35 100644
--- a/vendor/github.com/parquet-go/parquet-go/encoding/values.go
+++ b/vendor/github.com/parquet-go/parquet-go/encoding/values.go
@@ -82,27 +82,27 @@ func (v *Values) Boolean() []byte {
func (v *Values) Int32() []int32 {
v.assertKind(Int32)
- return unsafecast.BytesToInt32(v.data)
+ return unsafecast.Slice[int32](v.data)
}
func (v *Values) Int64() []int64 {
v.assertKind(Int64)
- return unsafecast.BytesToInt64(v.data)
+ return unsafecast.Slice[int64](v.data)
}
func (v *Values) Int96() []deprecated.Int96 {
v.assertKind(Int96)
- return deprecated.BytesToInt96(v.data)
+ return unsafecast.Slice[deprecated.Int96](v.data)
}
func (v *Values) Float() []float32 {
v.assertKind(Float)
- return unsafecast.BytesToFloat32(v.data)
+ return unsafecast.Slice[float32](v.data)
}
func (v *Values) Double() []float64 {
v.assertKind(Double)
- return unsafecast.BytesToFloat64(v.data)
+ return unsafecast.Slice[float64](v.data)
}
func (v *Values) ByteArray() (data []byte, offsets []uint32) {
@@ -117,123 +117,86 @@ func (v *Values) FixedLenByteArray() (data []byte, size int) {
func (v *Values) Uint32() []uint32 {
v.assertKind(Int32)
- return unsafecast.BytesToUint32(v.data)
+ return unsafecast.Slice[uint32](v.data)
}
func (v *Values) Uint64() []uint64 {
v.assertKind(Int64)
- return unsafecast.BytesToUint64(v.data)
+ return unsafecast.Slice[uint64](v.data)
}
func (v *Values) Uint128() [][16]byte {
v.assertKind(FixedLenByteArray)
v.assertSize(16)
- return unsafecast.BytesToUint128(v.data)
+ return unsafecast.Slice[[16]byte](v.data)
+}
+
+func makeValues[T any](kind Kind, values []T) Values {
+ return Values{kind: kind, data: unsafecast.Slice[byte](values)}
}
func BooleanValues(values []byte) Values {
- return Values{
- kind: Boolean,
- data: values,
- }
+ return makeValues(Boolean, values)
}
func Int32Values(values []int32) Values {
- return Values{
- kind: Int32,
- data: unsafecast.Int32ToBytes(values),
- }
+ return makeValues(Int32, values)
}
func Int64Values(values []int64) Values {
- return Values{
- kind: Int64,
- data: unsafecast.Int64ToBytes(values),
- }
+ return makeValues(Int64, values)
}
func Int96Values(values []deprecated.Int96) Values {
- return Values{
- kind: Int96,
- data: deprecated.Int96ToBytes(values),
- }
+ return makeValues(Int96, values)
}
func FloatValues(values []float32) Values {
- return Values{
- kind: Float,
- data: unsafecast.Float32ToBytes(values),
- }
+ return makeValues(Float, values)
}
func DoubleValues(values []float64) Values {
- return Values{
- kind: Double,
- data: unsafecast.Float64ToBytes(values),
- }
+ return makeValues(Double, values)
}
func ByteArrayValues(values []byte, offsets []uint32) Values {
- return Values{
- kind: ByteArray,
- data: values,
- offsets: offsets,
- }
+ return Values{kind: ByteArray, data: values, offsets: offsets}
}
func FixedLenByteArrayValues(values []byte, size int) Values {
- return Values{
- kind: FixedLenByteArray,
- size: int32(size),
- data: values,
- }
+ return Values{kind: FixedLenByteArray, size: int32(size), data: values}
}
func Uint32Values(values []uint32) Values {
- return Int32Values(unsafecast.Uint32ToInt32(values))
+ return Int32Values(unsafecast.Slice[int32](values))
}
func Uint64Values(values []uint64) Values {
- return Int64Values(unsafecast.Uint64ToInt64(values))
+ return Int64Values(unsafecast.Slice[int64](values))
}
func Uint128Values(values [][16]byte) Values {
- return FixedLenByteArrayValues(unsafecast.Uint128ToBytes(values), 16)
+ return FixedLenByteArrayValues(unsafecast.Slice[byte](values), 16)
}
func Int32ValuesFromBytes(values []byte) Values {
- return Values{
- kind: Int32,
- data: values,
- }
+ return Values{kind: Int32, data: values}
}
func Int64ValuesFromBytes(values []byte) Values {
- return Values{
- kind: Int64,
- data: values,
- }
+ return Values{kind: Int64, data: values}
}
func Int96ValuesFromBytes(values []byte) Values {
- return Values{
- kind: Int96,
- data: values,
- }
+ return Values{kind: Int96, data: values}
}
func FloatValuesFromBytes(values []byte) Values {
- return Values{
- kind: Float,
- data: values,
- }
+ return Values{kind: Float, data: values}
}
func DoubleValuesFromBytes(values []byte) Values {
- return Values{
- kind: Double,
- data: values,
- }
+ return Values{kind: Double, data: values}
}
func EncodeBoolean(dst []byte, src Values, enc Encoding) ([]byte, error) {
diff --git a/vendor/github.com/parquet-go/parquet-go/file.go b/vendor/github.com/parquet-go/parquet-go/file.go
index 791eb51eece..384042b308c 100644
--- a/vendor/github.com/parquet-go/parquet-go/file.go
+++ b/vendor/github.com/parquet-go/parquet-go/file.go
@@ -9,9 +9,9 @@ import (
"sort"
"strings"
"sync"
+ "sync/atomic"
- "github.com/segmentio/encoding/thrift"
-
+ "github.com/parquet-go/parquet-go/encoding/thrift"
"github.com/parquet-go/parquet-go/format"
)
@@ -391,8 +391,8 @@ func (g *fileRowGroup) init(file *File, schema *Schema, columns []*Column, rowGr
if file.hasIndexes() {
j := (int(rowGroup.Ordinal) * len(columns)) + i
- fileColumnChunks[i].columnIndex = &file.columnIndexes[j]
- fileColumnChunks[i].offsetIndex = &file.offsetIndexes[j]
+ fileColumnChunks[i].columnIndex.Store(&file.columnIndexes[j])
+ fileColumnChunks[i].offsetIndex.Store(&file.offsetIndexes[j])
}
g.columns[i] = &fileColumnChunks[i]
@@ -442,8 +442,8 @@ type fileColumnChunk struct {
column *Column
bloomFilter *bloomFilter
rowGroup *format.RowGroup
- columnIndex *format.ColumnIndex
- offsetIndex *format.OffsetIndex
+ columnIndex atomic.Pointer[format.ColumnIndex]
+ offsetIndex atomic.Pointer[format.OffsetIndex]
chunk *format.ColumnChunk
}
@@ -462,23 +462,25 @@ func (c *fileColumnChunk) Pages() Pages {
}
func (c *fileColumnChunk) ColumnIndex() (ColumnIndex, error) {
- if err := c.readColumnIndex(); err != nil {
+ index, err := c.readColumnIndex()
+ if err != nil {
return nil, err
}
- if c.columnIndex == nil || c.chunk.ColumnIndexOffset == 0 {
+ if index == nil || c.chunk.ColumnIndexOffset == 0 {
return nil, ErrMissingColumnIndex
}
return fileColumnIndex{c}, nil
}
func (c *fileColumnChunk) OffsetIndex() (OffsetIndex, error) {
- if err := c.readOffsetIndex(); err != nil {
+ index, err := c.readOffsetIndex()
+ if err != nil {
return nil, err
}
- if c.offsetIndex == nil || c.chunk.OffsetIndexOffset == 0 {
+ if index == nil || c.chunk.OffsetIndexOffset == 0 {
return nil, ErrMissingOffsetIndex
}
- return (*fileOffsetIndex)(c.offsetIndex), nil
+ return (*fileOffsetIndex)(index), nil
}
func (c *fileColumnChunk) BloomFilter() BloomFilter {
@@ -492,48 +494,59 @@ func (c *fileColumnChunk) NumValues() int64 {
return c.chunk.MetaData.NumValues
}
-func (c *fileColumnChunk) readColumnIndex() error {
- if c.columnIndex != nil {
- return nil
+func (c *fileColumnChunk) readColumnIndex() (*format.ColumnIndex, error) {
+ if index := c.columnIndex.Load(); index != nil {
+ return index, nil
}
chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()]
offset, length := chunkMeta.ColumnIndexOffset, chunkMeta.ColumnIndexLength
if offset == 0 {
- return nil
+ return nil, nil
}
indexData := make([]byte, int(length))
var columnIndex format.ColumnIndex
if _, err := readAt(c.file.reader, indexData, offset); err != nil {
- return fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err)
+ return nil, fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err)
}
if err := thrift.Unmarshal(&c.file.protocol, indexData, &columnIndex); err != nil {
- return fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err)
+ return nil, fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err)
}
- c.columnIndex = &columnIndex
- return nil
+ index := &columnIndex
+ // We do a CAS (and Load on CAS failure) instead of a simple Store for
+ // the nice property that concurrent calling goroutines will only ever
+ // observe a single pointer value for the result.
+ if !c.columnIndex.CompareAndSwap(nil, index) {
+ // another goroutine populated it since we last read the pointer
+ return c.columnIndex.Load(), nil
+ }
+ return index, nil
}
-func (c *fileColumnChunk) readOffsetIndex() error {
- if c.offsetIndex != nil {
- return nil
+func (c *fileColumnChunk) readOffsetIndex() (*format.OffsetIndex, error) {
+ if index := c.offsetIndex.Load(); index != nil {
+ return index, nil
}
chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()]
offset, length := chunkMeta.OffsetIndexOffset, chunkMeta.OffsetIndexLength
if offset == 0 {
- return nil
+ return nil, nil
}
indexData := make([]byte, int(length))
var offsetIndex format.OffsetIndex
if _, err := readAt(c.file.reader, indexData, offset); err != nil {
- return fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err)
+ return nil, fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err)
}
if err := thrift.Unmarshal(&c.file.protocol, indexData, &offsetIndex); err != nil {
- return fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err)
+ return nil, fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err)
}
- c.offsetIndex = &offsetIndex
- return nil
+ index := &offsetIndex
+ if !c.offsetIndex.CompareAndSwap(nil, index) {
+ // another goroutine populated it since we last read the pointer
+ return c.offsetIndex.Load(), nil
+ }
+ return index, nil
}
type filePages struct {
@@ -745,7 +758,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) {
if f.chunk == nil {
return io.ErrClosedPipe
}
- if f.chunk.offsetIndex == nil {
+ if index := f.chunk.offsetIndex.Load(); index == nil {
_, err = f.section.Seek(f.dataOffset-f.baseOffset, io.SeekStart)
f.skip = rowIndex
f.index = 0
@@ -753,7 +766,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) {
f.index = 1
}
} else {
- pages := f.chunk.offsetIndex.PageLocations
+ pages := index.PageLocations
index := sort.Search(len(pages), func(i int) bool {
return pages[i].FirstRowIndex > rowIndex
}) - 1
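Editor's note: the columnIndex/offsetIndex fields now use atomic.Pointer with a CompareAndSwap on first read, so concurrent callers can populate the cache without a lock yet all observe a single pointer value, as the comment in readColumnIndex explains. A generic sketch of that load-or-store-once pattern (names here are illustrative, not part of the vendored code):

package main

import (
	"fmt"
	"sync/atomic"
)

// loadOnce returns the value cached in p, computing it if needed; a goroutine
// that loses the CAS race discards its result and returns the winner's pointer.
func loadOnce[T any](p *atomic.Pointer[T], compute func() (*T, error)) (*T, error) {
	if v := p.Load(); v != nil {
		return v, nil
	}
	v, err := compute()
	if err != nil || v == nil {
		return v, err
	}
	if !p.CompareAndSwap(nil, v) {
		return p.Load(), nil // another goroutine populated it first
	}
	return v, nil
}

func main() {
	var cache atomic.Pointer[string]
	v, _ := loadOnce(&cache, func() (*string, error) { s := "expensive result"; return &s, nil })
	fmt.Println(*v)
}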
diff --git a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go
index a95e9e96be9..0a1686f17b6 100644
--- a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go
+++ b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go
@@ -98,7 +98,7 @@ func (t *Int32Table) Len() int { return t.len }
func (t *Int32Table) Cap() int { return t.size() }
func (t *Int32Table) Probe(keys, values []int32) int {
- return t.probe(unsafecast.Int32ToUint32(keys), values)
+ return t.probe(unsafecast.Slice[uint32](keys), values)
}
func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int {
@@ -118,7 +118,7 @@ func (t *Float32Table) Len() int { return t.len }
func (t *Float32Table) Cap() int { return t.size() }
func (t *Float32Table) Probe(keys []float32, values []int32) int {
- return t.probe(unsafecast.Float32ToUint32(keys), values)
+ return t.probe(unsafecast.Slice[uint32](keys), values)
}
func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int {
@@ -342,7 +342,7 @@ func (t *Int64Table) Len() int { return t.len }
func (t *Int64Table) Cap() int { return t.size() }
func (t *Int64Table) Probe(keys []int64, values []int32) int {
- return t.probe(unsafecast.Int64ToUint64(keys), values)
+ return t.probe(unsafecast.Slice[uint64](keys), values)
}
func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int {
@@ -362,7 +362,7 @@ func (t *Float64Table) Len() int { return t.len }
func (t *Float64Table) Cap() int { return t.size() }
func (t *Float64Table) Probe(keys []float64, values []int32) int {
- return t.probe(unsafecast.Float64ToUint64(keys), values)
+ return t.probe(unsafecast.Slice[uint64](keys), values)
}
func (t *Float64Table) ProbeArray(keys sparse.Float64Array, values []int32) int {
@@ -639,7 +639,7 @@ func (t *table128) init(cap int, maxLoad float64) {
func (t *table128) kv() (keys [][16]byte, values []int32) {
i := t.cap * 16
- return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:])
+ return unsafecast.Slice[[16]byte](t.table[:i]), unsafecast.Slice[int32](t.table[i:])
}
func (t *table128) grow(totalValues int) {
@@ -753,8 +753,8 @@ func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int {
func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int {
modulo := uintptr(tableCap) - 1
offset := uintptr(tableCap) * 16
- tableKeys := unsafecast.BytesToUint128(table[:offset])
- tableValues := unsafecast.BytesToInt32(table[offset:])
+ tableKeys := unsafecast.Slice[[16]byte](table[:offset])
+ tableValues := unsafecast.Slice[int32](table[offset:])
for i, hash := range hashes {
key := keys.Index(i)
diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go
index fd92c18380d..f3932223919 100644
--- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go
+++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go
@@ -29,7 +29,7 @@ func unpackInt32(dst []int32, src []byte, bitWidth uint) {
case hasAVX2 && bitWidth <= 31:
unpackInt32x27to31bitsAVX2(dst, src, bitWidth)
case bitWidth == 32:
- copy(dst, unsafecast.BytesToInt32(src))
+ copy(dst, unsafecast.Slice[int32](src))
default:
unpackInt32Default(dst, src, bitWidth)
}
diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go
index b7d46ba9536..cddbd773a51 100644
--- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go
+++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go
@@ -3,11 +3,27 @@
package bitpack
import (
+ "encoding/binary"
+
+ "golang.org/x/sys/cpu"
+
"github.com/parquet-go/parquet-go/internal/unsafecast"
)
func unpackInt32(dst []int32, src []byte, bitWidth uint) {
- bits := unsafecast.BytesToUint32(src)
+ var bits []uint32
+ if cpu.IsBigEndian {
+ srcLen := (len(src) / 4)
+ bits = make([]uint32, srcLen)
+ idx := 0
+ for k := range srcLen {
+ bits[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)])
+ idx += 4
+ }
+ } else {
+ bits = unsafecast.Slice[uint32](src)
+ }
+
bitMask := uint32(1<<bitWidth) - 1
func streakOfTrue(data []bool) int {
- if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 0); i >= 0 {
+ if i := bytes.IndexByte(unsafecast.Slice[byte](data), 0); i >= 0 {
return i
}
return len(data)
}
func streakOfFalse(data []bool) int {
- if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 1); i >= 0 {
+ if i := bytes.IndexByte(unsafecast.Slice[byte](data), 1); i >= 0 {
return i
}
return len(data)
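Editor's note: the purego bit-unpacking change above is the part most relevant to s390x. On a big-endian host the little-endian Parquet bytes can no longer be reinterpreted in place, so they are decoded word by word. A standalone sketch of that conversion:

package main

import (
	"encoding/binary"
	"fmt"
)

// decodeLEUint32s decodes little-endian 32-bit words into host-order uint32s,
// mirroring the big-endian branch added above.
func decodeLEUint32s(src []byte) []uint32 {
	out := make([]uint32, len(src)/4)
	for i := range out {
		out[i] = binary.LittleEndian.Uint32(src[4*i:])
	}
	return out
}

func main() {
	fmt.Println(decodeLEUint32s([]byte{0x01, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00})) // [1 255]
}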
diff --git a/vendor/github.com/parquet-go/parquet-go/order_purego.go b/vendor/github.com/parquet-go/parquet-go/order_purego.go
index 2011455e152..44c4d7905e3 100644
--- a/vendor/github.com/parquet-go/parquet-go/order_purego.go
+++ b/vendor/github.com/parquet-go/parquet-go/order_purego.go
@@ -2,174 +2,28 @@
package parquet
-// -----------------------------------------------------------------------------
-// TODO: use generics versions of the these functions to reduce the amount of
-// code to maintain when we drop compatilibty with Go version older than 1.18.
-// -----------------------------------------------------------------------------
+import "cmp"
-func orderOfInt32(data []int32) int {
- if len(data) > 1 {
- if int32AreInAscendingOrder(data) {
- return +1
- }
- if int32AreInDescendingOrder(data) {
- return -1
- }
- }
- return 0
-}
-
-func orderOfInt64(data []int64) int {
- if len(data) > 1 {
- if int64AreInAscendingOrder(data) {
- return +1
- }
- if int64AreInDescendingOrder(data) {
- return -1
- }
- }
- return 0
-}
-
-func orderOfUint32(data []uint32) int {
- if len(data) > 1 {
- if uint32AreInAscendingOrder(data) {
- return +1
- }
- if uint32AreInDescendingOrder(data) {
- return -1
- }
- }
- return 0
-}
-
-func orderOfUint64(data []uint64) int {
- if len(data) > 1 {
- if uint64AreInAscendingOrder(data) {
- return +1
- }
- if uint64AreInDescendingOrder(data) {
- return -1
- }
- }
- return 0
-}
-
-func orderOfFloat32(data []float32) int {
- if len(data) > 1 {
- if float32AreInAscendingOrder(data) {
- return +1
- }
- if float32AreInDescendingOrder(data) {
- return -1
- }
- }
- return 0
-}
+func orderOfInt32(data []int32) int { return orderOf(data) }
+func orderOfInt64(data []int64) int { return orderOf(data) }
+func orderOfUint32(data []uint32) int { return orderOf(data) }
+func orderOfUint64(data []uint64) int { return orderOf(data) }
+func orderOfFloat32(data []float32) int { return orderOf(data) }
+func orderOfFloat64(data []float64) int { return orderOf(data) }
-func orderOfFloat64(data []float64) int {
+func orderOf[T cmp.Ordered](data []T) int {
if len(data) > 1 {
- if float64AreInAscendingOrder(data) {
+ if orderIsAscending(data) {
return +1
}
- if float64AreInDescendingOrder(data) {
+ if orderIsDescending(data) {
return -1
}
}
return 0
}
-func int32AreInAscendingOrder(data []int32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] > data[i] {
- return false
- }
- }
- return true
-}
-
-func int32AreInDescendingOrder(data []int32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] < data[i] {
- return false
- }
- }
- return true
-}
-
-func int64AreInAscendingOrder(data []int64) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] > data[i] {
- return false
- }
- }
- return true
-}
-
-func int64AreInDescendingOrder(data []int64) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] < data[i] {
- return false
- }
- }
- return true
-}
-
-func uint32AreInAscendingOrder(data []uint32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] > data[i] {
- return false
- }
- }
- return true
-}
-
-func uint32AreInDescendingOrder(data []uint32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] < data[i] {
- return false
- }
- }
- return true
-}
-
-func uint64AreInAscendingOrder(data []uint64) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] > data[i] {
- return false
- }
- }
- return true
-}
-
-func uint64AreInDescendingOrder(data []uint64) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] < data[i] {
- return false
- }
- }
- return true
-}
-
-func float32AreInAscendingOrder(data []float32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] > data[i] {
- return false
- }
- }
- return true
-}
-
-func float32AreInDescendingOrder(data []float32) bool {
- for i := len(data) - 1; i > 0; i-- {
- if data[i-1] < data[i] {
- return false
- }
- }
- return true
-}
-
-func float64AreInAscendingOrder(data []float64) bool {
+func orderIsAscending[T cmp.Ordered](data []T) bool {
for i := len(data) - 1; i > 0; i-- {
if data[i-1] > data[i] {
return false
@@ -178,7 +32,7 @@ func float64AreInAscendingOrder(data []float64) bool {
return true
}
-func float64AreInDescendingOrder(data []float64) bool {
+func orderIsDescending[T cmp.Ordered](data []T) bool {
for i := len(data) - 1; i > 0; i-- {
if data[i-1] < data[i] {
return false
diff --git a/vendor/github.com/parquet-go/parquet-go/page_values.go b/vendor/github.com/parquet-go/parquet-go/page_values.go
index 964220b4ebb..ecbdffb0c8d 100644
--- a/vendor/github.com/parquet-go/parquet-go/page_values.go
+++ b/vendor/github.com/parquet-go/parquet-go/page_values.go
@@ -149,7 +149,7 @@ type int32PageValues struct {
}
func (r *int32PageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadInt32s(unsafecast.BytesToInt32(b))
+ n, err = r.ReadInt32s(unsafecast.Slice[int32](b))
return 4 * n, err
}
@@ -180,7 +180,7 @@ type int64PageValues struct {
}
func (r *int64PageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadInt64s(unsafecast.BytesToInt64(b))
+ n, err = r.ReadInt64s(unsafecast.Slice[int64](b))
return 8 * n, err
}
@@ -211,7 +211,7 @@ type int96PageValues struct {
}
func (r *int96PageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadInt96s(deprecated.BytesToInt96(b))
+ n, err = r.ReadInt96s(unsafecast.Slice[deprecated.Int96](b))
return 12 * n, err
}
@@ -242,7 +242,7 @@ type floatPageValues struct {
}
func (r *floatPageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadFloats(unsafecast.BytesToFloat32(b))
+ n, err = r.ReadFloats(unsafecast.Slice[float32](b))
return 4 * n, err
}
@@ -273,7 +273,7 @@ type doublePageValues struct {
}
func (r *doublePageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b))
+ n, err = r.ReadDoubles(unsafecast.Slice[float64](b))
return 8 * n, err
}
@@ -395,7 +395,7 @@ type uint32PageValues struct {
}
func (r *uint32PageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadUint32s(unsafecast.BytesToUint32(b))
+ n, err = r.ReadUint32s(unsafecast.Slice[uint32](b))
return 4 * n, err
}
@@ -426,7 +426,7 @@ type uint64PageValues struct {
}
func (r *uint64PageValues) Read(b []byte) (n int, err error) {
- n, err = r.ReadUint64s(unsafecast.BytesToUint64(b))
+ n, err = r.ReadUint64s(unsafecast.Slice[uint64](b))
return 8 * n, err
}
diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/array.go b/vendor/github.com/parquet-go/parquet-go/sparse/array.go
index 94285becb8b..fecfb4dc4d7 100644
--- a/vendor/github.com/parquet-go/parquet-go/sparse/array.go
+++ b/vendor/github.com/parquet-go/parquet-go/sparse/array.go
@@ -8,7 +8,7 @@ import (
type Array struct{ array }
func UnsafeArray(base unsafe.Pointer, length int, offset uintptr) Array {
- return Array{makeArray(base, uintptr(length), offset)}
+ return Array{unsafeArray(base, length, offset)}
}
func (a Array) Len() int { return int(a.len) }
@@ -36,8 +36,17 @@ type array struct {
off uintptr
}
-func makeArray(base unsafe.Pointer, length, offset uintptr) array {
- return array{ptr: base, len: length, off: offset}
+func makeArray[T any](base []T) array {
+ var z T
+ return array{
+ ptr: unsafe.Pointer(unsafe.SliceData(base)),
+ len: uintptr(len(base)),
+ off: unsafe.Sizeof(z),
+ }
+}
+
+func unsafeArray(base unsafe.Pointer, length int, offset uintptr) array {
+ return array{ptr: base, len: uintptr(length), off: offset}
}
func (a array) index(i int) unsafe.Pointer {
@@ -72,11 +81,11 @@ func (a array) offset(off uintptr) array {
type BoolArray struct{ array }
func MakeBoolArray(values []bool) BoolArray {
- return BoolArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 1)}
+ return BoolArray{makeArray(values)}
}
func UnsafeBoolArray(base unsafe.Pointer, length int, offset uintptr) BoolArray {
- return BoolArray{makeArray(base, uintptr(length), offset)}
+ return BoolArray{unsafeArray(base, length, offset)}
}
func (a BoolArray) Len() int { return int(a.len) }
@@ -88,11 +97,11 @@ func (a BoolArray) UnsafeArray() Array { return Array{a.array} }
type Int8Array struct{ array }
func MakeInt8Array(values []int8) Int8Array {
- return Int8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Int8Array{makeArray(values)}
}
func UnsafeInt8Array(base unsafe.Pointer, length int, offset uintptr) Int8Array {
- return Int8Array{makeArray(base, uintptr(length), offset)}
+ return Int8Array{unsafeArray(base, length, offset)}
}
func (a Int8Array) Len() int { return int(a.len) }
@@ -104,11 +113,11 @@ func (a Int8Array) UnsafeArray() Array { return Array{a.array} }
type Int16Array struct{ array }
func MakeInt16Array(values []int16) Int16Array {
- return Int16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Int16Array{makeArray(values)}
}
func UnsafeInt16Array(base unsafe.Pointer, length int, offset uintptr) Int16Array {
- return Int16Array{makeArray(base, uintptr(length), offset)}
+ return Int16Array{unsafeArray(base, length, offset)}
}
func (a Int16Array) Len() int { return int(a.len) }
@@ -122,11 +131,11 @@ func (a Int16Array) UnsafeArray() Array { return Array{a.array} }
type Int32Array struct{ array }
func MakeInt32Array(values []int32) Int32Array {
- return Int32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)}
+ return Int32Array{makeArray(values)}
}
func UnsafeInt32Array(base unsafe.Pointer, length int, offset uintptr) Int32Array {
- return Int32Array{makeArray(base, uintptr(length), offset)}
+ return Int32Array{unsafeArray(base, length, offset)}
}
func (a Int32Array) Len() int { return int(a.len) }
@@ -142,11 +151,11 @@ func (a Int32Array) UnsafeArray() Array { return Array{a.array} }
type Int64Array struct{ array }
func MakeInt64Array(values []int64) Int64Array {
- return Int64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Int64Array{makeArray(values)}
}
func UnsafeInt64Array(base unsafe.Pointer, length int, offset uintptr) Int64Array {
- return Int64Array{makeArray(base, uintptr(length), offset)}
+ return Int64Array{unsafeArray(base, length, offset)}
}
func (a Int64Array) Len() int { return int(a.len) }
@@ -164,11 +173,11 @@ func (a Int64Array) UnsafeArray() Array { return Array{a.array} }
type Float32Array struct{ array }
func MakeFloat32Array(values []float32) Float32Array {
- return Float32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)}
+ return Float32Array{makeArray(values)}
}
func UnsafeFloat32Array(base unsafe.Pointer, length int, offset uintptr) Float32Array {
- return Float32Array{makeArray(base, uintptr(length), offset)}
+ return Float32Array{unsafeArray(base, length, offset)}
}
func (a Float32Array) Len() int { return int(a.len) }
@@ -181,11 +190,11 @@ func (a Float32Array) UnsafeArray() Array { return Array{a.array} }
type Float64Array struct{ array }
func MakeFloat64Array(values []float64) Float64Array {
- return Float64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Float64Array{makeArray(values)}
}
func UnsafeFloat64Array(base unsafe.Pointer, length int, offset uintptr) Float64Array {
- return Float64Array{makeArray(base, uintptr(length), offset)}
+ return Float64Array{unsafeArray(base, length, offset)}
}
func (a Float64Array) Len() int { return int(a.len) }
@@ -197,11 +206,11 @@ func (a Float64Array) UnsafeArray() Array { return Array{a.array} }
type Uint8Array struct{ array }
func MakeUint8Array(values []uint8) Uint8Array {
- return Uint8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Uint8Array{makeArray(values)}
}
func UnsafeUint8Array(base unsafe.Pointer, length int, offset uintptr) Uint8Array {
- return Uint8Array{makeArray(base, uintptr(length), offset)}
+ return Uint8Array{unsafeArray(base, length, offset)}
}
func (a Uint8Array) Len() int { return int(a.len) }
@@ -212,11 +221,11 @@ func (a Uint8Array) UnsafeArray() Array { return Array{a.array} }
type Uint16Array struct{ array }
func MakeUint16Array(values []uint16) Uint16Array {
- return Uint16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Uint16Array{makeArray(values)}
}
func UnsafeUint16Array(base unsafe.Pointer, length int, offset uintptr) Uint16Array {
- return Uint16Array{makeArray(base, uintptr(length), offset)}
+ return Uint16Array{unsafeArray(base, length, offset)}
}
func (a Uint16Array) Len() int { return int(a.len) }
@@ -228,11 +237,11 @@ func (a Uint16Array) UnsafeArray() Array { return Array{a.array} }
type Uint32Array struct{ array }
func MakeUint32Array(values []uint32) Uint32Array {
- return Uint32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)}
+ return Uint32Array{makeArray(values)}
}
func UnsafeUint32Array(base unsafe.Pointer, length int, offset uintptr) Uint32Array {
- return Uint32Array{makeArray(base, uintptr(length), offset)}
+ return Uint32Array{unsafeArray(base, length, offset)}
}
func (a Uint32Array) Len() int { return int(a.len) }
@@ -245,11 +254,11 @@ func (a Uint32Array) UnsafeArray() Array { return Array{a.array} }
type Uint64Array struct{ array }
func MakeUint64Array(values []uint64) Uint64Array {
- return Uint64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)}
+ return Uint64Array{makeArray(values)}
}
func UnsafeUint64Array(base unsafe.Pointer, length int, offset uintptr) Uint64Array {
- return Uint64Array{makeArray(base, uintptr(length), offset)}
+ return Uint64Array{unsafeArray(base, length, offset)}
}
func (a Uint64Array) Len() int { return int(a.len) }
@@ -263,11 +272,11 @@ func (a Uint64Array) UnsafeArray() Array { return Array{a.array} }
type Uint128Array struct{ array }
func MakeUint128Array(values [][16]byte) Uint128Array {
- return Uint128Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 16)}
+ return Uint128Array{makeArray(values)}
}
func UnsafeUint128Array(base unsafe.Pointer, length int, offset uintptr) Uint128Array {
- return Uint128Array{makeArray(base, uintptr(length), offset)}
+ return Uint128Array{unsafeArray(base, length, offset)}
}
func (a Uint128Array) Len() int { return int(a.len) }
@@ -283,11 +292,11 @@ type StringArray struct{ array }
func MakeStringArray(values []string) StringArray {
const sizeOfString = unsafe.Sizeof("")
- return StringArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfString)}
+ return StringArray{makeArray(values)}
}
func UnsafeStringArray(base unsafe.Pointer, length int, offset uintptr) StringArray {
- return StringArray{makeArray(base, uintptr(length), offset)}
+ return StringArray{unsafeArray(base, length, offset)}
}
func (a StringArray) Len() int { return int(a.len) }
@@ -298,12 +307,11 @@ func (a StringArray) UnsafeArray() Array { return Array{a.array} }
type TimeArray struct{ array }
func MakeTimeArray(values []time.Time) TimeArray {
- const sizeOfTime = unsafe.Sizeof(time.Time{})
- return TimeArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfTime)}
+ return TimeArray{makeArray(values)}
}
func UnsafeTimeArray(base unsafe.Pointer, length int, offset uintptr) TimeArray {
- return TimeArray{makeArray(base, uintptr(length), offset)}
+ return TimeArray{unsafeArray(base, length, offset)}
}
func (a TimeArray) Len() int { return int(a.len) }
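Editor's note: makeArray is now generic, deriving the base pointer, length, and stride from the slice itself via unsafe.SliceData and unsafe.Sizeof instead of hard-coded per-type offsets. A self-contained sketch of that strided-view construction and indexing (type and function names here are illustrative):

package main

import (
	"fmt"
	"unsafe"
)

// strided is a minimal stand-in for the sparse array header: base pointer,
// element count, and byte stride between elements.
type strided struct {
	ptr unsafe.Pointer
	len uintptr
	off uintptr
}

func makeStrided[T any](s []T) strided {
	var z T
	return strided{
		ptr: unsafe.Pointer(unsafe.SliceData(s)),
		len: uintptr(len(s)),
		off: unsafe.Sizeof(z),
	}
}

func (a strided) index(i int) unsafe.Pointer {
	return unsafe.Add(a.ptr, uintptr(i)*a.off)
}

func main() {
	vals := []int64{10, 20, 30}
	a := makeStrided(vals)
	fmt.Println(*(*int64)(a.index(2))) // 30
}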
diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go
index 746a0930d1c..d7d72d091bf 100644
--- a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go
+++ b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go
@@ -1,21 +1,21 @@
package sparse
-import "unsafe"
+import "github.com/parquet-go/parquet-go/internal/unsafecast"
func GatherInt32(dst []int32, src Int32Array) int {
- return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array())
+ return GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array())
}
func GatherInt64(dst []int64, src Int64Array) int {
- return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array())
+ return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array())
}
func GatherFloat32(dst []float32, src Float32Array) int {
- return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array())
+ return GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array())
}
func GatherFloat64(dst []float64, src Float64Array) int {
- return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array())
+ return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array())
}
func GatherBits(dst []byte, src Uint8Array) int { return gatherBits(dst, src) }
diff --git a/vendor/github.com/parquet-go/parquet-go/type.go b/vendor/github.com/parquet-go/parquet-go/type.go
index 59be12ab3a6..f5690a22e2b 100644
--- a/vendor/github.com/parquet-go/parquet-go/type.go
+++ b/vendor/github.com/parquet-go/parquet-go/type.go
@@ -12,7 +12,6 @@ import (
"github.com/parquet-go/parquet-go/deprecated"
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/format"
- "github.com/parquet-go/parquet-go/internal/unsafecast"
)
// Kind is an enumeration type representing the physical types supported by the
@@ -901,7 +900,7 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error {
// overhead we instead convert the reflect.Value holding the
// destination array into a byte slice which allows us to use
// a more efficient call to copy.
- d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v))
+ d := unsafe.Slice((*byte)(reflectValueData(dst)), len(v))
copy(d, v)
return nil
}
@@ -915,6 +914,10 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error {
return nil
}
+func reflectValueData(v reflect.Value) unsafe.Pointer {
+ return (*[2]unsafe.Pointer)(unsafe.Pointer(&v))[1]
+}
+
func (t fixedLenByteArrayType) ConvertValue(val Value, typ Type) (Value, error) {
switch typ.(type) {
case *stringType:
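Editor's note: reflectValueData peeks at reflect.Value's internal layout (type word followed by data word) to obtain the destination pointer. For an addressable value the same pointer can be reached through the public API, roughly as below (a hedged equivalent, not what the patch uses):

package reflectsketch

import (
	"reflect"
	"unsafe"
)

// valueData returns a pointer to v's underlying data; v must be addressable.
func valueData(v reflect.Value) unsafe.Pointer {
	return v.Addr().UnsafePointer()
}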
diff --git a/vendor/github.com/parquet-go/parquet-go/value.go b/vendor/github.com/parquet-go/parquet-go/value.go
index a48f4344bc7..a85448742fd 100644
--- a/vendor/github.com/parquet-go/parquet-go/value.go
+++ b/vendor/github.com/parquet-go/parquet-go/value.go
@@ -14,7 +14,6 @@ import (
"github.com/google/uuid"
"github.com/parquet-go/parquet-go/deprecated"
"github.com/parquet-go/parquet-go/format"
- "github.com/parquet-go/parquet-go/internal/unsafecast"
)
const (
@@ -419,11 +418,11 @@ func makeValueDouble(value float64) Value {
}
func makeValueBytes(kind Kind, value []byte) Value {
- return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
+ return makeValueByteArray(kind, unsafe.SliceData(value), len(value))
}
func makeValueString(kind Kind, value string) Value {
- return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
+ return makeValueByteArray(kind, unsafe.StringData(value), len(value))
}
func makeValueFixedLenByteArray(v reflect.Value) Value {
@@ -463,8 +462,8 @@ func (v *Value) float() float32 { return math.Float32frombits(uint32(v.
func (v *Value) double() float64 { return math.Float64frombits(uint64(v.u64)) }
func (v *Value) uint32() uint32 { return uint32(v.u64) }
func (v *Value) uint64() uint64 { return v.u64 }
-func (v *Value) byteArray() []byte { return unsafecast.Bytes(v.ptr, int(v.u64)) }
-func (v *Value) string() string { return unsafecast.BytesToString(v.byteArray()) }
+func (v *Value) byteArray() []byte { return unsafe.Slice(v.ptr, v.u64) }
+func (v *Value) string() string { return unsafe.String(v.ptr, v.u64) }
func (v *Value) be128() *[16]byte { return (*[16]byte)(unsafe.Pointer(v.ptr)) }
func (v *Value) column() int { return int(^v.columnIndex) }
@@ -516,14 +515,14 @@ func (v Value) convertToDouble(x float64) Value {
func (v Value) convertToByteArray(x []byte) Value {
v.kind = ^int8(ByteArray)
- v.ptr = unsafecast.AddressOfBytes(x)
+ v.ptr = unsafe.SliceData(x)
v.u64 = uint64(len(x))
return v
}
func (v Value) convertToFixedLenByteArray(x []byte) Value {
v.kind = ^int8(FixedLenByteArray)
- v.ptr = unsafecast.AddressOfBytes(x)
+ v.ptr = unsafe.SliceData(x)
v.u64 = uint64(len(x))
return v
}
@@ -787,7 +786,7 @@ func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value {
func (v Value) Clone() Value {
switch k := v.Kind(); k {
case ByteArray, FixedLenByteArray:
- v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray()))
+ v.ptr = unsafe.SliceData(copyBytes(v.byteArray()))
}
return v
}
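Editor's note: value.go drops the unsafecast address helpers in favor of the Go 1.20 unsafe.SliceData/StringData/Slice/String primitives. The zero-copy conversions they enable look roughly like this (sketch; mutating a view obtained from a string is undefined behavior):

package main

import (
	"fmt"
	"unsafe"
)

// bytesView returns a read-only []byte aliasing the string's bytes.
func bytesView(s string) []byte { return unsafe.Slice(unsafe.StringData(s), len(s)) }

// stringView returns a string aliasing the byte slice's contents.
func stringView(b []byte) string { return unsafe.String(unsafe.SliceData(b), len(b)) }

func main() {
	fmt.Println(stringView([]byte("parquet")), len(bytesView("tempo")))
}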
diff --git a/vendor/github.com/parquet-go/parquet-go/writer.go b/vendor/github.com/parquet-go/parquet-go/writer.go
index c02f4e29598..8d9e44c7ee2 100644
--- a/vendor/github.com/parquet-go/parquet-go/writer.go
+++ b/vendor/github.com/parquet-go/parquet-go/writer.go
@@ -17,8 +17,8 @@ import (
"github.com/parquet-go/parquet-go/compress"
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/encoding/plain"
+ "github.com/parquet-go/parquet-go/encoding/thrift"
"github.com/parquet-go/parquet-go/format"
- "github.com/segmentio/encoding/thrift"
)
const (
diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE
index 6a66aea5eaf..2a7cf70da6e 100644
--- a/vendor/golang.org/x/sys/LICENSE
+++ b/vendor/golang.org/x/sys/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
+Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
index 8fa707aa4ba..02609d5b21d 100644
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@@ -105,6 +105,8 @@ var ARM64 struct {
HasSVE bool // Scalable Vector Extensions
HasSVE2 bool // Scalable Vector Extensions 2
HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
+ HasDIT bool // Data Independent Timing support
+ HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions
_ CacheLinePad
}
@@ -199,6 +201,25 @@ var S390X struct {
_ CacheLinePad
}
+// RISCV64 contains the supported CPU features and performance characteristics for riscv64
+// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
+// the presence of RISC-V extensions.
+//
+// It is safe to assume that all the RV64G extensions are supported and so they are omitted from
+// this structure. As riscv64 Go programs require at least RV64G, the code that populates
+// this structure cannot run successfully if some of the RV64G extensions are missing.
+// The struct is padded to avoid false sharing.
+var RISCV64 struct {
+ _ CacheLinePad
+ HasFastMisaligned bool // Fast misaligned accesses
+ HasC bool // Compressed instruction-set extension
+ HasV bool // Vector extension compatible with RVV 1.0
+ HasZba bool // Address generation instructions extension
+ HasZbb bool // Basic bit-manipulation extension
+ HasZbs bool // Single-bit instructions extension
+ _ CacheLinePad
+}
+
func init() {
archInit()
initOptions()
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
index 0e27a21e1f8..af2aa99f9f0 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
@@ -38,6 +38,8 @@ func initOptions() {
{Name: "dcpop", Feature: &ARM64.HasDCPOP},
{Name: "asimddp", Feature: &ARM64.HasASIMDDP},
{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM},
+ {Name: "dit", Feature: &ARM64.HasDIT},
+ {Name: "i8mm", Feature: &ARM64.HasI8MM},
}
}
@@ -145,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
ARM64.HasLRCPC = true
}
+ switch extractBits(isar1, 52, 55) {
+ case 1:
+ ARM64.HasI8MM = true
+ }
+
// ID_AA64PFR0_EL1
switch extractBits(pfr0, 16, 19) {
case 0:
@@ -168,6 +175,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
parseARM64SVERegister(getzfr0())
}
+
+ switch extractBits(pfr0, 48, 51) {
+ case 1:
+ ARM64.HasDIT = true
+ }
}
func parseARM64SVERegister(zfr0 uint64) {
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
index 3d386d0fc21..08f35ea1773 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
@@ -35,8 +35,10 @@ const (
hwcap_SHA512 = 1 << 21
hwcap_SVE = 1 << 22
hwcap_ASIMDFHM = 1 << 23
+ hwcap_DIT = 1 << 24
hwcap2_SVE2 = 1 << 1
+ hwcap2_I8MM = 1 << 13
)
// linuxKernelCanEmulateCPUID reports whether we're running
@@ -106,9 +108,12 @@ func doinit() {
ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
+ ARM64.HasDIT = isSet(hwCap, hwcap_DIT)
+
// HWCAP2 feature bits
ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2)
+ ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM)
}
func isSet(hwc uint, value uint) bool {
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
index cd63e733557..7d902b6847b 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
+//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64
package cpu
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
new file mode 100644
index 00000000000..cb4a0c57280
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
@@ -0,0 +1,137 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe
+// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available.
+//
+// A note on detection of the Vector extension using HWCAP.
+//
+// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5.
+// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe
+// syscall is not available then neither is the Vector extension (which needs kernel support).
+// The riscv_hwprobe syscall should then be all we need to detect the Vector extension.
+// However, some RISC-V board manufacturers ship boards with an older kernel on top of which
+// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe
+// patches. These kernels advertise support for the Vector extension using HWCAP. Falling
+// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not
+// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option.
+//
+// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by
+// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board
+// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified
+// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use
+// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector
+// extension are binary incompatible. HWCAP can then not be used in isolation to populate the
+// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0.
+//
+// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector
+// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype
+// register. This check would allow us to safely detect version 1.0 of the Vector extension
+// with HWCAP, if riscv_hwprobe were not available. However, the check cannot
+// be added until the assembler supports the Vector instructions.
+//
+// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the
+// extensions it advertises support for are explicitly versioned. It's also worth noting that
+// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba.
+// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority
+// of RISC-V extensions.
+//
+// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information.
+
+// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must
+// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall
+// here.
+
+const (
+ // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+ riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4
+ riscv_HWPROBE_IMA_C = 0x2
+ riscv_HWPROBE_IMA_V = 0x4
+ riscv_HWPROBE_EXT_ZBA = 0x8
+ riscv_HWPROBE_EXT_ZBB = 0x10
+ riscv_HWPROBE_EXT_ZBS = 0x20
+ riscv_HWPROBE_KEY_CPUPERF_0 = 0x5
+ riscv_HWPROBE_MISALIGNED_FAST = 0x3
+ riscv_HWPROBE_MISALIGNED_MASK = 0x7
+)
+
+const (
+ // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go.
+ sys_RISCV_HWPROBE = 258
+)
+
+// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+type riscvHWProbePairs struct {
+ key int64
+ value uint64
+}
+
+const (
+ // CPU features
+ hwcap_RISCV_ISA_C = 1 << ('C' - 'A')
+)
+
+func doinit() {
+ // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key
+ // field should be initialised with one of the key constants defined above, e.g.,
+ // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value.
+ // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0.
+
+ pairs := []riscvHWProbePairs{
+ {riscv_HWPROBE_KEY_IMA_EXT_0, 0},
+ {riscv_HWPROBE_KEY_CPUPERF_0, 0},
+ }
+
+ // This call only indicates that extensions are supported if they are implemented on all cores.
+ if riscvHWProbe(pairs, 0) {
+ if pairs[0].key != -1 {
+ v := uint(pairs[0].value)
+ RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C)
+ RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
+ RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA)
+ RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
+ RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS)
+ }
+ if pairs[1].key != -1 {
+ v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK
+ RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST
+ }
+ }
+
+ // Let's double check with HWCAP if the C extension does not appear to be supported.
+ // This may happen if we're running on a kernel older than 6.4.
+
+ if !RISCV64.HasC {
+ RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C)
+ }
+}
+
+func isSet(hwc uint, value uint) bool {
+ return hwc&value != 0
+}
+
+// riscvHWProbe is a simplified version of the generated wrapper function found in
+// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the
+// cpuCount and cpus parameters which we do not need. We always want to pass 0 for
+// these parameters here so the kernel only reports the extensions that are present
+// on all cores.
+func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool {
+ var _zero uintptr
+ var p0 unsafe.Pointer
+ if len(pairs) > 0 {
+ p0 = unsafe.Pointer(&pairs[0])
+ } else {
+ p0 = unsafe.Pointer(&_zero)
+ }
+
+ _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0)
+ return e1 == 0
+}
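Editor's note: once doinit has run, callers only read the exported booleans. A hypothetical consumer of the new RISCV64 feature flags:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// Populated via riscv_hwprobe (or HWCAP for C) by the detection code above.
	fmt.Println("RVV 1.0:        ", cpu.RISCV64.HasV)
	fmt.Println("Zba/Zbb/Zbs:    ", cpu.RISCV64.HasZba, cpu.RISCV64.HasZbb, cpu.RISCV64.HasZbs)
	fmt.Println("fast misaligned:", cpu.RISCV64.HasFastMisaligned)
}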
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
index 7f0c79c004b..aca3199c911 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@@ -8,4 +8,13 @@ package cpu
const cacheLineSize = 64
-func initOptions() {}
+func initOptions() {
+ options = []option{
+ {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
+ {Name: "c", Feature: &RISCV64.HasC},
+ {Name: "v", Feature: &RISCV64.HasV},
+ {Name: "zba", Feature: &RISCV64.HasZba},
+ {Name: "zbb", Feature: &RISCV64.HasZbb},
+ {Name: "zbs", Feature: &RISCV64.HasZbs},
+ }
+}
diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md
index 7d3c060e122..6e08a76a716 100644
--- a/vendor/golang.org/x/sys/unix/README.md
+++ b/vendor/golang.org/x/sys/unix/README.md
@@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these
into a common file for each OS.
The merge is performed in the following steps:
-1. Construct the set of common code that is idential in all architecture-specific files.
+1. Construct the set of common code that is identical in all architecture-specific files.
2. Write this common code to the merged file.
3. Remove the common code from all architecture-specific files.
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh
index 4ed2e488b61..ac54ecaba0a 100644
--- a/vendor/golang.org/x/sys/unix/mkerrors.sh
+++ b/vendor/golang.org/x/sys/unix/mkerrors.sh
@@ -58,6 +58,7 @@ includes_Darwin='
#define _DARWIN_USE_64_BIT_INODE
#define __APPLE_USE_RFC_3542
#include
+#include
#include
#include
#include
@@ -551,6 +552,7 @@ ccflags="$@"
$2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ &&
$2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ ||
$2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ ||
+ $2 ~ /^(CONNECT|SAE)_/ ||
$2 ~ /^FIORDCHK$/ ||
$2 ~ /^SIOC/ ||
$2 ~ /^TIOC/ ||
@@ -654,7 +656,7 @@ errors=$(
signals=$(
echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' |
- grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' |
+ grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' |
sort
)
@@ -664,7 +666,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags |
sort >_error.grep
echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' |
- grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' |
+ grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' |
sort >_signal.grep
echo '// mkerrors.sh' "$@"
diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go
index 67ce6cef2d5..6f15ba1eaff 100644
--- a/vendor/golang.org/x/sys/unix/syscall_aix.go
+++ b/vendor/golang.org/x/sys/unix/syscall_aix.go
@@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int,
var status _C_int
var r Pid_t
err = ERESTART
- // AIX wait4 may return with ERESTART errno, while the processus is still
+ // AIX wait4 may return with ERESTART errno, while the process is still
// active.
for err == ERESTART {
r, err = wait4(Pid_t(pid), &status, options, rusage)
diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go
index 4cc7b005967..099867deede 100644
--- a/vendor/golang.org/x/sys/unix/syscall_darwin.go
+++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go
@@ -402,6 +402,18 @@ func IoctlSetIfreqMTU(fd int, ifreq *IfreqMTU) error {
return ioctlPtr(fd, SIOCSIFMTU, unsafe.Pointer(ifreq))
}
+//sys renamexNp(from string, to string, flag uint32) (err error)
+
+func RenamexNp(from string, to string, flag uint32) (err error) {
+ return renamexNp(from, to, flag)
+}
+
+//sys renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error)
+
+func RenameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) {
+ return renameatxNp(fromfd, from, tofd, to, flag)
+}
+
//sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS_SYSCTL
func Uname(uname *Utsname) error {
@@ -554,6 +566,43 @@ func PthreadFchdir(fd int) (err error) {
return pthread_fchdir_np(fd)
}
+// Connectx calls connectx(2) to initiate a connection on a socket.
+//
+// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument.
+//
+// - srcIf is the optional source interface index. 0 means unspecified.
+// - srcAddr is the optional source address. nil means unspecified.
+// - dstAddr is the destination address.
+//
+// On success, Connectx returns the number of bytes enqueued for transmission.
+func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) {
+ endpoints := SaEndpoints{
+ Srcif: srcIf,
+ }
+
+ if srcAddr != nil {
+ addrp, addrlen, err := srcAddr.sockaddr()
+ if err != nil {
+ return 0, err
+ }
+ endpoints.Srcaddr = (*RawSockaddr)(addrp)
+ endpoints.Srcaddrlen = uint32(addrlen)
+ }
+
+ if dstAddr != nil {
+ addrp, addrlen, err := dstAddr.sockaddr()
+ if err != nil {
+ return 0, err
+ }
+ endpoints.Dstaddr = (*RawSockaddr)(addrp)
+ endpoints.Dstaddrlen = uint32(addrlen)
+ }
+
+ err = connectx(fd, &endpoints, associd, flags, iov, &n, connid)
+ return
+}
+
+//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error)
//sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error)
//sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error)
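Editor's note: a hedged usage sketch for the new darwin Connectx wrapper, following its doc comment above; the destination address and flag choice are illustrative only:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)

	// 192.0.2.10 is a documentation address; replace with a real destination.
	dst := &unix.SockaddrInet4{Port: 443, Addr: [4]byte{192, 0, 2, 10}}
	var connid unix.SaeConnID
	n, err := unix.Connectx(fd, 0, nil, dst, unix.SAE_ASSOCID_ANY, unix.CONNECT_DATA_IDEMPOTENT, nil, &connid)
	fmt.Println("bytes enqueued:", n, "err:", err)
}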
diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go
index ba46651f8e3..a6a2d2fc2b9 100644
--- a/vendor/golang.org/x/sys/unix/syscall_hurd.go
+++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go
@@ -11,6 +11,7 @@ package unix
int ioctl(int, unsigned long int, uintptr_t);
*/
import "C"
+import "unsafe"
func ioctl(fd int, req uint, arg uintptr) (err error) {
r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg))
diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go
index 5682e2628ad..f08abd434ff 100644
--- a/vendor/golang.org/x/sys/unix/syscall_linux.go
+++ b/vendor/golang.org/x/sys/unix/syscall_linux.go
@@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) {
return &value, err
}
+// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas"
+// algorithm.
+//
+// The socket's congestion control algorithm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option:
+//
+// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION)
+func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) {
+ var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment
+ vallen := _Socklen(SizeofTCPCCInfo)
+ err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen)
+ out := (*TCPVegasInfo)(unsafe.Pointer(&value[0]))
+ return out, err
+}
+
+// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctcp"
+// algorithm.
+//
+// The socket's congestion control algorithm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option:
+//
+// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION)
+func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) {
+ var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment
+ vallen := _Socklen(SizeofTCPCCInfo)
+ err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen)
+ out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0]))
+ return out, err
+}
+
+// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr"
+// algorithm.
+//
+// The socket's congestion control algorithm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option:
+//
+// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION)
+func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) {
+ var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment
+ vallen := _Socklen(SizeofTCPCCInfo)
+ err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen)
+ out := (*TCPBBRInfo)(unsafe.Pointer(&value[0]))
+ return out, err
+}
+
// GetsockoptString returns the string value of the socket option opt for the
// socket associated with fd at the given socket level.
func GetsockoptString(fd, level, opt int) (string, error) {
@@ -1959,7 +2001,26 @@ func Getpgrp() (pid int) {
//sysnb Getpid() (pid int)
//sysnb Getppid() (ppid int)
//sys Getpriority(which int, who int) (prio int, err error)
-//sys Getrandom(buf []byte, flags int) (n int, err error)
+
+func Getrandom(buf []byte, flags int) (n int, err error) {
+ vdsoRet, supported := vgetrandom(buf, uint32(flags))
+ if supported {
+ if vdsoRet < 0 {
+ return 0, errnoErr(syscall.Errno(-vdsoRet))
+ }
+ return vdsoRet, nil
+ }
+ var p *byte
+ if len(buf) > 0 {
+ p = &buf[0]
+ }
+ r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags))
+ if e != 0 {
+ return 0, errnoErr(e)
+ }
+ return int(r), nil
+}
+
//sysnb Getrusage(who int, rusage *Rusage) (err error)
//sysnb Getsid(pid int) (sid int, err error)
//sysnb Gettid() (tid int)
@@ -2592,3 +2653,4 @@ func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) {
}
//sys Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error)
+//sys Mseal(b []byte, flags uint) (err error)
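Editor's note: a hedged sketch of how the new per-algorithm getters combine with GetsockoptString, as their doc comments describe; fd is assumed to be a connected Linux TCP socket and TCP_CC_INFO the socket option exported by the package:

package tcpinfo

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// printCCInfo reports congestion-control details for a connected TCP socket fd.
func printCCInfo(fd int) error {
	algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION)
	if err != nil {
		return err
	}
	switch algo {
	case "bbr":
		info, err := unix.GetsockoptTCPCCBBRInfo(fd, unix.IPPROTO_TCP, unix.TCP_CC_INFO)
		if err != nil {
			return err
		}
		fmt.Printf("bbr: %+v\n", info)
	case "vegas":
		info, err := unix.GetsockoptTCPCCVegasInfo(fd, unix.IPPROTO_TCP, unix.TCP_CC_INFO)
		if err != nil {
			return err
		}
		fmt.Printf("vegas: %+v\n", info)
	default:
		fmt.Println("algorithm:", algo)
	}
	return nil
}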
diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go
index cf2ee6c75ef..745e5c7e6c0 100644
--- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go
+++ b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go
@@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error
}
return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags)
}
+
+const SYS_FSTATAT = SYS_NEWFSTATAT
diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go
index 3d0e98451f8..dd2262a4079 100644
--- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go
+++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go
@@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error
}
return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags)
}
+
+const SYS_FSTATAT = SYS_NEWFSTATAT
diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go
index 6f5a288944d..8cf3670bda6 100644
--- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go
@@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error
}
return riscvHWProbe(pairs, setSize, set, flags)
}
+
+const SYS_FSTATAT = SYS_NEWFSTATAT
diff --git a/vendor/golang.org/x/sys/unix/syscall_openbsd.go b/vendor/golang.org/x/sys/unix/syscall_openbsd.go
index b25343c71a4..b86ded549c6 100644
--- a/vendor/golang.org/x/sys/unix/syscall_openbsd.go
+++ b/vendor/golang.org/x/sys/unix/syscall_openbsd.go
@@ -293,6 +293,7 @@ func Uname(uname *Utsname) error {
//sys Mkfifoat(dirfd int, path string, mode uint32) (err error)
//sys Mknod(path string, mode uint32, dev int) (err error)
//sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error)
+//sys Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error)
//sys Nanosleep(time *Timespec, leftover *Timespec) (err error)
//sys Open(path string, mode int, perm uint32) (fd int, err error)
//sys Openat(dirfd int, path string, mode int, perm uint32) (fd int, err error)
diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go
new file mode 100644
index 00000000000..07ac8e09d1b
--- /dev/null
+++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go
@@ -0,0 +1,13 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && go1.24
+
+package unix
+
+import _ "unsafe"
+
+//go:linkname vgetrandom runtime.vgetrandom
+//go:noescape
+func vgetrandom(p []byte, flags uint32) (ret int, supported bool)
diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go
new file mode 100644
index 00000000000..297e97bce92
--- /dev/null
+++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go
@@ -0,0 +1,11 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux || !go1.24
+
+package unix
+
+func vgetrandom(p []byte, flags uint32) (ret int, supported bool) {
+ return -1, false
+}
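Editor's note: with the vgetrandom linkname (Linux, Go 1.24+) and the unsupported stub above, unix.Getrandom prefers the vDSO path and otherwise falls back to the getrandom syscall; callers do not change:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	buf := make([]byte, 32)
	// Uses the vDSO vgetrandom when available, otherwise SYS_GETRANDOM.
	n, err := unix.Getrandom(buf, 0)
	if err != nil {
		panic(err)
	}
	fmt.Printf("read %d random bytes\n", n)
}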
diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
index e40fa85245f..d73c4652e6c 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go
@@ -237,6 +237,9 @@ const (
CLOCK_UPTIME_RAW_APPROX = 0x9
CLONE_NOFOLLOW = 0x1
CLONE_NOOWNERCOPY = 0x2
+ CONNECT_DATA_AUTHENTICATED = 0x4
+ CONNECT_DATA_IDEMPOTENT = 0x2
+ CONNECT_RESUME_ON_READ_WRITE = 0x1
CR0 = 0x0
CR1 = 0x1000
CR2 = 0x2000
@@ -1169,6 +1172,11 @@ const (
PT_WRITE_D = 0x5
PT_WRITE_I = 0x4
PT_WRITE_U = 0x6
+ RENAME_EXCL = 0x4
+ RENAME_NOFOLLOW_ANY = 0x10
+ RENAME_RESERVED1 = 0x8
+ RENAME_SECLUDE = 0x1
+ RENAME_SWAP = 0x2
RLIMIT_AS = 0x5
RLIMIT_CORE = 0x4
RLIMIT_CPU = 0x0
@@ -1260,6 +1268,10 @@ const (
RTV_SSTHRESH = 0x20
RUSAGE_CHILDREN = -0x1
RUSAGE_SELF = 0x0
+ SAE_ASSOCID_ALL = 0xffffffff
+ SAE_ASSOCID_ANY = 0x0
+ SAE_CONNID_ALL = 0xffffffff
+ SAE_CONNID_ANY = 0x0
SCM_CREDS = 0x3
SCM_RIGHTS = 0x1
SCM_TIMESTAMP = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
index bb02aa6c056..4a55a400588 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go
@@ -237,6 +237,9 @@ const (
CLOCK_UPTIME_RAW_APPROX = 0x9
CLONE_NOFOLLOW = 0x1
CLONE_NOOWNERCOPY = 0x2
+ CONNECT_DATA_AUTHENTICATED = 0x4
+ CONNECT_DATA_IDEMPOTENT = 0x2
+ CONNECT_RESUME_ON_READ_WRITE = 0x1
CR0 = 0x0
CR1 = 0x1000
CR2 = 0x2000
@@ -1169,6 +1172,11 @@ const (
PT_WRITE_D = 0x5
PT_WRITE_I = 0x4
PT_WRITE_U = 0x6
+ RENAME_EXCL = 0x4
+ RENAME_NOFOLLOW_ANY = 0x10
+ RENAME_RESERVED1 = 0x8
+ RENAME_SECLUDE = 0x1
+ RENAME_SWAP = 0x2
RLIMIT_AS = 0x5
RLIMIT_CORE = 0x4
RLIMIT_CPU = 0x0
@@ -1260,6 +1268,10 @@ const (
RTV_SSTHRESH = 0x20
RUSAGE_CHILDREN = -0x1
RUSAGE_SELF = 0x0
+ SAE_ASSOCID_ALL = 0xffffffff
+ SAE_ASSOCID_ANY = 0x0
+ SAE_CONNID_ALL = 0xffffffff
+ SAE_CONNID_ANY = 0x0
SCM_CREDS = 0x3
SCM_RIGHTS = 0x1
SCM_TIMESTAMP = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go
index 877a62b479a..de3b462489c 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go
@@ -457,6 +457,7 @@ const (
B600 = 0x8
B75 = 0x2
B9600 = 0xd
+ BCACHEFS_SUPER_MAGIC = 0xca451a4e
BDEVFS_MAGIC = 0x62646576
BINDERFS_SUPER_MAGIC = 0x6c6f6f70
BINFMTFS_MAGIC = 0x42494e4d
@@ -494,6 +495,7 @@ const (
BPF_F_TEST_REG_INVARIANTS = 0x80
BPF_F_TEST_RND_HI32 = 0x4
BPF_F_TEST_RUN_ON_CPU = 0x1
+ BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4
BPF_F_TEST_STATE_FREQ = 0x8
BPF_F_TEST_XDP_LIVE_FRAMES = 0x2
BPF_F_XDP_DEV_BOUND_ONLY = 0x40
@@ -928,6 +930,7 @@ const (
EPOLL_CTL_ADD = 0x1
EPOLL_CTL_DEL = 0x2
EPOLL_CTL_MOD = 0x3
+ EPOLL_IOC_TYPE = 0x8a
EROFS_SUPER_MAGIC_V1 = 0xe0f5e1e2
ESP_V4_FLOW = 0xa
ESP_V6_FLOW = 0xc
@@ -941,9 +944,6 @@ const (
ETHTOOL_FEC_OFF = 0x4
ETHTOOL_FEC_RS = 0x8
ETHTOOL_FLAG_ALL = 0x7
- ETHTOOL_FLAG_COMPACT_BITSETS = 0x1
- ETHTOOL_FLAG_OMIT_REPLY = 0x2
- ETHTOOL_FLAG_STATS = 0x4
ETHTOOL_FLASHDEV = 0x33
ETHTOOL_FLASH_MAX_FILENAME = 0x80
ETHTOOL_FWVERS_LEN = 0x20
@@ -1705,6 +1705,7 @@ const (
KEXEC_ARCH_S390 = 0x160000
KEXEC_ARCH_SH = 0x2a0000
KEXEC_ARCH_X86_64 = 0x3e0000
+ KEXEC_CRASH_HOTPLUG_SUPPORT = 0x8
KEXEC_FILE_DEBUG = 0x8
KEXEC_FILE_NO_INITRAMFS = 0x4
KEXEC_FILE_ON_CRASH = 0x2
@@ -1780,6 +1781,7 @@ const (
KEY_SPEC_USER_KEYRING = -0x4
KEY_SPEC_USER_SESSION_KEYRING = -0x5
LANDLOCK_ACCESS_FS_EXECUTE = 0x1
+ LANDLOCK_ACCESS_FS_IOCTL_DEV = 0x8000
LANDLOCK_ACCESS_FS_MAKE_BLOCK = 0x800
LANDLOCK_ACCESS_FS_MAKE_CHAR = 0x40
LANDLOCK_ACCESS_FS_MAKE_DIR = 0x80
@@ -1861,6 +1863,19 @@ const (
MAP_FILE = 0x0
MAP_FIXED = 0x10
MAP_FIXED_NOREPLACE = 0x100000
+ MAP_HUGE_16GB = 0x88000000
+ MAP_HUGE_16KB = 0x38000000
+ MAP_HUGE_16MB = 0x60000000
+ MAP_HUGE_1GB = 0x78000000
+ MAP_HUGE_1MB = 0x50000000
+ MAP_HUGE_256MB = 0x70000000
+ MAP_HUGE_2GB = 0x7c000000
+ MAP_HUGE_2MB = 0x54000000
+ MAP_HUGE_32MB = 0x64000000
+ MAP_HUGE_512KB = 0x4c000000
+ MAP_HUGE_512MB = 0x74000000
+ MAP_HUGE_64KB = 0x40000000
+ MAP_HUGE_8MB = 0x5c000000
MAP_HUGE_MASK = 0x3f
MAP_HUGE_SHIFT = 0x1a
MAP_PRIVATE = 0x2
@@ -1908,6 +1923,7 @@ const (
MNT_EXPIRE = 0x4
MNT_FORCE = 0x1
MNT_ID_REQ_SIZE_VER0 = 0x18
+ MNT_ID_REQ_SIZE_VER1 = 0x20
MODULE_INIT_COMPRESSED_FILE = 0x4
MODULE_INIT_IGNORE_MODVERSIONS = 0x1
MODULE_INIT_IGNORE_VERMAGIC = 0x2
@@ -2173,7 +2189,7 @@ const (
NFT_REG_SIZE = 0x10
NFT_REJECT_ICMPX_MAX = 0x3
NFT_RT_MAX = 0x4
- NFT_SECMARK_CTX_MAXLEN = 0x100
+ NFT_SECMARK_CTX_MAXLEN = 0x1000
NFT_SET_MAXNAMELEN = 0x100
NFT_SOCKET_MAX = 0x3
NFT_TABLE_F_MASK = 0x7
@@ -2342,9 +2358,11 @@ const (
PERF_MEM_LVLNUM_IO = 0xa
PERF_MEM_LVLNUM_L1 = 0x1
PERF_MEM_LVLNUM_L2 = 0x2
+ PERF_MEM_LVLNUM_L2_MHB = 0x5
PERF_MEM_LVLNUM_L3 = 0x3
PERF_MEM_LVLNUM_L4 = 0x4
PERF_MEM_LVLNUM_LFB = 0xc
+ PERF_MEM_LVLNUM_MSC = 0x6
PERF_MEM_LVLNUM_NA = 0xf
PERF_MEM_LVLNUM_PMEM = 0xe
PERF_MEM_LVLNUM_RAM = 0xd
@@ -2417,6 +2435,7 @@ const (
PRIO_PGRP = 0x1
PRIO_PROCESS = 0x0
PRIO_USER = 0x2
+ PROCFS_IOCTL_MAGIC = 'f'
PROC_SUPER_MAGIC = 0x9fa0
PROT_EXEC = 0x4
PROT_GROWSDOWN = 0x1000000
@@ -2498,6 +2517,23 @@ const (
PR_PAC_GET_ENABLED_KEYS = 0x3d
PR_PAC_RESET_KEYS = 0x36
PR_PAC_SET_ENABLED_KEYS = 0x3c
+ PR_PPC_DEXCR_CTRL_CLEAR = 0x4
+ PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC = 0x10
+ PR_PPC_DEXCR_CTRL_EDITABLE = 0x1
+ PR_PPC_DEXCR_CTRL_MASK = 0x1f
+ PR_PPC_DEXCR_CTRL_SET = 0x2
+ PR_PPC_DEXCR_CTRL_SET_ONEXEC = 0x8
+ PR_PPC_DEXCR_IBRTPD = 0x1
+ PR_PPC_DEXCR_NPHIE = 0x3
+ PR_PPC_DEXCR_SBHE = 0x0
+ PR_PPC_DEXCR_SRAPD = 0x2
+ PR_PPC_GET_DEXCR = 0x48
+ PR_PPC_SET_DEXCR = 0x49
+ PR_RISCV_CTX_SW_FENCEI_OFF = 0x1
+ PR_RISCV_CTX_SW_FENCEI_ON = 0x0
+ PR_RISCV_SCOPE_PER_PROCESS = 0x0
+ PR_RISCV_SCOPE_PER_THREAD = 0x1
+ PR_RISCV_SET_ICACHE_FLUSH_CTX = 0x47
PR_RISCV_V_GET_CONTROL = 0x46
PR_RISCV_V_SET_CONTROL = 0x45
PR_RISCV_V_VSTATE_CTRL_CUR_MASK = 0x3
@@ -2902,11 +2938,12 @@ const (
RUSAGE_SELF = 0x0
RUSAGE_THREAD = 0x1
RWF_APPEND = 0x10
+ RWF_ATOMIC = 0x40
RWF_DSYNC = 0x2
RWF_HIPRI = 0x1
RWF_NOAPPEND = 0x20
RWF_NOWAIT = 0x8
- RWF_SUPPORTED = 0x3f
+ RWF_SUPPORTED = 0x7f
RWF_SYNC = 0x4
RWF_WRITE_LIFE_NOT_SET = 0x0
SCHED_BATCH = 0x3
@@ -3179,6 +3216,7 @@ const (
STATX_ATTR_MOUNT_ROOT = 0x2000
STATX_ATTR_NODUMP = 0x40
STATX_ATTR_VERITY = 0x100000
+ STATX_ATTR_WRITE_ATOMIC = 0x400000
STATX_BASIC_STATS = 0x7ff
STATX_BLOCKS = 0x400
STATX_BTIME = 0x800
@@ -3192,8 +3230,10 @@ const (
STATX_MTIME = 0x40
STATX_NLINK = 0x4
STATX_SIZE = 0x200
+ STATX_SUBVOL = 0x8000
STATX_TYPE = 0x1
STATX_UID = 0x8
+ STATX_WRITE_ATOMIC = 0x10000
STATX__RESERVED = 0x80000000
SYNC_FILE_RANGE_WAIT_AFTER = 0x4
SYNC_FILE_RANGE_WAIT_BEFORE = 0x1
@@ -3592,6 +3632,7 @@ const (
XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000
XDP_UMEM_PGOFF_FILL_RING = 0x100000000
XDP_UMEM_REG = 0x4
+ XDP_UMEM_TX_METADATA_LEN = 0x4
XDP_UMEM_TX_SW_CSUM = 0x2
XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1
XDP_USE_NEED_WAKEUP = 0x8
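
Among the new zerrors_linux.go constants are the explicit huge-page size selectors (MAP_HUGE_64KB through MAP_HUGE_16GB), RWF_ATOMIC/STATX_WRITE_ATOMIC for torn-write protection, and STATX_SUBVOL. A minimal sketch of requesting an anonymous mapping backed by 2 MiB huge pages with the new size selector; it assumes a kernel with 2 MiB hugetlb pages reserved and otherwise fails with ENOMEM.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	const length = 2 << 20 // one 2 MiB huge page
	// MAP_HUGE_2MB selects an explicit huge-page size instead of the default.
	b, err := unix.Mmap(-1, 0, length,
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_HUGETLB|unix.MAP_HUGE_2MB)
	if err != nil {
		fmt.Println("mmap:", err) // typically ENOMEM if no 2 MiB pages are reserved
		return
	}
	defer unix.Munmap(b)
	b[0] = 1
	fmt.Println("mapped", len(b), "bytes backed by 2 MiB pages")
}
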
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
index e4bc0bd57c7..8aa6d77c018 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -151,9 +153,14 @@ const (
NFDBITS = 0x20
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
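
The per-arch zerrors files also gain NS_GET_MNTNS_ID and the NS_GET_*_PIDNS ioctl numbers for querying namespace file descriptors. A minimal sketch for reading a mount-namespace ID on Linux; the raw ioctl plumbing is spelled out because, to my knowledge, x/sys does not ship a typed helper for this request, so treat that part as an assumption.

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Open("/proc/self/ns/mnt", unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)

	var id uint64 // NS_GET_MNTNS_ID fills in a 64-bit namespace ID
	_, _, errno := unix.Syscall(unix.SYS_IOCTL,
		uintptr(fd), uintptr(unix.NS_GET_MNTNS_ID), uintptr(unsafe.Pointer(&id)))
	if errno != 0 {
		panic(errno)
	}
	fmt.Printf("mount namespace id: %#x\n", id)
}
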
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
index 689317afdbf..da428f42533 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -151,9 +153,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
index 5cca668ac30..bf45bfec78a 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x20
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
index 14270508b04..71c67162b73 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
ESR_MAGIC = 0x45535201
EXTPROC = 0x10000
@@ -152,9 +154,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
index 28e39afdcb4..9476628fa02 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -152,9 +154,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
index cd66e92cb42..b9e85f3cf0c 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x80
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x20
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
index c1595eba78e..a48b68a7647 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x80
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
index ee9456b0da7..ea00e8522a1 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x80
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
index 8cfca81e1b5..91c64687176 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x80
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x20
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
index 60b0deb3af7..8cbf38d6390 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x20
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000000
FF1 = 0x4000
@@ -150,9 +152,14 @@ const (
NL3 = 0x300
NLDLY = 0x300
NOFLSH = 0x80000000
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x4
ONLCR = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
index f90aa7281bf..a2df7341917 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x20
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000000
FF1 = 0x4000
@@ -150,9 +152,14 @@ const (
NL3 = 0x300
NLDLY = 0x300
NOFLSH = 0x80000000
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x4
ONLCR = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
index ba9e0150338..24791379233 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x20
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000000
FF1 = 0x4000
@@ -150,9 +152,14 @@ const (
NL3 = 0x300
NLDLY = 0x300
NOFLSH = 0x80000000
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x4
ONLCR = 0x2
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
index 07cdfd6e9fd..d265f146ee0 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
index 2f1dd214a74..3f2d6443964 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
@@ -78,6 +78,8 @@ const (
ECHOPRT = 0x400
EFD_CLOEXEC = 0x80000
EFD_NONBLOCK = 0x800
+ EPIOCGPARAMS = 0x80088a02
+ EPIOCSPARAMS = 0x40088a01
EPOLL_CLOEXEC = 0x80000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -148,9 +150,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x8008b705
NS_GET_NSTYPE = 0xb703
NS_GET_OWNER_UID = 0xb704
NS_GET_PARENT = 0xb702
+ NS_GET_PID_FROM_PIDNS = 0x8004b706
+ NS_GET_PID_IN_PIDNS = 0x8004b708
+ NS_GET_TGID_FROM_PIDNS = 0x8004b707
+ NS_GET_TGID_IN_PIDNS = 0x8004b709
NS_GET_USERNS = 0xb701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
index f40519d9018..5d8b727a1c8 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
@@ -82,6 +82,8 @@ const (
EFD_CLOEXEC = 0x400000
EFD_NONBLOCK = 0x4000
EMT_TAGOVF = 0x1
+ EPIOCGPARAMS = 0x40088a02
+ EPIOCSPARAMS = 0x80088a01
EPOLL_CLOEXEC = 0x400000
EXTPROC = 0x10000
FF1 = 0x8000
@@ -153,9 +155,14 @@ const (
NFDBITS = 0x40
NLDLY = 0x100
NOFLSH = 0x80
+ NS_GET_MNTNS_ID = 0x4008b705
NS_GET_NSTYPE = 0x2000b703
NS_GET_OWNER_UID = 0x2000b704
NS_GET_PARENT = 0x2000b702
+ NS_GET_PID_FROM_PIDNS = 0x4004b706
+ NS_GET_PID_IN_PIDNS = 0x4004b708
+ NS_GET_TGID_FROM_PIDNS = 0x4004b707
+ NS_GET_TGID_IN_PIDNS = 0x4004b709
NS_GET_USERNS = 0x2000b701
OLCUC = 0x2
ONLCR = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
index da08b2ab3d9..1ec2b1407b1 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go
@@ -581,6 +581,8 @@ const (
AT_EMPTY_PATH = 0x1000
AT_REMOVEDIR = 0x200
RENAME_NOREPLACE = 1 << 0
+ ST_RDONLY = 1
+ ST_NOSUID = 2
)
const (
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
index 07642c308d3..24b346e1a35 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go
@@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func renamexNp(from string, to string, flag uint32) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(from)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(to)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag))
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_renamex_np_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
+func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(from)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(to)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_renameatx_np_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
var _p0 unsafe.Pointer
if len(mib) > 0 {
@@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) {
+ var _p0 unsafe.Pointer
+ if len(iov) > 0 {
+ _p0 = unsafe.Pointer(&iov[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_connectx_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) {
_, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags))
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
index 923e08cb792..ebd213100b3 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s
@@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8
DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB)
+TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_renamex_np(SB)
+GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8
+DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB)
+
+TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_renameatx_np(SB)
+GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8
+DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB)
+
TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sysctl(SB)
GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8
@@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8
DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB)
+TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_connectx(SB)
+GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8
+DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB)
+
TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sendfile(SB)
GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
index 7d73dda6473..824b9c2d5e0 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go
@@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func renamexNp(from string, to string, flag uint32) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(from)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(to)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag))
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_renamex_np_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
+func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(from)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(to)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_renameatx_np_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
var _p0 unsafe.Pointer
if len(mib) > 0 {
@@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) {
+ var _p0 unsafe.Pointer
+ if len(iov) > 0 {
+ _p0 = unsafe.Pointer(&iov[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_connectx_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) {
_, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags))
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
index 057700111e7..4f178a22934 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s
@@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8
DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB)
+TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_renamex_np(SB)
+GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8
+DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB)
+
+TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_renameatx_np(SB)
+GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8
+DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB)
+
TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sysctl(SB)
GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8
@@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8
DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB)
+TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_connectx(SB)
+GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8
+DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB)
+
TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_sendfile(SB)
GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8
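
The Darwin files gain generated wrappers and assembly trampolines for renamex_np(2), renameatx_np(2) and connectx(2), matching the RENAME_* and CONNECT_* flag constants added earlier in the patch. A minimal sketch of an atomic two-way path swap via the rename flags; it assumes the package also exposes an exported RenamexNp helper on top of the lowercase wrapper shown here (that exported name is an assumption, not something visible in this hunk).

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// RENAME_SWAP atomically exchanges the two paths; RENAME_EXCL would
	// instead fail if the destination already exists.
	// RenamexNp is assumed to be the exported wrapper over renamexNp.
	if err := unix.RenamexNp("config.json", "config.json.new", unix.RENAME_SWAP); err != nil {
		fmt.Println("renamex_np:", err)
		return
	}
	fmt.Println("swapped config.json and config.json.new atomically")
}
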
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go
index 87d8612a1dc..af30da55780 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go
@@ -971,23 +971,6 @@ func Getpriority(which int, who int) (prio int, err error) {
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
-func Getrandom(buf []byte, flags int) (n int, err error) {
- var _p0 unsafe.Pointer
- if len(buf) > 0 {
- _p0 = unsafe.Pointer(&buf[0])
- } else {
- _p0 = unsafe.Pointer(&_zero)
- }
- r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags))
- n = int(r0)
- if e1 != 0 {
- err = errnoErr(e1)
- }
- return
-}
-
-// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
-
func Getrusage(who int, rusage *Rusage) (err error) {
_, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0)
if e1 != 0 {
@@ -2229,3 +2212,19 @@ func Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint)
}
return
}
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
+func Mseal(b []byte, flags uint) (err error) {
+ var _p0 unsafe.Pointer
+ if len(b) > 0 {
+ _p0 = unsafe.Pointer(&b[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ _, _, e1 := Syscall(SYS_MSEAL, uintptr(_p0), uintptr(len(b)), uintptr(flags))
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
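
zsyscall_linux.go drops the generated Getrandom wrapper (its replacement presumably routes through the vgetrandom path added above) and gains a generated wrapper for the new mseal(2) syscall. A minimal sketch of sealing a mapping so its protections and layout can no longer be altered; requires Linux 6.10 or newer.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	b, err := unix.Mmap(-1, 0, 4096,
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		panic(err)
	}
	copy(b, "immutable region")

	// Mseal forbids any further mprotect/munmap/mremap on this range.
	if err := unix.Mseal(b, 0); err != nil {
		fmt.Println("mseal:", err) // ENOSYS on kernels older than 6.10
		return
	}
	// This now fails with EPERM because the mapping is sealed.
	fmt.Println("mprotect after seal:", unix.Mprotect(b, unix.PROT_READ))
}
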
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go
index 9dc42410b78..1851df14e87 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s
index 41b5617316c..0b43c693656 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4
DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4
+DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go
index 0d3a0751cd4..e1ec0dbe4ec 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s
index 4019a656f6d..880c6d6e316 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8
DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8
+DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go
index c39f7776db3..7c8452a63e9 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s
index ac4af24f908..b8ef95b0fa1 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4
DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4
+DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go
index 57571d072fe..2ffdf861f75 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s
index f77d532121b..2af3b5c762f 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8
DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8
+DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go
index e62963e67e2..1da08d52675 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s
index fae140b62c9..b7a251353b0 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8
DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8
+DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go
index 00831354c82..6e85b0aac95 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s
index 9d1e0ff06d0..f15dadf0552 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s
@@ -555,6 +555,12 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8
DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ CALL libc_mount(SB)
+ RET
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8
+DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
CALL libc_nanosleep(SB)
RET
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go
index 79029ed5848..28b487df251 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go
@@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) {
+ var _p0 *byte
+ _p0, err = BytePtrFromString(fsType)
+ if err != nil {
+ return
+ }
+ var _p1 *byte
+ _p1, err = BytePtrFromString(dir)
+ if err != nil {
+ return
+ }
+ _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0)
+ if e1 != 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
+var libc_mount_trampoline_addr uintptr
+
+//go:cgo_import_dynamic libc_mount mount "libc.so"
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
func Nanosleep(time *Timespec, leftover *Timespec) (err error) {
_, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0)
if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s
index da115f9a4b6..1e7f321e436 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s
+++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s
@@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0
GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8
DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB)
+TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_mount(SB)
+GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8
+DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB)
+
TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0
JMP libc_nanosleep(SB)
GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8
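
Every OpenBSD port picks up an exported Mount wrapper plus a matching libc trampoline. The sketch below only demonstrates the call shape: OpenBSD's mount(2) expects a pointer to a filesystem-specific argument struct, which x/sys does not define, so tmpfsArgs is a named placeholder and the call is expected to fail unless that layout matches the kernel ABI.

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

// tmpfsArgs is purely illustrative; the real layout is defined by the
// OpenBSD kernel per filesystem and is not part of x/sys.
type tmpfsArgs struct {
	Version int32
	// remaining fields omitted
}

func main() {
	args := tmpfsArgs{Version: 1}
	err := unix.Mount("tmpfs", "/mnt/scratch", 0, unsafe.Pointer(&args))
	fmt.Println("mount:", err) // EINVAL/EPERM expected unless args match the ABI
}
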
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go
index 53aef5dc58d..524b0820cbc 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go
@@ -457,4 +457,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go
index 71d524763d3..f485dbf4565 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go
@@ -341,6 +341,7 @@ const (
SYS_STATX = 332
SYS_IO_PGETEVENTS = 333
SYS_RSEQ = 334
+ SYS_URETPROBE = 335
SYS_PIDFD_SEND_SIGNAL = 424
SYS_IO_URING_SETUP = 425
SYS_IO_URING_ENTER = 426
@@ -379,4 +380,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go
index c747706131c..70b35bf3b09 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go
@@ -421,4 +421,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go
index f96e214f6d4..1893e2fe884 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go
@@ -85,7 +85,7 @@ const (
SYS_SPLICE = 76
SYS_TEE = 77
SYS_READLINKAT = 78
- SYS_FSTATAT = 79
+ SYS_NEWFSTATAT = 79
SYS_FSTAT = 80
SYS_SYNC = 81
SYS_FSYNC = 82
@@ -324,4 +324,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go
index 28425346cf1..16a4017da0a 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go
@@ -84,6 +84,8 @@ const (
SYS_SPLICE = 76
SYS_TEE = 77
SYS_READLINKAT = 78
+ SYS_NEWFSTATAT = 79
+ SYS_FSTAT = 80
SYS_SYNC = 81
SYS_FSYNC = 82
SYS_FDATASYNC = 83
@@ -318,4 +320,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go
index d0953018dae..7e567f1efff 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go
@@ -441,4 +441,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 4459
SYS_LSM_SET_SELF_ATTR = 4460
SYS_LSM_LIST_MODULES = 4461
+ SYS_MSEAL = 4462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go
index 295c7f4b818..38ae55e5ef8 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go
@@ -371,4 +371,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 5459
SYS_LSM_SET_SELF_ATTR = 5460
SYS_LSM_LIST_MODULES = 5461
+ SYS_MSEAL = 5462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go
index d1a9eaca7a4..55e92e60a82 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go
@@ -371,4 +371,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 5459
SYS_LSM_SET_SELF_ATTR = 5460
SYS_LSM_LIST_MODULES = 5461
+ SYS_MSEAL = 5462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go
index bec157c39fd..60658d6a021 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go
@@ -441,4 +441,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 4459
SYS_LSM_SET_SELF_ATTR = 4460
SYS_LSM_LIST_MODULES = 4461
+ SYS_MSEAL = 4462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go
index 7ee7bdc435c..e203e8a7ed4 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go
@@ -448,4 +448,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go
index fad1f25b449..5944b97d546 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go
@@ -420,4 +420,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go
index 7d3e16357d6..c66d416dad1 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go
@@ -420,4 +420,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go
index 0ed53ad9f7e..a5459e766f5 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go
@@ -84,7 +84,7 @@ const (
SYS_SPLICE = 76
SYS_TEE = 77
SYS_READLINKAT = 78
- SYS_FSTATAT = 79
+ SYS_NEWFSTATAT = 79
SYS_FSTAT = 80
SYS_SYNC = 81
SYS_FSYNC = 82
@@ -325,4 +325,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go
index 2fba04ad500..01d86825bb9 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go
@@ -386,4 +386,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go
index 621d00d741b..7b703e77cda 100644
--- a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go
+++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go
@@ -399,4 +399,5 @@ const (
SYS_LSM_GET_SELF_ATTR = 459
SYS_LSM_SET_SELF_ATTR = 460
SYS_LSM_LIST_MODULES = 461
+ SYS_MSEAL = 462
)
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
index 091d107f3a5..d003c3d4378 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
@@ -306,6 +306,19 @@ type XVSockPgen struct {
type _Socklen uint32
+type SaeAssocID uint32
+
+type SaeConnID uint32
+
+type SaEndpoints struct {
+ Srcif uint32
+ Srcaddr *RawSockaddr
+ Srcaddrlen uint32
+ Dstaddr *RawSockaddr
+ Dstaddrlen uint32
+ _ [4]byte
+}
+
type Xucred struct {
Version uint32
Uid uint32
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
index 28ff4ef74d0..0d45a941aae 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
@@ -306,6 +306,19 @@ type XVSockPgen struct {
type _Socklen uint32
+type SaeAssocID uint32
+
+type SaeConnID uint32
+
+type SaEndpoints struct {
+ Srcif uint32
+ Srcaddr *RawSockaddr
+ Srcaddrlen uint32
+ Dstaddr *RawSockaddr
+ Dstaddrlen uint32
+ _ [4]byte
+}
+
type Xucred struct {
Version uint32
Uid uint32
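
The Darwin type files add SaeAssocID, SaeConnID and SaEndpoints, the pieces consumed by the connectx wrapper generated earlier in the patch. Because that wrapper is unexported, the sketch below is written as if inside golang.org/x/sys/unix and only shows how the types fit together; it assumes import "unsafe", and the sockaddr setup is intentionally minimal.

// Sketch written as package-internal code (connectx is unexported). It issues
// a connect-and-send in one call, in the style of TCP Fast Open, using the
// CONNECT_DATA_IDEMPOTENT and SAE_ASSOCID_ANY constants from
// zerrors_darwin_*.go.
func connectxSketch(fd int, dst *RawSockaddrInet4, payload []byte) (sent uintptr, err error) {
	ep := SaEndpoints{
		Dstaddr:    (*RawSockaddr)(unsafe.Pointer(dst)),
		Dstaddrlen: SizeofSockaddrInet4,
	}
	iov := []Iovec{{Base: &payload[0], Len: uint64(len(payload))}}
	var connid SaeConnID
	err = connectx(fd, &ep, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT, iov, &sent, &connid)
	return sent, err
}
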
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
index 6cbd094a3aa..51e13eb055f 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go
@@ -625,6 +625,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
index 7c03b6ee77f..d002d8ef3cc 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go
@@ -630,6 +630,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
index 422107ee8b1..3f863d898dd 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go
@@ -616,6 +616,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
index 505a12acfd9..61c72931066 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go
@@ -610,6 +610,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
index cc986c79006..b5d17414f03 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go
@@ -612,6 +612,7 @@ const (
POLLRDNORM = 0x40
POLLWRBAND = 0x100
POLLWRNORM = 0x4
+ POLLRDHUP = 0x4000
)
type CapRights struct {
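
All FreeBSD ports now expose POLLRDHUP, which reports that the peer has shut down its writing end of a stream socket. A minimal FreeBSD-oriented sketch of waiting for either readable data or a half-close with unix.Poll.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// waitReadableOrHangup polls a connected stream socket until it is readable
// or the peer has closed its write side (POLLRDHUP).
func waitReadableOrHangup(fd int) error {
	fds := []unix.PollFd{{
		Fd:     int32(fd),
		Events: unix.POLLIN | unix.POLLRDHUP,
	}}
	if _, err := unix.Poll(fds, -1); err != nil {
		return err
	}
	if fds[0].Revents&unix.POLLRDHUP != 0 {
		fmt.Println("peer half-closed the connection")
	}
	return nil
}

func main() { _ = waitReadableOrHangup(0) }
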
diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go
index 4740b834854..3a69e454962 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_linux.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go
@@ -87,30 +87,35 @@ type StatxTimestamp struct {
}
type Statx_t struct {
- Mask uint32
- Blksize uint32
- Attributes uint64
- Nlink uint32
- Uid uint32
- Gid uint32
- Mode uint16
- _ [1]uint16
- Ino uint64
- Size uint64
- Blocks uint64
- Attributes_mask uint64
- Atime StatxTimestamp
- Btime StatxTimestamp
- Ctime StatxTimestamp
- Mtime StatxTimestamp
- Rdev_major uint32
- Rdev_minor uint32
- Dev_major uint32
- Dev_minor uint32
- Mnt_id uint64
- Dio_mem_align uint32
- Dio_offset_align uint32
- _ [12]uint64
+ Mask uint32
+ Blksize uint32
+ Attributes uint64
+ Nlink uint32
+ Uid uint32
+ Gid uint32
+ Mode uint16
+ _ [1]uint16
+ Ino uint64
+ Size uint64
+ Blocks uint64
+ Attributes_mask uint64
+ Atime StatxTimestamp
+ Btime StatxTimestamp
+ Ctime StatxTimestamp
+ Mtime StatxTimestamp
+ Rdev_major uint32
+ Rdev_minor uint32
+ Dev_major uint32
+ Dev_minor uint32
+ Mnt_id uint64
+ Dio_mem_align uint32
+ Dio_offset_align uint32
+ Subvol uint64
+ Atomic_write_unit_min uint32
+ Atomic_write_unit_max uint32
+ Atomic_write_segments_max uint32
+ _ [1]uint32
+ _ [9]uint64
}
type Fsid struct {
@@ -515,6 +520,29 @@ type TCPInfo struct {
Total_rto_time uint32
}
+type TCPVegasInfo struct {
+ Enabled uint32
+ Rttcnt uint32
+ Rtt uint32
+ Minrtt uint32
+}
+
+type TCPDCTCPInfo struct {
+ Enabled uint16
+ Ce_state uint16
+ Alpha uint32
+ Ab_ecn uint32
+ Ab_tot uint32
+}
+
+type TCPBBRInfo struct {
+ Bw_lo uint32
+ Bw_hi uint32
+ Min_rtt uint32
+ Pacing_gain uint32
+ Cwnd_gain uint32
+}
+
type CanFilter struct {
Id uint32
Mask uint32
@@ -556,6 +584,7 @@ const (
SizeofICMPv6Filter = 0x20
SizeofUcred = 0xc
SizeofTCPInfo = 0xf8
+ SizeofTCPCCInfo = 0x14
SizeofCanFilter = 0x8
SizeofTCPRepairOpt = 0x8
)
@@ -2485,7 +2514,7 @@ type XDPMmapOffsets struct {
type XDPUmemReg struct {
Addr uint64
Len uint64
- Chunk_size uint32
+ Size uint32
Headroom uint32
Flags uint32
Tx_metadata_len uint32
@@ -3473,7 +3502,7 @@ const (
DEVLINK_PORT_FN_ATTR_STATE = 0x2
DEVLINK_PORT_FN_ATTR_OPSTATE = 0x3
DEVLINK_PORT_FN_ATTR_CAPS = 0x4
- DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x5
+ DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x6
)
type FsverityDigest struct {
@@ -3765,7 +3794,7 @@ const (
ETHTOOL_MSG_PSE_GET = 0x24
ETHTOOL_MSG_PSE_SET = 0x25
ETHTOOL_MSG_RSS_GET = 0x26
- ETHTOOL_MSG_USER_MAX = 0x2b
+ ETHTOOL_MSG_USER_MAX = 0x2c
ETHTOOL_MSG_KERNEL_NONE = 0x0
ETHTOOL_MSG_STRSET_GET_REPLY = 0x1
ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2
@@ -3805,7 +3834,10 @@ const (
ETHTOOL_MSG_MODULE_NTF = 0x24
ETHTOOL_MSG_PSE_GET_REPLY = 0x25
ETHTOOL_MSG_RSS_GET_REPLY = 0x26
- ETHTOOL_MSG_KERNEL_MAX = 0x2b
+ ETHTOOL_MSG_KERNEL_MAX = 0x2c
+ ETHTOOL_FLAG_COMPACT_BITSETS = 0x1
+ ETHTOOL_FLAG_OMIT_REPLY = 0x2
+ ETHTOOL_FLAG_STATS = 0x4
ETHTOOL_A_HEADER_UNSPEC = 0x0
ETHTOOL_A_HEADER_DEV_INDEX = 0x1
ETHTOOL_A_HEADER_DEV_NAME = 0x2
@@ -3947,7 +3979,7 @@ const (
ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17
ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18
ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19
- ETHTOOL_A_COALESCE_MAX = 0x1c
+ ETHTOOL_A_COALESCE_MAX = 0x1e
ETHTOOL_A_PAUSE_UNSPEC = 0x0
ETHTOOL_A_PAUSE_HEADER = 0x1
ETHTOOL_A_PAUSE_AUTONEG = 0x2
@@ -3975,7 +4007,7 @@ const (
ETHTOOL_A_TSINFO_TX_TYPES = 0x3
ETHTOOL_A_TSINFO_RX_FILTERS = 0x4
ETHTOOL_A_TSINFO_PHC_INDEX = 0x5
- ETHTOOL_A_TSINFO_MAX = 0x5
+ ETHTOOL_A_TSINFO_MAX = 0x6
ETHTOOL_A_CABLE_TEST_UNSPEC = 0x0
ETHTOOL_A_CABLE_TEST_HEADER = 0x1
ETHTOOL_A_CABLE_TEST_MAX = 0x1
@@ -4605,7 +4637,7 @@ const (
NL80211_ATTR_MAC_HINT = 0xc8
NL80211_ATTR_MAC_MASK = 0xd7
NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca
- NL80211_ATTR_MAX = 0x14a
+ NL80211_ATTR_MAX = 0x14c
NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4
NL80211_ATTR_MAX_CSA_COUNTERS = 0xce
NL80211_ATTR_MAX_MATCH_SETS = 0x85
@@ -5209,7 +5241,7 @@ const (
NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf
NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe
NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf
- NL80211_FREQUENCY_ATTR_MAX = 0x20
+ NL80211_FREQUENCY_ATTR_MAX = 0x21
NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6
NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11
NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc
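
ztypes_linux.go adds TCPVegasInfo, TCPDCTCPInfo and TCPBBRInfo plus SizeofTCPCCInfo, the union returned by the TCP_CC_INFO socket option. A minimal sketch that reads the option directly and interprets the bytes as TCPBBRInfo; it assumes TCP_CC_INFO and IPPROTO_TCP are pre-existing constants in the package, and it uses SYS_GETSOCKOPT as a direct syscall, which holds on 64-bit ports such as amd64 but not on 386, so treat it as an amd64-oriented sketch.

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

// bbrInfo fetches TCP_CC_INFO for a connected socket and interprets the
// bytes as TCPBBRInfo. This is only meaningful when the socket's congestion
// control is "bbr"; for vegas or dctcp the same buffer maps to the sibling
// structs added in this hunk.
func bbrInfo(fd int) (*unix.TCPBBRInfo, error) {
	var info unix.TCPBBRInfo
	optlen := uint32(unix.SizeofTCPCCInfo)
	_, _, errno := unix.Syscall6(unix.SYS_GETSOCKOPT,
		uintptr(fd), unix.IPPROTO_TCP, unix.TCP_CC_INFO,
		uintptr(unsafe.Pointer(&info)), uintptr(unsafe.Pointer(&optlen)), 0)
	if errno != 0 {
		return nil, errno
	}
	return &info, nil
}

func main() {
	info, err := bbrInfo(3) // fd 3 is a placeholder for a connected TCP socket
	fmt.Println(info, err)
}
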
diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
index 15adc04142f..ad05b51a603 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go
@@ -727,6 +727,37 @@ const (
RISCV_HWPROBE_EXT_ZBA = 0x8
RISCV_HWPROBE_EXT_ZBB = 0x10
RISCV_HWPROBE_EXT_ZBS = 0x20
+ RISCV_HWPROBE_EXT_ZICBOZ = 0x40
+ RISCV_HWPROBE_EXT_ZBC = 0x80
+ RISCV_HWPROBE_EXT_ZBKB = 0x100
+ RISCV_HWPROBE_EXT_ZBKC = 0x200
+ RISCV_HWPROBE_EXT_ZBKX = 0x400
+ RISCV_HWPROBE_EXT_ZKND = 0x800
+ RISCV_HWPROBE_EXT_ZKNE = 0x1000
+ RISCV_HWPROBE_EXT_ZKNH = 0x2000
+ RISCV_HWPROBE_EXT_ZKSED = 0x4000
+ RISCV_HWPROBE_EXT_ZKSH = 0x8000
+ RISCV_HWPROBE_EXT_ZKT = 0x10000
+ RISCV_HWPROBE_EXT_ZVBB = 0x20000
+ RISCV_HWPROBE_EXT_ZVBC = 0x40000
+ RISCV_HWPROBE_EXT_ZVKB = 0x80000
+ RISCV_HWPROBE_EXT_ZVKG = 0x100000
+ RISCV_HWPROBE_EXT_ZVKNED = 0x200000
+ RISCV_HWPROBE_EXT_ZVKNHA = 0x400000
+ RISCV_HWPROBE_EXT_ZVKNHB = 0x800000
+ RISCV_HWPROBE_EXT_ZVKSED = 0x1000000
+ RISCV_HWPROBE_EXT_ZVKSH = 0x2000000
+ RISCV_HWPROBE_EXT_ZVKT = 0x4000000
+ RISCV_HWPROBE_EXT_ZFH = 0x8000000
+ RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000
+ RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000
+ RISCV_HWPROBE_EXT_ZVFH = 0x40000000
+ RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000
+ RISCV_HWPROBE_EXT_ZFA = 0x100000000
+ RISCV_HWPROBE_EXT_ZTSO = 0x200000000
+ RISCV_HWPROBE_EXT_ZACAS = 0x400000000
+ RISCV_HWPROBE_EXT_ZICOND = 0x800000000
+ RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000
RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5
RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0
RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1
@@ -734,4 +765,6 @@ const (
RISCV_HWPROBE_MISALIGNED_FAST = 0x3
RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4
RISCV_HWPROBE_MISALIGNED_MASK = 0x7
+ RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6
+ RISCV_HWPROBE_WHICH_CPUS = 0x1
)
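
The riscv64 ztypes update above adds hwprobe bits for many more ISA extensions. As a rough sketch (not part of this patch), the new constants are plain bit flags and can be tested like any other bitmask; the isaBitmask value below is a stand-in for whatever the caller obtains from the kernel's hwprobe interface (e.g. the IMA_EXT_0 key), which is outside the scope of this sketch.

//go:build linux && riscv64

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// isaBitmask is a placeholder for the extension bitmask returned by the
	// kernel's hwprobe mechanism; here it is hard-coded purely for illustration.
	var isaBitmask uint64 = unix.RISCV_HWPROBE_EXT_ZBA | unix.RISCV_HWPROBE_EXT_ZICBOZ

	// Feature checks against the new constants are simple bitwise tests.
	if isaBitmask&unix.RISCV_HWPROBE_EXT_ZICBOZ != 0 {
		fmt.Println("Zicboz (cache-block zero) is available")
	}
	if isaBitmask&unix.RISCV_HWPROBE_EXT_ZACAS != 0 {
		fmt.Println("Zacas (atomic compare-and-swap) is available")
	}
}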
diff --git a/vendor/golang.org/x/sys/windows/dll_windows.go b/vendor/golang.org/x/sys/windows/dll_windows.go
index 115341fba66..4e613cf6335 100644
--- a/vendor/golang.org/x/sys/windows/dll_windows.go
+++ b/vendor/golang.org/x/sys/windows/dll_windows.go
@@ -65,7 +65,7 @@ func LoadDLL(name string) (dll *DLL, err error) {
return d, nil
}
-// MustLoadDLL is like LoadDLL but panics if load operation failes.
+// MustLoadDLL is like LoadDLL but panics if load operation fails.
func MustLoadDLL(name string) *DLL {
d, e := LoadDLL(name)
if e != nil {
diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go
index 97651b5bd04..b6e1ab76f82 100644
--- a/vendor/golang.org/x/sys/windows/security_windows.go
+++ b/vendor/golang.org/x/sys/windows/security_windows.go
@@ -1179,7 +1179,7 @@ type OBJECTS_AND_NAME struct {
//sys makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) = advapi32.MakeSelfRelativeSD
//sys setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) = advapi32.SetEntriesInAclW
-//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) = advapi32.GetAce
+//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) = advapi32.GetAce
// Control returns the security descriptor control bits.
func (sd *SECURITY_DESCRIPTOR) Control() (control SECURITY_DESCRIPTOR_CONTROL, revision uint32, err error) {
diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go
index 6525c62f3c2..5cee9a3143f 100644
--- a/vendor/golang.org/x/sys/windows/syscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/syscall_windows.go
@@ -17,8 +17,10 @@ import (
"unsafe"
)
-type Handle uintptr
-type HWND uintptr
+type (
+ Handle uintptr
+ HWND uintptr
+)
const (
InvalidHandle = ^Handle(0)
@@ -211,6 +213,10 @@ func NewCallbackCDecl(fn interface{}) uintptr {
//sys OpenProcess(desiredAccess uint32, inheritHandle bool, processId uint32) (handle Handle, err error)
//sys ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *uint16, showCmd int32) (err error) [failretval<=32] = shell32.ShellExecuteW
//sys GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) = user32.GetWindowThreadProcessId
+//sys LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) [failretval==0] = user32.LoadKeyboardLayoutW
+//sys UnloadKeyboardLayout(hkl Handle) (err error) = user32.UnloadKeyboardLayout
+//sys GetKeyboardLayout(tid uint32) (hkl Handle) = user32.GetKeyboardLayout
+//sys ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) = user32.ToUnicodeEx
//sys GetShellWindow() (shellWindow HWND) = user32.GetShellWindow
//sys MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) [failretval==0] = user32.MessageBoxW
//sys ExitWindowsEx(flags uint32, reason uint32) (err error) = user32.ExitWindowsEx
@@ -307,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr {
//sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode
//sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo
//sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition
+//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP
+//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP
+//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP
+//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP
//sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW
//sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW
//sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole
@@ -1368,9 +1378,11 @@ func SetsockoptLinger(fd Handle, level, opt int, l *Linger) (err error) {
func SetsockoptInet4Addr(fd Handle, level, opt int, value [4]byte) (err error) {
return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(&value[0])), 4)
}
+
func SetsockoptIPMreq(fd Handle, level, opt int, mreq *IPMreq) (err error) {
return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(mreq)), int32(unsafe.Sizeof(*mreq)))
}
+
func SetsockoptIPv6Mreq(fd Handle, level, opt int, mreq *IPv6Mreq) (err error) {
return syscall.EWINDOWS
}
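
The syscall_windows.go hunk above exports GetConsoleCP, GetConsoleOutputCP, SetConsoleCP and SetConsoleOutputCP. A minimal sketch (not part of this patch) that switches the console output code page to UTF-8 and restores the previous one on exit; 65001 is the well-known CP_UTF8 identifier.

//go:build windows

package main

import (
	"fmt"

	"golang.org/x/sys/windows"
)

const cpUTF8 = 65001 // CP_UTF8

func main() {
	// Remember the current output code page so it can be restored on exit.
	oldCP, err := windows.GetConsoleOutputCP()
	if err != nil {
		fmt.Println("GetConsoleOutputCP:", err)
		return
	}
	defer windows.SetConsoleOutputCP(oldCP)

	// Switch console output to UTF-8 for the lifetime of the program.
	if err := windows.SetConsoleOutputCP(cpUTF8); err != nil {
		fmt.Println("SetConsoleOutputCP:", err)
		return
	}
	fmt.Println("console output code page switched to UTF-8, was", oldCP)
}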
diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go
index d8cb71db0a6..7b97a154c95 100644
--- a/vendor/golang.org/x/sys/windows/types_windows.go
+++ b/vendor/golang.org/x/sys/windows/types_windows.go
@@ -1060,6 +1060,7 @@ const (
SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6
SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4
SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12
+ SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15
// cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460
@@ -2003,7 +2004,21 @@ const (
MOVEFILE_FAIL_IF_NOT_TRACKABLE = 0x20
)
-const GAA_FLAG_INCLUDE_PREFIX = 0x00000010
+// Flags for GetAdaptersAddresses, see
+// https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-getadaptersaddresses.
+const (
+ GAA_FLAG_SKIP_UNICAST = 0x1
+ GAA_FLAG_SKIP_ANYCAST = 0x2
+ GAA_FLAG_SKIP_MULTICAST = 0x4
+ GAA_FLAG_SKIP_DNS_SERVER = 0x8
+ GAA_FLAG_INCLUDE_PREFIX = 0x10
+ GAA_FLAG_SKIP_FRIENDLY_NAME = 0x20
+ GAA_FLAG_INCLUDE_WINS_INFO = 0x40
+ GAA_FLAG_INCLUDE_GATEWAYS = 0x80
+ GAA_FLAG_INCLUDE_ALL_INTERFACES = 0x100
+ GAA_FLAG_INCLUDE_ALL_COMPARTMENTS = 0x200
+ GAA_FLAG_INCLUDE_TUNNEL_BINDINGORDER = 0x400
+)
const (
IF_TYPE_OTHER = 1
@@ -2017,6 +2032,50 @@ const (
IF_TYPE_IEEE1394 = 144
)
+// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin
+const (
+ IpPrefixOriginOther = 0
+ IpPrefixOriginManual = 1
+ IpPrefixOriginWellKnown = 2
+ IpPrefixOriginDhcp = 3
+ IpPrefixOriginRouterAdvertisement = 4
+ IpPrefixOriginUnchanged = 1 << 4
+)
+
+// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin
+const (
+ NlsoOther = 0
+ NlsoManual = 1
+ NlsoWellKnown = 2
+ NlsoDhcp = 3
+ NlsoLinkLayerAddress = 4
+ NlsoRandom = 5
+ IpSuffixOriginOther = 0
+ IpSuffixOriginManual = 1
+ IpSuffixOriginWellKnown = 2
+ IpSuffixOriginDhcp = 3
+ IpSuffixOriginLinkLayerAddress = 4
+ IpSuffixOriginRandom = 5
+ IpSuffixOriginUnchanged = 1 << 4
+)
+
+// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see
+// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state
+const (
+ NldsInvalid = 0
+ NldsTentative = 1
+ NldsDuplicate = 2
+ NldsDeprecated = 3
+ NldsPreferred = 4
+ IpDadStateInvalid = 0
+ IpDadStateTentative = 1
+ IpDadStateDuplicate = 2
+ IpDadStateDeprecated = 3
+ IpDadStatePreferred = 4
+)
+
type SocketAddress struct {
Sockaddr *syscall.RawSockaddrAny
SockaddrLength int32
@@ -3404,3 +3463,14 @@ type DCB struct {
EvtChar byte
wReserved1 uint16
}
+
+// Keyboard Layout Flags.
+// See https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-loadkeyboardlayoutw
+const (
+ KLF_ACTIVATE = 0x00000001
+ KLF_SUBSTITUTE_OK = 0x00000002
+ KLF_REORDER = 0x00000008
+ KLF_REPLACELANG = 0x00000010
+ KLF_NOTELLSHELL = 0x00000080
+ KLF_SETFORPROCESS = 0x00000100
+)
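
The types_windows.go hunk above adds the KLF_* flags consumed by the new LoadKeyboardLayout wrapper. A hedged sketch (not part of this patch) that loads and activates the US English layout and unloads it again; "00000409" is the standard keyboard layout identifier (KLID) for en-US.

//go:build windows

package main

import (
	"fmt"

	"golang.org/x/sys/windows"
)

func main() {
	// "00000409" is the KLID for US English.
	klid, err := windows.UTF16PtrFromString("00000409")
	if err != nil {
		fmt.Println(err)
		return
	}

	// Load the layout and activate it for the calling thread.
	hkl, err := windows.LoadKeyboardLayout(klid, windows.KLF_ACTIVATE)
	if err != nil {
		fmt.Println("LoadKeyboardLayout:", err)
		return
	}
	defer windows.UnloadKeyboardLayout(hkl)

	// GetKeyboardLayout(0) reports the active layout of the current thread.
	fmt.Printf("active layout handle: %#x (loaded %#x)\n", windows.GetKeyboardLayout(0), hkl)
}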
diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
index eba761018aa..4c2e1bdc01e 100644
--- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
@@ -247,7 +247,9 @@ var (
procGetCommandLineW = modkernel32.NewProc("GetCommandLineW")
procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW")
procGetComputerNameW = modkernel32.NewProc("GetComputerNameW")
+ procGetConsoleCP = modkernel32.NewProc("GetConsoleCP")
procGetConsoleMode = modkernel32.NewProc("GetConsoleMode")
+ procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP")
procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo")
procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW")
procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId")
@@ -347,8 +349,10 @@ var (
procSetCommMask = modkernel32.NewProc("SetCommMask")
procSetCommState = modkernel32.NewProc("SetCommState")
procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts")
+ procSetConsoleCP = modkernel32.NewProc("SetConsoleCP")
procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition")
procSetConsoleMode = modkernel32.NewProc("SetConsoleMode")
+ procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP")
procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW")
procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories")
procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW")
@@ -478,12 +482,16 @@ var (
procGetDesktopWindow = moduser32.NewProc("GetDesktopWindow")
procGetForegroundWindow = moduser32.NewProc("GetForegroundWindow")
procGetGUIThreadInfo = moduser32.NewProc("GetGUIThreadInfo")
+ procGetKeyboardLayout = moduser32.NewProc("GetKeyboardLayout")
procGetShellWindow = moduser32.NewProc("GetShellWindow")
procGetWindowThreadProcessId = moduser32.NewProc("GetWindowThreadProcessId")
procIsWindow = moduser32.NewProc("IsWindow")
procIsWindowUnicode = moduser32.NewProc("IsWindowUnicode")
procIsWindowVisible = moduser32.NewProc("IsWindowVisible")
+ procLoadKeyboardLayoutW = moduser32.NewProc("LoadKeyboardLayoutW")
procMessageBoxW = moduser32.NewProc("MessageBoxW")
+ procToUnicodeEx = moduser32.NewProc("ToUnicodeEx")
+ procUnloadKeyboardLayout = moduser32.NewProc("UnloadKeyboardLayout")
procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock")
procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock")
procGetUserProfileDirectoryW = moduserenv.NewProc("GetUserProfileDirectoryW")
@@ -789,6 +797,14 @@ func FreeSid(sid *SID) (err error) {
return
}
+func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) {
+ r1, _, e1 := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce)))
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func GetLengthSid(sid *SID) (len uint32) {
r0, _, _ := syscall.Syscall(procGetLengthSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0)
len = uint32(r0)
@@ -1225,14 +1241,6 @@ func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCE
return
}
-func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) {
- r0, _, _ := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce)))
- if r0 == 0 {
- ret = GetLastError()
- }
- return
-}
-
func SetKernelObjectSecurity(handle Handle, securityInformation SECURITY_INFORMATION, securityDescriptor *SECURITY_DESCRIPTOR) (err error) {
r1, _, e1 := syscall.Syscall(procSetKernelObjectSecurity.Addr(), 3, uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor)))
if r1 == 0 {
@@ -2158,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) {
return
}
+func GetConsoleCP() (cp uint32, err error) {
+ r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0)
+ cp = uint32(r0)
+ if cp == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func GetConsoleMode(console Handle, mode *uint32) (err error) {
r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0)
if r1 == 0 {
@@ -2166,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) {
return
}
+func GetConsoleOutputCP() (cp uint32, err error) {
+ r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0)
+ cp = uint32(r0)
+ if cp == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) {
r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0)
if r1 == 0 {
@@ -3034,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) {
return
}
+func SetConsoleCP(cp uint32) (err error) {
+ r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func setConsoleCursorPosition(console Handle, position uint32) (err error) {
r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0)
if r1 == 0 {
@@ -3050,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) {
return
}
+func SetConsoleOutputCP(cp uint32) (err error) {
+ r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func SetCurrentDirectory(path *uint16) (err error) {
r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0)
if r1 == 0 {
@@ -4082,6 +4124,12 @@ func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) {
return
}
+func GetKeyboardLayout(tid uint32) (hkl Handle) {
+ r0, _, _ := syscall.Syscall(procGetKeyboardLayout.Addr(), 1, uintptr(tid), 0, 0)
+ hkl = Handle(r0)
+ return
+}
+
func GetShellWindow() (shellWindow HWND) {
r0, _, _ := syscall.Syscall(procGetShellWindow.Addr(), 0, 0, 0, 0)
shellWindow = HWND(r0)
@@ -4115,6 +4163,15 @@ func IsWindowVisible(hwnd HWND) (isVisible bool) {
return
}
+func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) {
+ r0, _, e1 := syscall.Syscall(procLoadKeyboardLayoutW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(flags), 0)
+ hkl = Handle(r0)
+ if hkl == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) {
r0, _, e1 := syscall.Syscall6(procMessageBoxW.Addr(), 4, uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype), 0, 0)
ret = int32(r0)
@@ -4124,6 +4181,20 @@ func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret i
return
}
+func ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) {
+ r0, _, _ := syscall.Syscall9(procToUnicodeEx.Addr(), 7, uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl), 0, 0)
+ ret = int32(r0)
+ return
+}
+
+func UnloadKeyboardLayout(hkl Handle) (err error) {
+ r1, _, e1 := syscall.Syscall(procUnloadKeyboardLayout.Addr(), 1, uintptr(hkl), 0, 0)
+ if r1 == 0 {
+ err = errnoErr(e1)
+ }
+ return
+}
+
func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) (err error) {
var _p0 uint32
if inheritExisting {
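
The generated wrappers above also surface ToUnicodeEx. As a rough sketch (not part of this patch), it can translate a virtual-key code into UTF-16 text using the active layout; the all-zero key state array below means no modifier keys are held, and 0x41 is the virtual-key code for the A key.

//go:build windows

package main

import (
	"fmt"

	"golang.org/x/sys/windows"
)

func main() {
	const vkA = 0x41 // virtual-key code for 'A'

	// 256-byte key state array; all zeroes means no modifiers are pressed.
	var keyState [256]byte
	buf := make([]uint16, 8)

	hkl := windows.GetKeyboardLayout(0) // layout of the current thread

	// A positive return value is the number of UTF-16 code units written.
	n := windows.ToUnicodeEx(vkA, 0, &keyState[0], &buf[0], int32(len(buf)), 0, hkl)
	if n > 0 {
		fmt.Printf("VK 0x%02X translates to %q\n", vkA, windows.UTF16ToString(buf[:n]))
	} else {
		fmt.Println("no translation (dead key or error), return value:", n)
	}
}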
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 1fa09e2acba..cf80b900ffe 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -137,7 +137,7 @@ github.com/alicebob/miniredis/v2/geohash
github.com/alicebob/miniredis/v2/hyperloglog
github.com/alicebob/miniredis/v2/metro
github.com/alicebob/miniredis/v2/server
-# github.com/andybalholm/brotli v1.1.0
+# github.com/andybalholm/brotli v1.1.1
## explicit; go 1.13
github.com/andybalholm/brotli
github.com/andybalholm/brotli/matchfinder
@@ -758,8 +758,8 @@ github.com/jsternberg/zap-logfmt
# github.com/julienschmidt/httprouter v1.3.0
## explicit; go 1.7
github.com/julienschmidt/httprouter
-# github.com/klauspost/compress v1.17.9
-## explicit; go 1.20
+# github.com/klauspost/compress v1.17.11
+## explicit; go 1.21
github.com/klauspost/compress
github.com/klauspost/compress/flate
github.com/klauspost/compress/fse
@@ -807,7 +807,7 @@ github.com/mattn/go-colorable
# github.com/mattn/go-isatty v0.0.20
## explicit; go 1.15
github.com/mattn/go-isatty
-# github.com/mattn/go-runewidth v0.0.15
+# github.com/mattn/go-runewidth v0.0.16
## explicit; go 1.9
github.com/mattn/go-runewidth
# github.com/miekg/dns v1.1.61
@@ -980,8 +980,8 @@ github.com/opentracing/opentracing-go/log
github.com/openzipkin/zipkin-go/model
github.com/openzipkin/zipkin-go/proto/zipkin_proto3
github.com/openzipkin/zipkin-go/reporter
-# github.com/parquet-go/parquet-go v0.23.0
-## explicit; go 1.21
+# github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe
+## explicit; go 1.22
github.com/parquet-go/parquet-go
github.com/parquet-go/parquet-go/bloom
github.com/parquet-go/parquet-go/bloom/xxhash
@@ -999,6 +999,7 @@ github.com/parquet-go/parquet-go/encoding/bytestreamsplit
github.com/parquet-go/parquet-go/encoding/delta
github.com/parquet-go/parquet-go/encoding/plain
github.com/parquet-go/parquet-go/encoding/rle
+github.com/parquet-go/parquet-go/encoding/thrift
github.com/parquet-go/parquet-go/format
github.com/parquet-go/parquet-go/hashprobe
github.com/parquet-go/parquet-go/hashprobe/aeshash
@@ -1157,9 +1158,6 @@ github.com/sagikazarmark/slog-shim
# github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529
## explicit
github.com/sean-/seed
-# github.com/segmentio/encoding v0.4.0
-## explicit; go 1.18
-github.com/segmentio/encoding/thrift
# github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
## explicit
github.com/segmentio/fasthash/fnv1a
@@ -1748,7 +1746,7 @@ golang.org/x/oauth2/jwt
## explicit; go 1.18
golang.org/x/sync/errgroup
golang.org/x/sync/semaphore
-# golang.org/x/sys v0.22.0
+# golang.org/x/sys v0.26.0
## explicit; go 1.18
golang.org/x/sys/cpu
golang.org/x/sys/unix