From e37f481778c778ccf006b7d0e7a5c56fcef108fd Mon Sep 17 00:00:00 2001 From: Pavol Loffay Date: Tue, 15 Oct 2024 14:20:11 +0200 Subject: [PATCH] Support Tempo on IBM s390x (#4175) * Support Tempo on IBM s390x Signed-off-by: Pavol Loffay * Fix Signed-off-by: Pavol Loffay * Fix Signed-off-by: Pavol Loffay * Fix Signed-off-by: Pavol Loffay * update serverless go.sum Signed-off-by: Joe Elliott --------- Signed-off-by: Pavol Loffay Signed-off-by: Joe Elliott Co-authored-by: Joe Elliott --- CHANGELOG.md | 1 + cmd/tempo-serverless/cloud-run/go.mod | 11 +- cmd/tempo-serverless/cloud-run/go.sum | 24 +- cmd/tempo-serverless/lambda/go.mod | 11 +- cmd/tempo-serverless/lambda/go.sum | 24 +- go.mod | 11 +- go.sum | 24 +- .../github.com/andybalholm/brotli/encoder.go | 9 + .../andybalholm/brotli/matchfinder/emitter.go | 11 - .../andybalholm/brotli/matchfinder/m4.go | 43 +- .../klauspost/compress/.goreleaser.yml | 6 +- .../github.com/klauspost/compress/README.md | 29 +- .../klauspost/compress/flate/deflate.go | 2 +- .../klauspost/compress/flate/inflate.go | 74 +- .../klauspost/compress/fse/decompress.go | 2 +- .../klauspost/compress/gzhttp/compress.go | 80 +- .../compress/gzhttp/compress_go119.go | 9 - .../compress/gzhttp/compress_go120.go | 9 - .../klauspost/compress/gzhttp/transport.go | 19 +- .../klauspost/compress/huff0/decompress.go | 4 +- .../klauspost/compress/s2/encode.go | 25 +- .../klauspost/compress/s2/encode_amd64.go | 201 +- .../klauspost/compress/s2/encode_go.go | 4 +- .../compress/s2/encodeblock_amd64.go | 44 +- .../klauspost/compress/s2/encodeblock_amd64.s | 21920 ++++++++-------- .../klauspost/compress/s2/writer.go | 31 +- .../klauspost/compress/zstd/blockdec.go | 4 +- .../klauspost/compress/zstd/enc_better.go | 32 +- .../klauspost/compress/zstd/enc_dfast.go | 16 +- .../klauspost/compress/zstd/encoder.go | 45 +- .../klauspost/compress/zstd/framedec.go | 4 +- .../klauspost/compress/zstd/seqdec_amd64.go | 4 +- .../klauspost/compress/zstd/seqdec_amd64.s | 8 +- .../klauspost/compress/zstd/zstd.go | 4 + .../mattn/go-runewidth/runewidth_table.go | 323 +- .../parquet-go/parquet-go/allocator.go | 10 +- .../github.com/parquet-go/parquet-go/array.go | 13 +- .../github.com/parquet-go/parquet-go/bloom.go | 25 +- .../parquet-go/parquet-go/bloom/filter.go | 9 +- .../parquet-go/parquet-go/column.go | 4 +- .../parquet-go/parquet-go/column_buffer.go | 77 +- .../parquet-go/column_buffer_amd64.go | 2 +- .../parquet-go/parquet-go/column_index_be.go | 854 + .../{column_index.go => column_index_le.go} | 69 +- .../parquet-go/parquet-go/convert.go | 13 +- .../parquet-go/parquet-go/deprecated/int96.go | 16 - .../parquet-go/parquet-go/dictionary.go | 59 +- .../parquet-go/parquet-go/dictionary_amd64.go | 18 +- .../parquet-go/dictionary_purego.go | 7 +- .../bytestreamsplit/bytestreamsplit.go | 12 +- .../bytestreamsplit/bytestreamsplit_purego.go | 8 +- .../encoding/delta/binary_packed.go | 12 +- .../encoding/delta/binary_packed_amd64.go | 4 +- .../parquet-go/encoding/delta/delta.go | 4 +- .../parquet-go/encoding/plain/plain.go | 114 +- .../parquet-go/encoding/rle/dictionary.go | 4 +- .../parquet-go/parquet-go/encoding/rle/rle.go | 33 +- .../parquet-go/encoding/thrift}/LICENSE | 0 .../parquet-go}/encoding/thrift/binary.go | 6 +- .../parquet-go}/encoding/thrift/compact.go | 4 +- .../parquet-go}/encoding/thrift/debug.go | 0 .../parquet-go}/encoding/thrift/decode.go | 0 .../parquet-go}/encoding/thrift/encode.go | 0 .../parquet-go}/encoding/thrift/error.go | 0 .../parquet-go}/encoding/thrift/protocol.go | 0 .../parquet-go}/encoding/thrift/struct.go | 0 .../parquet-go}/encoding/thrift/thrift.go | 0 .../parquet-go}/encoding/thrift/unsafe.go | 4 - .../parquet-go/parquet-go/encoding/values.go | 93 +- .../github.com/parquet-go/parquet-go/file.go | 71 +- .../parquet-go/hashprobe/hashprobe.go | 14 +- .../internal/bitpack/unpack_int32_amd64.go | 2 +- .../internal/bitpack/unpack_int32_purego.go | 18 +- .../internal/bitpack/unpack_int64_amd64.go | 2 +- .../internal/bitpack/unpack_int64_purego.go | 22 +- .../internal/unsafecast/unsafecast.go | 144 +- .../github.com/parquet-go/parquet-go/node.go | 7 + .../github.com/parquet-go/parquet-go/order.go | 4 +- .../parquet-go/parquet-go/order_purego.go | 170 +- .../parquet-go/parquet-go/page_values.go | 14 +- .../parquet-go/parquet-go/sparse/array.go | 72 +- .../parquet-go/parquet-go/sparse/gather.go | 10 +- .../github.com/parquet-go/parquet-go/type.go | 7 +- .../github.com/parquet-go/parquet-go/value.go | 15 +- .../parquet-go/parquet-go/writer.go | 2 +- vendor/golang.org/x/sys/LICENSE | 4 +- vendor/golang.org/x/sys/cpu/cpu.go | 21 + vendor/golang.org/x/sys/cpu/cpu_arm64.go | 12 + .../golang.org/x/sys/cpu/cpu_linux_arm64.go | 5 + .../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +- .../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 + vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +- vendor/golang.org/x/sys/unix/README.md | 2 +- vendor/golang.org/x/sys/unix/mkerrors.sh | 6 +- vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +- .../golang.org/x/sys/unix/syscall_darwin.go | 49 + vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 + vendor/golang.org/x/sys/unix/syscall_linux.go | 64 +- .../x/sys/unix/syscall_linux_arm64.go | 2 + .../x/sys/unix/syscall_linux_loong64.go | 2 + .../x/sys/unix/syscall_linux_riscv64.go | 2 + .../golang.org/x/sys/unix/syscall_openbsd.go | 1 + .../golang.org/x/sys/unix/vgetrandom_linux.go | 13 + .../x/sys/unix/vgetrandom_unsupported.go | 11 + .../x/sys/unix/zerrors_darwin_amd64.go | 12 + .../x/sys/unix/zerrors_darwin_arm64.go | 12 + vendor/golang.org/x/sys/unix/zerrors_linux.go | 51 +- .../x/sys/unix/zerrors_linux_386.go | 7 + .../x/sys/unix/zerrors_linux_amd64.go | 7 + .../x/sys/unix/zerrors_linux_arm.go | 7 + .../x/sys/unix/zerrors_linux_arm64.go | 7 + .../x/sys/unix/zerrors_linux_loong64.go | 7 + .../x/sys/unix/zerrors_linux_mips.go | 7 + .../x/sys/unix/zerrors_linux_mips64.go | 7 + .../x/sys/unix/zerrors_linux_mips64le.go | 7 + .../x/sys/unix/zerrors_linux_mipsle.go | 7 + .../x/sys/unix/zerrors_linux_ppc.go | 7 + .../x/sys/unix/zerrors_linux_ppc64.go | 7 + .../x/sys/unix/zerrors_linux_ppc64le.go | 7 + .../x/sys/unix/zerrors_linux_riscv64.go | 7 + .../x/sys/unix/zerrors_linux_s390x.go | 7 + .../x/sys/unix/zerrors_linux_sparc64.go | 7 + .../x/sys/unix/zerrors_zos_s390x.go | 2 + .../x/sys/unix/zsyscall_darwin_amd64.go | 68 + .../x/sys/unix/zsyscall_darwin_amd64.s | 15 + .../x/sys/unix/zsyscall_darwin_arm64.go | 68 + .../x/sys/unix/zsyscall_darwin_arm64.s | 15 + .../golang.org/x/sys/unix/zsyscall_linux.go | 33 +- .../x/sys/unix/zsyscall_openbsd_386.go | 24 + .../x/sys/unix/zsyscall_openbsd_386.s | 5 + .../x/sys/unix/zsyscall_openbsd_amd64.go | 24 + .../x/sys/unix/zsyscall_openbsd_amd64.s | 5 + .../x/sys/unix/zsyscall_openbsd_arm.go | 24 + .../x/sys/unix/zsyscall_openbsd_arm.s | 5 + .../x/sys/unix/zsyscall_openbsd_arm64.go | 24 + .../x/sys/unix/zsyscall_openbsd_arm64.s | 5 + .../x/sys/unix/zsyscall_openbsd_mips64.go | 24 + .../x/sys/unix/zsyscall_openbsd_mips64.s | 5 + .../x/sys/unix/zsyscall_openbsd_ppc64.go | 24 + .../x/sys/unix/zsyscall_openbsd_ppc64.s | 6 + .../x/sys/unix/zsyscall_openbsd_riscv64.go | 24 + .../x/sys/unix/zsyscall_openbsd_riscv64.s | 5 + .../x/sys/unix/zsysnum_linux_386.go | 1 + .../x/sys/unix/zsysnum_linux_amd64.go | 2 + .../x/sys/unix/zsysnum_linux_arm.go | 1 + .../x/sys/unix/zsysnum_linux_arm64.go | 3 +- .../x/sys/unix/zsysnum_linux_loong64.go | 3 + .../x/sys/unix/zsysnum_linux_mips.go | 1 + .../x/sys/unix/zsysnum_linux_mips64.go | 1 + .../x/sys/unix/zsysnum_linux_mips64le.go | 1 + .../x/sys/unix/zsysnum_linux_mipsle.go | 1 + .../x/sys/unix/zsysnum_linux_ppc.go | 1 + .../x/sys/unix/zsysnum_linux_ppc64.go | 1 + .../x/sys/unix/zsysnum_linux_ppc64le.go | 1 + .../x/sys/unix/zsysnum_linux_riscv64.go | 3 +- .../x/sys/unix/zsysnum_linux_s390x.go | 1 + .../x/sys/unix/zsysnum_linux_sparc64.go | 1 + .../x/sys/unix/ztypes_darwin_amd64.go | 13 + .../x/sys/unix/ztypes_darwin_arm64.go | 13 + .../x/sys/unix/ztypes_freebsd_386.go | 1 + .../x/sys/unix/ztypes_freebsd_amd64.go | 1 + .../x/sys/unix/ztypes_freebsd_arm.go | 1 + .../x/sys/unix/ztypes_freebsd_arm64.go | 1 + .../x/sys/unix/ztypes_freebsd_riscv64.go | 1 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 96 +- .../x/sys/unix/ztypes_linux_riscv64.go | 33 + .../golang.org/x/sys/windows/dll_windows.go | 2 +- .../x/sys/windows/security_windows.go | 2 +- .../x/sys/windows/syscall_windows.go | 16 +- .../golang.org/x/sys/windows/types_windows.go | 72 +- .../x/sys/windows/zsyscall_windows.go | 87 +- vendor/modules.txt | 18 +- 172 files changed, 14354 insertions(+), 12073 deletions(-) delete mode 100644 vendor/github.com/klauspost/compress/gzhttp/compress_go119.go delete mode 100644 vendor/github.com/klauspost/compress/gzhttp/compress_go120.go create mode 100644 vendor/github.com/parquet-go/parquet-go/column_index_be.go rename vendor/github.com/parquet-go/parquet-go/{column_index.go => column_index_le.go} (90%) rename vendor/github.com/{segmentio/encoding => parquet-go/parquet-go/encoding/thrift}/LICENSE (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/binary.go (98%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/compact.go (98%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/debug.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/decode.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/encode.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/error.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/protocol.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/struct.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/thrift.go (100%) rename vendor/github.com/{segmentio => parquet-go/parquet-go}/encoding/thrift/unsafe.go (85%) create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d0500edbfe..0ff3e8df725 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## main / unreleased +* [ENHANCEMENT] Support Tempo on IBM s390x [#4172](https://github.com/grafana/tempo/pull/4175) (@pavolloffay) * [ENHANCEMENT] Changed log level from INFO to DEBUG for the TempoDB Find operation using traceId to reduce excessive/unwanted logs in log search. [#4179](https://github.com/grafana/tempo/pull/4179) (@Aki0x137) * [ENHANCEMENT] tempo-query: separate tls settings for server and client [#4177](https://github.com/grafana/tempo/pull/4177) (@frzifus) * [ENHANCEMENT] Pushdown collection of results from generators in the querier [#4119](https://github.com/grafana/tempo/pull/4119) (@electron0zero) diff --git a/cmd/tempo-serverless/cloud-run/go.mod b/cmd/tempo-serverless/cloud-run/go.mod index f8bd78a3d37..e48440c67d1 100644 --- a/cmd/tempo-serverless/cloud-run/go.mod +++ b/cmd/tempo-serverless/cloud-run/go.mod @@ -20,7 +20,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -59,11 +59,11 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.70 // indirect @@ -78,7 +78,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.0 // indirect + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect github.com/pelletier/go-toml/v2 v2.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect @@ -94,7 +94,6 @@ require ( github.com/rs/xid v1.5.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/sony/gobreaker v0.4.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -126,7 +125,7 @@ require ( golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect diff --git a/cmd/tempo-serverless/cloud-run/go.sum b/cmd/tempo-serverless/cloud-run/go.sum index 06d0d648896..11625dca355 100644 --- a/cmd/tempo-serverless/cloud-run/go.sum +++ b/cmd/tempo-serverless/cloud-run/go.sum @@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= @@ -170,8 +170,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -188,8 +188,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= @@ -230,8 +230,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -269,8 +269,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ= github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ= @@ -313,6 +311,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ= @@ -388,8 +388,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= diff --git a/cmd/tempo-serverless/lambda/go.mod b/cmd/tempo-serverless/lambda/go.mod index 2463d03d31c..3eb799d9ff8 100644 --- a/cmd/tempo-serverless/lambda/go.mod +++ b/cmd/tempo-serverless/lambda/go.mod @@ -22,7 +22,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -62,11 +62,11 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.70 // indirect @@ -81,7 +81,7 @@ require ( github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/parquet-go/parquet-go v0.23.0 // indirect + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe // indirect github.com/pelletier/go-toml/v2 v2.1.0 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pires/go-proxyproto v0.7.0 // indirect @@ -98,7 +98,6 @@ require ( github.com/rs/xid v1.5.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/sony/gobreaker v0.4.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -130,7 +129,7 @@ require ( golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect diff --git a/cmd/tempo-serverless/lambda/go.sum b/cmd/tempo-serverless/lambda/go.sum index 80a339939e6..70937341c86 100644 --- a/cmd/tempo-serverless/lambda/go.sum +++ b/cmd/tempo-serverless/lambda/go.sum @@ -35,8 +35,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGn github.com/alicebob/miniredis v2.5.0+incompatible h1:yBHoLpsyjupjz3NL3MhKMVkR41j82Yjf3KFv7ApYzUI= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/aws/aws-lambda-go v1.28.0 h1:fZiik1PZqW2IyAN4rj+Y0UBaO1IDFlsNo9Zz/XnArK4= @@ -174,8 +174,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -192,8 +192,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= @@ -234,8 +234,8 @@ github.com/opentracing-contrib/go-stdlib v1.0.0/go.mod h1:qtI1ogk+2JhVPIXVc6q+NH github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -274,8 +274,6 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= github.com/sercand/kuberesolver/v5 v5.1.1/go.mod h1:Fs1KbKhVRnB2aDWN12NjKCB+RgYMWZJ294T3BtmVCpQ= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= @@ -321,6 +319,8 @@ github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64 h1:5mLPGnFdSsevFRFc9q3yYbBkB6tsm4aCwwQV/j1JQAQ= @@ -396,8 +396,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= diff --git a/go.mod b/go.mod index 9e4c1f6095c..3d8fadad039 100644 --- a/go.mod +++ b/go.mod @@ -33,7 +33,7 @@ require ( github.com/jedib0t/go-pretty/v6 v6.2.4 github.com/json-iterator/go v1.1.12 github.com/jsternberg/zap-logfmt v1.2.0 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.11 github.com/minio/minio-go/v7 v7.0.70 github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c github.com/olekukonko/tablewriter v0.0.5 @@ -98,7 +98,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.102.0 - github.com/parquet-go/parquet-go v0.23.0 + github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe github.com/stoewer/parquet-cli v0.0.7 go.opentelemetry.io/collector/config/configgrpc v0.102.1 go.opentelemetry.io/collector/config/confighttp v0.102.1 @@ -133,7 +133,7 @@ require ( github.com/alecthomas/participle/v2 v2.1.1 // indirect github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/thrift v0.20.0 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect @@ -216,7 +216,7 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/miekg/dns v1.1.61 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect @@ -260,7 +260,6 @@ require ( github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect - github.com/segmentio/encoding v0.4.0 // indirect github.com/sercand/kuberesolver/v5 v5.1.1 // indirect github.com/shirou/gopsutil/v3 v3.24.4 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect @@ -321,7 +320,7 @@ require ( golang.org/x/crypto v0.25.0 // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/mod v0.19.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.26.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/tools v0.23.0 // indirect gonum.org/v1/gonum v0.15.0 // indirect diff --git a/go.sum b/go.sum index 972730dd25a..0bed50c515b 100644 --- a/go.sum +++ b/go.sum @@ -93,8 +93,8 @@ github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a h1:HbKu58rmZp github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc= github.com/alicebob/miniredis/v2 v2.21.0 h1:CdmwIlKUWFBDS+4464GtQiQ0R1vpzOgu4Vnd74rBL7M= github.com/alicebob/miniredis/v2 v2.21.0/go.mod h1:XNqvJdQJv5mSuVMc0ynneafpnL/zv52acZ6kqeS0t88= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= @@ -566,8 +566,8 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -616,8 +616,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= @@ -731,8 +731,8 @@ github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7s github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/parquet-go/parquet-go v0.23.0 h1:dyEU5oiHCtbASyItMCD2tXtT2nPmoPbKpqf0+nnGrmk= -github.com/parquet-go/parquet-go v0.23.0/go.mod h1:MnwbUcFHU6uBYMymKAlPPAw9yh3kE1wWl6Gl1uLdkNk= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= +github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -825,8 +825,6 @@ github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 h1:BkTk4gynLjguayxrYxZoMZjBnA github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/segmentio/encoding v0.4.0 h1:MEBYvRqiUB2nfR2criEXWqwdY6HJOUrCn5hboVOVmy8= -github.com/segmentio/encoding v0.4.0/go.mod h1:/d03Cd8PoaDeceuhUUUQWjU0KhWjrmYrWPgtJHYZSnI= github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e h1:uO75wNGioszjmIzcY/tvdDYKRLVvzggtAmmJkn9j4GQ= github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e/go.mod h1:tm/wZFQ8e24NYaBGIlnO2WGCAi67re4HHuOm0sftE/M= github.com/sercand/kuberesolver/v5 v5.1.1 h1:CYH+d67G0sGBj7q5wLK61yzqJJ8gLLC8aeprPTHb6yY= @@ -906,6 +904,8 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -1258,8 +1258,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= diff --git a/vendor/github.com/andybalholm/brotli/encoder.go b/vendor/github.com/andybalholm/brotli/encoder.go index 650d1e42b49..1928382596e 100644 --- a/vendor/github.com/andybalholm/brotli/encoder.go +++ b/vendor/github.com/andybalholm/brotli/encoder.go @@ -21,6 +21,15 @@ func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, la e.wroteHeader = true } + if len(src) == 0 { + if lastBlock { + e.bw.writeBits(2, 3) // islast + isempty + e.bw.jumpToByteBoundary() + return e.bw.dst + } + return dst + } + var literalHisto [256]uint32 var commandHisto [704]uint32 var distanceHisto [64]uint32 diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go index 37ed8e13340..507d1cae64c 100644 --- a/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go +++ b/vendor/github.com/andybalholm/brotli/matchfinder/emitter.go @@ -32,14 +32,3 @@ func (e *matchEmitter) emit(m absoluteMatch) { }) e.NextEmit = m.End } - -// trim shortens m if it extends past maxEnd. Then if the length is at least -// minLength, the match is emitted. -func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) { - if m.End > maxEnd { - m.End = maxEnd - } - if m.End-m.Start >= minLength { - e.emit(m) - } -} diff --git a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go index 5b2acba2e14..818947255df 100644 --- a/vendor/github.com/andybalholm/brotli/matchfinder/m4.go +++ b/vendor/github.com/andybalholm/brotli/matchfinder/m4.go @@ -56,7 +56,7 @@ func (q *M4) Reset() { } func (q *M4) score(m absoluteMatch) int { - return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost + return (m.End-m.Start)*256 + (bits.LeadingZeros32(uint32(m.Start-m.Match))-32)*q.DistanceBitCost } func (q *M4) FindMatches(dst []Match, src []byte) []Match { @@ -112,7 +112,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { // We have found some matches, and we're far enough along that we probably // won't find overlapping matches, so we might as well emit them. if matches[1] != (absoluteMatch{}) { - e.trim(matches[1], matches[0].Start, q.MinLength) + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 { + e.emit(matches[1]) + } } e.emit(matches[0]) matches = [3]absoluteMatch{} @@ -139,12 +144,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { // Look for a match. var currentMatch absoluteMatch - if i-candidate != matches[0].Start-matches[0].Match { - if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { - m := extendMatch2(src, i, candidate, e.NextEmit) - if m.End-m.Start > q.MinLength { - currentMatch = m - } + if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { + m := extendMatch2(src, i, candidate, e.NextEmit) + if m.End-m.Start > q.MinLength && q.score(m) > 0 { + currentMatch = m } } @@ -157,12 +160,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { if candidate <= 0 || i-candidate > q.MaxDistance { break } - if i-candidate != matches[0].Start-matches[0].Match { - if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { - m := extendMatch2(src, i, candidate, e.NextEmit) - if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) { - currentMatch = m - } + if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) { + m := extendMatch2(src, i, candidate, e.NextEmit) + if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) { + currentMatch = m } } } @@ -217,14 +218,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { default: // Emit the first match, shortening it if necessary to avoid overlap with the second. - e.trim(matches[2], matches[1].Start, q.MinLength) + if matches[2].End > matches[1].Start { + matches[2].End = matches[1].Start + } + if matches[2].End-matches[2].Start >= q.MinLength && q.score(matches[2]) > 0 { + e.emit(matches[2]) + } matches[2] = absoluteMatch{} } } // We've found all the matches now; emit the remaining ones. if matches[1] != (absoluteMatch{}) { - e.trim(matches[1], matches[0].Start, q.MinLength) + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 { + e.emit(matches[1]) + } } if matches[0] != (absoluteMatch{}) { e.emit(matches[0]) diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index a22953805c6..4528059ca68 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,5 +1,5 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh @@ -99,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 05c7359e481..de264c85a5a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,27 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + +* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) + * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 + * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 + * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 + * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 + +* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) + * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 + * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 + +* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) + * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 + * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 + * Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 @@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 @@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 @@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when * s2: Fix binaries. * Feb 25, 2021 (v1.11.8) - * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) @@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. * Feb 19, 2016: Handle small payloads faster in level 1-3. * Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. * Feb 14, 2016: Snappy: Merge upstream changes. * Feb 14, 2016: Snappy: Fix aggressive skipping. * Feb 14, 2016: Snappy: Update benchmark. diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 66d1657d2c6..af53fb860cc 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) { } switch d.compressionLevel.chain { case 0: - // level was NoCompression or ConstantCompresssion. + // level was NoCompression or ConstantCompression. d.windowEnd = 0 default: s := d.state diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 2f410d64f5a..0d7b437f1c6 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -298,6 +298,14 @@ const ( huffmanGenericReader ) +// flushMode tells decompressor when to return data +type flushMode uint8 + +const ( + syncFlush flushMode = iota // return data after sync flush block + partialFlush // return data after each block +) + // Decompress state. type decompressor struct { // Input source. @@ -332,6 +340,8 @@ type decompressor struct { nb uint final bool + + flushMode flushMode } func (f *decompressor) nextBlock() { @@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + if f.flushMode == syncFlush { + f.toRead = f.dict.readFlush() + } + f.finishBlock() return } @@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() { if f.dict.availRead() > 0 { f.toRead = f.dict.readFlush() } + f.err = io.EOF + } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() } + f.step = nextBlock } @@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { return nil } -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReader(r io.Reader) io.ReadCloser { +type ReaderOpt func(*decompressor) + +// WithPartialBlock tells decompressor to return after each block, +// so it can read data written with partial flush +func WithPartialBlock() ReaderOpt { + return func(f *decompressor) { + f.flushMode = partialFlush + } +} + +// WithDict initializes the reader with a preset dictionary +func WithDict(dict []byte) ReaderOpt { + return func(f *decompressor) { + f.dict.init(maxMatchOffset, dict) + } +} + +// NewReaderOpts returns new reader with provided options +func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor @@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser { f.codebits = new([numCodes]int) f.step = nextBlock f.dict.init(maxMatchOffset, nil) + + for _, opt := range opts { + opt(&f) + } + return &f } +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + return NewReaderOpts(r) +} + // NewReaderDict is like NewReader but initializes the reader // with a preset dictionary. The returned Reader behaves as if // the uncompressed data stream started with the given dictionary, @@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser { // // The ReadCloser returned by NewReader also implements Resetter. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = nextBlock - f.dict.init(maxMatchOffset, dict) - return &f + return NewReaderOpts(r, WithDict(dict)) } diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index cc05d0f7ea9..0c7dd4ffef9 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -15,7 +15,7 @@ const ( // It is possible, but by no way guaranteed that corrupt data will // return an error. // It is up to the caller to verify integrity of the returned data. -// Use a predefined Scrach to set maximum acceptable output size. +// Use a predefined Scratch to set maximum acceptable output size. func Decompress(b []byte, s *Scratch) ([]byte, error) { s, err := s.prepare(b) if err != nil { diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress.go b/vendor/github.com/klauspost/compress/gzhttp/compress.go index 289ae3e2ee8..52e3077ec4e 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/compress.go +++ b/vendor/github.com/klauspost/compress/gzhttp/compress.go @@ -131,15 +131,15 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue. if cl >= w.minSize || len(w.buf) >= w.minSize { - // If a Content-Type wasn't specified, infer it from the current buffer. - if ct == "" { + // If a Content-Type wasn't specified, infer it from the current buffer when the response has a body. + if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) - } - // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) - // Set the header only if the key does not exist - if _, ok := hdr[contentType]; w.setContentType && !ok { - hdr.Set(contentType, ct) + // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) + // Set the header only if the key does not exist + if _, ok := hdr[contentType]; w.setContentType && !ok { + hdr.Set(contentType, ct) + } } // If the Content-Type is acceptable to GZIP, initialize the GZIP writer. @@ -306,7 +306,7 @@ func (w *GzipResponseWriter) startPlain() error { func (w *GzipResponseWriter) WriteHeader(code int) { // Handle informational headers // This is gated to not forward 1xx responses on builds prior to go1.20. - if shouldWrite1xxResponses() && code >= 100 && code <= 199 { + if code >= 100 && code <= 199 { w.ResponseWriter.WriteHeader(code) return } @@ -324,6 +324,20 @@ func (w *GzipResponseWriter) init() { w.gw = w.gwFactory.New(w.ResponseWriter, w.level) } +// bodyAllowedForStatus reports whether a given response status code +// permits a body. See RFC 7230, section 3.3. +func bodyAllowedForStatus(status int) bool { + switch { + case status >= 100 && status <= 199: + return false + case status == 204: + return false + case status == 304: + return false + } + return true +} + // Close will close the gzip.Writer and will put it back in the gzipWriterPool. func (w *GzipResponseWriter) Close() error { if w.ignore { @@ -335,7 +349,9 @@ func (w *GzipResponseWriter) Close() error { ce = w.Header().Get(contentEncoding) cr = w.Header().Get(contentRange) ) - if ct == "" { + + // Detects the response content-type when it does not exist and the response has a body. + if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) @@ -379,7 +395,8 @@ func (w *GzipResponseWriter) Flush() { cr = w.Header().Get(contentRange) ) - if ct == "" { + // Detects the response content-type when it does not exist and the response has a body. + if ct == "" && bodyAllowedForStatus(w.code) && len(w.buf) > 0 { ct = http.DetectContentType(w.buf) // Handles the intended case of setting a nil Content-Type (as for http/server or http/fs) @@ -464,6 +481,11 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) { return func(h http.Handler) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { w.Header().Add(vary, acceptEncoding) + if c.allowCompressedRequests && contentGzip(r) { + r.Header.Del(contentEncoding) + r.Body = &gzipReader{body: r.Body} + } + if acceptsGzip(r) { gw := grwPool.Get().(*GzipResponseWriter) *gw = GzipResponseWriter{ @@ -536,17 +558,18 @@ func (pct parsedContentType) equals(mediaType string, params map[string]string) // Used for functional configuration. type config struct { - minSize int - level int - writer writer.GzipWriterFactory - contentTypes func(ct string) bool - keepAcceptRanges bool - setContentType bool - suffixETag string - dropETag bool - jitterBuffer int - randomJitter string - sha256Jitter bool + minSize int + level int + writer writer.GzipWriterFactory + contentTypes func(ct string) bool + keepAcceptRanges bool + setContentType bool + suffixETag string + dropETag bool + jitterBuffer int + randomJitter string + sha256Jitter bool + allowCompressedRequests bool } func (c *config) validate() error { @@ -579,6 +602,15 @@ func MinSize(size int) option { } } +// AllowCompressedRequests will enable or disable RFC 7694 compressed requests. +// By default this is Disabled. +// See https://datatracker.ietf.org/doc/html/rfc7694 +func AllowCompressedRequests(b bool) option { + return func(c *config) { + c.allowCompressedRequests = b + } +} + // CompressionLevel sets the compression level func CompressionLevel(level int) option { return func(c *config) { @@ -752,6 +784,12 @@ func RandomJitter(n, buffer int, paranoid bool) option { } } +// contentGzip returns true if the given HTTP request indicates that it gzipped. +func contentGzip(r *http.Request) bool { + // See more detail in `acceptsGzip` + return r.Method != http.MethodHead && r.Body != nil && parseEncodingGzip(r.Header.Get(contentEncoding)) > 0 +} + // acceptsGzip returns true if the given HTTP request indicates that it will // accept a gzipped response. func acceptsGzip(r *http.Request) bool { diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go deleted file mode 100644 index 97fc25acbc9..00000000000 --- a/vendor/github.com/klauspost/compress/gzhttp/compress_go119.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build !go1.20 -// +build !go1.20 - -package gzhttp - -// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes. -func shouldWrite1xxResponses() bool { - return false -} diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go b/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go deleted file mode 100644 index 2b65f67c795..00000000000 --- a/vendor/github.com/klauspost/compress/gzhttp/compress_go120.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build go1.20 -// +build go1.20 - -package gzhttp - -// shouldWrite1xxResponses indicates whether the current build supports writes of 1xx status codes. -func shouldWrite1xxResponses() bool { - return true -} diff --git a/vendor/github.com/klauspost/compress/gzhttp/transport.go b/vendor/github.com/klauspost/compress/gzhttp/transport.go index 623aea2ed8a..3914a06e013 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/transport.go +++ b/vendor/github.com/klauspost/compress/gzhttp/transport.go @@ -61,10 +61,21 @@ func TransportCustomEval(fn func(header http.Header) bool) transportOption { } } +// TransportAlwaysDecompress will always decompress the response, +// regardless of whether we requested it or not. +// Default is false, which will pass compressed data through +// if we did not request compression. +func TransportAlwaysDecompress(enabled bool) transportOption { + return func(c *gzRoundtripper) { + c.alwaysDecomp = enabled + } +} + type gzRoundtripper struct { parent http.RoundTripper acceptEncoding string withZstd, withGzip bool + alwaysDecomp bool customEval func(header http.Header) bool } @@ -90,15 +101,19 @@ func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) { } resp, err := g.parent.RoundTrip(req) - if err != nil || !requestedComp { + if err != nil { return resp, err } - decompress := false + decompress := g.alwaysDecomp if g.customEval != nil { if !g.customEval(resp.Header) { return resp, nil } decompress = true + } else { + if !requestedComp && !g.alwaysDecomp { + return resp, nil + } } // Decompress if (decompress || g.withGzip) && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") { diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 54bd08b25c0..0f56b02d747 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 0 { - fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + fmt.Fprintf(w, "%d errors in base, stopping\n", errs) continue } // Ensure that all combinations are covered. @@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 20 { - fmt.Fprintf(w, "%d errros, stopping\n", errs) + fmt.Fprintf(w, "%d errors, stopping\n", errs) break } } diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088adfee..20b802270a7 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206a4ef..7aadd255fe3 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c34d37..dd1c973ca51 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e41501ba..f43aa815435 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b334017..df9be687be7 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 0a46f2b984f..fd15078f7df 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -83,11 +83,14 @@ type Writer struct { snappy bool flushOnWrite bool appendIndex bool + bufferCB func([]byte) level uint8 } type result struct { b []byte + // return when writing + ret []byte // Uncompressed start offset startOffset int64 } @@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) { for write := range toWrite { // Wait for the data to be available. input := <-write + if input.ret != nil && w.bufferCB != nil { + w.bufferCB(input.ret) + input.ret = nil + } in := input.b if len(in) > 0 { if w.err(nil) == nil { @@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { // but the input buffer cannot be written to by the caller // until Flush or Close has been called when concurrency != 1. // -// If you cannot control that, use the regular Write function. +// Use the WriterBufferDone to receive a callback when the buffer is done +// Processing. // // Note that input is not buffered. // This means that each write will result in discrete blocks being created. @@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { } if w.concurrency == 1 { _, err := w.writeSync(buf) + if w.bufferCB != nil { + w.bufferCB(buf) + } return err } @@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } - + orgBuf := buf for len(buf) > 0 { // Cut input. uncompressed := buf @@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { startOffset: w.uncompWritten, } w.uncompWritten += int64(len(uncompressed)) + if len(buf) == 0 && w.bufferCB != nil { + res.ret = orgBuf + } go func() { race.ReadSlice(uncompressed) @@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption { } // WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a +// EncodeBest compresses better than Encode but typically with a // big speed decrease on compression. func WriterBestCompression() WriterOption { return func(w *Writer) error { @@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption { } } +// WriterBufferDone will perform a callback when EncodeBuffer has finished +// writing a buffer to the output and the buffer can safely be reused. +// If the buffer was split into several blocks, it will be sent after the last block. +// Callbacks will not be done concurrently. +func WriterBufferDone(fn func(b []byte)) WriterOption { + return func(w *Writer) error { + w.bufferCB = fn + return nil + } +} + // WriterBlockSize allows to override the default block size. // Blocks will be this size or smaller. // Minimum size is 4KB and maximum size is 4MB. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 03744fbc765..9c28840c3bd 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: - println("Reading table for", tableIndex(i)) + if debugDecoder { + println("Reading table for", tableIndex(i)) + } if seq.fse == nil || seq.fse.preDefined { seq.fse = fseDecoderPool.Get().(*fseDecoder) } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index a4f5bf91fc6..84a79fde767 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -179,9 +179,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -210,12 +210,12 @@ encodeLoop: // Index match start+1 (long) -> s - 1 index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -241,9 +241,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -270,11 +270,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -708,9 +708,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -738,12 +738,12 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -772,9 +772,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -801,11 +801,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index a154c18f741..d36be7bd8c2 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -138,9 +138,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -166,11 +166,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -798,9 +798,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -826,11 +826,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 72af7ef0fe0..8f8223cd3a6 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return nil } if final && len(s.filling) > 0 { - s.current = e.EncodeAll(s.filling, s.current[:0]) + s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) var n2 int n2, s.err = s.w.Write(s.current) if s.err != nil { @@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -401,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -422,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -459,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } @@ -469,6 +489,15 @@ func (e *Encoder) Close() error { // Data compressed with EncodeAll can be decoded with the Decoder, // using either a stream or DecodeAll. func (e *Encoder) EncodeAll(src, dst []byte) []byte { + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + e.encoders <- enc + }() + return e.encodeAll(enc, src, dst) +} + +func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { if len(src) == 0 { if e.o.fullZero { // Add frame header. @@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - // Release encoder reference to last block. - // If a non-single block is needed the encoder will reset again. - e.encoders <- enc - }() + // Use single segments when above minimum window and below window size. single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 53e160f7e5a..e47af66e7c9 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error { } return err } - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + if debugDecoder { + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + } windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 8adabd82877..c59f17e07ad 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) default: - return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) } s.seqSize += ctx.litRemain @@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { return io.ErrUnexpectedEOF } - return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) } if ctx.litRemain < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 5b06174b898..f5591fa1e86 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -1814,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc73671..066bef2a4f0 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/mattn/go-runewidth/runewidth_table.go b/vendor/github.com/mattn/go-runewidth/runewidth_table.go index e5d890c266f..ad025ad5296 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth_table.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth_table.go @@ -4,20 +4,21 @@ package runewidth var combining = table{ {0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3}, - {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01}, - {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, - {0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, + {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0CF3, 0x0CF3}, + {0x0D00, 0x0D01}, {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, + {0x1AB0, 0x1ACE}, {0x1B6B, 0x1B73}, {0x1DC0, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF}, {0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A}, - {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11300, 0x11301}, - {0x1133B, 0x1133C}, {0x11366, 0x1136C}, {0x11370, 0x11374}, - {0x16AF0, 0x16AF4}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, + {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85}, + {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11366, 0x1136C}, + {0x11370, 0x11374}, {0x16AF0, 0x16AF4}, {0x1CF00, 0x1CF2D}, + {0x1CF30, 0x1CF46}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, - {0x1E8D0, 0x1E8D6}, + {0x1E08F, 0x1E08F}, {0x1E8D0, 0x1E8D6}, } var doublewidth = table{ @@ -33,33 +34,34 @@ var doublewidth = table{ {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99}, - {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, - {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF}, - {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3}, - {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF}, - {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, - {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, - {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, - {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, - {0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, - {0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, - {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, - {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, - {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, - {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320}, - {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, - {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, - {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, - {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, - {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, - {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, - {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7}, - {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, - {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F978}, - {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA74}, - {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, - {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, - {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, + {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x303E}, + {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3105, 0x312F}, + {0x3131, 0x318E}, {0x3190, 0x31E3}, {0x31EF, 0x321E}, + {0x3220, 0x3247}, {0x3250, 0x4DBF}, {0x4E00, 0xA48C}, + {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3}, + {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, + {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1}, + {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, + {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, + {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, + {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, + {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, + {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, + {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, + {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, + {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, + {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, + {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, + {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, + {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, + {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF}, {0x1F6EB, 0x1F6EC}, + {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0}, + {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F9FF}, + {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, + {0x1FABF, 0x1FAC5}, {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, + {0x1FAF0, 0x1FAF8}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, } var ambiguous = table{ @@ -154,43 +156,43 @@ var neutral = table{ {0x0402, 0x040F}, {0x0450, 0x0450}, {0x0452, 0x052F}, {0x0531, 0x0556}, {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7}, {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, - {0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A}, - {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D}, - {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E}, - {0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7}, - {0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990}, - {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, - {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, - {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, - {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, - {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, - {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, - {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, - {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, - {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76}, - {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, - {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, - {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, - {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, - {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, - {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, - {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, - {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, - {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, - {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, - {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, - {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, - {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, - {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, - {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, - {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, - {0x0C3D, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, - {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C60, 0x0C63}, + {0x0600, 0x070D}, {0x070F, 0x074A}, {0x074D, 0x07B1}, + {0x07C0, 0x07FA}, {0x07FD, 0x082D}, {0x0830, 0x083E}, + {0x0840, 0x085B}, {0x085E, 0x085E}, {0x0860, 0x086A}, + {0x0870, 0x088E}, {0x0890, 0x0891}, {0x0898, 0x0983}, + {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8}, + {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, + {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CE}, + {0x09D7, 0x09D7}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3}, + {0x09E6, 0x09FE}, {0x0A01, 0x0A03}, {0x0A05, 0x0A0A}, + {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, + {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, + {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, + {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, {0x0A59, 0x0A5C}, + {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76}, {0x0A81, 0x0A83}, + {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, + {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, + {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, + {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3}, {0x0AE6, 0x0AF1}, + {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C}, + {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, + {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B3C, 0x0B44}, + {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B55, 0x0B57}, + {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, {0x0B66, 0x0B77}, + {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, + {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, + {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, + {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, + {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0}, {0x0BD7, 0x0BD7}, + {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C}, {0x0C0E, 0x0C10}, + {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3C, 0x0C44}, + {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, + {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D}, {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, - {0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3}, - {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D0C}, + {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE}, {0x0CE0, 0x0CE3}, + {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF3}, {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1}, @@ -200,7 +202,7 @@ var neutral = table{ {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EC0, 0x0EC4}, - {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, + {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECE}, {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7}, @@ -212,20 +214,19 @@ var neutral = table{ {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8}, - {0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736}, - {0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770}, - {0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9}, - {0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819}, - {0x1820, 0x1878}, {0x1880, 0x18AA}, {0x18B0, 0x18F5}, - {0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B}, - {0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974}, - {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, - {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, - {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, - {0x1AB0, 0x1AC0}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C}, - {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, - {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, - {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1DFB, 0x1F15}, + {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753}, + {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773}, + {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9}, + {0x1800, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA}, + {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B}, + {0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D}, + {0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9}, + {0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, + {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, + {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE}, {0x1B00, 0x1B4C}, + {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, + {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, + {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, @@ -237,7 +238,7 @@ var neutral = table{ {0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064}, {0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080}, {0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8}, - {0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20D0, 0x20F0}, + {0x20AA, 0x20AB}, {0x20AD, 0x20C0}, {0x20D0, 0x20F0}, {0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108}, {0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120}, {0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152}, @@ -275,15 +276,15 @@ var neutral = table{ {0x2780, 0x2794}, {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B5A, 0x2B73}, - {0x2B76, 0x2B95}, {0x2B97, 0x2C2E}, {0x2C30, 0x2C5E}, - {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, - {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, - {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, - {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, - {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, - {0x2DE0, 0x2E52}, {0x303F, 0x303F}, {0x4DC0, 0x4DFF}, - {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA700, 0xA7BF}, - {0xA7C2, 0xA7CA}, {0xA7F5, 0xA82C}, {0xA830, 0xA839}, + {0x2B76, 0x2B95}, {0x2B97, 0x2CF3}, {0x2CF9, 0x2D25}, + {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, + {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, + {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, + {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, + {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D}, {0x303F, 0x303F}, + {0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, + {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3}, + {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839}, {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36}, @@ -294,8 +295,8 @@ var neutral = table{ {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, - {0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F}, - {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD}, + {0xFB43, 0xFB44}, {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, + {0xFD92, 0xFDC7}, {0xFDCF, 0xFDCF}, {0xFDF0, 0xFDFF}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, @@ -307,44 +308,48 @@ var neutral = table{ {0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, - {0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755}, - {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, - {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C}, - {0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF}, - {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, - {0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7}, - {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, - {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, - {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, - {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, - {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, - {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, - {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, - {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, - {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, - {0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10FB0, 0x10FCB}, - {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F}, - {0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, - {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147}, - {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4}, - {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286}, - {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, - {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, - {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, - {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, - {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348}, - {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357}, - {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, - {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7}, - {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD}, - {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C}, - {0x11680, 0x116B8}, {0x116C0, 0x116C9}, {0x11700, 0x1171A}, - {0x1171D, 0x1172B}, {0x11730, 0x1173F}, {0x11800, 0x1183B}, - {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909}, - {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935}, - {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959}, - {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, - {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AC0, 0x11AF8}, + {0x1056F, 0x1057A}, {0x1057C, 0x1058A}, {0x1058C, 0x10592}, + {0x10594, 0x10595}, {0x10597, 0x105A1}, {0x105A3, 0x105B1}, + {0x105B3, 0x105B9}, {0x105BB, 0x105BC}, {0x10600, 0x10736}, + {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10780, 0x10785}, + {0x10787, 0x107B0}, {0x107B2, 0x107BA}, {0x10800, 0x10805}, + {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838}, + {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E}, + {0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, + {0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F}, + {0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, + {0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, + {0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, + {0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, + {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, + {0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, + {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, + {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, + {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, + {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27}, {0x10F30, 0x10F59}, + {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB}, {0x10FE0, 0x10FF6}, + {0x11000, 0x1104D}, {0x11052, 0x11075}, {0x1107F, 0x110C2}, + {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, {0x110F0, 0x110F9}, + {0x11100, 0x11134}, {0x11136, 0x11147}, {0x11150, 0x11176}, + {0x11180, 0x111DF}, {0x111E1, 0x111F4}, {0x11200, 0x11211}, + {0x11213, 0x11241}, {0x11280, 0x11286}, {0x11288, 0x11288}, + {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A9}, + {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303}, + {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, + {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339}, + {0x1133B, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, + {0x11350, 0x11350}, {0x11357, 0x11357}, {0x1135D, 0x11363}, + {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11400, 0x1145B}, + {0x1145D, 0x11461}, {0x11480, 0x114C7}, {0x114D0, 0x114D9}, + {0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644}, + {0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B9}, + {0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B}, + {0x11730, 0x11746}, {0x11800, 0x1183B}, {0x118A0, 0x118F2}, + {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913}, + {0x11915, 0x11916}, {0x11918, 0x11935}, {0x11937, 0x11938}, + {0x1193B, 0x11946}, {0x11950, 0x11959}, {0x119A0, 0x119A7}, + {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, {0x11A00, 0x11A47}, + {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8}, {0x11B00, 0x11B09}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, @@ -352,30 +357,36 @@ var neutral = table{ {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8}, + {0x11F00, 0x11F10}, {0x11F12, 0x11F3A}, {0x11F3E, 0x11F59}, {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399}, {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543}, - {0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646}, + {0x12F90, 0x12FF2}, {0x13000, 0x13455}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, - {0x16A6E, 0x16A6F}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5}, - {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61}, - {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A}, - {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F}, - {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, - {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3}, {0x1D000, 0x1D0F5}, - {0x1D100, 0x1D126}, {0x1D129, 0x1D1E8}, {0x1D200, 0x1D245}, - {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, - {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, - {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, - {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, - {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, - {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, - {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, - {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, - {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, - {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, - {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, - {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E2C0, 0x1E2F9}, - {0x1E2FF, 0x1E2FF}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, + {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED}, + {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, + {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, + {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, + {0x16F8F, 0x16F9F}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, + {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3}, + {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3}, + {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D1EA}, + {0x1D200, 0x1D245}, {0x1D2C0, 0x1D2D3}, {0x1D2E0, 0x1D2F3}, + {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454}, + {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, + {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, + {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, + {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, + {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, + {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, + {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F}, + {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, + {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, + {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E030, 0x1E06D}, + {0x1E08F, 0x1E08F}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, + {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE}, + {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E4D0, 0x1E4F9}, + {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, + {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B}, {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4}, {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, @@ -400,8 +411,8 @@ var neutral = table{ {0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594}, {0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F}, {0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4}, - {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, - {0x1F780, 0x1F7D8}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, + {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F776}, + {0x1F77B, 0x1F7D9}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F90B}, {0x1F93B, 0x1F93B}, {0x1F946, 0x1F946}, {0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D}, diff --git a/vendor/github.com/parquet-go/parquet-go/allocator.go b/vendor/github.com/parquet-go/parquet-go/allocator.go index 0cf2df03197..693ee5a2471 100644 --- a/vendor/github.com/parquet-go/parquet-go/allocator.go +++ b/vendor/github.com/parquet-go/parquet-go/allocator.go @@ -1,6 +1,10 @@ package parquet -import "github.com/parquet-go/parquet-go/internal/unsafecast" +import ( + "unsafe" + + "github.com/parquet-go/parquet-go/internal/unsafecast" +) type allocator struct{ buffer []byte } @@ -31,7 +35,7 @@ func (a *allocator) copyBytes(v []byte) []byte { func (a *allocator) copyString(v string) string { b := a.makeBytes(len(v)) copy(b, v) - return unsafecast.BytesToString(b) + return unsafecast.String(b) } func (a *allocator) reset() { @@ -54,7 +58,7 @@ func (a *rowAllocator) capture(row Row) { for i, v := range row { switch v.Kind() { case ByteArray, FixedLenByteArray: - row[i].ptr = unsafecast.AddressOfBytes(a.copyBytes(v.byteArray())) + row[i].ptr = unsafe.SliceData(a.copyBytes(v.byteArray())) } } } diff --git a/vendor/github.com/parquet-go/parquet-go/array.go b/vendor/github.com/parquet-go/parquet-go/array.go index 48df9715495..774e6f85ff0 100644 --- a/vendor/github.com/parquet-go/parquet-go/array.go +++ b/vendor/github.com/parquet-go/parquet-go/array.go @@ -3,23 +3,22 @@ package parquet import ( "unsafe" - "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" ) func makeArrayValue(values []Value, offset uintptr) sparse.Array { - ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(unsafe.Add(ptr, offset), len(values), unsafe.Sizeof(Value{})) } func makeArrayString(values []string) sparse.Array { str := "" - ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof(str)) } func makeArrayBE128(values []*[16]byte) sparse.Array { - ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) + ptr := sliceData(values) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof((*[16]byte)(nil))) } @@ -29,7 +28,7 @@ func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.Array { func makeArrayOf[T any](s []T) sparse.Array { var model T - return makeArray(unsafecast.PointerOf(s), len(s), unsafe.Sizeof(model)) + return makeArray(sliceData(s), len(s), unsafe.Sizeof(model)) } func makeSlice[T any](a sparse.Array) []T { @@ -40,6 +39,10 @@ func slice[T any](p unsafe.Pointer, n int) []T { return unsafe.Slice((*T)(p), n) } +func sliceData[T any](s []T) unsafe.Pointer { + return unsafe.Pointer(unsafe.SliceData(s)) +} + type sliceHeader struct { base unsafe.Pointer len int diff --git a/vendor/github.com/parquet-go/parquet-go/bloom.go b/vendor/github.com/parquet-go/parquet-go/bloom.go index 734ac00a9d8..69d54c71712 100644 --- a/vendor/github.com/parquet-go/parquet-go/bloom.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom.go @@ -1,6 +1,7 @@ package parquet import ( + "encoding/binary" "io" "github.com/parquet-go/parquet-go/bloom" @@ -9,6 +10,7 @@ import ( "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/unsafecast" + "golang.org/x/sys/cpu" ) // BloomFilter is an interface allowing applications to test whether a key @@ -162,27 +164,38 @@ func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) } func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { - splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Int32ToUint32(src)) + splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src)) return dst, nil } func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Int64ToUint64(src)) + splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint64](src)) return dst, nil } func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), deprecated.Int96ToBytes(src), 12) + if cpu.IsBigEndian { + srcLen := len(src) + buf := make([]byte, srcLen*12) + for idx := range srcLen { + binary.LittleEndian.PutUint32(buf[(idx*12):4+(idx*12)], uint32(src[idx][0])) + binary.LittleEndian.PutUint32(buf[4+(idx*12):8+(idx*12)], uint32(src[idx][1])) + binary.LittleEndian.PutUint32(buf[8+(idx*12):12+(idx*12)], uint32(src[idx][2])) + } + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), buf, 12) + } else { + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[byte](src), 12) + } return dst, nil } func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Float32ToUint32(src)) + splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint32](src)) return dst, nil } func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { - splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Float64ToUint64(src)) + splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[uint64](src)) return dst, nil } @@ -210,7 +223,7 @@ func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { filter := bloom.MakeSplitBlockFilter(dst) if size == 16 { - splitBlockEncodeUint128(filter, unsafecast.BytesToUint128(src)) + splitBlockEncodeUint128(filter, unsafecast.Slice[[16]byte](src)) } else { splitBlockEncodeFixedLenByteArray(filter, src, size) } diff --git a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go index 655d815a460..11cc255a1c1 100644 --- a/vendor/github.com/parquet-go/parquet-go/bloom/filter.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom/filter.go @@ -3,7 +3,8 @@ package bloom import ( "io" "sync" - "unsafe" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // Filter is an interface representing read-only bloom filters where programs @@ -21,9 +22,7 @@ type SplitBlockFilter []Block // MakeSplitBlockFilter constructs a SplitBlockFilter value from the data byte // slice. func MakeSplitBlockFilter(data []byte) SplitBlockFilter { - p := *(*unsafe.Pointer)(unsafe.Pointer(&data)) - n := len(data) / BlockSize - return unsafe.Slice((*Block)(p), n) + return unsafecast.Slice[Block](data) } // NumSplitBlocksOf returns the number of blocks in a filter intended to hold @@ -64,7 +63,7 @@ func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f, x) } // The returned slice shares the memory of f. The method is intended to be used // to serialize the bloom filter to a storage medium. func (f SplitBlockFilter) Bytes() []byte { - return unsafe.Slice(*(**byte)(unsafe.Pointer(&f)), len(f)*BlockSize) + return unsafecast.Slice[byte](f) } // CheckSplitBlock is similar to bloom.SplitBlockFilter.Check but reads the diff --git a/vendor/github.com/parquet-go/parquet-go/column.go b/vendor/github.com/parquet-go/parquet-go/column.go index 0320a6df6c4..51f2d20a097 100644 --- a/vendor/github.com/parquet-go/parquet-go/column.go +++ b/vendor/github.com/parquet-go/parquet-go/column.go @@ -352,6 +352,8 @@ func (cl *columnLoader) open(file *File, path []string) (*Column, error) { c.typ = &groupType{} if lt := c.schema.LogicalType; lt != nil && lt.Map != nil { c.typ = &mapType{} + } else if lt != nil && lt.List != nil { + c.typ = &listType{} } c.columns = make([]*Column, numChildren) @@ -691,7 +693,7 @@ func (c *Column) decodeDataPage(header DataPageHeader, numValues int, repetition if pageType.Kind() == ByteArray && !isDictionaryEncoding(pageEncoding) { obuf = buffers.get(4 * (numValues + 1)) defer obuf.unref() - pageOffsets = unsafecast.BytesToUint32(obuf.data) + pageOffsets = unsafecast.Slice[uint32](obuf.data) } values := pageType.NewValues(pageValues, pageOffsets) diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer.go b/vendor/github.com/parquet-go/parquet-go/column_buffer.go index bc40cc3e17f..8435cb2977c 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_buffer.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer.go @@ -16,8 +16,12 @@ import ( "github.com/parquet-go/parquet-go/internal/bitpack" "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" + "golang.org/x/sys/cpu" ) +const offsetOfU64 = unsafe.Offsetof(Value{}.u64) +const offsetOfPtr = unsafe.Offsetof(Value{}.ptr) + // ColumnBuffer is an interface representing columns of a row group. // // ColumnBuffer implements sort.Interface as a way to support reordering the @@ -103,6 +107,29 @@ func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitio }, nil } +// On a big endian system, a boolean/byte value, which is in little endian byte format, is byte aligned +// to the 7th byte in a u64 (8 bytes) variable.. Hence the data will be available at 7th byte when +// interpreted as a little endian byte format. So, in order to access a boolean/byte value out of u64 variable, +// we need to add an offset of "7"... +// In the same way, an int32/uint32/float value, which is in little endian byte format, is byte aligned +// to the 4th byte in a u64 (8 bytes) variable.. Hence the data will be available at 4th byte when +// interpreted as a little endian byte format. So, in order to access an int32/uint32/float value out of u64 variable, +// we need to add an offset of "4" +func getOffset(colDict interface{}) uintptr { + var offset uintptr = 0 + + if cpu.IsBigEndian { + switch colDict.(type) { + case booleanColumnBuffer, booleanDictionary: + offset = 7 + + case int32ColumnBuffer, uint32ColumnBuffer, floatColumnBuffer, int32Dictionary, floatDictionary, uint32Dictionary: + offset = 4 + } + } + return offset +} + type nullableColumnIndex struct { ColumnIndex maxDefinitionLevel byte @@ -828,8 +855,8 @@ func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) { } func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -958,7 +985,7 @@ func (col *int32ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToInt32(b)...) + col.values = append(col.values, unsafecast.Slice[int32](b)...) return len(b), nil } @@ -968,8 +995,8 @@ func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) { } func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1057,7 +1084,7 @@ func (col *int64ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToInt64(b)...) + col.values = append(col.values, unsafecast.Slice[int64](b)...) return len(b), nil } @@ -1067,8 +1094,7 @@ func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) { } func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -1155,7 +1181,7 @@ func (col *int96ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 12) != 0 { return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b)) } - col.values = append(col.values, deprecated.BytesToInt96(b)...) + col.values = append(col.values, unsafecast.Slice[deprecated.Int96](b)...) return len(b), nil } @@ -1252,7 +1278,7 @@ func (col *floatColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToFloat32(b)...) + col.values = append(col.values, unsafecast.Slice[float32](b)...) return len(b), nil } @@ -1262,8 +1288,8 @@ func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) { } func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1350,7 +1376,7 @@ func (col *doubleColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToFloat64(b)...) + col.values = append(col.values, unsafecast.Slice[float64](b)...) return len(b), nil } @@ -1360,8 +1386,7 @@ func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) { } func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -1505,7 +1530,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths)) err = plain.RangeByteArray(values, func(value []byte) error { - col.append(unsafecast.BytesToString(value)) + col.append(unsafecast.String(value)) return nil }) @@ -1515,8 +1540,7 @@ func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes i } func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfPtr), columnLevels{}) return len(values), nil } @@ -1742,7 +1766,7 @@ func (col *uint32ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 4) != 0 { return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToUint32(b)...) + col.values = append(col.values, unsafecast.Slice[uint32](b)...) return len(b), nil } @@ -1752,8 +1776,8 @@ func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) { } func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + offset := getOffset(*col) + col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) return len(values), nil } @@ -1840,7 +1864,7 @@ func (col *uint64ColumnBuffer) Write(b []byte) (int, error) { if (len(b) % 8) != 0 { return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) } - col.values = append(col.values, unsafecast.BytesToUint64(b)...) + col.values = append(col.values, unsafecast.Slice[uint64](b)...) return len(b), nil } @@ -1850,8 +1874,7 @@ func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) { } func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) { - var model Value - col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU64), columnLevels{}) return len(values), nil } @@ -2356,8 +2379,8 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo mapKey.SetIterKey(it) mapValue.SetIterValue(it) - k := makeArray(unsafecast.PointerOfValue(mapKey), 1, keySize) - v := makeArray(unsafecast.PointerOfValue(mapValue), 1, valueSize) + k := makeArray(reflectValueData(mapKey), 1, keySize) + v := makeArray(reflectValueData(mapValue), 1, valueSize) if err := writeKeyValues(columns, k, v, elemLevels); err != nil { return err @@ -2440,7 +2463,7 @@ func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeR val = t.UnixNano() } - a := makeArray(unsafecast.PointerOfValue(reflect.ValueOf(val)), 1, elemSize) + a := makeArray(reflectValueData(reflect.ValueOf(val)), 1, elemSize) if err := writeRows(columns, a, levels); err != nil { return err } diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go index 9f41875f2db..45717269965 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer_amd64.go @@ -10,7 +10,7 @@ import ( ) func broadcastValueInt32(dst []int32, src int8) { - bytealg.Broadcast(unsafecast.Int32ToBytes(dst), byte(src)) + bytealg.Broadcast(unsafecast.Slice[byte](dst), byte(src)) } //go:noescape diff --git a/vendor/github.com/parquet-go/parquet-go/column_index_be.go b/vendor/github.com/parquet-go/parquet-go/column_index_be.go new file mode 100644 index 00000000000..f3ea2e7bdfb --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/column_index_be.go @@ -0,0 +1,854 @@ +// This file gets added on all the big-endian CPU architectures. + +//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64 + +package parquet + +import ( + "encoding/binary" + "github.com/parquet-go/parquet-go/deprecated" + "github.com/parquet-go/parquet-go/encoding/plain" + "github.com/parquet-go/parquet-go/format" + "github.com/parquet-go/parquet-go/internal/unsafecast" + "math" +) + +type ColumnIndex interface { + // NumPages returns the number of paged in the column index. + NumPages() int + + // Returns the number of null values in the page at the given index. + NullCount(int) int64 + + // Tells whether the page at the given index contains null values only. + NullPage(int) bool + + // PageIndex return min/max bounds for the page at the given index in the + // column. + MinValue(int) Value + MaxValue(int) Value + + // IsAscending returns true if the column index min/max values are sorted + // in ascending order (based on the ordering rules of the column's logical + // type). + IsAscending() bool + + // IsDescending returns true if the column index min/max values are sorted + // in descending order (based on the ordering rules of the column's logical + // type). + IsDescending() bool +} + +// NewColumnIndex constructs a ColumnIndex instance from the given parquet +// format column index. The kind argument configures the type of values +func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex { + return &formatColumnIndex{ + kind: kind, + index: index, + } +} + +type formatColumnIndex struct { + kind Kind + index *format.ColumnIndex +} + +func (f *formatColumnIndex) NumPages() int { + return len(f.index.MinValues) +} + +func (f *formatColumnIndex) NullCount(i int) int64 { + if len(f.index.NullCounts) > 0 { + return f.index.NullCounts[i] + } + return 0 +} + +func (f *formatColumnIndex) NullPage(i int) bool { + return len(f.index.NullPages) > 0 && f.index.NullPages[i] +} + +func (f *formatColumnIndex) MinValue(i int) Value { + if f.NullPage(i) { + return Value{} + } + return f.kind.Value(f.index.MinValues[i]) +} + +func (f *formatColumnIndex) MaxValue(i int) Value { + if f.NullPage(i) { + return Value{} + } + return f.kind.Value(f.index.MaxValues[i]) +} + +func (f *formatColumnIndex) IsAscending() bool { + return f.index.BoundaryOrder == format.Ascending +} + +func (f *formatColumnIndex) IsDescending() bool { + return f.index.BoundaryOrder == format.Descending +} + +type fileColumnIndex struct{ chunk *fileColumnChunk } + +func (i fileColumnIndex) NumPages() int { + return len(i.columnIndex().NullPages) +} + +func (i fileColumnIndex) NullCount(j int) int64 { + index := i.columnIndex() + if len(index.NullCounts) > 0 { + return index.NullCounts[j] + } + return 0 +} + +func (i fileColumnIndex) NullPage(j int) bool { + return isNullPage(j, i.columnIndex()) +} + +func (i fileColumnIndex) MinValue(j int) Value { + index := i.columnIndex() + if isNullPage(j, index) { + return Value{} + } + return i.makeValue(index.MinValues[j]) +} + +func (i fileColumnIndex) MaxValue(j int) Value { + index := i.columnIndex() + if isNullPage(j, index) { + return Value{} + } + return i.makeValue(index.MaxValues[j]) +} + +func (i fileColumnIndex) IsAscending() bool { + return i.columnIndex().BoundaryOrder == format.Ascending +} + +func (i fileColumnIndex) IsDescending() bool { + return i.columnIndex().BoundaryOrder == format.Descending +} + +func (i *fileColumnIndex) makeValue(b []byte) Value { + return i.chunk.column.typ.Kind().Value(b) +} + +func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() } +func isNullPage(j int, index *format.ColumnIndex) bool { + return len(index.NullPages) > 0 && index.NullPages[j] +} + +type emptyColumnIndex struct{} + +func (emptyColumnIndex) NumPages() int { return 0 } +func (emptyColumnIndex) NullCount(int) int64 { return 0 } +func (emptyColumnIndex) NullPage(int) bool { return false } +func (emptyColumnIndex) MinValue(int) Value { return Value{} } +func (emptyColumnIndex) MaxValue(int) Value { return Value{} } +func (emptyColumnIndex) IsAscending() bool { return false } +func (emptyColumnIndex) IsDescending() bool { return false } + +type booleanColumnIndex struct{ page *booleanPage } + +func (i booleanColumnIndex) NumPages() int { return 1 } +func (i booleanColumnIndex) NullCount(int) int64 { return 0 } +func (i booleanColumnIndex) NullPage(int) bool { return false } +func (i booleanColumnIndex) MinValue(int) Value { return makeValueBoolean(i.page.min()) } +func (i booleanColumnIndex) MaxValue(int) Value { return makeValueBoolean(i.page.max()) } +func (i booleanColumnIndex) IsAscending() bool { return false } +func (i booleanColumnIndex) IsDescending() bool { return false } + +type int32ColumnIndex struct{ page *int32Page } + +func (i int32ColumnIndex) NumPages() int { return 1 } +func (i int32ColumnIndex) NullCount(int) int64 { return 0 } +func (i int32ColumnIndex) NullPage(int) bool { return false } +func (i int32ColumnIndex) MinValue(int) Value { return makeValueInt32(i.page.min()) } +func (i int32ColumnIndex) MaxValue(int) Value { return makeValueInt32(i.page.max()) } +func (i int32ColumnIndex) IsAscending() bool { return false } +func (i int32ColumnIndex) IsDescending() bool { return false } + +type int64ColumnIndex struct{ page *int64Page } + +func (i int64ColumnIndex) NumPages() int { return 1 } +func (i int64ColumnIndex) NullCount(int) int64 { return 0 } +func (i int64ColumnIndex) NullPage(int) bool { return false } +func (i int64ColumnIndex) MinValue(int) Value { return makeValueInt64(i.page.min()) } +func (i int64ColumnIndex) MaxValue(int) Value { return makeValueInt64(i.page.max()) } +func (i int64ColumnIndex) IsAscending() bool { return false } +func (i int64ColumnIndex) IsDescending() bool { return false } + +type int96ColumnIndex struct{ page *int96Page } + +func (i int96ColumnIndex) NumPages() int { return 1 } +func (i int96ColumnIndex) NullCount(int) int64 { return 0 } +func (i int96ColumnIndex) NullPage(int) bool { return false } +func (i int96ColumnIndex) MinValue(int) Value { return makeValueInt96(i.page.min()) } +func (i int96ColumnIndex) MaxValue(int) Value { return makeValueInt96(i.page.max()) } +func (i int96ColumnIndex) IsAscending() bool { return false } +func (i int96ColumnIndex) IsDescending() bool { return false } + +type floatColumnIndex struct{ page *floatPage } + +func (i floatColumnIndex) NumPages() int { return 1 } +func (i floatColumnIndex) NullCount(int) int64 { return 0 } +func (i floatColumnIndex) NullPage(int) bool { return false } +func (i floatColumnIndex) MinValue(int) Value { return makeValueFloat(i.page.min()) } +func (i floatColumnIndex) MaxValue(int) Value { return makeValueFloat(i.page.max()) } +func (i floatColumnIndex) IsAscending() bool { return false } +func (i floatColumnIndex) IsDescending() bool { return false } + +type doubleColumnIndex struct{ page *doublePage } + +func (i doubleColumnIndex) NumPages() int { return 1 } +func (i doubleColumnIndex) NullCount(int) int64 { return 0 } +func (i doubleColumnIndex) NullPage(int) bool { return false } +func (i doubleColumnIndex) MinValue(int) Value { return makeValueDouble(i.page.min()) } +func (i doubleColumnIndex) MaxValue(int) Value { return makeValueDouble(i.page.max()) } +func (i doubleColumnIndex) IsAscending() bool { return false } +func (i doubleColumnIndex) IsDescending() bool { return false } + +type byteArrayColumnIndex struct{ page *byteArrayPage } + +func (i byteArrayColumnIndex) NumPages() int { return 1 } +func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 } +func (i byteArrayColumnIndex) NullPage(int) bool { return false } +func (i byteArrayColumnIndex) MinValue(int) Value { return makeValueBytes(ByteArray, i.page.min()) } +func (i byteArrayColumnIndex) MaxValue(int) Value { return makeValueBytes(ByteArray, i.page.max()) } +func (i byteArrayColumnIndex) IsAscending() bool { return false } +func (i byteArrayColumnIndex) IsDescending() bool { return false } + +type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage } + +func (i fixedLenByteArrayColumnIndex) NumPages() int { return 1 } +func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 } +func (i fixedLenByteArrayColumnIndex) NullPage(int) bool { return false } +func (i fixedLenByteArrayColumnIndex) MinValue(int) Value { + return makeValueBytes(FixedLenByteArray, i.page.min()) +} +func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value { + return makeValueBytes(FixedLenByteArray, i.page.max()) +} +func (i fixedLenByteArrayColumnIndex) IsAscending() bool { return false } +func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false } + +type uint32ColumnIndex struct{ page *uint32Page } + +func (i uint32ColumnIndex) NumPages() int { return 1 } +func (i uint32ColumnIndex) NullCount(int) int64 { return 0 } +func (i uint32ColumnIndex) NullPage(int) bool { return false } +func (i uint32ColumnIndex) MinValue(int) Value { return makeValueUint32(i.page.min()) } +func (i uint32ColumnIndex) MaxValue(int) Value { return makeValueUint32(i.page.max()) } +func (i uint32ColumnIndex) IsAscending() bool { return false } +func (i uint32ColumnIndex) IsDescending() bool { return false } + +type uint64ColumnIndex struct{ page *uint64Page } + +func (i uint64ColumnIndex) NumPages() int { return 1 } +func (i uint64ColumnIndex) NullCount(int) int64 { return 0 } +func (i uint64ColumnIndex) NullPage(int) bool { return false } +func (i uint64ColumnIndex) MinValue(int) Value { return makeValueUint64(i.page.min()) } +func (i uint64ColumnIndex) MaxValue(int) Value { return makeValueUint64(i.page.max()) } +func (i uint64ColumnIndex) IsAscending() bool { return false } +func (i uint64ColumnIndex) IsDescending() bool { return false } + +type be128ColumnIndex struct{ page *be128Page } + +func (i be128ColumnIndex) NumPages() int { return 1 } +func (i be128ColumnIndex) NullCount(int) int64 { return 0 } +func (i be128ColumnIndex) NullPage(int) bool { return false } +func (i be128ColumnIndex) MinValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.min()) } +func (i be128ColumnIndex) MaxValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.max()) } +func (i be128ColumnIndex) IsAscending() bool { return false } +func (i be128ColumnIndex) IsDescending() bool { return false } + +// The ColumnIndexer interface is implemented by types that support generating +// parquet column indexes. +// +// The package does not export any types that implement this interface, programs +// must call NewColumnIndexer on a Type instance to construct column indexers. +type ColumnIndexer interface { + // Resets the column indexer state. + Reset() + + // Add a page to the column indexer. + IndexPage(numValues, numNulls int64, min, max Value) + + // Generates a format.ColumnIndex value from the current state of the + // column indexer. + // + // The returned value may reference internal buffers, in which case the + // values remain valid until the next call to IndexPage or Reset on the + // column indexer. + ColumnIndex() format.ColumnIndex +} + +type baseColumnIndexer struct { + nullPages []bool + nullCounts []int64 +} + +func (i *baseColumnIndexer) reset() { + i.nullPages = i.nullPages[:0] + i.nullCounts = i.nullCounts[:0] +} + +func (i *baseColumnIndexer) observe(numValues, numNulls int64) { + i.nullPages = append(i.nullPages, numValues == numNulls) + i.nullCounts = append(i.nullCounts, numNulls) +} + +func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex { + nullPages := make([]bool, len(i.nullPages)) + copy(nullPages, i.nullPages) + nullCounts := make([]int64, len(i.nullCounts)) + copy(nullCounts, i.nullCounts) + return format.ColumnIndex{ + NullPages: nullPages, + NullCounts: nullCounts, + MinValues: minValues, + MaxValues: maxValues, + BoundaryOrder: boundaryOrderOf(minOrder, maxOrder), + } +} + +type booleanColumnIndexer struct { + baseColumnIndexer + minValues []bool + maxValues []bool +} + +func newBooleanColumnIndexer() *booleanColumnIndexer { + return new(booleanColumnIndexer) +} + +func (i *booleanColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.boolean()) + i.maxValues = append(i.maxValues, max.boolean()) +} + +func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex { + return i.columnIndex( + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1), + orderOfBool(i.minValues), + orderOfBool(i.maxValues), + ) +} + +type int32ColumnIndexer struct { + baseColumnIndexer + minValues []int32 + maxValues []int32 +} + +func newInt32ColumnIndexer() *int32ColumnIndexer { + return new(int32ColumnIndexer) +} + +func (i *int32ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.int32()) + i.maxValues = append(i.maxValues, max.int32()) +} + +func reverseInt32MinMaxValues(mLen int, mVal []int32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k])) + idx += 4 + } + return buf +} + +func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt32MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt32MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfInt32(i.minValues), + orderOfInt32(i.maxValues), + ) +} + +type int64ColumnIndexer struct { + baseColumnIndexer + minValues []int64 + maxValues []int64 +} + +func newInt64ColumnIndexer() *int64ColumnIndexer { + return new(int64ColumnIndexer) +} + +func (i *int64ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.int64()) + i.maxValues = append(i.maxValues, max.int64()) +} + +func reverseInt64MinMaxValues(mLen int, mVal []int64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], uint64(mVal[k])) + idx += 8 + } + return buf +} + +func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt64MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt64MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfInt64(i.minValues), + orderOfInt64(i.maxValues), + ) +} + +type int96ColumnIndexer struct { + baseColumnIndexer + minValues []deprecated.Int96 + maxValues []deprecated.Int96 +} + +func newInt96ColumnIndexer() *int96ColumnIndexer { + return new(int96ColumnIndexer) +} + +func (i *int96ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.Int96()) + i.maxValues = append(i.maxValues, max.Int96()) +} + +func reverseInt96MinMaxValues(mLen int, mVal []deprecated.Int96) []byte { + buf := make([]byte, mLen*12) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], uint32(mVal[k][0])) + binary.LittleEndian.PutUint32(buf[(4+idx):(8+idx)], uint32(mVal[k][1])) + binary.LittleEndian.PutUint32(buf[(8+idx):(12+idx)], uint32(mVal[k][2])) + idx += 12 + } + return buf +} + +func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseInt96MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseInt96MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 12), + splitFixedLenByteArrays(byteMax, 12), + deprecated.OrderOfInt96(i.minValues), + deprecated.OrderOfInt96(i.maxValues), + ) +} + +type floatColumnIndexer struct { + baseColumnIndexer + minValues []float32 + maxValues []float32 +} + +func newFloatColumnIndexer() *floatColumnIndexer { + return new(floatColumnIndexer) +} + +func (i *floatColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.float()) + i.maxValues = append(i.maxValues, max.float()) +} + +func reverseFloatMinMaxValues(mLen int, mVal []float32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], math.Float32bits(mVal[k])) + idx += 4 + } + return buf +} + +func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseFloatMinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseFloatMinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfFloat32(i.minValues), + orderOfFloat32(i.maxValues), + ) +} + +type doubleColumnIndexer struct { + baseColumnIndexer + minValues []float64 + maxValues []float64 +} + +func newDoubleColumnIndexer() *doubleColumnIndexer { + return new(doubleColumnIndexer) +} + +func (i *doubleColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.double()) + i.maxValues = append(i.maxValues, max.double()) +} + +func reverseDoubleMinMaxValues(mLen int, mVal []float64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], math.Float64bits(mVal[k])) + idx += 8 + } + return buf +} + +func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseDoubleMinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseDoubleMinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfFloat64(i.minValues), + orderOfFloat64(i.maxValues), + ) +} + +type byteArrayColumnIndexer struct { + baseColumnIndexer + sizeLimit int + minValues []byte + maxValues []byte +} + +func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer { + return &byteArrayColumnIndexer{sizeLimit: sizeLimit} +} + +func (i *byteArrayColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = plain.AppendByteArray(i.minValues, min.byteArray()) + i.maxValues = plain.AppendByteArray(i.maxValues, max.byteArray()) +} + +func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitByteArrays(i.minValues) + maxValues := splitByteArrays(i.maxValues) + if sizeLimit := i.sizeLimit; sizeLimit > 0 { + for i, v := range minValues { + minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit) + } + for i, v := range maxValues { + maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit) + } + } + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +type fixedLenByteArrayColumnIndexer struct { + baseColumnIndexer + size int + sizeLimit int + minValues []byte + maxValues []byte +} + +func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer { + return &fixedLenByteArrayColumnIndexer{ + size: size, + sizeLimit: sizeLimit, + } +} + +func (i *fixedLenByteArrayColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.byteArray()...) + i.maxValues = append(i.maxValues, max.byteArray()...) +} + +func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitFixedLenByteArrays(i.minValues, i.size) + maxValues := splitFixedLenByteArrays(i.maxValues, i.size) + if sizeLimit := i.sizeLimit; sizeLimit > 0 { + for i, v := range minValues { + minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit) + } + for i, v := range maxValues { + maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit) + } + } + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +type uint32ColumnIndexer struct { + baseColumnIndexer + minValues []uint32 + maxValues []uint32 +} + +func newUint32ColumnIndexer() *uint32ColumnIndexer { + return new(uint32ColumnIndexer) +} + +func (i *uint32ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.uint32()) + i.maxValues = append(i.maxValues, max.uint32()) +} + +func reverseUint32MinMaxValues(mLen int, mVal []uint32) []byte { + buf := make([]byte, mLen*4) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint32(buf[idx:(4+idx)], mVal[k]) + idx += 4 + } + return buf +} + +func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseUint32MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseUint32MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 4), + splitFixedLenByteArrays(byteMax, 4), + orderOfUint32(i.minValues), + orderOfUint32(i.maxValues), + ) +} + +type uint64ColumnIndexer struct { + baseColumnIndexer + minValues []uint64 + maxValues []uint64 +} + +func newUint64ColumnIndexer() *uint64ColumnIndexer { + return new(uint64ColumnIndexer) +} + +func (i *uint64ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + i.minValues = append(i.minValues, min.uint64()) + i.maxValues = append(i.maxValues, max.uint64()) +} + +func reverseUint64MinMaxValues(mLen int, mVal []uint64) []byte { + buf := make([]byte, mLen*8) + idx := 0 + for k := range mLen { + binary.LittleEndian.PutUint64(buf[idx:(8+idx)], mVal[k]) + idx += 8 + } + return buf +} + +func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex { + byteMin := reverseUint64MinMaxValues(len(i.minValues), i.minValues) + byteMax := reverseUint64MinMaxValues(len(i.maxValues), i.maxValues) + + return i.columnIndex( + splitFixedLenByteArrays(byteMin, 8), + splitFixedLenByteArrays(byteMax, 8), + orderOfUint64(i.minValues), + orderOfUint64(i.maxValues), + ) +} + +type be128ColumnIndexer struct { + baseColumnIndexer + minValues [][16]byte + maxValues [][16]byte +} + +func newBE128ColumnIndexer() *be128ColumnIndexer { + return new(be128ColumnIndexer) +} + +func (i *be128ColumnIndexer) Reset() { + i.reset() + i.minValues = i.minValues[:0] + i.maxValues = i.maxValues[:0] +} + +func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { + i.observe(numValues, numNulls) + if !min.IsNull() { + i.minValues = append(i.minValues, *(*[16]byte)(min.byteArray())) + } + if !max.IsNull() { + i.maxValues = append(i.maxValues, *(*[16]byte)(max.byteArray())) + } +} + +func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { + minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16) + maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16) + return i.columnIndex( + minValues, + maxValues, + orderOfBytes(minValues), + orderOfBytes(maxValues), + ) +} + +func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte { + if len(value) > sizeLimit { + value = value[:sizeLimit] + } + return value +} + +// truncateLargeMaxByteArrayValue truncates the given byte array to the given size limit. +// If the given byte array is truncated, it is incremented by 1 in place. +func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte { + if len(value) > sizeLimit { + value = value[:sizeLimit] + incrementByteArrayInplace(value) + } + return value +} + +// incrementByteArray increments the given byte array by 1. +// Reference: https://github.com/apache/parquet-java/blob/master/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java#L124 +func incrementByteArrayInplace(value []byte) { + for i := len(value) - 1; i >= 0; i-- { + value[i]++ + if value[i] != 0 { // Did not overflow: 0xFF -> 0x00 + return + } + } + // Fully overflowed, so restore all to 0xFF + for i := range value { + value[i] = 0xFF + } +} + +func splitByteArrays(data []byte) [][]byte { + length := 0 + plain.RangeByteArray(data, func([]byte) error { + length++ + return nil + }) + buffer := make([]byte, 0, len(data)-(4*length)) + values := make([][]byte, 0, length) + plain.RangeByteArray(data, func(value []byte) error { + offset := len(buffer) + buffer = append(buffer, value...) + values = append(values, buffer[offset:]) + return nil + }) + return values +} + +func splitFixedLenByteArrays(data []byte, size int) [][]byte { + data = copyBytes(data) + values := make([][]byte, len(data)/size) + for i := range values { + j := (i + 0) * size + k := (i + 1) * size + values[i] = data[j:k:k] + } + return values +} + +func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder { + if minOrder == maxOrder { + switch { + case minOrder > 0: + return format.Ascending + case minOrder < 0: + return format.Descending + } + } + return format.Unordered +} diff --git a/vendor/github.com/parquet-go/parquet-go/column_index.go b/vendor/github.com/parquet-go/parquet-go/column_index_le.go similarity index 90% rename from vendor/github.com/parquet-go/parquet-go/column_index.go rename to vendor/github.com/parquet-go/parquet-go/column_index_le.go index 1252248b6ce..4d8fec4511a 100644 --- a/vendor/github.com/parquet-go/parquet-go/column_index.go +++ b/vendor/github.com/parquet-go/parquet-go/column_index_le.go @@ -1,3 +1,7 @@ +// This file gets added on all the little-endian CPU architectures. + +//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm + package parquet import ( @@ -87,46 +91,55 @@ func (f *formatColumnIndex) IsDescending() bool { type fileColumnIndex struct{ chunk *fileColumnChunk } func (i fileColumnIndex) NumPages() int { - return len(i.chunk.columnIndex.NullPages) + return len(i.columnIndex().NullPages) } func (i fileColumnIndex) NullCount(j int) int64 { - if len(i.chunk.columnIndex.NullCounts) > 0 { - return i.chunk.columnIndex.NullCounts[j] + index := i.columnIndex() + if len(index.NullCounts) > 0 { + return index.NullCounts[j] } return 0 } func (i fileColumnIndex) NullPage(j int) bool { - return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j] + return isNullPage(j, i.columnIndex()) } func (i fileColumnIndex) MinValue(j int) Value { - if i.NullPage(j) { + index := i.columnIndex() + if isNullPage(j, index) { return Value{} } - return i.makeValue(i.chunk.columnIndex.MinValues[j]) + return i.makeValue(index.MinValues[j]) } func (i fileColumnIndex) MaxValue(j int) Value { - if i.NullPage(j) { + index := i.columnIndex() + if isNullPage(j, index) { return Value{} } - return i.makeValue(i.chunk.columnIndex.MaxValues[j]) + return i.makeValue(index.MaxValues[j]) } func (i fileColumnIndex) IsAscending() bool { - return i.chunk.columnIndex.BoundaryOrder == format.Ascending + return i.columnIndex().BoundaryOrder == format.Ascending } func (i fileColumnIndex) IsDescending() bool { - return i.chunk.columnIndex.BoundaryOrder == format.Descending + return i.columnIndex().BoundaryOrder == format.Descending } func (i *fileColumnIndex) makeValue(b []byte) Value { return i.chunk.column.typ.Kind().Value(b) } +func (i fileColumnIndex) columnIndex() *format.ColumnIndex { return i.chunk.columnIndex.Load() } + +func isNullPage(j int, index *format.ColumnIndex) bool { + return len(index.NullPages) > 0 && index.NullPages[j] +} + type emptyColumnIndex struct{} func (emptyColumnIndex) NumPages() int { return 0 } @@ -325,8 +338,8 @@ func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Val func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1), - splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 1), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 1), orderOfBool(i.minValues), orderOfBool(i.maxValues), ) @@ -356,8 +369,8 @@ func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfInt32(i.minValues), orderOfInt32(i.maxValues), ) @@ -387,8 +400,8 @@ func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfInt64(i.minValues), orderOfInt64(i.maxValues), ) @@ -418,8 +431,8 @@ func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12), - splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 12), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 12), deprecated.OrderOfInt96(i.minValues), deprecated.OrderOfInt96(i.maxValues), ) @@ -449,8 +462,8 @@ func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfFloat32(i.minValues), orderOfFloat32(i.maxValues), ) @@ -480,8 +493,8 @@ func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfFloat64(i.minValues), orderOfFloat64(i.maxValues), ) @@ -599,8 +612,8 @@ func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4), - splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 4), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 4), orderOfUint32(i.minValues), orderOfUint32(i.maxValues), ) @@ -630,8 +643,8 @@ func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Valu func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex { return i.columnIndex( - splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8), - splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 8), + splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 8), orderOfUint64(i.minValues), orderOfUint64(i.maxValues), ) @@ -664,8 +677,8 @@ func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value } func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { - minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16) - maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16) + minValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.minValues), 16) + maxValues := splitFixedLenByteArrays(unsafecast.Slice[byte](i.maxValues), 16) return i.columnIndex( minValues, maxValues, diff --git a/vendor/github.com/parquet-go/parquet-go/convert.go b/vendor/github.com/parquet-go/parquet-go/convert.go index 8b315739c02..96a49a81f63 100644 --- a/vendor/github.com/parquet-go/parquet-go/convert.go +++ b/vendor/github.com/parquet-go/parquet-go/convert.go @@ -11,9 +11,12 @@ import ( "sync" "time" + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // ConvertError is an error type returned by calls to Convert when the conversion @@ -912,7 +915,15 @@ func convertStringToInt96(v Value) (Value, error) { b := i.Bytes() c := make([]byte, 12) copy(c, b) - i96 := deprecated.BytesToInt96(c) + if cpu.IsBigEndian { + bufLen := len(c) + for idx := 0; idx < bufLen; idx = idx + 4 { + for m, n := (idx + 0), (idx + 3); m < n; m, n = m+1, n-1 { + c[m], c[n] = c[n], c[m] + } + } + } + i96 := unsafecast.Slice[deprecated.Int96](c) return v.convertToInt96(i96[0]), nil } diff --git a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go index 1bed7a5d7b5..fc6d40648e8 100644 --- a/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go +++ b/vendor/github.com/parquet-go/parquet-go/deprecated/int96.go @@ -3,7 +3,6 @@ package deprecated import ( "math/big" "math/bits" - "unsafe" ) // Int96 is an implementation of the deprecated INT96 parquet type. @@ -98,21 +97,6 @@ func (i Int96) Len() int { } } -// Int96ToBytes converts the slice of Int96 values to a slice of bytes sharing -// the same backing array. -func Int96ToBytes(data []Int96) []byte { - return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 12*len(data)) -} - -// BytesToInt96 converts the byte slice passed as argument to a slice of Int96 -// sharing the same backing array. -// -// When the number of bytes in the input is not a multiple of 12, the function -// truncates it in the returned slice. -func BytesToInt96(data []byte) []Int96 { - return unsafe.Slice(*(**Int96)(unsafe.Pointer(&data)), len(data)/12) -} - func MaxLenInt96(data []Int96) int { max := 0 for i := range data { diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary.go b/vendor/github.com/parquet-go/parquet-go/dictionary.go index 3bf2d97f0da..9dce0ff6514 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary.go @@ -29,7 +29,7 @@ const ( // // This constant is used to determine a useful chunk size depending on the // size of values being inserted in dictionaries. More values of small size - // can fit in CPU caches, so the inserts can operation on larger chunks. + // can fit in CPU caches, so the inserts can operate on larger chunks. insertsTargetCacheFootprint = 8192 ) @@ -140,8 +140,8 @@ func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i) func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } func (d *booleanDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { @@ -171,7 +171,7 @@ func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { func (d *booleanDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(false) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) { @@ -238,8 +238,8 @@ func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } func (d *int32Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *int32Dictionary) init(indexes []int32) { @@ -291,7 +291,8 @@ func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -338,8 +339,7 @@ func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *int64Dictionary) index(i int32) int64 { return d.values[i] } func (d *int64Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *int64Dictionary) init(indexes []int32) { @@ -378,7 +378,7 @@ func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -520,8 +520,8 @@ func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *floatDictionary) index(i int32) float32 { return d.values[i] } func (d *floatDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *floatDictionary) init(indexes []int32) { @@ -560,7 +560,8 @@ func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { func (d *floatDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { @@ -607,8 +608,7 @@ func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *doubleDictionary) index(i int32) float64 { return d.values[i] } func (d *doubleDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *doubleDictionary) init(indexes []int32) { @@ -647,7 +647,7 @@ func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) { func (d *doubleDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) { @@ -706,8 +706,7 @@ func (d *byteArrayDictionary) Len() int { return d.len() } func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(int(i))) } func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.insert(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *byteArrayDictionary) init() { @@ -745,13 +744,13 @@ func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) { func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(int(indexes[0])) - minValue := unsafecast.BytesToString(base) + minValue := unsafecast.String(base) maxValue := minValue values := [64]string{} @@ -864,13 +863,13 @@ func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, v func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(indexes[0]) - minValue := unsafecast.BytesToString(base) + minValue := unsafecast.String(base) maxValue := minValue values := [64]string{} @@ -931,8 +930,8 @@ func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *uint32Dictionary) init(indexes []int32) { @@ -971,7 +970,8 @@ func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + offset := getOffset(*d) + d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) } func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -1018,8 +1018,7 @@ func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] } func (d *uint64Dictionary) Insert(indexes []int32, values []Value) { - model := Value{} - d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.insert(indexes, makeArrayValue(values, offsetOfU64)) } func (d *uint64Dictionary) init(indexes []int32) { @@ -1058,7 +1057,7 @@ func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) + d.lookup(indexes, makeArrayValue(values, offsetOfU64)) } func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -1176,7 +1175,7 @@ func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) - d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) + d.lookupString(indexes, makeArrayValue(values, offsetOfPtr)) } func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -1242,7 +1241,7 @@ func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, data e copy(tmp, values) values = tmp } else { - clear := values[len(values) : len(values)+size] + clear := values[len(values):size] for i := range clear { clear[i] = 0 } diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go index be7154ddbde..387e65f7592 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go @@ -47,25 +47,25 @@ func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, indexes []in func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Int32ToUint32(d.values) + dict := unsafecast.Slice[uint32](d.values) dictionaryLookup32(dict, indexes, rows).check() } func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Int64ToUint64(d.values) + dict := unsafecast.Slice[uint64](d.values) dictionaryLookup64(dict, indexes, rows).check() } func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Float32ToUint32(d.values) + dict := unsafecast.Slice[uint32](d.values) dictionaryLookup32(dict, indexes, rows).check() } func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - dict := unsafecast.Float64ToUint64(d.values) + dict := unsafecast.Slice[uint64](d.values) dictionaryLookup64(dict, indexes, rows).check() } @@ -83,8 +83,7 @@ func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { // //dictionaryLookupByteArrayString(d.offsets, d.values, indexes, rows).check() for i, j := range indexes { - v := d.index(int(j)) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j))) } } @@ -92,8 +91,7 @@ func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse. checkLookupIndexBounds(indexes, rows) //dictionaryLookupFixedLenByteArrayString(d.data, d.size, indexes, rows).check() for i, j := range indexes { - v := d.index(j) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(j)) } } @@ -109,7 +107,7 @@ func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - //dict := unsafecast.Uint128ToBytes(d.values) + //dict := unsafecast.Slice[byte](d.values) //dictionaryLookupFixedLenByteArrayString(dict, 16, indexes, rows).check() s := "0123456789ABCDEF" for i, j := range indexes { @@ -120,7 +118,7 @@ func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) { func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) - //dict := unsafecast.Uint128ToBytes(d.values) + //dict := unsafecast.Slice[byte](d.values) //dictionaryLookupFixedLenByteArrayPointer(dict, 16, indexes, rows).check() for i, j := range indexes { *(**[16]byte)(rows.Index(i)) = d.index(j) diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go index a586897fdab..4893415250f 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary_purego.go @@ -5,6 +5,7 @@ package parquet import ( "unsafe" + "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" ) @@ -39,16 +40,14 @@ func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) { func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { - v := d.index(int(j)) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(int(j))) } } func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { - v := d.index(j) - *(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v)) + *(*string)(rows.Index(i)) = unsafecast.String(d.index(j)) } } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go index f3a66441aa2..23b0202d7b0 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit.go @@ -22,13 +22,13 @@ func (e *Encoding) Encoding() format.Encoding { func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { dst = resize(dst, 4*len(src)) - encodeFloat(dst, unsafecast.Float32ToBytes(src)) + encodeFloat(dst, unsafecast.Slice[byte](src)) return dst, nil } func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { dst = resize(dst, 8*len(src)) - encodeDouble(dst, unsafecast.Float64ToBytes(src)) + encodeDouble(dst, unsafecast.Slice[byte](src)) return dst, nil } @@ -36,18 +36,18 @@ func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) } - buf := resize(unsafecast.Float32ToBytes(dst), len(src)) + buf := resize(unsafecast.Slice[byte](dst), len(src)) decodeFloat(buf, src) - return unsafecast.BytesToFloat32(buf), nil + return unsafecast.Slice[float32](buf), nil } func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) } - buf := resize(unsafecast.Float64ToBytes(dst), len(src)) + buf := resize(unsafecast.Slice[byte](dst), len(src)) decodeDouble(buf, src) - return unsafecast.BytesToFloat64(buf), nil + return unsafecast.Slice[float64](buf), nil } func resize(buf []byte, size int) []byte { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go index 556fb8cce37..6f5bf15c795 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/bytestreamsplit/bytestreamsplit_purego.go @@ -11,7 +11,7 @@ func encodeFloat(dst, src []byte) { b2 := dst[2*n : 3*n] b3 := dst[3*n : 4*n] - for i, v := range unsafecast.BytesToUint32(src) { + for i, v := range unsafecast.Slice[uint32](src) { b0[i] = byte(v >> 0) b1[i] = byte(v >> 8) b2[i] = byte(v >> 16) @@ -30,7 +30,7 @@ func encodeDouble(dst, src []byte) { b6 := dst[6*n : 7*n] b7 := dst[7*n : 8*n] - for i, v := range unsafecast.BytesToUint64(src) { + for i, v := range unsafecast.Slice[uint64](src) { b0[i] = byte(v >> 0) b1[i] = byte(v >> 8) b2[i] = byte(v >> 16) @@ -49,7 +49,7 @@ func decodeFloat(dst, src []byte) { b2 := src[2*n : 3*n] b3 := src[3*n : 4*n] - dst32 := unsafecast.BytesToUint32(dst) + dst32 := unsafecast.Slice[uint32](dst) for i := range dst32 { dst32[i] = uint32(b0[i]) | uint32(b1[i])<<8 | @@ -69,7 +69,7 @@ func decodeDouble(dst, src []byte) { b6 := src[6*n : 7*n] b7 := src[7*n : 8*n] - dst64 := unsafecast.BytesToUint64(dst) + dst64 := unsafecast.Slice[uint64](dst) for i := range dst64 { dst64[i] = uint64(b0[i]) | uint64(b1[i])<<8 | diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go index 36704974fad..cf9d4cfc9ec 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed.go @@ -34,15 +34,15 @@ func (e *BinaryPackedEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, err } func (e *BinaryPackedEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, _, err := decodeInt32(buf[:0], src) - return unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *BinaryPackedEncoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { - buf := unsafecast.Int64ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, _, err := decodeInt64(buf[:0], src) - return unsafecast.BytesToInt64(buf), e.wrap(err) + return unsafecast.Slice[int64](buf), e.wrap(err) } func (e *BinaryPackedEncoding) wrap(err error) error { @@ -290,7 +290,7 @@ func decodeInt32(dst, src []byte) ([]byte, []byte, error) { writeOffset := len(dst) dst = resize(dst, len(dst)+4*totalValues) - out := unsafecast.BytesToInt32(dst) + out := unsafecast.Slice[int32](dst) out[writeOffset] = int32(firstValue) writeOffset++ totalValues-- @@ -354,7 +354,7 @@ func decodeInt64(dst, src []byte) ([]byte, []byte, error) { writeOffset := len(dst) dst = resize(dst, len(dst)+8*totalValues) - out := unsafecast.BytesToInt64(dst) + out := unsafecast.Slice[int64](dst) out[writeOffset] = firstValue writeOffset++ totalValues-- diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go index 5da4c0e933d..11a5a538b1f 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/binary_packed_amd64.go @@ -230,7 +230,7 @@ func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) { case hasAVX2 && bitWidth <= 31: decodeMiniBlockInt32x27to31bitsAVX2(dst, src, bitWidth) case bitWidth == 32: - copy(dst, unsafecast.Uint32ToInt32(src)) + copy(dst, unsafecast.Slice[int32](src)) default: decodeMiniBlockInt32Default(dst, src, bitWidth) } @@ -249,7 +249,7 @@ func decodeMiniBlockInt64Default(dst []int64, src []uint32, bitWidth uint) func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) { switch { case bitWidth == 64: - copy(dst, unsafecast.Uint32ToInt64(src)) + copy(dst, unsafecast.Slice[int64](src)) default: decodeMiniBlockInt64Default(dst, src, bitWidth) } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go index 7b330eac89b..5fe27797754 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go @@ -20,8 +20,8 @@ func (buf *int32Buffer) resize(size int) { } func (buf *int32Buffer) decode(src []byte) ([]byte, error) { - values, remain, err := decodeInt32(unsafecast.Int32ToBytes(buf.values[:0]), src) - buf.values = unsafecast.BytesToInt32(values) + values, remain, err := decodeInt32(unsafecast.Slice[byte](buf.values[:0]), src) + buf.values = unsafecast.Slice[int32](values) return remain, err } diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go index a107e61d028..8a58af5c39d 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go @@ -6,6 +6,7 @@ package plain import ( "encoding/binary" "fmt" + "golang.org/x/sys/cpu" "io" "math" @@ -37,23 +38,67 @@ func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { } func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { - return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - return append(dst[:0], deprecated.Int96ToBytes(src)...), nil + return append(dst[:0], unsafecast.Slice[byte](src)...), nil } func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { - return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil + if cpu.IsBigEndian { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil + } else { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil + } } func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { @@ -86,35 +131,84 @@ func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) } - return append(dst[:0], unsafecast.BytesToInt32(src)...), nil + + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + byteDec := make([]int32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[int32](src)...), nil + } } func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) } - return append(dst[:0], unsafecast.BytesToInt64(src)...), nil + + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + byteDec := make([]int64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) + idx += 8 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[int64](src)...), nil + } } func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) { if (len(src) % 12) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) } - return append(dst[:0], deprecated.BytesToInt96(src)...), nil + return append(dst[:0], unsafecast.Slice[deprecated.Int96](src)...), nil } func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) } - return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + byteDec := make([]float32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) + idx += 4 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[float32](src)...), nil + } } func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) } - return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + byteDec := make([]float64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) + idx += 8 + } + + return append(dst[:0], (byteDec)...), nil + } else { + return append(dst[:0], unsafecast.Slice[float64](src)...), nil + } } func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go index 763172de5b3..8304afc0188 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/dictionary.go @@ -31,9 +31,9 @@ func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, erro if len(src) == 0 { return dst[:0], nil } - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, err := decodeInt32(buf[:0], src[1:], uint(src[0])) - return unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *DictionaryEncoding) wrap(err error) error { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go index 4b63ed6d42b..82ddaa1b512 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go @@ -11,6 +11,8 @@ import ( "io" "unsafe" + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/bitpack" @@ -83,9 +85,9 @@ func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) { } func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - buf := unsafecast.Int32ToBytes(dst) + buf := unsafecast.Slice[byte](dst) buf, err := decodeInt32(buf[:0], src, uint(e.BitWidth)) - return unsafecast.BytesToInt32(buf), e.wrap(err) + return unsafecast.Slice[int32](buf), e.wrap(err) } func (e *Encoding) wrap(err error) error { @@ -151,7 +153,17 @@ func encodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) { } if len(src) >= 8 { - words := unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8) + words := unsafecast.Slice[uint64](src) + if cpu.IsBigEndian { + srcLen := (len(src) / 8) + idx := 0 + for k := range srcLen { + words[k] = binary.LittleEndian.Uint64((src)[idx:(8 + idx)]) + idx += 8 + } + } else { + words = unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8) + } for i := 0; i < len(words); { j := i @@ -196,14 +208,14 @@ func encodeInt32(dst []byte, src []int32, bitWidth uint) ([]byte, error) { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } if bitWidth == 0 { - if !isZero(unsafecast.Int32ToBytes(src)) { + if !isZero(unsafecast.Slice[byte](src)) { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } return appendUvarint(dst, uint64(len(src))<<1), nil } if len(src) >= 8 { - words := unsafe.Slice((*[8]int32)(unsafe.Pointer(&src[0])), len(src)/8) + words := unsafecast.Slice[[8]int32](src) for i := 0; i < len(words); { j := i @@ -373,7 +385,7 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) { in = buf } - out := unsafecast.BytesToInt32(dst[offset:]) + out := unsafecast.Slice[int32](dst[offset:]) bitpack.UnpackInt32(out, in, bitWidth) i += length } else { @@ -385,6 +397,13 @@ func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) { bits := [4]byte{} copy(bits[:], src[i:j]) + + //swap the bytes in the "bits" array to take care of big endian arch + if cpu.IsBigEndian { + for m, n := 0, 3; m < n; m, n = m+1, n-1 { + bits[m], bits[n] = bits[n], bits[m] + } + } dst = appendRepeat(dst, bits[:], count) i = j } @@ -500,7 +519,7 @@ func grow(buf []byte, size int) []byte { } func encodeInt32BitpackDefault(dst []byte, src [][8]int32, bitWidth uint) int { - bits := unsafe.Slice((*int32)(unsafe.Pointer(&src[0])), len(src)*8) + bits := unsafecast.Slice[int32](src) bitpack.PackInt32(dst, bits, bitWidth) return bitpack.ByteCount(uint(len(src)*8) * bitWidth) } diff --git a/vendor/github.com/segmentio/encoding/LICENSE b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE similarity index 100% rename from vendor/github.com/segmentio/encoding/LICENSE rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/LICENSE diff --git a/vendor/github.com/segmentio/encoding/thrift/binary.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go similarity index 98% rename from vendor/github.com/segmentio/encoding/thrift/binary.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go index 18d95d9abf9..73f15b03afb 100644 --- a/vendor/github.com/segmentio/encoding/thrift/binary.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/binary.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "math" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // BinaryProtocol is a Protocol implementation for the binary thrift protocol. @@ -96,7 +98,7 @@ func (r *binaryReader) ReadBytes() ([]byte, error) { func (r *binaryReader) ReadString() (string, error) { b, err := r.ReadBytes() - return unsafeBytesToString(b), err + return unsafecast.String(b), err } func (r *binaryReader) ReadLength() (int, error) { @@ -126,7 +128,7 @@ func (r *binaryReader) ReadMessage() (Message, error) { if err != nil { return m, dontExpectEOF(err) } - m.Name = unsafeBytesToString(s) + m.Name = unsafecast.String(s) t, err := r.ReadInt8() if err != nil { diff --git a/vendor/github.com/segmentio/encoding/thrift/compact.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go similarity index 98% rename from vendor/github.com/segmentio/encoding/thrift/compact.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go index 6a286572a73..7bca5771deb 100644 --- a/vendor/github.com/segmentio/encoding/thrift/compact.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/compact.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "math" + + "github.com/parquet-go/parquet-go/internal/unsafecast" ) // CompactProtocol is a Protocol implementation for the compact thrift protocol. @@ -77,7 +79,7 @@ func (r *compactReader) ReadBytes() ([]byte, error) { func (r *compactReader) ReadString() (string, error) { b, err := r.ReadBytes() - return unsafeBytesToString(b), err + return unsafecast.String(b), err } func (r *compactReader) ReadLength() (int, error) { diff --git a/vendor/github.com/segmentio/encoding/thrift/debug.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/debug.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go diff --git a/vendor/github.com/segmentio/encoding/thrift/decode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/decode.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go diff --git a/vendor/github.com/segmentio/encoding/thrift/encode.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/encode.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go diff --git a/vendor/github.com/segmentio/encoding/thrift/error.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/error.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/error.go diff --git a/vendor/github.com/segmentio/encoding/thrift/protocol.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/protocol.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/protocol.go diff --git a/vendor/github.com/segmentio/encoding/thrift/struct.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/struct.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/struct.go diff --git a/vendor/github.com/segmentio/encoding/thrift/thrift.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go similarity index 100% rename from vendor/github.com/segmentio/encoding/thrift/thrift.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/thrift.go diff --git a/vendor/github.com/segmentio/encoding/thrift/unsafe.go b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go similarity index 85% rename from vendor/github.com/segmentio/encoding/thrift/unsafe.go rename to vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go index 9572b40ef0a..b27c6489d8d 100644 --- a/vendor/github.com/segmentio/encoding/thrift/unsafe.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/thrift/unsafe.go @@ -18,7 +18,3 @@ func makeTypeID(t reflect.Type) typeID { ptr: (*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1], } } - -func unsafeBytesToString(b []byte) string { - return *(*string)(unsafe.Pointer(&b)) -} diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/values.go b/vendor/github.com/parquet-go/parquet-go/encoding/values.go index a53a7b9f896..41ab0a23e35 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/values.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/values.go @@ -82,27 +82,27 @@ func (v *Values) Boolean() []byte { func (v *Values) Int32() []int32 { v.assertKind(Int32) - return unsafecast.BytesToInt32(v.data) + return unsafecast.Slice[int32](v.data) } func (v *Values) Int64() []int64 { v.assertKind(Int64) - return unsafecast.BytesToInt64(v.data) + return unsafecast.Slice[int64](v.data) } func (v *Values) Int96() []deprecated.Int96 { v.assertKind(Int96) - return deprecated.BytesToInt96(v.data) + return unsafecast.Slice[deprecated.Int96](v.data) } func (v *Values) Float() []float32 { v.assertKind(Float) - return unsafecast.BytesToFloat32(v.data) + return unsafecast.Slice[float32](v.data) } func (v *Values) Double() []float64 { v.assertKind(Double) - return unsafecast.BytesToFloat64(v.data) + return unsafecast.Slice[float64](v.data) } func (v *Values) ByteArray() (data []byte, offsets []uint32) { @@ -117,123 +117,86 @@ func (v *Values) FixedLenByteArray() (data []byte, size int) { func (v *Values) Uint32() []uint32 { v.assertKind(Int32) - return unsafecast.BytesToUint32(v.data) + return unsafecast.Slice[uint32](v.data) } func (v *Values) Uint64() []uint64 { v.assertKind(Int64) - return unsafecast.BytesToUint64(v.data) + return unsafecast.Slice[uint64](v.data) } func (v *Values) Uint128() [][16]byte { v.assertKind(FixedLenByteArray) v.assertSize(16) - return unsafecast.BytesToUint128(v.data) + return unsafecast.Slice[[16]byte](v.data) +} + +func makeValues[T any](kind Kind, values []T) Values { + return Values{kind: kind, data: unsafecast.Slice[byte](values)} } func BooleanValues(values []byte) Values { - return Values{ - kind: Boolean, - data: values, - } + return makeValues(Boolean, values) } func Int32Values(values []int32) Values { - return Values{ - kind: Int32, - data: unsafecast.Int32ToBytes(values), - } + return makeValues(Int32, values) } func Int64Values(values []int64) Values { - return Values{ - kind: Int64, - data: unsafecast.Int64ToBytes(values), - } + return makeValues(Int64, values) } func Int96Values(values []deprecated.Int96) Values { - return Values{ - kind: Int96, - data: deprecated.Int96ToBytes(values), - } + return makeValues(Int96, values) } func FloatValues(values []float32) Values { - return Values{ - kind: Float, - data: unsafecast.Float32ToBytes(values), - } + return makeValues(Float, values) } func DoubleValues(values []float64) Values { - return Values{ - kind: Double, - data: unsafecast.Float64ToBytes(values), - } + return makeValues(Double, values) } func ByteArrayValues(values []byte, offsets []uint32) Values { - return Values{ - kind: ByteArray, - data: values, - offsets: offsets, - } + return Values{kind: ByteArray, data: values, offsets: offsets} } func FixedLenByteArrayValues(values []byte, size int) Values { - return Values{ - kind: FixedLenByteArray, - size: int32(size), - data: values, - } + return Values{kind: FixedLenByteArray, size: int32(size), data: values} } func Uint32Values(values []uint32) Values { - return Int32Values(unsafecast.Uint32ToInt32(values)) + return Int32Values(unsafecast.Slice[int32](values)) } func Uint64Values(values []uint64) Values { - return Int64Values(unsafecast.Uint64ToInt64(values)) + return Int64Values(unsafecast.Slice[int64](values)) } func Uint128Values(values [][16]byte) Values { - return FixedLenByteArrayValues(unsafecast.Uint128ToBytes(values), 16) + return FixedLenByteArrayValues(unsafecast.Slice[byte](values), 16) } func Int32ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int32, - data: values, - } + return Values{kind: Int32, data: values} } func Int64ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int64, - data: values, - } + return Values{kind: Int64, data: values} } func Int96ValuesFromBytes(values []byte) Values { - return Values{ - kind: Int96, - data: values, - } + return Values{kind: Int96, data: values} } func FloatValuesFromBytes(values []byte) Values { - return Values{ - kind: Float, - data: values, - } + return Values{kind: Float, data: values} } func DoubleValuesFromBytes(values []byte) Values { - return Values{ - kind: Double, - data: values, - } + return Values{kind: Double, data: values} } func EncodeBoolean(dst []byte, src Values, enc Encoding) ([]byte, error) { diff --git a/vendor/github.com/parquet-go/parquet-go/file.go b/vendor/github.com/parquet-go/parquet-go/file.go index 791eb51eece..384042b308c 100644 --- a/vendor/github.com/parquet-go/parquet-go/file.go +++ b/vendor/github.com/parquet-go/parquet-go/file.go @@ -9,9 +9,9 @@ import ( "sort" "strings" "sync" + "sync/atomic" - "github.com/segmentio/encoding/thrift" - + "github.com/parquet-go/parquet-go/encoding/thrift" "github.com/parquet-go/parquet-go/format" ) @@ -391,8 +391,8 @@ func (g *fileRowGroup) init(file *File, schema *Schema, columns []*Column, rowGr if file.hasIndexes() { j := (int(rowGroup.Ordinal) * len(columns)) + i - fileColumnChunks[i].columnIndex = &file.columnIndexes[j] - fileColumnChunks[i].offsetIndex = &file.offsetIndexes[j] + fileColumnChunks[i].columnIndex.Store(&file.columnIndexes[j]) + fileColumnChunks[i].offsetIndex.Store(&file.offsetIndexes[j]) } g.columns[i] = &fileColumnChunks[i] @@ -442,8 +442,8 @@ type fileColumnChunk struct { column *Column bloomFilter *bloomFilter rowGroup *format.RowGroup - columnIndex *format.ColumnIndex - offsetIndex *format.OffsetIndex + columnIndex atomic.Pointer[format.ColumnIndex] + offsetIndex atomic.Pointer[format.OffsetIndex] chunk *format.ColumnChunk } @@ -462,23 +462,25 @@ func (c *fileColumnChunk) Pages() Pages { } func (c *fileColumnChunk) ColumnIndex() (ColumnIndex, error) { - if err := c.readColumnIndex(); err != nil { + index, err := c.readColumnIndex() + if err != nil { return nil, err } - if c.columnIndex == nil || c.chunk.ColumnIndexOffset == 0 { + if index == nil || c.chunk.ColumnIndexOffset == 0 { return nil, ErrMissingColumnIndex } return fileColumnIndex{c}, nil } func (c *fileColumnChunk) OffsetIndex() (OffsetIndex, error) { - if err := c.readOffsetIndex(); err != nil { + index, err := c.readOffsetIndex() + if err != nil { return nil, err } - if c.offsetIndex == nil || c.chunk.OffsetIndexOffset == 0 { + if index == nil || c.chunk.OffsetIndexOffset == 0 { return nil, ErrMissingOffsetIndex } - return (*fileOffsetIndex)(c.offsetIndex), nil + return (*fileOffsetIndex)(index), nil } func (c *fileColumnChunk) BloomFilter() BloomFilter { @@ -492,48 +494,59 @@ func (c *fileColumnChunk) NumValues() int64 { return c.chunk.MetaData.NumValues } -func (c *fileColumnChunk) readColumnIndex() error { - if c.columnIndex != nil { - return nil +func (c *fileColumnChunk) readColumnIndex() (*format.ColumnIndex, error) { + if index := c.columnIndex.Load(); index != nil { + return index, nil } chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()] offset, length := chunkMeta.ColumnIndexOffset, chunkMeta.ColumnIndexLength if offset == 0 { - return nil + return nil, nil } indexData := make([]byte, int(length)) var columnIndex format.ColumnIndex if _, err := readAt(c.file.reader, indexData, offset); err != nil { - return fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err) + return nil, fmt.Errorf("read %d bytes column index at offset %d: %w", length, offset, err) } if err := thrift.Unmarshal(&c.file.protocol, indexData, &columnIndex); err != nil { - return fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) + return nil, fmt.Errorf("decode column index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) } - c.columnIndex = &columnIndex - return nil + index := &columnIndex + // We do a CAS (and Load on CAS failure) instead of a simple Store for + // the nice property that concurrent calling goroutines will only ever + // observe a single pointer value for the result. + if !c.columnIndex.CompareAndSwap(nil, index) { + // another goroutine populated it since we last read the pointer + return c.columnIndex.Load(), nil + } + return index, nil } -func (c *fileColumnChunk) readOffsetIndex() error { - if c.offsetIndex != nil { - return nil +func (c *fileColumnChunk) readOffsetIndex() (*format.OffsetIndex, error) { + if index := c.offsetIndex.Load(); index != nil { + return index, nil } chunkMeta := c.file.metadata.RowGroups[c.rowGroup.Ordinal].Columns[c.Column()] offset, length := chunkMeta.OffsetIndexOffset, chunkMeta.OffsetIndexLength if offset == 0 { - return nil + return nil, nil } indexData := make([]byte, int(length)) var offsetIndex format.OffsetIndex if _, err := readAt(c.file.reader, indexData, offset); err != nil { - return fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err) + return nil, fmt.Errorf("read %d bytes offset index at offset %d: %w", length, offset, err) } if err := thrift.Unmarshal(&c.file.protocol, indexData, &offsetIndex); err != nil { - return fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) + return nil, fmt.Errorf("decode offset index: rowGroup=%d columnChunk=%d/%d: %w", c.rowGroup.Ordinal, c.Column(), len(c.rowGroup.Columns), err) } - c.offsetIndex = &offsetIndex - return nil + index := &offsetIndex + if !c.offsetIndex.CompareAndSwap(nil, index) { + // another goroutine populated it since we last read the pointer + return c.offsetIndex.Load(), nil + } + return index, nil } type filePages struct { @@ -745,7 +758,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) { if f.chunk == nil { return io.ErrClosedPipe } - if f.chunk.offsetIndex == nil { + if index := f.chunk.offsetIndex.Load(); index == nil { _, err = f.section.Seek(f.dataOffset-f.baseOffset, io.SeekStart) f.skip = rowIndex f.index = 0 @@ -753,7 +766,7 @@ func (f *filePages) SeekToRow(rowIndex int64) (err error) { f.index = 1 } } else { - pages := f.chunk.offsetIndex.PageLocations + pages := index.PageLocations index := sort.Search(len(pages), func(i int) bool { return pages[i].FirstRowIndex > rowIndex }) - 1 diff --git a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go index a95e9e96be9..0a1686f17b6 100644 --- a/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go +++ b/vendor/github.com/parquet-go/parquet-go/hashprobe/hashprobe.go @@ -98,7 +98,7 @@ func (t *Int32Table) Len() int { return t.len } func (t *Int32Table) Cap() int { return t.size() } func (t *Int32Table) Probe(keys, values []int32) int { - return t.probe(unsafecast.Int32ToUint32(keys), values) + return t.probe(unsafecast.Slice[uint32](keys), values) } func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int { @@ -118,7 +118,7 @@ func (t *Float32Table) Len() int { return t.len } func (t *Float32Table) Cap() int { return t.size() } func (t *Float32Table) Probe(keys []float32, values []int32) int { - return t.probe(unsafecast.Float32ToUint32(keys), values) + return t.probe(unsafecast.Slice[uint32](keys), values) } func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int { @@ -342,7 +342,7 @@ func (t *Int64Table) Len() int { return t.len } func (t *Int64Table) Cap() int { return t.size() } func (t *Int64Table) Probe(keys []int64, values []int32) int { - return t.probe(unsafecast.Int64ToUint64(keys), values) + return t.probe(unsafecast.Slice[uint64](keys), values) } func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int { @@ -362,7 +362,7 @@ func (t *Float64Table) Len() int { return t.len } func (t *Float64Table) Cap() int { return t.size() } func (t *Float64Table) Probe(keys []float64, values []int32) int { - return t.probe(unsafecast.Float64ToUint64(keys), values) + return t.probe(unsafecast.Slice[uint64](keys), values) } func (t *Float64Table) ProbeArray(keys sparse.Float64Array, values []int32) int { @@ -639,7 +639,7 @@ func (t *table128) init(cap int, maxLoad float64) { func (t *table128) kv() (keys [][16]byte, values []int32) { i := t.cap * 16 - return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:]) + return unsafecast.Slice[[16]byte](t.table[:i]), unsafecast.Slice[int32](t.table[i:]) } func (t *table128) grow(totalValues int) { @@ -753,8 +753,8 @@ func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int { func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int { modulo := uintptr(tableCap) - 1 offset := uintptr(tableCap) * 16 - tableKeys := unsafecast.BytesToUint128(table[:offset]) - tableValues := unsafecast.BytesToInt32(table[offset:]) + tableKeys := unsafecast.Slice[[16]byte](table[:offset]) + tableValues := unsafecast.Slice[int32](table[offset:]) for i, hash := range hashes { key := keys.Index(i) diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go index fd92c18380d..f3932223919 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_amd64.go @@ -29,7 +29,7 @@ func unpackInt32(dst []int32, src []byte, bitWidth uint) { case hasAVX2 && bitWidth <= 31: unpackInt32x27to31bitsAVX2(dst, src, bitWidth) case bitWidth == 32: - copy(dst, unsafecast.BytesToInt32(src)) + copy(dst, unsafecast.Slice[int32](src)) default: unpackInt32Default(dst, src, bitWidth) } diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go index b7d46ba9536..cddbd773a51 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go @@ -3,11 +3,27 @@ package bitpack import ( + "encoding/binary" + + "golang.org/x/sys/cpu" + "github.com/parquet-go/parquet-go/internal/unsafecast" ) func unpackInt32(dst []int32, src []byte, bitWidth uint) { - bits := unsafecast.BytesToUint32(src) + var bits []uint32 + if cpu.IsBigEndian { + srcLen := (len(src) / 4) + bits = make([]uint32, srcLen) + idx := 0 + for k := range srcLen { + bits[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) + idx += 4 + } + } else { + bits = unsafecast.Slice[uint32](src) + } + bitMask := uint32(1<= 0 { + if i := bytes.IndexByte(unsafecast.Slice[byte](data), 0); i >= 0 { return i } return len(data) } func streakOfFalse(data []bool) int { - if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 1); i >= 0 { + if i := bytes.IndexByte(unsafecast.Slice[byte](data), 1); i >= 0 { return i } return len(data) diff --git a/vendor/github.com/parquet-go/parquet-go/order_purego.go b/vendor/github.com/parquet-go/parquet-go/order_purego.go index 2011455e152..44c4d7905e3 100644 --- a/vendor/github.com/parquet-go/parquet-go/order_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/order_purego.go @@ -2,174 +2,28 @@ package parquet -// ----------------------------------------------------------------------------- -// TODO: use generics versions of the these functions to reduce the amount of -// code to maintain when we drop compatilibty with Go version older than 1.18. -// ----------------------------------------------------------------------------- +import "cmp" -func orderOfInt32(data []int32) int { - if len(data) > 1 { - if int32AreInAscendingOrder(data) { - return +1 - } - if int32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfInt64(data []int64) int { - if len(data) > 1 { - if int64AreInAscendingOrder(data) { - return +1 - } - if int64AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfUint32(data []uint32) int { - if len(data) > 1 { - if uint32AreInAscendingOrder(data) { - return +1 - } - if uint32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfUint64(data []uint64) int { - if len(data) > 1 { - if uint64AreInAscendingOrder(data) { - return +1 - } - if uint64AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} - -func orderOfFloat32(data []float32) int { - if len(data) > 1 { - if float32AreInAscendingOrder(data) { - return +1 - } - if float32AreInDescendingOrder(data) { - return -1 - } - } - return 0 -} +func orderOfInt32(data []int32) int { return orderOf(data) } +func orderOfInt64(data []int64) int { return orderOf(data) } +func orderOfUint32(data []uint32) int { return orderOf(data) } +func orderOfUint64(data []uint64) int { return orderOf(data) } +func orderOfFloat32(data []float32) int { return orderOf(data) } +func orderOfFloat64(data []float64) int { return orderOf(data) } -func orderOfFloat64(data []float64) int { +func orderOf[T cmp.Ordered](data []T) int { if len(data) > 1 { - if float64AreInAscendingOrder(data) { + if orderIsAscending(data) { return +1 } - if float64AreInDescendingOrder(data) { + if orderIsDescending(data) { return -1 } } return 0 } -func int32AreInAscendingOrder(data []int32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func int32AreInDescendingOrder(data []int32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func int64AreInAscendingOrder(data []int64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func int64AreInDescendingOrder(data []int64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func uint32AreInAscendingOrder(data []uint32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func uint32AreInDescendingOrder(data []uint32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func uint64AreInAscendingOrder(data []uint64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func uint64AreInDescendingOrder(data []uint64) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func float32AreInAscendingOrder(data []float32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] > data[i] { - return false - } - } - return true -} - -func float32AreInDescendingOrder(data []float32) bool { - for i := len(data) - 1; i > 0; i-- { - if data[i-1] < data[i] { - return false - } - } - return true -} - -func float64AreInAscendingOrder(data []float64) bool { +func orderIsAscending[T cmp.Ordered](data []T) bool { for i := len(data) - 1; i > 0; i-- { if data[i-1] > data[i] { return false @@ -178,7 +32,7 @@ func float64AreInAscendingOrder(data []float64) bool { return true } -func float64AreInDescendingOrder(data []float64) bool { +func orderIsDescending[T cmp.Ordered](data []T) bool { for i := len(data) - 1; i > 0; i-- { if data[i-1] < data[i] { return false diff --git a/vendor/github.com/parquet-go/parquet-go/page_values.go b/vendor/github.com/parquet-go/parquet-go/page_values.go index 964220b4ebb..ecbdffb0c8d 100644 --- a/vendor/github.com/parquet-go/parquet-go/page_values.go +++ b/vendor/github.com/parquet-go/parquet-go/page_values.go @@ -149,7 +149,7 @@ type int32PageValues struct { } func (r *int32PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt32s(unsafecast.BytesToInt32(b)) + n, err = r.ReadInt32s(unsafecast.Slice[int32](b)) return 4 * n, err } @@ -180,7 +180,7 @@ type int64PageValues struct { } func (r *int64PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt64s(unsafecast.BytesToInt64(b)) + n, err = r.ReadInt64s(unsafecast.Slice[int64](b)) return 8 * n, err } @@ -211,7 +211,7 @@ type int96PageValues struct { } func (r *int96PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadInt96s(deprecated.BytesToInt96(b)) + n, err = r.ReadInt96s(unsafecast.Slice[deprecated.Int96](b)) return 12 * n, err } @@ -242,7 +242,7 @@ type floatPageValues struct { } func (r *floatPageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadFloats(unsafecast.BytesToFloat32(b)) + n, err = r.ReadFloats(unsafecast.Slice[float32](b)) return 4 * n, err } @@ -273,7 +273,7 @@ type doublePageValues struct { } func (r *doublePageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b)) + n, err = r.ReadDoubles(unsafecast.Slice[float64](b)) return 8 * n, err } @@ -395,7 +395,7 @@ type uint32PageValues struct { } func (r *uint32PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadUint32s(unsafecast.BytesToUint32(b)) + n, err = r.ReadUint32s(unsafecast.Slice[uint32](b)) return 4 * n, err } @@ -426,7 +426,7 @@ type uint64PageValues struct { } func (r *uint64PageValues) Read(b []byte) (n int, err error) { - n, err = r.ReadUint64s(unsafecast.BytesToUint64(b)) + n, err = r.ReadUint64s(unsafecast.Slice[uint64](b)) return 8 * n, err } diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/array.go b/vendor/github.com/parquet-go/parquet-go/sparse/array.go index 94285becb8b..fecfb4dc4d7 100644 --- a/vendor/github.com/parquet-go/parquet-go/sparse/array.go +++ b/vendor/github.com/parquet-go/parquet-go/sparse/array.go @@ -8,7 +8,7 @@ import ( type Array struct{ array } func UnsafeArray(base unsafe.Pointer, length int, offset uintptr) Array { - return Array{makeArray(base, uintptr(length), offset)} + return Array{unsafeArray(base, length, offset)} } func (a Array) Len() int { return int(a.len) } @@ -36,8 +36,17 @@ type array struct { off uintptr } -func makeArray(base unsafe.Pointer, length, offset uintptr) array { - return array{ptr: base, len: length, off: offset} +func makeArray[T any](base []T) array { + var z T + return array{ + ptr: unsafe.Pointer(unsafe.SliceData(base)), + len: uintptr(len(base)), + off: unsafe.Sizeof(z), + } +} + +func unsafeArray(base unsafe.Pointer, length int, offset uintptr) array { + return array{ptr: base, len: uintptr(length), off: offset} } func (a array) index(i int) unsafe.Pointer { @@ -72,11 +81,11 @@ func (a array) offset(off uintptr) array { type BoolArray struct{ array } func MakeBoolArray(values []bool) BoolArray { - return BoolArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 1)} + return BoolArray{makeArray(values)} } func UnsafeBoolArray(base unsafe.Pointer, length int, offset uintptr) BoolArray { - return BoolArray{makeArray(base, uintptr(length), offset)} + return BoolArray{unsafeArray(base, length, offset)} } func (a BoolArray) Len() int { return int(a.len) } @@ -88,11 +97,11 @@ func (a BoolArray) UnsafeArray() Array { return Array{a.array} } type Int8Array struct{ array } func MakeInt8Array(values []int8) Int8Array { - return Int8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int8Array{makeArray(values)} } func UnsafeInt8Array(base unsafe.Pointer, length int, offset uintptr) Int8Array { - return Int8Array{makeArray(base, uintptr(length), offset)} + return Int8Array{unsafeArray(base, length, offset)} } func (a Int8Array) Len() int { return int(a.len) } @@ -104,11 +113,11 @@ func (a Int8Array) UnsafeArray() Array { return Array{a.array} } type Int16Array struct{ array } func MakeInt16Array(values []int16) Int16Array { - return Int16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int16Array{makeArray(values)} } func UnsafeInt16Array(base unsafe.Pointer, length int, offset uintptr) Int16Array { - return Int16Array{makeArray(base, uintptr(length), offset)} + return Int16Array{unsafeArray(base, length, offset)} } func (a Int16Array) Len() int { return int(a.len) } @@ -122,11 +131,11 @@ func (a Int16Array) UnsafeArray() Array { return Array{a.array} } type Int32Array struct{ array } func MakeInt32Array(values []int32) Int32Array { - return Int32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + return Int32Array{makeArray(values)} } func UnsafeInt32Array(base unsafe.Pointer, length int, offset uintptr) Int32Array { - return Int32Array{makeArray(base, uintptr(length), offset)} + return Int32Array{unsafeArray(base, length, offset)} } func (a Int32Array) Len() int { return int(a.len) } @@ -142,11 +151,11 @@ func (a Int32Array) UnsafeArray() Array { return Array{a.array} } type Int64Array struct{ array } func MakeInt64Array(values []int64) Int64Array { - return Int64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Int64Array{makeArray(values)} } func UnsafeInt64Array(base unsafe.Pointer, length int, offset uintptr) Int64Array { - return Int64Array{makeArray(base, uintptr(length), offset)} + return Int64Array{unsafeArray(base, length, offset)} } func (a Int64Array) Len() int { return int(a.len) } @@ -164,11 +173,11 @@ func (a Int64Array) UnsafeArray() Array { return Array{a.array} } type Float32Array struct{ array } func MakeFloat32Array(values []float32) Float32Array { - return Float32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + return Float32Array{makeArray(values)} } func UnsafeFloat32Array(base unsafe.Pointer, length int, offset uintptr) Float32Array { - return Float32Array{makeArray(base, uintptr(length), offset)} + return Float32Array{unsafeArray(base, length, offset)} } func (a Float32Array) Len() int { return int(a.len) } @@ -181,11 +190,11 @@ func (a Float32Array) UnsafeArray() Array { return Array{a.array} } type Float64Array struct{ array } func MakeFloat64Array(values []float64) Float64Array { - return Float64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Float64Array{makeArray(values)} } func UnsafeFloat64Array(base unsafe.Pointer, length int, offset uintptr) Float64Array { - return Float64Array{makeArray(base, uintptr(length), offset)} + return Float64Array{unsafeArray(base, length, offset)} } func (a Float64Array) Len() int { return int(a.len) } @@ -197,11 +206,11 @@ func (a Float64Array) UnsafeArray() Array { return Array{a.array} } type Uint8Array struct{ array } func MakeUint8Array(values []uint8) Uint8Array { - return Uint8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint8Array{makeArray(values)} } func UnsafeUint8Array(base unsafe.Pointer, length int, offset uintptr) Uint8Array { - return Uint8Array{makeArray(base, uintptr(length), offset)} + return Uint8Array{unsafeArray(base, length, offset)} } func (a Uint8Array) Len() int { return int(a.len) } @@ -212,11 +221,11 @@ func (a Uint8Array) UnsafeArray() Array { return Array{a.array} } type Uint16Array struct{ array } func MakeUint16Array(values []uint16) Uint16Array { - return Uint16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint16Array{makeArray(values)} } func UnsafeUint16Array(base unsafe.Pointer, length int, offset uintptr) Uint16Array { - return Uint16Array{makeArray(base, uintptr(length), offset)} + return Uint16Array{unsafeArray(base, length, offset)} } func (a Uint16Array) Len() int { return int(a.len) } @@ -228,11 +237,11 @@ func (a Uint16Array) UnsafeArray() Array { return Array{a.array} } type Uint32Array struct{ array } func MakeUint32Array(values []uint32) Uint32Array { - return Uint32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} + return Uint32Array{makeArray(values)} } func UnsafeUint32Array(base unsafe.Pointer, length int, offset uintptr) Uint32Array { - return Uint32Array{makeArray(base, uintptr(length), offset)} + return Uint32Array{unsafeArray(base, length, offset)} } func (a Uint32Array) Len() int { return int(a.len) } @@ -245,11 +254,11 @@ func (a Uint32Array) UnsafeArray() Array { return Array{a.array} } type Uint64Array struct{ array } func MakeUint64Array(values []uint64) Uint64Array { - return Uint64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} + return Uint64Array{makeArray(values)} } func UnsafeUint64Array(base unsafe.Pointer, length int, offset uintptr) Uint64Array { - return Uint64Array{makeArray(base, uintptr(length), offset)} + return Uint64Array{unsafeArray(base, length, offset)} } func (a Uint64Array) Len() int { return int(a.len) } @@ -263,11 +272,11 @@ func (a Uint64Array) UnsafeArray() Array { return Array{a.array} } type Uint128Array struct{ array } func MakeUint128Array(values [][16]byte) Uint128Array { - return Uint128Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 16)} + return Uint128Array{makeArray(values)} } func UnsafeUint128Array(base unsafe.Pointer, length int, offset uintptr) Uint128Array { - return Uint128Array{makeArray(base, uintptr(length), offset)} + return Uint128Array{unsafeArray(base, length, offset)} } func (a Uint128Array) Len() int { return int(a.len) } @@ -283,11 +292,11 @@ type StringArray struct{ array } func MakeStringArray(values []string) StringArray { const sizeOfString = unsafe.Sizeof("") - return StringArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfString)} + return StringArray{makeArray(values)} } func UnsafeStringArray(base unsafe.Pointer, length int, offset uintptr) StringArray { - return StringArray{makeArray(base, uintptr(length), offset)} + return StringArray{unsafeArray(base, length, offset)} } func (a StringArray) Len() int { return int(a.len) } @@ -298,12 +307,11 @@ func (a StringArray) UnsafeArray() Array { return Array{a.array} } type TimeArray struct{ array } func MakeTimeArray(values []time.Time) TimeArray { - const sizeOfTime = unsafe.Sizeof(time.Time{}) - return TimeArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfTime)} + return TimeArray{makeArray(values)} } func UnsafeTimeArray(base unsafe.Pointer, length int, offset uintptr) TimeArray { - return TimeArray{makeArray(base, uintptr(length), offset)} + return TimeArray{unsafeArray(base, length, offset)} } func (a TimeArray) Len() int { return int(a.len) } diff --git a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go index 746a0930d1c..d7d72d091bf 100644 --- a/vendor/github.com/parquet-go/parquet-go/sparse/gather.go +++ b/vendor/github.com/parquet-go/parquet-go/sparse/gather.go @@ -1,21 +1,21 @@ package sparse -import "unsafe" +import "github.com/parquet-go/parquet-go/internal/unsafecast" func GatherInt32(dst []int32, src Int32Array) int { - return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array()) + return GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array()) } func GatherInt64(dst []int64, src Int64Array) int { - return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array()) + return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array()) } func GatherFloat32(dst []float32, src Float32Array) int { - return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array()) + return GatherUint32(unsafecast.Slice[uint32](dst), src.Uint32Array()) } func GatherFloat64(dst []float64, src Float64Array) int { - return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array()) + return GatherUint64(unsafecast.Slice[uint64](dst), src.Uint64Array()) } func GatherBits(dst []byte, src Uint8Array) int { return gatherBits(dst, src) } diff --git a/vendor/github.com/parquet-go/parquet-go/type.go b/vendor/github.com/parquet-go/parquet-go/type.go index 59be12ab3a6..f5690a22e2b 100644 --- a/vendor/github.com/parquet-go/parquet-go/type.go +++ b/vendor/github.com/parquet-go/parquet-go/type.go @@ -12,7 +12,6 @@ import ( "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" - "github.com/parquet-go/parquet-go/internal/unsafecast" ) // Kind is an enumeration type representing the physical types supported by the @@ -901,7 +900,7 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error { // overhead we instead convert the reflect.Value holding the // destination array into a byte slice which allows us to use // a more efficient call to copy. - d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v)) + d := unsafe.Slice((*byte)(reflectValueData(dst)), len(v)) copy(d, v) return nil } @@ -915,6 +914,10 @@ func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error { return nil } +func reflectValueData(v reflect.Value) unsafe.Pointer { + return (*[2]unsafe.Pointer)(unsafe.Pointer(&v))[1] +} + func (t fixedLenByteArrayType) ConvertValue(val Value, typ Type) (Value, error) { switch typ.(type) { case *stringType: diff --git a/vendor/github.com/parquet-go/parquet-go/value.go b/vendor/github.com/parquet-go/parquet-go/value.go index a48f4344bc7..a85448742fd 100644 --- a/vendor/github.com/parquet-go/parquet-go/value.go +++ b/vendor/github.com/parquet-go/parquet-go/value.go @@ -14,7 +14,6 @@ import ( "github.com/google/uuid" "github.com/parquet-go/parquet-go/deprecated" "github.com/parquet-go/parquet-go/format" - "github.com/parquet-go/parquet-go/internal/unsafecast" ) const ( @@ -419,11 +418,11 @@ func makeValueDouble(value float64) Value { } func makeValueBytes(kind Kind, value []byte) Value { - return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value)) + return makeValueByteArray(kind, unsafe.SliceData(value), len(value)) } func makeValueString(kind Kind, value string) Value { - return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value)) + return makeValueByteArray(kind, unsafe.StringData(value), len(value)) } func makeValueFixedLenByteArray(v reflect.Value) Value { @@ -463,8 +462,8 @@ func (v *Value) float() float32 { return math.Float32frombits(uint32(v. func (v *Value) double() float64 { return math.Float64frombits(uint64(v.u64)) } func (v *Value) uint32() uint32 { return uint32(v.u64) } func (v *Value) uint64() uint64 { return v.u64 } -func (v *Value) byteArray() []byte { return unsafecast.Bytes(v.ptr, int(v.u64)) } -func (v *Value) string() string { return unsafecast.BytesToString(v.byteArray()) } +func (v *Value) byteArray() []byte { return unsafe.Slice(v.ptr, v.u64) } +func (v *Value) string() string { return unsafe.String(v.ptr, v.u64) } func (v *Value) be128() *[16]byte { return (*[16]byte)(unsafe.Pointer(v.ptr)) } func (v *Value) column() int { return int(^v.columnIndex) } @@ -516,14 +515,14 @@ func (v Value) convertToDouble(x float64) Value { func (v Value) convertToByteArray(x []byte) Value { v.kind = ^int8(ByteArray) - v.ptr = unsafecast.AddressOfBytes(x) + v.ptr = unsafe.SliceData(x) v.u64 = uint64(len(x)) return v } func (v Value) convertToFixedLenByteArray(x []byte) Value { v.kind = ^int8(FixedLenByteArray) - v.ptr = unsafecast.AddressOfBytes(x) + v.ptr = unsafe.SliceData(x) v.u64 = uint64(len(x)) return v } @@ -787,7 +786,7 @@ func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value { func (v Value) Clone() Value { switch k := v.Kind(); k { case ByteArray, FixedLenByteArray: - v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray())) + v.ptr = unsafe.SliceData(copyBytes(v.byteArray())) } return v } diff --git a/vendor/github.com/parquet-go/parquet-go/writer.go b/vendor/github.com/parquet-go/parquet-go/writer.go index c02f4e29598..8d9e44c7ee2 100644 --- a/vendor/github.com/parquet-go/parquet-go/writer.go +++ b/vendor/github.com/parquet-go/parquet-go/writer.go @@ -17,8 +17,8 @@ import ( "github.com/parquet-go/parquet-go/compress" "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/encoding/plain" + "github.com/parquet-go/parquet-go/encoding/thrift" "github.com/parquet-go/parquet-go/format" - "github.com/segmentio/encoding/thrift" ) const ( diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE index 6a66aea5eaf..2a7cf70da6e 100644 --- a/vendor/golang.org/x/sys/LICENSE +++ b/vendor/golang.org/x/sys/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index 8fa707aa4ba..02609d5b21d 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -105,6 +105,8 @@ var ARM64 struct { HasSVE bool // Scalable Vector Extensions HasSVE2 bool // Scalable Vector Extensions 2 HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + HasDIT bool // Data Independent Timing support + HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions _ CacheLinePad } @@ -199,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go index 0e27a21e1f8..af2aa99f9f0 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go @@ -38,6 +38,8 @@ func initOptions() { {Name: "dcpop", Feature: &ARM64.HasDCPOP}, {Name: "asimddp", Feature: &ARM64.HasASIMDDP}, {Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, + {Name: "dit", Feature: &ARM64.HasDIT}, + {Name: "i8mm", Feature: &ARM64.HasI8MM}, } } @@ -145,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { ARM64.HasLRCPC = true } + switch extractBits(isar1, 52, 55) { + case 1: + ARM64.HasI8MM = true + } + // ID_AA64PFR0_EL1 switch extractBits(pfr0, 16, 19) { case 0: @@ -168,6 +175,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { parseARM64SVERegister(getzfr0()) } + + switch extractBits(pfr0, 48, 51) { + case 1: + ARM64.HasDIT = true + } } func parseARM64SVERegister(zfr0 uint64) { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index 3d386d0fc21..08f35ea1773 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -35,8 +35,10 @@ const ( hwcap_SHA512 = 1 << 21 hwcap_SVE = 1 << 22 hwcap_ASIMDFHM = 1 << 23 + hwcap_DIT = 1 << 24 hwcap2_SVE2 = 1 << 1 + hwcap2_I8MM = 1 << 13 ) // linuxKernelCanEmulateCPUID reports whether we're running @@ -106,9 +108,12 @@ func doinit() { ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) ARM64.HasSVE = isSet(hwCap, hwcap_SVE) ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) + ARM64.HasDIT = isSet(hwCap, hwcap_DIT) + // HWCAP2 feature bits ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) + ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) } func isSet(hwc uint, value uint) bool { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e733557..7d902b6847b 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 00000000000..cb4a0c57280 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c004b..aca3199c911 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md index 7d3c060e122..6e08a76a716 100644 --- a/vendor/golang.org/x/sys/unix/README.md +++ b/vendor/golang.org/x/sys/unix/README.md @@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these into a common file for each OS. The merge is performed in the following steps: -1. Construct the set of common code that is idential in all architecture-specific files. +1. Construct the set of common code that is identical in all architecture-specific files. 2. Write this common code to the merged file. 3. Remove the common code from all architecture-specific files. diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index 4ed2e488b61..ac54ecaba0a 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -58,6 +58,7 @@ includes_Darwin=' #define _DARWIN_USE_64_BIT_INODE #define __APPLE_USE_RFC_3542 #include +#include #include #include #include @@ -551,6 +552,7 @@ ccflags="$@" $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || @@ -654,7 +656,7 @@ errors=$( signals=$( echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort ) @@ -664,7 +666,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags | sort >_error.grep echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort >_signal.grep echo '// mkerrors.sh' "$@" diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 67ce6cef2d5..6f15ba1eaff 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, var status _C_int var r Pid_t err = ERESTART - // AIX wait4 may return with ERESTART errno, while the processus is still + // AIX wait4 may return with ERESTART errno, while the process is still // active. for err == ERESTART { r, err = wait4(Pid_t(pid), &status, options, rusage) diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 4cc7b005967..099867deede 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -402,6 +402,18 @@ func IoctlSetIfreqMTU(fd int, ifreq *IfreqMTU) error { return ioctlPtr(fd, SIOCSIFMTU, unsafe.Pointer(ifreq)) } +//sys renamexNp(from string, to string, flag uint32) (err error) + +func RenamexNp(from string, to string, flag uint32) (err error) { + return renamexNp(from, to, flag) +} + +//sys renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) + +func RenameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + return renameatxNp(fromfd, from, tofd, to, flag) +} + //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS_SYSCTL func Uname(uname *Utsname) error { @@ -554,6 +566,43 @@ func PthreadFchdir(fd int) (err error) { return pthread_fchdir_np(fd) } +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. +func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651f8e3..a6a2d2fc2b9 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 5682e2628ad..f08abd434ff 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) { return &value, err } +// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPVegasInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctp" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPBBRInfo)(unsafe.Pointer(&value[0])) + return out, err +} + // GetsockoptString returns the string value of the socket option opt for the // socket associated with fd at the given socket level. func GetsockoptString(fd, level, opt int) (string, error) { @@ -1959,7 +2001,26 @@ func Getpgrp() (pid int) { //sysnb Getpid() (pid int) //sysnb Getppid() (ppid int) //sys Getpriority(which int, who int) (prio int, err error) -//sys Getrandom(buf []byte, flags int) (n int, err error) + +func Getrandom(buf []byte, flags int) (n int, err error) { + vdsoRet, supported := vgetrandom(buf, uint32(flags)) + if supported { + if vdsoRet < 0 { + return 0, errnoErr(syscall.Errno(-vdsoRet)) + } + return vdsoRet, nil + } + var p *byte + if len(buf) > 0 { + p = &buf[0] + } + r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags)) + if e != 0 { + return 0, errnoErr(e) + } + return int(r), nil +} + //sysnb Getrusage(who int, rusage *Rusage) (err error) //sysnb Getsid(pid int) (sid int, err error) //sysnb Gettid() (tid int) @@ -2592,3 +2653,4 @@ func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) { } //sys Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error) +//sys Mseal(b []byte, flags uint) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go index cf2ee6c75ef..745e5c7e6c0 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go @@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go index 3d0e98451f8..dd2262a4079 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go @@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go index 6f5a288944d..8cf3670bda6 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go @@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error } return riscvHWProbe(pairs, setSize, set, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_openbsd.go b/vendor/golang.org/x/sys/unix/syscall_openbsd.go index b25343c71a4..b86ded549c6 100644 --- a/vendor/golang.org/x/sys/unix/syscall_openbsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_openbsd.go @@ -293,6 +293,7 @@ func Uname(uname *Utsname) error { //sys Mkfifoat(dirfd int, path string, mode uint32) (err error) //sys Mknod(path string, mode uint32, dev int) (err error) //sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error) +//sys Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) //sys Nanosleep(time *Timespec, leftover *Timespec) (err error) //sys Open(path string, mode int, perm uint32) (fd int, err error) //sys Openat(dirfd int, path string, mode int, perm uint32) (fd int, err error) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go new file mode 100644 index 00000000000..07ac8e09d1b --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && go1.24 + +package unix + +import _ "unsafe" + +//go:linkname vgetrandom runtime.vgetrandom +//go:noescape +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go new file mode 100644 index 00000000000..297e97bce92 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !go1.24 + +package unix + +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) { + return -1, false +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index e40fa85245f..d73c4652e6c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index bb02aa6c056..4a55a400588 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index 877a62b479a..de3b462489c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -457,6 +457,7 @@ const ( B600 = 0x8 B75 = 0x2 B9600 = 0xd + BCACHEFS_SUPER_MAGIC = 0xca451a4e BDEVFS_MAGIC = 0x62646576 BINDERFS_SUPER_MAGIC = 0x6c6f6f70 BINFMTFS_MAGIC = 0x42494e4d @@ -494,6 +495,7 @@ const ( BPF_F_TEST_REG_INVARIANTS = 0x80 BPF_F_TEST_RND_HI32 = 0x4 BPF_F_TEST_RUN_ON_CPU = 0x1 + BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4 BPF_F_TEST_STATE_FREQ = 0x8 BPF_F_TEST_XDP_LIVE_FRAMES = 0x2 BPF_F_XDP_DEV_BOUND_ONLY = 0x40 @@ -928,6 +930,7 @@ const ( EPOLL_CTL_ADD = 0x1 EPOLL_CTL_DEL = 0x2 EPOLL_CTL_MOD = 0x3 + EPOLL_IOC_TYPE = 0x8a EROFS_SUPER_MAGIC_V1 = 0xe0f5e1e2 ESP_V4_FLOW = 0xa ESP_V6_FLOW = 0xc @@ -941,9 +944,6 @@ const ( ETHTOOL_FEC_OFF = 0x4 ETHTOOL_FEC_RS = 0x8 ETHTOOL_FLAG_ALL = 0x7 - ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 - ETHTOOL_FLAG_OMIT_REPLY = 0x2 - ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_FLASHDEV = 0x33 ETHTOOL_FLASH_MAX_FILENAME = 0x80 ETHTOOL_FWVERS_LEN = 0x20 @@ -1705,6 +1705,7 @@ const ( KEXEC_ARCH_S390 = 0x160000 KEXEC_ARCH_SH = 0x2a0000 KEXEC_ARCH_X86_64 = 0x3e0000 + KEXEC_CRASH_HOTPLUG_SUPPORT = 0x8 KEXEC_FILE_DEBUG = 0x8 KEXEC_FILE_NO_INITRAMFS = 0x4 KEXEC_FILE_ON_CRASH = 0x2 @@ -1780,6 +1781,7 @@ const ( KEY_SPEC_USER_KEYRING = -0x4 KEY_SPEC_USER_SESSION_KEYRING = -0x5 LANDLOCK_ACCESS_FS_EXECUTE = 0x1 + LANDLOCK_ACCESS_FS_IOCTL_DEV = 0x8000 LANDLOCK_ACCESS_FS_MAKE_BLOCK = 0x800 LANDLOCK_ACCESS_FS_MAKE_CHAR = 0x40 LANDLOCK_ACCESS_FS_MAKE_DIR = 0x80 @@ -1861,6 +1863,19 @@ const ( MAP_FILE = 0x0 MAP_FIXED = 0x10 MAP_FIXED_NOREPLACE = 0x100000 + MAP_HUGE_16GB = 0x88000000 + MAP_HUGE_16KB = 0x38000000 + MAP_HUGE_16MB = 0x60000000 + MAP_HUGE_1GB = 0x78000000 + MAP_HUGE_1MB = 0x50000000 + MAP_HUGE_256MB = 0x70000000 + MAP_HUGE_2GB = 0x7c000000 + MAP_HUGE_2MB = 0x54000000 + MAP_HUGE_32MB = 0x64000000 + MAP_HUGE_512KB = 0x4c000000 + MAP_HUGE_512MB = 0x74000000 + MAP_HUGE_64KB = 0x40000000 + MAP_HUGE_8MB = 0x5c000000 MAP_HUGE_MASK = 0x3f MAP_HUGE_SHIFT = 0x1a MAP_PRIVATE = 0x2 @@ -1908,6 +1923,7 @@ const ( MNT_EXPIRE = 0x4 MNT_FORCE = 0x1 MNT_ID_REQ_SIZE_VER0 = 0x18 + MNT_ID_REQ_SIZE_VER1 = 0x20 MODULE_INIT_COMPRESSED_FILE = 0x4 MODULE_INIT_IGNORE_MODVERSIONS = 0x1 MODULE_INIT_IGNORE_VERMAGIC = 0x2 @@ -2173,7 +2189,7 @@ const ( NFT_REG_SIZE = 0x10 NFT_REJECT_ICMPX_MAX = 0x3 NFT_RT_MAX = 0x4 - NFT_SECMARK_CTX_MAXLEN = 0x100 + NFT_SECMARK_CTX_MAXLEN = 0x1000 NFT_SET_MAXNAMELEN = 0x100 NFT_SOCKET_MAX = 0x3 NFT_TABLE_F_MASK = 0x7 @@ -2342,9 +2358,11 @@ const ( PERF_MEM_LVLNUM_IO = 0xa PERF_MEM_LVLNUM_L1 = 0x1 PERF_MEM_LVLNUM_L2 = 0x2 + PERF_MEM_LVLNUM_L2_MHB = 0x5 PERF_MEM_LVLNUM_L3 = 0x3 PERF_MEM_LVLNUM_L4 = 0x4 PERF_MEM_LVLNUM_LFB = 0xc + PERF_MEM_LVLNUM_MSC = 0x6 PERF_MEM_LVLNUM_NA = 0xf PERF_MEM_LVLNUM_PMEM = 0xe PERF_MEM_LVLNUM_RAM = 0xd @@ -2417,6 +2435,7 @@ const ( PRIO_PGRP = 0x1 PRIO_PROCESS = 0x0 PRIO_USER = 0x2 + PROCFS_IOCTL_MAGIC = 'f' PROC_SUPER_MAGIC = 0x9fa0 PROT_EXEC = 0x4 PROT_GROWSDOWN = 0x1000000 @@ -2498,6 +2517,23 @@ const ( PR_PAC_GET_ENABLED_KEYS = 0x3d PR_PAC_RESET_KEYS = 0x36 PR_PAC_SET_ENABLED_KEYS = 0x3c + PR_PPC_DEXCR_CTRL_CLEAR = 0x4 + PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC = 0x10 + PR_PPC_DEXCR_CTRL_EDITABLE = 0x1 + PR_PPC_DEXCR_CTRL_MASK = 0x1f + PR_PPC_DEXCR_CTRL_SET = 0x2 + PR_PPC_DEXCR_CTRL_SET_ONEXEC = 0x8 + PR_PPC_DEXCR_IBRTPD = 0x1 + PR_PPC_DEXCR_NPHIE = 0x3 + PR_PPC_DEXCR_SBHE = 0x0 + PR_PPC_DEXCR_SRAPD = 0x2 + PR_PPC_GET_DEXCR = 0x48 + PR_PPC_SET_DEXCR = 0x49 + PR_RISCV_CTX_SW_FENCEI_OFF = 0x1 + PR_RISCV_CTX_SW_FENCEI_ON = 0x0 + PR_RISCV_SCOPE_PER_PROCESS = 0x0 + PR_RISCV_SCOPE_PER_THREAD = 0x1 + PR_RISCV_SET_ICACHE_FLUSH_CTX = 0x47 PR_RISCV_V_GET_CONTROL = 0x46 PR_RISCV_V_SET_CONTROL = 0x45 PR_RISCV_V_VSTATE_CTRL_CUR_MASK = 0x3 @@ -2902,11 +2938,12 @@ const ( RUSAGE_SELF = 0x0 RUSAGE_THREAD = 0x1 RWF_APPEND = 0x10 + RWF_ATOMIC = 0x40 RWF_DSYNC = 0x2 RWF_HIPRI = 0x1 RWF_NOAPPEND = 0x20 RWF_NOWAIT = 0x8 - RWF_SUPPORTED = 0x3f + RWF_SUPPORTED = 0x7f RWF_SYNC = 0x4 RWF_WRITE_LIFE_NOT_SET = 0x0 SCHED_BATCH = 0x3 @@ -3179,6 +3216,7 @@ const ( STATX_ATTR_MOUNT_ROOT = 0x2000 STATX_ATTR_NODUMP = 0x40 STATX_ATTR_VERITY = 0x100000 + STATX_ATTR_WRITE_ATOMIC = 0x400000 STATX_BASIC_STATS = 0x7ff STATX_BLOCKS = 0x400 STATX_BTIME = 0x800 @@ -3192,8 +3230,10 @@ const ( STATX_MTIME = 0x40 STATX_NLINK = 0x4 STATX_SIZE = 0x200 + STATX_SUBVOL = 0x8000 STATX_TYPE = 0x1 STATX_UID = 0x8 + STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 @@ -3592,6 +3632,7 @@ const ( XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000 XDP_UMEM_PGOFF_FILL_RING = 0x100000000 XDP_UMEM_REG = 0x4 + XDP_UMEM_TX_METADATA_LEN = 0x4 XDP_UMEM_TX_SW_CSUM = 0x2 XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1 XDP_USE_NEED_WAKEUP = 0x8 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index e4bc0bd57c7..8aa6d77c018 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -151,9 +153,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index 689317afdbf..da428f42533 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -151,9 +153,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index 5cca668ac30..bf45bfec78a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index 14270508b04..71c67162b73 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 ESR_MAGIC = 0x45535201 EXTPROC = 0x10000 @@ -152,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index 28e39afdcb4..9476628fa02 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -152,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index cd66e92cb42..b9e85f3cf0c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index c1595eba78e..a48b68a7647 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index ee9456b0da7..ea00e8522a1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index 8cfca81e1b5..91c64687176 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 60b0deb3af7..8cbf38d6390 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index f90aa7281bf..a2df7341917 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index ba9e0150338..24791379233 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index 07cdfd6e9fd..d265f146ee0 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index 2f1dd214a74..3f2d6443964 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index f40519d9018..5d8b727a1c8 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -82,6 +82,8 @@ const ( EFD_CLOEXEC = 0x400000 EFD_NONBLOCK = 0x4000 EMT_TAGOVF = 0x1 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x400000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -153,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2ab3d9..1ec2b1407b1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index 07642c308d3..24b346e1a35 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index 923e08cb792..ebd213100b3 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 @@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 7d73dda6473..824b9c2d5e0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -793,6 +841,26 @@ var libc_pthread_fchdir_np_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index 057700111e7..4f178a22934 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -223,6 +223,16 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 @@ -238,6 +248,11 @@ TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 87d8612a1dc..af30da55780 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -971,23 +971,6 @@ func Getpriority(which int, who int) (prio int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Getrandom(buf []byte, flags int) (n int, err error) { - var _p0 unsafe.Pointer - if len(buf) > 0 { - _p0 = unsafe.Pointer(&buf[0]) - } else { - _p0 = unsafe.Pointer(&_zero) - } - r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags)) - n = int(r0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Getrusage(who int, rusage *Rusage) (err error) { _, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0) if e1 != 0 { @@ -2229,3 +2212,19 @@ func Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) } return } + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func Mseal(b []byte, flags uint) (err error) { + var _p0 unsafe.Pointer + if len(b) > 0 { + _p0 = unsafe.Pointer(&b[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := Syscall(SYS_MSEAL, uintptr(_p0), uintptr(len(b)), uintptr(flags)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go index 9dc42410b78..1851df14e87 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s index 41b5617316c..0b43c693656 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go index 0d3a0751cd4..e1ec0dbe4ec 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s index 4019a656f6d..880c6d6e316 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go index c39f7776db3..7c8452a63e9 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s index ac4af24f908..b8ef95b0fa1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go index 57571d072fe..2ffdf861f75 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s index f77d532121b..2af3b5c762f 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go index e62963e67e2..1da08d52675 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s index fae140b62c9..b7a251353b0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go index 00831354c82..6e85b0aac95 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s index 9d1e0ff06d0..f15dadf0552 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s @@ -555,6 +555,12 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + CALL libc_mount(SB) + RET +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 CALL libc_nanosleep(SB) RET diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go index 79029ed5848..28b487df251 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s index da115f9a4b6..1e7f321e436 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go index 53aef5dc58d..524b0820cbc 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go @@ -457,4 +457,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go index 71d524763d3..f485dbf4565 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go @@ -341,6 +341,7 @@ const ( SYS_STATX = 332 SYS_IO_PGETEVENTS = 333 SYS_RSEQ = 334 + SYS_URETPROBE = 335 SYS_PIDFD_SEND_SIGNAL = 424 SYS_IO_URING_SETUP = 425 SYS_IO_URING_ENTER = 426 @@ -379,4 +380,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go index c747706131c..70b35bf3b09 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go @@ -421,4 +421,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go index f96e214f6d4..1893e2fe884 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go @@ -85,7 +85,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -324,4 +324,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go index 28425346cf1..16a4017da0a 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go @@ -84,6 +84,8 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 + SYS_NEWFSTATAT = 79 + SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 SYS_FDATASYNC = 83 @@ -318,4 +320,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go index d0953018dae..7e567f1efff 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go index 295c7f4b818..38ae55e5ef8 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go index d1a9eaca7a4..55e92e60a82 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go index bec157c39fd..60658d6a021 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go index 7ee7bdc435c..e203e8a7ed4 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go @@ -448,4 +448,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go index fad1f25b449..5944b97d546 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go index 7d3e16357d6..c66d416dad1 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go index 0ed53ad9f7e..a5459e766f5 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go @@ -84,7 +84,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -325,4 +325,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go index 2fba04ad500..01d86825bb9 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go @@ -386,4 +386,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go index 621d00d741b..7b703e77cda 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go @@ -399,4 +399,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107f3a5..d003c3d4378 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef74d0..0d45a941aae 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094a3aa..51e13eb055f 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6ee77f..d002d8ef3cc 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107ee8b1..3f863d898dd 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12acfd9..61c72931066 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c79006..b5d17414f03 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 4740b834854..3a69e454962 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -87,30 +87,35 @@ type StatxTimestamp struct { } type Statx_t struct { - Mask uint32 - Blksize uint32 - Attributes uint64 - Nlink uint32 - Uid uint32 - Gid uint32 - Mode uint16 - _ [1]uint16 - Ino uint64 - Size uint64 - Blocks uint64 - Attributes_mask uint64 - Atime StatxTimestamp - Btime StatxTimestamp - Ctime StatxTimestamp - Mtime StatxTimestamp - Rdev_major uint32 - Rdev_minor uint32 - Dev_major uint32 - Dev_minor uint32 - Mnt_id uint64 - Dio_mem_align uint32 - Dio_offset_align uint32 - _ [12]uint64 + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + Uid uint32 + Gid uint32 + Mode uint16 + _ [1]uint16 + Ino uint64 + Size uint64 + Blocks uint64 + Attributes_mask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + Rdev_major uint32 + Rdev_minor uint32 + Dev_major uint32 + Dev_minor uint32 + Mnt_id uint64 + Dio_mem_align uint32 + Dio_offset_align uint32 + Subvol uint64 + Atomic_write_unit_min uint32 + Atomic_write_unit_max uint32 + Atomic_write_segments_max uint32 + _ [1]uint32 + _ [9]uint64 } type Fsid struct { @@ -515,6 +520,29 @@ type TCPInfo struct { Total_rto_time uint32 } +type TCPVegasInfo struct { + Enabled uint32 + Rttcnt uint32 + Rtt uint32 + Minrtt uint32 +} + +type TCPDCTCPInfo struct { + Enabled uint16 + Ce_state uint16 + Alpha uint32 + Ab_ecn uint32 + Ab_tot uint32 +} + +type TCPBBRInfo struct { + Bw_lo uint32 + Bw_hi uint32 + Min_rtt uint32 + Pacing_gain uint32 + Cwnd_gain uint32 +} + type CanFilter struct { Id uint32 Mask uint32 @@ -556,6 +584,7 @@ const ( SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc SizeofTCPInfo = 0xf8 + SizeofTCPCCInfo = 0x14 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -2485,7 +2514,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -3473,7 +3502,7 @@ const ( DEVLINK_PORT_FN_ATTR_STATE = 0x2 DEVLINK_PORT_FN_ATTR_OPSTATE = 0x3 DEVLINK_PORT_FN_ATTR_CAPS = 0x4 - DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x5 + DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x6 ) type FsverityDigest struct { @@ -3765,7 +3794,7 @@ const ( ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 ETHTOOL_MSG_RSS_GET = 0x26 - ETHTOOL_MSG_USER_MAX = 0x2b + ETHTOOL_MSG_USER_MAX = 0x2c ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3805,7 +3834,10 @@ const ( ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 - ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_MSG_KERNEL_MAX = 0x2c + ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 + ETHTOOL_FLAG_OMIT_REPLY = 0x2 + ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_A_HEADER_UNSPEC = 0x0 ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 @@ -3947,7 +3979,7 @@ const ( ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17 ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18 ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19 - ETHTOOL_A_COALESCE_MAX = 0x1c + ETHTOOL_A_COALESCE_MAX = 0x1e ETHTOOL_A_PAUSE_UNSPEC = 0x0 ETHTOOL_A_PAUSE_HEADER = 0x1 ETHTOOL_A_PAUSE_AUTONEG = 0x2 @@ -3975,7 +4007,7 @@ const ( ETHTOOL_A_TSINFO_TX_TYPES = 0x3 ETHTOOL_A_TSINFO_RX_FILTERS = 0x4 ETHTOOL_A_TSINFO_PHC_INDEX = 0x5 - ETHTOOL_A_TSINFO_MAX = 0x5 + ETHTOOL_A_TSINFO_MAX = 0x6 ETHTOOL_A_CABLE_TEST_UNSPEC = 0x0 ETHTOOL_A_CABLE_TEST_HEADER = 0x1 ETHTOOL_A_CABLE_TEST_MAX = 0x1 @@ -4605,7 +4637,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x14a + NL80211_ATTR_MAX = 0x14c NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -5209,7 +5241,7 @@ const ( NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf - NL80211_FREQUENCY_ATTR_MAX = 0x20 + NL80211_FREQUENCY_ATTR_MAX = 0x21 NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6 NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11 NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc04142f..ad05b51a603 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/windows/dll_windows.go b/vendor/golang.org/x/sys/windows/dll_windows.go index 115341fba66..4e613cf6335 100644 --- a/vendor/golang.org/x/sys/windows/dll_windows.go +++ b/vendor/golang.org/x/sys/windows/dll_windows.go @@ -65,7 +65,7 @@ func LoadDLL(name string) (dll *DLL, err error) { return d, nil } -// MustLoadDLL is like LoadDLL but panics if load operation failes. +// MustLoadDLL is like LoadDLL but panics if load operation fails. func MustLoadDLL(name string) *DLL { d, e := LoadDLL(name) if e != nil { diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go index 97651b5bd04..b6e1ab76f82 100644 --- a/vendor/golang.org/x/sys/windows/security_windows.go +++ b/vendor/golang.org/x/sys/windows/security_windows.go @@ -1179,7 +1179,7 @@ type OBJECTS_AND_NAME struct { //sys makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) = advapi32.MakeSelfRelativeSD //sys setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) = advapi32.SetEntriesInAclW -//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) = advapi32.GetAce +//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) = advapi32.GetAce // Control returns the security descriptor control bits. func (sd *SECURITY_DESCRIPTOR) Control() (control SECURITY_DESCRIPTOR_CONTROL, revision uint32, err error) { diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 6525c62f3c2..5cee9a3143f 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -17,8 +17,10 @@ import ( "unsafe" ) -type Handle uintptr -type HWND uintptr +type ( + Handle uintptr + HWND uintptr +) const ( InvalidHandle = ^Handle(0) @@ -211,6 +213,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys OpenProcess(desiredAccess uint32, inheritHandle bool, processId uint32) (handle Handle, err error) //sys ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *uint16, showCmd int32) (err error) [failretval<=32] = shell32.ShellExecuteW //sys GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) = user32.GetWindowThreadProcessId +//sys LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) [failretval==0] = user32.LoadKeyboardLayoutW +//sys UnloadKeyboardLayout(hkl Handle) (err error) = user32.UnloadKeyboardLayout +//sys GetKeyboardLayout(tid uint32) (hkl Handle) = user32.GetKeyboardLayout +//sys ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) = user32.ToUnicodeEx //sys GetShellWindow() (shellWindow HWND) = user32.GetShellWindow //sys MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) [failretval==0] = user32.MessageBoxW //sys ExitWindowsEx(flags uint32, reason uint32) (err error) = user32.ExitWindowsEx @@ -307,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode //sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo //sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition +//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP +//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP +//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP +//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole @@ -1368,9 +1378,11 @@ func SetsockoptLinger(fd Handle, level, opt int, l *Linger) (err error) { func SetsockoptInet4Addr(fd Handle, level, opt int, value [4]byte) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(&value[0])), 4) } + func SetsockoptIPMreq(fd Handle, level, opt int, mreq *IPMreq) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(mreq)), int32(unsafe.Sizeof(*mreq))) } + func SetsockoptIPv6Mreq(fd Handle, level, opt int, mreq *IPv6Mreq) (err error) { return syscall.EWINDOWS } diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index d8cb71db0a6..7b97a154c95 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1060,6 +1060,7 @@ const ( SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6 SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4 SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12 + SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15 // cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460 @@ -2003,7 +2004,21 @@ const ( MOVEFILE_FAIL_IF_NOT_TRACKABLE = 0x20 ) -const GAA_FLAG_INCLUDE_PREFIX = 0x00000010 +// Flags for GetAdaptersAddresses, see +// https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-getadaptersaddresses. +const ( + GAA_FLAG_SKIP_UNICAST = 0x1 + GAA_FLAG_SKIP_ANYCAST = 0x2 + GAA_FLAG_SKIP_MULTICAST = 0x4 + GAA_FLAG_SKIP_DNS_SERVER = 0x8 + GAA_FLAG_INCLUDE_PREFIX = 0x10 + GAA_FLAG_SKIP_FRIENDLY_NAME = 0x20 + GAA_FLAG_INCLUDE_WINS_INFO = 0x40 + GAA_FLAG_INCLUDE_GATEWAYS = 0x80 + GAA_FLAG_INCLUDE_ALL_INTERFACES = 0x100 + GAA_FLAG_INCLUDE_ALL_COMPARTMENTS = 0x200 + GAA_FLAG_INCLUDE_TUNNEL_BINDINGORDER = 0x400 +) const ( IF_TYPE_OTHER = 1 @@ -2017,6 +2032,50 @@ const ( IF_TYPE_IEEE1394 = 144 ) +// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin +const ( + IpPrefixOriginOther = 0 + IpPrefixOriginManual = 1 + IpPrefixOriginWellKnown = 2 + IpPrefixOriginDhcp = 3 + IpPrefixOriginRouterAdvertisement = 4 + IpPrefixOriginUnchanged = 1 << 4 +) + +// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin +const ( + NlsoOther = 0 + NlsoManual = 1 + NlsoWellKnown = 2 + NlsoDhcp = 3 + NlsoLinkLayerAddress = 4 + NlsoRandom = 5 + IpSuffixOriginOther = 0 + IpSuffixOriginManual = 1 + IpSuffixOriginWellKnown = 2 + IpSuffixOriginDhcp = 3 + IpSuffixOriginLinkLayerAddress = 4 + IpSuffixOriginRandom = 5 + IpSuffixOriginUnchanged = 1 << 4 +) + +// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state +const ( + NldsInvalid = 0 + NldsTentative = 1 + NldsDuplicate = 2 + NldsDeprecated = 3 + NldsPreferred = 4 + IpDadStateInvalid = 0 + IpDadStateTentative = 1 + IpDadStateDuplicate = 2 + IpDadStateDeprecated = 3 + IpDadStatePreferred = 4 +) + type SocketAddress struct { Sockaddr *syscall.RawSockaddrAny SockaddrLength int32 @@ -3404,3 +3463,14 @@ type DCB struct { EvtChar byte wReserved1 uint16 } + +// Keyboard Layout Flags. +// See https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-loadkeyboardlayoutw +const ( + KLF_ACTIVATE = 0x00000001 + KLF_SUBSTITUTE_OK = 0x00000002 + KLF_REORDER = 0x00000008 + KLF_REPLACELANG = 0x00000010 + KLF_NOTELLSHELL = 0x00000080 + KLF_SETFORPROCESS = 0x00000100 +) diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index eba761018aa..4c2e1bdc01e 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -247,7 +247,9 @@ var ( procGetCommandLineW = modkernel32.NewProc("GetCommandLineW") procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") procGetComputerNameW = modkernel32.NewProc("GetComputerNameW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") procGetConsoleMode = modkernel32.NewProc("GetConsoleMode") + procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP") procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo") procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW") procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId") @@ -347,8 +349,10 @@ var ( procSetCommMask = modkernel32.NewProc("SetCommMask") procSetCommState = modkernel32.NewProc("SetCommState") procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts") + procSetConsoleCP = modkernel32.NewProc("SetConsoleCP") procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition") procSetConsoleMode = modkernel32.NewProc("SetConsoleMode") + procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP") procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW") procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories") procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW") @@ -478,12 +482,16 @@ var ( procGetDesktopWindow = moduser32.NewProc("GetDesktopWindow") procGetForegroundWindow = moduser32.NewProc("GetForegroundWindow") procGetGUIThreadInfo = moduser32.NewProc("GetGUIThreadInfo") + procGetKeyboardLayout = moduser32.NewProc("GetKeyboardLayout") procGetShellWindow = moduser32.NewProc("GetShellWindow") procGetWindowThreadProcessId = moduser32.NewProc("GetWindowThreadProcessId") procIsWindow = moduser32.NewProc("IsWindow") procIsWindowUnicode = moduser32.NewProc("IsWindowUnicode") procIsWindowVisible = moduser32.NewProc("IsWindowVisible") + procLoadKeyboardLayoutW = moduser32.NewProc("LoadKeyboardLayoutW") procMessageBoxW = moduser32.NewProc("MessageBoxW") + procToUnicodeEx = moduser32.NewProc("ToUnicodeEx") + procUnloadKeyboardLayout = moduser32.NewProc("UnloadKeyboardLayout") procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock") procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock") procGetUserProfileDirectoryW = moduserenv.NewProc("GetUserProfileDirectoryW") @@ -789,6 +797,14 @@ func FreeSid(sid *SID) (err error) { return } +func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) { + r1, _, e1 := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func GetLengthSid(sid *SID) (len uint32) { r0, _, _ := syscall.Syscall(procGetLengthSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) len = uint32(r0) @@ -1225,14 +1241,6 @@ func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCE return } -func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) { - r0, _, _ := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) - if r0 == 0 { - ret = GetLastError() - } - return -} - func SetKernelObjectSecurity(handle Handle, securityInformation SECURITY_INFORMATION, securityDescriptor *SECURITY_DESCRIPTOR) (err error) { r1, _, e1 := syscall.Syscall(procSetKernelObjectSecurity.Addr(), 3, uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor))) if r1 == 0 { @@ -2158,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { return } +func GetConsoleCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleMode(console Handle, mode *uint32) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) if r1 == 0 { @@ -2166,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { return } +func GetConsoleOutputCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) if r1 == 0 { @@ -3034,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { return } +func SetConsoleCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func setConsoleCursorPosition(console Handle, position uint32) (err error) { r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0) if r1 == 0 { @@ -3050,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { return } +func SetConsoleOutputCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func SetCurrentDirectory(path *uint16) (err error) { r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) if r1 == 0 { @@ -4082,6 +4124,12 @@ func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) { return } +func GetKeyboardLayout(tid uint32) (hkl Handle) { + r0, _, _ := syscall.Syscall(procGetKeyboardLayout.Addr(), 1, uintptr(tid), 0, 0) + hkl = Handle(r0) + return +} + func GetShellWindow() (shellWindow HWND) { r0, _, _ := syscall.Syscall(procGetShellWindow.Addr(), 0, 0, 0, 0) shellWindow = HWND(r0) @@ -4115,6 +4163,15 @@ func IsWindowVisible(hwnd HWND) (isVisible bool) { return } +func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) { + r0, _, e1 := syscall.Syscall(procLoadKeyboardLayoutW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(flags), 0) + hkl = Handle(r0) + if hkl == 0 { + err = errnoErr(e1) + } + return +} + func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) { r0, _, e1 := syscall.Syscall6(procMessageBoxW.Addr(), 4, uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype), 0, 0) ret = int32(r0) @@ -4124,6 +4181,20 @@ func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret i return } +func ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) { + r0, _, _ := syscall.Syscall9(procToUnicodeEx.Addr(), 7, uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl), 0, 0) + ret = int32(r0) + return +} + +func UnloadKeyboardLayout(hkl Handle) (err error) { + r1, _, e1 := syscall.Syscall(procUnloadKeyboardLayout.Addr(), 1, uintptr(hkl), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) (err error) { var _p0 uint32 if inheritExisting { diff --git a/vendor/modules.txt b/vendor/modules.txt index 1fa09e2acba..cf80b900ffe 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -137,7 +137,7 @@ github.com/alicebob/miniredis/v2/geohash github.com/alicebob/miniredis/v2/hyperloglog github.com/alicebob/miniredis/v2/metro github.com/alicebob/miniredis/v2/server -# github.com/andybalholm/brotli v1.1.0 +# github.com/andybalholm/brotli v1.1.1 ## explicit; go 1.13 github.com/andybalholm/brotli github.com/andybalholm/brotli/matchfinder @@ -758,8 +758,8 @@ github.com/jsternberg/zap-logfmt # github.com/julienschmidt/httprouter v1.3.0 ## explicit; go 1.7 github.com/julienschmidt/httprouter -# github.com/klauspost/compress v1.17.9 -## explicit; go 1.20 +# github.com/klauspost/compress v1.17.11 +## explicit; go 1.21 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse @@ -807,7 +807,7 @@ github.com/mattn/go-colorable # github.com/mattn/go-isatty v0.0.20 ## explicit; go 1.15 github.com/mattn/go-isatty -# github.com/mattn/go-runewidth v0.0.15 +# github.com/mattn/go-runewidth v0.0.16 ## explicit; go 1.9 github.com/mattn/go-runewidth # github.com/miekg/dns v1.1.61 @@ -980,8 +980,8 @@ github.com/opentracing/opentracing-go/log github.com/openzipkin/zipkin-go/model github.com/openzipkin/zipkin-go/proto/zipkin_proto3 github.com/openzipkin/zipkin-go/reporter -# github.com/parquet-go/parquet-go v0.23.0 -## explicit; go 1.21 +# github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe +## explicit; go 1.22 github.com/parquet-go/parquet-go github.com/parquet-go/parquet-go/bloom github.com/parquet-go/parquet-go/bloom/xxhash @@ -999,6 +999,7 @@ github.com/parquet-go/parquet-go/encoding/bytestreamsplit github.com/parquet-go/parquet-go/encoding/delta github.com/parquet-go/parquet-go/encoding/plain github.com/parquet-go/parquet-go/encoding/rle +github.com/parquet-go/parquet-go/encoding/thrift github.com/parquet-go/parquet-go/format github.com/parquet-go/parquet-go/hashprobe github.com/parquet-go/parquet-go/hashprobe/aeshash @@ -1157,9 +1158,6 @@ github.com/sagikazarmark/slog-shim # github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 ## explicit github.com/sean-/seed -# github.com/segmentio/encoding v0.4.0 -## explicit; go 1.18 -github.com/segmentio/encoding/thrift # github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e ## explicit github.com/segmentio/fasthash/fnv1a @@ -1748,7 +1746,7 @@ golang.org/x/oauth2/jwt ## explicit; go 1.18 golang.org/x/sync/errgroup golang.org/x/sync/semaphore -# golang.org/x/sys v0.22.0 +# golang.org/x/sys v0.26.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/unix