diff --git a/.github/workflows/modules-zstd.yml b/.github/workflows/modules-zstd.yml
new file mode 100644
index 0000000000..a2daaa681e
--- /dev/null
+++ b/.github/workflows/modules-zstd.yml
@@ -0,0 +1,84 @@
+# Builds, tests and benchmarks the targets under xls/modules/zstd.
+name: Modules (ZSTD)
+on:
+  # Avoid triggering on pushes to /all/ open PR branches.
+  push:
+    branches:
+      - main
+    paths:
+      - 'xls/modules/zstd/**'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'xls/modules/zstd/**'
+  # This lets us trigger manually from the UI.
+  workflow_dispatch:
+
+jobs:
+  test:
+    name: Test ZSTD module (opt)
+    runs-on:
+      labels: ubuntu-22.04-64core
+    timeout-minutes: 600
+    # NOTE(review): job-level continue-on-error keeps the workflow run green even if this job fails - confirm this is intended.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Restore Nightly Bazel Cache
+        uses: actions/cache/restore@v4
+        with:
+          path: "~/.cache/bazel"
+          key: bazel-cache-nightly-${{ runner.os }}-${{ github.sha }}
+          restore-keys: bazel-cache-nightly-${{ runner.os }}-
+
+      - name: Install dependencies via apt
+        # Refresh the package index first and pass -y so the install never waits for confirmation.
+        run: sudo apt-get update && sudo apt-get install -y python3-distutils python3-dev python-is-python3 libtinfo5 build-essential liblapack-dev libblas-dev gfortran
+
+      - name: Bazel Build Tools (opt)
+        run: |
+          bazel build -c opt --test_output=errors -- //xls/dslx:interpreter_main //xls/dslx/ir_convert:ir_converter_main //xls/tools:opt_main //xls/tools:codegen_main
+
+      - name: Build ZSTD Module (opt)
+        run: |
+          bazel build -c opt --test_output=errors -- //xls/modules/zstd:all
+
+      - name: Test ZSTD Module - DSLX Tests (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          bazel test -c opt --test_output=errors -- $(bazel query 'filter(".*_dslx_test", kind(rule, //xls/modules/zstd/...))')
+
+      - name: Test ZSTD Module - CC Tests (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          bazel test -c opt --test_output=errors -- $(bazel query 'filter(".*_cc_test", kind(rule, //xls/modules/zstd/...))')
+
+      - name: Build ZSTD verilog targets (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          bazel build -c opt -- $(bazel query 'filter(".*_verilog", kind(rule, //xls/modules/zstd/...))')
+
+      - name: Build and run ZSTD IR benchmark rules (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          # `bazel run` takes a single target (extra labels would be passed to it as arguments), so run each benchmark separately.
+          targets=$(bazel query 'filter(".*_ir_benchmark", kind(rule, //xls/modules/zstd/...))')
+          for target in ${targets}; do
+            bazel run -c opt -- "${target}"
+          done
+
+      - name: Build and run synthesis benchmarks of the ZSTD module (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          # `bazel run` takes a single target (extra labels would be passed to it as arguments), so run each benchmark separately.
+          targets=$(bazel query 'filter(".*_benchmark_synth", kind(rule, //xls/modules/zstd/...))')
+          for target in ${targets}; do
+            bazel run -c opt -- "${target}"
+          done
+
+      - name: Build ZSTD place and route targets (opt)
+        if: ${{ !cancelled() }}
+        run: |
+          bazel build -c opt -- $(bazel query 'filter(".*_place_and_route", kind(rule, //xls/modules/zstd/...))')
diff --git a/dependency_support/com_github_facebook_zstd/bundled.BUILD.bazel b/dependency_support/com_github_facebook_zstd/bundled.BUILD.bazel
index dbe80f692e..037ba422b7 100644
--- a/dependency_support/com_github_facebook_zstd/bundled.BUILD.bazel
+++ b/dependency_support/com_github_facebook_zstd/bundled.BUILD.bazel
@@ -1,4 +1,4 @@
-# Copyright 2023 The XLS Authors
+# Copyright 2024 The XLS Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,38 +12,203 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-package(default_visibility = ["//visibility:public"])
+""" Builds zstd.
+""" -licenses(["notice"]) +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") -# Builds everything together similarly to the zstd cmake file: -# https://github.com/facebook/zstd/blob/dev/build/cmake/lib/CMakeLists.txt -# but with legacy support and defines as in the zstd BUCK file: -# https://github.com/facebook/zstd/blob/dev/lib/BUCK +package(default_visibility = ["//visibility:public"]) -cc_library( - name = "zstd", +filegroup( + name = "common_sources", srcs = glob([ - "lib/common/*.h", "lib/common/*.c", - "lib/compress/*.h", + "lib/common/*.h", + ]), +) + +filegroup( + name = "compress_sources", + srcs = glob([ "lib/compress/*.c", - "lib/decompress/*.h", + "lib/compress/*.h", + ]), +) + +filegroup( + name = "decompress_sources", + srcs = glob([ "lib/decompress/*.c", - "lib/decompress/*.S", - "lib/deprecated/*.h", - "lib/deprecated/*.c", - "lib/dictBuilder/*.h", + "lib/decompress/*.h", + ]) + select({ + "@platforms//os:windows": [], + "//conditions:default": glob(["lib/decompress/*.S"]), + }), +) + +filegroup( + name = "dictbuilder_sources", + srcs = glob([ "lib/dictBuilder/*.c", - "lib/legacy/*.h", - "lib/legacy/*.c", + "lib/dictBuilder/*.h", ]), +) + +cc_library( + name = "zstd", + srcs = [ + ":common_sources", + ":compress_sources", + ":decompress_sources", + ":dictbuilder_sources", + ], hdrs = [ + "lib/zdict.h", "lib/zstd.h", + "lib/zstd_errors.h", + ], + includes = ["lib"], + linkopts = ["-pthread"], + linkstatic = True, + local_defines = [ + "XXH_NAMESPACE=ZSTD_", + "ZSTD_MULTITHREAD", + "ZSTD_BUILD_SHARED=OFF", + "ZSTD_BUILD_STATIC=ON", + ] + select({ + "@platforms//os:windows": ["ZSTD_DISABLE_ASM"], + "//conditions:default": [], + }), +) + +cc_binary( + name = "zstd_cli", + srcs = glob( + include = [ + "programs/*.c", + "programs/*.h", + ], + exclude = [ + "programs/datagen.c", + "programs/datagen.h", + "programs/platform.h", + "programs/util.h", + ], + ), + deps = [ + ":datagen", + ":util", + ":zstd", + ], +) + +cc_library( + name = 
"util", + srcs = [ + "programs/platform.h", + "programs/util.c", + ], + hdrs = [ + "lib/common/compiler.h", + "lib/common/debug.h", + "lib/common/mem.h", + "lib/common/portability_macros.h", + "lib/common/zstd_deps.h", + "programs/util.h", + ], +) + +cc_library( + name = "datagen", + srcs = [ + "programs/datagen.c", + "programs/platform.h", + ], + hdrs = ["programs/datagen.h"], + deps = [":util"], +) + +cc_binary( + name = "datagen_cli", + srcs = [ + "programs/lorem.c", + "programs/lorem.h", + "tests/datagencli.c", + "tests/loremOut.c", + "tests/loremOut.h", + ], + includes = [ + "programs", + "tests", + ], + deps = [":datagen"], +) + +cc_test( + name = "fullbench", + srcs = [ + "lib/decompress/zstd_decompress_internal.h", + "programs/benchfn.c", + "programs/benchfn.h", + "programs/benchzstd.c", + "programs/benchzstd.h", + "programs/lorem.c", + "programs/lorem.h", + "programs/timefn.c", + "programs/timefn.h", + "tests/fullbench.c", + "tests/loremOut.c", + "tests/loremOut.h", + ], + copts = select({ + "@platforms//os:windows": [], + "//conditions:default": ["-Wno-deprecated-declarations"], + }), + includes = [ + "lib/common", + "programs", + "tests", + ], + deps = [ + ":datagen", + ":zstd", + ], +) + +# NOTE: Required because of direct zstd_compress.c include in decodecorpus sources +cc_library( + name = "decodecorpus_includes", + hdrs = [ + "lib/compress/zstd_compress.c", + ], +) + +cc_binary( + name = "decodecorpus", + srcs = [ + "tests/decodecorpus.c", + ] + glob( + [ + "programs/*.c", + "programs/*.h", + ], + exclude = [ + "programs/zstdcli.c", + ], + ), + deps = [ + ":zstd", + ":decodecorpus_includes", + ], + includes = [ + "lib/", + "lib/common/", + "lib/compress/", + "lib/dictBuilder/", + "lib/deprecated/", + "programs/", ], - strip_include_prefix = "lib", local_defines = [ - "ZSTD_LEGACY_SUPPORT=4", "XXH_NAMESPACE=ZSTD_", ], visibility = ["//visibility:public"], diff --git a/dependency_support/load_external.bzl b/dependency_support/load_external.bzl index 
7376f6699e..7aa893589b 100644 --- a/dependency_support/load_external.bzl +++ b/dependency_support/load_external.bzl @@ -192,15 +192,6 @@ def load_external_repositories(): url = "https://github.com/google/riegeli/archive/cb68d579f108c96831b6a7815da43ff24b4e5242.tar.gz", ) - # Needed by fuzztest. Release 2024-03-30, current as of 2024-06-26 - http_archive( - name = "net_zstd", - build_file = "@com_google_riegeli//third_party:net_zstd.BUILD", - sha256 = "30f35f71c1203369dc979ecde0400ffea93c27391bfd2ac5a9715d2173d92ff7", - strip_prefix = "zstd-1.5.6/lib", - urls = ["https://github.com/facebook/zstd/archive/v1.5.6.tar.gz"], - ) - # Needed by fuzztest. Release 2024-05-21, current as of 2024-06-26 http_archive( name = "snappy", @@ -331,3 +322,17 @@ def load_external_repositories(): urls = ["https://github.com/bazelbuild/rules_pkg/releases/download/1.0.0/rules_pkg-1.0.0.tar.gz"], sha256 = "cad05f864a32799f6f9022891de91ac78f30e0fa07dc68abac92a628121b5b11", ) + + # Used in C++ tests of the ZSTD Module + # Transitive dependency of fuzztest (required by riegeli in fuzztest workspace) + # Version fdfb2aff released on 2024-07-31 + # https://github.com/facebook/zstd/commit/fdfb2aff39dc498372d8c9e5f2330b692fea9794 + # Updated 2024-08-08 + http_archive( + name = "zstd", + sha256 = "9ace5a1b3c477048c6e034fe88d2abb5d1402ced199cae8e9eef32fdc32204df", + strip_prefix = "zstd-fdfb2aff39dc498372d8c9e5f2330b692fea9794", + urls = ["https://github.com/facebook/zstd/archive/fdfb2aff39dc498372d8c9e5f2330b692fea9794.zip"], + build_file = "//dependency_support/com_github_facebook_zstd:bundled.BUILD.bazel", + ) + diff --git a/xls/examples/ram.x b/xls/examples/ram.x index f42865ae7f..73a931b2fc 100644 --- a/xls/examples/ram.x +++ b/xls/examples/ram.x @@ -55,7 +55,7 @@ pub fn ReadWordReq(addr:uN[ADDR_WIDTH]) -> } // Behavior of reads and writes to the same address in the same "tick". 
-enum SimultaneousReadWriteBehavior : u2 { +pub enum SimultaneousReadWriteBehavior : u2 { // The read shows the contents at the address before the write. READ_BEFORE_WRITE = 0, // The read shows the contents at the address after the write. @@ -160,7 +160,7 @@ fn write_word_test() { // Function to compute num partitions (e.g. mask width) for a data_width-wide // word divided into word_partition_size-chunks. -fn num_partitions(word_partition_size: u32, data_width: u32) -> u32 { +pub fn num_partitions(word_partition_size: u32, data_width: u32) -> u32 { match word_partition_size { u32:0 => u32:0, _ => (word_partition_size + data_width - u32:1) / word_partition_size, @@ -251,7 +251,7 @@ proc RamModel mem[read_req.addr], SimultaneousReadWriteBehavior::WRITE_BEFORE_READ => value_to_write, - SimultaneousReadWriteBehavior::ASSERT_NO_CONFLICT => { - // Assertion failure, we have a conflicting read and write. - assert_eq(true, false); - mem[read_req.addr] // Need to return something. - }, + SimultaneousReadWriteBehavior::ASSERT_NO_CONFLICT => fail!("conflicting_read_and_write", mem[read_req.addr]), + _ => fail!("impossible_case", uN[DATA_WIDTH]:0), } } else { mem[read_req.addr] }; let read_resp_value = ReadResp { diff --git a/xls/modules/zstd/BUILD b/xls/modules/zstd/BUILD new file mode 100644 index 0000000000..a8d45f4943 --- /dev/null +++ b/xls/modules/zstd/BUILD @@ -0,0 +1,1068 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Build rules for XLS ZSTD codec implementation. + +load("@rules_hdl//place_and_route:build_defs.bzl", "place_and_route") +load("@rules_hdl//synthesis:build_defs.bzl", "benchmark_synth", "synthesize_rtl") +load("@rules_hdl//verilog:providers.bzl", "verilog_library") +load( + "//xls/build_rules:xls_build_defs.bzl", + "xls_benchmark_ir", + "xls_benchmark_verilog", + "xls_dslx_ir", + "xls_dslx_library", + "xls_dslx_test", + "xls_dslx_verilog", +) + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//xls:xls_users"], + licenses = ["notice"], +) + +xls_dslx_library( + name = "buffer_dslx", + srcs = [ + "buffer.x", + ], +) + +xls_dslx_test( + name = "buffer_dslx_test", + library = ":buffer_dslx", + tags = ["manual"], +) + +xls_dslx_library( + name = "window_buffer_dslx", + srcs = [ + "window_buffer.x", + ], + deps = [ + ":buffer_dslx", + ], +) + +xls_dslx_test( + name = "window_buffer_dslx_test", + library = ":window_buffer_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "window_buffer_verilog", + codegen_args = { + "module_name": "WindowBuffer64", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "WindowBuffer64", + library = ":window_buffer_dslx", + # TODO: 2024-01-25: Workaround for https://github.com/google/xls/issues/869 + # Force proc inlining and set last internal proc as top proc for IR optimization + opt_ir_args = { + "inline_procs": "true", + "top": "__window_buffer__WindowBuffer64__WindowBuffer_0__64_32_48_next", + }, + tags = ["manual"], + verilog_file = "window_buffer.v", +) + +xls_benchmark_ir( + name = "window_buffer_opt_ir_benchmark", + src = ":window_buffer_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "window_buffer_verilog_lib", + srcs = [ + ":window_buffer.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = 
"window_buffer_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "WindowBuffer64", + deps = [ + ":window_buffer_verilog_lib", + ], +) + +benchmark_synth( + name = "window_buffer_benchmark_synth", + synth_target = ":window_buffer_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "window_buffer_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":window_buffer_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "magic_dslx", + srcs = [ + "magic.x", + ], + deps = [ + ":buffer_dslx", + ], +) + +xls_dslx_test( + name = "magic_dslx_test", + library = ":magic_dslx", + tags = ["manual"], +) + +cc_library( + name = "data_generator", + srcs = ["data_generator.cc"], + hdrs = ["data_generator.h"], + data = [ + "@zstd//:decodecorpus", + ], + deps = [ + "//xls/common:subprocess", + "//xls/common/file:get_runfile_path", + "//xls/common/status:status_macros", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + ], +) + +xls_dslx_library( + name = "frame_header_dslx", + srcs = [ + "frame_header.x", + ], + deps = [ + ":buffer_dslx", + ], +) + +xls_dslx_library( + name = "common_dslx", + srcs = [ + "common.x", + ], + deps = [], +) + +xls_dslx_test( + name = "frame_header_dslx_test", + library = ":frame_header_dslx", + tags = ["manual"], +) + +xls_dslx_library( + name = "frame_header_test_dslx", + srcs = [ + "frame_header_test.x", + ], + deps = [ + ":buffer_dslx", + ":frame_header_dslx", + ], +) + +cc_test( + name = "frame_header_cc_test", + srcs = [ + "frame_header_test.cc", + ], + data = [ + ":frame_header_test_dslx", + ], + shard_count = 50, + 
tags = ["manual"], + deps = [ + ":data_generator", + "//xls/common:xls_gunit_main", + "//xls/common/file:filesystem", + "//xls/common/file:get_runfile_path", + "//xls/common/fuzzing:fuzztest", + "//xls/common/status:matchers", + "//xls/dslx:create_import_data", + "//xls/dslx:import_data", + "//xls/dslx:parse_and_typecheck", + "//xls/dslx/ir_convert:convert_options", + "//xls/dslx/ir_convert:ir_converter", + "//xls/dslx/type_system:parametric_env", + "//xls/ir:bits", + "//xls/ir:ir_test_base", + "//xls/ir:value", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest", + "@zstd", + ], +) + +xls_dslx_verilog( + name = "frame_header_verilog", + codegen_args = { + "module_name": "FrameHeaderDecoder", + "delay_model": "asap7", + "pipeline_stages": "9", + "reset": "rst", + "reset_data_path": "false", + "use_system_verilog": "false", + }, + dslx_top = "parse_frame_header_128", + library = ":frame_header_test_dslx", + tags = ["manual"], + verilog_file = "frame_header.v", +) + +xls_benchmark_ir( + name = "frame_header_opt_ir_benchmark", + src = ":frame_header_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "9", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "frame_header_verilog_lib", + srcs = [ + ":frame_header.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "frame_header_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "FrameHeaderDecoder", + deps = [ + ":frame_header_verilog_lib", + ], +) + +benchmark_synth( + name = "frame_header_benchmark_synth", + synth_target = ":frame_header_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "frame_header_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = 
":frame_header_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "raw_block_dec_dslx", + srcs = [ + "raw_block_dec.x", + ], + deps = [ + ":buffer_dslx", + ":common_dslx", + ], +) + +xls_dslx_test( + name = "raw_block_dec_dslx_test", + library = ":raw_block_dec_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "raw_block_dec_verilog", + codegen_args = { + "module_name": "RawBlockDecoder", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "RawBlockDecoder", + library = ":raw_block_dec_dslx", + tags = ["manual"], + verilog_file = "raw_block_dec.v", +) + +xls_benchmark_ir( + name = "raw_block_dec_opt_ir_benchmark", + src = ":raw_block_dec_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "raw_block_dec_verilog_lib", + srcs = [ + ":raw_block_dec.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "raw_block_dec_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "RawBlockDecoder", + deps = [ + ":raw_block_dec_verilog_lib", + ], +) + +benchmark_synth( + name = "raw_block_dec_benchmark_synth", + synth_target = ":raw_block_dec_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "raw_block_dec_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":raw_block_dec_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "rle_block_dec_dslx", + srcs = [ + "rle_block_dec.x", + ], + deps = [ + ":buffer_dslx", + ":common_dslx", + "//xls/modules/rle:rle_common_dslx", + "//xls/modules/rle:rle_dec_dslx", + ], +) + +xls_dslx_test( + name = 
"rle_block_dec_dslx_test", + library = ":rle_block_dec_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "rle_block_dec_verilog", + codegen_args = { + "module_name": "RleBlockDecoder", + "delay_model": "asap7", + "pipeline_stages": "3", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "RleBlockDecoder", + library = ":rle_block_dec_dslx", + # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 + # Force proc inlining and set last internal proc as top proc for IR optimization + opt_ir_args = { + "inline_procs": "true", + "top": "__rle_block_dec__RleBlockDecoder__BatchPacker_0_next", + }, + tags = ["manual"], + verilog_file = "rle_block_dec.v", +) + +xls_benchmark_ir( + name = "rle_block_dec_opt_ir_benchmark", + src = ":rle_block_dec_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "3", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "rle_block_dec_verilog_lib", + srcs = [ + ":rle_block_dec.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "rle_block_dec_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "RleBlockDecoder", + deps = [ + ":rle_block_dec_verilog_lib", + ], +) + +benchmark_synth( + name = "rle_block_dec_benchmark_synth", + synth_target = ":rle_block_dec_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "rle_block_dec_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":rle_block_dec_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "block_header_dslx", + srcs = [ + "block_header.x", + ], + deps = [ + ":buffer_dslx", + ":common_dslx", + ], +) + +xls_dslx_test( + name = "block_header_dslx_test", + library = ":block_header_dslx", + tags = ["manual"], +) 
+ +xls_dslx_library( + name = "dec_mux_dslx", + srcs = [ + "dec_mux.x", + ], + deps = [ + ":common_dslx", + ], +) + +xls_dslx_test( + name = "dec_mux_dslx_test", + library = ":dec_mux_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "dec_mux_verilog", + codegen_args = { + "module_name": "DecoderMux", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "DecoderMux", + library = ":dec_mux_dslx", + tags = ["manual"], + verilog_file = "dec_mux.v", +) + +xls_benchmark_ir( + name = "dec_mux_opt_ir_benchmark", + src = ":dec_mux_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "dec_mux_verilog_lib", + srcs = [ + ":dec_mux.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "dec_mux_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "DecoderMux", + deps = [ + ":dec_mux_verilog_lib", + ], +) + +benchmark_synth( + name = "dec_mux_benchmark_synth", + synth_target = ":dec_mux_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "dec_mux_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":dec_mux_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "dec_demux_dslx", + srcs = [ + "dec_demux.x", + ], + deps = [ + ":block_header_dslx", + ":common_dslx", + ], +) + +xls_dslx_test( + name = "dec_demux_dslx_test", + library = ":dec_demux_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "dec_demux_verilog", + codegen_args = { + "module_name": "DecoderDemux", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "DecoderDemux", + 
library = ":dec_demux_dslx", + tags = ["manual"], + verilog_file = "dec_demux.v", +) + +xls_benchmark_ir( + name = "dec_demux_opt_ir_benchmark", + src = ":dec_demux_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "dec_demux_verilog_lib", + srcs = [ + ":dec_demux.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "dec_demux_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "DecoderDemux", + deps = [ + ":dec_demux_verilog_lib", + ], +) + +benchmark_synth( + name = "dec_demux_benchmark_synth", + synth_target = ":dec_demux_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "dec_demux_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":dec_demux_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "5", +) + +xls_dslx_library( + name = "block_dec_dslx", + srcs = [ + "block_dec.x", + ], + deps = [ + ":common_dslx", + ":dec_demux_dslx", + ":dec_mux_dslx", + ":raw_block_dec_dslx", + ":rle_block_dec_dslx", + ], +) + +xls_dslx_test( + name = "block_dec_dslx_test", + library = ":block_dec_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "block_dec_verilog", + codegen_args = { + "module_name": "BlockDecoder", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "BlockDecoder", + library = ":block_dec_dslx", + # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 + # Force proc inlining and set last internal proc as top proc for IR optimization + opt_ir_args = { + "inline_procs": "true", + "top": "__xls_modules_zstd_dec_mux__BlockDecoder__DecoderMux_0_next", + }, + tags = ["manual"], + verilog_file = "block_dec.v", 
+) + +xls_benchmark_ir( + name = "block_dec_opt_ir_benchmark", + src = ":block_dec_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "block_dec_verilog_lib", + srcs = [ + ":block_dec.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "block_dec_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "BlockDecoder", + deps = [ + ":block_dec_verilog_lib", + ], +) + +benchmark_synth( + name = "block_dec_benchmark_synth", + synth_target = ":block_dec_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "block_dec_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":block_dec_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "ram_printer_dslx", + srcs = ["ram_printer.x"], + deps = [ + "//xls/examples:ram_dslx", + ], +) + +xls_dslx_test( + name = "ram_printer_dslx_test", + library = ":ram_printer_dslx", + tags = ["manual"], +) + +xls_dslx_library( + name = "sequence_executor_dslx", + srcs = [ + "sequence_executor.x", + ], + deps = [ + ":common_dslx", + ":ram_printer_dslx", + "//xls/examples:ram_dslx", + ], +) + +xls_dslx_test( + name = "sequence_executor_dslx_test", + dslx_test_args = { + "compare": "none", + }, + library = ":sequence_executor_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "sequence_executor_verilog", + codegen_args = { + "module_name": "sequence_executor", + "generator": "pipeline", + "delay_model": "asap7", + "ram_configurations": ",".join([ + "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + ram_name = "ram{}".format(num), + rd_req = "sequence_executor__rd_req_m{}_s".format(num), + rd_resp = 
"sequence_executor__rd_resp_m{}_r".format(num), + wr_req = "sequence_executor__wr_req_m{}_s".format(num), + wr_resp = "sequence_executor__wr_resp_m{}_r".format(num), + ) + for num in range(7) + ]), + "pipeline_stages": "8", + "reset": "rst", + "reset_data_path": "true", + "reset_active_low": "false", + "reset_asynchronous": "true", + "flop_inputs": "false", + "flop_single_value_channels": "false", + "flop_outputs": "false", + "worst_case_throughput": "1", + "use_system_verilog": "false", + }, + dslx_top = "SequenceExecutorZstd", + library = ":sequence_executor_dslx", + opt_ir_args = { + "inline_procs": "true", + "top": "__sequence_executor__SequenceExecutorZstd__SequenceExecutor_0__64_0_0_0_13_8192_65536_next", + }, + tags = ["manual"], + verilog_file = "sequence_executor.v", +) + +xls_benchmark_ir( + name = "sequence_executor_ir_benchmark", + src = ":sequence_executor_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "8", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +xls_benchmark_verilog( + name = "sequence_executor_verilog_benchmark", + tags = ["manual"], + verilog_target = "sequence_executor_verilog", +) + +verilog_library( + name = "sequence_executor_lib", + srcs = [ + ":sequence_executor.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "sequence_executor_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "sequence_executor", + deps = [ + ":sequence_executor_lib", + ], +) + +benchmark_synth( + name = "sequence_executor_benchmark_synth", + synth_target = ":sequence_executor_asap7", + tags = ["manual"], +) + +place_and_route( + name = "sequence_executor_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":sequence_executor_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + 
+xls_dslx_library( + name = "repacketizer_dslx", + srcs = [ + "repacketizer.x", + ], + deps = [ + ":common_dslx", + ], +) + +xls_dslx_test( + name = "repacketizer_dslx_test", + library = ":repacketizer_dslx", + tags = ["manual"], +) + +xls_dslx_verilog( + name = "repacketizer_verilog", + codegen_args = { + "module_name": "Repacketizer", + "delay_model": "asap7", + "pipeline_stages": "2", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "Repacketizer", + library = ":repacketizer_dslx", + tags = ["manual"], + verilog_file = "repacketizer.v", +) + +xls_benchmark_ir( + name = "repacketizer_opt_ir_benchmark", + src = ":repacketizer_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "2", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "repacketizer_verilog_lib", + srcs = [ + ":repacketizer.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "repacketizer_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "Repacketizer", + deps = [ + ":repacketizer_verilog_lib", + ], +) + +benchmark_synth( + name = "repacketizer_benchmark_synth", + synth_target = ":repacketizer_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "repacketizer_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":repacketizer_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "zstd_dec_dslx", + srcs = [ + "zstd_dec.x", + ], + deps = [ + ":block_dec_dslx", + ":block_header_dslx", + ":buffer_dslx", + ":common_dslx", + ":frame_header_dslx", + ":frame_header_test_dslx", + ":magic_dslx", + ":ram_printer_dslx", + ":repacketizer_dslx", + ":sequence_executor_dslx", + "//xls/examples:ram_dslx", + ], +) + +xls_dslx_verilog( + name = "zstd_dec_verilog", 
+ codegen_args = { + "module_name": "ZstdDecoder", + "generator": "pipeline", + "delay_model": "asap7", + "ram_configurations": ",".join([ + "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + ram_name = "ram{}".format(num), + rd_req = "zstd_dec__ram_rd_req_{}_s".format(num), + rd_resp = "zstd_dec__ram_rd_resp_{}_r".format(num), + wr_req = "zstd_dec__ram_wr_req_{}_s".format(num), + wr_resp = "zstd_dec__ram_wr_resp_{}_r".format(num), + ) + for num in range(7) + ]), + "pipeline_stages": "10", + "reset": "rst", + "reset_data_path": "true", + "reset_active_low": "false", + "reset_asynchronous": "true", + "flop_inputs": "false", + "flop_single_value_channels": "false", + "flop_outputs": "false", + "worst_case_throughput": "1", + "use_system_verilog": "false", + }, + dslx_top = "ZstdDecoder", + library = ":zstd_dec_dslx", + # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 + # Force proc inlining for IR optimization + opt_ir_args = { + "inline_procs": "true", + }, + tags = ["manual"], + verilog_file = "zstd_dec.v", +) + +xls_dslx_ir( + name = "zstd_dec_test_ir", + dslx_top = "ZstdDecoderTest", + ir_file = "zstd_dec_test.ir", + library = ":zstd_dec_dslx", + tags = ["manual"], +) + +cc_test( + name = "zstd_dec_cc_test", + size = "large", + srcs = [ + "zstd_dec_test.cc", + ], + data = [ + ":zstd_dec_test.ir", + ], + shard_count = 50, + tags = ["manual"], + deps = [ + ":data_generator", + "//xls/common:xls_gunit_main", + "//xls/common/file:filesystem", + "//xls/common/file:get_runfile_path", + "//xls/common/status:matchers", + "//xls/interpreter:channel_queue", + "//xls/interpreter:serial_proc_runtime", + "//xls/ir", + "//xls/ir:bits", + "//xls/ir:channel", + "//xls/ir:events", + "//xls/ir:ir_parser", + "//xls/ir:value", + "//xls/jit:jit_proc_runtime", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/types:span", + 
"@com_google_googletest//:gtest", + "@zstd", + ], +) + +xls_benchmark_ir( + name = "zstd_dec_opt_ir_benchmark", + src = ":zstd_dec_verilog.opt.ir", + benchmark_ir_args = { + #TODO: rewrite ram in opt_ir step to perform valid IR benchmark + "pipeline_stages": "1", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +verilog_library( + name = "zstd_dec_verilog_lib", + srcs = [ + ":zstd_dec.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "zstd_dec_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "ZstdDecoder", + deps = [ + ":zstd_dec_verilog_lib", + ], +) + +benchmark_synth( + name = "zstd_dec_benchmark_synth", + synth_target = ":zstd_dec_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "zstd_dec_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":zstd_dec_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) diff --git a/xls/modules/zstd/README.md b/xls/modules/zstd/README.md new file mode 100644 index 0000000000..eff9097fbe --- /dev/null +++ b/xls/modules/zstd/README.md @@ -0,0 +1,383 @@ +# ZSTD decoder + +The ZSTD decoder decompresses the correctly formed ZSTD frames and blocks. +It implements the [RFC 8878](https://www.rfc-editor.org/rfc/rfc8878.html) decompression algorithm. +Overview of the decoder architecture is presented on the diagram below. +The decoder comprises: +* frame decoder, +* block dispatcher, +* 3 types of processing units: RAW, RLE, and compressed, +* command aggregator, +* history buffer, +* repacketizer. + +Incoming ZSTD frames are processed in the following order: +1. magic number is detected, +2. frame header is parsed, +3. ZSTD data blocks are being redirected to correct processing unit based on the block header, +4. 
processing unit results are aggregated in correct order into a stream +and routed to the history buffer, +5. data block outputs are assembled based on the history buffer contents and update history, +6. decoded data is processed by repacketizer in order to prepare the final output of the decoder, +7. (optional) calculated checksum is compared against frame checksum. + +![](img/ZSTD_decoder.png) + +## ZSTD decoder architecture + +### Top level Proc +This state machine is responsible for receiving encoded ZSTD frames, buffering the input and passing it to decoder's internal components based on the state of the proc. +The states defined for the processing of ZSTD frame are as follows: + +```mermaid +stateDiagram + direction LR + + [*] --> DECODE_MAGIC_NUMBER + + DECODE_MAGIC_NUMBER --> DECODE_MAGIC_NUMBER: Not enough data + DECODE_MAGIC_NUMBER --> DECODE_FRAME_HEADER: Got magic number + DECODE_MAGIC_NUMBER --> ERROR: Corrupted + + DECODE_FRAME_HEADER --> DECODE_FRAME_HEADER: Not enough data + DECODE_FRAME_HEADER --> DECODE_BLOCK_HEADER: Header decoded + DECODE_FRAME_HEADER --> ERROR: Unsupported window size + DECODE_FRAME_HEADER --> ERROR: Corrupted + + DECODE_BLOCK_HEADER --> DECODE_BLOCK_HEADER: Not enough data + DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed raw data + DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed RLE data + DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed compressed data + DECODE_BLOCK_HEADER --> ERROR: Corrupted + + state if_decode_checksum <<choice>> + state if_block_done <<choice>> + + FEED_BLOCK_DECODER --> if_decode_checksum: Is the checksum available? + if_decode_checksum --> DECODE_CHECKSUM: True + if_decode_checksum --> DECODE_MAGIC_NUMBER: False + FEED_BLOCK_DECODER --> if_block_done: Is the block decoding done?
+ if_block_done --> DECODE_BLOCK_HEADER: Decode next block + if_block_done --> FEED_BLOCK_DECODER: Continue feeding + + DECODE_CHECKSUM --> DECODE_MAGIC_NUMBER: Frame decoded + + ERROR --> [*] +``` + +After going through initial stages of decoding magic number and frame header, decoder starts the block division process. +It decodes block headers to calculate how many bytes must be sent to the block dispatcher and when the current frame's last data block is being processed. +Knowing that, it starts feeding the block decoder with data required for decoding current block. +After transmitting all data required for current block, it loops around to the block header decoding state and when next block header is not found it decodes checksum when it was requested in frame header or finishes ZSTD frame decoding and loops around to magic number decoding. + +### ZSTD frame header decoder +This part of the design starts with detecting the ZSTD magic number. +Then it parses and decodes the frame header's content and checks the header's correctness. +If the frame header has the checksum option enabled, this will enable `DECODE_CHECKSUM` stage at the end of the frame decoding where the frame's checksum will be computed and compared with the checksum embedded at the end of the frame stream. + +### Block dispatcher (demux) +At this stage, block headers are parsed and removed from the block data stream. +Based on parse values, it directs the block data stream to either RAW, RLE or compressed block sections. +For this task it uses an 8 byte native interface: a 64-bit data bus and a 64-bit length field that contains the number of correct bits on the data bus. +It also attaches a unique block ID value to each processed data block. +The IDs are sequential starting from 0 and are reset only after receiving and processing the current frame's last data block. + +### RAW +This proc passes the received data directly to its output channel. 
+It preserves the block ID and attaches a tag, stating that the data contains literals and should be placed in the history buffer unchanged, to each data output. + +### RLE decoder +This proc receives a tuple (s, N), where s is an 8 bit symbol and N is an accompanying `symbol_count`. +The proc produces `N` repeats of the symbol `s`. +This step preserves the block ID and attaches the literals tag to all its outputs. + +### Compressed block decoder +This part of the design is responsible for decoding the compressed data blocks. +It ingests the byte stream, internally translates and interprets incoming data. +Only this part of the design can create data chunks tagged `copy` in addition to `literals`. +This step preserves the block ID. +A more in-depth description can be found in the [Compressed block decoder architecture](#compressed-block-decoder-architecture) paragraph of this doc. + +### Commands aggregator (mux) +This stage takes the output from either RAW, RLE or Command constructor and sends it to the History buffer and command execution stage. +This stage orders streams based on the ID value assigned by the block dispatcher. +It is expected that single base decoders (RAW, RLE, compressed block decoder) will be continuously transmitting a single ID to the point of sending the `last` signal which marks the last packet of currently decoded block. +That ID can change only when mux receives the `last` signal or `last` and `last_block` signals. + +It works as a priority mux that waits for a stream with the expected ID. +It continues to read that stream until the `last` signal is set, then it switches to the next stream ID. + +The command aggregator starts by waiting for `ID = 0`, after receiving the `last` signal it expects `ID = 1` and so on. +Only when both `last` and `last_block` are set the command aggregator will wait for `ID = 0`. + +### History buffer and command execution +This stage receives data which is tagged either `literals` or `copy`.
+This stage will show the following behavior, depending on the tag: +* `literals` + * Packet contents placed as newest in the history buffer, + * Packet contents copied to the decoder's output, +* `copy` + * Wait for all previous writes to be completed, + * Copy `copy_length` literals starting `offset_length` from the newest in history buffer to the decoder's output, + * Copy `copy_length` literals starting `offset_length` from the newest in history buffer to the history buffer as the newest. + +### Compressed block decoder architecture +This part of the design is responsible for processing the compressed blocks up to the `literals`/`copy` command sequence. +This sequence is then processed by the history buffer to generate expected data output. +Overview of the architecture is provided on the diagram below. +The architecture is split into several processing paths. +There are 3 of them: literals path, FSE encoded Huffman trees and sequence path. +Literals path uses Huffman trees to decode some types of compressed blocks: Compressed and Treeless blocks. + +![](img/ZSTD_compressed_block_decoder.png) + +#### Compressed block dispatcher +This proc parses literals section headers to calculate block compression format, Huffman tree size (if applicable based on compression format), compressed and regenerated sizes for literals. +If compressed block format is `Compressed_Literals_Block`, dispatcher reads Huffman tree header byte from Huffman bitstream, and directs expected number of bytes to the Huffman tree decoder. +Following this step, the proc sends an appropriate number of bytes to the literals decoder dispatcher. + +After sending literals to literals decompression, it redirects the remaining bytes to the sequence parsing stages. + +#### Command Constructor +This stage takes literals length, offset length and copy length.
+When `literals length` is greater than 0, it will send a request to the literals buffer to obtain `literals length` literals and then send them to the history buffer. +Then based on the offset and copy length it either creates a match command using the provided offset and match lengths, or uses repeated offset and updates the repeated offset memory. +Formed commands are sent to the Commands aggregator (mux). + +### Literals path architecture + +![](img/ZSTD_compressed_block_literals_decoder.png) + +#### Literals decoder dispatcher +This proc parses and consumes the literals section header. +Based on the received values it passes the remaining bytes to RAW/RLE/Huffman tree/Huffman code decoders. +It also controls the 4 stream operation mode [4-stream mode in RFC](https://www.rfc-editor.org/rfc/rfc8878.html#name-jump_table). + +All packets sent to the Huffman bitstream buffer will be tagged either `in_progress` or `finished`. +If the compressed literals use the 4 streams encoding, the dispatcher will send the `finished` tag 4 times, each time a fully compressed stream is sent to the bitstream buffer. + +#### RAW Literals +This stage simply passes the incoming bytes as literals to the literals buffer. + +#### RLE Literals +This stage works similarly to the [RLE stage](#rle-decoder) for RLE data blocks. + +#### Huffman bitstream buffer +This stage takes data from the literals decoder dispatcher and stores it in the buffer memory. +Once the data with the `finished` tag set is received, this stage sends a tuple containing (start, end) positions for the current bitstream to the Huffman codes decoder. +This stage receives a response from the Huffman codes decoder when decoding is done and all bits got processed. +Upon receiving this message, the buffer will reclaim free space. + +#### Huffman codes decoder +This stage receives bitstream pointers from the Huffman bitstream buffer and Huffman tree configuration from the Huffman tree builder. 
+It accesses the bitstream buffers memory to retrieve bitstream data in reversed byte order and runs it through an array of comparators to decode Huffman code to correct literals values. + +#### Literals buffer +This stage receives data either from RAW, RLE or Huffman decoder and stores it. +Upon receiving the literals copy command from the Command Constructor for `N` number of bytes, it provides a reply with `N` literals. + +### FSE Huffman decoder architecture + +![](img/ZSTD_compressed_block_Huffman_decoder.png) + +#### Huffman tree decoder dispatcher +This stage parses and consumes the Huffman tree description header. +Based on the value of the Huffman descriptor header, it passes the tree description to the FSE decoder or to direct weight extraction. + +#### FSE weight decoder +This stage performs multiple functions. +1. It decodes and builds the FSE distribution table. +2. It stores all remaining bitstream data. +3. After receiving the last byte, it translates the bitstream to Huffman weights using 2 interleaved FSE streams. + +#### Direct weight decoder +This stage takes the incoming bytes and translates them to the stream of Huffman tree weights. +The first byte of the transfer defines the number of symbols to be decoded. + +#### Weight aggregator +This stage receives tree weights either from the FSE decoder or the direct decoder and transfers them to Huffman tree builder. +This stage also resolves the number of bits of the final weight and the max number of bits required in the tree representation. +This stage will emit the weights and number of symbols of the same weight before the current symbol for all possible byte values. + +#### Huffman tree builder +This stage takes `max_number_of_bits` (maximal length of Huffman code) as the first value, then the number of symbols with lower weight for each possible weight (11 bytes), followed by a tuple (number of preceding symbols with the same weight, symbol's_weight). 
+It's expected to receive weights for all possible byte values in the correct order. +Based on this information, this stage will configure the Huffman codes decoder. + +### Sequence path architecture + +![](img/ZSTD_compressed_block_sequence_decoder.png) + +#### Sequence Header parser and dispatcher +This stage parses and consumes `Sequences_Section_Header`. +Based on the parsed data, it redirects FSE description to the FSE table decoder and triggers Literals FSE, Offset FSE or Match FSE decoder to reconfigure its values based on the FSE table decoder. +After parsing the FSE tables, this stage buffers bitstream and starts sending bytes, starting from the last one received as per ZSTD format. +Bytes are sent to all decoders at the same time. +This stage monitors and triggers sequence decoding phases starting from initialization, followed by decode and state advance. +FSE decoders send each other the number of bits they read. + +#### Literals FSE decoder +This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). +It initializes its state as the first FSE decoder. +In the decode phase, this stage is the last one to decode extra raw bits from the bitstream, and the number of ingested bits is transmitted to all other decoders. +This stage is the first stage to get a new FSE state from the bitstream, and it transmits the number of bits it used. + +#### Offset FSE decoder +This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). +It initializes its state as the second FSE decoder. +In the decode phase, this stage is the first one to decode extra raw bits from bitstream, and the number of ingested bits is transmitted to all other decoders. +This stage is the last decoder to update its FSE state after the decode phase, and it transmits the number of used bits to other decoders. 
+ +#### Match FSE decoder +This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). +It initializes its state as the last FSE decoder. +In the decode phase, this stage is the second one to decode extra raw bits from the bitstream, and the number of ingested bits is transmitted to all other decoders. +This stage is the second stage to update its state after the decode phase, and the number of used bits is sent to all other decoders. + +### Repacketizer +This proc is used at the end of the processing flow in the ZSTD decoder. +It gathers the output of `SequenceExecutor` proc and processes it to form final output packets of the ZSTD decoder. +Input packets coming from the `SequenceExecutor` consist of: + +* data - bit vector of constant length +* length - field describing how many bits in bit vector are valid +* last - flag which marks the last packet in currently decoded ZSTD frame. + +It is not guaranteed that all bits in data bit vectors in packets received from `SequenceExecutor` are valid as those can include padding bits which were added in previous decoding steps and now have to be removed. +Repacketizer buffers input packets, removes the padding bits and forms new packets with all bits of the bit vector valid, meaning that all bits are decoded data. +Newly formed packets are then sent out to the output of the whole ZSTD decoder. + +## Testing methodology + +Testing of the `ZSTD decoder` is carried out on two levels: + +* Decoder components +* Integrated decoder + +Each component of the decoder is tested individually in DSLX tests. +Testing on the DSLX level allows the creation of small test cases that test for both positive and negative outcomes of a given part of the design. +When need be, those test cases can be also modified by the user to better understand how the component operates. + +Tests of the integrated ZSTD decoder are written in C++. 
+The objective of those is to verify the functionality of the decoder as a whole. +Testing setup for the ZSTD decoder is based on comparing the simulated decoding results against the decoding of the reference library. +Currently, due to the restrictions from the ZSTD frame generator, it is possible to test only the positive cases (decoding valid ZSTD frames). + +### Failure points + +#### User-facing decoder errors + +The design will fail the tests under the following conditions: + +* Straightforward failures: + * Top Level State Machine transitions to `ERROR` state + * Simulation encounters `assert!()` or `fail!()` statements + * The decoding result from the simulation has a different size than the results from the reference library + * The decoding result from the simulation has different contents than the results from the reference library +* Caveats: + * Timeout occurred while waiting for a valid `Magic Number` to start the decoding process + * Other timeouts occurring while waiting on channel operations (To be fixed) + +Currently, all mentioned conditions lead to an eventual test failure. +Most of those cases are handled properly while some are yet to be reworked to finish faster or to provide more information about the error. +For example, in case of transitioning to the `ERROR` state, the test will timeout on channel operations waiting to read from the decoder output. +In case of waiting for a valid `Magic Number`, the decoder will transition to an `ERROR` state without registering the correct `Magic Number` on the input channel which will lead to a similar timeout. + +Those cases should be handled in a way that allows for early failure of the test. +It can be done through a Proc parameter enabled for tests that change the behavior of the logic, e.g. launching `assert!()` when the decoder enters the `ERROR` state. 
+Another idea is to use a special output channel for signaling internal states and errors to monitor the decoder for the errors encountered during decoding. +For example, in an invalid `Magic Number`, the test case should expect a certain type of error reported on this channel at the very beginning of the simulation. + +#### Failures in ZSTD Decoder components + +It is important to note that some of the errors (e.g. errors in magic number or frame header decoding) are easy to trigger in the integration test cases by manual modification of the generated ZSTD frames. +However, the majority of the errors require modification of the deeper parts of the raw ZSTD frame which is significantly harder. +Because of that, it is better to rely on DSLX tests for the individual components where inputs for the test cases are smaller, easier to understand and modify when needed. + +The components of the ZSTD decoder can fail on `assert!()` and `fail!()` statements or propagate specific error states to the Top Level Proc and cause it to transition to the `ERROR` state. +The following enumeration will describe how to trigger each possible ZSTD Decoder error. 
+ +The `ERROR` state can be encountered under the following conditions when running Top Level Proc C++ tests but also in DSLX tests for the specific components: +* Corrupted data on the `Magic Number` decoding stage + * Provide data for the decoding with the first 4 bytes not being the valid `Magic Number` (0xFD2FB528) +* Corrupted data during frame header decoding + * Set the `Reserved bit` in the frame header descriptor +* Unsupported Window Size during frame header decoding + * Set `Window Size` in frame header to value greater than `max window size` calculated from current `WINDOW_LOG_MAX` (by default in Top Level Proc tests `Window Size` must be greater than `0x78000000` to trigger the error) +* Corrupted data during Block Header decoding + * Set the `Block Type` of any block in the ZSTD frame to `RESERVED` + +The `assert!()` or `fail!()` will occur in: +* Buffer + * Add data to the buffer with `buffer_append()` when it is already full or unable to fit the whole length of the data + * Fetch data from the buffer with `buffer_pop()` when it is empty or does not have enough data +* DecoderDemux + * Receive more than one `raw` or `compressed` block in a single `BlockDataPacket` +* RawBlockDecoder + * Receive `BlockDataPacket` with `ID` different than the previous packet which did not have the `last` flag set +* DecoderMux + * At the beginning of the simulation, or after receiving an `ExtendedBlockDataPacket` with both `last` and `last_block` set (i.e. when decoding of a new ZSTD frame starts), receive `ExtendedBlockDataPackets` on channels `raw_r`, `rle_r` and `cmp_r` with none of them having `ID==0` + * Receive `ExtendedBlockDataPacket` with a smaller `ID` than any of the previously processed packets during the current ZSTD frame decoding +* SequenceExecutor + * Receive `SequenceExecutorPacket` with `msg_type==SEQUENCE` and `content` field with value: `0` + +There are also several `impossible cases` covered by `assert!()` and `fail!()`: + +* Frame header decoder + * `Window Descriptor` does not
exist after checking that it is available in the frame header + * `Frame Content Size` does not exist after checking that it is available in the frame header + * `Dictionary ID Flag` has an illegal value + * `Frame Content Size Flag` has an illegal value +* DecoderDemux + * Data packet has a different `Block Type` than `RAW`, `RLE` or `COMPRESSED` +* SequenceExecutor + * Proc transitions to `SEQUENCE_READ` state after receiving `SequenceExecutorPacket` with `msg_type` different than `SEQUENCE` or the message was invalid +* Top Level Proc + * Block header type is different than `RAW`, `RLE`, `COMPRESSED` + * There is not enough data to feed the `BlockDecoder`, even though the previous check indicated a valid amount of data in the buffer + +### Testing against [libzstd](https://github.com/facebook/zstd) + +Design is verified by comparing decoding results to the reference library `libzstd`. +ZSTD frames used for testing are generated with [decodecorpus](https://github.com/facebook/zstd/blob/dev/tests/decodecorpus.c) utility. +The generated frame is then decoded with `libzstd`. + +#### Positive test cases + +If the results of decoding with `libzstd` are valid, the test runs the same encoded frame through the simulation of DSLX design. +The output of the simulation is gathered and compared with the results of `libzstd` in terms of its size and contents. + +Encoded ZSTD frame is generated with the function `GenerateFrame(int seed, BlockType btype)` from [data_generator](https://github.com/antmicro/xls/blob/52186-zstd-top/xls/modules/zstd/data_generator.cc) library. +This function takes as arguments the seed for the generator and enum which codes the type of blocks that should be generated in a given frame. +The available block types are: + +* RAW +* RLE +* COMPRESSED +* RANDOM + +The function returns a vector of bytes representing a valid encoded ZSTD frame. 
+Such a generated frame can be passed to `ParseAndCompareWithZstd(std::vector<uint8_t> frame)` which is responsible for decoding the frame, running simulation and comparing the results. + +Tests are available in the `zstd_dec_test.cc` file and can be launched with the following Bazel command: + +``` +bazel test //xls/modules/zstd:zstd_dec_cc_test +``` + +#### Negative test cases + +Currently, `decodecorpus` does not support generating ZSTD frames with subtle errors that trigger failure points provided in the ZSTD Decoder. +Because of that, it is not possible to efficiently provide valuable negative tests for the integrated ZSTD Decoder. + +The alternatives for writing negative tests include: + +* Generating a well-known valid ZSTD frame from a specific generator seed and then tweaking the raw bits in this frame to trigger the error response from the decoder +* Using [FuzzTest](https://github.com/google/fuzztest) to create multiple randomized test cases for the decoder and then compare `libzstd` decoder failure with `ZSTD Decoder` failure. + +### Known Limitations + +* **[WIP]** Bugs in the current flow cause failures in some of the test cases of decoding ZSTD frame with RLE block types +* **[WIP]** Compressed block type is not supported +* Checksum is not being verified + diff --git a/xls/modules/zstd/block_dec.x b/xls/modules/zstd/block_dec.x new file mode 100644 index 0000000000..6797484d59 --- /dev/null +++ b/xls/modules/zstd/block_dec.x @@ -0,0 +1,170 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +import xls.modules.zstd.common; +import xls.modules.zstd.dec_demux as demux; +import xls.modules.zstd.raw_block_dec as raw; +import xls.modules.zstd.rle_block_dec as rle; +import xls.modules.zstd.dec_mux as mux; + +type BlockDataPacket = common::BlockDataPacket; +type BlockData = common::BlockData; +type BlockPacketLength = common::BlockPacketLength; +type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket; +type CopyOrMatchContent = common::CopyOrMatchContent; +type CopyOrMatchLength = common::CopyOrMatchLength; +type SequenceExecutorPacket = common::SequenceExecutorPacket; +type SequenceExecutorMessageType = common::SequenceExecutorMessageType; + +// Proc responsible for connecting internal procs used in Block data decoding. +// It handles incoming block data packets by redirecting those to demuxer which passes those to +// block decoder procs specific for given block type. Results are then gathered by mux which +// transfers decoded data further. 
The connections are visualised on the following diagram: +// +// Block Decoder +// ┌───────────────────────────────────────┐ +// │ Raw Block Decoder │ +// │ ┌───────────────────┐ │ +// │ ┌─► ├┐ │ +// │ Demux │ └───────────────────┘│ Mux │ +// │┌─────┐│ Rle Block Decoder │ ┌─────┐│ +// ││ ├┘ ┌───────────────────┐└─► ││ +// ──┼► ├──► ├──► ├┼─► +// ││ ├┐ └───────────────────┘┌─► ││ +// │└─────┘│ Cmp Block Decoder │ └─────┘│ +// │ │ ┌───────────────────┐│ │ +// │ └─► ├┘ │ +// │ └───────────────────┘ │ +// └───────────────────────────────────────┘ + +proc BlockDecoder { + input_r: chan<BlockDataPacket> in; + output_s: chan<SequenceExecutorPacket> out; + + config (input_r: chan<BlockDataPacket> in, output_s: chan<SequenceExecutorPacket> out) { + let (demux_raw_s, demux_raw_r) = chan<BlockDataPacket>("demux_raw"); + let (demux_rle_s, demux_rle_r) = chan<BlockDataPacket>("demux_rle"); + let (demux_cmp_s, demux_cmp_r) = chan<BlockDataPacket>("demux_cmp"); + let (mux_raw_s, mux_raw_r) = chan<ExtendedBlockDataPacket>("mux_raw"); + let (mux_rle_s, mux_rle_r) = chan<ExtendedBlockDataPacket>("mux_rle"); + let (mux_cmp_s, mux_cmp_r) = chan<ExtendedBlockDataPacket>("mux_cmp"); + + spawn demux::DecoderDemux(input_r, demux_raw_s, demux_rle_s, demux_cmp_s); + spawn raw::RawBlockDecoder(demux_raw_r, mux_raw_s); + spawn rle::RleBlockDecoder(demux_rle_r, mux_rle_s); + // TODO(lpawelcz): 2023-11-28 change to compressed block decoder proc + spawn raw::RawBlockDecoder(demux_cmp_r, mux_cmp_s); + spawn mux::DecoderMux(mux_raw_r, mux_rle_r, mux_cmp_r, output_s); + + (input_r, output_s) + } + + init { } // stateless: this proc only wires the sub-procs together + + next(state: ()) { } +} + +#[test_proc] +proc BlockDecoderTest { // sends encoded block packets to BlockDecoder and compares the decoded output + terminator: chan<bool> out; + input_s: chan<BlockDataPacket> out; + output_r: chan<SequenceExecutorPacket> in; + + init {} + + config (terminator: chan<bool> out) { + let (input_s, input_r) = chan<BlockDataPacket>("input"); + let (output_s, output_r) = chan<SequenceExecutorPacket>("output"); + + spawn BlockDecoder(input_r, output_s); + + (terminator, input_s, output_r) + } + + next(state: ()) { + let tok = join(); + let EncodedDataBlocksPackets: BlockDataPacket[13] = [ + // RAW Block 1 byte + BlockDataPacket { id: u32:0, last: true, last_block: false, data: BlockData:0xDE000008, length: BlockPacketLength:32 }, + // RAW
Block 2 bytes + BlockDataPacket { id: u32:1, last: true, last_block: false, data: BlockData:0xDEAD000010, length: BlockPacketLength:40 }, + // RAW Block 4 bytes + BlockDataPacket { id: u32:2, last: true, last_block: false, data: BlockData:0xDEADBEEF000020, length: BlockPacketLength:56 }, + // RAW Block 5 bytes (block header takes one full packet) + BlockDataPacket { id: u32:3, last: true, last_block: false, data: BlockData:0xDEADBEEFEF000028, length: BlockPacketLength:64 }, + // RAW Block 24 bytes (multi-packet block header with unaligned data in the last packet) + BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0x12345678900000C0, length: BlockPacketLength:64 }, + BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0x1234567890ABCDEF, length: BlockPacketLength:64 }, + BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0xFEDCBA0987654321, length: BlockPacketLength:64 }, + BlockDataPacket { id: u32:4, last: true, last_block: false, data: BlockData:0xF0F0F0, length: BlockPacketLength:24 }, + + // RLE Block 1 byte + BlockDataPacket { id: u32:5, last: true, last_block: false, data: BlockData:0x6700000a, length: BlockPacketLength:32 }, + // RLE Block 2 bytes + BlockDataPacket { id: u32:6, last: true, last_block: false, data: BlockData:0x45000012, length: BlockPacketLength:32 }, + // RLE Block 4 bytes + BlockDataPacket { id: u32:7, last: true, last_block: false, data: BlockData:0x23000022, length: BlockPacketLength:32 }, + // RLE Block 8 bytes (block takes one full packet) + BlockDataPacket { id: u32:8, last: true, last_block: false, data: BlockData:0x10000042, length: BlockPacketLength:32 }, + // RLE Block 26 bytes (multi-packet block header with unaligned data in the last packet) + BlockDataPacket { id: u32:9, last: true, last_block: true, data: BlockData:0xDE0000d2, length: BlockPacketLength:32 }, + ]; + + let tok = for ((counter, block_packet), tok): ((u32, BlockDataPacket), token) in 
enumerate(EncodedDataBlocksPackets) { + let tok = send(tok, input_s, block_packet); + trace_fmt!("Sent #{} encoded block packet, {:#x}", counter + u32:1, block_packet); + (tok) + }(tok); + + let DecodedDataBlocksPackets: SequenceExecutorPacket[16] = [ + // RAW Block 1 byte + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDE, length: CopyOrMatchLength:8 }, + // RAW Block 2 bytes + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEAD, length: CopyOrMatchLength:16 }, + // RAW Block 4 bytes + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEADBEEF, length: CopyOrMatchLength:32 }, + // RAW Block 5 bytes (block header takes one full packet) + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEADBEEFEF, length: CopyOrMatchLength:40 }, + // RAW Block 24 bytes (multi-packet block header with unaligned data in the last packet) + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1234567890, length: CopyOrMatchLength:40 }, + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1234567890ABCDEF, length: CopyOrMatchLength:64 }, + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xFEDCBA0987654321, length: CopyOrMatchLength:64 }, + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xF0F0F0, length: CopyOrMatchLength:24 }, + + // RLE Block 1 byte + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x67, length: CopyOrMatchLength:8 }, + // RLE Block 2 bytes + SequenceExecutorPacket { last: false, 
msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x4545, length: CopyOrMatchLength:16 }, + // RLE Block 4 bytes + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x23232323, length: CopyOrMatchLength:32 }, + // RLE Block 8 bytes (block takes one full packet) + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1010101010101010, length: CopyOrMatchLength:64 }, + // RLE Block 26 bytes (multi-packet block header with unaligned data in the last packet) + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: CopyOrMatchLength:64 }, + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: CopyOrMatchLength:64 }, + SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: CopyOrMatchLength:64 }, + SequenceExecutorPacket { last: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDE, length: CopyOrMatchLength:16 }, + ]; + + let tok = for ((counter, expected_block_packet), tok): ((u32, SequenceExecutorPacket), token) in enumerate(DecodedDataBlocksPackets) { + let (tok, decoded_block_packet) = recv(tok, output_r); + trace_fmt!("Received #{} decoded block packet, data: 0x{:x}", counter + u32:1, decoded_block_packet); + trace_fmt!("Expected #{} decoded block packet, data: 0x{:x}", counter + u32:1, expected_block_packet); + assert_eq(decoded_block_packet, expected_block_packet); + (tok) + }(tok); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/block_header.x b/xls/modules/zstd/block_header.x new file mode 100644 index 0000000000..455b3295e1 --- /dev/null +++ b/xls/modules/zstd/block_header.x @@ -0,0 +1,108 @@ +// 
Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains utilities related to ZSTD Block Header parsing.
+// More information about the ZSTD Block Header can be found in:
+// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2
+
+import std;
+import xls.modules.zstd.buffer as buff;
+import xls.modules.zstd.common as common;
+
+type Buffer = buff::Buffer;
+type BufferStatus = buff::BufferStatus;
+type BlockType = common::BlockType;
+
+// Status values reported by the block header parsing function:
+//   OK             - a header was popped and its block type is not RESERVED
+//   CORRUPTED      - a header was popped but its block type is RESERVED
+//   NO_ENOUGH_DATA - the buffer pop did not report OK
+pub enum BlockHeaderStatus: u2 {
+    OK = 0,
+    CORRUPTED = 1,
+    NO_ENOUGH_DATA = 2,
+}
+
+// Structure for data obtained from decoding Block_Header.
+// Field positions inside the 24-bit header (see `extract_block_header`):
+//   last  - bit 0
+//   btype - bits 1..2
+//   size  - bits 3..23
+pub struct BlockHeader {
+    last: bool,
+    btype: BlockType,
+    size: u21,
+}
+
+// Structure for returning results of block header parsing:
+// the buffer to continue with, the parsing status and the decoded header.
+pub struct BlockHeaderResult {
+    buffer: Buffer,
+    status: BlockHeaderStatus,
+    header: BlockHeader,
+}
+
+// Auxiliary constant that can be used to initialize Proc's state
+// with empty BlockHeader, because `zero!` cannot be used in that context
+// NOTE(review): `zero!()` carries no type argument in this text - it was
+// presumably lost together with other angle-bracket content; confirm against
+// the original file.
+pub const ZERO_BLOCK_HEADER = zero!();
+
+// Extracts Block_Header fields from 24-bit chunk of data
+// that is assumed to be a valid Block_Header.
+// No validation is done here: the two `btype` bits are cast to BlockType
+// as they are, so a RESERVED value is passed through to the caller.
+pub fn extract_block_header(data:u24) -> BlockHeader {
+    BlockHeader {
+        size: data[3:24],
+        btype: data[1:3] as BlockType,
+        last: data[0:1],
+    }
+}
+
+// Parses a Buffer and extracts information from a Block_Header. 
Returns BlockHeaderResult
+// with outcome of operations on buffer and information extracted from the Block_Header.
+//
+// Possible outcomes:
+//   OK             - header decoded; `buffer` is the input with the header removed
+//   CORRUPTED      - block type is RESERVED; `buffer` is the unmodified input
+//                    and `header` is zeroed
+//   NO_ENOUGH_DATA - the pop did not succeed; `buffer` is the unmodified input
+//                    and `header` is zeroed
+pub fn parse_block_header(buffer: Buffer) -> BlockHeaderResult {
+    // NOTE(review): the width popped here is not visible in this text; the
+    // result is fed to `extract_block_header`, so presumably 24 bits - confirm.
+    let (result, data) = buff::buffer_fixed_pop_checked(buffer);
+
+    match result.status {
+        BufferStatus::OK => {
+            let block_header = extract_block_header(data);
+            if (block_header.btype != BlockType::RESERVED) {
+                BlockHeaderResult {status: BlockHeaderStatus::OK, header: block_header, buffer: result.buffer}
+            } else {
+                // The original `buffer` (header still in place) is returned, not `result.buffer`.
+                BlockHeaderResult {status: BlockHeaderStatus::CORRUPTED, header: zero!(), buffer: buffer}
+            }
+        },
+        // Every non-OK buffer status is reported as NO_ENOUGH_DATA.
+        _ => {
+            trace_fmt!("parse_block_header: Not enough data to parse block header! {}", buffer.length);
+            BlockHeaderResult {status: BlockHeaderStatus::NO_ENOUGH_DATA, header: zero!(), buffer: buffer}
+        }
+    }
+}
+
+// Covers the OK path (RAW and RLE headers) and the NO_ENOUGH_DATA path.
+// NOTE(review): there is no case for the CORRUPTED (RESERVED block type) path.
+#[test]
+fn test_parse_block_header() {
+    // 0x8001: bit 0 set (last), bits 1..2 = 0 (RAW), remaining bits = 0x1000
+    let buffer = Buffer { content: u32:0x8001 , length: u32:24};
+    let result = parse_block_header(buffer);
+    assert_eq(result, BlockHeaderResult {
+        status: BlockHeaderStatus::OK,
+        header: BlockHeader { last: u1:1, btype: BlockType::RAW, size: u21:0x1000 },
+        buffer: Buffer { content: u32:0, length: u32:0 }
+    });
+
+    // 0x91A2: bit 0 clear, bits 1..2 = 1 (RLE), remaining bits = 0x1234
+    let buffer = Buffer { content: u32:0x91A2, length: u32:24};
+    let result = parse_block_header(buffer);
+    assert_eq(result, BlockHeaderResult {
+        status: BlockHeaderStatus::OK,
+        header: BlockHeader { last: u1:0, btype: BlockType::RLE, size: u21:0x1234 },
+        buffer: Buffer { content: u32:0, length: u32:0 }
+    });
+
+    // Only 16 bits available: the buffer must come back untouched.
+    let buffer = Buffer { content: u32:0x001, length: u32:16};
+    let result = parse_block_header(buffer);
+    assert_eq(result, BlockHeaderResult {
+        status: BlockHeaderStatus::NO_ENOUGH_DATA,
+        header: zero!(),
+        buffer: Buffer { content: u32:0x001, length: u32:16 }
+    });
+}
diff --git a/xls/modules/zstd/buffer.x b/xls/modules/zstd/buffer.x
new file mode 100644
index 0000000000..d4f9acfa20
--- /dev/null
+++ b/xls/modules/zstd/buffer.x
@@ -0,0 +1,355 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under 
the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains an implementation of a Buffer structure that acts as +// a simple FIFO. Additionally, the file provides various functions that +// can simplify access to the stored data. +// +// The utility functions containing the `_checked` suffix serve two purposes: +// they perform the actual operation and return information on whether +// the operation was successful. If you are sure that the precondition is +// always true, you can use the function with the same name but without +// the `_checked` suffix. 
+
+import std;
+
+// Structure to hold the buffered data
+//   content - the stored bits; data is appended above the bits already held
+//             and removed from the least significant end (see
+//             `buffer_append` / `buffer_pop`)
+//   length  - number of bits of `content` currently in use
+// NOTE(review): CAPACITY is not declared anywhere in this text; presumably it
+// is a parametric of the struct whose declaration was lost - confirm.
+pub struct Buffer {
+    content: bits[CAPACITY],
+    length: u32
+}
+
+// Status values reported by the functions operating on a Buffer
+pub enum BufferStatus : u2 {
+    OK = 0,
+    NO_ENOUGH_SPACE = 1,
+    NO_ENOUGH_DATA = 2,
+}
+
+// Structure for returning Buffer and BufferStatus together
+pub struct BufferResult {
+    buffer: Buffer,
+    status: BufferStatus
+}
+
+// Checks whether a `buffer` can fit `data`.
+// Only the width of `data` (DSIZE) matters, its value is not inspected.
+pub fn buffer_can_fit(buffer: Buffer, data: bits[DSIZE]) -> bool {
+    buffer.length + DSIZE <= CAPACITY
+}
+
+// Exercises an empty, a half-full and a full 32-bit buffer.
+#[test]
+fn test_buffer_can_fit() {
+    let buffer = Buffer { content: u32:0, length: u32:0 };
+    assert_eq(buffer_can_fit(buffer, bits[0]:0), true);
+    assert_eq(buffer_can_fit(buffer, u16:0), true);
+    assert_eq(buffer_can_fit(buffer, u32:0), true);
+    assert_eq(buffer_can_fit(buffer, u33:0), false);
+
+    let buffer = Buffer { content: u32:0, length: u32:16 };
+    assert_eq(buffer_can_fit(buffer, bits[0]:0), true);
+    assert_eq(buffer_can_fit(buffer, u16:0), true);
+    assert_eq(buffer_can_fit(buffer, u17:0), false);
+    assert_eq(buffer_can_fit(buffer, u32:0), false);
+
+    let buffer = Buffer { content: u32:0, length: u32:32 };
+    assert_eq(buffer_can_fit(buffer, bits[0]:0), true);
+    assert_eq(buffer_can_fit(buffer, u1:0), false);
+    assert_eq(buffer_can_fit(buffer, u16:0), false);
+    assert_eq(buffer_can_fit(buffer, u32:0), false);
+}
+
+// Checks whether a `buffer` has at least `length` amount of data
+// (`length` is compared against `buffer.length`, i.e. it is a bit count)
+pub fn buffer_has_at_least(buffer: Buffer, length: u32) -> bool {
+    length <= buffer.length
+}
+
+// Exercises an empty, a half-full and a full 32-bit buffer.
+#[test]
+fn test_buffer_has_at_least() {
+    let buffer = Buffer { content: u32:0, length: u32:0 };
+    assert_eq(buffer_has_at_least(buffer, u32:0), true);
+    assert_eq(buffer_has_at_least(buffer, u32:16), false);
+    assert_eq(buffer_has_at_least(buffer, u32:32), false);
+    assert_eq(buffer_has_at_least(buffer, u32:33), false);
+
+    let buffer = Buffer { content: u32:0, length: u32:16 };
+    assert_eq(buffer_has_at_least(buffer, u32:0), true);
+    assert_eq(buffer_has_at_least(buffer, u32:16), true);
+    assert_eq(buffer_has_at_least(buffer, u32:32), false);
+    assert_eq(buffer_has_at_least(buffer, u32:33), false);
+
+    let buffer = Buffer { content: u32:0, length: u32:32 };
+    assert_eq(buffer_has_at_least(buffer, u32:0), true);
+    assert_eq(buffer_has_at_least(buffer, u32:16), true);
+    assert_eq(buffer_has_at_least(buffer, u32:32), true);
+    assert_eq(buffer_has_at_least(buffer, u32:33), false);
+}
+
+// Returns a new buffer with `data` appended to the original `buffer`.
+// It will fail if the buffer cannot fit the data. For calls that need better
+// error handling, check `buffer_append_checked`
+pub fn buffer_append (buffer: Buffer, data: bits[DSIZE]) -> Buffer {
+    if buffer_can_fit(buffer, data) == false {
+        trace_fmt!("Not enough space in the buffer! {} + {} <= {}", buffer.length, DSIZE, CAPACITY);
+        // The unmodified buffer is the fallback value handed to `fail!`.
+        fail!("not_enough_space", buffer)
+    } else {
+        Buffer {
+            // Widen `data` to the buffer width and place it directly above
+            // the `buffer.length` bits that are already stored.
+            content: (data as bits[CAPACITY] << buffer.length) | buffer.content,
+            length: DSIZE + buffer.length
+        }
+    }
+}
+
+// Two 16-bit appends fill a 32-bit buffer; the first value ends up in the low half.
+#[test]
+fn test_buffer_append() {
+    let buffer = Buffer { content: u32:0, length: u32:0 };
+    let buffer = buffer_append(buffer, u16:0xBEEF);
+    assert_eq(buffer, Buffer { content: u32:0xBEEF, length: u32:16 });
+    let buffer = buffer_append(buffer, u16:0xDEAD);
+    assert_eq(buffer, Buffer { content: u32:0xDEADBEEF, length: u32:32 });
+}
+
+// Returns a new buffer with the `data` appended to the original `buffer` if
+// the buffer has enough space. Otherwise, it returns an unmodified buffer
+// along with an error. The results are stored in the BufferResult structure. 
+pub fn buffer_append_checked (buffer: Buffer, data: bits[DSIZE]) -> BufferResult {
+    // Non-failing front end for `buffer_append`: the capacity check happens
+    // here, so the append is only evaluated when the data is known to fit.
+    if buffer_can_fit(buffer, data) {
+        let extended = buffer_append(buffer, data);
+        BufferResult { status: BufferStatus::OK, buffer: extended }
+    } else {
+        // Hand the caller's buffer back untouched together with the error.
+        BufferResult { status: BufferStatus::NO_ENOUGH_SPACE, buffer: buffer }
+    }
+}
+
+// Fills a 32-bit buffer with two 16-bit appends, then checks that a third
+// append is rejected and leaves the buffer as it was.
+#[test]
+fn test_buffer_append_checked() {
+    let empty = Buffer { content: u32:0, length: u32:0 };
+
+    let after_first = buffer_append_checked(empty, u16:0xBEEF);
+    let expected_first = BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0xBEEF, length: u32:16 }
+    };
+    assert_eq(after_first, expected_first);
+
+    let after_second = buffer_append_checked(after_first.buffer, u16:0xDEAD);
+    let expected_second = BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0xDEADBEEF, length: u32:32 }
+    };
+    assert_eq(after_second, expected_second);
+
+    let after_overflow = buffer_append_checked(after_second.buffer, u16:0xCAFE);
+    let expected_overflow = BufferResult {
+        status: BufferStatus::NO_ENOUGH_SPACE,
+        buffer: after_second.buffer
+    };
+    assert_eq(after_overflow, expected_overflow);
+}
+
+// Returns `length` amount of data from a `buffer` and a new buffer with
+// the data removed. Since the Buffer structure acts as a simple FIFO,
+// it pops the data in the same order as they were added to the buffer.
+// If the buffer does not have enough data to meet the specified length,
+// the function will fail. For calls that need better error handling,
+// check `buffer_pop_checked`. 
+pub fn buffer_pop(buffer: Buffer, length: u32) -> (Buffer, bits[CAPACITY]) {
+    if buffer_has_at_least(buffer, length) {
+        // `length` low bits set: selects the data sitting at the least
+        // significant end of the content.
+        let low_bits = (bits[CAPACITY]:1 << length) - bits[CAPACITY]:1;
+        let popped = buffer.content & low_bits;
+        // What is left slides down to the least significant end.
+        let remaining = Buffer {
+            content: buffer.content >> length,
+            length: buffer.length - length
+        };
+        (remaining, popped)
+    } else {
+        trace_fmt!("Not enough data in the buffer!");
+        // Fallback value: the unmodified buffer and zeroed data.
+        fail!("not_enough_data", (buffer, bits[CAPACITY]:0))
+    }
+}
+
+// Drains a full 32-bit buffer in two 16-bit pops, low half first.
+#[test]
+fn test_buffer_pop() {
+    let full = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+
+    let (half, low_word) = buffer_pop(full, u32:16);
+    assert_eq(low_word, u32:0xBEEF);
+    assert_eq(half, Buffer { content: u32:0xDEAD, length: u32:16 });
+
+    let (drained, high_word) = buffer_pop(half, u32:16);
+    assert_eq(high_word, u32:0xDEAD);
+    assert_eq(drained, Buffer { content: u32:0, length: u32:0 });
+}
+
+// Returns `length` amount of data from a `buffer`, a new buffer with
+// the data removed and a positive status, if the buffer contains enough data.
+// Otherwise, it returns unmodified buffer, zeroed data field and error.
+// Since the Buffer structure acts as a simple FIFO, it pops the data in
+// the same order as they were added to the buffer.
+// The results are stored in the BufferResult structure. 
+pub fn buffer_pop_checked (buffer: Buffer, length: u32) -> (BufferResult, bits[CAPACITY]) {
+    // Non-failing front end for `buffer_pop`: the availability check happens
+    // here, so the pop is only evaluated when enough data is present.
+    if buffer_has_at_least(buffer, length) {
+        let (remaining, popped) = buffer_pop(buffer, length);
+        let outcome = BufferResult { status: BufferStatus::OK, buffer: remaining };
+        (outcome, popped)
+    } else {
+        // Unmodified buffer plus zeroed data on failure.
+        let outcome = BufferResult { status: BufferStatus::NO_ENOUGH_DATA, buffer: buffer };
+        (outcome, bits[CAPACITY]:0)
+    }
+}
+
+// Drains a full 32-bit buffer in two 16-bit pops, then checks that a third
+// pop is rejected, returns zeroed data and leaves the buffer as it was.
+#[test]
+fn test_buffer_pop_checked() {
+    let full = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+
+    let (first, low_word) = buffer_pop_checked(full, u32:16);
+    let expected_first = BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0xDEAD, length: u32:16 }
+    };
+    assert_eq(first, expected_first);
+    assert_eq(low_word, u32:0xBEEF);
+
+    let (second, high_word) = buffer_pop_checked(first.buffer, u32:16);
+    let expected_second = BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0, length: u32:0 }
+    };
+    assert_eq(second, expected_second);
+    assert_eq(high_word, u32:0xDEAD);
+
+    let (third, nothing) = buffer_pop_checked(second.buffer, u32:16);
+    let expected_third = BufferResult {
+        status: BufferStatus::NO_ENOUGH_DATA,
+        buffer: second.buffer
+    };
+    assert_eq(third, expected_third);
+    assert_eq(nothing, u32:0);
+}
+
+// Behaves like `buffer_pop` except that the length of the popped data can be
+// set using a DSIZE function parameter. For calls that need better error
+// handling, check `buffer_fixed_pop_checked`. 
+pub fn buffer_fixed_pop (buffer: Buffer) -> (Buffer, bits[DSIZE]) {
+    // `buffer_pop` returns the data CAPACITY bits wide; the cast below
+    // resizes it to the requested DSIZE bits.
+    let (buffer, value) = buffer_pop(buffer, DSIZE);
+    (buffer, value as bits[DSIZE])
+}
+
+// Drains a full 32-bit buffer in two pops that each yield a u16.
+// NOTE(review): the calls below show no explicit DSIZE although it cannot be
+// read off the argument; presumably explicit parametrics were lost from this
+// text - confirm against the original file.
+#[test]
+fn test_buffer_fixed_pop() {
+    let buffer = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+    let (buffer, data) = buffer_fixed_pop(buffer);
+    assert_eq(data, u16:0xBEEF);
+    assert_eq(buffer, Buffer { content: u32:0xDEAD, length: u32:16 });
+    let (buffer, data) = buffer_fixed_pop(buffer);
+    assert_eq(data, u16:0xDEAD);
+    assert_eq(buffer, Buffer { content: u32:0, length: u32:0 });
+}
+
+// Behaves like `buffer_pop_checked` except that the length of the popped data
+// can be set using a DSIZE function parameter.
+pub fn buffer_fixed_pop_checked (buffer: Buffer) -> (BufferResult, bits[DSIZE]) {
+    // On failure `buffer_pop_checked` yields zeroed data, so the cast below
+    // yields a zeroed DSIZE-bit value as well.
+    let (result, value) = buffer_pop_checked(buffer, DSIZE);
+    (result, value as bits[DSIZE])
+}
+
+// Two successful 16-bit pops followed by a rejected one on the empty buffer.
+#[test]
+fn test_buffer_fixed_pop_checked() {
+    let buffer = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+    let (result1, data1) = buffer_fixed_pop_checked(buffer);
+    assert_eq(result1, BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0xDEAD, length: u32:16 }
+    });
+    assert_eq(data1, u16:0xBEEF);
+
+    let (result2, data2) = buffer_fixed_pop_checked(result1.buffer);
+    assert_eq(result2, BufferResult {
+        status: BufferStatus::OK,
+        buffer: Buffer { content: u32:0, length: u32:0 }
+    });
+    assert_eq(data2, u16:0xDEAD);
+
+    let (result3, data3) = buffer_fixed_pop_checked(result2.buffer);
+    assert_eq(result3, BufferResult {
+        status: BufferStatus::NO_ENOUGH_DATA,
+        buffer: result2.buffer
+    });
+    assert_eq(data3, u16:0);
+}
+
+// Returns `length` amount of data from a `buffer`.
+// It will fail if the buffer has no sufficient amount of data.
+// For calls that need better error handling, check `buffer_peek_checked`. 
+pub fn buffer_peek(buffer: Buffer, length: u32) -> bits[CAPACITY] {
+    if buffer_has_at_least(buffer, length) == false {
+        trace_fmt!("Not enough data in the buffer!");
+        // Zeroed data is the fallback value handed to `fail!`.
+        fail!("not_enough_data", bits[CAPACITY]:0)
+    } else {
+        // Keep only the `length` least significant bits of the content;
+        // the buffer itself is not modified.
+        let mask = (bits[CAPACITY]:1 << length) - bits[CAPACITY]:1;
+        buffer.content & mask
+    }
+}
+
+// Peeks 0, 16 and 32 bits from a full 32-bit buffer.
+#[test]
+fn test_buffer_peek() {
+    let buffer = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+    assert_eq(buffer_peek(buffer, u32:0), u32:0);
+    assert_eq(buffer_peek(buffer, u32:16), u32:0xBEEF);
+    assert_eq(buffer_peek(buffer, u32:32), u32:0xDEADBEEF);
+}
+
+// Returns `length` amount of data from a `buffer` together with a positive
+// status if the buffer has enough data. Otherwise, it returns zeroed data
+// and an error status. The buffer is never modified, so the result is
+// a plain (BufferStatus, data) tuple rather than a BufferResult.
+pub fn buffer_peek_checked (buffer: Buffer, length: u32) -> (BufferStatus, bits[CAPACITY]) {
+    if buffer_has_at_least(buffer, length) == false {
+        (BufferStatus::NO_ENOUGH_DATA, bits[CAPACITY]:0)
+    } else {
+        // Delegate to `buffer_peek` instead of repeating its mask logic,
+        // the same way `buffer_pop_checked` delegates to `buffer_pop`.
+        // The precondition of `buffer_peek` is guaranteed by this branch.
+        (BufferStatus::OK, buffer_peek(buffer, length))
+    }
+}
+
+// Peeks 0, 16 and 32 bits from a full 32-bit buffer, then checks that
+// asking for more than the buffer holds is rejected with zeroed data.
+#[test]
+fn test_buffer_peek_checked() {
+    let buffer = Buffer { content: u32:0xDEADBEEF, length: u32:32 };
+
+    let (status1, data1) = buffer_peek_checked(buffer, u32:0);
+    assert_eq(status1, BufferStatus::OK);
+    assert_eq(data1, u32:0);
+
+    let (status2, data2) = buffer_peek_checked(buffer, u32:16);
+    assert_eq(status2, BufferStatus::OK);
+    assert_eq(data2, u32:0xBEEF);
+
+    let (status3, data3) = buffer_peek_checked(buffer, u32:32);
+    assert_eq(status3, BufferStatus::OK);
+    assert_eq(data3, u32:0xDEADBEEF);
+
+    let (status4, data4) = buffer_peek_checked(buffer, u32:64);
+    assert_eq(status4, BufferStatus::NO_ENOUGH_DATA);
+    assert_eq(data4, u32:0);
+}
+
+// Creates a new, empty buffer: zeroed content and zero length
+pub fn buffer_new() -> Buffer {
+    Buffer { content: bits[CAPACITY]:0, length: u32:0 }
+}
+
+#[test]
+fn test_buffer_new() {
+    assert_eq(buffer_new(), Buffer { content: u32:0, length: u32:0 });
+}
diff --git a/xls/modules/zstd/common.x b/xls/modules/zstd/common.x
new file mode 100644
index 0000000000..8c6b1f1c5d
--- /dev/null
+++ b/xls/modules/zstd/common.x
@@ -0,0 +1,67 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Constants and types shared by the ZSTD decoder modules.
+pub const DATA_WIDTH = u32:64;
+pub const MAX_ID = u32::MAX;
+pub const SYMBOL_WIDTH = u32:8;
+pub const BLOCK_SIZE_WIDTH = u32:21;
+pub const OFFSET_WIDTH = u32:22;
+pub const HISTORY_BUFFER_SIZE_KB = u32:64;
+pub const BUFFER_WIDTH = u32:128;
+
+pub type BlockData = bits[DATA_WIDTH];
+pub type BlockPacketLength = u32;
+pub type BlockSize = bits[BLOCK_SIZE_WIDTH];
+pub type CopyOrMatchContent = BlockData;
+pub type CopyOrMatchLength = u64;
+pub type Offset = bits[OFFSET_WIDTH];
+
+// Block types; the numeric values are what the 2-bit field of a block header
+// is cast to (see `extract_block_header` in block_header.x).
+pub enum BlockType : u2 {
+    RAW = 0,
+    RLE = 1,
+    COMPRESSED = 2,
+    RESERVED = 3,
+}
+
+// Packet carrying (a part of) an encoded block.
+// NOTE(review): `length` looks like a bit count - dec_demux.x subtracts 24
+// header bits from it and converts it to bytes with `>> 3`; confirm.
+pub struct BlockDataPacket {
+    last: bool,
+    last_block: bool,
+    id: u32,
+    data: BlockData,
+    length: BlockPacketLength,
+}
+
+pub enum SequenceExecutorMessageType : u1 {
+    LITERAL = 0,
+    SEQUENCE = 1,
+}
+
+// A BlockDataPacket tagged with a message type.
+pub struct ExtendedBlockDataPacket {
+    msg_type: SequenceExecutorMessageType,
+    packet: BlockDataPacket,
+}
+
+pub struct SequenceExecutorPacket {
+    msg_type: SequenceExecutorMessageType,
+    length: CopyOrMatchLength, // Literal length or match length
+    content: CopyOrMatchContent, // Literal data or match offset
+    last: bool, // Last packet in frame
+}
+
+// Defines output format of the ZSTD Decoder
+pub struct ZstdDecodedPacket {
+    data: BlockData,
+    length: BlockPacketLength, // valid bits in data
+    last: bool, // Last decoded packet in frame
+}
diff --git a/xls/modules/zstd/data_generator.cc b/xls/modules/zstd/data_generator.cc
new file mode 100644
index 0000000000..6cd2f94ce2
--- /dev/null
+++ b/xls/modules/zstd/data_generator.cc
@@ -0,0 +1,128 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "xls/modules/zstd/data_generator.h"
+
+// NOTE(review): the header names of the following includes, and the template
+// arguments further down, are missing in this text; they appear to have been
+// lost together with other angle-bracket content - confirm against the
+// original file.
+#include
+#include
+#include
+#include
+#include // NOLINT
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "absl/algorithm/container.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/time/time.h"
+#include "absl/types/span.h"
+#include "xls/common/file/get_runfile_path.h"
+#include "xls/common/status/status_macros.h"
+#include "xls/common/subprocess.h"
+
+namespace xls::zstd {
+
+// Reads the whole file at `path` in binary mode and returns its contents.
+// Returns a NotFound error when the file cannot be opened.
+static absl::StatusOr> ReadFileAsRawData(
+    const std::filesystem::path& path) {
+  std::ifstream file(path, std::ios::binary);
+  if (!file.is_open()) {
+    return absl::NotFoundError("Unable to open a test file");
+  }
+
+  // Iterator-pair construction: consumes the stream up to end-of-file.
+  std::vector raw_data((std::istreambuf_iterator(file)),
+                       (std::istreambuf_iterator()));
+  return raw_data;
+}
+
+// Builds a file name out of the generator arguments:
+// "<prefix>_" (only when a prefix is given), then all `args` concatenated
+// with spaces removed and '-' / '=' replaced by '_', then `ext`.
+// The replacements are applied before `ext` is appended, so `ext` is kept
+// verbatim; a given prefix is subject to the replacements as well.
+static std::string CreateNameForGeneratedFile(
+    absl::Span args, std::string_view ext,
+    std::optional prefix) {
+  std::string output;
+
+  if (prefix.has_value()) {
+    output += prefix.value();
+    output += "_";
+  }
+
+  for (auto const& x : args) {
+    output += x;
+  }
+  std::erase(output, ' ');
+  absl::c_replace(output, '-', '_');
+  absl::c_replace(output, '=', '_');
+
+  output += ext;
+
+  return output;
+}
+
+// Runs the `decodecorpus` tool, located through the XLS runfiles, with the
+// given arguments.
+// NOTE(review): `cwd` and `timeout` are accepted but never used - only `cmd`
+// is passed to InvokeSubprocess. Either forward them or drop the parameters.
+static absl::StatusOr CallDecodecorpus(
+    absl::Span args,
+    const std::optional& cwd = std::nullopt,
+    std::optional timeout = std::nullopt) {
+  XLS_ASSIGN_OR_RETURN(std::filesystem::path path,
+                       xls::GetXlsRunfilePath("external/zstd/decodecorpus"));
+
+  // argv[0] is the tool itself, followed by the caller's arguments.
+  std::vector cmd = {path};
+  cmd.insert(cmd.end(), args.begin(), args.end());
+  return SubprocessErrorAsStatus(xls::InvokeSubprocess(cmd));
+}
+
+// Generates a frame header with `decodecorpus` (seeded with `seed`, with
+// "--no-magic" added when `magic` is false), reads the generated file back
+// and removes it afterwards.
+absl::StatusOr> GenerateFrameHeader(int seed, bool magic) {
+  std::array args;
+  args[0] = "-s" + std::to_string(seed);
+  // When `magic` is true an empty string is passed as an argument.
+  args[1] = (magic) ? "" : "--no-magic";
+  args[2] = "--frame-header-only";
+  // The file name is derived from the arguments set so far; args[3] is
+  // still empty at this point and does not contribute to it.
+  std::filesystem::path output_path =
+      std::filesystem::temp_directory_path() /
+      std::filesystem::path(
+          CreateNameForGeneratedFile(absl::MakeSpan(args), ".zstd", "fh"));
+  args[3] = "-p" + std::string(output_path);
+
+  // NOTE(review): `result` is not used afterwards; presumably the macro
+  // returns early on error, in which case the file is not removed - confirm.
+  XLS_ASSIGN_OR_RETURN(auto result, CallDecodecorpus(args));
+  auto raw_data = ReadFileAsRawData(output_path);
+  std::remove(output_path.c_str());
+  return raw_data;
+}
+
+// Generates a whole frame with `decodecorpus` (seeded with `seed`), reads the
+// generated file back and removes it afterwards.
+// `btype` is passed numerically through "--block-type=" unless it is RANDOM;
+// for RLE "--content-size" is added as well.
+absl::StatusOr> GenerateFrame(int seed, BlockType btype) {
+  std::vector args;
+  args.push_back("-s" + std::to_string(seed));
+  if (btype != BlockType::RANDOM) {
+    args.push_back("--block-type=" + std::to_string(btype));
+  }
+  if (btype == BlockType::RLE) {
+    args.push_back("--content-size");
+  }
+  // Test payloads up to 16KB
+  args.push_back("--max-content-size-log=14");
+  // The file name is derived from the arguments collected so far, i.e.
+  // without the "-p" output argument that is appended below.
+  std::filesystem::path output_path =
+      std::filesystem::temp_directory_path() /
+      std::filesystem::path(
+          CreateNameForGeneratedFile(absl::MakeSpan(args), ".zstd", "frame"));
+  args.push_back("-p" + std::string(output_path));
+
+  // NOTE(review): `result` is not used afterwards; presumably the macro
+  // returns early on error, in which case the file is not removed - confirm.
+  XLS_ASSIGN_OR_RETURN(auto result, CallDecodecorpus(args));
+  auto raw_data = ReadFileAsRawData(output_path);
+  std::remove(output_path.c_str());
+  return raw_data;
+}
+
+}  // namespace xls::zstd 
diff --git a/xls/modules/zstd/data_generator.h b/xls/modules/zstd/data_generator.h
new file mode 100644
index 0000000000..7ba5437bc9
--- /dev/null
+++ b/xls/modules/zstd/data_generator.h
@@ -0,0 +1,37 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef XLS_MODULES_ZSTD_DATA_GENERATOR_H_
+#define XLS_MODULES_ZSTD_DATA_GENERATOR_H_
+
+// NOTE(review): the header names of the next two includes, and the template
+// arguments of the return types below, are missing in this text - confirm
+// against the original file.
+#include
+#include
+
+#include "absl/status/statusor.h"
+
+namespace xls::zstd {
+
+// Block type requested from the data generator.
+// The enumerator values matter: data_generator.cc formats the value with
+// std::to_string into the "--block-type=" argument, and omits that argument
+// for RANDOM. Do not reorder the enumerators.
+enum BlockType {
+  RAW,
+  RLE,
+  COMPRESSED,
+  RANDOM,
+};
+
+// Generates a frame header using `seed`; `magic` selects whether the
+// generator is run without "--no-magic".
+absl::StatusOr> GenerateFrameHeader(int seed, bool magic);
+// Generates a whole frame using `seed` and the requested block type.
+absl::StatusOr> GenerateFrame(int seed, BlockType btype);
+
+}  // namespace xls::zstd
+
+#endif  // XLS_MODULES_ZSTD_DATA_GENERATOR_H_
diff --git a/xls/modules/zstd/dec_demux.x b/xls/modules/zstd/dec_demux.x
new file mode 100644
index 0000000000..5bcd380f91
--- /dev/null
+++ b/xls/modules/zstd/dec_demux.x
@@ -0,0 +1,273 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains DecoderDemux Proc, which is responsible for
+// parsing Block_Header and sending the obtained data to the Raw, RLE,
+// or Compressed Block decoders.
+
+import std;
+import xls.modules.zstd.common as common;
+import xls.modules.zstd.block_header as block_header;
+
+type BlockDataPacket = common::BlockDataPacket;
+
+const DATA_WIDTH = common::DATA_WIDTH;
+
+// IDLE: the next received packet starts with a block header.
+// PASS_*: packets of the current block are forwarded to the matching output.
+enum DecoderDemuxStatus : u2 {
+    IDLE = 0,
+    PASS_RAW = 1,
+    PASS_RLE = 2,
+    PASS_COMPRESSED = 3,
+}
+
+// State of the DecoderDemux proc:
+//   status       - see DecoderDemuxStatus
+//   byte_to_pass - size taken from the header of the current block
+//   send_data    - amount accounted as sent so far; the block is finished
+//                  once it equals `byte_to_pass`
+//   id           - id stamped on every forwarded packet
+//   last_packet  - packet received in the previous step, forwarded in this one
+struct DecoderDemuxState {
+    status: DecoderDemuxStatus,
+    byte_to_pass: u21,
+    send_data: u21,
+    id: u32,
+    last_packet: BlockDataPacket,
+}
+
+// It's safe to assume that data contains full header and some extra data.
+// Previous stage aligns block header and data, it also guarantees
+// new block headers in new packets.
+//
+// Decodes the header from the low 24 bits of `data`, strips it from the
+// packet and selects the PASS_* status matching the block type. Nothing is
+// sent here: the stripped packet is stored in `last_packet`.
+fn handle_idle_state(data: BlockDataPacket, state: DecoderDemuxState)
+    -> DecoderDemuxState {
+    let header = block_header::extract_block_header(data.data[0:24] as u24);
+    // Drop the 24 header bits and stamp the packet with the current id.
+    let data = BlockDataPacket {
+        data: data.data[24:] as bits[DATA_WIDTH],
+        length: data.length - u32:24,
+        id: state.id,
+        ..data
+    };
+    // The three valid arms differ only in the selected status.
+    match header.btype {
+        common::BlockType::RAW => {
+            DecoderDemuxState {
+                status: DecoderDemuxStatus::PASS_RAW,
+                byte_to_pass: header.size,
+                send_data: u21:0,
+                last_packet: data,
+                ..state
+            }
+        },
+        common::BlockType::RLE => {
+            DecoderDemuxState {
+                status: DecoderDemuxStatus::PASS_RLE,
+                byte_to_pass: header.size,
+                send_data: u21:0,
+                last_packet: data,
+                ..state
+            }
+        },
+        common::BlockType::COMPRESSED => {
+            DecoderDemuxState {
+                status: DecoderDemuxStatus::PASS_COMPRESSED,
+                byte_to_pass: header.size,
+                send_data: u21:0,
+                last_packet: data,
+                ..state
+            }
+        },
+        // Remaining value is RESERVED.
+        _ => {
+            fail!("Should_never_happen", state)
+        }
+    }
+}
+
+// NOTE(review): both `zero!()` calls carry no type argument in this text -
+// presumably lost together with other angle-bracket content; confirm.
+const ZERO_DECODER_DEMUX_STATE = zero!();
+const ZERO_DATA = zero!();
+
+// Receives block packets on `input_r` and forwards the payload of each block
+// to `raw_s`, `rle_s` or `cmp_s` depending on the block type found in its
+// header. A packet is forwarded one activation after it was received.
+pub proc DecoderDemux {
+    input_r: chan in;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+
+    init {(ZERO_DECODER_DEMUX_STATE)}
+
+    config (
+        input_r: chan in,
+        raw_s: chan out,
+        rle_s: chan out,
+        cmp_s: chan out,
+    ) {(
+        input_r,
+        raw_s,
+        rle_s,
+        cmp_s
+    )}
+
+    next (state: DecoderDemuxState) {
+        let tok = join();
+        // Nothing is received while the stored packet is the last one of its
+        // block; `data` is then ZERO_DATA.
+        let (tok, data) = recv_if(tok, input_r, !state.last_packet.last, ZERO_DATA);
+        if (!state.last_packet.last) {
+            trace_fmt!("DecoderDemux: recv: {:#x}", data);
+        } else {};
+        let (send_raw, send_rle, send_cmp, new_state) = match state.status {
+            DecoderDemuxStatus::IDLE =>
+                (false, false, false, handle_idle_state(data, state)),
+            DecoderDemuxStatus::PASS_RAW => {
+                // Account for the stored packet in bytes (length >> 3).
+                let new_state = DecoderDemuxState {
+                    send_data: state.send_data + (state.last_packet.length >> 3) as u21,
+                    last_packet: data,
+                    ..state
+                };
+                (true, false, false, new_state)
+            },
+            DecoderDemuxStatus::PASS_RLE => {
+                // The whole block is accounted for at once: a single packet
+                // is sent for an RLE block.
+                let new_state = DecoderDemuxState {
+                    send_data: state.send_data + state.byte_to_pass,
+                    last_packet: data,
+                    ..state
+                };
+                (false, true, false, new_state)
+            },
+            DecoderDemuxStatus::PASS_COMPRESSED => {
+                let new_state = DecoderDemuxState {
+                    send_data: state.send_data +(state.last_packet.length >> 3) as u21,
+                    last_packet: data,
+                    ..state
+                };
+                (false, false, true, new_state)
+            },
+            // NOTE(review): all four DecoderDemuxStatus values are listed
+            // above, so this arm looks unreachable - confirm.
+            _ => fail!("IDLE_STATE_IMPOSSIBLE", (false, false, false, state))
+        };
+
+        let end_state = if (send_raw || send_rle || send_cmp) {
+            let max_packet_width = DATA_WIDTH;
+            let block_size_bits = u32:24 + (state.byte_to_pass as u32 << 3);
+            // Sanity check only: the value of this `if` is discarded, only the
+            // `fail!` matters.
+            if (!send_rle) && ((block_size_bits <= max_packet_width) &&
+                ((block_size_bits) != state.last_packet.length) && !state.last_packet.last) {
+                // Demuxer expects that blocks are received in separate packets,
+                // even if 2 blocks would fit entirely or even partially in a single packet.
+                // It is the job of top-level ZSTD decoder to split each block into at least one
+                // BlockDataPacket.
+                // For Raw and Compressed blocks it is illegal to have block of size smaller than
+                // max size of packet and have packet length greater than this size.
+                fail!("Should_never_happen", state)
+            } else {
+                state
+            };
+            let data_to_send = BlockDataPacket {id: state.id, ..state.last_packet};
+            let tok = send_if(tok, raw_s, send_raw, data_to_send);
+            if (send_raw) {
+                trace_fmt!("DecoderDemux: send_raw: {:#x}", data_to_send);
+            } else {};
+            // RLE module expects single byte in data field
+            // and block length in length field. This is different from
+            // Raw and Compressed modules.
+            let rle_data = BlockDataPacket{
+                data: state.last_packet.data[0:8] as bits[DATA_WIDTH],
+                length: state.byte_to_pass as u32,
+                id: state.id,
+                ..state.last_packet
+            };
+            let tok = send_if(tok, rle_s, send_rle, rle_data);
+            if (send_rle) {
+                trace_fmt!("DecoderDemux: send_rle: {:#x}", rle_data);
+            } else {};
+            let tok = send_if(tok, cmp_s, send_cmp, data_to_send);
+            if (send_cmp) {
+                trace_fmt!("DecoderDemux: send_cmp: {:#x}", data_to_send);
+            } else {};
+            // Whole block accounted for: go back to IDLE. The id restarts
+            // from 0 after the last packet of the last block, otherwise it
+            // is incremented.
+            let end_state = if (new_state.send_data == new_state.byte_to_pass) {
+                let next_id = if (state.last_packet.last && state.last_packet.last_block) {
+                    u32: 0
+                } else {
+                    state.id + u32:1
+                };
+                DecoderDemuxState {
+                    status: DecoderDemuxStatus::IDLE,
+                    byte_to_pass: u21:0,
+                    send_data: u21:0,
+                    id: next_id,
+                    last_packet: ZERO_DATA,
+                }
+            } else {
+                new_state
+            };
+            end_state
+        } else {
+            new_state
+        };
+
+        end_state
+    }
+}
+
+#[test_proc]
+proc DecoderDemuxTest {
+    terminator: chan out;
+    input_s: chan out;
+    raw_r: chan in;
+    rle_r: chan in;
+    cmp_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (input_s, input_r) = chan("input");
+
+        spawn DecoderDemux(input_r, raw_s, rle_s, cmp_s);
+        (terminator, input_s, raw_r, rle_r, cmp_r)
+    }
+
+    next(state: ()) {
+        let tok = join();
+        let tok = send(tok, input_s, 
BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x11111111110000c0, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x2222222222111111, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x3333333333222222, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000333333, length: u32:24 }); + + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xAAAAAAAAAA000100, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xBBBBBBBBBBAAAAAA, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xCCCCCCCCCCBBBBBB, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000CCCCCC, length: u32:24 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xDDDDDDDDDDDDDDDD, length: u32:64 }); + + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000FF000102, length: u32:32 }); + + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x4444444444000145, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x5555555555444444, length: u32:64 }); + let tok = send(tok, input_s, 
BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x6666666666555555, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x7777777777666666, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x8888888888777777, length: u32:64 }); + let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000000000888888, length: u32:24 }); + + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000001111111111, length: u32:40 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x2222222222111111, length: u32:64 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x3333333333222222, length: u32:64 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000333333, length: u32:24 }); + + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x000000AAAAAAAAAA, length: u32:40 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xBBBBBBBBBBAAAAAA, length: u32:64 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xCCCCCCCCCCBBBBBB, length: u32:64 }); + let (tok, data) = recv(tok, raw_r); 
assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000CCCCCC, length: u32:24 }); + let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xDDDDDDDDDDDDDDDD, length: u32:64 }); + + let (tok, data) = recv(tok, rle_r); assert_eq(data, BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xFF, length: u32:32 }); + + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000004444444444, length: u32:40 }); + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x5555555555444444, length: u32:64 }); + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x6666666666555555, length: u32:64 }); + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x7777777777666666, length: u32:64 }); + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x8888888888777777, length: u32:64 }); + let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000000000888888, length: u32:24 }); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/dec_mux.x b/xls/modules/zstd/dec_mux.x new file mode 100644 index 0000000000..59778ff304 --- /dev/null +++ b/xls/modules/zstd/dec_mux.x @@ -0,0 +1,494 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains the DecoderMux Proc, which collects data from
+// specialized Raw, RLE, and Compressed Block decoders and re-sends them in
+// the correct order.
+
+import std;
+import xls.modules.zstd.common as common;
+
+type BlockDataPacket = common::BlockDataPacket;
+type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket;
+type BlockData = common::BlockData;
+type BlockPacketLength = common::BlockPacketLength;
+type CopyOrMatchContent = common::CopyOrMatchContent;
+type CopyOrMatchLength = common::CopyOrMatchLength;
+type SequenceExecutorMessageType = common::SequenceExecutorMessageType;
+type SequenceExecutorPacket = common::SequenceExecutorPacket;
+// Sentinel id reported for an input slot that holds no current-frame packet (see raw_id/rle_id/compressed_id).
+const MAX_ID = u32:0xFFFFFFFF;  // must exceed every real block id: a small value (e.g. DATA_WIDTH) wins std::umin over larger real ids and trips "wrong_id"
+const DATA_WIDTH = common::DATA_WIDTH;
+// Mux state: flags of the last forwarded packet plus one buffered packet per input channel.
+struct DecoderMuxState {
+    prev_id: u32,  // id of the last forwarded packet; 0xFFFFFFFF at init and after a packet with last && last_block
+    prev_last: bool,  // `last` flag of the last forwarded packet
+    prev_last_block: bool,  // `last_block` flag of the last forwarded packet
+    prev_valid: bool,  // false until the first packet has been forwarded
+    raw_data: ExtendedBlockDataPacket,  // packet buffered from raw_r
+    raw_data_valid: bool,  // raw_data is pending for the current frame
+    raw_data_valid_next_frame: bool,  // raw_data was classified as belonging to the next frame
+    rle_data: ExtendedBlockDataPacket,  // packet buffered from rle_r
+    rle_data_valid: bool,  // rle_data is pending for the current frame
+    rle_data_valid_next_frame: bool,  // rle_data was classified as belonging to the next frame
+    compressed_data: ExtendedBlockDataPacket,  // packet buffered from cmp_r
+    compressed_data_valid: bool,  // compressed_data is pending for the current frame
+    compressed_data_valid_next_frame: bool,  // compressed_data was classified as belonging to the next frame
+}
+
+const ZERO_DECODER_MUX_STATE = zero!();
+// Buffers at most one packet per input and forwards at most one packet to output_s per activation, in block-id order.
+pub proc DecoderMux {
+    raw_r: chan in;
+    rle_r: chan in;
+    cmp_r: chan in;
+    output_s: chan out;
+
+    init {( DecoderMuxState { prev_id: u32:0xFFFFFFFF, prev_last: true, prev_last_block: true, ..ZERO_DECODER_MUX_STATE } )}  // start as if a frame just ended: prev_id + 1 wraps to 0, so id 0 is accepted first
+
+    config (
+        raw_r: chan in,
+        rle_r: chan in,
+        cmp_r: chan in,
+        output_s: chan out,
+    ) {(raw_r, rle_r, cmp_r, output_s)}
+
+    next (state: DecoderMuxState) {
+        let tok = join();
+        let (tok, raw_data, raw_data_valid) = recv_if_non_blocking(  // poll raw_r only while the raw slot is empty
+            tok, raw_r, !state.raw_data_valid && !state.raw_data_valid_next_frame, zero!());
+        let state = if (raw_data_valid) {
+            let state = if (raw_data.packet.id <= state.prev_id && state.prev_last && state.prev_valid && !state.prev_last_block) {
+                // received ID the same as previous, but `last` occurred
+                // this might be a packet from the next frame
+                let raw_data_valid_next_frame = raw_data_valid;
+                DecoderMuxState {raw_data, raw_data_valid_next_frame, ..state}
+            } else {
+                DecoderMuxState {raw_data, raw_data_valid, ..state}
+            };
+            state
+        } else { state };
+        // Same buffering for the RLE input.
+        let (tok, rle_data, rle_data_valid) = recv_if_non_blocking(
+            tok, rle_r, !state.rle_data_valid && !state.rle_data_valid_next_frame, zero!());
+        let state = if (rle_data_valid) {
+            trace_fmt!("DecoderMux: received RLE data packet {:#x}", rle_data);
+            let state = if (rle_data.packet.id <= state.prev_id && state.prev_last && state.prev_valid && !state.prev_last_block) {
+                // received ID the same as previous, but `last` occurred
+                // this might be a packet from the next frame
+                let rle_data_valid_next_frame = rle_data_valid;
+                DecoderMuxState {rle_data, rle_data_valid_next_frame, ..state}
+            } else {
+                DecoderMuxState {rle_data, rle_data_valid, ..state}
+            };
+            state
+        } else { state };
+        // Same buffering for the Compressed input.
+        let (tok, compressed_data, compressed_data_valid) = recv_if_non_blocking(
+            tok, cmp_r, !state.compressed_data_valid && !state.compressed_data_valid_next_frame, zero!());
+        let state = if (compressed_data_valid) {
+            trace_fmt!("DecoderMux: received compressed data packet {:#x}", compressed_data);
+            let state = if (compressed_data.packet.id <= state.prev_id && state.prev_last && state.prev_valid && !state.prev_last_block) {
+                // received ID the same as previous, but `last` occurred
+                // this might be a packet from the next frame
+                let compressed_data_valid_next_frame = compressed_data_valid;
+                DecoderMuxState {compressed_data, compressed_data_valid_next_frame, ..state}
+            } else {
+                DecoderMuxState {compressed_data, compressed_data_valid, ..state}
+            };
+            state
+        } else { state };
+        // Ids of the buffered current-frame packets; empty slots report MAX_ID so std::umin ignores them.
+        let raw_id = if state.raw_data_valid { state.raw_data.packet.id } else { MAX_ID };
+        let rle_id = if state.rle_data_valid { state.rle_data.packet.id } else { MAX_ID };
+        let compressed_id = if state.compressed_data_valid { state.compressed_data.packet.id } else { MAX_ID };
+        let any_valid = state.raw_data_valid || state.rle_data_valid || state.compressed_data_valid;
+        let all_valid = state.raw_data_valid && state.rle_data_valid && state.compressed_data_valid;
+
+        let state = if (any_valid) {
+            let min_id = std::umin(std::umin(rle_id, raw_id), compressed_id);
+            trace_fmt!("DecoderMux: rle_id: {}, raw_id: {}, compressed_id: {}", rle_id, raw_id, compressed_id);
+            trace_fmt!("DecoderMux: min_id: {}", min_id);
+            // Sanity checks on id ordering across blocks and frames.
+            assert!((state.prev_id <= min_id) || !state.prev_valid || state.prev_last_block, "wrong_id");
+            assert!(!state.prev_last_block || !state.prev_last || min_id == u32:0, "wrong_id_expected_0");
+            assert!(state.prev_last_block || !state.prev_last || !all_valid || (min_id == (state.prev_id + u32:1)) || (min_id == state.prev_id), "id_continuity_failure");
+            // Pick, in raw/rle/compressed priority, a packet that continues the current block or starts the next one.
+            let (do_send, data_to_send, state) =
+                if (state.raw_data_valid &&
+                    (((state.raw_data.packet.id == (state.prev_id + u32:1)) && state.prev_last) ||
+                    ((state.raw_data.packet.id == state.prev_id) && !state.prev_last))) {
+                    assert!(!state.raw_data_valid_next_frame, "raw_packet_valid_in_current_and_next_frame");
+                    (true,
+                     SequenceExecutorPacket {
+                         msg_type: state.raw_data.msg_type,
+                         length: state.raw_data.packet.length as CopyOrMatchLength,
+                         content: state.raw_data.packet.data as CopyOrMatchContent,
+                         last: state.raw_data.packet.last && state.raw_data.packet.last_block,
+                     },
+                     DecoderMuxState {
+                         raw_data_valid: false,
+                         raw_data_valid_next_frame: if (state.raw_data.packet.last_block) {false} else {state.raw_data_valid_next_frame},
+                         rle_data_valid: if (state.raw_data.packet.last_block) {state.rle_data_valid_next_frame} else {state.rle_data_valid},  // NOTE(review): promotion keys on last_block alone (not last && last_block) - confirm a multi-packet last block cannot discard a buffered packet
+                         rle_data_valid_next_frame: if (state.raw_data.packet.last_block) {false} else {state.rle_data_valid_next_frame},
+                         compressed_data_valid: if (state.raw_data.packet.last_block) {state.compressed_data_valid_next_frame} else {state.compressed_data_valid},
+                         compressed_data_valid_next_frame: if (state.raw_data.packet.last_block) {false} else {state.compressed_data_valid_next_frame},
+                         prev_valid : true,
+                         prev_id: if (state.raw_data.packet.last_block && state.raw_data.packet.last) {u32:0xffffffff} else {state.raw_data.packet.id},
+                         prev_last: state.raw_data.packet.last,
+                         prev_last_block: state.raw_data.packet.last_block,
+                         ..state})
+                } else if (state.rle_data_valid &&
+                    (((state.rle_data.packet.id == (state.prev_id + u32:1)) && state.prev_last) ||
+                    ((state.rle_data.packet.id == state.prev_id) && !state.prev_last))) {
+                    assert!(!state.rle_data_valid_next_frame, "rle_packet_valid_in_current_and_next_frame");
+                    (true,
+                     SequenceExecutorPacket {
+                         msg_type: state.rle_data.msg_type,
+                         length: state.rle_data.packet.length as CopyOrMatchLength,
+                         content: state.rle_data.packet.data as CopyOrMatchContent,
+                         last: state.rle_data.packet.last && state.rle_data.packet.last_block,
+                     },
+                     DecoderMuxState {
+                         raw_data_valid: if (state.rle_data.packet.last_block) {state.raw_data_valid_next_frame} else {state.raw_data_valid},
+                         raw_data_valid_next_frame: if (state.rle_data.packet.last_block) {false} else {state.raw_data_valid_next_frame},
+                         rle_data_valid: false,
+                         rle_data_valid_next_frame: if (state.rle_data.packet.last_block) {false} else {state.rle_data_valid_next_frame},
+                         compressed_data_valid: if (state.rle_data.packet.last_block) {state.compressed_data_valid_next_frame} else {state.compressed_data_valid},
+                         compressed_data_valid_next_frame: if (state.rle_data.packet.last_block) {false} else {state.compressed_data_valid_next_frame},
+                         prev_valid : true,
+                         prev_id: if (state.rle_data.packet.last_block && state.rle_data.packet.last) {u32:0xffffffff} else {state.rle_data.packet.id},
+                         prev_last: state.rle_data.packet.last,
+                         prev_last_block: state.rle_data.packet.last_block,
+                         ..state})
+                } else if (state.compressed_data_valid &&
+                    (((state.compressed_data.packet.id == (state.prev_id + u32:1)) && state.prev_last) ||
+                    ((state.compressed_data.packet.id == state.prev_id) && !state.prev_last))) {
+                    assert!(!state.compressed_data_valid_next_frame, "compressed_packet_valid_in_current_and_next_frame");
+                    (true,
+                     SequenceExecutorPacket {
+                         msg_type: state.compressed_data.msg_type,
+                         length: state.compressed_data.packet.length as CopyOrMatchLength,
+                         content: state.compressed_data.packet.data as CopyOrMatchContent,
+                         last: state.compressed_data.packet.last && state.compressed_data.packet.last_block,
+                     },
+                     DecoderMuxState {
+                         raw_data_valid: if (state.compressed_data.packet.last_block) {state.raw_data_valid_next_frame} else {state.raw_data_valid},
+                         raw_data_valid_next_frame: if (state.compressed_data.packet.last_block) {false} else {state.raw_data_valid_next_frame},
+                         rle_data_valid: if (state.compressed_data.packet.last_block) {state.rle_data_valid_next_frame} else {state.rle_data_valid},
+                         rle_data_valid_next_frame: if (state.compressed_data.packet.last_block) {false} else {state.rle_data_valid_next_frame},
+                         compressed_data_valid: false,
+                         compressed_data_valid_next_frame: if (state.compressed_data.packet.last_block) {false} else {state.compressed_data_valid_next_frame},
+                         prev_valid : true,
+                         prev_id: if (state.compressed_data.packet.last_block && state.compressed_data.packet.last) {u32:0xffffffff} else {state.compressed_data.packet.id},
+                         prev_last: state.compressed_data.packet.last,
+                         prev_last_block: state.compressed_data.packet.last_block,
+                         ..state})
+                } else {
+                    (false, zero!(), state)  // nothing buffered fits yet; keep waiting
+                };
+
+            let tok = send_if(tok, output_s, do_send, data_to_send);
+            if (do_send) {
+                trace_fmt!("DecoderMux: sent {:#x}", data_to_send);
+            } else {()};
+            state
+        } else {
+            state
+        };
+
+        state
+    }
+}
+// Test: packets sent interleaved across the three inputs must be emitted ordered by block id.
+#[test_proc]
+proc DecoderMuxTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();  // send order: raw(0), raw(0), rle(1), raw(0, last), cmp(2, last block), rle(1, last)
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: BlockData:0xAAAAAAAA, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }});
+        let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: true, data: BlockData:0x00000000, length: BlockPacketLength:32 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last:
bool: true, last_block: bool: false, data: BlockData:0xBBBBBBBB, length: BlockPacketLength:32 }});
+        // Expected output: block 0 (raw) x3, block 1 (rle) x2, then block 2 (cmp) with last set.
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xAAAAAAAA, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xBBBBBBBB, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x00000000, length: CopyOrMatchLength:32 });
+
+        send(tok, terminator, true);
+    }
+}
+// Test: zero-length raw blocks (ids 2 and 4) are forwarded as zero-length packets.
+#[test_proc]
+proc DecoderMuxEmptyRawBlocksTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();  // five single-packet blocks (ids 0..4), all sent on the raw channel
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:4, last: bool: true, last_block: bool: true, data: BlockData:0x0, length: BlockPacketLength:0 }});
+        // Output mirrors the input order; only the final packet (last && last_block) has last set.
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0, length: CopyOrMatchLength:0 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 });
+
+        send(tok, terminator, true);
+    }
+}
+// Test: like DecoderMuxEmptyRawBlocksTest, but the final empty block (id 4) arrives on the rle channel.
+#[test_proc]
+proc DecoderMuxEmptyRleBlocksTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:4, last: bool: true, last_block: bool: true, data: BlockData:0x0,
length: BlockPacketLength:0 }});
+        // Expected output follows block ids 0..4; only the final (last && last_block) packet has last set.
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0, length: CopyOrMatchLength:0 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0, length: CopyOrMatchLength:0 });
+
+        send(tok, terminator, true);
+    }
+}
+// Test: an empty block (id 1) between two multi-packet blocks (ids 0 and 2), all on the raw channel.
+#[test_proc]
+proc DecoderMuxEmptyBlockBetweenRegularBlocksOnTheSameInputChannelTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();  // block 0: 3 packets, block 1: empty, block 2: 2 packets, block 3: last block
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: false, last_block: bool: false, data: BlockData:0xAAAAAAAA, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0xBBBBBBBB, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: true, data: BlockData:0x00000000, length: BlockPacketLength:32 }});
+        // Output keeps the input order; the empty block appears as a zero-length packet.
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0, length: CopyOrMatchLength:0 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xAAAAAAAA, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xBBBBBBBB, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x00000000, length: CopyOrMatchLength:32 });
+
+        send(tok, terminator, true);
+    }
+}
+// Test: an empty rle block (id 1) between raw block 0 and rle block 2; the last block (id 3) arrives on cmp.
+#[test_proc]
+proc DecoderMuxEmptyBlockBetweenRegularBlocksOnDifferentInputChannelsTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();  // packets are sent interleaved across channels, not in block-id order
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false,
data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: false, last_block: bool: false, data: BlockData:0xAAAAAAAA, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }});
+        let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: true, data: BlockData:0x00000000, length: BlockPacketLength:32 }});
+        let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0xBBBBBBBB, length: BlockPacketLength:32 }});
+        // Expected: raw block 0 (3 packets), the empty block 1, rle block 2 (2 packets), then block 3 with last set.
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0, length: CopyOrMatchLength:0 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xAAAAAAAA, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xBBBBBBBB, length: CopyOrMatchLength:32 });
+        let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x00000000, length: CopyOrMatchLength:32 });
+
+        send(tok, terminator, true);
+    }
+}
+
+#[test_proc]
+proc DecoderMuxMultipleFramesTest {
+    terminator: chan out;
+    raw_s: chan out;
+    rle_s: chan out;
+    cmp_s: chan out;
+    output_r: chan in;
+
+    init {}
+
+    config (terminator: chan out) {
+        let (raw_s, raw_r) = chan("raw");
+        let (rle_s, rle_r) = chan("rle");
+        let (cmp_s, cmp_r) = chan("cmp");
+        let (output_s, output_r) = chan("output");
+
+        spawn DecoderMux(raw_r, rle_r, cmp_r, output_s);
+        (terminator, raw_s, rle_s, cmp_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();
+        // Frame #1
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x11111111, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: BlockData:0x22222222, length: BlockPacketLength:32 }});
+        let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet:
BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x33333333, length: BlockPacketLength:32 }}); + let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: BlockData:0xAAAAAAAA, length: BlockPacketLength:32 }}); + let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0xBBBBBBBB, length: BlockPacketLength:32 }}); + let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0xCCCCCCCC, length: BlockPacketLength:32 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: false, data: BlockData:0xDDDDDDDD, length: BlockPacketLength:32 }}); + let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:4, last: bool: true, last_block: bool: false, data: BlockData:0xEEEEEEEE, length: BlockPacketLength:32 }}); + let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:5, last: bool: true, last_block: bool: true, data: BlockData:0xFFFFFFFF, length: BlockPacketLength:32 }}); + // Frame #2 + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x44444444, length: BlockPacketLength:32 }}); + let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: 
u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, cmp_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, rle_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: true, data: BlockData:0x0, length: BlockPacketLength:0 }}); + // Frame #3 + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: BlockData:0x55555555, length: BlockPacketLength:32 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:4, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:5, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: 
BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:6, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:7, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:8, last: bool: true, last_block: bool: false, data: BlockData:0x0, length: BlockPacketLength:0 }}); + let tok = send(tok, raw_s, ExtendedBlockDataPacket { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { id: u32:9, last: bool: true, last_block: bool: true, data: BlockData:0x0, length: BlockPacketLength:0 }}); + + // Frame #1 + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x11111111, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x22222222, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x33333333, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xAAAAAAAA, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: 
SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xBBBBBBBB, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xCCCCCCCC, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDDDDDDDD, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xEEEEEEEE, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xFFFFFFFF, length: CopyOrMatchLength:32 }); + // Frame #2 + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x44444444, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + // Frame #3 + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: 
SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x55555555, length: CopyOrMatchLength:32 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + let (tok, data) = recv(tok, output_r); assert_eq(data, SequenceExecutorPacket {last: bool: true, msg_type: SequenceExecutorMessageType::LITERAL, content: 
CopyOrMatchContent:0x0 , length: CopyOrMatchLength:0 }); + + send(tok, terminator, true); + } +} + diff --git a/xls/modules/zstd/frame_header.x b/xls/modules/zstd/frame_header.x new file mode 100644 index 0000000000..858d64ac53 --- /dev/null +++ b/xls/modules/zstd/frame_header.x @@ -0,0 +1,692 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains utilities related to ZSTD Frame Header parsing. +// More information about the ZSTD Frame Header can be found in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1 + +import std; +import xls.modules.zstd.buffer as buff; + +type Buffer = buff::Buffer; +type BufferStatus = buff::BufferStatus; +type BufferResult = buff::BufferResult; + +pub type WindowSize = u64; +type FrameContentSize = u64; +type DictionaryId = u32; + +// Maximal mantissa value for calculating maximal accepted window_size +// as per https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor +const MAX_MANTISSA = WindowSize:0b111; + +// Structure for holding ZSTD Frame_Header_Descriptor data, as in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.1 +pub struct FrameHeaderDescriptor { + frame_content_size_flag: u2, + single_segment_flag: u1, + unused: u1, + reserved: u1, + content_checksum_flag: u1, + dictionary_id_flag: u2, +} + +// Structure for data obtained from decoding the Frame_Header_Descriptor +pub struct FrameHeader { + window_size: 
WindowSize, + frame_content_size: FrameContentSize, + dictionary_id: DictionaryId, + content_checksum_flag: u1, +} + +// Status values reported by the frame header parsing function +pub enum FrameHeaderStatus: u2 { + OK = 0, + CORRUPTED = 1, + NO_ENOUGH_DATA = 2, + UNSUPPORTED_WINDOW_SIZE = 3, +} + +// structure for returning results of parsing a frame header +pub struct FrameHeaderResult { + status: FrameHeaderStatus, + header: FrameHeader, + buffer: Buffer, +} + +// Auxiliary constant that can be used to initialize Proc's state +// with empty FrameHeader, because `zero!` cannot be used in that context +pub const ZERO_FRAME_HEADER = zero!(); +pub const FRAME_CONTENT_SIZE_NOT_PROVIDED_VALUE = FrameContentSize::MAX; + +// Extracts Frame_Header_Descriptor fields from 8-bit chunk of data +// that is assumed to be a valid Frame_Header_Descriptor +fn extract_frame_header_descriptor(data:u8) -> FrameHeaderDescriptor { + FrameHeaderDescriptor { + frame_content_size_flag: data[6:8], + single_segment_flag: data[5:6], + unused: data[4:5], + reserved: data[3:4], + content_checksum_flag: data[2:3], + dictionary_id_flag: data[0:2], + } +} + +#[test] +fn test_extract_frame_header_descriptor() { + assert_eq( + extract_frame_header_descriptor(u8:0xA4), + FrameHeaderDescriptor { + frame_content_size_flag: u2:0x2, + single_segment_flag: u1:0x1, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x1, + dictionary_id_flag: u2:0x0 + } + ); + + assert_eq( + extract_frame_header_descriptor(u8:0x0), + FrameHeaderDescriptor { + frame_content_size_flag: u2:0x0, + single_segment_flag: u1:0x0, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x0, + dictionary_id_flag: u2:0x0 + } + ); +} + +// Parses a Buffer and extracts information from the Frame_Header_Descriptor. +// The Buffer is assumed to contain a valid Frame_Header_Descriptor. 
The function +// returns BufferResult with the outcome of the operations on the buffer and +// information extracted from the Frame_Header_Descriptor +fn parse_frame_header_descriptor(buffer: Buffer) -> (BufferResult, FrameHeaderDescriptor) { + let (result, data) = buff::buffer_fixed_pop_checked(buffer); + match result.status { + BufferStatus::OK => { + let frame_header_desc = extract_frame_header_descriptor(data); + (result, frame_header_desc) + }, + _ => (result, zero!()) + } +} + +#[test] +fn test_parse_frame_header_descriptor() { + let buffer = Buffer { content: u32:0xA4, length: u32:8 }; + let (result, header) = parse_frame_header_descriptor(buffer); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0, length: u32:0 }, + }); + assert_eq(header, FrameHeaderDescriptor { + frame_content_size_flag: u2:0x2, + single_segment_flag: u1:0x1, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x1, + dictionary_id_flag: u2:0x0 + }); + + let buffer = Buffer { content: u32:0x0, length: u32:8 }; + let (result, header) = parse_frame_header_descriptor(buffer); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0, length: u32:0 }, + }); + assert_eq(header, FrameHeaderDescriptor { + frame_content_size_flag: u2:0x0, + single_segment_flag: u1:0x0, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x0, + dictionary_id_flag: u2:0x0 + }); + + let buffer = Buffer { content: u32:0x0, length: u32:0 }; + let (result, header) = parse_frame_header_descriptor(buffer); + assert_eq(result, BufferResult { + status: BufferStatus::NO_ENOUGH_DATA, + buffer: Buffer { content: u32:0, length: u32:0 }, + }); + assert_eq(header, zero!()); +} + +// Returns a boolean showing if the Window_Descriptor section exists +// for the frame with the given FrameHeaderDescriptor +fn window_descriptor_exists(desc: FrameHeaderDescriptor) -> bool { + desc.single_segment_flag == u1:0 +} + +#[test] 
+fn test_window_descriptor_exists() { + let zero_desc = zero!(); + + let desc_with_ss = FrameHeaderDescriptor {single_segment_flag: u1:1, ..zero_desc}; + assert_eq(window_descriptor_exists(desc_with_ss), false); + + let desc_without_ss = FrameHeaderDescriptor {single_segment_flag: u1:0, ..zero_desc}; + assert_eq(window_descriptor_exists(desc_without_ss), true); +} + +// Extracts window size from 8-bit chunk of data +// that is assumed to be a valid Window_Descriptor +fn extract_window_size_from_window_descriptor(data: u8) -> u64 { + let exponent = data >> u8:3; + let mantissa = data & u8:7; + + let window_base = u64:1 << (u64:10 + exponent as u64); + let window_add = (window_base >> u64:3) * (mantissa as u64); + + window_base + window_add +} + +#[test] +fn test_extract_window_size_from_window_descriptor() { + assert_eq(extract_window_size_from_window_descriptor(u8:0x0), u64:0x400); + assert_eq(extract_window_size_from_window_descriptor(u8:0x9), u64:0x900); + assert_eq(extract_window_size_from_window_descriptor(u8:0xFF), u64:0x3c000000000); +} + +// Parses a Buffer with data and extracts information from the Window_Descriptor +// The buffer is assumed to contain a valid Window_Descriptor that is related to +// the same frame as the provided FrameHeaderDescriptor. The function returns +// BufferResult with the outcome of the operations on the buffer and window size. 
+fn parse_window_descriptor(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, WindowSize) { + assert!(window_descriptor_exists(desc), "window_descriptor_does_not_exist"); + + let (result, data) = buff::buffer_fixed_pop_checked(buffer); + match result.status { + BufferStatus::OK => { + let window_size = extract_window_size_from_window_descriptor(data); + (result, window_size) + }, + _ => (result, u64:0) + } +} + +#[test] +fn test_parse_window_descriptor() { + let zero_desc = zero!(); + let desc_without_ss = FrameHeaderDescriptor {single_segment_flag: u1:0, ..zero_desc}; + + let buffer = Buffer { content: u32:0xF, length: u32:0x4 }; + let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); + assert_eq(result, BufferResult { + status: BufferStatus::NO_ENOUGH_DATA, + buffer: Buffer { content: u32:0xF, length: u32:0x4 }, + }); + assert_eq(window_size, u64:0); + + let buffer = Buffer { content: u32:0x0, length: u32:0x8 }; + let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(window_size, u64:0x400); + + let buffer = Buffer { content: u32:0x9, length: u32:0x8 }; + let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(window_size, u64:0x900); + + let buffer = Buffer { content: u32:0xFF, length: u32:0x8 }; + let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(window_size, u64:0x3c000000000); +} + +// Parses a Buffer with data and extracts information from the Dictionary_ID +// The buffer is assumed to contain a valid Dictionary_ID that is related to +// the same frame 
as the provided FrameHeaderDescriptor. The function returns +// BufferResult with the outcome of the operations on the buffer and dictionary ID +fn parse_dictionary_id(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, DictionaryId) { + let bytes = match desc.dictionary_id_flag { + u2:0 => u32:0, + u2:1 => u32:1, + u2:2 => u32:2, + u2:3 => u32:4, + _ => fail!("not_possible", u32:0) + }; + + let (result, data) = buff::buffer_pop_checked(buffer, bytes * u32:8); + match result.status { + BufferStatus::OK => (result, data as u32), + _ => (result, u32:0) + } +} + +#[test] +fn test_parse_dictionary_id() { + let zero_desc = zero!(); + + let buffer = Buffer { content: u32:0x0, length: u32:0x0 }; + let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0, ..zero_desc}; + let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0x0 }, + }); + assert_eq(dictionary_id, u32:0); + + let buffer = Buffer { content: u32:0x12, length: u32:0x8 }; + let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x1, ..zero_desc}; + let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(dictionary_id, u32:0x12); + + let buffer = Buffer { content: u32:0x1234, length: u32:0x10 }; + let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x2, ..zero_desc}; + let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(dictionary_id, u32:0x1234); + + let buffer = Buffer { content: u32:0x12345678, length: u32:0x20 }; + let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x3, 
..zero_desc}; + let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u32:0x0, length: u32:0 }, + }); + assert_eq(dictionary_id, u32:0x12345678); + + let buffer = Buffer { content: u32:0x1234, length: u32:0x10 }; + let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x3, ..zero_desc}; + let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::NO_ENOUGH_DATA, + buffer: Buffer { content: u32:0x1234, length: u32:0x10 }, + }); + assert_eq(dictionary_id, u32:0x0); +} + +// Returns boolean showing if the Frame_Content_Size section exists for +// the frame with the given FrameHeaderDescriptor. +fn frame_content_size_exists(desc: FrameHeaderDescriptor) -> bool { + desc.single_segment_flag != u1:0 || desc.frame_content_size_flag != u2:0 +} + +#[test] +fn test_frame_content_size_exists() { + let zero_desc = zero!(); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:0, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), false); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:2, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:0, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:3, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); +} + +// Parses a Buffer with data and extracts information from the Frame_Content_Size +// The buffer is assumed to contain a valid Frame_Content_Size that is related to +// the same frame as the provided FrameHeaderDescriptor. 
The function returns +// BufferResult with the outcome of the operations on the buffer and frame content size. +fn parse_frame_content_size(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, FrameContentSize) { + assert!(frame_content_size_exists(desc), "frame_content_size_does_not_exist"); + + let bytes = match desc.frame_content_size_flag { + u2:0 => u32:1, + u2:1 => u32:2, + u2:2 => u32:4, + u2:3 => u32:8, + _ => fail!("not_possible", u32:0) + }; + + let (result, data) = buff::buffer_pop_checked(buffer, bytes * u32:8); + match (result.status, bytes) { + (BufferStatus::OK, u32:2) => (result, data as u64 + u64:256), + (BufferStatus::OK, _) => (result, data as u64), + (_, _) => (result, u64:0) + } +} + +#[test] +fn test_parse_frame_content_size() { + let zero_desc = zero!(); + + let buffer = Buffer { content: u64:0x12, length: u32:8 }; + let frame_header_desc = FrameHeaderDescriptor { + frame_content_size_flag: u2:0, + single_segment_flag: u1:1, + ..zero_desc + }; + let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u64:0x0, length: u32:0x0 }, + }); + assert_eq(frame_content_size, u64:0x12); + + let buffer = Buffer { content: u64:0x1234, length: u32:0x10 }; + let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:1, ..zero_desc}; + let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u64:0x0, length: u32:0x0 }, + }); + assert_eq(frame_content_size, u64:0x1234 + u64:256); + + let buffer = Buffer { content: u64:0x12345678, length: u32:0x20 }; + let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:2, ..zero_desc}; + let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, 
+ buffer: Buffer { content: u64:0x0, length: u32:0x0 }, + }); + assert_eq(frame_content_size, u64:0x12345678); + + let buffer = Buffer { content: u64:0x1234567890ABCDEF, length: u32:0x40 }; + let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:3, ..zero_desc}; + let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::OK, + buffer: Buffer { content: u64:0x0, length: u32:0x0 }, + }); + assert_eq(frame_content_size, u64:0x1234567890ABCDEF); + + let buffer = Buffer { content: u32:0x12345678, length: u32:0x20 }; + let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:0x3, ..zero_desc}; + let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); + assert_eq(result, BufferResult { + status: BufferStatus::NO_ENOUGH_DATA, + buffer: Buffer { content: u32:0x12345678, length: u32:0x20 }, + }); + assert_eq(frame_content_size, u64:0x0); +} + +// Calculate maximal accepted window_size for given WINDOW_LOG_MAX and return whether given +// window_size should be accepted or discarded. +// Based on window_size calculation from: RFC 8878 +// https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor +fn window_size_valid(window_size: WindowSize) -> bool { + let max_window_size = (WindowSize:1 << WINDOW_LOG_MAX) + (((WindowSize:1 << WINDOW_LOG_MAX) >> WindowSize:3) * MAX_MANTISSA); + + window_size <= max_window_size +} + +// Parses a Buffer with data and extracts Frame_Header information. The buffer +// is assumed to contain a valid Frame_Header. The function returns FrameHeaderResult +// with BufferResult that contains outcome of the operations on the Buffer, +// FrameHeader with the extracted frame header if the parsing was successful, +// and the status of the operation in FrameHeaderStatus. On NO_ENOUGH_DATA the returned +// buffer is the same as the input buffer; on other errors it is zeroed. 
+// WINDOW_LOG_MAX is the base 2 logarithm used for calculating the maximal allowed +// window_size. Frame header parsing function must discard all frames that +// have window_size above the maximal allowed window_size. +// CAPACITY is the buffer capacity +pub fn parse_frame_header(buffer: Buffer) -> FrameHeaderResult { + trace_fmt!("parse_frame_header: ==== Parsing ==== \n"); + trace_fmt!("parse_frame_header: initial buffer: {:#x}", buffer); + + let (result, desc) = parse_frame_header_descriptor(buffer); + trace_fmt!("parse_frame_header: buffer after parsing header descriptor: {:#x}", result.buffer); + + let (result, header) = match result.status { + BufferStatus::OK => { + let (result, window_size) = if window_descriptor_exists(desc) { + trace_fmt!("parse_frame_header: window_descriptor exists, parse it"); + parse_window_descriptor(result.buffer, desc) + } else { + trace_fmt!("parse_frame_header: window_descriptor does not exist, skip parsing it"); + (result, u64:0) + }; + trace_fmt!("parse_frame_header: buffer after parsing window_descriptor: {:#x}", result.buffer); + + match result.status { + BufferStatus::OK => { + trace_fmt!("parse_frame_header: parse dictionary_id"); + let (result, dictionary_id) = parse_dictionary_id(result.buffer, desc); + trace_fmt!("parse_frame_header: buffer after parsing dictionary_id: {:#x}", result.buffer); + + match result.status { + BufferStatus::OK => { + let (result, frame_content_size) = if frame_content_size_exists(desc) { + trace_fmt!("parse_frame_header: frame_content_size exists, parse it"); + parse_frame_content_size(result.buffer, desc) + } else { + trace_fmt!("parse_frame_header: frame_content_size does not exist, skip parsing it"); + (result, FRAME_CONTENT_SIZE_NOT_PROVIDED_VALUE) + }; + trace_fmt!("parse_frame_header: buffer after parsing frame_content_size: {:#x}", result.buffer); + + match result.status { + BufferStatus::OK => { + trace_fmt!("parse_frame_header: calculate frame header!"); + let window_size = match 
window_descriptor_exists(desc) { + true => window_size, + _ => frame_content_size, + }; + + ( + result, + FrameHeader { + window_size: window_size, + frame_content_size: frame_content_size, + dictionary_id: dictionary_id, + content_checksum_flag: desc.content_checksum_flag, + } + ) + }, + _ => { + trace_fmt!("parse_frame_header: Not enough data to parse frame_content_size!"); + (result, zero!()) + } + } + }, + _ => { + trace_fmt!("parse_frame_header: Not enough data to parse dictionary_id!"); + (result, zero!()) + } + } + }, + _ => { + trace_fmt!("parse_frame_header: Not enough data to parse window_descriptor!"); + (result, zero!()) + } + } + }, + _ => { + trace_fmt!("parse_frame_header: Not enough data to parse frame_header_descriptor!"); + (result, zero!()) + } + }; + + let (status, buffer) = match result.status { + BufferStatus::OK => (FrameHeaderStatus::OK, result.buffer), + _ => (FrameHeaderStatus::NO_ENOUGH_DATA, buffer) + }; + + let frame_header_result = FrameHeaderResult { status: status, header: header, buffer: buffer }; + + // libzstd always reports NO_ENOUGH_DATA errors before CORRUPTED caused by + // reserved bit being set + if (desc.reserved == u1:1 && frame_header_result.status != FrameHeaderStatus::NO_ENOUGH_DATA) { + trace_fmt!("parse_frame_header: frame descriptor corrupted!"); + // Critical failure - requires resetting the whole decoder + FrameHeaderResult { + status: FrameHeaderStatus::CORRUPTED, + buffer: zero!(), + header: zero!(), + } + } else if (!window_size_valid(header.window_size)) { + trace_fmt!("parse_frame_header: frame discarded: window_size to big: {}", header.window_size); + FrameHeaderResult { + status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, + buffer: zero!(), + header: zero!(), + } + } else { + frame_header_result + } +} + +// The largest allowed WindowLog for DSLX tests +pub const TEST_WINDOW_LOG_MAX = WindowSize:22; + +#[test] +fn test_parse_frame_header() { + // normal cases: descriptor 0xC2, window descriptor 0x09, 2-byte dictionary id and 8-byte frame content size; all 96 bits are consumed + let buffer = Buffer { content:
bits[128]:0x1234567890ABCDEF_CAFE_09_C2, length: u32:96 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::OK, + buffer: Buffer { + content: bits[128]:0x0, + length: u32:0, + }, + header: FrameHeader { + window_size: u64:0x900, + frame_content_size: u64:0x1234567890ABCDEF, + dictionary_id: u32:0xCAFE, + content_checksum_flag: u1:0, + } + }); + + // SingleSegmentFlag is set and FrameContentSize is bigger than accepted window_size + let buffer = Buffer { content: bits[128]:0x1234567890ABCDEF_CAFE_E2, length: u32:88 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, + buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, + header: zero!() + }); + + let buffer = Buffer { content: bits[128]:0xaa20, length: u32:16 }; // 2-byte header: window_size is expected to mirror the one-byte frame_content_size (0xaa) + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::OK, + buffer: Buffer { + content: bits[128]:0x0, + length: u32:0, + }, + header: FrameHeader { + window_size: u64:0xaa, + frame_content_size: u64:0xaa, + dictionary_id: u32:0x0, + content_checksum_flag: u1:0, + }, + }); + + // when buffer is too short + let buffer = Buffer { content: bits[128]:0x0, length: u32:0 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::NO_ENOUGH_DATA, + buffer: buffer, + header: zero!() + }); + + let buffer = Buffer { content: bits[128]:0xC2, length: u32:8 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::NO_ENOUGH_DATA, + buffer: buffer, + header: zero!() + }); + + let buffer = Buffer { content: bits[128]:0x09_C2, length: u32:16 }; + let frame_header_result = parse_frame_header(buffer); +
assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::NO_ENOUGH_DATA, + buffer: buffer, + header: zero!() + }); + + let buffer = Buffer { content: bits[128]:0x1234_09_C2, length: u32:32 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::NO_ENOUGH_DATA, + buffer: buffer, + header: zero!() + }); + + let buffer = Buffer { content: bits[128]:0x1234_09_C2, length: u32:32 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::NO_ENOUGH_DATA, + buffer: buffer, + header: zero!() + }); + + // when frame header descriptor is corrupted (NOTE(review): the two NO_ENOUGH_DATA cases directly above are identical - content 0x1234_09_C2, length 32 - presumably one was meant to use a different input; confirm) + let buffer = Buffer { content: bits[128]:0x1234567890ABCDEF_1234_09_CA, length: u32:96 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::CORRUPTED, + buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, + header: zero!() + }); + + // Frame Header is discarded because Window size required by frame is too big for given decoder + // configuration + let buffer = Buffer { content: bits[128]:0xd310, length: u32:16 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, + buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, + header: zero!() + }); + + // Frame Header is discarded because Frame Content Size required by frame is too big for given decoder + // configuration + let buffer = Buffer { content: bits[128]:0xf45b5b5b0db1, length: u32:48 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, + buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, + header: FrameHeader { + window_size: u64:0x0, + frame_content_size: u64:0x0, +
dictionary_id: u32:0x0, + content_checksum_flag: u1:0, + }, + }); + + // Frame Header is discarded because Frame Content Size required by frame is too big (above 64bits) for given decoder + // configuration + let buffer = Buffer { content: bits[128]:0xc0659db6813a16b33f3da53a79e4, length: u32:112 }; + let frame_header_result = parse_frame_header(buffer); + assert_eq(frame_header_result, FrameHeaderResult { + status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, + buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, + header: FrameHeader { + window_size: u64:0x0, + frame_content_size: u64:0x0, + dictionary_id: u32:0x0, + content_checksum_flag: u1:0, + }, + }); +} diff --git a/xls/modules/zstd/frame_header_test.cc b/xls/modules/zstd/frame_header_test.cc new file mode 100644 index 0000000000..300c973975 --- /dev/null +++ b/xls/modules/zstd/frame_header_test.cc @@ -0,0 +1,379 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#define ZSTD_STATIC_LINKING_ONLY 1 + +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/types/span.h" +#include "external/zstd/lib/zstd.h" +#include "external/zstd/lib/zstd_errors.h" +#include "gtest/gtest.h" +#include "xls/common/file/filesystem.h" +#include "xls/common/file/get_runfile_path.h" +#include "xls/common/fuzzing/fuzztest.h" +#include "xls/common/status/matchers.h" +#include "xls/dslx/create_import_data.h" +#include "xls/dslx/import_data.h" +#include "xls/dslx/ir_convert/convert_options.h" +#include "xls/dslx/ir_convert/ir_converter.h" +#include "xls/dslx/parse_and_typecheck.h" +#include "xls/dslx/type_system/parametric_env.h" +#include "xls/ir/bits.h" +#include "xls/ir/ir_test_base.h" +#include "xls/ir/value.h" +#include "xls/modules/zstd/data_generator.h" + +namespace xls { +namespace { + +// Must be in sync with FrameHeaderStatus from +// xls/modules/zstd/frame_header.x +enum FrameHeaderStatus : uint8_t { // cast into a 2-bit status field when the expected result is built + OK, + CORRUPTED, + NO_ENOUGH_DATA, + UNSUPPORTED_WINDOW_SIZE +}; + +class FrameHeaderTest : public xls::IrTestBase { + public: + // Prepare simulation environment: load the DSLX test module from runfiles, typecheck it and convert `function_name`, storing the result in `converted` + void SetUp() override { + XLS_ASSERT_OK_AND_ASSIGN(std::filesystem::path path, + xls::GetXlsRunfilePath(this->file)); + XLS_ASSERT_OK_AND_ASSIGN(std::string module_text, + xls::GetFileContents(path)); + + auto import_data = xls::dslx::CreateImportDataForTest(); + XLS_ASSERT_OK_AND_ASSIGN( + xls::dslx::TypecheckedModule checked_module, + xls::dslx::ParseAndTypecheck(module_text, this->file_name, + this->module_name, &import_data)); + + auto options = xls::dslx::ConvertOptions{}; + /* FIXME: The following code should work with a parametrized version of + * the `parse_frame_header` function. However, it seems that + * the symbolic_bindings are not correctly propagated inside + * ConvertOneFunction.
To leverage the problem, a simple specialization + * of the function is used (`parse_frame_header_128`). + * Once the problem is solved, we can restore the code below. + */ + // auto symbolic_bindings = xls::dslx::ParametricEnv( + // absl::flat_hash_map{ + // {"CAPACITY", xls::dslx::InterpValue::MakeUBits(/*bit_count=*/32, + // /*value=*/32)}}); + dslx::ParametricEnv* symbolic_bindings = nullptr; // no parametric bindings: the non-parametric wrapper named by `function_name` is converted + XLS_ASSERT_OK_AND_ASSIGN( + this->converted, xls::dslx::ConvertOneFunction( + checked_module.module, function_name, &import_data, + symbolic_bindings, options)); + } + + // Parse Buffer contents with ZSTD library, prepare inputs for DSLX simulation + // based on the buffer contents, form the expected output from the simulation, + // run the simulation of frame header parser and compare the results against + // expected values. + void ParseAndCompareWithZstd(absl::Span buffer) { + absl::Span input_buffer; + ZSTD_frameHeader zstd_fh; // NOTE(review): not initialized here, yet its fields are read on the NO_ENOUGH_DATA path and headerSize is read unconditionally - presumably libzstd zero-fills it; confirm or value-initialize + size_t result; + std::vector buffer_extended(dslx_buffer_size_bytes, 0); + + // Extend buffer contents to 128 bits if necessary + if (buffer.size() < dslx_buffer_size_bytes) { + std::copy(buffer.begin(), buffer.end(), buffer_extended.begin()); + input_buffer = absl::MakeSpan(buffer_extended); + } else { + input_buffer = buffer; + } + + // Parse input buffer with libzstd and write it as ZSTD_frameHeader (libzstd is given the original, un-padded `buffer`) + ASSERT_TRUE(!buffer.empty() && buffer.data() != nullptr); + result = ZSTD_getFrameHeader_advanced( + &zstd_fh, buffer.data(), buffer.size(), ZSTD_f_zstd1_magicless); + + // Decide on the expected status + FrameHeaderStatus expected_status = FrameHeaderStatus::OK; + if (result != 0) { + if (ZSTD_isError(result)) { + switch (ZSTD_getErrorCode(result)) { + case ZSTD_error_frameParameter_windowTooLarge: + expected_status = FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE; + break; + case ZSTD_error_frameParameter_unsupported: + // Occurs when reserved_bit == 1, should result in CORRUPTED state + default: + // Provided data is corrupted.
Unable to correctly parse ZSTD frame. + expected_status = FrameHeaderStatus::CORRUPTED; + break; + } + } else { + // Provided data is too small to correctly parse ZSTD frame, should + // have `result` bytes, got `buffer.size()` bytes. + expected_status = FrameHeaderStatus::NO_ENOUGH_DATA; + } + // Make sure that the FCS does not exceed max window buffer size + // Frame Header decoding failed - Special case - difference between the + // reference library and the decoder + } else if (!window_size_valid(zstd_fh.windowSize)) { + expected_status = FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE; + } + + auto input = CreateDSLXSimulationInput(buffer.size(), input_buffer); + absl::flat_hash_map hashed_input = {{"buffer", input}}; + + auto expected_frame_header_result = CreateExpectedFrameHeaderResult( + &zstd_fh, input, buffer, expected_status); + + RunAndExpectEq(hashed_input, expected_frame_header_result, this->converted, + true, true); // NOTE(review): presumably IrTestBase's run_optimized / simulate flags - confirm + } + + const char* file = "xls/modules/zstd/frame_header_test.x"; + const char* module_name = "frame_header_test"; + const char* file_name = "frame_header_test.x"; + const char* function_name = "parse_frame_header_128"; + std::string converted; + + private: + const size_t dslx_buffer_size = 128; // width of the DSLX buffer in bits + const size_t dslx_buffer_size_bytes = + (dslx_buffer_size + CHAR_BIT - 1) / CHAR_BIT; + // Largest allowed WindowLog accepted by libzstd decompression function + // https://github.com/facebook/zstd/blob/v1.5.6/lib/decompress/zstd_decompress.c#L515 + // Use only in C++ tests when comparing DSLX ZSTD Decoder with libzstd + // Must be in sync with TEST_WINDOW_LOG_MAX_LIBZSTD in frame_header_test.x + const uint64_t TEST_WINDOW_LOG_MAX_LIBZSTD = 30; + + // Maximal mantissa value for calculating maximal accepted window_size + // as per https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor + const uint64_t MAX_MANTISSA = 0b111; + + // Calculate maximal accepted window_size for given WINDOW_LOG_MAX and return + // whether given window_size
should be accepted or discarded. Based on + // window_size calculation from: RFC 8878 + // https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor + bool window_size_valid(uint64_t window_size) { + auto max_window_size = // NOTE(review): `1 << ...` shifts an int literal - fine for a log of 30, would no longer fit in int from 31 upwards; consider a 64-bit literal + (1 << TEST_WINDOW_LOG_MAX_LIBZSTD) + + (((1 << TEST_WINDOW_LOG_MAX_LIBZSTD) >> 3) * MAX_MANTISSA); + + return window_size <= max_window_size; + } + + void PrintZSTDFrameHeader(ZSTD_frameHeader* fh) { // debug helper: dumps the libzstd-parsed fields to stdout + std::cout << std::hex; // sticky: std::cout stays in hex mode afterwards + std::cout << "zstd_fh->frameContentSize: 0x" << fh->frameContentSize + << "\n"; + std::cout << "zstd_fh->windowSize: 0x" << fh->windowSize << "\n"; + std::cout << "zstd_fh->blockSizeMax: 0x" << fh->blockSizeMax << "\n"; + std::cout << "zstd_fh->frameType: 0x" << fh->frameType << "\n"; + std::cout << "zstd_fh->headerSize: 0x" << fh->headerSize << "\n"; + std::cout << "zstd_fh->dictID: 0x" << fh->dictID << "\n"; + std::cout << "zstd_fh->checksumFlag: 0x" << fh->checksumFlag << "\n"; + } + + // Form DSLX Value representing ZSTD Frame header based on data parsed with + // ZSTD library. Represents DSLX struct `FrameHeader`. + Value CreateExpectedFrameHeader(ZSTD_frameHeader* fh, + FrameHeaderStatus expected_status) { + if (expected_status == FrameHeaderStatus::CORRUPTED || + expected_status == FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE) { + return Value::Tuple({ + /*window_size:*/ Value(UBits(0, 64)), + /*frame_content_size:*/ Value(UBits(0, 64)), + /*dictionary_id:*/ Value(UBits(0, 32)), + /*content_checksum_flag: */ Value(UBits(0, 1)), + }); + } + return Value::Tuple({ + /*window_size:*/ Value(UBits(fh->windowSize, 64)), + /*frame_content_size:*/ Value(UBits(fh->frameContentSize, 64)), + /*dictionary_id:*/ Value(UBits(fh->dictID, 32)), + /*content_checksum_flag: */ Value(UBits(fh->checksumFlag, 1)), + }); + } + + // Create DSLX Value representing Buffer contents after parsing frame header + // in simulation. Represents DSLX struct `Buffer`.
+ Value CreateExpectedBuffer(Value dslx_simulation_input, + absl::Span input_buffer, + size_t consumed_bytes_count, + FrameHeaderStatus expected_status) { + // Return original buffer contents (nothing is consumed when data is missing) + if (expected_status == FrameHeaderStatus::NO_ENOUGH_DATA) { + return dslx_simulation_input; + } + // Critical failure - return empty buffer + if (expected_status == FrameHeaderStatus::CORRUPTED || + expected_status == FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE) { + return Value::Tuple({/*contents:*/ Value(UBits(0, dslx_buffer_size)), + /*length:*/ Value(UBits(0, 32))}); + } + + // Frame Header parsing succeeded. Expect output buffer contents with + // removed first `consumed_bytes_count` bytes and extended to + // dslx_buffer_size if necessary (assumes no more than dslx_buffer_size_bytes bytes remain, otherwise the subtraction below wraps - TODO confirm) + size_t bytes_to_extend = + dslx_buffer_size_bytes - (input_buffer.size() - consumed_bytes_count); + std::vector output_buffer(input_buffer.begin() + consumed_bytes_count, + input_buffer.end()); + for (size_t i = 0; i < bytes_to_extend; i++) { // size_t index matches the type of bytes_to_extend + output_buffer.push_back(0); + } + + auto expected_buffer_contents = + Value(Bits::FromBytes(output_buffer, dslx_buffer_size)); + size_t output_buffer_size_bits = + (input_buffer.size() - consumed_bytes_count) * CHAR_BIT; + size_t expected_buffer_size = output_buffer_size_bits > dslx_buffer_size + ? dslx_buffer_size + : output_buffer_size_bits; + + return Value::Tuple({/*contents:*/ expected_buffer_contents, + /*length:*/ Value(UBits(expected_buffer_size, 32))}); + } + + // Prepare DSLX Value representing Full Result of frame header parsing + // simulation. It consists of expected status, parsing result and buffer + // contents after parsing. Represents DSLX struct `FrameHeaderResult`.
+ Value CreateExpectedFrameHeaderResult(ZSTD_frameHeader* fh, + Value dslx_simulation_input, + absl::Span input_buffer, + FrameHeaderStatus expected_status) { + auto expected_buffer = + CreateExpectedBuffer(std::move(dslx_simulation_input), input_buffer, + fh->headerSize, expected_status); + auto expected_frame_header = CreateExpectedFrameHeader(fh, expected_status); + return Value::Tuple({/*status:*/ Value(UBits(expected_status, 2)), + /*header:*/ expected_frame_header, + /*buffer:*/ expected_buffer}); + } + + // Return DSLX Value used as input argument for running frame header parsing + // simulation. Represents DSLX struct `Buffer`. + Value CreateDSLXSimulationInput(size_t buffer_size, + absl::Span input_buffer) { + size_t size = buffer_size; + + // ignore buffer contents that won't fit into specialized buffer + if (buffer_size > dslx_buffer_size_bytes) { + size = dslx_buffer_size_bytes; + } + + return Value::Tuple( + {/*contents:*/ Value(Bits::FromBytes(input_buffer, dslx_buffer_size)), + /*length:*/ Value(UBits(size * CHAR_BIT, 32))}); // length is expressed in bits + } +}; + +/* TESTS */ + +TEST(ZstdLib, Version) { ASSERT_STREQ(ZSTD_VERSION_STRING, "1.5.6"); } // STREQ compares the characters; EQ on two C strings would compare pointers + +TEST_F(FrameHeaderTest, Success) { + std::vector buffer{0xC2, 0x09, 0xFE, 0xCA, 0xEF, 0xCD, + 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12}; + this->ParseAndCompareWithZstd(buffer); +} + +TEST_F(FrameHeaderTest, FailCorruptedReservedBit) { + std::vector buffer{0xEA, 0xFE, 0xCA, 0xEF, 0xCD, 0xAB, + 0x90, 0x78, 0x56, 0x34, 0x12}; + this->ParseAndCompareWithZstd(buffer); +} + +TEST_F(FrameHeaderTest, FailUnsupportedWindowSizeTooBig) { + std::vector buffer{0x10, 0xD3}; + this->ParseAndCompareWithZstd(buffer); +} + +TEST_F(FrameHeaderTest, FailNoEnoughData) { + std::vector buffer{0xD3, 0xED}; + this->ParseAndCompareWithZstd(buffer); +} + +// NO_ENOUGH_DATA has priority over CORRUPTED from reserved bit +TEST_F(FrameHeaderTest, FailNoEnoughDataReservedBit) { + std::vector buffer{0xED, 0xD3}; + this->ParseAndCompareWithZstd(buffer); +} +
+TEST_F(FrameHeaderTest, FailUnsupportedFrameContentSizeThroughSingleSegment) { + std::vector buffer{0261, 015, 91, 91, 91, 0364}; + this->ParseAndCompareWithZstd(buffer); +} + +TEST_F(FrameHeaderTest, + FailUnsupportedVeryLargeFrameContentSizeThroughSingleSegment) { + std::vector buffer{0344, 'y', ':', 0245, '=', '?', 0263, + 0026, ':', 0201, 0266, 0235, 'e', 0300}; + this->ParseAndCompareWithZstd(buffer); +} + +TEST_F(FrameHeaderTest, FailUnsupportedWindowSize) { + std::vector buffer{'S', 0301, 'i', 0320, 0, 0256, 'd', 'D', + 0226, 'F', 'Z', 'Z', 0332, 0370, 'A'}; + this->ParseAndCompareWithZstd(buffer); +} + +class FrameHeaderSeededTest : public FrameHeaderTest, + public ::testing::WithParamInterface { + public: + static const uint32_t random_headers_count = 50; +}; + +// Test `random_headers_count` instances of randomly generated valid +// frame headers, generated with `decodecorpus` tool. +TEST_P(FrameHeaderSeededTest, ParseMultipleFrameHeaders) { + auto seed = GetParam(); + auto frame_header = zstd::GenerateFrameHeader(seed, false); + ASSERT_TRUE(frame_header.ok()); // abort this instance if header generation failed + this->ParseAndCompareWithZstd(frame_header.value()); +} + +INSTANTIATE_TEST_SUITE_P( + FrameHeaderSeededTest, FrameHeaderSeededTest, + ::testing::Range(0, FrameHeaderSeededTest::random_headers_count)); // one instance per seed, 0 .. random_headers_count - 1 + +class FrameHeaderFuzzTest + : public fuzztest::PerFuzzTestFixtureAdapter { + public: + void ParseMultipleRandomFrameHeaders( + const std::vector& frame_header) { + this->ParseAndCompareWithZstd(frame_header); + } +}; + +// Perform NON-DETERMINISTIC FuzzTests with input vectors of variable length and +// contents. Frame Headers generated by FuzzTests can be invalid. +// This test checks if negative cases are handled correctly.
+FUZZ_TEST_F(FrameHeaderFuzzTest, ParseMultipleRandomFrameHeaders) + .WithDomains(fuzztest::Arbitrary>() + .WithMinSize(1) + .WithMaxSize(16)); + +} // namespace +} // namespace xls diff --git a/xls/modules/zstd/frame_header_test.x b/xls/modules/zstd/frame_header_test.x new file mode 100644 index 0000000000..9216dfab8d --- /dev/null +++ b/xls/modules/zstd/frame_header_test.x @@ -0,0 +1,30 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import std; +import xls.modules.zstd.buffer as buff; +import xls.modules.zstd.frame_header as frame_header; + +type Buffer = buff::Buffer; +type FrameHeaderResult = frame_header::FrameHeaderResult; +type WindowSize = frame_header::WindowSize; + +// Largest allowed WindowLog accepted by libzstd decompression function +// https://github.com/facebook/zstd/blob/v1.4.7/lib/decompress/zstd_decompress.c#L296 +// Use only in C++ tests when comparing DSLX ZSTD Decoder with libzstd (NOTE(review): frame_header_test.cc cites the v1.5.6 source for this limit; confirm which revision the link above should use) +pub const TEST_WINDOW_LOG_MAX_LIBZSTD = WindowSize:30; + +pub fn parse_frame_header_128(buffer: Buffer<128>) -> FrameHeaderResult<128> { // non-parametric 128-bit wrapper; frame_header_test.cc converts this function by name + frame_header::parse_frame_header(buffer) +} diff --git a/xls/modules/zstd/img/ZSTD_compressed_block_Huffman_decoder.png b/xls/modules/zstd/img/ZSTD_compressed_block_Huffman_decoder.png new file mode 100644 index 0000000000..625fa98e94 Binary files /dev/null and b/xls/modules/zstd/img/ZSTD_compressed_block_Huffman_decoder.png differ diff --git
a/xls/modules/zstd/img/ZSTD_compressed_block_decoder.png b/xls/modules/zstd/img/ZSTD_compressed_block_decoder.png new file mode 100644 index 0000000000..a790965f7c Binary files /dev/null and b/xls/modules/zstd/img/ZSTD_compressed_block_decoder.png differ diff --git a/xls/modules/zstd/img/ZSTD_compressed_block_literals_decoder.png b/xls/modules/zstd/img/ZSTD_compressed_block_literals_decoder.png new file mode 100644 index 0000000000..d89c12c936 Binary files /dev/null and b/xls/modules/zstd/img/ZSTD_compressed_block_literals_decoder.png differ diff --git a/xls/modules/zstd/img/ZSTD_compressed_block_sequence_decoder.png b/xls/modules/zstd/img/ZSTD_compressed_block_sequence_decoder.png new file mode 100644 index 0000000000..4c6bfb7720 Binary files /dev/null and b/xls/modules/zstd/img/ZSTD_compressed_block_sequence_decoder.png differ diff --git a/xls/modules/zstd/img/ZSTD_decoder.png b/xls/modules/zstd/img/ZSTD_decoder.png new file mode 100644 index 0000000000..f157751512 Binary files /dev/null and b/xls/modules/zstd/img/ZSTD_decoder.png differ diff --git a/xls/modules/zstd/magic.x b/xls/modules/zstd/magic.x new file mode 100644 index 0000000000..196f2f528f --- /dev/null +++ b/xls/modules/zstd/magic.x @@ -0,0 +1,89 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This file contains utilities related to ZSTD magic number parsing +// More information about the ZSTD Magic Number can be found in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 + +import std; +import xls.modules.zstd.buffer as buff; + +type Buffer = buff::Buffer; +type BufferStatus = buff::BufferStatus; + +// Magic number value, as in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 +const MAGIC_NUMBER = u32:0xFD2FB528; + +// Status values reported by the magic number parsing function +pub enum MagicStatus: u2 { + OK = 0, // the popped value equals MAGIC_NUMBER + CORRUPTED = 1, // enough data was available, but the value differs from MAGIC_NUMBER + NO_ENOUGH_DATA = 2, // the buffer pop did not report BufferStatus::OK +} + +// Structure for returning results of magic number parsing +pub struct MagicResult { + buffer: Buffer, // buffer after the pop on OK, the unmodified input buffer otherwise + status: MagicStatus, +} + +// Parses a Buffer and checks if it contains the magic number. +// The buffer is assumed to contain a valid beginning of the ZSTD file. +// The function returns MagicResult structure with the buffer after parsing +// the magic number and the status of the operation. On failure, the returned +// buffer is the same as the input buffer.
+pub fn parse_magic_number(buffer: Buffer) -> MagicResult { + let (result, data) = buff::buffer_fixed_pop_checked(buffer); + + match result.status { + BufferStatus::OK => { + if data == MAGIC_NUMBER { + trace_fmt!("parse_magic_number: Magic number found!"); + MagicResult {status: MagicStatus::OK, buffer: result.buffer} + } else { + trace_fmt!("parse_magic_number: Magic number not found!"); + MagicResult {status: MagicStatus::CORRUPTED, buffer: buffer} + } + }, + _ => { + trace_fmt!("parse_magic_number: Not enough data to parse magic number!"); + MagicResult {status: MagicStatus::NO_ENOUGH_DATA, buffer: buffer} + } + } +} + +#[test] +fn test_parse_magic_number() { + let buffer = Buffer { content: MAGIC_NUMBER, length: u32:32}; + let result = parse_magic_number(buffer); + assert_eq(result, MagicResult { + status: MagicStatus::OK, + buffer: Buffer {content: u32:0, length: u32:0}, + }); + + let buffer = Buffer { content: u32:0x12345678, length: u32:32}; + let result = parse_magic_number(buffer); + assert_eq(result, MagicResult { + status: MagicStatus::CORRUPTED, + buffer: buffer + }); + + let buffer = Buffer { content: u32:0x1234, length: u32:16}; + let result = parse_magic_number(buffer); + assert_eq(result, MagicResult { + status: MagicStatus::NO_ENOUGH_DATA, + buffer: buffer, + }); +} diff --git a/xls/modules/zstd/ram_printer.x b/xls/modules/zstd/ram_printer.x new file mode 100644 index 0000000000..d887a05bcc --- /dev/null +++ b/xls/modules/zstd/ram_printer.x @@ -0,0 +1,159 @@ +// Copyright 2024 The XLS Authors (the not-enough-data trace in parse_magic_number above is prefixed with the function's own name, like its other two traces) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +import std; +import xls.examples.ram; + +enum RamPrinterStatus : u2 { + IDLE = 0, // waiting for a print request + BUSY = 1, // a dump is in progress +} + +struct RamPrinterState { status: RamPrinterStatus, addr: bits[ADDR_WIDTH] } + +proc RamPrinter +{ + print_r: chan<()> in; + finish_s: chan<()> out; + rd_req_s: chan>[NUM_MEMORIES] out; + rd_resp_r: chan>[NUM_MEMORIES] in; + + config(print_r: chan<()> in, finish_s: chan<()> out, + rd_req_s: chan>[NUM_MEMORIES] out, + rd_resp_r: chan>[NUM_MEMORIES] in) { + (print_r, finish_s, rd_req_s, rd_resp_r) + } + + init { RamPrinterState { status: RamPrinterStatus::IDLE, addr: bits[ADDR_WIDTH]:0 } } + + next(state: RamPrinterState) { + let tok = join(); + let is_idle = state.status == RamPrinterStatus::IDLE; + let (tok, _) = recv_if(tok, print_r, is_idle, ()); // only wait for a print request while idle + + let (tok, row) = for (i, (tok, row)): (u32, (token, bits[DATA_WIDTH][NUM_MEMORIES])) in + range(u32:0, NUM_MEMORIES) { // read the word at state.addr from every memory in turn + let tok = send(tok, rd_req_s[i], ram::ReadWordReq(state.addr)); + let (tok, resp) = recv(tok, rd_resp_r[i]); + let row = update(row, i, resp.data); + (tok, row) + }((tok, bits[DATA_WIDTH][NUM_MEMORIES]:[bits[DATA_WIDTH]:0, ...])); + + let is_start = state.addr == bits[ADDR_WIDTH]:0; + let is_last = state.addr == (SIZE - u32:1) as bits[ADDR_WIDTH]; + + if is_start { trace_fmt!(" ========= RAM content ========= "); } else { }; + trace_fmt!(" {}: {:x} ", state.addr, array_rev(row)); // one traced row per address; array_rev puts memory 0 last + + let tok = send_if(tok, finish_s, is_last, ()); // signal completion after the last address + + if is_last { + RamPrinterState { addr: bits[ADDR_WIDTH]:0, status: RamPrinterStatus::IDLE } + } else { + RamPrinterState { + addr: state.addr + bits[ADDR_WIDTH]:1, status: RamPrinterStatus::BUSY + } + } + } +} + +const TEST_NUM_MEMORIES = u32:8; +const TEST_SIZE = u32:10; +const TEST_DATA_WIDTH = u32:8; +const TEST_WORD_PARTITION_SIZE = u32:1; +const TEST_NUM_PARTITIONS = ram::num_partitions(TEST_WORD_PARTITION_SIZE, TEST_DATA_WIDTH); +const
TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_ADDR_WIDTH = std::clog2(TEST_SIZE); +const TEST_INITIALIZED = true; + +type TestAddr = uN[TEST_ADDR_WIDTH]; +type TestData = uN[TEST_DATA_WIDTH]; + +fn TestWriteWordReq + (addr: TestAddr, data: TestData) + -> ram::WriteReq { + ram::WriteWordReq(addr, data) +} + +fn TestReadWordReq(addr: TestAddr) -> ram::ReadReq { + ram::ReadWordReq(addr) +} + +#[test_proc] +proc RamPrinterTest { + terminator: chan out; + rd_req_s: chan>[TEST_NUM_MEMORIES] out; + rd_resp_r: chan>[TEST_NUM_MEMORIES] in; + wr_req_s: chan>[TEST_NUM_MEMORIES] out; + wr_resp_r: chan[TEST_NUM_MEMORIES] in; + print_s: chan<()> out; + finish_r: chan<()> in; + + config(terminator: chan out) { + let (rd_req_s, rd_req_r) = chan>[TEST_NUM_MEMORIES]("rd_req"); + let (rd_resp_s, rd_resp_r) = chan>[TEST_NUM_MEMORIES]("rd_resp"); + let (wr_req_s, wr_req_r) = chan>[TEST_NUM_MEMORIES]("wr_req"); + let (wr_resp_s, wr_resp_r) = chan[TEST_NUM_MEMORIES]("wr_resp"); + let (print_s, print_r) = chan<()>("print"); + let (finish_s, finish_r) = chan<()>("finish"); + + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[0], rd_resp_s[0], wr_req_r[0], wr_resp_s[0]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[1], rd_resp_s[1], wr_req_r[1], wr_resp_s[1]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[2], rd_resp_s[2], wr_req_r[2], wr_resp_s[2]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[3], rd_resp_s[3], wr_req_r[3], wr_resp_s[3]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE,
TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[4], rd_resp_s[4], wr_req_r[4], wr_resp_s[4]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[5], rd_resp_s[5], wr_req_r[5], wr_resp_s[5]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[6], rd_resp_s[6], wr_req_r[6], wr_resp_s[6]); + spawn ram::RamModel< + TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED>( + rd_req_r[7], rd_resp_s[7], wr_req_r[7], wr_resp_s[7]); + + spawn RamPrinter< + TEST_DATA_WIDTH, TEST_SIZE, TEST_NUM_PARTITIONS, TEST_ADDR_WIDTH, TEST_NUM_MEMORIES>( + print_r, finish_s, rd_req_s, rd_resp_r); + + (terminator, rd_req_s, rd_resp_r, wr_req_s, wr_resp_r, print_s, finish_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let tok = send(tok, wr_req_s[0], TestWriteWordReq(TestAddr:2, TestData:0x10)); + let tok = send(tok, wr_req_s[1], TestWriteWordReq(TestAddr:2, TestData:0x20)); + let tok = send(tok, wr_req_s[2], TestWriteWordReq(TestAddr:2, TestData:0x30)); + let tok = send(tok, wr_req_s[3], TestWriteWordReq(TestAddr:2, TestData:0x40)); + let tok = send(tok, wr_req_s[4], TestWriteWordReq(TestAddr:2, TestData:0x50)); + let tok = send(tok, wr_req_s[5], TestWriteWordReq(TestAddr:2, TestData:0x60)); + let tok = send(tok, wr_req_s[6], TestWriteWordReq(TestAddr:2, TestData:0x70)); + let tok = send(tok, wr_req_s[7], TestWriteWordReq(TestAddr:2, TestData:0x80)); + let tok = send(tok, print_s, ()); + let (tok, _) = recv(tok, finish_r); + let tok = send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/raw_block_dec.x b/xls/modules/zstd/raw_block_dec.x new file mode 100644 index 0000000000..a3656011b0 --- /dev/null +++ b/xls/modules/zstd/raw_block_dec.x @@ -0,0 +1,118 @@ +// Copyright 2024 The XLS Authors +//
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains the implementation of RawBlockDecoder responsible for decoding +// ZSTD Raw Blocks. More information about Raw Block's format can be found in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2.2 + +import xls.modules.zstd.common as common; + +type BlockDataPacket = common::BlockDataPacket; +type BlockPacketLength = common::BlockPacketLength; +type BlockData = common::BlockData; +type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket; +type CopyOrMatchContent = common::CopyOrMatchContent; +type CopyOrMatchLength = common::CopyOrMatchLength; // NOTE(review): this alias and CopyOrMatchContent do not appear to be used in this file - confirm +type SequenceExecutorMessageType = common::SequenceExecutorMessageType; + +struct RawBlockDecoderState { + prev_id: u32, // ID of the previous block + prev_last: bool, // if the previous packet was the last one that makes up the whole block + prev_valid: bool, // if prev_id and prev_last contain valid data +} + +const ZERO_RAW_BLOCK_DECODER_STATE = zero!(); // initial state: prev_valid is false, so the first packet skips the ID/last check + +// RawBlockDecoder is responsible for decoding Raw Blocks, +// it should be a part of the ZSTD Decoder pipeline.
+pub proc RawBlockDecoder { + input_r: chan in; + output_s: chan out; + + init { (ZERO_RAW_BLOCK_DECODER_STATE) } + + config( + input_r: chan in, + output_s: chan out + ) {(input_r, output_s)} + + next(state: RawBlockDecoderState) { + let tok = join(); + let (tok, data) = recv(tok, input_r); + if state.prev_valid && (data.id != state.prev_id) && (state.prev_last == false) { + trace_fmt!("ID changed but previous packet have no last!"); + fail!("no_last", ()); + } else {}; + + let output_data = ExtendedBlockDataPacket { + // Decoded RAW block is always a literal + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: data.last, + last_block: data.last_block, + id: data.id, + data: data.data as BlockData, + length: data.length as BlockPacketLength, + }, + }; + + let tok = send(tok, output_s, output_data); // forward the packet, tagged as LITERAL + + RawBlockDecoderState { + prev_valid: true, // from now on the ID/last consistency check above applies + prev_id: output_data.packet.id, + prev_last: output_data.packet.last + } + } +} + +#[test_proc] +proc RawBlockDecoderTest { + terminator: chan out; + dec_input_s: chan out; + dec_output_r: chan in; + + config(terminator: chan out) { + let (dec_input_s, dec_input_r) = chan("dec_input"); + let (dec_output_s, dec_output_r) = chan("dec_output"); + spawn RawBlockDecoder(dec_input_r, dec_output_s); + (terminator, dec_input_s, dec_output_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let data_to_send: BlockDataPacket[5] = [ // two blocks (ids 1 and 2), each ending with a packet that has `last` set + BlockDataPacket { id: u32:1, last: u1:false, last_block: u1:false, data: BlockData:1, length: BlockPacketLength:32 }, + BlockDataPacket { id: u32:1, last: u1:false, last_block: u1:false, data: BlockData:2, length: BlockPacketLength:32 }, + BlockDataPacket { id: u32:1, last: u1:true, last_block: u1:false, data: BlockData:3, length: BlockPacketLength:32 }, + BlockDataPacket { id: u32:2, last: u1:false, last_block: u1:false, data: BlockData:4, length: BlockPacketLength:32 }, + BlockDataPacket { id: u32:2, last: u1:true, last_block: u1:true, data: BlockData:5,
length: BlockPacketLength:32 }, + ]; + + let tok = for ((_, data), tok): ((u32, BlockDataPacket), token) in enumerate(data_to_send) { + let tok = send(tok, dec_input_s, data); + let (tok, received_data) = recv(tok, dec_output_r); + let expected_data = ExtendedBlockDataPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: data, + }; + assert_eq(expected_data, received_data); + (tok) + }(tok); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/repacketizer.x b/xls/modules/zstd/repacketizer.x new file mode 100644 index 0000000000..3123cd67d0 --- /dev/null +++ b/xls/modules/zstd/repacketizer.x @@ -0,0 +1,215 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Repacketizer +// +// Remove invalid bytes from input packets, +// form new packets with all bits valid if possible.
+
+import std;
+import xls.modules.zstd.common as common;
+
+type ZstdDecodedPacket = common::ZstdDecodedPacket;
+type BlockData = common::BlockData;
+type BlockPacketLength = common::BlockPacketLength;
+
+const DATA_WIDTH = common::DATA_WIDTH;
+
+// Accumulator carried between Repacketizer activations.
+struct RepacketizerState {
+    // Bits collected so far for the packet under construction (packed from bit 0 up)
+    repacked_data: BlockData,
+    // Number of valid bits currently held in `repacked_data`
+    valid_length: BlockPacketLength,
+    // Number of bits still missing to fill a whole packet (DATA_WIDTH - valid_length)
+    to_fill: BlockPacketLength,
+    // Set when the last input packet of a frame overflowed the current output packet;
+    // the remainder is flushed in the next activation without reading new input
+    send_last_leftover: bool
+}
+
+const ZERO_ZSTD_DECODED_PACKET = zero!<ZstdDecodedPacket>();
+const ZERO_REPACKETIZER_STATE = zero!<RepacketizerState>();
+// Empty accumulator: nothing collected, a whole packet left to fill.
+const INIT_REPACKETIZER_STATE = RepacketizerState {to_fill: DATA_WIDTH, ..ZERO_REPACKETIZER_STATE};
+
+// Merges the valid bits of consecutive input packets into output packets that are
+// completely filled (DATA_WIDTH bits). Only the final packet of a frame (the one
+// carrying `last`) may be shorter.
+proc Repacketizer {
+    input_r: chan<ZstdDecodedPacket> in;
+    output_s: chan<ZstdDecodedPacket> out;
+
+    init {(INIT_REPACKETIZER_STATE)}
+
+    config (
+        input_r: chan<ZstdDecodedPacket> in,
+        output_s: chan<ZstdDecodedPacket> out,
+    ) {
+        (input_r, output_s)
+    }
+
+    next (state: RepacketizerState) {
+        let tok = join();
+        // Don't receive if we process leftovers
+        let (tok, decoded_packet) = recv_if(tok, input_r, !state.send_last_leftover, ZERO_ZSTD_DECODED_PACKET);
+
+        // Will be able to send repacketized packet in current next() evaluation
+        let send_now = state.to_fill <= decoded_packet.length || decoded_packet.last || state.send_last_leftover;
+        // Received last packet in frame which won't fit into currently processed repacketized packet.
+        // Set flag indicating that Repacketizer will send another packet to finish the frame in
+        // next evaluation.
+        let next_send_last_leftover = decoded_packet.last && state.to_fill < decoded_packet.length;
+        // The packet sent in this evaluation closes the frame: either it is the flushed
+        // leftover, or the last input packet fitted entirely into the current output packet.
+        let is_last = state.send_last_leftover || (decoded_packet.last && !next_send_last_leftover);
+
+        let combined_length = state.valid_length + decoded_packet.length;
+        // Negative when the buffered and received bits together do not fill a whole packet
+        let leftover_length = (combined_length - DATA_WIDTH) as s32;
+        let packet_filled = leftover_length >= s32:0;
+        let next_valid_length = if packet_filled {leftover_length as BlockPacketLength} else {combined_length};
+        let next_to_fill = DATA_WIDTH - next_valid_length;
+
+        let current_valid_length = if packet_filled {DATA_WIDTH} else {combined_length};
+        let bits_to_take_length = if packet_filled {state.to_fill} else {decoded_packet.length};
+
+        // Append least significant bits of received packet to most significant positions of repacked data buffer
+        let masked_data = ((BlockData:1 << bits_to_take_length) - BlockData:1) & decoded_packet.data;
+        let repacked_data = state.repacked_data | (masked_data << state.valid_length);
+
+        // Prepare buffer state for the next evaluation - take leftover most significant bits of
+        // received packet
+        let leftover_mask = (BlockData:1 << (decoded_packet.length - bits_to_take_length)) - BlockData:1;
+        let leftover_masked_data = (decoded_packet.data >> bits_to_take_length) & leftover_mask;
+        let next_repacked_data = if (send_now) {leftover_masked_data} else {repacked_data};
+
+        let packet_to_send = ZstdDecodedPacket {
+            data: repacked_data,
+            length: current_valid_length,
+            last: is_last,
+        };
+        let tok = send_if(tok, output_s, send_now, packet_to_send);
+
+        // Start from an empty accumulator once the frame is finished
+        let next_state = if (is_last) {
+            INIT_REPACKETIZER_STATE
+        } else {
+            RepacketizerState {
+                repacked_data: next_repacked_data,
+                valid_length: next_valid_length,
+                to_fill: next_to_fill,
+                send_last_leftover: next_send_last_leftover,
+            }
+        };
+
+        trace_fmt!("Repacketizer: state: {:#x}", state);
+        if (!state.send_last_leftover) {
+            trace_fmt!("Repacketizer: Received packet: {:#x}", decoded_packet);
+        } else {};
+        trace_fmt!("Repacketizer: send_now: {}", send_now);
+        trace_fmt!("Repacketizer: next_send_last_leftover: {}", next_send_last_leftover);
+        trace_fmt!("Repacketizer: combined_length: {}", combined_length);
+        trace_fmt!("Repacketizer: leftover_length: {}", leftover_length);
+        trace_fmt!("Repacketizer: next_valid_length: {}", next_valid_length);
+        trace_fmt!("Repacketizer: next_to_fill: {}", next_to_fill);
+        trace_fmt!("Repacketizer: current_valid_length: {}", current_valid_length);
+        trace_fmt!("Repacketizer: bits_to_take_length: {}", bits_to_take_length);
+        trace_fmt!("Repacketizer: masked_data: {:#x}", masked_data);
+        trace_fmt!("Repacketizer: repacked_data: {:#x}", repacked_data);
+        trace_fmt!("Repacketizer: leftover_mask: {:#x}", leftover_mask);
+        trace_fmt!("Repacketizer: leftover_masked_data: {:#x}", leftover_masked_data);
+        trace_fmt!("Repacketizer: next_repacked_data: {:#x}", next_repacked_data);
+        if (send_now) {
+            trace_fmt!("Repacketizer: Sent repacketized packet: {:#x}", packet_to_send);
+        } else {};
+        trace_fmt!("Repacketizer: next_state: {:#x}", next_state);
+
+        next_state
+    }
+}
+
+// Feeds 24 input packets covering full, partial, frame-ending and overflowing cases
+// and checks the 8 repacketized packets expected on the output.
+#[test_proc]
+proc RepacketizerTest {
+    terminator: chan<bool> out;
+    input_s: chan<ZstdDecodedPacket> out;
+    output_r: chan<ZstdDecodedPacket> in;
+
+    init {}
+
+    config (terminator: chan<bool> out) {
+        let (input_s, input_r) = chan<ZstdDecodedPacket>("input");
+        let (output_s, output_r) = chan<ZstdDecodedPacket>("output");
+
+        spawn Repacketizer(input_r, output_s);
+        (terminator, input_s, output_r)
+    }
+
+    next(state: ()) {
+        let tok = join();
+        let DecodedInputs: ZstdDecodedPacket[24] = [
+            // Full packet - no need for removing alignment zeros
+            ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false},
+            // Data in 4 packets - should be batched together into one full output packet
+            ZstdDecodedPacket {data: BlockData:0x78, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x56, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x1234, length: BlockPacketLength:16, last:false},
+            ZstdDecodedPacket {data: BlockData:0xDEADBEEF, length: BlockPacketLength:32, last:false},
+            // Small last packet - should be sent out separately
+            ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true},
+            // One not-full packet followed by the last packet in the frame, which completes the
+            // previous packet and starts a new one that should be marked as last
+            ZstdDecodedPacket {data: BlockData:0xADBEEF12345678, length: BlockPacketLength:56, last:false},
+            ZstdDecodedPacket {data: BlockData:0x9ADE, length: BlockPacketLength:16, last:true},
+            // 8 1-byte packets forming single output packet
+            ZstdDecodedPacket {data: BlockData:0xEF, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0xCD, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0xAB, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x89, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x67, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x45, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x23, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x01, length: BlockPacketLength:8, last:false},
+            // 7 1-byte packets and 1 8-byte packet forming 1 full and 1 7-byte output packet
+            // marked as last
+            ZstdDecodedPacket {data: BlockData:0xEF, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0xCD, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0xAB, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x89, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x67, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x45, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0x23, length: BlockPacketLength:8, last:false},
+            ZstdDecodedPacket {data: BlockData:0xFEDCBA9876543201, length: BlockPacketLength:64, last:true},
+        ];
+
+        let DecodedOutputs: ZstdDecodedPacket[8] = [
+            // Full packet - no need for removing alignment zeros
+            ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false},
+            // Data in 4 packets - should be batched together into one full output packet
+            ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false},
+            // Small last packet - should be sent out separately
+            ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true},
+            // One not-full packet followed by the last packet in the frame, which completes the
+            // previous packet and starts a new one that should be marked as last
+            ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false},
+            ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true},
+            // 8 1-byte packets forming single output packet
+            ZstdDecodedPacket {data: BlockData:0x0123456789ABCDEF, length: BlockPacketLength:64, last:false},
+            // 7 1-byte packets and 1 8-byte packet forming 1 full and 1 7-byte output packet
+            // marked as last
+            ZstdDecodedPacket {data: BlockData:0x0123456789ABCDEF, length: BlockPacketLength:64, last:false},
+            ZstdDecodedPacket {data: BlockData:0xFEDCBA98765432, length: BlockPacketLength:56, last:true},
+        ];
+
+        // Queue all inputs first, then drain and compare the outputs in order.
+        let tok = for ((counter, decoded_input), tok): ((u32, ZstdDecodedPacket), token) in enumerate(DecodedInputs) {
+            let tok = send(tok, input_s, decoded_input);
+            trace_fmt!("Sent #{} decoded zero-filled packet, {:#x}", counter + u32:1, decoded_input);
+            (tok)
+        } (tok);
+
+        let tok = for ((counter, expected_output), tok): ((u32, ZstdDecodedPacket), token) in enumerate(DecodedOutputs) {
+            let (tok, decoded_output) = recv(tok, output_r);
+            trace_fmt!("Received #{} decoded non-zero-filled packet, {:#x}", counter + u32:1, decoded_output);
+            trace_fmt!("Expected #{} decoded non-zero-filled packet, {:#x}", counter + u32:1, expected_output);
+            assert_eq(decoded_output, expected_output);
+            (tok)
+        } (tok);
+
+        send(tok, terminator, true);
+    }
+}
diff --git a/xls/modules/zstd/rle_block_dec.x b/xls/modules/zstd/rle_block_dec.x
new file mode 100644
index 0000000000..232d9a6381
--- /dev/null
+++ b/xls/modules/zstd/rle_block_dec.x
@@ -0,0 +1,756 @@
+// Copyright 2024 The XLS Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains the implementation of RleBlockDecoder responsible for decoding
+// ZSTD RLE Blocks.
+// More information about RLE Block's format can be found in:
+// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2.2
+//
+// The implementation consists of 3 procs:
+// * RleDataPacker
+// * RunLengthDecoder
+// * BatchPacker
+// Connections between those are represented on the diagram below:
+//
+//                            RleBlockDecoder
+//    ┌─────────────────────────────────────────────────────────────┐
+//    │    RleDataPacker       RunLengthDecoder       BatchPacker   │
+//    │  ┌───────────────┐   ┌──────────────────┐   ┌─────────────┐ │
+// ───┼─►│               ├──►│                  ├──►│             ├─┼──►
+//    │  └───────┬───────┘   └──────────────────┘   └─────────────┘ │
+//    │          │                                         ▲        │
+//    │          │           SynchronizationData           │        │
+//    │          └─────────────────────────────────────────┘        │
+//    └─────────────────────────────────────────────────────────────┘
+//
+// RleDataPacker is responsible for receiving the incoming packets of block data, converting
+// those to the format accepted by RunLengthDecoder and passing the data to the actual decoder block.
+// It also extracts from the input packets the synchronization data like block_id and last_block
+// and then passes those to BatchPacker proc.
+// RunLengthDecoder decodes RLE blocks and outputs one symbol for each transaction on output
+// channel.
+// BatchPacker then gathers those symbols into packets, appends synchronization data received from
+// RleDataPacker and passes such packets to the output of the RleBlockDecoder.
+ +import xls.modules.zstd.common; +import xls.modules.rle.rle_dec; +import xls.modules.rle.rle_common; + +const SYMBOL_WIDTH = common::SYMBOL_WIDTH; +const BLOCK_SIZE_WIDTH = common::BLOCK_SIZE_WIDTH; +const DATA_WIDTH = common::DATA_WIDTH; +const BATCH_SIZE = DATA_WIDTH / SYMBOL_WIDTH; + +type BlockDataPacket = common::BlockDataPacket; +type BlockPacketLength = common::BlockPacketLength; +type BlockData = common::BlockData; +type BlockSize = common::BlockSize; + +type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket; +type CopyOrMatchContent = common::CopyOrMatchContent; +type CopyOrMatchLength = common::CopyOrMatchLength; +type SequenceExecutorMessageType = common::SequenceExecutorMessageType; + +type RleInput = rle_common::CompressedData; +type RleOutput = rle_common::PlainData; +type Symbol = bits[SYMBOL_WIDTH]; +type SymbolCount = BlockSize; + +struct BlockSyncData { + last_block: bool, + count: SymbolCount, + id: u32 +} + +proc RleDataPacker { + block_data_r: chan in; + rle_data_s: chan out; + sync_s: chan out; + + config( + block_data_r: chan in, + rle_data_s: chan out, + sync_s: chan out + ) { + (block_data_r, rle_data_s, sync_s) + } + + init { } + + next(state: ()) { + let tok = join(); + let (tok, input) = recv(tok, block_data_r); + let rle_dec_data = RleInput { + symbol: input.data as Symbol, count: input.length as SymbolCount, last: true + }; + // send RLE packet for decoding unless it has symbol count == 0 + let send_always = rle_dec_data.count != SymbolCount:0; + let data_tok = send_if(tok, rle_data_s, send_always, rle_dec_data); + let sync_data = BlockSyncData { last_block: input.last_block, count: rle_dec_data.count, id: input.id }; + // send last block packet even if it has symbol count == 0 + let sync_tok = send(data_tok, sync_s, sync_data); + } +} + +type RleTestVector = (Symbol, SymbolCount); + +#[test_proc] +proc RleDataPacker_test { + terminator: chan out; + in_s: chan out; + out_r: chan in; + sync_r: chan in; + + config(terminator: 
chan out) { + let (in_s, in_r) = chan("in"); + let (out_s, out_r) = chan("out"); + let (sync_s, sync_r) = chan("sync"); + + spawn RleDataPacker(in_r, out_s, sync_s); + + (terminator, in_s, out_r, sync_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let EncodedRleBlocks: RleTestVector[6] = [ + (Symbol:0x1, SymbolCount:0x1), + (Symbol:0x2, SymbolCount:0x2), + (Symbol:0x3, SymbolCount:0x4), + (Symbol:0x4, SymbolCount:0x8), + (Symbol:0x5, SymbolCount:0x10), + (Symbol:0x6, SymbolCount:0x1F), + ]; + let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { + let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); + let data_in = BlockDataPacket { + last: true, + last_block, + id: counter, + data: block.0 as BlockData, + length: block.1 as BlockPacketLength + }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); + + let data_out = RleInput { + last: true, symbol: block.0 as Symbol, count: block.1 as BlockSize + }; + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} packed rle encoded block, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, data_out); + + let sync_out = BlockSyncData { + id: counter, + count: block.1, + last_block: counter == (array_size(EncodedRleBlocks) - u32:1), + }; + let (tok, sync_output) = recv(tok, sync_r); + trace_fmt!("Received #{} synchronization data, {:#x}", counter + u32:1, sync_output); + assert_eq(sync_output, sync_out); + (tok) + }(tok); + send(tok, terminator, true); + } +} + +#[test_proc] +proc RleDataPacker_empty_blocks_test { + terminator: chan out; + in_s: chan out; + out_r: chan in; + sync_r: chan in; + + config(terminator: chan out) { + let (in_s, in_r) = chan("in"); + let (out_s, out_r) = chan("out"); + let (sync_s, sync_r) = chan("sync"); + + spawn RleDataPacker(in_r, out_s, sync_s); + + (terminator, in_s, out_r, sync_r) + } + + init { } + + next(state: ()) { + let 
tok = join(); + let EncodedRleBlocks: RleTestVector[8] = [ + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x1, SymbolCount:0x1), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x3, SymbolCount:0x4), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x5, SymbolCount:0x10), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0xFF, SymbolCount:0x0), + ]; + let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { + let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); + let data_in = BlockDataPacket { + last: true, + last_block, + id: counter, + data: block.0 as BlockData, + length: block.1 as BlockPacketLength + }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); + (tok) + }(tok); + + let RleInputs: RleInput[3] = [ + RleInput {last: true, symbol: Symbol:0x1, count: BlockSize:0x1}, + RleInput {last: true, symbol: Symbol:0x3, count: BlockSize:0x4}, + RleInput {last: true, symbol: Symbol:0x5, count: BlockSize:0x10}, + ]; + let tok = for ((counter, rle_in), tok): ((u32, RleInput), token) in enumerate(RleInputs) { + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} packed rle encoded block, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, rle_in); + (tok) + }(tok); + + let BlockSyncDataInputs: BlockSyncData[8] = [ + BlockSyncData { id: 0, count: BlockSize:0x0, last_block: false }, + BlockSyncData { id: 1, count: BlockSize:0x1, last_block: false }, + BlockSyncData { id: 2, count: BlockSize:0x0, last_block: false }, + BlockSyncData { id: 3, count: BlockSize:0x4, last_block: false }, + BlockSyncData { id: 4, count: BlockSize:0x0, last_block: false }, + BlockSyncData { id: 5, count: BlockSize:0x10, last_block: false }, + BlockSyncData { id: 6, count: BlockSize:0x0, last_block: false }, + BlockSyncData { id: 7, count: BlockSize:0x0, last_block: true }, + ]; + let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in 
enumerate(BlockSyncDataInputs) { + let (tok, sync_output) = recv(tok, sync_r); + trace_fmt!("Received #{} synchronization data, {:#x}", counter + u32:1, sync_output); + assert_eq(sync_output, sync_data); + (tok) + }(tok); + send(tok, terminator, true); + } +} + +struct BatchPackerState { + batch: BlockData, + symbols_in_batch: BlockPacketLength, + symbols_in_block: BlockPacketLength, + prev_last: bool, + prev_sync: BlockSyncData, +} + +const ZERO_BATCH_STATE = zero!(); +const ZERO_BLOCK_SYNC_DATA = zero!(); +const ZERO_RLE_OUTPUT = zero!(); +const EMPTY_RLE_OUTPUT = RleOutput {last: true, ..ZERO_RLE_OUTPUT}; + +proc BatchPacker { + rle_data_r: chan in; + sync_r: chan in; + block_data_s: chan out; + + config( + rle_data_r: chan in, + sync_r: chan in, + block_data_s: chan out + ) { + (rle_data_r, sync_r, block_data_s) + } + + // Init the state to signal new batch to process + init { (BatchPackerState { prev_last: true, ..ZERO_BATCH_STATE }) } + + next(state: BatchPackerState) { + let tok = join(); + trace_fmt!("start state: {:#x}", state); + let prev_expected_symbols_in_block = state.prev_sync.count as BlockPacketLength; + let symbols_in_batch = state.symbols_in_batch; + let symbols_in_block = state.symbols_in_block; + let block_in_progress = (symbols_in_block != prev_expected_symbols_in_block); + trace_fmt!("block_in_progress: {:#x}", block_in_progress); + + // Finished receiving RLE data of the previous block + // Proceed with receiving sync data for the next block + let start_new_block = !block_in_progress; + let (tok, sync_data) = recv_if(tok, sync_r, start_new_block, state.prev_sync); + if (start_new_block) { + trace_fmt!("received sync_data: {:#x}", sync_data); + } else { + trace_fmt!("got sync_data from the state: {:#x}", sync_data); + }; + + let expected_symbols_in_block = if (start_new_block) { sync_data.count as BlockPacketLength } else { prev_expected_symbols_in_block }; + trace_fmt!("expected_symbols_in_block: {:#x}", expected_symbols_in_block); + + let 
batch = state.batch; + let empty_block = (expected_symbols_in_block == BlockPacketLength:0); + trace_fmt!("batch: {:#x}", batch); + trace_fmt!("empty_block: {:#x}", empty_block); + + let do_recv_rle = !empty_block && block_in_progress; + let default_rle_output = if (empty_block) { EMPTY_RLE_OUTPUT } else { ZERO_RLE_OUTPUT }; + let (tok, decoded_data) = recv_if(tok, rle_data_r, do_recv_rle, default_rle_output); + if (do_recv_rle) { + trace_fmt!("received rle_data: {:#x}", decoded_data); + } else { + trace_fmt!("got empty rle_data: {:#x}", decoded_data); + }; + + let (batch, symbols_in_batch, symbols_in_block) = if (do_recv_rle) { + // TODO: Improve performance: remove variable shift + let shift = symbols_in_batch << u32:3; // multiply by 8 bits + let updated_batch = batch | ((decoded_data.symbol as BlockData) << shift); + let updated_symbols_in_batch = symbols_in_batch + BlockPacketLength:1; + let updated_symbols_in_block = symbols_in_block + BlockPacketLength:1; + (updated_batch, updated_symbols_in_batch, updated_symbols_in_block) + } else { + (batch, symbols_in_batch, symbols_in_block) + }; + trace_fmt!("updated batch: {:#x}", batch); + trace_fmt!("updated symbols_in_batch: {:#x}", symbols_in_batch); + trace_fmt!("updated symbols_in_block: {:#x}", symbols_in_block); + + let block_in_progress = (symbols_in_block != expected_symbols_in_block); + trace_fmt!("updated block_in_progress: {:#x}", block_in_progress); + + // Last should not occur when batch is still being processed + assert!(!(!block_in_progress ^ decoded_data.last), "corrupted_decoding_flow"); + + let batch_full = symbols_in_batch >= BATCH_SIZE; + trace_fmt!("batch_full: {:#x}", batch_full); + // Send decoded RLE packet when + // - batch size reached the maximal size + // - RLE block decoding is finished + // - Decoded RLE block is empty and is the last block in ZSTD frame + let last = decoded_data.last || (sync_data.last_block && empty_block); + let do_send_batch = (batch_full || last); + 
trace_fmt!("do_send_batch: {:#x}", do_send_batch); + + let decoded_batch_data = ExtendedBlockDataPacket { + // Decoded RLE block is always a literal + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: last, + last_block: sync_data.last_block, + id: sync_data.id, + data: batch as BlockData, + // length in bits + length: (symbols_in_batch << 3) as BlockPacketLength, + } + }; + + let data_tok = + send_if(tok, block_data_s, do_send_batch, decoded_batch_data); + if (do_send_batch) { + trace_fmt!("sent decoded_batch_data: {:#x}", decoded_batch_data); + } else { + trace_fmt!("decoded_batch_data: {:#x}", decoded_batch_data); + }; + + let (new_batch, new_symbols_in_batch) = if (do_send_batch) { + (BlockData:0, BlockPacketLength:0) + } else { + (batch, symbols_in_batch) + }; + + let (new_sync_data, new_symbols_in_block) = if (decoded_data.last || (sync_data.last_block && empty_block)) { + (ZERO_BLOCK_SYNC_DATA, BlockPacketLength:0) + } else { + (sync_data, symbols_in_block) + }; + + let new_state = BatchPackerState { + batch: new_batch, + symbols_in_batch: new_symbols_in_batch, + symbols_in_block: new_symbols_in_block, + prev_last: decoded_data.last, + prev_sync: new_sync_data + }; + + trace_fmt!("new_state: {:#x}", new_state); + + new_state + } +} + +type BatchTestVector = (Symbol, bool); + +#[test_proc] +proc BatchPacker_test { + terminator: chan out; + in_s: chan out; + sync_s: chan out; + out_r: chan in; + + config(terminator: chan out) { + let (in_s, in_r) = chan("in"); + let (sync_s, sync_r) = chan("sync"); + let (out_s, out_r) = chan("out"); + + spawn BatchPacker(in_r, sync_r, out_s); + + (terminator, in_s, sync_s, out_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let SyncData: BlockSyncData[6] = [ + BlockSyncData { last_block: false, count: SymbolCount:1, id: u32:0 }, + BlockSyncData { last_block: false, count: SymbolCount:2, id: u32:1 }, + BlockSyncData { last_block: false, count: SymbolCount:4, id: u32:2 }, + 
BlockSyncData { last_block: false, count: SymbolCount:8, id: u32:3 }, + BlockSyncData { last_block: false, count: SymbolCount:16, id: u32:4 }, + BlockSyncData { last_block: true, count: SymbolCount:31, id: u32:5 }, + ]; + let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in enumerate(SyncData) { + let tok = send(tok, sync_s, sync_data); + trace_fmt!("Sent #{} synchronization data, {:#x}", counter + u32:1, sync_data); + (tok) + }(tok); + + let DecodedRleBlocks: BatchTestVector[62] = [ + // 1st block + (Symbol:0x01, bool:true), + // 2nd block + (Symbol:0x02, bool:false), (Symbol:0x02, bool:true), + // 3rd block + (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), + (Symbol:0x03, bool:true), + // 4th block + (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), + (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), + (Symbol:0x04, bool:false), (Symbol:0x04, bool:true), + // 5th block + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:true), + // 6th block + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), 
(Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), + (Symbol:0x06, bool:true), + ]; + let tok = for ((counter, test_data), tok): ((u32, BatchTestVector), token) in enumerate(DecodedRleBlocks) { + let symbol = test_data.0 as Symbol; + let last = test_data.1; + let data_in = RleOutput { symbol, last }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} decoded rle symbol, {:#x}", counter + u32:1, data_in); + (tok) + }(tok); + + let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[10] = [ + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x01, length: BlockPacketLength:8}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x0202, length: BlockPacketLength:16}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x03030303, length: BlockPacketLength:32}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x0404040404040404, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: 
BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:5, data: BlockData:0x06060606060606, length: BlockPacketLength:56}}, + ]; + + let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, expected); + (tok) + }(tok); + send(tok, terminator, true); + } +} + +#[test_proc] +proc BatchPacker_empty_blocks_test { + terminator: chan out; + in_s: chan out; + sync_s: chan out; + out_r: chan in; + + config(terminator: chan out) { + let (in_s, in_r) = chan("in"); + let (sync_s, sync_r) = chan("sync"); + let (out_s, out_r) = chan("out"); + + spawn BatchPacker(in_r, sync_r, out_s); + + (terminator, in_s, sync_s, out_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let SyncData: BlockSyncData[8] = [ + BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:0 }, + BlockSyncData { last_block: false, count: SymbolCount:1, id: u32:1 }, + BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:2 }, + BlockSyncData { 
last_block: false, count: SymbolCount:4, id: u32:3 }, + BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:4 }, + BlockSyncData { last_block: false, count: SymbolCount:16, id: u32:5 }, + BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:6 }, + BlockSyncData { last_block: true, count: SymbolCount:0, id: u32:7 }, + ]; + let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in enumerate(SyncData) { + let tok = send(tok, sync_s, sync_data); + trace_fmt!("Sent #{} synchronization data, {:#x}", counter + u32:1, sync_data); + (tok) + }(tok); + + let DecodedRleBlocks: BatchTestVector[21] = [ + // 0 block + // EMPTY + // 1st block + (Symbol:0x01, bool:true), + // 2nd block + // EMPTY + // 3rd block + (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), + (Symbol:0x03, bool:true), + // 4th block + // EMPTY + // 5th block + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), + (Symbol:0x05, bool:true), + // 6th block + // EMPTY + // 7th block + // EMPTY + ]; + let tok = for ((counter, test_data), tok): ((u32, BatchTestVector), token) in enumerate(DecodedRleBlocks) { + let symbol = test_data.0 as Symbol; + let last = test_data.1; + let data_in = RleOutput { symbol, last }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} decoded rle symbol, {:#x}", counter + u32:1, data_in); + (tok) + }(tok); + + let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[9] = [ + // 0 block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: 
BlockData:0x0, length: BlockPacketLength:0}}, + // 1st block + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x01, length: BlockPacketLength:8}}, + // 2nd block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 3rd block + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x03030303, length: BlockPacketLength:32}}, + // 4th block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 5th block + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + // 6th block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:6, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 7th block + // EMPTY with LAST_BLOCK + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:7, data: BlockData:0x0, length: BlockPacketLength:0}}, + ]; + + let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in 
enumerate(BatchedDecodedRleSymbols) { + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, expected); + (tok) + }(tok); + send(tok, terminator, true); + } +} + +pub proc RleBlockDecoder { + input_r: chan in; + output_s: chan out; + + config(input_r: chan in, output_s: chan out) { + let (in_s, in_r) = chan("in"); + let (out_s, out_r) = chan("out"); + let (sync_s, sync_r) = chan("sync"); + + spawn RleDataPacker(input_r, in_s, sync_s); + spawn rle_dec::RunLengthDecoder( + in_r, out_s); + spawn BatchPacker(out_r, sync_r, output_s); + + (input_r, output_s) + } + + init { } + + next(state: ()) { } +} + +#[test_proc] +proc RleBlockDecoder_test { + terminator: chan out; + in_s: chan out; + out_r: chan in; + + config(terminator: chan out) { + let (in_s, in_r) = chan("in"); + let (out_s, out_r) = chan("out"); + + spawn RleBlockDecoder(in_r, out_s); + + (terminator, in_s, out_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let EncodedRleBlocks: RleTestVector[6] = [ + (Symbol:0x1, SymbolCount:0x1), + (Symbol:0x2, SymbolCount:0x2), + (Symbol:0x3, SymbolCount:0x4), + (Symbol:0x4, SymbolCount:0x8), + (Symbol:0x5, SymbolCount:0x10), + (Symbol:0x6, SymbolCount:0x1F), + ]; + let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { + let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); + let data_in = BlockDataPacket { + last: true, // RLE block fits into single packet, each will be last for given block + last_block, + id: counter, + data: block.0 as BlockData, + length: block.1 as BlockPacketLength + }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); + (tok) + }(tok); + + let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[10] = [ + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { 
last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x01, length: BlockPacketLength:8}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x0202, length: BlockPacketLength:16}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x03030303, length: BlockPacketLength:32}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x0404040404040404, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:true, id: 
u32:5, data: BlockData:0x06060606060606, length: BlockPacketLength:56}}, + ]; + + let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, expected); + (tok) + }(tok); + send(tok, terminator, true); + } +} + +#[test_proc] +proc RleBlockDecoder_empty_blocks_test { + terminator: chan out; + in_s: chan out; + out_r: chan in; + + config(terminator: chan out) { + let (in_s, in_r) = chan("in"); + let (out_s, out_r) = chan("out"); + + spawn RleBlockDecoder(in_r, out_s); + + (terminator, in_s, out_r) + } + + init { } + + next(state: ()) { + let tok = join(); + let EncodedRleBlocks: RleTestVector[8] = [ + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x1, SymbolCount:0x1), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x3, SymbolCount:0x4), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0x5, SymbolCount:0x10), + (Symbol:0xFF, SymbolCount:0x0), + (Symbol:0xFF, SymbolCount:0x0), + ]; + let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { + let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); + let data_in = BlockDataPacket { + last: true, // RLE block fits into single packet, each will be last for given block + last_block, + id: counter, + data: block.0 as BlockData, + length: block.1 as BlockPacketLength + }; + let tok = send(tok, in_s, data_in); + trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); + (tok) + }(tok); + + let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[9] = [ + // 0 block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 1st block + ExtendedBlockDataPacket {msg_type: 
SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x01, length: BlockPacketLength:8}}, + // 2nd block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 3rd block + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x03030303, length: BlockPacketLength:32}}, + // 4th block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 5th block + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, + // 6th block + // EMPTY + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:6, data: BlockData:0x0, length: BlockPacketLength:0}}, + // 7th block + // EMPTY with LAST_BLOCK + ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:7, data: BlockData:0x0, length: BlockPacketLength:0}}, + ]; + + let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { + let (tok, dec_output) = recv(tok, out_r); + trace_fmt!("Received #{} batched 
decoded rle symbols, {:#x}", counter + u32:1, dec_output); + assert_eq(dec_output, expected); + (tok) + }(tok); + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/sequence_executor.x b/xls/modules/zstd/sequence_executor.x new file mode 100644 index 0000000000..7d5e7a8d08 --- /dev/null +++ b/xls/modules/zstd/sequence_executor.x @@ -0,0 +1,1716 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import std; +import xls.modules.zstd.common as common; +import xls.modules.zstd.ram_printer as ram_printer; +import xls.examples.ram; + +type BlockData = common::BlockData; +type SequenceExecutorMessageType = common::SequenceExecutorMessageType; +type SequenceExecutorPacket = common::SequenceExecutorPacket; +type CopyOrMatchContent = common::CopyOrMatchContent; +type CopyOrMatchLength = common::CopyOrMatchLength; +type ZstdDecodedPacket = common::ZstdDecodedPacket; +type BlockPacketLength = common::BlockPacketLength; +type Offset = common::Offset; + +fn calculate_ram_addr_width(hb_size_kb: u32, ram_data_width: u32, ram_num: u32) -> u32 { + ((hb_size_kb * u32:1024 * u32:8) / ram_data_width) / ram_num +} + +// Configurable RAM parameters +pub const RAM_DATA_WIDTH = common::SYMBOL_WIDTH; +const RAM_NUM = u32:8; + +type RamData = bits[RAM_DATA_WIDTH]; + +// Constants calculated from RAM parameters +const RAM_NUM_WIDTH = std::clog2(RAM_NUM); +pub const RAM_WORD_PARTITION_SIZE = RAM_DATA_WIDTH; +const RAM_ORDER_WIDTH = 
std::clog2(RAM_DATA_WIDTH); +pub const RAM_NUM_PARTITIONS = ram::num_partitions(RAM_WORD_PARTITION_SIZE, RAM_DATA_WIDTH); +const RAM_REQ_MASK_ALL = std::unsigned_max_value(); +const RAM_REQ_MASK_NONE = bits[RAM_NUM_PARTITIONS]:0; + +type RamNumber = bits[RAM_NUM_WIDTH]; +type RamOrder = bits[RAM_ORDER_WIDTH]; + +pub fn ram_size(hb_size_kb: u32) -> u32 { (hb_size_kb * u32:1024 * u32:8) / RAM_DATA_WIDTH / RAM_NUM } + +fn ram_addr_width(hb_size_kb: u32) -> u32 { std::clog2(ram_size(hb_size_kb)) } + +// RAM related constants common for tests +const TEST_HISTORY_BUFFER_SIZE_KB = u32:1; +const TEST_RAM_SIZE = ram_size(TEST_HISTORY_BUFFER_SIZE_KB); +const TEST_RAM_ADDR_WIDTH = ram_addr_width(TEST_HISTORY_BUFFER_SIZE_KB); +pub const TEST_RAM_INITIALIZED = true; +pub const TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; + +type TestRamAddr = bits[TEST_RAM_ADDR_WIDTH]; +type TestWriteReq = ram::WriteReq; +type TestWriteResp = ram::WriteResp; +type TestReadReq = ram::ReadReq; +type TestReadResp = ram::ReadResp; + +struct HistoryBufferPtr { number: RamNumber, addr: bits[RAM_ADDR_WIDTH] } + +type HistoryBufferLength = u32; + +enum SequenceExecutorStatus : u2 { + IDLE = 0, + LITERAL_WRITE = 1, + SEQUENCE_READ = 2, + SEQUENCE_WRITE = 3, +} + +struct SequenceExecutorState { + status: SequenceExecutorStatus, + // Packet handling + packet: SequenceExecutorPacket, + packet_valid: bool, + // History Buffer handling + hyp_ptr: HistoryBufferPtr, + real_ptr: HistoryBufferPtr, + hb_len: HistoryBufferLength, + // Repeat Offset handling + repeat_offsets: Offset[3], + repeat_req: bool, + seq_cnt: bool, +} + +fn decode_literal_packet(packet: SequenceExecutorPacket) -> ZstdDecodedPacket { + ZstdDecodedPacket { + data: packet.content, length: packet.length as BlockPacketLength, last: packet.last + } +} + +#[test] +fn test_decode_literal_packet() { + let content = CopyOrMatchContent:0xAA00BB11CC22DD33; + let length = CopyOrMatchLength:64; + let 
last = false; + + assert_eq( + decode_literal_packet( + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length, content, last + }), + ZstdDecodedPacket { + length: length as BlockPacketLength, + data: content, last + }) +} + +fn round_up_to_pow2(x: uN[N]) -> uN[N] { + let base = x[Y_CLOG2 as s32:]; + let reminder = x[0:Y_CLOG2 as s32] != bits[Y_CLOG2]:0; + (base as uN[N] + reminder as uN[N]) << Y_CLOG2 +} + +#[test] +fn test_round_up_to_pow2() { + assert_eq(round_up_to_pow2(u16:0), u16:0); + assert_eq(round_up_to_pow2(u16:1), u16:8); + assert_eq(round_up_to_pow2(u16:7), u16:8); + assert_eq(round_up_to_pow2(u16:8), u16:8); + assert_eq(round_up_to_pow2(u16:9), u16:16); + assert_eq(round_up_to_pow2(u16:9), u16:16); +} + +fn hb_ptr_from_offset_back + + (ptr: HistoryBufferPtr, offset: Offset) -> HistoryBufferPtr { + + const_assert!(common::OFFSET_WIDTH < u32:32); + type RamAddr = bits[RAM_ADDR_WIDTH]; + + let buff_change = std::mod_pow2(offset as u32, RAM_NUM) as RamNumber; + let rounded_offset = round_up_to_pow2(offset as u32 + u32:1); + let max_row_span = std::div_pow2(rounded_offset, RAM_NUM) as RamAddr; + let (number, addr_change) = if ptr.number >= buff_change { + (ptr.number - buff_change, max_row_span - RamAddr:1) + } else { + ((RAM_NUM + ptr.number as u32 - buff_change as u32) as RamNumber, max_row_span) + }; + let addr = if ptr.addr > addr_change { + ptr.addr - addr_change + } else { + (RAM_SIZE + ptr.addr as u32 - addr_change as u32) as RamAddr + }; + HistoryBufferPtr { number, addr } +} + +#[test] +fn test_hb_ptr_from_offset_back() { + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:0), + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:1), + HistoryBufferPtr { number: RamNumber:3, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_back( 
+ HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:2), + HistoryBufferPtr { number: RamNumber:2, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:3), + HistoryBufferPtr { number: RamNumber:1, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:4), + HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:5), + HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:1 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:6), + HistoryBufferPtr { number: RamNumber:6, addr: TestRamAddr:1 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:7), + HistoryBufferPtr { number: RamNumber:5, addr: TestRamAddr:1 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:8), + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:1 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:15), + HistoryBufferPtr { number: RamNumber:5, addr: TestRamAddr:0 }); + assert_eq( + hb_ptr_from_offset_back( + HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:0 }, Offset:1), + HistoryBufferPtr { number: RamNumber:7, addr: (TEST_RAM_SIZE - u32:1) as TestRamAddr }); +} + +fn hb_ptr_from_offset_forw + + (ptr: HistoryBufferPtr, offset: Offset) -> HistoryBufferPtr { + + type RamAddr = bits[RAM_ADDR_WIDTH]; + const MAX_ADDR = (RAM_SIZE - u32:1) as RamAddr; + + let buff_change = std::mod_pow2(offset as u32, RAM_NUM) as RamNumber; + let rounded_offset = round_up_to_pow2(offset as u32 + u32:1); + let max_row_span = 
std::div_pow2(rounded_offset, RAM_NUM) as RamAddr; + let (number, addr_change) = if ptr.number as u32 + buff_change as u32 < RAM_NUM { + (ptr.number + buff_change, max_row_span - RamAddr:1) + } else { + ((buff_change as u32 - (RAM_NUM - ptr.number as u32)) as RamNumber, max_row_span) + }; + + let addr = if ptr.addr + addr_change <= MAX_ADDR { + ptr.addr + addr_change + } else { + (addr_change - (MAX_ADDR - ptr.addr)) + }; + + HistoryBufferPtr { number, addr } +} + +#[test] +fn test_hb_ptr_from_offset_forw() { + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:0), + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:1), + HistoryBufferPtr { number: RamNumber:5, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:2), + HistoryBufferPtr { number: RamNumber:6, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:3), + HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:2 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:4), + HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:3 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:5), + HistoryBufferPtr { number: RamNumber:1, addr: TestRamAddr:3 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:6), + HistoryBufferPtr { number: RamNumber:2, addr: TestRamAddr:3 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:7), + HistoryBufferPtr { number: RamNumber:3, addr: TestRamAddr:3 }); + assert_eq( + 
hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:8), + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:3 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:4, addr: TestRamAddr:2 }, Offset:15), + HistoryBufferPtr { number: RamNumber:3, addr: TestRamAddr:4 }); + assert_eq( + hb_ptr_from_offset_forw( + HistoryBufferPtr { number: RamNumber:7, addr: (TEST_RAM_SIZE - u32:1) as TestRamAddr }, + Offset:1), HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:0 }); +} + +fn literal_packet_to_single_write_req + + (ptr: HistoryBufferPtr, literal: SequenceExecutorPacket, number: RamNumber) + -> ram::WriteReq { + + let offset = std::mod_pow2(RAM_NUM - ptr.number as u32 + number as u32, RAM_NUM) as Offset; + let we = literal.length >= (offset as CopyOrMatchLength + CopyOrMatchLength:1) << CopyOrMatchLength:3; + let hb = hb_ptr_from_offset_forw(ptr, offset); + + if we { + ram::WriteReq { + data: literal.content[offset as u32 << u32:3+:RamData] as RamData, + addr: hb.addr, + mask: std::unsigned_max_value() + } + } else { + ram::WriteReq { + addr: bits[RAM_ADDR_WIDTH]:0, + data: bits[RAM_DATA_WIDTH]:0, + mask: bits[RAM_NUM_PARTITIONS]:0 + } + } +} + +#[test] +fn test_literal_packet_to_single_write_req() { + // BEFORE: AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | | | | | | | | | 1 | | | | | | | | | + // 2 | o| | | | | | | | 2 |11| | | | | | | | + // 3 | | | | | | | | | 3 | | o|77|66|55|44|33|22| + // 4 | | | | | | | | | 4 | | | | | | | | | + + let ptr = HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:2 }; + let literals = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + content: CopyOrMatchContent:0x77_6655_4433_2211, + length: CopyOrMatchLength:56, + last: false + }; + assert_eq( + literal_packet_to_single_write_req(ptr, literals, RamNumber:0), + TestWriteReq { data: RamData:0x22, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }); + assert_eq( 
+ literal_packet_to_single_write_req(ptr, literals, RamNumber:3), + TestWriteReq { data: RamData:0x55, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }); + assert_eq( + literal_packet_to_single_write_req(ptr, literals, RamNumber:6), + zero!()); +} + +fn literal_packet_to_write_reqs + + (ptr: HistoryBufferPtr, literal: SequenceExecutorPacket) + -> (ram::WriteReq[RAM_NUM], HistoryBufferPtr) { + type WriteReq = ram::WriteReq; + let result = WriteReq[RAM_NUM]:[ + literal_packet_to_single_write_req(ptr, literal, RamNumber:0), + literal_packet_to_single_write_req(ptr, literal, RamNumber:1), + literal_packet_to_single_write_req(ptr, literal, RamNumber:2), + literal_packet_to_single_write_req(ptr, literal, RamNumber:3), + literal_packet_to_single_write_req(ptr, literal, RamNumber:4), + literal_packet_to_single_write_req(ptr, literal, RamNumber:5), + literal_packet_to_single_write_req(ptr, literal, RamNumber:6), + literal_packet_to_single_write_req(ptr, literal, RamNumber:7), + ]; + + let ptr_offset = literal.length >> CopyOrMatchLength:3; + (result, hb_ptr_from_offset_forw(ptr, ptr_offset as Offset)) +} + +#[test] +fn test_literal_packet_to_write_reqs() { + // BEFORE: AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | | | | | | | | | 1 | | | | | | | | | + // 2 | o| | | | | | | | 2 |11| | | | | | | | + // 3 | | | | | | | | | 3 | | | | | | | | o| + // 4 | | | | | | | | | 4 | | | | | | | | | + + let ptr = HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:0x2 }; + let literals = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + content: CopyOrMatchContent:0x11, + length: CopyOrMatchLength:8, + last: false + }; + assert_eq( + literal_packet_to_write_reqs(ptr, literals), + ( + TestWriteReq[RAM_NUM]:[ + zero!(), zero!(), zero!(), + zero!(), zero!(), zero!(), + zero!(), + TestWriteReq { data: RamData:0x11, addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, + ], HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:0x3 }, + )); + + // BEFORE: 
AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | | | | | | | | | 1 | | | | | | | | | + // 2 | o| | | | | | | | 2 |11| | | | | | | | + // 3 | | | | | | | | | 3 | o|88|77|66|55|44|33|22| + // 4 | | | | | | | | | 4 | | | | | | | | | + + let ptr = HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:2 }; + let literals = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + content: CopyOrMatchContent:0x8877_6655_4433_2211, + length: CopyOrMatchLength:64, + last: false + }; + assert_eq( + literal_packet_to_write_reqs(ptr, literals), + ( + TestWriteReq[RAM_NUM]:[ + TestWriteReq { data: RamData:0x22, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x33, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x44, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x55, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x66, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x77, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x88, addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestWriteReq { data: RamData:0x11, addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, + ], HistoryBufferPtr { number: RamNumber:7, addr: TestRamAddr:3 }, + )); +} + +fn max_hb_ptr_for_sequence_packet + + (ptr: HistoryBufferPtr, seq: SequenceExecutorPacket) + -> HistoryBufferPtr { + hb_ptr_from_offset_back(ptr, seq.content as Offset) +} + +fn sequence_packet_to_single_read_req + + (ptr: HistoryBufferPtr, max_ptr: HistoryBufferPtr, + seq: SequenceExecutorPacket, number: RamNumber) + -> (ram::ReadReq, RamOrder) { + type ReadReq = ram::ReadReq; + let offset_change = if max_ptr.number > number { + RAM_NUM - max_ptr.number as u32 + number as u32 + } else { + number as u32 - max_ptr.number as u32 + }; + let offset = (seq.content as u32 - offset_change) as Offset; + let re = (offset_change as 
CopyOrMatchLength) < seq.length; + let hb = hb_ptr_from_offset_back(ptr, offset); + + if re { + (ReadReq { addr: hb.addr, mask: RAM_REQ_MASK_ALL }, offset_change as RamOrder) + } else { + (zero!(), RamOrder:0) + } +} + +#[test] +fn test_sequence_packet_to_single_read_req() { + // BEFORE: AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | x| x| | | | | | | 1 | | | | | | | | | + // 2 | | | | | | | x| x| 2 | | | | | | | | | + // 3 | | | | | | | o| | 3 | | | o| y| y| y| y| | + // 4 | | | | | | | | | 4 | | | | | | | | | + + let ptr = HistoryBufferPtr { number: RamNumber:1, addr: TestRamAddr:0x3 }; + let sequence = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + content: CopyOrMatchContent:11, + length: CopyOrMatchLength:4, + last: false + }; + let max_ptr = max_hb_ptr_for_sequence_packet(ptr, sequence); + + assert_eq( + sequence_packet_to_single_read_req( + ptr, max_ptr, sequence, RamNumber:0), + (TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, RamOrder:2)); + + assert_eq( + sequence_packet_to_single_read_req( + ptr, max_ptr, sequence, RamNumber:1), + (TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, RamOrder:3)); + + assert_eq( + sequence_packet_to_single_read_req( + ptr, max_ptr, sequence, RamNumber:2), (zero!(), RamOrder:0)); + + assert_eq( + sequence_packet_to_single_read_req( + ptr, max_ptr, sequence, RamNumber:7), + (TestReadReq { addr: TestRamAddr:0x1, mask: RAM_REQ_MASK_ALL }, RamOrder:1)); + + assert_eq( + sequence_packet_to_single_read_req( + ptr, max_ptr, sequence, RamNumber:6), + (TestReadReq { addr: TestRamAddr:0x1, mask: RAM_REQ_MASK_ALL }, RamOrder:0)); +} + +fn sequence_packet_to_read_reqs + + (ptr: HistoryBufferPtr, seq: SequenceExecutorPacket, hb_len: HistoryBufferLength) + -> (ram::ReadReq[RAM_NUM], RamOrder[RAM_NUM], SequenceExecutorPacket, bool) { + type ReadReq = ram::ReadReq; + + let max_len = std::umin(seq.length as u32, std::umin(RAM_NUM, hb_len)); + + let (next_seq, next_seq_valid) = 
if seq.length > max_len as CopyOrMatchLength { + ( + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: seq.length - max_len as CopyOrMatchLength, + content: seq.content, + last: seq.last + }, true, + ) + } else { + (zero!(), false) + }; + + let max_ptr = max_hb_ptr_for_sequence_packet(ptr, seq); + let (req0, order0) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:0); + let (req1, order1) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:1); + let (req2, order2) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:2); + let (req3, order3) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:3); + let (req4, order4) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:4); + let (req5, order5) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:5); + let (req6, order6) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:6); + let (req7, order7) = + sequence_packet_to_single_read_req(ptr, max_ptr, seq, RamNumber:7); + + let reqs = ReadReq[RAM_NUM]:[req0, req1, req2, req3, req4, req5, req6, req7]; + let orders = RamOrder[RAM_NUM]:[order0, order1, order2, order3, order4, order5, order6, order7]; + (reqs, orders, next_seq, next_seq_valid) +} + +#[test] +fn test_sequence_packet_to_read_reqs() { + // BEFORE: AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | x| x| | | | | | | 1 | | | | | | | | | + // 2 | | | | | | | x| x| 2 | | | | | | | | | + // 3 | | | | | | | o| | 3 | | | | | | | o| | + // 4 | | | | | | | | | 4 | | | | | | | | | + + let ptr = HistoryBufferPtr { number: RamNumber:1, addr: TestRamAddr:0x3 }; + let sequence = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + content: CopyOrMatchContent:11, + length: CopyOrMatchLength:4, + last: false + }; + let result = sequence_packet_to_read_reqs( + ptr, sequence, HistoryBufferLength:20); + let expected = ( + TestReadReq[RAM_NUM]:[ + 
TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, zero!(), + zero!(), zero!(), zero!(), + TestReadReq { addr: TestRamAddr:0x1, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x1, mask: RAM_REQ_MASK_ALL }, + ], + RamOrder[RAM_NUM]:[ + RamOrder:2, RamOrder:3, zero!(), zero!(), zero!(), + zero!(), RamOrder:0, RamOrder:1, + ], zero!(), false, + ); + assert_eq(result, expected); + + // BEFORE: AFTER: + // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + // 1 | | | | | | | | | 1 | | | | | | | | | + // 2 | x| x| | | | | | | 2 | | | | | | | | | + // 3 | | | x| x| x| x| x| x| 3 | | x| | | | | | | + // 4 | | | | | | | | o| 4 | | | | | | | | o| + + let ptr = HistoryBufferPtr { number: RamNumber:0, addr: TestRamAddr:0x4 }; + let sequence = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + content: CopyOrMatchContent:10, + length: CopyOrMatchLength:9, + last: false + }; + let result = sequence_packet_to_read_reqs( + ptr, sequence, HistoryBufferLength:20); + let expected = ( + TestReadReq[RAM_NUM]:[ + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x3, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, + TestReadReq { addr: TestRamAddr:0x2, mask: RAM_REQ_MASK_ALL }, + ], + RamOrder[RAM_NUM]:[ + RamOrder:2, RamOrder:3, RamOrder:4, RamOrder:5, RamOrder:6, RamOrder:7, RamOrder:0, + RamOrder:1, + ], + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + content: CopyOrMatchContent:10, + length: CopyOrMatchLength:1, + last: false + }, true, + ); + assert_eq(result, expected); +} + +struct RamWrRespHandlerData { + 
resp: bool[RAM_NUM], + ptr: HistoryBufferPtr, +} + +fn create_ram_wr_data + (reqs: ram::WriteReq[RAM_NUM], ptr: HistoryBufferPtr) -> (bool, RamWrRespHandlerData) { + let do_write = for (i, do_write): (u32, bool) in range(u32:0, RAM_NUM) { + do_write || reqs[i].mask + }(false); + + let resp = bool[RAM_NUM]:[ + ((reqs[0]).mask != RAM_REQ_MASK_NONE), + ((reqs[1]).mask != RAM_REQ_MASK_NONE), + ((reqs[2]).mask != RAM_REQ_MASK_NONE), + ((reqs[3]).mask != RAM_REQ_MASK_NONE), + ((reqs[4]).mask != RAM_REQ_MASK_NONE), + ((reqs[5]).mask != RAM_REQ_MASK_NONE), + ((reqs[6]).mask != RAM_REQ_MASK_NONE), + ((reqs[7]).mask != RAM_REQ_MASK_NONE), + ]; + + (do_write, RamWrRespHandlerData { resp, ptr }) +} + +proc RamWrRespHandler { + input_r: chan in; + output_s: chan out; + wr_resp_m0_r: chan in; + wr_resp_m1_r: chan in; + wr_resp_m2_r: chan in; + wr_resp_m3_r: chan in; + wr_resp_m4_r: chan in; + wr_resp_m5_r: chan in; + wr_resp_m6_r: chan in; + wr_resp_m7_r: chan in; + + config(input_r: chan> in, + output_s: chan> out, + wr_resp_m0_r: chan in, wr_resp_m1_r: chan in, + wr_resp_m2_r: chan in, wr_resp_m3_r: chan in, + wr_resp_m4_r: chan in, wr_resp_m5_r: chan in, + wr_resp_m6_r: chan in, wr_resp_m7_r: chan in) { + ( + input_r, output_s, wr_resp_m0_r, wr_resp_m1_r, wr_resp_m2_r, wr_resp_m3_r, wr_resp_m4_r, + wr_resp_m5_r, wr_resp_m6_r, wr_resp_m7_r, + ) + } + + init { } + + next(state: ()) { + let tok0 = join(); + let (tok1, input) = recv(tok0, input_r); + + let (tok2_0, _) = recv_if(tok1, wr_resp_m0_r, input.resp[0], zero!()); + let (tok2_1, _) = recv_if(tok1, wr_resp_m1_r, input.resp[1], zero!()); + let (tok2_2, _) = recv_if(tok1, wr_resp_m2_r, input.resp[2], zero!()); + let (tok2_3, _) = recv_if(tok1, wr_resp_m3_r, input.resp[3], zero!()); + let (tok2_4, _) = recv_if(tok1, wr_resp_m4_r, input.resp[4], zero!()); + let (tok2_5, _) = recv_if(tok1, wr_resp_m5_r, input.resp[5], zero!()); + let (tok2_6, _) = recv_if(tok1, wr_resp_m6_r, input.resp[6], zero!()); + let (tok2_7, _) = 
recv_if(tok1, wr_resp_m7_r, input.resp[7], zero!()); + let tok2 = join(tok2_0, tok2_1, tok2_2, tok2_3, tok2_4, tok2_5, tok2_6, tok2_7); + + let tok3 = send(tok2, output_s, input.ptr); + } +} + +struct RamRdRespHandlerData { + resp: bool[RAM_NUM], + order: RamOrder[RAM_NUM], + last: bool +} + +fn create_ram_rd_data + (reqs: ram::ReadReq[RAM_NUM], order: RamOrder[RAM_NUM], last: bool, next_packet_valid: bool) -> (bool, RamRdRespHandlerData) { + let do_read = for (i, do_read): (u32, bool) in range(u32:0, RAM_NUM) { + do_read || reqs[i].mask + }(false); + + let resp = bool[RAM_NUM]:[ + ((reqs[0]).mask != RAM_REQ_MASK_NONE), + ((reqs[1]).mask != RAM_REQ_MASK_NONE), + ((reqs[2]).mask != RAM_REQ_MASK_NONE), + ((reqs[3]).mask != RAM_REQ_MASK_NONE), + ((reqs[4]).mask != RAM_REQ_MASK_NONE), + ((reqs[5]).mask != RAM_REQ_MASK_NONE), + ((reqs[6]).mask != RAM_REQ_MASK_NONE), + ((reqs[7]).mask != RAM_REQ_MASK_NONE), + ]; + + let last = if next_packet_valid { false } else { last }; + (do_read, RamRdRespHandlerData { resp, order, last }) +} + +proc RamRdRespHandler { + input_r: chan in; + output_s: chan out; + rd_resp_m0_r: chan> in; + rd_resp_m1_r: chan> in; + rd_resp_m2_r: chan> in; + rd_resp_m3_r: chan> in; + rd_resp_m4_r: chan> in; + rd_resp_m5_r: chan> in; + rd_resp_m6_r: chan> in; + rd_resp_m7_r: chan> in; + + config(input_r: chan in, output_s: chan out, + rd_resp_m0_r: chan> in, + rd_resp_m1_r: chan> in, + rd_resp_m2_r: chan> in, + rd_resp_m3_r: chan> in, + rd_resp_m4_r: chan> in, + rd_resp_m5_r: chan> in, + rd_resp_m6_r: chan> in, + rd_resp_m7_r: chan> in) { + ( + input_r, output_s, rd_resp_m0_r, rd_resp_m1_r, rd_resp_m2_r, rd_resp_m3_r, rd_resp_m4_r, + rd_resp_m5_r, rd_resp_m6_r, rd_resp_m7_r, + ) + } + + init { } + + next(state: ()) { + let tok0 = join(); + type ReadResp = ram::ReadResp; + + let (tok1, input) = recv(tok0, input_r); + + let (tok2_0, resp_0) = recv_if(tok1, rd_resp_m0_r, input.resp[0], zero!()); + let (tok2_1, resp_1) = recv_if(tok1, rd_resp_m1_r, 
input.resp[1], zero!()); + let (tok2_2, resp_2) = recv_if(tok1, rd_resp_m2_r, input.resp[2], zero!()); + let (tok2_3, resp_3) = recv_if(tok1, rd_resp_m3_r, input.resp[3], zero!()); + let (tok2_4, resp_4) = recv_if(tok1, rd_resp_m4_r, input.resp[4], zero!()); + let (tok2_5, resp_5) = recv_if(tok1, rd_resp_m5_r, input.resp[5], zero!()); + let (tok2_6, resp_6) = recv_if(tok1, rd_resp_m6_r, input.resp[6], zero!()); + let (tok2_7, resp_7) = recv_if(tok1, rd_resp_m7_r, input.resp[7], zero!()); + let tok2 = join(tok2_0, tok2_1, tok2_2, tok2_3, tok2_4, tok2_5, tok2_6, tok2_7); + + let content = (resp_0.data as CopyOrMatchContent) << (input.order[0] as CopyOrMatchContent << 3) | + (resp_1.data as CopyOrMatchContent) << (input.order[1] as CopyOrMatchContent << 3) | + (resp_2.data as CopyOrMatchContent) << (input.order[2] as CopyOrMatchContent << 3) | + (resp_3.data as CopyOrMatchContent) << (input.order[3] as CopyOrMatchContent << 3) | + (resp_4.data as CopyOrMatchContent) << (input.order[4] as CopyOrMatchContent << 3) | + (resp_5.data as CopyOrMatchContent) << (input.order[5] as CopyOrMatchContent << 3) | + (resp_6.data as CopyOrMatchContent) << (input.order[6] as CopyOrMatchContent << 3) | + (resp_7.data as CopyOrMatchContent) << (input.order[7] as CopyOrMatchContent << 3); + + let converted = std::convert_to_bits_msb0(input.resp); + let length = std::popcount(converted) << 3; + + let output_data = SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: length as CopyOrMatchLength, + content: content as CopyOrMatchContent, + last: input.last, + }; + + let tok3 = send(tok2, output_s, output_data); + } +} + +fn handle_reapeated_offset_for_sequences + (seq: SequenceExecutorPacket, repeat_offsets: Offset[3], repeat_req: bool) + -> (SequenceExecutorPacket, Offset[3]) { + let modified_repeat_offsets = if repeat_req { + Offset[3]:[repeat_offsets[1], repeat_offsets[2], repeat_offsets[0] - Offset:1] + } else { + repeat_offsets + }; + + let (seq, 
final_repeat_offsets) = if seq.content == CopyOrMatchContent:0 { + fail!( + "match_offset_zero_not_allowed", + (zero!(), Offset[3]:[Offset:0, ...])) + } else if seq.content == CopyOrMatchContent:1 { + let offset = modified_repeat_offsets[0]; + ( + SequenceExecutorPacket { content: offset as CopyOrMatchContent, ..seq }, + Offset[3]:[ + offset, repeat_offsets[1], repeat_offsets[2], + ], + ) + } else if seq.content == CopyOrMatchContent:2 { + let offset = modified_repeat_offsets[1]; + ( + SequenceExecutorPacket { content: offset as CopyOrMatchContent, ..seq }, + Offset[3]:[ + offset, repeat_offsets[0], repeat_offsets[2], + ], + ) + } else if seq.content == CopyOrMatchContent:3 { + let offset = modified_repeat_offsets[2]; + ( + SequenceExecutorPacket { content: offset as CopyOrMatchContent, ..seq }, + Offset[3]:[ + offset, repeat_offsets[0], repeat_offsets[1], + ], + ) + } else { + let offset = seq.content as Offset - Offset:3; + ( + SequenceExecutorPacket { content: offset as CopyOrMatchContent, ..seq }, + Offset[3]:[ + offset, repeat_offsets[0], repeat_offsets[1], + ], + ) + }; + (seq, final_repeat_offsets) +} + +proc SequenceExecutor +{ + input_r: chan in; + output_s: chan out; + ram_comp_input_s: chan> out; + ram_comp_output_r: chan> in; + ram_resp_input_s: chan out; + ram_resp_output_r: chan in; + rd_req_m0_s: chan> out; + rd_req_m1_s: chan> out; + rd_req_m2_s: chan> out; + rd_req_m3_s: chan> out; + rd_req_m4_s: chan> out; + rd_req_m5_s: chan> out; + rd_req_m6_s: chan> out; + rd_req_m7_s: chan> out; + wr_req_m0_s: chan> out; + wr_req_m1_s: chan> out; + wr_req_m2_s: chan> out; + wr_req_m3_s: chan> out; + wr_req_m4_s: chan> out; + wr_req_m5_s: chan> out; + wr_req_m6_s: chan> out; + wr_req_m7_s: chan> out; + + config( + input_r: chan in, + output_s: chan out, + ram_resp_output_r: chan in, + ram_resp_output_s: chan out, + rd_req_m0_s: chan> out, + rd_req_m1_s: chan> out, + rd_req_m2_s: chan> out, + rd_req_m3_s: chan> out, + rd_req_m4_s: chan> out, + rd_req_m5_s: chan> 
out, + rd_req_m6_s: chan> out, + rd_req_m7_s: chan> out, + rd_resp_m0_r: chan> in, + rd_resp_m1_r: chan> in, + rd_resp_m2_r: chan> in, + rd_resp_m3_r: chan> in, + rd_resp_m4_r: chan> in, + rd_resp_m5_r: chan> in, + rd_resp_m6_r: chan> in, + rd_resp_m7_r: chan> in, + wr_req_m0_s: chan> out, + wr_req_m1_s: chan> out, + wr_req_m2_s: chan> out, + wr_req_m3_s: chan> out, + wr_req_m4_s: chan> out, + wr_req_m5_s: chan> out, + wr_req_m6_s: chan> out, + wr_req_m7_s: chan> out, + wr_resp_m0_r: chan in, + wr_resp_m1_r: chan in, + wr_resp_m2_r: chan in, + wr_resp_m3_r: chan in, + wr_resp_m4_r: chan in, + wr_resp_m5_r: chan in, + wr_resp_m6_r: chan in, + wr_resp_m7_r: chan in + ) { + let (ram_comp_input_s, ram_comp_input_r) = chan, u32:1>("ram_comp_input"); + let (ram_comp_output_s, ram_comp_output_r) = chan, u32:1>("ram_comp_output"); + let (ram_resp_input_s, ram_resp_input_r) = chan("ram_resp_input"); + + spawn RamWrRespHandler( + ram_comp_input_r, ram_comp_output_s, + wr_resp_m0_r, wr_resp_m1_r, wr_resp_m2_r, wr_resp_m3_r, + wr_resp_m4_r, wr_resp_m5_r, wr_resp_m6_r, wr_resp_m7_r); + + spawn RamRdRespHandler( + ram_resp_input_r, ram_resp_output_s, + rd_resp_m0_r, rd_resp_m1_r, rd_resp_m2_r, + rd_resp_m3_r, rd_resp_m4_r, rd_resp_m5_r, rd_resp_m6_r, rd_resp_m7_r); + + ( + input_r, output_s, + ram_comp_input_s, ram_comp_output_r, + ram_resp_input_s, ram_resp_output_r, + rd_req_m0_s, rd_req_m1_s, rd_req_m2_s, rd_req_m3_s, + rd_req_m4_s, rd_req_m5_s, rd_req_m6_s, rd_req_m7_s, + wr_req_m0_s, wr_req_m1_s, wr_req_m2_s, wr_req_m3_s, + wr_req_m4_s, wr_req_m5_s, wr_req_m6_s, wr_req_m7_s, + ) + } + + init { + const_assert!(INIT_HB_PTR_RAM < RAM_NUM); + const_assert!(INIT_HB_PTR_ADDR <= (std::unsigned_max_value() as u32)); + + type RamAddr = bits[RAM_ADDR_WIDTH]; + let INIT_HB_PTR = HistoryBufferPtr { + number: INIT_HB_PTR_RAM as RamNumber, addr: INIT_HB_PTR_ADDR as RamAddr + }; + SequenceExecutorState { + status: SequenceExecutorStatus::IDLE, + packet: zero!(), + packet_valid: false, + 
hyp_ptr: INIT_HB_PTR, + real_ptr: INIT_HB_PTR, + hb_len: INIT_HB_LENGTH, + repeat_offsets: Offset[3]:[Offset:1, Offset:4, Offset:8], + repeat_req: false, + seq_cnt: false + } + } + + next(state: SequenceExecutorState) { + let tok0 = join(); + type Status = SequenceExecutorStatus; + type State = SequenceExecutorState; + type MsgType = SequenceExecutorMessageType; + type Packet = SequenceExecutorPacket; + type ReadReq = ram::ReadReq; + type ReadResp = ram::ReadResp; + type WriteReq = ram::WriteReq; + type WriteResp = ram::WriteResp; + + const ZERO_READ_REQS = ReadReq[RAM_NUM]:[zero!(), ...]; + const ZERO_WRITE_REQS = WriteReq[RAM_NUM]:[zero!(), ...]; + const ZERO_ORDER = RamOrder[RAM_NUM]:[RamOrder:0, ...]; + + // Receive literals and sequences from the input channel ... + let do_recv_input = !state.packet_valid && state.status != Status::SEQUENCE_READ && + state.status != Status::SEQUENCE_WRITE; + let (tok1_0, input_packet, input_packet_valid) = + recv_if_non_blocking(tok0, input_r, do_recv_input, zero!()); + + // ... or our own sequences from the looped channel + let do_recv_ram = + (state.status == Status::SEQUENCE_READ || state.status == Status::SEQUENCE_WRITE); + let (tok1_1, ram_packet, ram_packet_valid) = + recv_if_non_blocking(tok0, ram_resp_output_r, do_recv_ram, zero!()); + + // Read RAM write completion, used for monitoring the real state + // of the RAM and eventually changing the state to IDLE. + // Going through the IDLE state is required for changing between + // Literals and Sequences (and the other way around) and between every + // Sequence read from the input (original sequence from the ZSTD stream).
+ let (tok1_2, real_ptr, real_ptr_valid) = + recv_non_blocking(tok0, ram_comp_output_r, zero!()); + if real_ptr_valid { + trace_fmt!("SequenceExecutor:: Received completion update"); + } else { }; + + let real_ptr = if real_ptr_valid { real_ptr } else { state.real_ptr }; + let tok1 = join(tok1_0, tok1_1, tok1_2); + + // Since we either get data from input, from frame, or from state, + // we are always working on a single packet. The current state + // can be used to determine the source of the packet. + let (packet, packet_valid) = if input_packet_valid { + (input_packet, true) + } else if ram_packet_valid { + (ram_packet, true) + } else { + (state.packet, state.packet_valid) + }; + + // if we are in the IDLE state and have a valid packet stored in the state, + // or we have a new packet from the input go to the corresponding + // processing step immediately. (added to be able to process a single + // literal in one next() evaluation) + let status = match (state.status, packet_valid, packet.msg_type) { + (Status::IDLE, true, MsgType::LITERAL) => Status::LITERAL_WRITE, + (Status::IDLE, true, MsgType::SEQUENCE) => Status::SEQUENCE_READ, + _ => state.status, + }; + + let NO_VALID_PACKET_STATE = State { packet, packet_valid, real_ptr, ..state }; + let (write_reqs, read_reqs, order, new_state) = match ( + status, packet_valid, packet.msg_type + ) { + // Handling LITERAL_WRITE + (Status::LITERAL_WRITE, true, MsgType::LITERAL) => { + trace_fmt!("SequenceExecutor:: Handling LITERAL packet in LITERAL_WRITE step"); + let (write_reqs, new_hyp_ptr) = + literal_packet_to_write_reqs(state.hyp_ptr, packet); + let new_repeat_req = packet.length == CopyOrMatchLength:0; + let hb_add = (packet.length >> 3) as HistoryBufferLength; + let new_hb_len = std::mod_pow2(state.hb_len + hb_add, RAM_SIZE_TOTAL); + ( + write_reqs, ZERO_READ_REQS, ZERO_ORDER, + State { + status: Status::LITERAL_WRITE, + packet: zero!(), + packet_valid: false, + hyp_ptr: new_hyp_ptr, + real_ptr, + repeat_offsets:
state.repeat_offsets, + repeat_req: new_repeat_req, + hb_len: new_hb_len, + seq_cnt: false + }, + ) + }, + (Status::LITERAL_WRITE, _, _) => { + let status = + if real_ptr == state.hyp_ptr { Status::IDLE } else { Status::LITERAL_WRITE }; + ( + ZERO_WRITE_REQS, ZERO_READ_REQS, ZERO_ORDER, + State { status, ..NO_VALID_PACKET_STATE }, + ) + }, + // Handling SEQUENCE_READ + (Status::SEQUENCE_READ, true, MsgType::SEQUENCE) => { + trace_fmt!("Handling SEQUENCE in SEQUENCE_READ state"); + let (packet, new_repeat_offsets) = if !state.seq_cnt { + handle_reapeated_offset_for_sequences( + packet, state.repeat_offsets, state.repeat_req) + } else { + (packet, state.repeat_offsets) + }; + let (read_reqs, order, packet, packet_valid) = sequence_packet_to_read_reqs< + HISTORY_BUFFER_SIZE_KB>( + state.hyp_ptr, packet, state.hb_len); + + ( + ZERO_WRITE_REQS, read_reqs, order, + SequenceExecutorState { + status: Status::SEQUENCE_WRITE, + packet, + packet_valid, + hyp_ptr: state.hyp_ptr, + real_ptr, + repeat_offsets: new_repeat_offsets, + repeat_req: false, + hb_len: state.hb_len, + seq_cnt: packet_valid + }, + ) + }, + (Status::SEQUENCE_READ, _, _) => { + let ZERO_RETURN = (ZERO_WRITE_REQS, ZERO_READ_REQS, ZERO_ORDER, zero!()); + fail!("should_no_happen", (ZERO_RETURN)) + }, + // Handling SEQUENCE_WRITE + (Status::SEQUENCE_WRITE, true, MsgType::LITERAL) => { + trace_fmt!("Handling LITERAL in SEQUENCE_WRITE state: {}", status); + let (write_reqs, new_hyp_ptr) = + literal_packet_to_write_reqs(state.hyp_ptr, packet); + let hb_add = packet.length as HistoryBufferLength; + let new_hb_len = std::mod_pow2(state.hb_len + hb_add, RAM_SIZE_TOTAL); + + ( + write_reqs, ZERO_READ_REQS, ZERO_ORDER, + SequenceExecutorState { + status: zero!(), + packet: state.packet, + packet_valid: state.packet_valid, + hyp_ptr: new_hyp_ptr, + real_ptr, + repeat_offsets: state.repeat_offsets, + repeat_req: state.repeat_req, + hb_len: new_hb_len, + seq_cnt: state.seq_cnt + }, + ) + }, + (Status::SEQUENCE_WRITE, _, 
_) => { + let status = if real_ptr == state.hyp_ptr { + Status::IDLE + } else if state.seq_cnt { + Status::SEQUENCE_READ + } else { + Status::SEQUENCE_WRITE + }; + ( + ZERO_WRITE_REQS, ZERO_READ_REQS, ZERO_ORDER, + State { status, ..NO_VALID_PACKET_STATE }, + ) + }, + // Handling IDLE + _ => { + let status = Status::IDLE; + ( + ZERO_WRITE_REQS, ZERO_READ_REQS, ZERO_ORDER, + State { status, ..NO_VALID_PACKET_STATE }, + ) + }, + }; + + let tok2_1 = send_if(tok1, wr_req_m0_s, (write_reqs[0]).mask != RAM_REQ_MASK_NONE, write_reqs[0]); + let tok2_2 = send_if(tok1, wr_req_m1_s, (write_reqs[1]).mask != RAM_REQ_MASK_NONE, write_reqs[1]); + let tok2_3 = send_if(tok1, wr_req_m2_s, (write_reqs[2]).mask != RAM_REQ_MASK_NONE, write_reqs[2]); + let tok2_4 = send_if(tok1, wr_req_m3_s, (write_reqs[3]).mask != RAM_REQ_MASK_NONE, write_reqs[3]); + let tok2_5 = send_if(tok1, wr_req_m4_s, (write_reqs[4]).mask != RAM_REQ_MASK_NONE, write_reqs[4]); + let tok2_6 = send_if(tok1, wr_req_m5_s, (write_reqs[5]).mask != RAM_REQ_MASK_NONE, write_reqs[5]); + let tok2_7 = send_if(tok1, wr_req_m6_s, (write_reqs[6]).mask != RAM_REQ_MASK_NONE, write_reqs[6]); + let tok2_8 = send_if(tok1, wr_req_m7_s, (write_reqs[7]).mask != RAM_REQ_MASK_NONE, write_reqs[7]); + + // Write to output ask for completion + let (do_write, wr_resp_handler_data) = create_ram_wr_data(write_reqs, new_state.hyp_ptr); + if do_write { + trace_fmt!("Sending request to RamWrRespHandler: {:#x}", wr_resp_handler_data); + } else { }; + let tok2_9 = send_if(tok1, ram_comp_input_s, do_write, wr_resp_handler_data); + + let output_data = decode_literal_packet(packet); + let do_write_output = do_write || (packet.last && packet.msg_type == SequenceExecutorMessageType::LITERAL); + if do_write_output { trace_fmt!("Sending output data: {:#x}", output_data); } else { }; + let tok2_10 = send_if(tok1, output_s, do_write_output, output_data); + + // Ask for response + let tok2_11 = send_if(tok1, rd_req_m0_s, (read_reqs[0]).mask != 
RAM_REQ_MASK_NONE, read_reqs[0]); + let tok2_12 = send_if(tok1, rd_req_m1_s, (read_reqs[1]).mask != RAM_REQ_MASK_NONE, read_reqs[1]); + let tok2_13 = send_if(tok1, rd_req_m2_s, (read_reqs[2]).mask != RAM_REQ_MASK_NONE, read_reqs[2]); + let tok2_14 = send_if(tok1, rd_req_m3_s, (read_reqs[3]).mask != RAM_REQ_MASK_NONE, read_reqs[3]); + let tok2_15 = send_if(tok1, rd_req_m4_s, (read_reqs[4]).mask != RAM_REQ_MASK_NONE, read_reqs[4]); + let tok2_16 = send_if(tok1, rd_req_m5_s, (read_reqs[5]).mask != RAM_REQ_MASK_NONE, read_reqs[5]); + let tok2_17 = send_if(tok1, rd_req_m6_s, (read_reqs[6]).mask != RAM_REQ_MASK_NONE, read_reqs[6]); + let tok2_18 = send_if(tok1, rd_req_m7_s, (read_reqs[7]).mask != RAM_REQ_MASK_NONE, read_reqs[7]); + + let (do_read, rd_resp_handler_data) = + create_ram_rd_data + (read_reqs, order, packet.last, new_state.packet_valid); + if do_read { + trace_fmt!("Sending request to RamRdRespHandler: {:#x}", rd_resp_handler_data); + } else { }; + let tok2_19 = send_if(tok1, ram_resp_input_s, do_read, rd_resp_handler_data); + + new_state + } +} + +pub const ZSTD_HISTORY_BUFFER_SIZE_KB: u32 = u32:64; +const ZSTD_RAM_SIZE = ram_size(ZSTD_HISTORY_BUFFER_SIZE_KB); +pub const ZSTD_RAM_ADDR_WIDTH = ram_addr_width(ZSTD_HISTORY_BUFFER_SIZE_KB); + +pub proc SequenceExecutorZstd { + + init { } + + config( + input_r: chan in, + output_s: chan out, + looped_channel_r: chan in, + looped_channel_s: chan out, + rd_req_m0_s: chan> out, + rd_req_m1_s: chan> out, + rd_req_m2_s: chan> out, + rd_req_m3_s: chan> out, + rd_req_m4_s: chan> out, + rd_req_m5_s: chan> out, + rd_req_m6_s: chan> out, + rd_req_m7_s: chan> out, + rd_resp_m0_r: chan> in, + rd_resp_m1_r: chan> in, + rd_resp_m2_r: chan> in, + rd_resp_m3_r: chan> in, + rd_resp_m4_r: chan> in, + rd_resp_m5_r: chan> in, + rd_resp_m6_r: chan> in, + rd_resp_m7_r: chan> in, + wr_req_m0_s: chan> out, + wr_req_m1_s: chan> out, + wr_req_m2_s: chan> out, + wr_req_m3_s: chan> out, + wr_req_m4_s: chan> out, + wr_req_m5_s: chan> out, + 
wr_req_m6_s: chan> out, + wr_req_m7_s: chan> out, + wr_resp_m0_r: chan in, + wr_resp_m1_r: chan in, + wr_resp_m2_r: chan in, + wr_resp_m3_r: chan in, + wr_resp_m4_r: chan in, + wr_resp_m5_r: chan in, + wr_resp_m6_r: chan in, + wr_resp_m7_r: chan in + ) { + spawn SequenceExecutor ( + input_r, output_s, + looped_channel_r, looped_channel_s, + rd_req_m0_s, rd_req_m1_s, rd_req_m2_s, rd_req_m3_s, + rd_req_m4_s, rd_req_m5_s, rd_req_m6_s, rd_req_m7_s, + rd_resp_m0_r, rd_resp_m1_r, rd_resp_m2_r, rd_resp_m3_r, + rd_resp_m4_r, rd_resp_m5_r, rd_resp_m6_r, rd_resp_m7_r, + wr_req_m0_s, wr_req_m1_s, wr_req_m2_s, wr_req_m3_s, + wr_req_m4_s, wr_req_m5_s, wr_req_m6_s, wr_req_m7_s, + wr_resp_m0_r, wr_resp_m1_r, wr_resp_m2_r, wr_resp_m3_r, + wr_resp_m4_r, wr_resp_m5_r, wr_resp_m6_r, wr_resp_m7_r + ); + } + + next (state: ()) { } +} + +const LITERAL_TEST_INPUT_DATA = SequenceExecutorPacket[8]:[ + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:64, + content: CopyOrMatchContent:0xAA00BB11CC22DD33, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:64, + content: CopyOrMatchContent:0x447733220088CCFF, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:32, + content: CopyOrMatchContent:0x88AA0022, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:32, + content: CopyOrMatchContent:0xFFEEDD11, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:64, + content: CopyOrMatchContent:0x9DAF8B41C913EFDA, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:64, + content: CopyOrMatchContent:0x157D8C7EB8B97CA3, + last: false + }, + SequenceExecutorPacket { + msg_type: 
SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:0, + content: CopyOrMatchContent:0x0, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:0, + content: CopyOrMatchContent:0x0, + last: true + }, +]; + +const LITERAL_TEST_MEMORY_CONTENT:(TestRamAddr, RamData)[3][RAM_NUM] = [ + [ + (TestRamAddr:127, RamData:0x33), + (TestRamAddr:0, RamData:0xFF), + (TestRamAddr:1, RamData:0x22) + ], + [ + (TestRamAddr:127, RamData:0xDD), + (TestRamAddr:0, RamData:0xCC), + (TestRamAddr:1, RamData:0x00) + ], + [ + (TestRamAddr:127, RamData:0x22), + (TestRamAddr:0, RamData:0x88), + (TestRamAddr:1, RamData:0xAA) + ], + [ + (TestRamAddr:127, RamData:0xCC), + (TestRamAddr:0, RamData:0x00), + (TestRamAddr:1, RamData:0x88) + ], + [ + (TestRamAddr:127, RamData:0x11), + (TestRamAddr:0, RamData:0x22), + (TestRamAddr:1, RamData:0x11) + ], + [ + (TestRamAddr:127, RamData:0xBB), + (TestRamAddr:0, RamData:0x33), + (TestRamAddr:1, RamData:0xDD) + ], + [ + (TestRamAddr:127, RamData:0x00), + (TestRamAddr:0, RamData:0x77), + (TestRamAddr:1, RamData:0xEE) + ], + [ + (TestRamAddr:127, RamData:0xAA), + (TestRamAddr:0, RamData:0x44), + (TestRamAddr:1, RamData:0xFF) + ], +]; + +#[test_proc] +proc SequenceExecutorLiteralsTest { + terminator: chan out; + + input_s: chan> out; + output_r: chan in; + + print_start_s: chan<()> out; + print_finish_r: chan<()> in; + + ram_rd_req_s: chan[RAM_NUM] out; + ram_rd_resp_r: chan[RAM_NUM] in; + ram_wr_req_s: chan[RAM_NUM] out; + ram_wr_resp_r: chan[RAM_NUM] in; + + config(terminator: chan out) { + let (input_s, input_r) = chan>("input"); + let (output_s, output_r) = chan("output"); + + let (looped_channel_s, looped_channel_r) = chan("looped_channels"); + + let (print_start_s, print_start_r) = chan<()>("print_start"); + let (print_finish_s, print_finish_r) = chan<()>("print_finish"); + + let (ram_rd_req_s, ram_rd_req_r) = chan[RAM_NUM]("ram_rd_req"); + let (ram_rd_resp_s, 
ram_rd_resp_r) = chan[RAM_NUM]("ram_rd_resp"); + let (ram_wr_req_s, ram_wr_req_r) = chan[RAM_NUM]("ram_wr_req"); + let (ram_wr_resp_s, ram_wr_resp_r) = chan[RAM_NUM]("ram_wr_resp"); + + let INIT_HB_PTR_ADDR = u32:127; + spawn SequenceExecutor< + TEST_HISTORY_BUFFER_SIZE_KB, + TEST_RAM_SIZE, + TEST_RAM_ADDR_WIDTH, + INIT_HB_PTR_ADDR, + > ( + input_r, output_s, + looped_channel_r, looped_channel_s, + ram_rd_req_s[0], ram_rd_req_s[1], ram_rd_req_s[2], ram_rd_req_s[3], + ram_rd_req_s[4], ram_rd_req_s[5], ram_rd_req_s[6], ram_rd_req_s[7], + ram_rd_resp_r[0], ram_rd_resp_r[1], ram_rd_resp_r[2], ram_rd_resp_r[3], + ram_rd_resp_r[4], ram_rd_resp_r[5], ram_rd_resp_r[6], ram_rd_resp_r[7], + ram_wr_req_s[0], ram_wr_req_s[1], ram_wr_req_s[2], ram_wr_req_s[3], + ram_wr_req_s[4], ram_wr_req_s[5], ram_wr_req_s[6], ram_wr_req_s[7], + ram_wr_resp_r[0], ram_wr_resp_r[1], ram_wr_resp_r[2], ram_wr_resp_r[3], + ram_wr_resp_r[4], ram_wr_resp_r[5], ram_wr_resp_r[6], ram_wr_resp_r[7] + ); + + spawn ram_printer::RamPrinter< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_NUM_PARTITIONS, + TEST_RAM_ADDR_WIDTH, RAM_NUM> + (print_start_r, print_finish_s, ram_rd_req_s, ram_rd_resp_r); + + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[0], ram_rd_resp_s[0], ram_wr_req_r[0], ram_wr_resp_s[0]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[1], ram_rd_resp_s[1], ram_wr_req_r[1], ram_wr_resp_s[1]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[2], ram_rd_resp_s[2], ram_wr_req_r[2], ram_wr_resp_s[2]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[3], 
ram_rd_resp_s[3], ram_wr_req_r[3], ram_wr_resp_s[3]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[4], ram_rd_resp_s[4], ram_wr_req_r[4], ram_wr_resp_s[4]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[5], ram_rd_resp_s[5], ram_wr_req_r[5], ram_wr_resp_s[5]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[6], ram_rd_resp_s[6], ram_wr_req_r[6], ram_wr_resp_s[6]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[7], ram_rd_resp_s[7], ram_wr_req_r[7], ram_wr_resp_s[7]); + + ( + terminator, + input_s, output_r, + print_start_s, print_finish_r, + ram_rd_req_s, ram_rd_resp_r, + ram_wr_req_s, ram_wr_resp_r + ) + } + + init { } + + next(state: ()) { + let tok = join(); + for (i, ()): (u32, ()) in range(u32:0, array_size(LITERAL_TEST_INPUT_DATA)) { + let tok = send(tok, input_s, LITERAL_TEST_INPUT_DATA[i]); + // Don't receive when there's an empty literals packet which is not last + if (LITERAL_TEST_INPUT_DATA[i].msg_type != SequenceExecutorMessageType::LITERAL || + LITERAL_TEST_INPUT_DATA[i].length != CopyOrMatchLength:0 || + LITERAL_TEST_INPUT_DATA[i].last) { + let (tok, recv_data) = recv(tok, output_r); + let expected = decode_literal_packet(LITERAL_TEST_INPUT_DATA[i]); + assert_eq(expected, recv_data); + } else {} + }(()); + + for (i, ()): (u32, ()) in range(u32:0, RAM_NUM) { + for (j, ()): (u32, ()) in range(u32:0, array_size(LITERAL_TEST_MEMORY_CONTENT[0])) { + let addr = LITERAL_TEST_MEMORY_CONTENT[i][j].0; + let tok = send(tok, ram_rd_req_s[i], TestReadReq { addr, mask: RAM_REQ_MASK_ALL }); + let (tok, 
resp) = recv(tok, ram_rd_resp_r[i]); + let expected = LITERAL_TEST_MEMORY_CONTENT[i][j].1; + assert_eq(expected, resp.data); + }(()); + }(()); + + // Print RAM content + let tok = send(tok, print_start_s, ()); + let (tok, _) = recv(tok, print_finish_r); + + send(tok, terminator, true); + } +} + +const SEQUENCE_TEST_INPUT_SEQUENCES = SequenceExecutorPacket[11]: [ + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:9, + content: CopyOrMatchContent:13, + last: false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:1, + content: CopyOrMatchContent:7, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:1, + content: CopyOrMatchContent:8, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:5, + content: CopyOrMatchContent:13, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:3, + content: CopyOrMatchContent:3, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:1, + content: CopyOrMatchContent:1, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:0, + content: CopyOrMatchContent:0, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:1, + content: CopyOrMatchContent:3, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::LITERAL, + length: CopyOrMatchLength:0, + content: CopyOrMatchContent:0, + last:false + }, + SequenceExecutorPacket { + msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:10, + content: CopyOrMatchContent:2, + last:true, + }, + SequenceExecutorPacket { + 
msg_type: SequenceExecutorMessageType::SEQUENCE, + length: CopyOrMatchLength:1, + content: CopyOrMatchContent:3, + last:false + }, +]; + +const SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS:ZstdDecodedPacket[11] = [ + ZstdDecodedPacket { + data: BlockData:0x8C_7E_B8_B9_7C_A3_9D_AF, + length: BlockPacketLength:64, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0x7D, + length: BlockPacketLength:8, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB8, + length: BlockPacketLength:8, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB8, + length: BlockPacketLength:8, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB8_B9_7C_A3_9D, + length: BlockPacketLength:40, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB9_7C_A3, + length: BlockPacketLength:24, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB8, + length: BlockPacketLength:8, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0x7C, + length: BlockPacketLength:8, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0xB9_7C_A3_B8_B9_7C_A3_9D, + length: BlockPacketLength:64, + last: false + }, + ZstdDecodedPacket { + data: BlockData:0x7C_B8, + length: BlockPacketLength:16, + last: true + }, + ZstdDecodedPacket { + data: BlockData:0x9D, + length: BlockPacketLength:8, + last: false + } +]; + +#[test_proc] +proc SequenceExecutorSequenceTest { + terminator: chan out; + + input_s: chan out; + output_r: chan in; + + print_start_s: chan<()> out; + print_finish_r: chan<()> in; + + ram_rd_req_s: chan[RAM_NUM] out; + ram_rd_resp_r: chan[RAM_NUM] in; + ram_wr_req_s: chan[RAM_NUM] out; + ram_wr_resp_r: chan[RAM_NUM] in; + + config(terminator: chan out) { + let (input_s, input_r) = chan("input"); + let (output_s, output_r) = chan("output"); + + let (looped_channel_s, looped_channel_r) = chan("looped_channel"); + + let (print_start_s, print_start_r) = chan<()>("print_start"); + let (print_finish_s, print_finish_r) = chan<()>("print_finish"); + + let 
(ram_rd_req_s, ram_rd_req_r) = chan[RAM_NUM]("ram_rd_req"); + let (ram_rd_resp_s, ram_rd_resp_r) = chan[RAM_NUM]("ram_rd_resp"); + let (ram_wr_req_s, ram_wr_req_r) = chan[RAM_NUM]("ram_wr_req"); + let (ram_wr_resp_s, ram_wr_resp_r) = chan[RAM_NUM]("ram_wr_resp"); + + let INIT_HB_PTR_ADDR = u32:127; + spawn SequenceExecutor< + TEST_HISTORY_BUFFER_SIZE_KB, + TEST_RAM_SIZE, + TEST_RAM_ADDR_WIDTH, + INIT_HB_PTR_ADDR, + > ( + input_r, output_s, + looped_channel_r, looped_channel_s, + ram_rd_req_s[0], ram_rd_req_s[1], ram_rd_req_s[2], ram_rd_req_s[3], + ram_rd_req_s[4], ram_rd_req_s[5], ram_rd_req_s[6], ram_rd_req_s[7], + ram_rd_resp_r[0], ram_rd_resp_r[1], ram_rd_resp_r[2], ram_rd_resp_r[3], + ram_rd_resp_r[4], ram_rd_resp_r[5], ram_rd_resp_r[6], ram_rd_resp_r[7], + ram_wr_req_s[0], ram_wr_req_s[1], ram_wr_req_s[2], ram_wr_req_s[3], + ram_wr_req_s[4], ram_wr_req_s[5], ram_wr_req_s[6], ram_wr_req_s[7], + ram_wr_resp_r[0], ram_wr_resp_r[1], ram_wr_resp_r[2], ram_wr_resp_r[3], + ram_wr_resp_r[4], ram_wr_resp_r[5], ram_wr_resp_r[6], ram_wr_resp_r[7] + ); + + spawn ram_printer::RamPrinter< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_NUM_PARTITIONS, + TEST_RAM_ADDR_WIDTH, RAM_NUM> + (print_start_r, print_finish_s, ram_rd_req_s, ram_rd_resp_r); + + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[0], ram_rd_resp_s[0], ram_wr_req_r[0], ram_wr_resp_s[0]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[1], ram_rd_resp_s[1], ram_wr_req_r[1], ram_wr_resp_s[1]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[2], ram_rd_resp_s[2], ram_wr_req_r[2], ram_wr_resp_s[2]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + 
TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[3], ram_rd_resp_s[3], ram_wr_req_r[3], ram_wr_resp_s[3]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[4], ram_rd_resp_s[4], ram_wr_req_r[4], ram_wr_resp_s[4]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[5], ram_rd_resp_s[5], ram_wr_req_r[5], ram_wr_resp_s[5]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[6], ram_rd_resp_s[6], ram_wr_req_r[6], ram_wr_resp_s[6]); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED> + (ram_rd_req_r[7], ram_rd_resp_s[7], ram_wr_req_r[7], ram_wr_resp_s[7]); + + ( + terminator, + input_s, output_r, + print_start_s, print_finish_r, + ram_rd_req_s, ram_rd_resp_r, ram_wr_req_s, ram_wr_resp_r + ) + } + + init { } + + next(state: ()) { + let tok = join(); + for (i, ()): (u32, ()) in range(u32:0, array_size(LITERAL_TEST_INPUT_DATA)) { + let tok = send(tok, input_s, LITERAL_TEST_INPUT_DATA[i]); + // Don't receive when there's an empty literal packet which is not last + if (LITERAL_TEST_INPUT_DATA[i].msg_type != SequenceExecutorMessageType::LITERAL || + LITERAL_TEST_INPUT_DATA[i].length != CopyOrMatchLength:0 || + LITERAL_TEST_INPUT_DATA[i].last) { + let (tok, recv_data) = recv(tok, output_r); + let expected = decode_literal_packet(LITERAL_TEST_INPUT_DATA[i]); + assert_eq(expected, recv_data); + } else {} + }(()); + + // Print RAM content + let tok = send(tok, print_start_s, ()); + let (tok, _) = recv(tok, print_finish_r); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[0]); + let (tok, recv_data) = 
recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[0], recv_data); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[1], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[1]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[2], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[2]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[3], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[3]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[4], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[4]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[5], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[5]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[6], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[6]); + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[7]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[7], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[8]); + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[9]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[8], recv_data); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[9], recv_data); + + let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[10]); + let (tok, recv_data) = recv(tok, output_r); + assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[10], recv_data); + + // Print RAM content + let tok = send(tok, print_start_s, ()); + let (tok, _) = 
recv(tok, print_finish_r); + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/window_buffer.x b/xls/modules/zstd/window_buffer.x new file mode 100644 index 0000000000..d07bc24bf2 --- /dev/null +++ b/xls/modules/zstd/window_buffer.x @@ -0,0 +1,137 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains the implementation of a Proc which can be used to +// receive data through transactions of one width and output that data +// in transactions of another width. + +import std; +import xls.modules.zstd.buffer as buff; + +type Buffer = buff::Buffer; + +// WindowBuffer is a simple Proc that uses the Buffer structure to aggregate data +// in transactions of INPUT_WIDTH length and output it in transactions of +// OUTPUT_WIDTH length. BUFFER_SIZE defines the maximal size of the buffer.
+ +proc WindowBuffer { + input_r: chan in; + output_s: chan out; + + config( + input_r: chan in, + output_s: chan out + ) { (input_r, output_s) } + + init { buff::buffer_new() } + + next(buffer: Buffer) { + let tok = join(); + const_assert!(BUFFER_SIZE >= INPUT_WIDTH); + const_assert!(BUFFER_SIZE >= OUTPUT_WIDTH); + let (tok, recv_data, valid) = recv_non_blocking(tok, input_r, uN[INPUT_WIDTH]:0); + let buffer = if (valid) { + buff::buffer_append(buffer, recv_data) + } else { + buffer + }; + + if buffer.length >= OUTPUT_WIDTH { + let (buffer, data_to_send) = buff::buffer_fixed_pop(buffer); + let tok = send(tok, output_s, data_to_send); + buffer + } else { + buffer + } + } +} + +#[test_proc] +proc WindowBufferTest { + terminator: chan out; + data32_s: chan out; + data48_r: chan in; + + config(terminator: chan out) { + let (data32_s, data32_r) = chan("data32"); + let (data48_s, data48_r) = chan("data48"); + spawn WindowBuffer(data32_r, data48_s); + (terminator, data32_s, data48_r) + } + + init {} + + next(state: ()) { + let tok = join(); + let tok = send(tok, data32_s, u32:0xDEADBEEF); + let tok = send(tok, data32_s, u32:0xBEEFCAFE); + let tok = send(tok, data32_s, u32:0xCAFEDEAD); + + let (tok, received_data) = recv(tok, data48_r); + assert_eq(received_data, u48:0xCAFE_DEAD_BEEF); + let (tok, received_data) = recv(tok, data48_r); + assert_eq(received_data, u48:0xCAFE_DEAD_BEEF); + + send(tok, terminator, true); + } +} + +#[test_proc] +proc WindowBufferReverseTest { + terminator: chan out; + data48_s: chan out; + data32_r: chan in; + + config(terminator: chan out) { + let (data48_s, data48_r) = chan("data48"); + let (data32_s, data32_r) = chan("data32"); + spawn WindowBuffer(data48_r, data32_s); + (terminator, data48_s, data32_r) + } + + init {} + + next(state: ()) { + let tok = join(); + let tok = send(tok, data48_s, u48:0xCAFEDEADBEEF); + let tok = send(tok, data48_s, u48:0xCAFEDEADBEEF); + + let (tok, received_data) = recv(tok, data32_r); + assert_eq(received_data, 
u32:0xDEADBEEF); + let (tok, received_data) = recv(tok, data32_r); + assert_eq(received_data, u32:0xBEEFCAFE); + let (tok, received_data) = recv(tok, data32_r); + assert_eq(received_data, u32:0xCAFEDEAD); + + send(tok, terminator, true); + } +} + +// Sample for codegen +proc WindowBuffer64 { + input_r: chan in; + output_s: chan out; + + config( + input_r: chan in, + output_s: chan out + ) { + spawn WindowBuffer(input_r, output_s); + (input_r, output_s) + } + + init {} + + next(state: ()) {} +} diff --git a/xls/modules/zstd/zstd_dec.x b/xls/modules/zstd/zstd_dec.x new file mode 100644 index 0000000000..259361de8f --- /dev/null +++ b/xls/modules/zstd/zstd_dec.x @@ -0,0 +1,499 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This file contains a work-in-progress ZSTD decoder implementation. +// More information about ZSTD decoding can be found in: +// https://datatracker.ietf.org/doc/html/rfc8878 + +import std; +import xls.modules.zstd.block_header; +import xls.modules.zstd.block_dec; +import xls.modules.zstd.sequence_executor; +import xls.modules.zstd.buffer as buff; +import xls.modules.zstd.common; +import xls.modules.zstd.frame_header; +import xls.modules.zstd.frame_header_test; +import xls.modules.zstd.magic; +import xls.modules.zstd.repacketizer; +import xls.examples.ram; + +type Buffer = buff::Buffer; +type BlockDataPacket = common::BlockDataPacket; +type BlockData = common::BlockData; +type BlockSize = common::BlockSize; +type SequenceExecutorPacket = common::SequenceExecutorPacket; +type ZstdDecodedPacket = common::ZstdDecodedPacket; + +// TODO: all of this should probably be in common.x +const TEST_WINDOW_LOG_MAX_LIBZSTD = frame_header_test::TEST_WINDOW_LOG_MAX_LIBZSTD; + +const ZSTD_RAM_ADDR_WIDTH = sequence_executor::ZSTD_RAM_ADDR_WIDTH; +const RAM_DATA_WIDTH = sequence_executor::RAM_DATA_WIDTH; +const RAM_NUM_PARTITIONS = sequence_executor::RAM_NUM_PARTITIONS; +const ZSTD_HISTORY_BUFFER_SIZE_KB = sequence_executor::ZSTD_HISTORY_BUFFER_SIZE_KB; + +const BUFFER_WIDTH = common::BUFFER_WIDTH; +const DATA_WIDTH = common::DATA_WIDTH; +const ZERO_FRAME_HEADER = frame_header::ZERO_FRAME_HEADER; +const ZERO_BLOCK_HEADER = block_header::ZERO_BLOCK_HEADER; + +// States of the top-level decoder FSM. The ZstdDecoder proc dispatches on this +// value once per activation; every state except ERROR has a handler function +// in this file, and ERROR leaves the decoder state unchanged. +enum ZstdDecoderStatus : u8 { + DECODE_MAGIC_NUMBER = 0, + DECODE_FRAME_HEADER = 1, + DECODE_BLOCK_HEADER = 2, + FEED_BLOCK_DECODER = 3, + DECODE_CHECKSUM = 4, + ERROR = 255, +} + +// State carried between activations of the ZstdDecoder proc. +struct ZstdDecoderState { + // Current FSM state. + status: ZstdDecoderStatus, + // Input bits received but not yet consumed by a handler. + buffer: Buffer, + // Header of the frame being decoded (filled in by decode_frame_header). + frame_header: frame_header::FrameHeader, + // Number of bytes of the current block to forward to the block decoder + // (filled in by decode_block_header). + block_size_bytes: BlockSize, + // `last` flag taken from the current block header. + last: bool, + // Bytes of the current block already forwarded by feed_block_decoder. + bytes_sent: BlockSize, +} + +const ZERO_DECODER_STATE = zero!(); + +fn decode_magic_number(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { + trace_fmt!("zstd_dec: 
decode_magic_number: DECODING NEW FRAME"); + trace_fmt!("zstd_dec: decode_magic_number: state: {:#x}", state); + trace_fmt!("zstd_dec: decode_magic_number: Decoding magic number"); + let magic_result = magic::parse_magic_number(state.buffer); + trace_fmt!("zstd_dec: decode_magic_number: magic_result: {:#x}", magic_result); + let new_state = match magic_result.status { + magic::MagicStatus::OK => ZstdDecoderState { + status: ZstdDecoderStatus::DECODE_FRAME_HEADER, + buffer: magic_result.buffer, + ..state + }, + magic::MagicStatus::CORRUPTED => ZstdDecoderState { + status: ZstdDecoderStatus::ERROR, + ..ZERO_DECODER_STATE + }, + magic::MagicStatus::NO_ENOUGH_DATA => state, + _ => state, + }; + trace_fmt!("zstd_dec: decode_magic_number: new_state: {:#x}", new_state); + + (false, zero!(), new_state) +} + +fn decode_frame_header(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { + trace_fmt!("zstd_dec: decode_frame_header: DECODING FRAME HEADER"); + trace_fmt!("zstd_dec: decode_frame_header: state: {:#x}", state); + let frame_header_result = frame_header::parse_frame_header(state.buffer); + trace_fmt!("zstd_dec: decode_frame_header: frame_header_result: {:#x}", frame_header_result); + let new_state = match frame_header_result.status { + frame_header::FrameHeaderStatus::OK => ZstdDecoderState { + status: ZstdDecoderStatus::DECODE_BLOCK_HEADER, + buffer: frame_header_result.buffer, + frame_header: frame_header_result.header, + ..state + }, + frame_header::FrameHeaderStatus::CORRUPTED => ZstdDecoderState { + status: ZstdDecoderStatus::ERROR, + ..ZERO_DECODER_STATE + }, + frame_header::FrameHeaderStatus::NO_ENOUGH_DATA => state, + frame_header::FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE => ZstdDecoderState { + status: ZstdDecoderStatus::ERROR, + ..ZERO_DECODER_STATE + }, + _ => state, + }; + trace_fmt!("zstd_dec: decode_frame_header: new_state: {:#x}", new_state); + + (false, zero!(), new_state) +} + +fn decode_block_header(state: ZstdDecoderState) -> 
(bool, BlockDataPacket, ZstdDecoderState) { + trace_fmt!("zstd_dec: decode_block_header: DECODING BLOCK HEADER"); + trace_fmt!("zstd_dec: decode_block_header: state: {:#x}", state); + let block_header_result = block_header::parse_block_header(state.buffer); + trace_fmt!("zstd_dec: decode_block_header: block_header_result: {:#x}", block_header_result); + let new_state = match block_header_result.status { + block_header::BlockHeaderStatus::OK => { + trace_fmt!("zstd_dec: BlockHeader: {:#x}", block_header_result.header); + match block_header_result.header.btype { + common::BlockType::RAW => ZstdDecoderState { + status: ZstdDecoderStatus::FEED_BLOCK_DECODER, + buffer: state.buffer, + block_size_bytes: block_header_result.header.size as BlockSize + BlockSize:3, + last: block_header_result.header.last, + bytes_sent: BlockSize:0, + ..state + }, + common::BlockType::RLE => ZstdDecoderState { + status: ZstdDecoderStatus::FEED_BLOCK_DECODER, + buffer: state.buffer, + block_size_bytes: BlockSize:4, + last: block_header_result.header.last, + bytes_sent: BlockSize:0, + ..state + }, + common::BlockType::COMPRESSED => ZstdDecoderState { + status: ZstdDecoderStatus::FEED_BLOCK_DECODER, + buffer: state.buffer, + block_size_bytes: block_header_result.header.size as BlockSize + BlockSize:3, + last: block_header_result.header.last, + bytes_sent: BlockSize:0, + ..state + }, + _ => { + fail!("impossible_case", state) + } + } + }, + block_header::BlockHeaderStatus::CORRUPTED => ZstdDecoderState { + status: ZstdDecoderStatus::ERROR, + ..ZERO_DECODER_STATE + }, + block_header::BlockHeaderStatus::NO_ENOUGH_DATA => state, + _ => state, + }; + trace_fmt!("zstd_dec: decode_block_header: new_state: {:#x}", new_state); + + (false, zero!(), new_state) +} + +fn feed_block_decoder(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { + trace_fmt!("zstd_dec: feed_block_decoder: FEEDING BLOCK DECODER"); + trace_fmt!("zstd_dec: feed_block_decoder: state: {:#x}", state); + let 
remaining_bytes_to_send = state.block_size_bytes - state.bytes_sent; + trace_fmt!("zstd_dec: feed_block_decoder: remaining_bytes_to_send: {}", remaining_bytes_to_send); + let buffer_length_bytes = state.buffer.length >> 3; + trace_fmt!("zstd_dec: feed_block_decoder: buffer_length_bytes: {}", buffer_length_bytes); + let data_width_bytes = (DATA_WIDTH >> 3) as BlockSize; + trace_fmt!("zstd_dec: feed_block_decoder: data_width_bytes: {}", data_width_bytes); + let remaining_bytes_to_send_now = std::umin(remaining_bytes_to_send, data_width_bytes); + trace_fmt!("zstd_dec: feed_block_decoder: remaining_bytes_to_send_now: {}", remaining_bytes_to_send_now); + if (buffer_length_bytes >= remaining_bytes_to_send_now as u32) { + let remaining_bits_to_send_now = (remaining_bytes_to_send_now as u32) << 3; + trace_fmt!("zstd_dec: feed_block_decoder: remaining_bits_to_send_now: {}", remaining_bits_to_send_now); + let last_packet = (remaining_bytes_to_send == remaining_bytes_to_send_now); + trace_fmt!("zstd_dec: feed_block_decoder: last_packet: {}", last_packet); + let (buffer_result, data_to_send) = buff::buffer_pop_checked(state.buffer, remaining_bits_to_send_now); + match buffer_result.status { + buff::BufferStatus::OK => { + let decoder_channel_data = BlockDataPacket { + last: last_packet, + last_block: state.last, + id: u32:0, + data: data_to_send[0: DATA_WIDTH as s32], + length: remaining_bits_to_send_now, + }; + let new_fsm_status = if (last_packet) { + if (state.last) { + if (state.frame_header.content_checksum_flag) { + ZstdDecoderStatus::DECODE_CHECKSUM + } else { + ZstdDecoderStatus::DECODE_MAGIC_NUMBER + } + } else { + ZstdDecoderStatus::DECODE_BLOCK_HEADER + } + } else { + ZstdDecoderStatus::FEED_BLOCK_DECODER + }; + trace_fmt!("zstd_dec: feed_block_decoder: packet to decode: {:#x}", decoder_channel_data); + let new_state = (true, decoder_channel_data, ZstdDecoderState { + bytes_sent: state.bytes_sent + remaining_bytes_to_send_now, + buffer: buffer_result.buffer, + 
status: new_fsm_status, + ..state + }); + trace_fmt!("zstd_dec: feed_block_decoder: new_state: {:#x}", new_state); + new_state + }, + _ => { + fail!("should_not_happen_1", (false, zero!(), state)) + } + } + } else { + trace_fmt!("zstd_dec: feed_block_decoder: Not enough data for intermediate FEED_BLOCK_DECODER block dump"); + (false, zero!(), state) + } +} + +// Consumes the 4-byte content checksum that follows the last block of a frame +// and moves the FSM back to DECODE_MAGIC_NUMBER. The checksum value itself is +// discarded, not verified. Never requests a send to the block decoder. +// If the pop does not succeed (e.g. fewer than 32 bits are buffered yet) the +// state is returned unchanged, so the FSM stays in DECODE_CHECKSUM and retries +// once more input has been appended to the buffer. +fn decode_checksum(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { + trace_fmt!("zstd_dec: decode_checksum: DECODE CHECKSUM"); + trace_fmt!("zstd_dec: decode_checksum: state: {:#x}", state); + // Pop fixed checksum size of 4 bytes + let (buffer_result, _) = buff::buffer_pop_checked(state.buffer, u32:32); + + // Only advance when the checksum bytes were actually removed; otherwise + // they would be misread as the next frame's magic number. + let new_state = match buffer_result.status { + buff::BufferStatus::OK => ZstdDecoderState { + status: ZstdDecoderStatus::DECODE_MAGIC_NUMBER, + buffer: buffer_result.buffer, + ..state + }, + _ => state, + }; + trace_fmt!("zstd_dec: decode_checksum: new_state: {:#x}", new_state); + + (false, zero!(), new_state) +} + +pub proc ZstdDecoder { + input_r: chan in; + block_dec_in_s: chan out; + output_s: chan out; + looped_channel_r: chan in; + looped_channel_s: chan out; + ram_rd_req_0_s: chan> out; + ram_rd_req_1_s: chan> out; + ram_rd_req_2_s: chan> out; + ram_rd_req_3_s: chan> out; + ram_rd_req_4_s: chan> out; + ram_rd_req_5_s: chan> out; + ram_rd_req_6_s: chan> out; + ram_rd_req_7_s: chan> out; + ram_rd_resp_0_r: chan> in; + ram_rd_resp_1_r: chan> in; + ram_rd_resp_2_r: chan> in; + ram_rd_resp_3_r: chan> in; + ram_rd_resp_4_r: chan> in; + ram_rd_resp_5_r: chan> in; + ram_rd_resp_6_r: chan> in; + ram_rd_resp_7_r: chan> in; + ram_wr_req_0_s: chan> out; + ram_wr_req_1_s: chan> out; + ram_wr_req_2_s: chan> out; + ram_wr_req_3_s: chan> out; + ram_wr_req_4_s: chan> out; + ram_wr_req_5_s: chan> out; + ram_wr_req_6_s: chan> out; + ram_wr_req_7_s: chan> out; + ram_wr_resp_0_r: chan in; + ram_wr_resp_1_r: chan in; + ram_wr_resp_2_r: chan in; + ram_wr_resp_3_r: chan in; + ram_wr_resp_4_r: chan in; + ram_wr_resp_5_r: chan in; + ram_wr_resp_6_r: chan in; + 
ram_wr_resp_7_r: chan in; + + init {(ZERO_DECODER_STATE)} + + config ( + input_r: chan in, + output_s: chan out, + looped_channel_r: chan in, + looped_channel_s: chan out, + ram_rd_req_0_s: chan> out, + ram_rd_req_1_s: chan> out, + ram_rd_req_2_s: chan> out, + ram_rd_req_3_s: chan> out, + ram_rd_req_4_s: chan> out, + ram_rd_req_5_s: chan> out, + ram_rd_req_6_s: chan> out, + ram_rd_req_7_s: chan> out, + ram_rd_resp_0_r: chan> in, + ram_rd_resp_1_r: chan> in, + ram_rd_resp_2_r: chan> in, + ram_rd_resp_3_r: chan> in, + ram_rd_resp_4_r: chan> in, + ram_rd_resp_5_r: chan> in, + ram_rd_resp_6_r: chan> in, + ram_rd_resp_7_r: chan> in, + ram_wr_req_0_s: chan> out, + ram_wr_req_1_s: chan> out, + ram_wr_req_2_s: chan> out, + ram_wr_req_3_s: chan> out, + ram_wr_req_4_s: chan> out, + ram_wr_req_5_s: chan> out, + ram_wr_req_6_s: chan> out, + ram_wr_req_7_s: chan> out, + ram_wr_resp_0_r: chan in, + ram_wr_resp_1_r: chan in, + ram_wr_resp_2_r: chan in, + ram_wr_resp_3_r: chan in, + ram_wr_resp_4_r: chan in, + ram_wr_resp_5_r: chan in, + ram_wr_resp_6_r: chan in, + ram_wr_resp_7_r: chan in, + ) { + let (block_dec_in_s, block_dec_in_r) = chan("block_dec_in"); + let (seq_exec_in_s, seq_exec_in_r) = chan("seq_exec_in"); + let (repacketizer_in_s, repacketizer_in_r) = chan("repacketizer_in"); + + spawn block_dec::BlockDecoder(block_dec_in_r, seq_exec_in_s); + + spawn sequence_executor::SequenceExecutor( + seq_exec_in_r, repacketizer_in_s, + looped_channel_r, looped_channel_s, + ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, + ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, + ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, + ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, + ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, + ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, + ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, + ram_wr_resp_4_r, ram_wr_resp_5_r, 
ram_wr_resp_6_r, ram_wr_resp_7_r, + ); + + spawn repacketizer::Repacketizer(repacketizer_in_r, output_s); + + (input_r, block_dec_in_s, output_s, looped_channel_r, looped_channel_s, + ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, + ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, + ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, + ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, + ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, + ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, + ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, + ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r) + } + + next (state: ZstdDecoderState) { + let tok = join(); + trace_fmt!("zstd_dec: next(): state: {:#x}", state); + let can_fit = buff::buffer_can_fit(state.buffer, BlockData:0); + trace_fmt!("zstd_dec: next(): can_fit: {}", can_fit); + let (tok, data, recv_valid) = recv_if_non_blocking(tok, input_r, can_fit, BlockData:0); + let state = if (can_fit && recv_valid) { + let buffer = buff::buffer_append(state.buffer, data); + trace_fmt!("zstd_dec: next(): received more data: {:#x}", data); + ZstdDecoderState {buffer, ..state} + } else { + state + }; + trace_fmt!("zstd_dec: next(): state after receive: {:#x}", state); + + let (do_send, data_to_send, state) = match state.status { + ZstdDecoderStatus::DECODE_MAGIC_NUMBER => + decode_magic_number(state), + ZstdDecoderStatus::DECODE_FRAME_HEADER => + decode_frame_header(state), + ZstdDecoderStatus::DECODE_BLOCK_HEADER => + decode_block_header(state), + ZstdDecoderStatus::FEED_BLOCK_DECODER => + feed_block_decoder(state), + ZstdDecoderStatus::DECODE_CHECKSUM => + decode_checksum(state), + _ => (false, zero!(), state) + }; + + trace_fmt!("zstd_dec: next(): do_send: {:#x}, data_to_send: {:#x}, state: {:#x}", do_send, data_to_send, state); + let tok = send_if(tok, block_dec_in_s, do_send, data_to_send); + + state 
+ } +} + +const TEST_RAM_SIZE = sequence_executor::ram_size(ZSTD_HISTORY_BUFFER_SIZE_KB); +const RAM_WORD_PARTITION_SIZE = sequence_executor::RAM_WORD_PARTITION_SIZE; +const TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = sequence_executor::TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR; +const TEST_RAM_INITIALIZED = sequence_executor::TEST_RAM_INITIALIZED; +const TEST_RAM_ASSERT_VALID_READ:bool = {false}; + +pub proc ZstdDecoderTest { + input_r: chan in; + output_s: chan out; + + init {()} + + config ( + input_r: chan in, + output_s: chan out, + ) { + let (looped_channel_s, looped_channel_r) = chan("looped_channel"); + + let (ram_rd_req_0_s, ram_rd_req_0_r) = chan, u32:1>("ram_rd_req_0"); + let (ram_rd_req_1_s, ram_rd_req_1_r) = chan, u32:1>("ram_rd_req_1"); + let (ram_rd_req_2_s, ram_rd_req_2_r) = chan, u32:1>("ram_rd_req_2"); + let (ram_rd_req_3_s, ram_rd_req_3_r) = chan, u32:1>("ram_rd_req_3"); + let (ram_rd_req_4_s, ram_rd_req_4_r) = chan, u32:1>("ram_rd_req_4"); + let (ram_rd_req_5_s, ram_rd_req_5_r) = chan, u32:1>("ram_rd_req_5"); + let (ram_rd_req_6_s, ram_rd_req_6_r) = chan, u32:1>("ram_rd_req_6"); + let (ram_rd_req_7_s, ram_rd_req_7_r) = chan, u32:1>("ram_rd_req_7"); + + let (ram_rd_resp_0_s, ram_rd_resp_0_r) = chan, u32:1>("ram_rd_resp_0"); + let (ram_rd_resp_1_s, ram_rd_resp_1_r) = chan, u32:1>("ram_rd_resp_1"); + let (ram_rd_resp_2_s, ram_rd_resp_2_r) = chan, u32:1>("ram_rd_resp_2"); + let (ram_rd_resp_3_s, ram_rd_resp_3_r) = chan, u32:1>("ram_rd_resp_3"); + let (ram_rd_resp_4_s, ram_rd_resp_4_r) = chan, u32:1>("ram_rd_resp_4"); + let (ram_rd_resp_5_s, ram_rd_resp_5_r) = chan, u32:1>("ram_rd_resp_5"); + let (ram_rd_resp_6_s, ram_rd_resp_6_r) = chan, u32:1>("ram_rd_resp_6"); + let (ram_rd_resp_7_s, ram_rd_resp_7_r) = chan, u32:1>("ram_rd_resp_7"); + + let (ram_wr_req_0_s, ram_wr_req_0_r) = chan, u32:1>("ram_wr_req_0"); + let (ram_wr_req_1_s, ram_wr_req_1_r) = chan, u32:1>("ram_wr_req_1"); + let (ram_wr_req_2_s, ram_wr_req_2_r) = chan, u32:1>("ram_wr_req_2"); + 
let (ram_wr_req_3_s, ram_wr_req_3_r) = chan, u32:1>("ram_wr_req_3"); + let (ram_wr_req_4_s, ram_wr_req_4_r) = chan, u32:1>("ram_wr_req_4"); + let (ram_wr_req_5_s, ram_wr_req_5_r) = chan, u32:1>("ram_wr_req_5"); + let (ram_wr_req_6_s, ram_wr_req_6_r) = chan, u32:1>("ram_wr_req_6"); + let (ram_wr_req_7_s, ram_wr_req_7_r) = chan, u32:1>("ram_wr_req_7"); + + let (ram_wr_resp_0_s, ram_wr_resp_0_r) = chan("ram_wr_resp_0"); + let (ram_wr_resp_1_s, ram_wr_resp_1_r) = chan("ram_wr_resp_1"); + let (ram_wr_resp_2_s, ram_wr_resp_2_r) = chan("ram_wr_resp_2"); + let (ram_wr_resp_3_s, ram_wr_resp_3_r) = chan("ram_wr_resp_3"); + let (ram_wr_resp_4_s, ram_wr_resp_4_r) = chan("ram_wr_resp_4"); + let (ram_wr_resp_5_s, ram_wr_resp_5_r) = chan("ram_wr_resp_5"); + let (ram_wr_resp_6_s, ram_wr_resp_6_r) = chan("ram_wr_resp_6"); + let (ram_wr_resp_7_s, ram_wr_resp_7_r) = chan("ram_wr_resp_7"); + + spawn ZstdDecoder( + input_r, output_s, + looped_channel_r, looped_channel_s, + ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, + ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, + ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, + ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, + ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, + ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, + ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, + ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r, + ); + + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_0_r, ram_rd_resp_0_s, ram_wr_req_0_r, ram_wr_resp_0_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_1_r, ram_rd_resp_1_s, ram_wr_req_1_r, 
ram_wr_resp_1_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_2_r, ram_rd_resp_2_s, ram_wr_req_2_r, ram_wr_resp_2_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_3_r, ram_rd_resp_3_s, ram_wr_req_3_r, ram_wr_resp_3_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_4_r, ram_rd_resp_4_s, ram_wr_req_4_r, ram_wr_resp_4_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_5_r, ram_rd_resp_5_s, ram_wr_req_5_r, ram_wr_resp_5_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_6_r, ram_rd_resp_6_s, ram_wr_req_6_r, ram_wr_resp_6_s); + spawn ram::RamModel< + RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> + (ram_rd_req_7_r, ram_rd_resp_7_s, ram_wr_req_7_r, ram_wr_resp_7_s); + + (input_r, output_s) + } + + next (state: ()) {} +} diff --git a/xls/modules/zstd/zstd_dec_test.cc b/xls/modules/zstd/zstd_dec_test.cc new file mode 100644 index 0000000000..0ed0b91fb8 --- /dev/null +++ b/xls/modules/zstd/zstd_dec_test.cc @@ -0,0 +1,297 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/status/statusor.h" +#include "absl/types/span.h" +#include "external/zstd/lib/zstd.h" +#include "gtest/gtest.h" +#include "xls/common/file/filesystem.h" +#include "xls/common/file/get_runfile_path.h" +#include "xls/common/status/matchers.h" +#include "xls/interpreter/channel_queue.h" +#include "xls/interpreter/serial_proc_runtime.h" +#include "xls/ir/bits.h" +#include "xls/ir/channel.h" +#include "xls/ir/events.h" +#include "xls/ir/ir_parser.h" +#include "xls/ir/package.h" +#include "xls/ir/proc.h" +#include "xls/ir/value.h" +#include "xls/jit/jit_proc_runtime.h" +#include "xls/modules/zstd/data_generator.h" + +namespace xls { +namespace { + +class ZstdDecodedPacket { + public: + static std::optional MakeZstdDecodedPacket( + const Value& packet) { + // Expect tuple + if (!packet.IsTuple()) { + return std::nullopt; + } + // Expect exactly 3 fields + if (packet.size() != 3) { + return std::nullopt; + } + for (int i = 0; i < 3; i++) { + // Expect fields to be Bits + if (!packet.element(i).IsBits()) { + return std::nullopt; + } + // All fields must fit in 64bits + if (!packet.element(i).bits().FitsInUint64()) { + return std::nullopt; + } + } + + std::vector data = packet.element(0).bits().ToBytes(); + absl::StatusOr len = packet.element(1).bits().ToUint64(); + if (!len.ok()) { + return std::nullopt; + } + uint64_t 
length = *len; + bool last = packet.element(2).bits().IsOne(); + + return ZstdDecodedPacket(data, length, last); + } + + std::vector& GetData() { return data; } + + uint64_t GetLength() { return length; } + + bool IsLast() { return last; } + + std::string PrintData() const { + std::stringstream s; + for (int j = 0; j < sizeof(uint64_t) && j < data.size(); j++) { + s << "0x" << std::setw(2) << std::setfill('0') << std::right << std::hex + << static_cast(data[j]) << std::dec << ", "; + } + return s.str(); + } + + private: + ZstdDecodedPacket(std::vector data, uint64_t length, bool last) + : data(std::move(data)), length(length), last(last) {} + + std::vector data; + uint64_t length; + bool last; +}; + +class ZstdDecoderTest : public ::testing::Test { + public: + void SetUp() override { + XLS_ASSERT_OK_AND_ASSIGN(std::filesystem::path ir_path, + xls::GetXlsRunfilePath(this->ir_file)); + XLS_ASSERT_OK_AND_ASSIGN(std::string ir_text, + xls::GetFileContents(ir_path)); + XLS_ASSERT_OK_AND_ASSIGN(this->package, xls::Parser::ParsePackage(ir_text)); + XLS_ASSERT_OK_AND_ASSIGN(this->interpreter, + CreateJitSerialProcRuntime(this->package.get())); + + auto& queue_manager = this->interpreter->queue_manager(); + XLS_ASSERT_OK_AND_ASSIGN(this->recv_queue, queue_manager.GetQueueByName( + this->recv_channel_name)); + XLS_ASSERT_OK_AND_ASSIGN(this->send_queue, queue_manager.GetQueueByName( + this->send_channel_name)); + } + + void PrintTraceMessages(const std::string& pname) { + XLS_ASSERT_OK_AND_ASSIGN(Proc * proc, this->package->GetProc(pname)); + const InterpreterEvents& events = + this->interpreter->GetInterpreterEvents(proc); + + if (!events.trace_msgs.empty()) { + for (const auto& tm : events.trace_msgs) { + std::cout << "[TRACE] " << tm.message << "\n"; + } + } + } + + const char* proc_name = "__zstd_dec__ZstdDecoderTest_0_next"; + const char* recv_channel_name = "zstd_dec__output_s"; + const char* send_channel_name = "zstd_dec__input_r"; + + const char* ir_file = 
"xls/modules/zstd/zstd_dec_test.ir"; + + std::unique_ptr package; + std::unique_ptr interpreter; + ChannelQueue *recv_queue, *send_queue; + + void PrintVector(absl::Span vec) { + for (int i = 0; i < vec.size(); i += 8) { + std::cout << "0x" << std::hex << std::setw(3) << std::left << i + << std::dec << ": "; + for (int j = 0; j < sizeof(uint64_t) && (i + j) < vec.size(); j++) { + std::cout << std::setfill('0') << std::setw(2) << std::hex + << static_cast(vec[i + j]) << std::dec << " "; + } + std::cout << "\n"; + } + } + + void DecompressWithLibZSTD(std::vector encoded_frame, + std::vector& decoded_frame) { + size_t buff_out_size = ZSTD_DStreamOutSize(); + uint8_t* const buff_out = new uint8_t[buff_out_size]; + + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + EXPECT_FALSE(dctx == nullptr); + + void* const frame = static_cast(encoded_frame.data()); + size_t const frame_size = encoded_frame.size(); + // Put the whole frame in the buffer + ZSTD_inBuffer input_buffer = {frame, frame_size, 0}; + + while (input_buffer.pos < input_buffer.size) { + ZSTD_outBuffer output_buffer = {buff_out, buff_out_size, 0}; + size_t decomp_result = + ZSTD_decompressStream(dctx, &output_buffer, &input_buffer); + bool decomp_success = ZSTD_isError(decomp_result) != 0u; + EXPECT_FALSE(decomp_success); + + // Append output buffer contents to output vector + decoded_frame.insert( + decoded_frame.end(), static_cast(output_buffer.dst), + (static_cast(output_buffer.dst) + output_buffer.pos)); + + EXPECT_TRUE(decomp_result == 0 && output_buffer.pos < output_buffer.size); + } + + ZSTD_freeDCtx(dctx); + delete[] buff_out; + } + + void ParseAndCompareWithZstd(std::vector frame) { + std::vector lib_decomp; + DecompressWithLibZSTD(frame, lib_decomp); + size_t lib_decomp_size = lib_decomp.size(); + std::cout << "lib_decomp_size: " << lib_decomp_size << "\n"; + + std::vector sim_decomp; + size_t sim_decomp_size_words = + (lib_decomp_size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + size_t 
sim_decomp_size_bytes = + (lib_decomp_size + sizeof(uint64_t) - 1) * sizeof(uint64_t); + sim_decomp.reserve(sim_decomp_size_bytes); + + // Send compressed frame to decoder simulation + for (int i = 0; i < frame.size(); i += 8) { + auto span = absl::MakeSpan(frame.data() + i, 8); + auto value = Value(Bits::FromBytes(span, 64)); + XLS_EXPECT_OK(this->send_queue->Write(value)); + XLS_EXPECT_OK(this->interpreter->Tick()); + } + PrintTraceMessages("__zstd_dec__ZstdDecoderTest_0_next"); + + // Tick decoder simulation until we get expected amount of output data + // batches on output channel queue + std::optional ticks_timeout = std::nullopt; + absl::flat_hash_map output_counts = { + {this->recv_queue->channel(), sim_decomp_size_words}}; + XLS_EXPECT_OK( + this->interpreter->TickUntilOutput(output_counts, ticks_timeout)); + + // Read decompressed data from output channel queue + for (int i = 0; i < sim_decomp_size_words; i++) { + auto read_value = this->recv_queue->Read(); + EXPECT_EQ(read_value.has_value(), true); + auto packet = + ZstdDecodedPacket::MakeZstdDecodedPacket(read_value.value()); + EXPECT_EQ(packet.has_value(), true); + auto word_vec = packet->GetData(); + auto valid_length = packet->GetLength() / CHAR_BIT; + std::copy(begin(word_vec), begin(word_vec) + valid_length, + back_inserter(sim_decomp)); + } + + EXPECT_EQ(lib_decomp_size, sim_decomp.size()); + for (int i = 0; i < lib_decomp_size; i++) { + EXPECT_EQ(lib_decomp[i], sim_decomp[i]); + } + } +}; + +/* TESTS */ + +TEST(ZstdLib, Version) { ASSERT_EQ(ZSTD_VERSION_STRING, "1.5.6"); } + +TEST_F(ZstdDecoderTest, ParseFrameWithRawBlocks) { + int seed = 3; // Arbitrary seed value for small ZSTD frame + auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RAW); + EXPECT_TRUE(frame.ok()); + this->ParseAndCompareWithZstd(frame.value()); +} + +TEST_F(ZstdDecoderTest, ParseFrameWithRleBlocks) { + int seed = 3; // Arbitrary seed value for small ZSTD frame + auto frame = zstd::GenerateFrame(seed, 
zstd::BlockType::RLE); + EXPECT_TRUE(frame.ok()); + this->ParseAndCompareWithZstd(frame.value()); +} + +class ZstdDecoderSeededTest : public ZstdDecoderTest, + public ::testing::WithParamInterface { + public: + static const uint32_t seed_generator_start = 0; + static const uint32_t random_frames_count = 100; +}; + +// Test `random_frames_count` instances of randomly generated valid +// frames, generated with `decodecorpus` tool. + +TEST_P(ZstdDecoderSeededTest, ParseMultipleFramesWithRawBlocks) { + auto seed = GetParam(); + auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RAW); + EXPECT_TRUE(frame.ok()); + this->ParseAndCompareWithZstd(frame.value()); +} + +TEST_P(ZstdDecoderSeededTest, ParseMultipleFramesWithRleBlocks) { + auto seed = GetParam(); + auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RLE); + EXPECT_TRUE(frame.ok()); + this->ParseAndCompareWithZstd(frame.value()); +} + +INSTANTIATE_TEST_SUITE_P( + ZstdDecoderSeededTest, ZstdDecoderSeededTest, + ::testing::Range(ZstdDecoderSeededTest::seed_generator_start, + ZstdDecoderSeededTest::seed_generator_start + + ZstdDecoderSeededTest::random_frames_count)); + +} // namespace +} // namespace xls