diff --git a/deps/zlib/CMakeLists.txt b/deps/zlib/CMakeLists.txt index 66f7d04966afa5..59d77c3a413628 100644 --- a/deps/zlib/CMakeLists.txt +++ b/deps/zlib/CMakeLists.txt @@ -24,6 +24,7 @@ check_include_file(stddef.h HAVE_STDDEF_H) option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF) option(ENABLE_SIMD_AVX512 "Enable SIMD AXV512 optimizations" OFF) option(USE_ZLIB_RABIN_KARP_HASH "Enable bitstream compatibility with canonical zlib" OFF) +option(ENABLE_INTEL_QAT_COMPRESSION "Enable Intel Quick Assist Technology use for compression" OFF) option(BUILD_UNITTESTS "Enable standalone unit tests build" OFF) option(BUILD_MINIZIP_BIN "Enable building minzip_bin tool" OFF) option(BUILD_ZPIPE "Enable building zpipe tool" OFF) @@ -228,6 +229,22 @@ if (ENABLE_SIMD_OPTIMIZATIONS) endif() endif() +if (ENABLE_INTEL_QAT_COMPRESSION) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/deflate_qat.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/io_buffers.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/memory.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_buffer_list.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_instance.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/session.cpp) + list(APPEND ZLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp/qat_task.cpp) + + # The QAT glue file and the qatzpp wrapper sources listed above are appended to ZLIB_SRCS only when the option is ON. TODO(gustavoa): Find a way to include the qatzpp headers without having the + # presubmit check throw errors. 
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/contrib/qat/qatzpp) + add_compile_definitions(QAT_COMPRESSION_ENABLED) +endif() + # parse the full version number from zlib.h and include in ZLIB_FULL_VERSION file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents) string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*" @@ -254,6 +271,15 @@ add_library(zlibstatic STATIC ${ZLIB_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HD set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL) set_target_properties(zlib PROPERTIES SOVERSION 1) +if (ENABLE_INTEL_QAT_COMPRESSION) + target_include_directories(zlib PUBLIC ${QATZPP_INCLUDE_DIRS}) + target_link_libraries(zlib ${QATZPP_LIBRARY}) + target_link_libraries(zlib qat) + target_include_directories(zlibstatic PUBLIC ${QATZPP_INCLUDE_DIRS}) + target_link_libraries(zlibstatic ${QATZPP_LIBRARY}) + target_link_libraries(zlibstatic qat) +endif() + if(NOT CYGWIN) # This property causes shared libraries on Linux to have the full version # encoded into their final filename. We disable this on Cygwin because diff --git a/deps/zlib/contrib/qat/deflate_qat.cpp b/deps/zlib/contrib/qat/deflate_qat.cpp new file mode 100644 index 00000000000000..bfe45472bb51b1 --- /dev/null +++ b/deps/zlib/contrib/qat/deflate_qat.cpp @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. + * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "deflate_qat.h" +#include "deflate.h" + +#include "session.hpp" +#include "qat_instance.hpp" +#include "qat_buffer_list.hpp" +#include "qat.hpp" + +#include <cstring> +#include + +/* +* TODO(gustavoa): Make the input size adjustable from the memlevel +* attribute on deflateInit. +*/ +static constexpr size_t kInputSize = 1024 * 1024; + +/* QAT instances obtained from the library. 
*/ +static std::vector> qat_instances; + +/* +* TODO(gustavoa): Verify if the ordering of the struct fields won't create +* unnecessary holes in the structure that requires extraneous padding. +*/ +struct qat_deflate { + std::unique_ptr qat_session; + + /* QAT requires contiguous physical pages. Cannot be allocated using + * malloc/new. + */ + uint8_t *input_buffer; + uint8_t *output_buffer; + + /* Pointer to the next byte in the output buffer. */ + uint8_t *pending_out; + + unsigned input_buffer_size; + unsigned output_buffer_size; + + unsigned pending_in_count; + unsigned pending_out_count; +}; + +/* Frees whichever QAT buffers were allocated and then the state object itself; + * deleting the state also destroys the session it owns. */ +static void qat_release_state(struct qat_deflate *qat) +{ + if (qat->input_buffer) { + qat::Free(qat->input_buffer); + } + if (qat->output_buffer) { + qat::Free(qat->output_buffer); + } + delete qat; +} + +/* Creates a deflate session on the first QAT instance. wrap == 1 selects the + * Adler-32 checksum, wrap == 2 selects CRC-32, any other value no checksum. */ +static std::unique_ptr qat_create_session(int level, int wrap) +{ + CpaDcChecksum checksum = CPA_DC_NONE; + + switch(wrap) { + case 1: + checksum = CPA_DC_ADLER32; + break; + case 2: + checksum = CPA_DC_CRC32; + break; + } + + return std::make_unique( + qat_instances[0], + (CpaDcCompLvl)level, + checksum, + 0 + ); +} + + +/* Returns Z_OK when qat::Initialize() reports success (zero), Z_ERRNO otherwise. */ +int qat_deflate_init() +{ + return (qat::Initialize()) ? Z_ERRNO : Z_OK; +} + +/* Allocates the QAT state for one stream. Returns nullptr when no instance is + * available, the instance cannot be started or a buffer allocation fails. 
*/ +struct qat_deflate* qat_deflate_state_init(int level, int wrap) +{ + if (qat_instances.empty()) { + qat_instances = qat::Instance::Create(); + } + if (qat_instances.empty()) { + return nullptr; + } + + struct qat_deflate *qat_deflate = new struct qat_deflate; + if (!qat_deflate) { + return nullptr; + } + + /* TODO(gustavoa): Find a way to utilize all the available instances for the same + * process. 
+ */ + if (qat_instances[0]->Start() != 0) { + /* Do not build a session on an instance that failed to start. */ + delete qat_deflate; + return nullptr; + } + + qat_deflate->qat_session = qat_create_session(level, wrap); + + qat_deflate->input_buffer_size = kInputSize; + qat_deflate->input_buffer = qat::AllocBlockArray(kInputSize, 0); + qat_deflate->output_buffer_size = + qat_deflate->qat_session->GetDeflateBound(qat_deflate->input_buffer_size); + qat_deflate->pending_out = qat_deflate->output_buffer = + qat::AllocBlockArray(qat_deflate->output_buffer_size, 0); + + qat_deflate->pending_in_count = qat_deflate->pending_out_count = 0; + + if (!qat_deflate->input_buffer || !qat_deflate->output_buffer) { + qat_release_state(qat_deflate); + return nullptr; + } + + return qat_deflate; +} + +/* Copies at most `size` bytes from strm->next_in to the end of the buffered QAT + * input, advances the stream counters and returns the number of bytes copied. */ +static unsigned qat_read_buf(z_streamp strm, struct qat_deflate* qat, unsigned size) +{ + unsigned len = strm->avail_in; + + if (len > size) { + len = size; + } + if (len == 0) return 0; + + strm->avail_in -= len; + strm->total_in += len; + + zmemcpy( + qat->input_buffer + qat->pending_in_count, + strm->next_in, + len + ); + + strm->next_in += len; + qat->pending_in_count += len; + + return len; +} + +/* Copies as much pending QAT output as fits into strm->next_out. 
*/ +void qat_flush_pending(deflate_state* s) +{ + unsigned len; + z_streamp strm = s->strm; + struct qat_deflate* qat = s->qat_s; + + len = qat->pending_out_count; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, qat->pending_out, len); + + qat->pending_out += len; + qat->pending_out_count -= len; + strm->next_out += len; + strm->avail_out -= len; + strm->total_out += len; + if (qat->pending_out_count == 0) { + qat->pending_out = qat->output_buffer; + } +} + +/* Compresses the buffered input with a single QAT request, updates strm->adler + * and moves any input QAT did not consume to the front of the buffer. */ +static int qat_compress_pending(deflate_state*s, int flush) +{ + struct qat_deflate* qat = s->qat_s; + uint32_t metadata_size; + + /* TODO(gustavoa): find a way to make qatzpp setup this number internally. 
*/ + cpaDcBufferListGetMetaSize(qat->qat_session->getInstance()->GetHandle(), 1, &metadata_size); + + auto job = qat->qat_session->Deflate( + std::make_unique( + std::make_unique( + qat->input_buffer, + qat->pending_in_count, + metadata_size + ), + std::make_unique( + qat->output_buffer, + qat->output_buffer_size, + metadata_size + ) + ), (flush == Z_FINISH && s->strm->avail_in == 0) + ); + + job->WaitCompletion(); + + /* + * TODO(gustavoa): make QAT perform the checksum combine. + */ + if (s->wrap == 2) { + s->strm->adler = crc32_combine( + s->strm->adler, + job->GetResults()->checksum, + job->GetResults()->consumed + ); + } else if (s->wrap == 1) { + s->strm->adler = adler32( + s->strm->adler, + qat->input_buffer, + job->GetResults()->consumed + ); + } + + qat->pending_out_count = job->GetResults()->produced; + qat->pending_in_count -= job->GetResults()->consumed; + + if(qat->pending_in_count != 0) { + /* Move any remaining bytes to the beginning of the buffer; source and destination may overlap, so memmove is required. */ + std::memmove( + qat->input_buffer, + qat->input_buffer + job->GetResults()->consumed, + qat->pending_in_count + ); + } + + return 0; +} + +/* Buffers input from strm, compresses it once the buffer is full or a flush is + * requested, drains the output and returns the resulting block state. */ +qat_block_state qat_deflate_step(deflate_state* s, int flush) +{ + z_streamp strm = s->strm; + struct qat_deflate* qat_state = s->qat_s; + + for (;;) { + if (qat_state->pending_in_count < qat_state->input_buffer_size) { + qat_read_buf( + strm, + qat_state, + qat_state->input_buffer_size - qat_state->pending_in_count + ); + if (qat_state->pending_in_count < qat_state->input_buffer_size && flush == Z_NO_FLUSH) { + return qat_block_need_more; + } else { + qat_compress_pending(s, flush); + } + if (strm->avail_in == 0) { + break; + } + } else { + qat_compress_pending(s, flush); + } + + qat_flush_pending(s); + if (strm->avail_out == 0) { + return (flush == Z_FINISH) ? 
qat_block_finish_started : qat_block_need_more; + } + } + + if (flush == Z_FINISH) { + qat_flush_pending(s); + if (strm->avail_out == 0) { + return qat_block_finish_started; + } else { + return qat_block_finish_done; + } + } + + qat_flush_pending(s); + if (strm->avail_out == 0) { + return qat_block_done; + } + + return qat_block_need_more; +} + +/* Frees the QAT buffers, session and state of `s`, then clears s->qat_s. */ +int qat_deflate_state_free(deflate_state* s) +{ + struct qat_deflate* qat_state = s->qat_s; + if (qat_state->input_buffer) { + qat::Free(qat_state->input_buffer); + } + if (qat_state->output_buffer) { + qat::Free(qat_state->output_buffer); + } + + qat_state->qat_session.reset(); + delete qat_state; + s->qat_s = nullptr; + + return Z_OK; +} + +/* Duplicates the QAT state of `ss` with a new session and new buffers. Returns + * nullptr when `ss` has no QAT state or a buffer cannot be allocated. */ +struct qat_deflate *qat_deflate_copy(deflate_state *ss) +{ + struct qat_deflate *sqat = ss->qat_s; + struct qat_deflate *dqat = nullptr; + + if (!sqat) { + return nullptr; + } + + dqat = new struct qat_deflate; + + dqat->qat_session = qat_create_session(ss->level, ss->wrap); + + dqat->input_buffer_size = sqat->input_buffer_size; + dqat->input_buffer = qat::AllocBlockArray(dqat->input_buffer_size, 0); + + dqat->output_buffer_size = sqat->output_buffer_size; + dqat->output_buffer = qat::AllocBlockArray(dqat->output_buffer_size, 0); + if (!dqat->input_buffer || !dqat->output_buffer) { + qat_release_state(dqat); + return nullptr; + } + + dqat->pending_in_count = sqat->pending_in_count; + dqat->pending_out_count = sqat->pending_out_count; + + dqat->pending_out = + dqat->output_buffer + (sqat->pending_out - sqat->output_buffer); + + zmemcpy(dqat->input_buffer, sqat->input_buffer, dqat->input_buffer_size); + zmemcpy(dqat->output_buffer, sqat->output_buffer, dqat->output_buffer_size); + + return dqat; +} + diff --git a/deps/zlib/contrib/qat/deflate_qat.h b/deps/zlib/contrib/qat/deflate_qat.h new file mode 100644 index 00000000000000..3c7aa116b7dc70 --- /dev/null +++ b/deps/zlib/contrib/qat/deflate_qat.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef DEFLATE_QAT_H +#define DEFLATE_QAT_H + +#include "deflate.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* This is a 1:1 mapping of the block states that deflate_fast, deflate_slow, + * deflate_rle, etc. return. + * The added 'qat_failure' value is used for signaling the caller to revert + * back into software mode. + */ +typedef enum { + qat_block_need_more, + qat_block_done, + qat_block_finish_started, + qat_block_finish_done, + qat_failure +} qat_block_state; + +/* Initialize QAT for the calling process if it has not yet been initialized. Declared with (void) so that C callers get a real prototype. */ +int qat_deflate_init(void); + +/* Initialize a QAT stream state for a deflate_state object. */ +struct qat_deflate *qat_deflate_state_init(int level, int wrap); + +/* Flush QAT output buffer into the zstream.next_out pointer. */ +void qat_flush_pending(deflate_state*); + +/* Compresses/copies/flushes any data in the internal QAT state + * input/output buffers. +*/ +qat_block_state qat_deflate_step(deflate_state*, int flush); + +/* Frees all the QAT-related buffers and objects for a given deflate_state. */ +int qat_deflate_state_free(deflate_state*); + +struct qat_deflate *qat_deflate_copy(deflate_state *ss); /* Returns a copy of the QAT state of ss, or a null pointer if ss has none. */ + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/io_buffers.cpp b/deps/zlib/contrib/qat/qatzpp/io_buffers.cpp new file mode 100644 index 00000000000000..2870292be17251 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/io_buffers.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include +#include + +#include "io_buffers.h" +#include "qat_instance.hpp" + +namespace qat +{ + +IOBuffers::IOBuffers() /* Leaves both buffer lists empty. */ +{ +} + +IOBuffers::IOBuffers(std::unique_ptr&& src_list, std::unique_ptr&& dst_list): /* Takes ownership of both lists. */ + src_buffer_list_(std::move(src_list)), dst_buffer_list_(std::move(dst_list)) +{ +} + +IOBuffers::~IOBuffers() +{ +} + +} diff --git a/deps/zlib/contrib/qat/qatzpp/io_buffers.h b/deps/zlib/contrib/qat/qatzpp/io_buffers.h new file mode 100644 index 00000000000000..9fe8bfdbc336c8 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/io_buffers.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. + * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_IO_BUFFERS_H +#define QATZPP_IO_BUFFERS_H + +#include + +#include +#include +#include +#include +#include + +#include "memory.hpp" +#include "qat_instance.hpp" + +namespace qat +{ + +struct BaseBufferList /* A CpaBufferList together with the flat_buffers storage; only derived types can construct it. */ +{ + virtual ~BaseBufferList() {} + + CpaBufferList list; + std::vector flat_buffers; + +protected: + BaseBufferList() {} +}; + +class IOBuffers /* Owns one source and one destination buffer list. */ +{ +public: + IOBuffers( + std::unique_ptr &&src_list, + std::unique_ptr &&dst_list + ); + virtual ~IOBuffers(); + + BaseBufferList *GetSrc() const { /* Non-owning pointer; may be null for a default-constructed object. */ + return src_buffer_list_.get(); + } + + BaseBufferList *GetDst() const { /* Non-owning pointer; may be null for a default-constructed object. */ + return dst_buffer_list_.get(); + } +protected: + IOBuffers(); + + std::unique_ptr src_buffer_list_; + std::unique_ptr dst_buffer_list_; +}; + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/memory.cpp b/deps/zlib/contrib/qat/qatzpp/memory.cpp new file mode 100644 index 00000000000000..6a97ffe2fdfcdb --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/memory.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include + +#include +#include + +#include "memory.hpp" +#include "qat.hpp" + +namespace qat +{ + +void *Alloc(size_t size_bytes, uint32_t numa_node) /* Forwards to qaeMemAllocNUMA with the third argument fixed to 1. */ +{ + return qaeMemAllocNUMA(size_bytes, numa_node, 1); +} + +void Free(void *ptr) /* Passes the address of the local copy of ptr to qaeMemFreeNUMA; the caller's own pointer is not modified. */ +{ + qaeMemFreeNUMA(&ptr); +} + +} \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/memory.hpp b/deps/zlib/contrib/qat/qatzpp/memory.hpp new file mode 100644 index 00000000000000..191516ca75dd20 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/memory.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. + * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_MEMORY_HPP +#define QATZPP_MEMORY_HPP + +#include +#include + +namespace qat +{ + +void *Alloc(size_t sizeBytes, uint32_t numa_node); + +template +T *AllocBlock(int32_t numa_node) /* Returns storage sized for one T; the result of Alloc is only cast, no constructor runs. */ +{ + return static_cast(Alloc(sizeof(T), numa_node)); +} + +template +T *AllocBlockArray(size_t count, int32_t numa_node) /* Returns storage for count objects of type T, or nullptr when count is 0. NOTE(review): sizeof(T) * count is not checked for overflow. */ +{ + if (count <= 0) { + return nullptr; + } + + return static_cast(Alloc(sizeof(T) * count, numa_node)); +} + +void Free(void *ptr); + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/qat.cpp b/deps/zlib/contrib/qat/qatzpp/qat.cpp new file mode 100644 index 00000000000000..80468d395151f3 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "qat.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +namespace qat +{ + +static bool g_qat_not_available = false; /* Set once icp_sal_userStartMultiProcess has failed; later Initialize() calls then fail immediately. */ +static bool g_qat_initialized = false; +static std::mutex g_qat_initialization_mutex; /* Guards the two flags above. */ + +class QATContext /* Its destructor calls icp_sal_userStop() when initialization had succeeded. */ +{ +public: + explicit QATContext() {} + + QATContext(const QATContext &) = delete; + QATContext &operator=(const QATContext &) = delete; + + QATContext(QATContext &&) = delete; + QATContext &operator=(QATContext &&) = delete; + + ~QATContext() + { + std::lock_guard lock(g_qat_initialization_mutex); + + if (g_qat_not_available) return; + + if (g_qat_initialized) { + icp_sal_userStop(); + g_qat_initialized = false; + } + } +}; + +static std::unique_ptr qat_context; + +int Initialize() /* Calls icp_sal_userStartMultiProcess("SSL", CPA_FALSE) at most once. Returns CPA_STATUS_SUCCESS when QAT is (or already was) started, otherwise a failure status. */ +{ + std::lock_guard lock(g_qat_initialization_mutex); + uint32_t cpa_state; /* NOTE(review): unsigned local compared against CPA_STATUS_* and returned as int - confirm the status type. */ + if (g_qat_not_available) { + return CPA_STATUS_FAIL; + } + if (g_qat_initialized) { + return CPA_STATUS_SUCCESS; + } + + cpa_state = icp_sal_userStartMultiProcess("SSL", CPA_FALSE); + + g_qat_not_available = (cpa_state != CPA_STATUS_SUCCESS); + g_qat_initialized = (cpa_state == CPA_STATUS_SUCCESS); + + qat_context = std::make_unique(); + return cpa_state; +} + +} diff --git a/deps/zlib/contrib/qat/qatzpp/qat.hpp b/deps/zlib/contrib/qat/qatzpp/qat.hpp new file mode 100644 index 00000000000000..8ee7746b5efd04 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat.hpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_QAT_HPP +#define QATZPP_QAT_HPP + +namespace qat +{ + +int Initialize(); /* Returns 0 (CPA_STATUS_SUCCESS) when QAT is usable; see qat.cpp. */ + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.cpp b/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.cpp new file mode 100644 index 00000000000000..f0eea4908a54ff --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. + * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "qat_buffer_list.hpp" + +namespace qat +{ + +BufferListUser::BufferListUser( /* Wraps the caller-owned data in a one-element buffer list and allocates metadata_size bytes of private metadata. */ + uint8_t *data, + size_t size, + size_t metadata_size) +{ + flat_buffers = std::vector(1); + flat_buffers[0].pData = data; + flat_buffers[0].dataLenInBytes = size; + list.pPrivateMetaData = AllocBlockArray(metadata_size, 0); /* nullptr when metadata_size is 0 */ + list.numBuffers = 1; + list.pBuffers = flat_buffers.data(); +} + +BufferListUser::~BufferListUser() /* Frees only the private metadata; the wrapped data pointer is not freed here. */ +{ + if (list.pPrivateMetaData) { + Free(list.pPrivateMetaData); + } +} + +} diff --git a/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.hpp b/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.hpp new file mode 100644 index 00000000000000..2a28175e18dc58 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_buffer_list.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_QAT_BUFFER_LIST_HPP +#define QATZPP_QAT_BUFFER_LIST_HPP + +#include + +#include "io_buffers.h" + +namespace qat +{ + +struct BufferListUser final : public BaseBufferList /* Buffer list over memory supplied by the caller. */ +{ + BufferListUser( + uint8_t *data, + size_t size, + size_t metadata_size + ); + + ~BufferListUser() override; +}; + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/qat_instance.cpp b/deps/zlib/contrib/qat/qatzpp/qat_instance.cpp new file mode 100644 index 00000000000000..5b833c2ce7e2dd --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_instance.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. + * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include + +#include +#include + +#include "memory.hpp" +#include "qat_instance.hpp" +#include "session.hpp" + +#define MAX_SAMPLE_BUFFER_SIZE (4*1024*1024) + +namespace qat +{ + +static std::mutex g_instance_mutex; +static std::vector> instances; + +static CpaPhysicalAddr virt2Phys(void *virt_addr) /* Address translation callback registered with cpaDcSetAddressTranslation. */ +{ + return (CpaPhysicalAddr)qaeVirtToPhysNUMA(virt_addr); +} + +Instance::Instance(CpaInstanceHandle instance): /* Queries the capabilities and, when required, allocates the intermediate buffers later passed to cpaDcStartInstance. */ + instance_(instance), + num_intermediate_buffer_lists_(0), + intermediate_buffer_array_(nullptr), + started_(false) +{ + CpaDcInstanceCapabilities caps{}; + cpaDcQueryCapabilities(instance_, &caps); + + if (!caps.statelessDeflateCompression || !caps.statelessDeflateDecompression || + !caps.checksumAdler32 || !caps.dynamicHuffman) + { + return; + } + + if (caps.dynamicHuffmanBufferReq) { + uint32_t buffer_metadata_size; + cpaDcBufferListGetMetaSize(instance_, 1, &buffer_metadata_size); + cpaDcGetNumIntermediateBuffers(instance_, &num_intermediate_buffer_lists_); + + if(num_intermediate_buffer_lists_) { + intermediate_buffer_array_ = 
AllocBlockArray(num_intermediate_buffer_lists_, 0); + } + for (int i = 0; i < num_intermediate_buffer_lists_; ++i) { + intermediate_buffer_array_[i] = AllocBlock(0); + intermediate_buffer_array_[i]->numBuffers = 1; /* The list comes from raw storage, so the buffer count must be set explicitly: one flat buffer per list. */ + intermediate_buffer_array_[i]->pPrivateMetaData = + AllocBlockArray(buffer_metadata_size, 0); + intermediate_buffer_array_[i]->pBuffers = AllocBlock(0); + intermediate_buffer_array_[i]->pBuffers->pData = + AllocBlockArray(MAX_SAMPLE_BUFFER_SIZE, 0); + intermediate_buffer_array_[i]->pBuffers->dataLenInBytes = MAX_SAMPLE_BUFFER_SIZE; + } + } + + cpaDcSetAddressTranslation(instance_, virt2Phys); +} + +Instance::~Instance() /* NOTE(review): the intermediate buffers allocated in the constructor are not released here. */ +{ +} + +CpaDcInstanceCapabilities Instance::GetCapabilities() +{ + CpaDcInstanceCapabilities caps{}; + cpaDcQueryCapabilities(instance_, &caps); + + return caps; +} + +CpaInstanceInfo2 Instance::GetInfo() +{ + CpaInstanceInfo2 info{}; + cpaDcInstanceGetInfo2(instance_, &info); + + return info; +} + +int Instance::Start() /* Starts the instance once; returns 0 on success or when already started, -1 when cpaDcStartInstance fails. */ +{ + std::lock_guard lock(mutex_); + + if (started_) { + return 0; + } + + int ret = cpaDcStartInstance + ( + instance_, + num_intermediate_buffer_lists_, + intermediate_buffer_array_ + ); + if (ret) { + return -1; + } + started_ = true; + return 0; +} + +std::vector> Instance::Create() /* Returns the cached instance list, discovering the instances on the first call; empty when none are found. */ +{ + std::lock_guard lock(g_instance_mutex); + uint16_t num_instances = 0; + + if (!instances.empty()) { + return instances; + } + + cpaDcGetNumInstances(&num_instances); + + if (!num_instances) { + std::cerr << "No instances found\n"; + return {}; + } + + std::vector handles(num_instances); + cpaDcGetInstances(num_instances, handles.data()); + + for(auto& handle: handles) { + instances.emplace_back(std::make_shared(handle)); + } + + return instances; +} + +} diff --git a/deps/zlib/contrib/qat/qatzpp/qat_instance.hpp b/deps/zlib/contrib/qat/qatzpp/qat_instance.hpp new file mode 100644 index 00000000000000..1a2b4afcab10f1 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_instance.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_QAT_INSTANCE_HPP +#define QATZPP_QAT_INSTANCE_HPP + +#include + +#include +#include +#include + +namespace qat +{ + +class Instance /* Wraps one CpaInstanceHandle together with its intermediate buffer array and a started flag. */ +{ +public: + Instance(CpaInstanceHandle); + ~Instance(); + + CpaInstanceHandle GetHandle() { return instance_; } + CpaDcInstanceCapabilities GetCapabilities(); + CpaInstanceInfo2 GetInfo(); + + int Start(void); /* 0 on success, non-zero on failure. */ + static std::vector> Create(); /* Returns the instances known to this process. */ +private: + + CpaInstanceHandle instance_; + uint16_t num_intermediate_buffer_lists_; + CpaBufferList **intermediate_buffer_array_; + bool started_; + + std::mutex mutex_; /* Serializes Start(). */ +}; + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/qat_task.cpp b/deps/zlib/contrib/qat/qatzpp/qat_task.cpp new file mode 100644 index 00000000000000..a53ea94ac95a16 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_task.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include +#include + +#include "qat_task.hpp" + +namespace qat +{ + +QATTask::QATTask(std::shared_ptr &qat_instance, /* Initializers are listed in member declaration order, which is the order in which they actually run. */ + std::unique_ptr &&buffers, + std::unique_ptr &&dc_results): + completed_(false), + qat_instance_(qat_instance), + dc_results_(std::move(dc_results)), + io_buffers_(std::move(buffers)) +{ +} + +void QATTask::WaitCompletion() /* Polls the instance until dc_callback has marked this task completed. NOTE(review): there is no timeout if the callback never fires. */ +{ + if (completed_) { + return; + } + + while (!completed_) { + icp_sal_DcPollInstance(qat_instance_->GetHandle(), 0); + } +} + +IOBuffers *QATTask::GetBuffers() +{ + return io_buffers_.get(); +} + +CpaDcRqResults *QATTask::GetResults() +{ + return dc_results_.get(); +} + +void dc_callback(void *callback_tag, CpaStatus status) /* Marks the task passed as callback_tag completed; status is not inspected. */ +{ + if (!callback_tag) { + return; + } + // Ugly and dangerous + QATTask* task = static_cast(callback_tag); + task->completed_ = true; +} + +} \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/qat_task.hpp b/deps/zlib/contrib/qat/qatzpp/qat_task.hpp new file mode 100644 index 00000000000000..3950502f50d7e7 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/qat_task.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_WORK_HPP +#define QATZPP_WORK_HPP + +#include + +#include + +#include "io_buffers.h" + +namespace qat +{ + +class QATTask /* One submitted QAT request: holds the instance, the I/O buffers and the result struct for the lifetime of the task. Neither copyable nor movable. */ +{ +public: + explicit QATTask(std::shared_ptr &qat_instance, + std::unique_ptr &&, + std::unique_ptr &&dc_results); + + QATTask(QATTask &&) = delete; + QATTask& operator=(QATTask &&) = delete; + + QATTask(const QATTask &) = delete; + QATTask &operator=(const QATTask &) = delete; + + void WaitCompletion(); /* Returns once the task has been marked completed. */ + + IOBuffers *GetBuffers(); + CpaDcRqResults *GetResults(); + +private: + bool completed_; /* Written by dc_callback. */ + + std::shared_ptr qat_instance_; + + std::unique_ptr dc_results_; + std::unique_ptr io_buffers_; + + friend void dc_callback(void *, CpaStatus); +}; + +void dc_callback(void*, CpaStatus); /* Completion callback; the first argument is expected to be the QATTask that was submitted. */ + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/contrib/qat/qatzpp/session.cpp b/deps/zlib/contrib/qat/qatzpp/session.cpp new file mode 100644 index 00000000000000..b4cefb31e85008 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/session.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include +#include + +#include "memory.hpp" +#include "session.hpp" + +namespace qat +{ + +constexpr CpaDcHuffType kHuffType = CPA_DC_HT_FULL_DYNAMIC; + +DeflateSession::DeflateSession( /* Sets up a stateless, combined-direction deflate session on the given instance. NOTE(review): the status codes of the cpaDc* calls and the session allocation are not checked. */ + std::shared_ptr &qat_instance, + CpaDcCompLvl comp_level, CpaDcChecksum checksum, + uint32_t numa_node): + qat_instance_(qat_instance) +{ + uint32_t session_size = 0; + uint32_t ctx_size = 0; + + CpaDcSessionSetupData sd{}; + sd.compLevel = comp_level; + sd.compType = CPA_DC_DEFLATE; + sd.huffType = kHuffType; + sd.autoSelectBestHuffmanTree = CPA_DC_ASB_UNCOMP_STATIC_DYNAMIC_WITH_STORED_HDRS; + sd.sessDirection = CPA_DC_DIR_COMBINED; + sd.sessState = CPA_DC_STATELESS; + sd.checksum = checksum; + + cpaDcGetSessionSize(qat_instance_->GetHandle(), &sd, &session_size, &ctx_size); + session_ = AllocBlockArray(session_size, numa_node); + + cpaDcInitSession( + qat_instance_->GetHandle(), + session_, + &sd, + nullptr, // No context for stateless operations + &dc_callback + ); + +} + +DeflateSession::~DeflateSession() /* Removes the session from the instance and frees its memory. */ +{ + if (session_) { + cpaDcRemoveSession(qat_instance_->GetHandle(), session_); + Free(session_); + } + + session_ = nullptr; +} + +std::unique_ptr DeflateSession::Deflate( /* Submits one compression request and returns the task tracking it; the caller waits with WaitCompletion(). */ + std::unique_ptr &&buffers, + bool flush_final) +{ + CpaDcOpData op_data{}; + op_data.flushFlag = (flush_final) ? 
+ CPA_DC_FLUSH_FINAL : CPA_DC_FLUSH_FULL; + op_data.compressAndVerify = CPA_TRUE; + op_data.inputSkipData.skipMode = CPA_DC_SKIP_DISABLED; + op_data.outputSkipData.skipMode = CPA_DC_SKIP_DISABLED; + + auto task = std::make_unique( + qat_instance_, std::move(buffers), + std::make_unique() + ); + + cpaDcCompressData2( + qat_instance_->GetHandle(), + session_, + &task->GetBuffers()->GetSrc()->list, + &task->GetBuffers()->GetDst()->list, + &op_data, + task->GetResults(), + static_cast(task.get()) + ); + + return task; /* A local is moved implicitly on return; an explicit std::move would only block copy elision. */ +} + +std::unique_ptr DeflateSession::Inflate(std::unique_ptr &&buffers) /* Submits one decompression request and returns the task tracking it. */ +{ + CpaDcOpData op_data = {}; + op_data.flushFlag = CPA_DC_FLUSH_FINAL; + op_data.compressAndVerify = CPA_TRUE; + op_data.inputSkipData.skipMode = CPA_DC_SKIP_DISABLED; + op_data.outputSkipData.skipMode = CPA_DC_SKIP_DISABLED; + + auto task = std::make_unique( + qat_instance_, std::move(buffers), + std::make_unique() + ); + + cpaDcDecompressData2( + qat_instance_->GetHandle(), + session_, + &task->GetBuffers()->GetSrc()->list, + &task->GetBuffers()->GetDst()->list, + &op_data, + task->GetResults(), + static_cast(task.get()) + ); + + return task; /* A local is moved implicitly on return; an explicit std::move would only block copy elision. */ +} + +uint32_t DeflateSession::GetDeflateBound(uint32_t input_size) /* Returns the bound reported by cpaDcDeflateCompressBound, or 0 if the call leaves output_size untouched. */ +{ + uint32_t output_size = 0; + + cpaDcDeflateCompressBound( + qat_instance_->GetHandle(), + kHuffType, + input_size, &output_size + ); + + return output_size; +} + +} diff --git a/deps/zlib/contrib/qat/qatzpp/session.hpp b/deps/zlib/contrib/qat/qatzpp/session.hpp new file mode 100644 index 00000000000000..c8af47c27c2231 --- /dev/null +++ b/deps/zlib/contrib/qat/qatzpp/session.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2024 Intel Corporation. All rights reserved. 
+ * Authors: + * Gustavo A Espinoza + * + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef QATZPP_SESSION_HPP +#define QATZPP_SESSION_HPP + +#include +#include + +#include + +#include "io_buffers.h" +#include "qat_task.hpp" + +namespace qat +{ + +class DeflateSession /* Owns one QAT session handle on a shared instance and submits (de)compression requests on it. */ +{ +public: + DeflateSession( + std::shared_ptr &, CpaDcCompLvl, + CpaDcChecksum, uint32_t numa_node); + ~DeflateSession(); + + std::unique_ptr Deflate(std::unique_ptr &&buffers, bool flush_final); + std::unique_ptr Inflate(std::unique_ptr &&buffers); + + uint32_t GetDeflateBound(uint32_t input_size); + + std::shared_ptr getInstance() { return qat_instance_; } + +private: + std::shared_ptr qat_instance_; + CpaDcSessionHandle session_; +}; + +} + +#endif \ No newline at end of file diff --git a/deps/zlib/crc32.c b/deps/zlib/crc32.c index 4177e920a479df..204aa1ad0c445a 100644 --- a/deps/zlib/crc32.c +++ b/deps/zlib/crc32.c @@ -1168,6 +1168,11 @@ ZLIB_INTERNAL void crc_reset(deflate_state *const s) ZLIB_INTERNAL void crc_finalize(deflate_state *const s) { +#ifdef QAT_COMPRESSION_ENABLED + if (s->qat_s) { /* With QAT active strm->adler is maintained by the QAT path, so it is left untouched here. */ + return; + } +#endif #ifdef CRC32_SIMD_SSE42_PCLMUL if (x86_cpu_enable_simd) s->strm->adler = crc_fold_512to32(s); diff --git a/deps/zlib/deflate.c b/deps/zlib/deflate.c index b9a312030464c7..8a5281c2b6cd8d 100644 --- a/deps/zlib/deflate.c +++ b/deps/zlib/deflate.c @@ -57,6 +57,10 @@ #include "slide_hash_simd.h" #endif +#if defined(QAT_COMPRESSION_ENABLED) +#include "contrib/qat/deflate_qat.h" +#endif + #include "contrib/optimizations/insert_string.h" #ifdef FASTEST @@ -564,6 +568,13 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, s->strategy = strategy; s->method = (Byte)method; +#if defined(QAT_COMPRESSION_ENABLED) + s->qat_s = NULL; /* NOTE(review): confirm that no earlier error path in this function reaches deflateEnd() before this assignment. */ + if (s->level && qat_deflate_init() == Z_OK) { + s->qat_s = qat_deflate_state_init(s->level, s->wrap); + } +#endif + return deflateReset(strm); } @@ -962,6 +973,12 @@ local void flush_pending(z_streamp strm) { 
unsigned len; deflate_state *s = strm->state; +#if defined(QAT_COMPRESSION_ENABLED) + if (s->qat_s) { + qat_flush_pending(s); + } +#endif + _tr_flush_bits(s); len = s->pending; if (len > strm->avail_out) len = strm->avail_out; @@ -1315,6 +1332,12 @@ int ZEXPORT deflateEnd(z_streamp strm) { TRY_FREE(strm, strm->state->prev); TRY_FREE(strm, strm->state->window); +#if defined(QAT_COMPRESSION_ENABLED) + if (strm->state->qat_s) { + qat_deflate_state_free(strm->state); + } +#endif + ZFREE(strm, strm->state); strm->state = Z_NULL; @@ -1389,6 +1412,16 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { ds->d_desc.dyn_tree = ds->dyn_dtree; ds->bl_desc.dyn_tree = ds->bl_tree; +#if defined(QAT_COMPRESSION_ENABLED) + if(ss->qat_s) { + ds->qat_s = qat_deflate_copy(ss); + if (!ds->qat_s) { + deflateEnd(dest); /* ds->qat_s is NULL here, so this frees only dest's own state instead of leaking it. */ + return Z_MEM_ERROR; + } + } +#endif + return Z_OK; #endif /* MAXSEG_64K */ } @@ -1880,6 +1913,24 @@ local block_state deflate_fast(deflate_state *s, int flush) { IPos hash_head; /* head of the hash chain */ int bflush; /* set if current block must be flushed */ +#if defined(QAT_COMPRESSION_ENABLED) + if (s->qat_s) { + qat_block_state qat_block = qat_deflate_step(s, flush); + switch (qat_block) { + case qat_block_need_more: + return need_more; + case qat_block_done: + return block_done; + case qat_block_finish_started: + return finish_started; + case qat_block_finish_done: + return finish_done; + case qat_failure: + break; + } + } +#endif + for (;;) { /* Make sure that we always have enough lookahead, except * at the end of the input file. 
We need MAX_MATCH bytes @@ -1982,6 +2033,24 @@ local block_state deflate_slow(deflate_state *s, int flush) { IPos hash_head; /* head of hash chain */ int bflush; /* set if current block must be flushed */ +#if defined(QAT_COMPRESSION_ENABLED) + if (s->qat_s) { + qat_block_state qat_block = qat_deflate_step(s, flush); + switch (qat_block) { + case qat_block_need_more: + return need_more; + case qat_block_done: + return block_done; + case qat_block_finish_started: + return finish_started; + case qat_block_finish_done: + return finish_done; + case qat_failure: + break; + } + } +#endif + /* Process the input block. */ for (;;) { /* Make sure that we always have enough lookahead, except diff --git a/deps/zlib/deflate.h b/deps/zlib/deflate.h index eb7f0724015cc7..099d35943192bf 100644 --- a/deps/zlib/deflate.h +++ b/deps/zlib/deflate.h @@ -282,6 +282,13 @@ typedef struct internal_state { * hash is enabled. */ +#if defined(QAT_COMPRESSION_ENABLED) + /* Pointer to a struct that contains the current state of the QAT + * stream. + */ + struct qat_deflate *qat_s; +#endif + } FAR deflate_state; /* Output a byte on the stream.