diff --git a/CMakeLists.txt b/CMakeLists.txt index bd742ba3..9bd52ec1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.8) # generate CTest input files enable_testing() @@ -21,8 +20,7 @@ enable_testing() # where to find cmake modules set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules") -set(THIRDPARTY_PREFIX ${CMAKE_SOURCE_DIR}/thirdparty/installed) -set(CMAKE_PREFIX_PATH ${THIRDPARTY_PREFIX}) +set(CMAKE_PREFIX_PATH ${_PREFIX}) # find boost headers and libs set(Boost_DEBUG TRUE) @@ -49,16 +47,22 @@ set_target_properties(thriftstatic PROPERTIES IMPORTED_LOCATION ${THRIFT_STATIC_ ## Snappy find_package(Snappy REQUIRED) include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR}) -add_library(snappystatic STATIC IMPORTED) -set_target_properties(snappystatic PROPERTIES IMPORTED_LOCATION ${SNAPPY_STATIC_LIB}) ## LZ4 find_package(Lz4 REQUIRED) include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) -add_library(lz4static STATIC IMPORTED) -set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB}) -SET(CMAKE_CXX_FLAGS "-msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas") +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) +CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +elseif(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") +else() + message(FATAL_ERROR "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. 
Please use a different C++ compiler.") +endif() +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -Wall -Wno-unused-value -Wno-unused-variable -Wno-sign-compare -Wno-unknown-pragmas") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb") # Thrift requires these definitions for some types that we use diff --git a/cmake_modules/FindLz4.cmake b/cmake_modules/FindLz4.cmake index 4060cdbd..9daaa594 100644 --- a/cmake_modules/FindLz4.cmake +++ b/cmake_modules/FindLz4.cmake @@ -1,67 +1,25 @@ -# Copyright 2012 Cloudera Inc. +# Find the Lz4 libraries # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# - Find LZ4 (lz4.h, liblz4.a, liblz4.so, and liblz4.so.1) -# This module defines -# LZ4_INCLUDE_DIR, directory containing headers -# LZ4_LIBS, directory containing lz4 libraries -# LZ4_STATIC_LIB, path to liblz4.a -# LZ4_FOUND, whether lz4 has been found +# The following are set after configuration is done: +# LZ4_FOUND +# LZ4_INCLUDE_DIR +# LZ4_LIBRARIES -set(LZ4_SEARCH_HEADER_PATHS - ${THIRDPARTY_PREFIX}/include -) -set(LZ4_SEARCH_LIB_PATH - ${THIRDPARTY_PREFIX}/lib +find_path(LZ4_INCLUDE_DIR NAMES lz4.h PATHS + /usr/local/include + /usr/include ) -find_path(LZ4_INCLUDE_DIR lz4.h PATHS - ${LZ4_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH +find_library(LZ4_LIBRARIES NAMES lz4 PATHS + /usr/local/lib + /usr/lib ) - -find_library(LZ4_LIB_PATH NAMES liblz4.a PATHS ${LZ4_SEARCH_LIB_PATH} NO_DEFAULT_PATH) - -if (LZ4_INCLUDE_DIR AND LZ4_LIB_PATH) - set(LZ4_FOUND TRUE) - set(LZ4_LIBS ${LZ4_SEARCH_LIB_PATH}) - set(LZ4_STATIC_LIB ${LZ4_SEARCH_LIB_PATH}/liblz4.a) -else () - set(LZ4_FOUND FALSE) -endif () +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Lz4 DEFAULT_MSG LZ4_INCLUDE_DIR LZ4_LIBRARIES) if (LZ4_FOUND) - if (NOT Lz4_FIND_QUIETLY) - message(STATUS "Found the Lz4 library: ${LZ4_LIB_PATH}") - endif () -else () - if (NOT Lz4_FIND_QUIETLY) - set(LZ4_ERR_MSG "Could not find the Lz4 library. 
Looked for headers") - set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_HEADER_PATHS}, and for libs") - set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_LIB_PATH}") - if (Lz4_FIND_REQUIRED) - message(FATAL_ERROR "${LZ4_ERR_MSG}") - else (Lz4_FIND_REQUIRED) - message(STATUS "${LZ4_ERR_MSG}") - endif (Lz4_FIND_REQUIRED) - endif () -endif () - -mark_as_advanced( - LZ4_INCLUDE_DIR - LZ4_LIBS - LZ4_STATIC_LIB -) + message(STATUS "Found Lz4 (include: ${LZ4_INCLUDE_DIR}, library: ${LZ4_LIBRARIES})") + mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARIES) +endif() diff --git a/cmake_modules/FindSnappy.cmake b/cmake_modules/FindSnappy.cmake index 3d1ba14c..40b13e92 100644 --- a/cmake_modules/FindSnappy.cmake +++ b/cmake_modules/FindSnappy.cmake @@ -1,67 +1,21 @@ -# Copyright 2012 Cloudera Inc. +# Find the Snappy libraries # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# - Find SNAPPY (snappy.h, libsnappy.a, libsnappy.so, and libsnappy.so.1) -# This module defines -# SNAPPY_INCLUDE_DIR, directory containing headers -# SNAPPY_LIBS, directory containing snappy libraries -# SNAPPY_STATIC_LIB, path to libsnappy.a -# SNAPPY_FOUND, whether snappy has been found - -set(SNAPPY_SEARCH_HEADER_PATHS - ${THIRDPARTY_PREFIX}/include -) - -set(SNAPPY_SEARCH_LIB_PATH - ${THIRDPARTY_PREFIX}/lib -) - -find_path(SNAPPY_INCLUDE_DIR snappy.h PATHS - ${SNAPPY_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH -) +# The following are set after configuration is done: +# SNAPPY_FOUND +# SNAPPY_INCLUDE_DIR +# Snappy_LIBRARIES -find_library(SNAPPY_LIB_PATH NAMES snappy PATHS ${SNAPPY_SEARCH_LIB_PATH} NO_DEFAULT_PATH) +find_path(SNAPPY_INCLUDE_DIR NAMES snappy.h + PATHS /usr/local/include /usr/include) -if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIB_PATH) - set(SNAPPY_FOUND TRUE) - set(SNAPPY_LIBS ${SNAPPY_SEARCH_LIB_PATH}) - set(SNAPPY_STATIC_LIB ${SNAPPY_SEARCH_LIB_PATH}/libsnappy.a) -else () - set(SNAPPY_FOUND FALSE) -endif () +find_library(Snappy_LIBRARIES NAMES snappy + PATHS /usr/local/lib /usr/lib) -if (SNAPPY_FOUND) - if (NOT Snappy_FIND_QUIETLY) - message(STATUS "Found the Snappy library: ${SNAPPY_LIB_PATH}") - endif () -else () - if (NOT Snappy_FIND_QUIETLY) - set(SNAPPY_ERR_MSG "Could not find the Snappy library. 
Looked for headers") - set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_HEADER_PATHS}, and for libs") - set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_LIB_PATH}") - if (Snappy_FIND_REQUIRED) - message(FATAL_ERROR "${SNAPPY_ERR_MSG}") - else (Snappy_FIND_REQUIRED) - message(STATUS "${SNAPPY_ERR_MSG}") - endif (Snappy_FIND_REQUIRED) - endif () -endif () +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Snappy DEFAULT_MSG SNAPPY_INCLUDE_DIR Snappy_LIBRARIES) -mark_as_advanced( - SNAPPY_INCLUDE_DIR - SNAPPY_LIBS - SNAPPY_STATIC_LIB -) +if(SNAPPY_FOUND) + message(STATUS "Found Snappy (include: ${SNAPPY_INCLUDE_DIR}, library: ${Snappy_LIBRARIES})") + mark_as_advanced(SNAPPY_INCLUDE_DIR Snappy_LIBRARIES) +endif() diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 1f598560..4bb93aa0 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -21,11 +21,10 @@ SET(LINK_LIBS Parquet ParquetCompression Example - rt ThriftParquet thriftstatic - lz4static - snappystatic) + ${LZ4_LIBRARIES} + ${Snappy_LIBRARIES}) add_executable(compute_stats compute_stats.cc) target_link_libraries(compute_stats ${LINK_LIBS}) diff --git a/example/parquet_reader.cc b/example/parquet_reader.cc index c02ffb0b..0c87396c 100644 --- a/example/parquet_reader.cc +++ b/example/parquet_reader.cc @@ -54,7 +54,7 @@ int main(int argc, char** argv) { void *column_ptr = read_parquet(argv[1]); // an example to use the returned column_ptr - // printf("%-"COL_WIDTH"d\n",((int32_t *)(((int32_t **)column_ptr)[0]))[0]); + // printf("%-" COL_WIDTH "d\n",((int32_t *)(((int32_t **)column_ptr)[0]))[0]); return 0; } @@ -215,7 +215,7 @@ void* read_parquet(char* filename) { char *str = (char*)malloc(50); assert(str); strcpy(str, metadata.schema[j+1].name.c_str()); - printf("%-"COL_WIDTH"s", str); + printf("%-" COL_WIDTH "s", str); free(str); } @@ -225,22 +225,22 @@ void* read_parquet(char* filename) { for (j = 0;j < row_group.columns.size(); ++j) 
switch(type_array[j]) { case Type::BOOLEAN: - printf("%-"COL_WIDTH"s","BOOLEAN"); + printf("%-" COL_WIDTH "s","BOOLEAN"); break; case Type::INT32: - printf("%-"COL_WIDTH"s","INT32"); + printf("%-" COL_WIDTH "s","INT32"); break; case Type::INT64: - printf("%-"COL_WIDTH"s","INT64"); + printf("%-" COL_WIDTH "s","INT64"); break; case Type::FLOAT: - printf("%-"COL_WIDTH"s","FLOAT"); + printf("%-" COL_WIDTH "s","FLOAT"); break; case Type::DOUBLE: - printf("%-"COL_WIDTH"s","DOUBLE"); + printf("%-" COL_WIDTH "s","DOUBLE"); break; case Type::BYTE_ARRAY: - printf("%-"COL_WIDTH"s","BYTE_ARRAY"); + printf("%-" COL_WIDTH "s","BYTE_ARRAY"); break; default: continue; @@ -255,26 +255,26 @@ void* read_parquet(char* filename) { for (j = 0; j < row_group.columns.size(); ++j) { switch(type_array[j]) { case Type::BOOLEAN: - printf("%-"COL_WIDTH"d",((bool*)(((bool**)column_ptr)[j]))[k]); + printf("%-" COL_WIDTH "d",((bool*)(((bool**)column_ptr)[j]))[k]); break; case Type::INT32: - printf("%-"COL_WIDTH"d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]); + printf("%-" COL_WIDTH "d",((int32_t *)(((int32_t **)column_ptr)[j]))[k]); break; case Type::INT64: - printf("%-"COL_WIDTH"ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]); + printf("%-" COL_WIDTH "ld",((int64_t *)(((int64_t **)column_ptr)[j]))[k]); break; case Type::FLOAT: - printf("%-"COL_WIDTH"f",((float*)(((float**)column_ptr)[j]))[k]); + printf("%-" COL_WIDTH "f",((float*)(((float**)column_ptr)[j]))[k]); break; case Type::DOUBLE: - printf("%-"COL_WIDTH"lf",((double*)(((double**)column_ptr)[j]))[k]); + printf("%-" COL_WIDTH "lf",((double*)(((double**)column_ptr)[j]))[k]); break; case Type::BYTE_ARRAY: result = ByteArrayToString( ((ByteArray*)(((ByteArray**)column_ptr)[j]))[k] ); str1 = (char*)malloc(result.size()); assert(str1); strcpy(str1, result.c_str()); - printf("%-"COL_WIDTH"s", str1); + printf("%-" COL_WIDTH "s", str1); free(str1); break; default: diff --git a/generated/gen-cpp/parquet_constants.cpp 
b/generated/gen-cpp/parquet_constants.cpp index caa5af69..15c40ed1 100644 --- a/generated/gen-cpp/parquet_constants.cpp +++ b/generated/gen-cpp/parquet_constants.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.9.0) + * Autogenerated by Thrift Compiler (0.9.2) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated diff --git a/generated/gen-cpp/parquet_constants.h b/generated/gen-cpp/parquet_constants.h index 71d6f58a..f717f1e2 100644 --- a/generated/gen-cpp/parquet_constants.h +++ b/generated/gen-cpp/parquet_constants.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.9.0) + * Autogenerated by Thrift Compiler (0.9.2) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated diff --git a/generated/gen-cpp/parquet_types.cpp b/generated/gen-cpp/parquet_types.cpp index 06d388cf..37ca83f0 100644 --- a/generated/gen-cpp/parquet_types.cpp +++ b/generated/gen-cpp/parquet_types.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.9.0) + * Autogenerated by Thrift Compiler (0.9.2) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -7,6 +7,9 @@ #include "parquet_types.h" #include +#include + +#include namespace parquet { @@ -38,7 +41,21 @@ int _kConvertedTypeValues[] = { ConvertedType::MAP_KEY_VALUE, ConvertedType::LIST, ConvertedType::ENUM, - ConvertedType::DECIMAL + ConvertedType::DECIMAL, + ConvertedType::DATE, + ConvertedType::TIME_MILLIS, + ConvertedType::TIMESTAMP_MILLIS, + ConvertedType::UINT_8, + ConvertedType::UINT_16, + ConvertedType::UINT_32, + ConvertedType::UINT_64, + ConvertedType::INT_8, + ConvertedType::INT_16, + ConvertedType::INT_32, + ConvertedType::INT_64, + ConvertedType::JSON, + ConvertedType::BSON, + ConvertedType::INTERVAL }; const char* _kConvertedTypeNames[] = { "UTF8", @@ -46,9 +63,23 @@ const char* _kConvertedTypeNames[] = { "MAP_KEY_VALUE", "LIST", "ENUM", - "DECIMAL" + "DECIMAL", + "DATE", + "TIME_MILLIS", + 
"TIMESTAMP_MILLIS", + "UINT_8", + "UINT_16", + "UINT_32", + "UINT_64", + "INT_8", + "INT_16", + "INT_32", + "INT_64", + "JSON", + "BSON", + "INTERVAL" }; -const std::map _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _ConvertedType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(20, _kConvertedTypeValues, _kConvertedTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kFieldRepetitionTypeValues[] = { FieldRepetitionType::REQUIRED, @@ -112,6 +143,31 @@ const char* _kPageTypeNames[] = { }; const std::map _PageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); + +Statistics::~Statistics() throw() { +} + + +void Statistics::__set_max(const std::string& val) { + this->max = val; +__isset.max = true; +} + +void Statistics::__set_min(const std::string& val) { + this->min = val; +__isset.min = true; +} + +void Statistics::__set_null_count(const int64_t val) { + this->null_count = val; +__isset.null_count = true; +} + +void Statistics::__set_distinct_count(const int64_t val) { + this->distinct_count = val; +__isset.distinct_count = true; +} + const char* Statistics::ascii_fingerprint = "CE004821871820DD79A8FD98BB101F6D"; const uint8_t Statistics::binary_fingerprint[16] = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D}; @@ -181,35 +237,32 @@ uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("Statistics"); if (this->__isset.max) { - ++fcnt; xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); xfer += oprot->writeBinary(this->max); xfer += oprot->writeFieldEnd(); } if 
(this->__isset.min) { - ++fcnt; xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); xfer += oprot->writeBinary(this->min); xfer += oprot->writeFieldEnd(); } if (this->__isset.null_count) { - ++fcnt; xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->null_count); xfer += oprot->writeFieldEnd(); } if (this->__isset.distinct_count) { - ++fcnt; xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); xfer += oprot->writeI64(this->distinct_count); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -222,8 +275,83 @@ void swap(Statistics &a, Statistics &b) { swap(a.__isset, b.__isset); } -const char* SchemaElement::ascii_fingerprint = "388A784401753800444CFEAC8BC1B1A1"; -const uint8_t SchemaElement::binary_fingerprint[16] = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1}; +Statistics::Statistics(const Statistics& other0) { + max = other0.max; + min = other0.min; + null_count = other0.null_count; + distinct_count = other0.distinct_count; + __isset = other0.__isset; +} +Statistics& Statistics::operator=(const Statistics& other1) { + max = other1.max; + min = other1.min; + null_count = other1.null_count; + distinct_count = other1.distinct_count; + __isset = other1.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const Statistics& obj) { + using apache::thrift::to_string; + out << "Statistics("; + out << "max="; (obj.__isset.max ? (out << to_string(obj.max)) : (out << "")); + out << ", " << "min="; (obj.__isset.min ? (out << to_string(obj.min)) : (out << "")); + out << ", " << "null_count="; (obj.__isset.null_count ? (out << to_string(obj.null_count)) : (out << "")); + out << ", " << "distinct_count="; (obj.__isset.distinct_count ? 
(out << to_string(obj.distinct_count)) : (out << "")); + out << ")"; + return out; +} + + +SchemaElement::~SchemaElement() throw() { +} + + +void SchemaElement::__set_type(const Type::type val) { + this->type = val; +__isset.type = true; +} + +void SchemaElement::__set_type_length(const int32_t val) { + this->type_length = val; +__isset.type_length = true; +} + +void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { + this->repetition_type = val; +__isset.repetition_type = true; +} + +void SchemaElement::__set_name(const std::string& val) { + this->name = val; +} + +void SchemaElement::__set_num_children(const int32_t val) { + this->num_children = val; +__isset.num_children = true; +} + +void SchemaElement::__set_converted_type(const ConvertedType::type val) { + this->converted_type = val; +__isset.converted_type = true; +} + +void SchemaElement::__set_scale(const int32_t val) { + this->scale = val; +__isset.scale = true; +} + +void SchemaElement::__set_precision(const int32_t val) { + this->precision = val; +__isset.precision = true; +} + +void SchemaElement::__set_field_id(const int32_t val) { + this->field_id = val; +__isset.field_id = true; +} + +const char* SchemaElement::ascii_fingerprint = "22DC89BFD9E48E604F01FB8CFDFB8229"; +const uint8_t SchemaElement::binary_fingerprint[16] = {0x22,0xDC,0x89,0xBF,0xD9,0xE4,0x8E,0x60,0x4F,0x01,0xFB,0x8C,0xFD,0xFB,0x82,0x29}; uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -248,9 +376,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast0; - xfer += iprot->readI32(ecast0); - this->type = (Type::type)ecast0; + int32_t ecast2; + xfer += iprot->readI32(ecast2); + this->type = (Type::type)ecast2; this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -266,9 +394,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 3: 
if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast1; - xfer += iprot->readI32(ecast1); - this->repetition_type = (FieldRepetitionType::type)ecast1; + int32_t ecast3; + xfer += iprot->readI32(ecast3); + this->repetition_type = (FieldRepetitionType::type)ecast3; this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -292,9 +420,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast2; - xfer += iprot->readI32(ecast2); - this->converted_type = (ConvertedType::type)ecast2; + int32_t ecast4; + xfer += iprot->readI32(ecast4); + this->converted_type = (ConvertedType::type)ecast4; this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -316,6 +444,14 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->field_id); + this->__isset.field_id = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -332,58 +468,56 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("SchemaElement"); if (this->__isset.type) { - ++fcnt; xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32((int32_t)this->type); xfer += oprot->writeFieldEnd(); } if (this->__isset.type_length) { - ++fcnt; xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); xfer += oprot->writeI32(this->type_length); xfer += oprot->writeFieldEnd(); } if (this->__isset.repetition_type) { - ++fcnt; xfer += oprot->writeFieldBegin("repetition_type", 
::apache::thrift::protocol::T_I32, 3); xfer += oprot->writeI32((int32_t)this->repetition_type); xfer += oprot->writeFieldEnd(); } - ++fcnt; xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); xfer += oprot->writeString(this->name); xfer += oprot->writeFieldEnd(); if (this->__isset.num_children) { - ++fcnt; xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); xfer += oprot->writeI32(this->num_children); xfer += oprot->writeFieldEnd(); } if (this->__isset.converted_type) { - ++fcnt; xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); xfer += oprot->writeI32((int32_t)this->converted_type); xfer += oprot->writeFieldEnd(); } if (this->__isset.scale) { - ++fcnt; xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); xfer += oprot->writeI32(this->scale); xfer += oprot->writeFieldEnd(); } if (this->__isset.precision) { - ++fcnt; xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); xfer += oprot->writeI32(this->precision); xfer += oprot->writeFieldEnd(); } + if (this->__isset.field_id) { + xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); + xfer += oprot->writeI32(this->field_id); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -397,9 +531,77 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.converted_type, b.converted_type); swap(a.scale, b.scale); swap(a.precision, b.precision); + swap(a.field_id, b.field_id); swap(a.__isset, b.__isset); } +SchemaElement::SchemaElement(const SchemaElement& other5) { + type = other5.type; + type_length = other5.type_length; + repetition_type = other5.repetition_type; + name = other5.name; + num_children = other5.num_children; + converted_type = other5.converted_type; + scale = other5.scale; + precision = other5.precision; + field_id = 
other5.field_id; + __isset = other5.__isset; +} +SchemaElement& SchemaElement::operator=(const SchemaElement& other6) { + type = other6.type; + type_length = other6.type_length; + repetition_type = other6.repetition_type; + name = other6.name; + num_children = other6.num_children; + converted_type = other6.converted_type; + scale = other6.scale; + precision = other6.precision; + field_id = other6.field_id; + __isset = other6.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) { + using apache::thrift::to_string; + out << "SchemaElement("; + out << "type="; (obj.__isset.type ? (out << to_string(obj.type)) : (out << "")); + out << ", " << "type_length="; (obj.__isset.type_length ? (out << to_string(obj.type_length)) : (out << "")); + out << ", " << "repetition_type="; (obj.__isset.repetition_type ? (out << to_string(obj.repetition_type)) : (out << "")); + out << ", " << "name=" << to_string(obj.name); + out << ", " << "num_children="; (obj.__isset.num_children ? (out << to_string(obj.num_children)) : (out << "")); + out << ", " << "converted_type="; (obj.__isset.converted_type ? (out << to_string(obj.converted_type)) : (out << "")); + out << ", " << "scale="; (obj.__isset.scale ? (out << to_string(obj.scale)) : (out << "")); + out << ", " << "precision="; (obj.__isset.precision ? (out << to_string(obj.precision)) : (out << "")); + out << ", " << "field_id="; (obj.__isset.field_id ? 
(out << to_string(obj.field_id)) : (out << "")); + out << ")"; + return out; +} + + +DataPageHeader::~DataPageHeader() throw() { +} + + +void DataPageHeader::__set_num_values(const int32_t val) { + this->num_values = val; +} + +void DataPageHeader::__set_encoding(const Encoding::type val) { + this->encoding = val; +} + +void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { + this->definition_level_encoding = val; +} + +void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { + this->repetition_level_encoding = val; +} + +void DataPageHeader::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} + const char* DataPageHeader::ascii_fingerprint = "5FC1792B0483E9C984475384165040B1"; const uint8_t DataPageHeader::binary_fingerprint[16] = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1}; @@ -437,9 +639,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast3; - xfer += iprot->readI32(ecast3); - this->encoding = (Encoding::type)ecast3; + int32_t ecast7; + xfer += iprot->readI32(ecast7); + this->encoding = (Encoding::type)ecast7; isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -447,9 +649,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast4; - xfer += iprot->readI32(ecast4); - this->definition_level_encoding = (Encoding::type)ecast4; + int32_t ecast8; + xfer += iprot->readI32(ecast8); + this->definition_level_encoding = (Encoding::type)ecast8; isset_definition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -457,9 +659,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast5; - xfer += 
iprot->readI32(ecast5); - this->repetition_level_encoding = (Encoding::type)ecast5; + int32_t ecast9; + xfer += iprot->readI32(ecast9); + this->repetition_level_encoding = (Encoding::type)ecast9; isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -495,37 +697,33 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("DataPageHeader"); - ++fcnt; xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32(this->num_values); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); xfer += oprot->writeI32((int32_t)this->encoding); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); xfer += oprot->writeI32((int32_t)this->definition_level_encoding); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); xfer += oprot->writeI32((int32_t)this->repetition_level_encoding); xfer += oprot->writeFieldEnd(); if (this->__isset.statistics) { - ++fcnt; xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); xfer += this->statistics.write(oprot); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -539,6 +737,40 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } +DataPageHeader::DataPageHeader(const DataPageHeader& other10) { + num_values = other10.num_values; + encoding = other10.encoding; + definition_level_encoding = other10.definition_level_encoding; + repetition_level_encoding = 
other10.repetition_level_encoding; + statistics = other10.statistics; + __isset = other10.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other11) { + num_values = other11.num_values; + encoding = other11.encoding; + definition_level_encoding = other11.definition_level_encoding; + repetition_level_encoding = other11.repetition_level_encoding; + statistics = other11.statistics; + __isset = other11.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) { + using apache::thrift::to_string; + out << "DataPageHeader("; + out << "num_values=" << to_string(obj.num_values); + out << ", " << "encoding=" << to_string(obj.encoding); + out << ", " << "definition_level_encoding=" << to_string(obj.definition_level_encoding); + out << ", " << "repetition_level_encoding=" << to_string(obj.repetition_level_encoding); + out << ", " << "statistics="; (obj.__isset.statistics ? (out << to_string(obj.statistics)) : (out << "")); + out << ")"; + return out; +} + + +IndexPageHeader::~IndexPageHeader() throw() { +} + + const char* IndexPageHeader::ascii_fingerprint = "99914B932BD37A50B983C5E7C90AE93B"; const uint8_t IndexPageHeader::binary_fingerprint[16] = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B}; @@ -571,11 +803,12 @@ uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("IndexPageHeader"); xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -585,6 +818,39 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other12) { + (void) other12; +} +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other13) { + 
(void) other13; + return *this; +} +std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) { + using apache::thrift::to_string; + (void) obj; + out << "IndexPageHeader("; + out << ")"; + return out; +} + + +DictionaryPageHeader::~DictionaryPageHeader() throw() { +} + + +void DictionaryPageHeader::__set_num_values(const int32_t val) { + this->num_values = val; +} + +void DictionaryPageHeader::__set_encoding(const Encoding::type val) { + this->encoding = val; +} + +void DictionaryPageHeader::__set_is_sorted(const bool val) { + this->is_sorted = val; +__isset.is_sorted = true; +} + const char* DictionaryPageHeader::ascii_fingerprint = "B149E4528254D495610C22AE4BD539C5"; const uint8_t DictionaryPageHeader::binary_fingerprint[16] = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5}; @@ -620,9 +886,9 @@ uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast6; - xfer += iprot->readI32(ecast6); - this->encoding = (Encoding::type)ecast6; + int32_t ecast14; + xfer += iprot->readI32(ecast14); + this->encoding = (Encoding::type)ecast14; isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -654,27 +920,25 @@ uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("DictionaryPageHeader"); - ++fcnt; xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32(this->num_values); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); xfer += oprot->writeI32((int32_t)this->encoding); xfer += oprot->writeFieldEnd(); if (this->__isset.is_sorted) { - ++fcnt; xfer += 
oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); xfer += oprot->writeBool(this->is_sorted); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -686,6 +950,68 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other15) { + num_values = other15.num_values; + encoding = other15.encoding; + is_sorted = other15.is_sorted; + __isset = other15.__isset; +} +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other16) { + num_values = other16.num_values; + encoding = other16.encoding; + is_sorted = other16.is_sorted; + __isset = other16.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) { + using apache::thrift::to_string; + out << "DictionaryPageHeader("; + out << "num_values=" << to_string(obj.num_values); + out << ", " << "encoding=" << to_string(obj.encoding); + out << ", " << "is_sorted="; (obj.__isset.is_sorted ? 
(out << to_string(obj.is_sorted)) : (out << "")); + out << ")"; + return out; +} + + +DataPageHeaderV2::~DataPageHeaderV2() throw() { +} + + +void DataPageHeaderV2::__set_num_values(const int32_t val) { + this->num_values = val; +} + +void DataPageHeaderV2::__set_num_nulls(const int32_t val) { + this->num_nulls = val; +} + +void DataPageHeaderV2::__set_num_rows(const int32_t val) { + this->num_rows = val; +} + +void DataPageHeaderV2::__set_encoding(const Encoding::type val) { + this->encoding = val; +} + +void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { + this->definition_levels_byte_length = val; +} + +void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { + this->repetition_levels_byte_length = val; +} + +void DataPageHeaderV2::__set_is_compressed(const bool val) { + this->is_compressed = val; +__isset.is_compressed = true; +} + +void DataPageHeaderV2::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} + const char* DataPageHeaderV2::ascii_fingerprint = "69FF2F6BD1A443440D5E46ABA5A3A919"; const uint8_t DataPageHeaderV2::binary_fingerprint[16] = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19}; @@ -741,9 +1067,9 @@ uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast7; - xfer += iprot->readI32(ecast7); - this->encoding = (Encoding::type)ecast7; + int32_t ecast17; + xfer += iprot->readI32(ecast17); + this->encoding = (Encoding::type)ecast17; isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -807,53 +1133,46 @@ uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("DataPageHeaderV2"); - ++fcnt; 
xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32(this->num_values); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); xfer += oprot->writeI32(this->num_nulls); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); xfer += oprot->writeI32(this->num_rows); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); xfer += oprot->writeI32((int32_t)this->encoding); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); xfer += oprot->writeI32(this->definition_levels_byte_length); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); xfer += oprot->writeI32(this->repetition_levels_byte_length); xfer += oprot->writeFieldEnd(); if (this->__isset.is_compressed) { - ++fcnt; xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); xfer += oprot->writeBool(this->is_compressed); xfer += oprot->writeFieldEnd(); } if (this->__isset.statistics) { - ++fcnt; xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); xfer += this->statistics.write(oprot); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -870,6 +1189,86 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other18) { + num_values = other18.num_values; + num_nulls = other18.num_nulls; + num_rows = other18.num_rows; + encoding = other18.encoding; + definition_levels_byte_length = other18.definition_levels_byte_length; + 
repetition_levels_byte_length = other18.repetition_levels_byte_length; + is_compressed = other18.is_compressed; + statistics = other18.statistics; + __isset = other18.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other19) { + num_values = other19.num_values; + num_nulls = other19.num_nulls; + num_rows = other19.num_rows; + encoding = other19.encoding; + definition_levels_byte_length = other19.definition_levels_byte_length; + repetition_levels_byte_length = other19.repetition_levels_byte_length; + is_compressed = other19.is_compressed; + statistics = other19.statistics; + __isset = other19.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) { + using apache::thrift::to_string; + out << "DataPageHeaderV2("; + out << "num_values=" << to_string(obj.num_values); + out << ", " << "num_nulls=" << to_string(obj.num_nulls); + out << ", " << "num_rows=" << to_string(obj.num_rows); + out << ", " << "encoding=" << to_string(obj.encoding); + out << ", " << "definition_levels_byte_length=" << to_string(obj.definition_levels_byte_length); + out << ", " << "repetition_levels_byte_length=" << to_string(obj.repetition_levels_byte_length); + out << ", " << "is_compressed="; (obj.__isset.is_compressed ? (out << to_string(obj.is_compressed)) : (out << "")); + out << ", " << "statistics="; (obj.__isset.statistics ? 
(out << to_string(obj.statistics)) : (out << "")); + out << ")"; + return out; +} + + +PageHeader::~PageHeader() throw() { +} + + +void PageHeader::__set_type(const PageType::type val) { + this->type = val; +} + +void PageHeader::__set_uncompressed_page_size(const int32_t val) { + this->uncompressed_page_size = val; +} + +void PageHeader::__set_compressed_page_size(const int32_t val) { + this->compressed_page_size = val; +} + +void PageHeader::__set_crc(const int32_t val) { + this->crc = val; +__isset.crc = true; +} + +void PageHeader::__set_data_page_header(const DataPageHeader& val) { + this->data_page_header = val; +__isset.data_page_header = true; +} + +void PageHeader::__set_index_page_header(const IndexPageHeader& val) { + this->index_page_header = val; +__isset.index_page_header = true; +} + +void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { + this->dictionary_page_header = val; +__isset.dictionary_page_header = true; +} + +void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { + this->data_page_header_v2 = val; +__isset.data_page_header_v2 = true; +} + const char* PageHeader::ascii_fingerprint = "B5BD2BDF3756C883A58B30B9C9F204A0"; const uint8_t PageHeader::binary_fingerprint[16] = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0}; @@ -898,9 +1297,9 @@ uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast8; - xfer += iprot->readI32(ecast8); - this->type = (PageType::type)ecast8; + int32_t ecast20; + xfer += iprot->readI32(ecast20); + this->type = (PageType::type)ecast20; isset_type = true; } else { xfer += iprot->skip(ftype); @@ -982,56 +1381,49 @@ uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += 
oprot->writeStructBegin("PageHeader"); - ++fcnt; xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32((int32_t)this->type); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); xfer += oprot->writeI32(this->uncompressed_page_size); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); xfer += oprot->writeI32(this->compressed_page_size); xfer += oprot->writeFieldEnd(); if (this->__isset.crc) { - ++fcnt; xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); xfer += oprot->writeI32(this->crc); xfer += oprot->writeFieldEnd(); } if (this->__isset.data_page_header) { - ++fcnt; xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); xfer += this->data_page_header.write(oprot); xfer += oprot->writeFieldEnd(); } if (this->__isset.index_page_header) { - ++fcnt; xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); xfer += this->index_page_header.write(oprot); xfer += oprot->writeFieldEnd(); } if (this->__isset.dictionary_page_header) { - ++fcnt; xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); xfer += this->dictionary_page_header.write(oprot); xfer += oprot->writeFieldEnd(); } if (this->__isset.data_page_header_v2) { - ++fcnt; xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); xfer += this->data_page_header_v2.write(oprot); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1048,6 +1440,58 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } +PageHeader::PageHeader(const PageHeader& other21) { + type = other21.type; + uncompressed_page_size = 
other21.uncompressed_page_size; + compressed_page_size = other21.compressed_page_size; + crc = other21.crc; + data_page_header = other21.data_page_header; + index_page_header = other21.index_page_header; + dictionary_page_header = other21.dictionary_page_header; + data_page_header_v2 = other21.data_page_header_v2; + __isset = other21.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other22) { + type = other22.type; + uncompressed_page_size = other22.uncompressed_page_size; + compressed_page_size = other22.compressed_page_size; + crc = other22.crc; + data_page_header = other22.data_page_header; + index_page_header = other22.index_page_header; + dictionary_page_header = other22.dictionary_page_header; + data_page_header_v2 = other22.data_page_header_v2; + __isset = other22.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const PageHeader& obj) { + using apache::thrift::to_string; + out << "PageHeader("; + out << "type=" << to_string(obj.type); + out << ", " << "uncompressed_page_size=" << to_string(obj.uncompressed_page_size); + out << ", " << "compressed_page_size=" << to_string(obj.compressed_page_size); + out << ", " << "crc="; (obj.__isset.crc ? (out << to_string(obj.crc)) : (out << "")); + out << ", " << "data_page_header="; (obj.__isset.data_page_header ? (out << to_string(obj.data_page_header)) : (out << "")); + out << ", " << "index_page_header="; (obj.__isset.index_page_header ? (out << to_string(obj.index_page_header)) : (out << "")); + out << ", " << "dictionary_page_header="; (obj.__isset.dictionary_page_header ? (out << to_string(obj.dictionary_page_header)) : (out << "")); + out << ", " << "data_page_header_v2="; (obj.__isset.data_page_header_v2 ? 
(out << to_string(obj.data_page_header_v2)) : (out << "")); + out << ")"; + return out; +} + + +KeyValue::~KeyValue() throw() { +} + + +void KeyValue::__set_key(const std::string& val) { + this->key = val; +} + +void KeyValue::__set_value(const std::string& val) { + this->value = val; +__isset.value = true; +} + const char* KeyValue::ascii_fingerprint = "5B708A954C550ECA9C1A49D3C5CAFAB9"; const uint8_t KeyValue::binary_fingerprint[16] = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9}; @@ -1104,22 +1548,21 @@ uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("KeyValue"); - ++fcnt; xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); xfer += oprot->writeString(this->key); xfer += oprot->writeFieldEnd(); if (this->__isset.value) { - ++fcnt; xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); xfer += oprot->writeString(this->value); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1130,6 +1573,43 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } +KeyValue::KeyValue(const KeyValue& other23) { + key = other23.key; + value = other23.value; + __isset = other23.__isset; +} +KeyValue& KeyValue::operator=(const KeyValue& other24) { + key = other24.key; + value = other24.value; + __isset = other24.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const KeyValue& obj) { + using apache::thrift::to_string; + out << "KeyValue("; + out << "key=" << to_string(obj.key); + out << ", " << "value="; (obj.__isset.value ? 
(out << to_string(obj.value)) : (out << "")); + out << ")"; + return out; +} + + +SortingColumn::~SortingColumn() throw() { +} + + +void SortingColumn::__set_column_idx(const int32_t val) { + this->column_idx = val; +} + +void SortingColumn::__set_descending(const bool val) { + this->descending = val; +} + +void SortingColumn::__set_nulls_first(const bool val) { + this->nulls_first = val; +} + const char* SortingColumn::ascii_fingerprint = "F079C2D58A783AD90F9BE05D10DBBC6F"; const uint8_t SortingColumn::binary_fingerprint[16] = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F}; @@ -1200,26 +1680,24 @@ uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("SortingColumn"); - ++fcnt; xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32(this->column_idx); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2); xfer += oprot->writeBool(this->descending); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3); xfer += oprot->writeBool(this->nulls_first); xfer += oprot->writeFieldEnd(); xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1230,8 +1708,231 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -const char* ColumnMetaData::ascii_fingerprint = "1AF797732BCB4465C6314FB29B86638D"; -const uint8_t ColumnMetaData::binary_fingerprint[16] = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D}; +SortingColumn::SortingColumn(const SortingColumn& other25) { + column_idx = other25.column_idx; + 
descending = other25.descending; + nulls_first = other25.nulls_first; +} +SortingColumn& SortingColumn::operator=(const SortingColumn& other26) { + column_idx = other26.column_idx; + descending = other26.descending; + nulls_first = other26.nulls_first; + return *this; +} +std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) { + using apache::thrift::to_string; + out << "SortingColumn("; + out << "column_idx=" << to_string(obj.column_idx); + out << ", " << "descending=" << to_string(obj.descending); + out << ", " << "nulls_first=" << to_string(obj.nulls_first); + out << ")"; + return out; +} + + +PageEncodingStats::~PageEncodingStats() throw() { +} + + +void PageEncodingStats::__set_page_type(const PageType::type val) { + this->page_type = val; +} + +void PageEncodingStats::__set_encoding(const Encoding::type val) { + this->encoding = val; +} + +void PageEncodingStats::__set_count(const int32_t val) { + this->count = val; +} + +const char* PageEncodingStats::ascii_fingerprint = "5F1BEE04836FAA3055D3EE2492AE44FB"; +const uint8_t PageEncodingStats::binary_fingerprint[16] = {0x5F,0x1B,0xEE,0x04,0x83,0x6F,0xAA,0x30,0x55,0xD3,0xEE,0x24,0x92,0xAE,0x44,0xFB}; + +uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_type = false; + bool isset_encoding = false; + bool isset_count = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast27; + xfer += iprot->readI32(ecast27); + this->page_type = (PageType::type)ecast27; + isset_page_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == 
::apache::thrift::protocol::T_I32) { + int32_t ecast28; + xfer += iprot->readI32(ecast28); + this->encoding = (Encoding::type)ecast28; + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->count); + isset_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_count) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + oprot->incrementRecursionDepth(); + xfer += oprot->writeStructBegin("PageEncodingStats"); + + xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32((int32_t)this->page_type); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32((int32_t)this->encoding); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->count); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); + return xfer; +} + +void swap(PageEncodingStats &a, PageEncodingStats &b) { + using ::std::swap; + swap(a.page_type, b.page_type); + swap(a.encoding, b.encoding); + swap(a.count, b.count); +} + +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other29) { + page_type = other29.page_type; + encoding = other29.encoding; + count = other29.count; +} +PageEncodingStats& 
PageEncodingStats::operator=(const PageEncodingStats& other30) { + page_type = other30.page_type; + encoding = other30.encoding; + count = other30.count; + return *this; +} +std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) { + using apache::thrift::to_string; + out << "PageEncodingStats("; + out << "page_type=" << to_string(obj.page_type); + out << ", " << "encoding=" << to_string(obj.encoding); + out << ", " << "count=" << to_string(obj.count); + out << ")"; + return out; +} + + +ColumnMetaData::~ColumnMetaData() throw() { +} + + +void ColumnMetaData::__set_type(const Type::type val) { + this->type = val; +} + +void ColumnMetaData::__set_encodings(const std::vector & val) { + this->encodings = val; +} + +void ColumnMetaData::__set_path_in_schema(const std::vector & val) { + this->path_in_schema = val; +} + +void ColumnMetaData::__set_codec(const CompressionCodec::type val) { + this->codec = val; +} + +void ColumnMetaData::__set_num_values(const int64_t val) { + this->num_values = val; +} + +void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { + this->total_uncompressed_size = val; +} + +void ColumnMetaData::__set_total_compressed_size(const int64_t val) { + this->total_compressed_size = val; +} + +void ColumnMetaData::__set_key_value_metadata(const std::vector & val) { + this->key_value_metadata = val; +__isset.key_value_metadata = true; +} + +void ColumnMetaData::__set_data_page_offset(const int64_t val) { + this->data_page_offset = val; +} + +void ColumnMetaData::__set_index_page_offset(const int64_t val) { + this->index_page_offset = val; +__isset.index_page_offset = true; +} + +void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { + this->dictionary_page_offset = val; +__isset.dictionary_page_offset = true; +} + +void ColumnMetaData::__set_statistics(const Statistics& val) { + this->statistics = val; +__isset.statistics = true; +} + +void ColumnMetaData::__set_encoding_stats(const std::vector & val) 
{ + this->encoding_stats = val; +__isset.encoding_stats = true; +} + +const char* ColumnMetaData::ascii_fingerprint = "AEE7317B6DB9719FE828388D537DBD71"; +const uint8_t ColumnMetaData::binary_fingerprint[16] = {0xAE,0xE7,0x31,0x7B,0x6D,0xB9,0x71,0x9F,0xE8,0x28,0x38,0x8D,0x53,0x7D,0xBD,0x71}; uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -1263,9 +1964,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast9; - xfer += iprot->readI32(ecast9); - this->type = (Type::type)ecast9; + int32_t ecast31; + xfer += iprot->readI32(ecast31); + this->type = (Type::type)ecast31; isset_type = true; } else { xfer += iprot->skip(ftype); @@ -1275,16 +1976,16 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size10; - ::apache::thrift::protocol::TType _etype13; - xfer += iprot->readListBegin(_etype13, _size10); - this->encodings.resize(_size10); - uint32_t _i14; - for (_i14 = 0; _i14 < _size10; ++_i14) + uint32_t _size32; + ::apache::thrift::protocol::TType _etype35; + xfer += iprot->readListBegin(_etype35, _size32); + this->encodings.resize(_size32); + uint32_t _i36; + for (_i36 = 0; _i36 < _size32; ++_i36) { - int32_t ecast15; - xfer += iprot->readI32(ecast15); - this->encodings[_i14] = (Encoding::type)ecast15; + int32_t ecast37; + xfer += iprot->readI32(ecast37); + this->encodings[_i36] = (Encoding::type)ecast37; } xfer += iprot->readListEnd(); } @@ -1297,14 +1998,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size16; - ::apache::thrift::protocol::TType _etype19; - xfer += iprot->readListBegin(_etype19, _size16); - this->path_in_schema.resize(_size16); - uint32_t _i20; - for (_i20 = 0; _i20 < 
_size16; ++_i20) + uint32_t _size38; + ::apache::thrift::protocol::TType _etype41; + xfer += iprot->readListBegin(_etype41, _size38); + this->path_in_schema.resize(_size38); + uint32_t _i42; + for (_i42 = 0; _i42 < _size38; ++_i42) { - xfer += iprot->readString(this->path_in_schema[_i20]); + xfer += iprot->readString(this->path_in_schema[_i42]); } xfer += iprot->readListEnd(); } @@ -1315,9 +2016,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast21; - xfer += iprot->readI32(ecast21); - this->codec = (CompressionCodec::type)ecast21; + int32_t ecast43; + xfer += iprot->readI32(ecast43); + this->codec = (CompressionCodec::type)ecast43; isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -1351,14 +2052,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size22; - ::apache::thrift::protocol::TType _etype25; - xfer += iprot->readListBegin(_etype25, _size22); - this->key_value_metadata.resize(_size22); - uint32_t _i26; - for (_i26 = 0; _i26 < _size22; ++_i26) + uint32_t _size44; + ::apache::thrift::protocol::TType _etype47; + xfer += iprot->readListBegin(_etype47, _size44); + this->key_value_metadata.resize(_size44); + uint32_t _i48; + for (_i48 = 0; _i48 < _size44; ++_i48) { - xfer += this->key_value_metadata[_i26].read(iprot); + xfer += this->key_value_metadata[_i48].read(iprot); } xfer += iprot->readListEnd(); } @@ -1399,6 +2100,26 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; + case 13: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->encoding_stats.clear(); + uint32_t _size49; + ::apache::thrift::protocol::TType _etype52; + xfer += iprot->readListBegin(_etype52, _size49); + this->encoding_stats.resize(_size49); + uint32_t _i53; + for 
(_i53 = 0; _i53 < _size49; ++_i53) + { + xfer += this->encoding_stats[_i53].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.encoding_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -1429,99 +2150,101 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("ColumnMetaData"); - ++fcnt; xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32((int32_t)this->type); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter27; - for (_iter27 = this->encodings.begin(); _iter27 != this->encodings.end(); ++_iter27) + std::vector ::const_iterator _iter54; + for (_iter54 = this->encodings.begin(); _iter54 != this->encodings.end(); ++_iter54) { - xfer += oprot->writeI32((int32_t)(*_iter27)); + xfer += oprot->writeI32((int32_t)(*_iter54)); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter28; - for (_iter28 = this->path_in_schema.begin(); _iter28 != this->path_in_schema.end(); ++_iter28) + std::vector ::const_iterator _iter55; + for (_iter55 = this->path_in_schema.begin(); _iter55 != this->path_in_schema.end(); ++_iter55) { - xfer += oprot->writeString((*_iter28)); + xfer += oprot->writeString((*_iter55)); } xfer += oprot->writeListEnd(); } xfer += 
oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); xfer += oprot->writeI32((int32_t)this->codec); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5); xfer += oprot->writeI64(this->num_values); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); xfer += oprot->writeI64(this->total_uncompressed_size); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7); xfer += oprot->writeI64(this->total_compressed_size); xfer += oprot->writeFieldEnd(); if (this->__isset.key_value_metadata) { - ++fcnt; xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter29; - for (_iter29 = this->key_value_metadata.begin(); _iter29 != this->key_value_metadata.end(); ++_iter29) + std::vector ::const_iterator _iter56; + for (_iter56 = this->key_value_metadata.begin(); _iter56 != this->key_value_metadata.end(); ++_iter56) { - xfer += (*_iter29).write(oprot); + xfer += (*_iter56).write(oprot); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); } - ++fcnt; xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9); xfer += oprot->writeI64(this->data_page_offset); xfer += oprot->writeFieldEnd(); if (this->__isset.index_page_offset) { - ++fcnt; xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10); xfer += oprot->writeI64(this->index_page_offset); xfer += oprot->writeFieldEnd(); } if (this->__isset.dictionary_page_offset) { - ++fcnt; xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); xfer 
+= oprot->writeI64(this->dictionary_page_offset); xfer += oprot->writeFieldEnd(); } if (this->__isset.statistics) { - ++fcnt; xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12); xfer += this->statistics.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.encoding_stats) { + xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); + std::vector ::const_iterator _iter57; + for (_iter57 = this->encoding_stats.begin(); _iter57 != this->encoding_stats.end(); ++_iter57) + { + xfer += (*_iter57).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1539,11 +2262,84 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.index_page_offset, b.index_page_offset); swap(a.dictionary_page_offset, b.dictionary_page_offset); swap(a.statistics, b.statistics); + swap(a.encoding_stats, b.encoding_stats); swap(a.__isset, b.__isset); } -const char* ColumnChunk::ascii_fingerprint = "169FC47057EF3D82E2FACDDEC2641AE8"; -const uint8_t ColumnChunk::binary_fingerprint[16] = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8}; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other58) { + type = other58.type; + encodings = other58.encodings; + path_in_schema = other58.path_in_schema; + codec = other58.codec; + num_values = other58.num_values; + total_uncompressed_size = other58.total_uncompressed_size; + total_compressed_size = other58.total_compressed_size; + key_value_metadata = other58.key_value_metadata; + data_page_offset = other58.data_page_offset; + index_page_offset = other58.index_page_offset; + dictionary_page_offset = other58.dictionary_page_offset; + statistics = other58.statistics; + 
encoding_stats = other58.encoding_stats; + __isset = other58.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other59) { + type = other59.type; + encodings = other59.encodings; + path_in_schema = other59.path_in_schema; + codec = other59.codec; + num_values = other59.num_values; + total_uncompressed_size = other59.total_uncompressed_size; + total_compressed_size = other59.total_compressed_size; + key_value_metadata = other59.key_value_metadata; + data_page_offset = other59.data_page_offset; + index_page_offset = other59.index_page_offset; + dictionary_page_offset = other59.dictionary_page_offset; + statistics = other59.statistics; + encoding_stats = other59.encoding_stats; + __isset = other59.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) { + using apache::thrift::to_string; + out << "ColumnMetaData("; + out << "type=" << to_string(obj.type); + out << ", " << "encodings=" << to_string(obj.encodings); + out << ", " << "path_in_schema=" << to_string(obj.path_in_schema); + out << ", " << "codec=" << to_string(obj.codec); + out << ", " << "num_values=" << to_string(obj.num_values); + out << ", " << "total_uncompressed_size=" << to_string(obj.total_uncompressed_size); + out << ", " << "total_compressed_size=" << to_string(obj.total_compressed_size); + out << ", " << "key_value_metadata="; (obj.__isset.key_value_metadata ? (out << to_string(obj.key_value_metadata)) : (out << "")); + out << ", " << "data_page_offset=" << to_string(obj.data_page_offset); + out << ", " << "index_page_offset="; (obj.__isset.index_page_offset ? (out << to_string(obj.index_page_offset)) : (out << "")); + out << ", " << "dictionary_page_offset="; (obj.__isset.dictionary_page_offset ? (out << to_string(obj.dictionary_page_offset)) : (out << "")); + out << ", " << "statistics="; (obj.__isset.statistics ? 
(out << to_string(obj.statistics)) : (out << "")); + out << ", " << "encoding_stats="; (obj.__isset.encoding_stats ? (out << to_string(obj.encoding_stats)) : (out << "")); + out << ")"; + return out; +} + + +ColumnChunk::~ColumnChunk() throw() { +} + + +void ColumnChunk::__set_file_path(const std::string& val) { + this->file_path = val; +__isset.file_path = true; +} + +void ColumnChunk::__set_file_offset(const int64_t val) { + this->file_offset = val; +} + +void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { + this->meta_data = val; +__isset.meta_data = true; +} + +const char* ColumnChunk::ascii_fingerprint = "F9F2DFFF1A7805AEF05AA10B23AF3108"; +const uint8_t ColumnChunk::binary_fingerprint[16] = {0xF9,0xF2,0xDF,0xFF,0x1A,0x78,0x05,0xAE,0xF0,0x5A,0xA1,0x0B,0x23,0xAF,0x31,0x08}; uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -1606,28 +2402,26 @@ uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("ColumnChunk"); if (this->__isset.file_path) { - ++fcnt; xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); xfer += oprot->writeString(this->file_path); xfer += oprot->writeFieldEnd(); } - ++fcnt; xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); xfer += oprot->writeI64(this->file_offset); xfer += oprot->writeFieldEnd(); if (this->__isset.meta_data) { - ++fcnt; xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); xfer += this->meta_data.write(oprot); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1639,8 +2433,53 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -const char* 
RowGroup::ascii_fingerprint = "DC7968627FA826DDC4C6C9BE773586C9"; -const uint8_t RowGroup::binary_fingerprint[16] = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9}; +ColumnChunk::ColumnChunk(const ColumnChunk& other60) { + file_path = other60.file_path; + file_offset = other60.file_offset; + meta_data = other60.meta_data; + __isset = other60.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other61) { + file_path = other61.file_path; + file_offset = other61.file_offset; + meta_data = other61.meta_data; + __isset = other61.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) { + using apache::thrift::to_string; + out << "ColumnChunk("; + out << "file_path="; (obj.__isset.file_path ? (out << to_string(obj.file_path)) : (out << "")); + out << ", " << "file_offset=" << to_string(obj.file_offset); + out << ", " << "meta_data="; (obj.__isset.meta_data ? (out << to_string(obj.meta_data)) : (out << "")); + out << ")"; + return out; +} + + +RowGroup::~RowGroup() throw() { +} + + +void RowGroup::__set_columns(const std::vector & val) { + this->columns = val; +} + +void RowGroup::__set_total_byte_size(const int64_t val) { + this->total_byte_size = val; +} + +void RowGroup::__set_num_rows(const int64_t val) { + this->num_rows = val; +} + +void RowGroup::__set_sorting_columns(const std::vector & val) { + this->sorting_columns = val; +__isset.sorting_columns = true; +} + +const char* RowGroup::ascii_fingerprint = "9BCFCB6790B06809B46424957993EDA6"; +const uint8_t RowGroup::binary_fingerprint[16] = {0x9B,0xCF,0xCB,0x67,0x90,0xB0,0x68,0x09,0xB4,0x64,0x24,0x95,0x79,0x93,0xED,0xA6}; uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -1669,14 +2508,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size30; - ::apache::thrift::protocol::TType _etype33; - 
xfer += iprot->readListBegin(_etype33, _size30); - this->columns.resize(_size30); - uint32_t _i34; - for (_i34 = 0; _i34 < _size30; ++_i34) + uint32_t _size62; + ::apache::thrift::protocol::TType _etype65; + xfer += iprot->readListBegin(_etype65, _size62); + this->columns.resize(_size62); + uint32_t _i66; + for (_i66 = 0; _i66 < _size62; ++_i66) { - xfer += this->columns[_i34].read(iprot); + xfer += this->columns[_i66].read(iprot); } xfer += iprot->readListEnd(); } @@ -1705,14 +2544,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size35; - ::apache::thrift::protocol::TType _etype38; - xfer += iprot->readListBegin(_etype38, _size35); - this->sorting_columns.resize(_size35); - uint32_t _i39; - for (_i39 = 0; _i39 < _size35; ++_i39) + uint32_t _size67; + ::apache::thrift::protocol::TType _etype70; + xfer += iprot->readListBegin(_etype70, _size67); + this->sorting_columns.resize(_size67); + uint32_t _i71; + for (_i71 = 0; _i71 < _size67; ++_i71) { - xfer += this->sorting_columns[_i39].read(iprot); + xfer += this->sorting_columns[_i71].read(iprot); } xfer += iprot->readListEnd(); } @@ -1741,41 +2580,37 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("RowGroup"); - ++fcnt; xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter40; - for (_iter40 = this->columns.begin(); _iter40 != this->columns.end(); ++_iter40) + std::vector ::const_iterator _iter72; + for (_iter72 = this->columns.begin(); _iter72 != this->columns.end(); ++_iter72) { - xfer += (*_iter40).write(oprot); + 
xfer += (*_iter72).write(oprot); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); xfer += oprot->writeI64(this->total_byte_size); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->num_rows); xfer += oprot->writeFieldEnd(); if (this->__isset.sorting_columns) { - ++fcnt; xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter41; - for (_iter41 = this->sorting_columns.begin(); _iter41 != this->sorting_columns.end(); ++_iter41) + std::vector ::const_iterator _iter73; + for (_iter73 = this->sorting_columns.begin(); _iter73 != this->sorting_columns.end(); ++_iter73) { - xfer += (*_iter41).write(oprot); + xfer += (*_iter73).write(oprot); } xfer += oprot->writeListEnd(); } @@ -1783,6 +2618,7 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -1795,8 +2631,65 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -const char* FileMetaData::ascii_fingerprint = "44DC7D83A66D54A7B7892A985C4125C9"; -const uint8_t FileMetaData::binary_fingerprint[16] = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9}; +RowGroup::RowGroup(const RowGroup& other74) { + columns = other74.columns; + total_byte_size = other74.total_byte_size; + num_rows = other74.num_rows; + sorting_columns = other74.sorting_columns; + __isset = other74.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other75) { + columns = other75.columns; + total_byte_size = other75.total_byte_size; + num_rows = 
other75.num_rows; + sorting_columns = other75.sorting_columns; + __isset = other75.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const RowGroup& obj) { + using apache::thrift::to_string; + out << "RowGroup("; + out << "columns=" << to_string(obj.columns); + out << ", " << "total_byte_size=" << to_string(obj.total_byte_size); + out << ", " << "num_rows=" << to_string(obj.num_rows); + out << ", " << "sorting_columns="; (obj.__isset.sorting_columns ? (out << to_string(obj.sorting_columns)) : (out << "")); + out << ")"; + return out; +} + + +FileMetaData::~FileMetaData() throw() { +} + + +void FileMetaData::__set_version(const int32_t val) { + this->version = val; +} + +void FileMetaData::__set_schema(const std::vector & val) { + this->schema = val; +} + +void FileMetaData::__set_num_rows(const int64_t val) { + this->num_rows = val; +} + +void FileMetaData::__set_row_groups(const std::vector & val) { + this->row_groups = val; +} + +void FileMetaData::__set_key_value_metadata(const std::vector & val) { + this->key_value_metadata = val; +__isset.key_value_metadata = true; +} + +void FileMetaData::__set_created_by(const std::string& val) { + this->created_by = val; +__isset.created_by = true; +} + +const char* FileMetaData::ascii_fingerprint = "D76119B1A4F2B4F404F095255FBDBD9A"; +const uint8_t FileMetaData::binary_fingerprint[16] = {0xD7,0x61,0x19,0xB1,0xA4,0xF2,0xB4,0xF4,0x04,0xF0,0x95,0x25,0x5F,0xBD,0xBD,0x9A}; uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -1834,14 +2727,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size42; - ::apache::thrift::protocol::TType _etype45; - xfer += iprot->readListBegin(_etype45, _size42); - this->schema.resize(_size42); - uint32_t _i46; - for (_i46 = 0; _i46 < _size42; ++_i46) + uint32_t _size76; + ::apache::thrift::protocol::TType _etype79; + xfer += 
iprot->readListBegin(_etype79, _size76); + this->schema.resize(_size76); + uint32_t _i80; + for (_i80 = 0; _i80 < _size76; ++_i80) { - xfer += this->schema[_i46].read(iprot); + xfer += this->schema[_i80].read(iprot); } xfer += iprot->readListEnd(); } @@ -1862,14 +2755,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size47; - ::apache::thrift::protocol::TType _etype50; - xfer += iprot->readListBegin(_etype50, _size47); - this->row_groups.resize(_size47); - uint32_t _i51; - for (_i51 = 0; _i51 < _size47; ++_i51) + uint32_t _size81; + ::apache::thrift::protocol::TType _etype84; + xfer += iprot->readListBegin(_etype84, _size81); + this->row_groups.resize(_size81); + uint32_t _i85; + for (_i85 = 0; _i85 < _size81; ++_i85) { - xfer += this->row_groups[_i51].read(iprot); + xfer += this->row_groups[_i85].read(iprot); } xfer += iprot->readListEnd(); } @@ -1882,14 +2775,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size52; - ::apache::thrift::protocol::TType _etype55; - xfer += iprot->readListBegin(_etype55, _size52); - this->key_value_metadata.resize(_size52); - uint32_t _i56; - for (_i56 = 0; _i56 < _size52; ++_i56) + uint32_t _size86; + ::apache::thrift::protocol::TType _etype89; + xfer += iprot->readListBegin(_etype89, _size86); + this->key_value_metadata.resize(_size86); + uint32_t _i90; + for (_i90 = 0; _i90 < _size86; ++_i90) { - xfer += this->key_value_metadata[_i56].read(iprot); + xfer += this->key_value_metadata[_i90].read(iprot); } xfer += iprot->readListEnd(); } @@ -1928,67 +2821,62 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { uint32_t xfer = 0; - uint32_t fcnt = 0; + 
oprot->incrementRecursionDepth(); xfer += oprot->writeStructBegin("FileMetaData"); - ++fcnt; xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); xfer += oprot->writeI32(this->version); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter57; - for (_iter57 = this->schema.begin(); _iter57 != this->schema.end(); ++_iter57) + std::vector ::const_iterator _iter91; + for (_iter91 = this->schema.begin(); _iter91 != this->schema.end(); ++_iter91) { - xfer += (*_iter57).write(oprot); + xfer += (*_iter91).write(oprot); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->num_rows); xfer += oprot->writeFieldEnd(); - ++fcnt; xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter58; - for (_iter58 = this->row_groups.begin(); _iter58 != this->row_groups.end(); ++_iter58) + std::vector ::const_iterator _iter92; + for (_iter92 = this->row_groups.begin(); _iter92 != this->row_groups.end(); ++_iter92) { - xfer += (*_iter58).write(oprot); + xfer += (*_iter92).write(oprot); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); if (this->__isset.key_value_metadata) { - ++fcnt; xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter59; - for (_iter59 = this->key_value_metadata.begin(); _iter59 != this->key_value_metadata.end(); ++_iter59) + 
std::vector ::const_iterator _iter93; + for (_iter93 = this->key_value_metadata.begin(); _iter93 != this->key_value_metadata.end(); ++_iter93) { - xfer += (*_iter59).write(oprot); + xfer += (*_iter93).write(oprot); } xfer += oprot->writeListEnd(); } xfer += oprot->writeFieldEnd(); } if (this->__isset.created_by) { - ++fcnt; xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); xfer += oprot->writeString(this->created_by); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); return xfer; } @@ -2003,4 +2891,36 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } +FileMetaData::FileMetaData(const FileMetaData& other94) { + version = other94.version; + schema = other94.schema; + num_rows = other94.num_rows; + row_groups = other94.row_groups; + key_value_metadata = other94.key_value_metadata; + created_by = other94.created_by; + __isset = other94.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other95) { + version = other95.version; + schema = other95.schema; + num_rows = other95.num_rows; + row_groups = other95.row_groups; + key_value_metadata = other95.key_value_metadata; + created_by = other95.created_by; + __isset = other95.__isset; + return *this; +} +std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) { + using apache::thrift::to_string; + out << "FileMetaData("; + out << "version=" << to_string(obj.version); + out << ", " << "schema=" << to_string(obj.schema); + out << ", " << "num_rows=" << to_string(obj.num_rows); + out << ", " << "row_groups=" << to_string(obj.row_groups); + out << ", " << "key_value_metadata="; (obj.__isset.key_value_metadata ? (out << to_string(obj.key_value_metadata)) : (out << "")); + out << ", " << "created_by="; (obj.__isset.created_by ? 
(out << to_string(obj.created_by)) : (out << "")); + out << ")"; + return out; +} + } // namespace diff --git a/generated/gen-cpp/parquet_types.h b/generated/gen-cpp/parquet_types.h index 4360d02e..2de9e1e2 100644 --- a/generated/gen-cpp/parquet_types.h +++ b/generated/gen-cpp/parquet_types.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.9.0) + * Autogenerated by Thrift Compiler (0.9.2) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -7,11 +7,14 @@ #ifndef parquet_TYPES_H #define parquet_TYPES_H +#include + #include #include #include #include +#include namespace parquet { @@ -38,7 +41,21 @@ struct ConvertedType { MAP_KEY_VALUE = 2, LIST = 3, ENUM = 4, - DECIMAL = 5 + DECIMAL = 5, + DATE = 6, + TIME_MILLIS = 7, + TIMESTAMP_MILLIS = 9, + UINT_8 = 11, + UINT_16 = 12, + UINT_32 = 13, + UINT_64 = 14, + INT_8 = 15, + INT_16 = 16, + INT_32 = 17, + INT_64 = 18, + JSON = 19, + BSON = 20, + INTERVAL = 21 }; }; @@ -91,12 +108,40 @@ struct PageType { extern const std::map _PageType_VALUES_TO_NAMES; +class Statistics; + +class SchemaElement; + +class DataPageHeader; + +class IndexPageHeader; + +class DictionaryPageHeader; + +class DataPageHeaderV2; + +class PageHeader; + +class KeyValue; + +class SortingColumn; + +class PageEncodingStats; + +class ColumnMetaData; + +class ColumnChunk; + +class RowGroup; + +class FileMetaData; + typedef struct _Statistics__isset { _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {} - bool max; - bool min; - bool null_count; - bool distinct_count; + bool max :1; + bool min :1; + bool null_count :1; + bool distinct_count :1; } _Statistics__isset; class Statistics { @@ -105,11 +150,12 @@ class Statistics { static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D"; static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D}; + Statistics(const Statistics&); + Statistics& 
operator=(const Statistics&); Statistics() : max(), min(), null_count(0), distinct_count(0) { } - virtual ~Statistics() throw() {} - + virtual ~Statistics() throw(); std::string max; std::string min; int64_t null_count; @@ -117,25 +163,13 @@ class Statistics { _Statistics__isset __isset; - void __set_max(const std::string& val) { - max = val; - __isset.max = true; - } + void __set_max(const std::string& val); - void __set_min(const std::string& val) { - min = val; - __isset.min = true; - } + void __set_min(const std::string& val); - void __set_null_count(const int64_t val) { - null_count = val; - __isset.null_count = true; - } + void __set_null_count(const int64_t val); - void __set_distinct_count(const int64_t val) { - distinct_count = val; - __isset.distinct_count = true; - } + void __set_distinct_count(const int64_t val); bool operator == (const Statistics & rhs) const { @@ -166,32 +200,35 @@ class Statistics { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const Statistics& obj); }; void swap(Statistics &a, Statistics &b); typedef struct _SchemaElement__isset { - _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {} - bool type; - bool type_length; - bool repetition_type; - bool num_children; - bool converted_type; - bool scale; - bool precision; + _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false), field_id(false) {} + bool type :1; + bool type_length :1; + bool repetition_type :1; + bool num_children :1; + bool converted_type :1; + bool scale :1; + bool precision :1; + bool field_id :1; } _SchemaElement__isset; class SchemaElement { public: - static const char* ascii_fingerprint; // = 
"388A784401753800444CFEAC8BC1B1A1"; - static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1}; + static const char* ascii_fingerprint; // = "22DC89BFD9E48E604F01FB8CFDFB8229"; + static const uint8_t binary_fingerprint[16]; // = {0x22,0xDC,0x89,0xBF,0xD9,0xE4,0x8E,0x60,0x4F,0x01,0xFB,0x8C,0xFD,0xFB,0x82,0x29}; - SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) { + SchemaElement(const SchemaElement&); + SchemaElement& operator=(const SchemaElement&); + SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0), field_id(0) { } - virtual ~SchemaElement() throw() {} - + virtual ~SchemaElement() throw(); Type::type type; int32_t type_length; FieldRepetitionType::type repetition_type; @@ -200,47 +237,27 @@ class SchemaElement { ConvertedType::type converted_type; int32_t scale; int32_t precision; + int32_t field_id; _SchemaElement__isset __isset; - void __set_type(const Type::type val) { - type = val; - __isset.type = true; - } + void __set_type(const Type::type val); - void __set_type_length(const int32_t val) { - type_length = val; - __isset.type_length = true; - } + void __set_type_length(const int32_t val); - void __set_repetition_type(const FieldRepetitionType::type val) { - repetition_type = val; - __isset.repetition_type = true; - } + void __set_repetition_type(const FieldRepetitionType::type val); - void __set_name(const std::string& val) { - name = val; - } + void __set_name(const std::string& val); - void __set_num_children(const int32_t val) { - num_children = val; - __isset.num_children = true; - } + void __set_num_children(const int32_t val); - void __set_converted_type(const ConvertedType::type val) { - 
converted_type = val; - __isset.converted_type = true; - } + void __set_converted_type(const ConvertedType::type val); - void __set_scale(const int32_t val) { - scale = val; - __isset.scale = true; - } + void __set_scale(const int32_t val); - void __set_precision(const int32_t val) { - precision = val; - __isset.precision = true; - } + void __set_precision(const int32_t val); + + void __set_field_id(const int32_t val); bool operator == (const SchemaElement & rhs) const { @@ -274,6 +291,10 @@ class SchemaElement { return false; else if (__isset.precision && !(precision == rhs.precision)) return false; + if (__isset.field_id != rhs.__isset.field_id) + return false; + else if (__isset.field_id && !(field_id == rhs.field_id)) + return false; return true; } bool operator != (const SchemaElement &rhs) const { @@ -285,13 +306,14 @@ class SchemaElement { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const SchemaElement& obj); }; void swap(SchemaElement &a, SchemaElement &b); typedef struct _DataPageHeader__isset { _DataPageHeader__isset() : statistics(false) {} - bool statistics; + bool statistics :1; } _DataPageHeader__isset; class DataPageHeader { @@ -300,11 +322,12 @@ class DataPageHeader { static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1"; static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1}; + DataPageHeader(const DataPageHeader&); + DataPageHeader& operator=(const DataPageHeader&); DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) { } - virtual ~DataPageHeader() throw() {} - + virtual ~DataPageHeader() throw(); int32_t num_values; Encoding::type encoding; Encoding::type definition_level_encoding; @@ -313,26 +336,15 @@ class 
DataPageHeader { _DataPageHeader__isset __isset; - void __set_num_values(const int32_t val) { - num_values = val; - } + void __set_num_values(const int32_t val); - void __set_encoding(const Encoding::type val) { - encoding = val; - } + void __set_encoding(const Encoding::type val); - void __set_definition_level_encoding(const Encoding::type val) { - definition_level_encoding = val; - } + void __set_definition_level_encoding(const Encoding::type val); - void __set_repetition_level_encoding(const Encoding::type val) { - repetition_level_encoding = val; - } + void __set_repetition_level_encoding(const Encoding::type val); - void __set_statistics(const Statistics& val) { - statistics = val; - __isset.statistics = true; - } + void __set_statistics(const Statistics& val); bool operator == (const DataPageHeader & rhs) const { @@ -359,6 +371,7 @@ class DataPageHeader { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); }; void swap(DataPageHeader &a, DataPageHeader &b); @@ -370,11 +383,12 @@ class IndexPageHeader { static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B"; static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B}; + IndexPageHeader(const IndexPageHeader&); + IndexPageHeader& operator=(const IndexPageHeader&); IndexPageHeader() { } - virtual ~IndexPageHeader() throw() {} - + virtual ~IndexPageHeader() throw(); bool operator == (const IndexPageHeader & /* rhs */) const { @@ -389,13 +403,14 @@ class IndexPageHeader { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj); }; void swap(IndexPageHeader &a, IndexPageHeader &b); typedef struct 
_DictionaryPageHeader__isset { _DictionaryPageHeader__isset() : is_sorted(false) {} - bool is_sorted; + bool is_sorted :1; } _DictionaryPageHeader__isset; class DictionaryPageHeader { @@ -404,29 +419,23 @@ class DictionaryPageHeader { static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5"; static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5}; + DictionaryPageHeader(const DictionaryPageHeader&); + DictionaryPageHeader& operator=(const DictionaryPageHeader&); DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) { } - virtual ~DictionaryPageHeader() throw() {} - + virtual ~DictionaryPageHeader() throw(); int32_t num_values; Encoding::type encoding; bool is_sorted; _DictionaryPageHeader__isset __isset; - void __set_num_values(const int32_t val) { - num_values = val; - } + void __set_num_values(const int32_t val); - void __set_encoding(const Encoding::type val) { - encoding = val; - } + void __set_encoding(const Encoding::type val); - void __set_is_sorted(const bool val) { - is_sorted = val; - __isset.is_sorted = true; - } + void __set_is_sorted(const bool val); bool operator == (const DictionaryPageHeader & rhs) const { @@ -449,14 +458,15 @@ class DictionaryPageHeader { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj); }; void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); typedef struct _DataPageHeaderV2__isset { _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} - bool is_compressed; - bool statistics; + bool is_compressed :1; + bool statistics :1; } _DataPageHeaderV2__isset; class DataPageHeaderV2 { @@ -465,11 +475,12 @@ class DataPageHeaderV2 { static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919"; static const uint8_t 
binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19}; + DataPageHeaderV2(const DataPageHeaderV2&); + DataPageHeaderV2& operator=(const DataPageHeaderV2&); DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) { } - virtual ~DataPageHeaderV2() throw() {} - + virtual ~DataPageHeaderV2() throw(); int32_t num_values; int32_t num_nulls; int32_t num_rows; @@ -481,39 +492,21 @@ class DataPageHeaderV2 { _DataPageHeaderV2__isset __isset; - void __set_num_values(const int32_t val) { - num_values = val; - } + void __set_num_values(const int32_t val); - void __set_num_nulls(const int32_t val) { - num_nulls = val; - } + void __set_num_nulls(const int32_t val); - void __set_num_rows(const int32_t val) { - num_rows = val; - } + void __set_num_rows(const int32_t val); - void __set_encoding(const Encoding::type val) { - encoding = val; - } + void __set_encoding(const Encoding::type val); - void __set_definition_levels_byte_length(const int32_t val) { - definition_levels_byte_length = val; - } + void __set_definition_levels_byte_length(const int32_t val); - void __set_repetition_levels_byte_length(const int32_t val) { - repetition_levels_byte_length = val; - } + void __set_repetition_levels_byte_length(const int32_t val); - void __set_is_compressed(const bool val) { - is_compressed = val; - __isset.is_compressed = true; - } + void __set_is_compressed(const bool val); - void __set_statistics(const Statistics& val) { - statistics = val; - __isset.statistics = true; - } + void __set_statistics(const Statistics& val); bool operator == (const DataPageHeaderV2 & rhs) const { @@ -548,17 +541,18 @@ class DataPageHeaderV2 { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const 
DataPageHeaderV2& obj); }; void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); typedef struct _PageHeader__isset { _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {} - bool crc; - bool data_page_header; - bool index_page_header; - bool dictionary_page_header; - bool data_page_header_v2; + bool crc :1; + bool data_page_header :1; + bool index_page_header :1; + bool dictionary_page_header :1; + bool data_page_header_v2 :1; } _PageHeader__isset; class PageHeader { @@ -567,11 +561,12 @@ class PageHeader { static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0"; static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0}; + PageHeader(const PageHeader&); + PageHeader& operator=(const PageHeader&); PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) { } - virtual ~PageHeader() throw() {} - + virtual ~PageHeader() throw(); PageType::type type; int32_t uncompressed_page_size; int32_t compressed_page_size; @@ -583,42 +578,21 @@ class PageHeader { _PageHeader__isset __isset; - void __set_type(const PageType::type val) { - type = val; - } + void __set_type(const PageType::type val); - void __set_uncompressed_page_size(const int32_t val) { - uncompressed_page_size = val; - } + void __set_uncompressed_page_size(const int32_t val); - void __set_compressed_page_size(const int32_t val) { - compressed_page_size = val; - } + void __set_compressed_page_size(const int32_t val); - void __set_crc(const int32_t val) { - crc = val; - __isset.crc = true; - } + void __set_crc(const int32_t val); - void __set_data_page_header(const DataPageHeader& val) { - data_page_header = val; - __isset.data_page_header = true; - } + void __set_data_page_header(const DataPageHeader& val); - void __set_index_page_header(const IndexPageHeader& val) { - index_page_header = 
val; - __isset.index_page_header = true; - } + void __set_index_page_header(const IndexPageHeader& val); - void __set_dictionary_page_header(const DictionaryPageHeader& val) { - dictionary_page_header = val; - __isset.dictionary_page_header = true; - } + void __set_dictionary_page_header(const DictionaryPageHeader& val); - void __set_data_page_header_v2(const DataPageHeaderV2& val) { - data_page_header_v2 = val; - __isset.data_page_header_v2 = true; - } + void __set_data_page_header_v2(const DataPageHeaderV2& val); bool operator == (const PageHeader & rhs) const { @@ -659,13 +633,14 @@ class PageHeader { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const PageHeader& obj); }; void swap(PageHeader &a, PageHeader &b); typedef struct _KeyValue__isset { _KeyValue__isset() : value(false) {} - bool value; + bool value :1; } _KeyValue__isset; class KeyValue { @@ -674,24 +649,20 @@ class KeyValue { static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9"; static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9}; + KeyValue(const KeyValue&); + KeyValue& operator=(const KeyValue&); KeyValue() : key(), value() { } - virtual ~KeyValue() throw() {} - + virtual ~KeyValue() throw(); std::string key; std::string value; _KeyValue__isset __isset; - void __set_key(const std::string& val) { - key = val; - } + void __set_key(const std::string& val); - void __set_value(const std::string& val) { - value = val; - __isset.value = true; - } + void __set_value(const std::string& val); bool operator == (const KeyValue & rhs) const { @@ -712,6 +683,7 @@ class KeyValue { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const KeyValue& 
obj); }; void swap(KeyValue &a, KeyValue &b); @@ -723,26 +695,21 @@ class SortingColumn { static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F"; static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F}; + SortingColumn(const SortingColumn&); + SortingColumn& operator=(const SortingColumn&); SortingColumn() : column_idx(0), descending(0), nulls_first(0) { } - virtual ~SortingColumn() throw() {} - + virtual ~SortingColumn() throw(); int32_t column_idx; bool descending; bool nulls_first; - void __set_column_idx(const int32_t val) { - column_idx = val; - } + void __set_column_idx(const int32_t val); - void __set_descending(const bool val) { - descending = val; - } + void __set_descending(const bool val); - void __set_nulls_first(const bool val) { - nulls_first = val; - } + void __set_nulls_first(const bool val); bool operator == (const SortingColumn & rhs) const { @@ -763,29 +730,79 @@ class SortingColumn { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); }; void swap(SortingColumn &a, SortingColumn &b); + +class PageEncodingStats { + public: + + static const char* ascii_fingerprint; // = "5F1BEE04836FAA3055D3EE2492AE44FB"; + static const uint8_t binary_fingerprint[16]; // = {0x5F,0x1B,0xEE,0x04,0x83,0x6F,0xAA,0x30,0x55,0xD3,0xEE,0x24,0x92,0xAE,0x44,0xFB}; + + PageEncodingStats(const PageEncodingStats&); + PageEncodingStats& operator=(const PageEncodingStats&); + PageEncodingStats() : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) { + } + + virtual ~PageEncodingStats() throw(); + PageType::type page_type; + Encoding::type encoding; + int32_t count; + + void __set_page_type(const PageType::type val); + + void __set_encoding(const Encoding::type val); + + void __set_count(const int32_t val); + + 
bool operator == (const PageEncodingStats & rhs) const + { + if (!(page_type == rhs.page_type)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(count == rhs.count)) + return false; + return true; + } + bool operator != (const PageEncodingStats &rhs) const { + return !(*this == rhs); + } + + bool operator < (const PageEncodingStats & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + + friend std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); +}; + +void swap(PageEncodingStats &a, PageEncodingStats &b); + typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {} - bool key_value_metadata; - bool index_page_offset; - bool dictionary_page_offset; - bool statistics; + _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false) {} + bool key_value_metadata :1; + bool index_page_offset :1; + bool dictionary_page_offset :1; + bool statistics :1; + bool encoding_stats :1; } _ColumnMetaData__isset; class ColumnMetaData { public: - static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D"; - static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D}; + static const char* ascii_fingerprint; // = "AEE7317B6DB9719FE828388D537DBD71"; + static const uint8_t binary_fingerprint[16]; // = {0xAE,0xE7,0x31,0x7B,0x6D,0xB9,0x71,0x9F,0xE8,0x28,0x38,0x8D,0x53,0x7D,0xBD,0x71}; + ColumnMetaData(const ColumnMetaData&); + ColumnMetaData& operator=(const ColumnMetaData&); ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), 
dictionary_page_offset(0) { } - virtual ~ColumnMetaData() throw() {} - + virtual ~ColumnMetaData() throw(); Type::type type; std::vector encodings; std::vector path_in_schema; @@ -798,60 +815,35 @@ class ColumnMetaData { int64_t index_page_offset; int64_t dictionary_page_offset; Statistics statistics; + std::vector encoding_stats; _ColumnMetaData__isset __isset; - void __set_type(const Type::type val) { - type = val; - } + void __set_type(const Type::type val); - void __set_encodings(const std::vector & val) { - encodings = val; - } + void __set_encodings(const std::vector & val); - void __set_path_in_schema(const std::vector & val) { - path_in_schema = val; - } + void __set_path_in_schema(const std::vector & val); - void __set_codec(const CompressionCodec::type val) { - codec = val; - } + void __set_codec(const CompressionCodec::type val); - void __set_num_values(const int64_t val) { - num_values = val; - } + void __set_num_values(const int64_t val); - void __set_total_uncompressed_size(const int64_t val) { - total_uncompressed_size = val; - } + void __set_total_uncompressed_size(const int64_t val); - void __set_total_compressed_size(const int64_t val) { - total_compressed_size = val; - } + void __set_total_compressed_size(const int64_t val); - void __set_key_value_metadata(const std::vector & val) { - key_value_metadata = val; - __isset.key_value_metadata = true; - } + void __set_key_value_metadata(const std::vector & val); - void __set_data_page_offset(const int64_t val) { - data_page_offset = val; - } + void __set_data_page_offset(const int64_t val); - void __set_index_page_offset(const int64_t val) { - index_page_offset = val; - __isset.index_page_offset = true; - } + void __set_index_page_offset(const int64_t val); - void __set_dictionary_page_offset(const int64_t val) { - dictionary_page_offset = val; - __isset.dictionary_page_offset = true; - } + void __set_dictionary_page_offset(const int64_t val); - void __set_statistics(const Statistics& val) { - 
statistics = val; - __isset.statistics = true; - } + void __set_statistics(const Statistics& val); + + void __set_encoding_stats(const std::vector & val); bool operator == (const ColumnMetaData & rhs) const { @@ -887,6 +879,10 @@ class ColumnMetaData { return false; else if (__isset.statistics && !(statistics == rhs.statistics)) return false; + if (__isset.encoding_stats != rhs.__isset.encoding_stats) + return false; + else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) + return false; return true; } bool operator != (const ColumnMetaData &rhs) const { @@ -898,46 +894,40 @@ class ColumnMetaData { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); }; void swap(ColumnMetaData &a, ColumnMetaData &b); typedef struct _ColumnChunk__isset { _ColumnChunk__isset() : file_path(false), meta_data(false) {} - bool file_path; - bool meta_data; + bool file_path :1; + bool meta_data :1; } _ColumnChunk__isset; class ColumnChunk { public: - static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8"; - static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8}; + static const char* ascii_fingerprint; // = "F9F2DFFF1A7805AEF05AA10B23AF3108"; + static const uint8_t binary_fingerprint[16]; // = {0xF9,0xF2,0xDF,0xFF,0x1A,0x78,0x05,0xAE,0xF0,0x5A,0xA1,0x0B,0x23,0xAF,0x31,0x08}; + ColumnChunk(const ColumnChunk&); + ColumnChunk& operator=(const ColumnChunk&); ColumnChunk() : file_path(), file_offset(0) { } - virtual ~ColumnChunk() throw() {} - + virtual ~ColumnChunk() throw(); std::string file_path; int64_t file_offset; ColumnMetaData meta_data; _ColumnChunk__isset __isset; - void __set_file_path(const std::string& val) { - file_path = val; - __isset.file_path = true; - } + void __set_file_path(const std::string& val); - 
void __set_file_offset(const int64_t val) { - file_offset = val; - } + void __set_file_offset(const int64_t val); - void __set_meta_data(const ColumnMetaData& val) { - meta_data = val; - __isset.meta_data = true; - } + void __set_meta_data(const ColumnMetaData& val); bool operator == (const ColumnChunk & rhs) const { @@ -962,26 +952,28 @@ class ColumnChunk { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj); }; void swap(ColumnChunk &a, ColumnChunk &b); typedef struct _RowGroup__isset { _RowGroup__isset() : sorting_columns(false) {} - bool sorting_columns; + bool sorting_columns :1; } _RowGroup__isset; class RowGroup { public: - static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9"; - static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9}; + static const char* ascii_fingerprint; // = "9BCFCB6790B06809B46424957993EDA6"; + static const uint8_t binary_fingerprint[16]; // = {0x9B,0xCF,0xCB,0x67,0x90,0xB0,0x68,0x09,0xB4,0x64,0x24,0x95,0x79,0x93,0xED,0xA6}; + RowGroup(const RowGroup&); + RowGroup& operator=(const RowGroup&); RowGroup() : total_byte_size(0), num_rows(0) { } - virtual ~RowGroup() throw() {} - + virtual ~RowGroup() throw(); std::vector columns; int64_t total_byte_size; int64_t num_rows; @@ -989,22 +981,13 @@ class RowGroup { _RowGroup__isset __isset; - void __set_columns(const std::vector & val) { - columns = val; - } + void __set_columns(const std::vector & val); - void __set_total_byte_size(const int64_t val) { - total_byte_size = val; - } + void __set_total_byte_size(const int64_t val); - void __set_num_rows(const int64_t val) { - num_rows = val; - } + void __set_num_rows(const int64_t val); - void __set_sorting_columns(const std::vector & val) { - sorting_columns = val; - __isset.sorting_columns = 
true; - } + void __set_sorting_columns(const std::vector & val); bool operator == (const RowGroup & rhs) const { @@ -1029,27 +1012,29 @@ class RowGroup { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const RowGroup& obj); }; void swap(RowGroup &a, RowGroup &b); typedef struct _FileMetaData__isset { _FileMetaData__isset() : key_value_metadata(false), created_by(false) {} - bool key_value_metadata; - bool created_by; + bool key_value_metadata :1; + bool created_by :1; } _FileMetaData__isset; class FileMetaData { public: - static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9"; - static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9}; + static const char* ascii_fingerprint; // = "D76119B1A4F2B4F404F095255FBDBD9A"; + static const uint8_t binary_fingerprint[16]; // = {0xD7,0x61,0x19,0xB1,0xA4,0xF2,0xB4,0xF4,0x04,0xF0,0x95,0x25,0x5F,0xBD,0xBD,0x9A}; + FileMetaData(const FileMetaData&); + FileMetaData& operator=(const FileMetaData&); FileMetaData() : version(0), num_rows(0), created_by() { } - virtual ~FileMetaData() throw() {} - + virtual ~FileMetaData() throw(); int32_t version; std::vector schema; int64_t num_rows; @@ -1059,31 +1044,17 @@ class FileMetaData { _FileMetaData__isset __isset; - void __set_version(const int32_t val) { - version = val; - } + void __set_version(const int32_t val); - void __set_schema(const std::vector & val) { - schema = val; - } + void __set_schema(const std::vector & val); - void __set_num_rows(const int64_t val) { - num_rows = val; - } + void __set_num_rows(const int64_t val); - void __set_row_groups(const std::vector & val) { - row_groups = val; - } + void __set_row_groups(const std::vector & val); - void __set_key_value_metadata(const std::vector & val) { - key_value_metadata = val; - 
__isset.key_value_metadata = true; - } + void __set_key_value_metadata(const std::vector & val); - void __set_created_by(const std::string& val) { - created_by = val; - __isset.created_by = true; - } + void __set_created_by(const std::string& val); bool operator == (const FileMetaData & rhs) const { @@ -1114,6 +1085,7 @@ class FileMetaData { uint32_t read(::apache::thrift::protocol::TProtocol* iprot); uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + friend std::ostream& operator<<(std::ostream& out, const FileMetaData& obj); }; void swap(FileMetaData &a, FileMetaData &b); diff --git a/parquet-format/parquet.thrift b/parquet-format/parquet.thrift index 52dea7f9..328e8d5e 100644 --- a/parquet-format/parquet.thrift +++ b/parquet-format/parquet.thrift @@ -9,18 +9,19 @@ * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ /** * File format description for the parquet file format */ namespace cpp parquet -namespace java parquet.format +namespace java org.apache.parquet.format /** * Types supported by Parquet. These types are intended to be used in combination @@ -76,6 +77,90 @@ enum ConvertedType { * 2 digits over). */ DECIMAL = 5; + + /** + * A Date + * + * Stored as days since Unix epoch, encoded as the INT32 physical type. 
+ * + */ + DATE = 6; + + /** + * A time + * + * The total number of milliseconds since midnight. The value is stored + * as an INT32 physical type. + */ + TIME_MILLIS = 7; + // RESERVED = 8; + + /** + * A date/time combination + * + * Date and time recorded as milliseconds since the Unix epoch. Recorded as + * a physical type of INT64. + */ + TIMESTAMP_MILLIS = 9; + // RESERVED = 10; + + + /** + * An unsigned integer value. + * + * The number describes the maximum number of meaningful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. + * + */ + UINT_8 = 11; + UINT_16 = 12; + UINT_32 = 13; + UINT_64 = 14; + + /** + * A signed integer value. + * + * The number describes the maximum number of meaningful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. + * + */ + INT_8 = 15; + INT_16 = 16; + INT_32 = 17; + INT_64 = 18; + + /** + * An embedded JSON document + * + * A JSON document embedded within a single UTF8 column. + */ + JSON = 19; + + /** + * An embedded BSON document + * + * A BSON document embedded within a single BINARY column. + */ + BSON = 20; + + /** + * An interval of time + * + * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12. + * This data is composed of three separate little endian unsigned + * integers. Each stores a component of a duration of time. The first + * integer identifies the number of months associated with the duration, + * the second identifies the number of days associated with the duration + * and the third identifies the number of milliseconds associated with + * the provided duration. This duration of time is independent of any + * particular timezone or date. 
+ */ + INTERVAL = 21; + } /** @@ -147,6 +232,12 @@ struct SchemaElement { */ 7: optional i32 scale 8: optional i32 precision + + /** When the original schema supports field ids, this will save the + * original field id in the parquet schema + */ + 9: optional i32 field_id; + } /** @@ -338,6 +429,22 @@ struct SortingColumn { 3: required bool nulls_first } +/** + * statistics of a given page type and encoding + */ +struct PageEncodingStats { + + /** the page type (data/dic/...) **/ + 1: required PageType page_type; + + /** encoding of the page **/ + 2: required Encoding encoding; + + /** number of pages of this type with this encoding **/ + 3: required i32 count; + +} + /** * Description for column metadata */ @@ -378,6 +485,11 @@ struct ColumnMetaData { /** optional statistics for this column chunk */ 12: optional Statistics statistics; + + /** Set of all encodings used for pages in this column chunk. + * This information can be used to determine if all data pages are + * dictionary encoded for example **/ + 13: optional list encoding_stats; } struct ColumnChunk { @@ -397,6 +509,9 @@ struct ColumnChunk { } struct RowGroup { + /** Metadata for each column chunk in this row group. + * This list must have the same order as the SchemaElement list in FileMetaData. 
+ **/ 1: required list columns /** Total byte size of all the uncompressed column data in this row group **/ @@ -441,4 +556,3 @@ struct FileMetaData { **/ 6: optional string created_by } - diff --git a/src/compression/lz4-codec.cc b/src/compression/lz4-codec.cc index 8b8588c7..0c73e1db 100644 --- a/src/compression/lz4-codec.cc +++ b/src/compression/lz4-codec.cc @@ -20,7 +20,7 @@ using namespace parquet_cpp; void Lz4Codec::Decompress(int input_len, const uint8_t* input, int output_len, uint8_t* output_buffer) { - int n = LZ4_uncompress(reinterpret_cast(input), + int n = LZ4_decompress_fast(reinterpret_cast(input), reinterpret_cast(output_buffer), output_len); if (n != input_len) { throw ParquetException("Corrupt lz4 compressed data."); diff --git a/src/impala/bit-util.h b/src/impala/bit-util.h index 0c9c32d5..c2b6055a 100644 --- a/src/impala/bit-util.h +++ b/src/impala/bit-util.h @@ -16,7 +16,11 @@ #ifndef IMPALA_BIT_UTIL_H #define IMPALA_BIT_UTIL_H -#include +#if defined(__APPLE__) + #include +#else + #include +#endif #include "impala/compiler-util.h" #include "impala/logging.h" diff --git a/src/parquet.cc b/src/parquet.cc index 67d9caf4..8c697f00 100644 --- a/src/parquet.cc +++ b/src/parquet.cc @@ -164,7 +164,7 @@ bool ColumnReader::ReadNewPage() { } if (current_page_header_.type == PageType::DICTIONARY_PAGE) { - unordered_map >::iterator it = + unordered_map >::iterator it = decoders_.find(Encoding::RLE_DICTIONARY); if (it != decoders_.end()) { throw ParquetException("Column cannot have more than one dictionary."); @@ -173,7 +173,7 @@ bool ColumnReader::ReadNewPage() { PlainDecoder dictionary(schema_->type); dictionary.SetData(current_page_header_.dictionary_page_header.num_values, buffer, uncompressed_len); - shared_ptr decoder(new DictionaryDecoder(schema_->type, &dictionary)); + boost::shared_ptr decoder(new DictionaryDecoder(schema_->type, &dictionary)); decoders_[Encoding::RLE_DICTIONARY] = decoder; current_decoder_ = 
decoders_[Encoding::RLE_DICTIONARY].get(); continue; @@ -199,14 +199,14 @@ bool ColumnReader::ReadNewPage() { Encoding::type encoding = current_page_header_.data_page_header.encoding; if (IsDictionaryIndexEncoding(encoding)) encoding = Encoding::RLE_DICTIONARY; - unordered_map >::iterator it = + unordered_map >::iterator it = decoders_.find(encoding); if (it != decoders_.end()) { current_decoder_ = it->second.get(); } else { switch (encoding) { case Encoding::PLAIN: { - shared_ptr decoder; + boost::shared_ptr decoder; if (schema_->type == Type::BOOLEAN) { decoder.reset(new BoolDecoder()); } else { @@ -239,4 +239,3 @@ bool ColumnReader::ReadNewPage() { } } - diff --git a/src/util/stopwatch.h b/src/util/stopwatch.h index bfdb4e97..80f05ed4 100644 --- a/src/util/stopwatch.h +++ b/src/util/stopwatch.h @@ -15,8 +15,7 @@ #ifndef PARQUET_UTIL_STOPWATCH_H #define PARQUET_UTIL_STOPWATCH_H -#include -#include +#include namespace parquet_cpp { @@ -26,22 +25,22 @@ class StopWatch { } void Start() { - clock_gettime(CLOCK_MONOTONIC, &start_); + gettimeofday(&start_time, 0); } // Returns time in nanoseconds. 
uint64_t Stop() { - timespec end; - clock_gettime(CLOCK_MONOTONIC, &end); - return (end.tv_sec - start_.tv_sec) * 1000L * 1000L * 1000L + - (end.tv_nsec - start_.tv_nsec); + struct timeval t_time; + gettimeofday(&t_time, 0); + + return (1000L * 1000L * 1000L * (t_time.tv_sec - start_time.tv_sec) + + 1000L * (t_time.tv_usec - start_time.tv_usec)); /* tv_usec is microseconds: scale to nanoseconds */ } private: - timespec start_; + struct timeval start_time; }; } #endif - diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh deleted file mode 100755 index 868185fd..00000000 --- a/thirdparty/build_thirdparty.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -set -x -set -e -TP_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) - -source $TP_DIR/versions.sh -PREFIX=$TP_DIR/installed - -################################################################################ - -if [ "$#" = "0" ]; then - F_ALL=1 -else - # Allow passing specific libs to build on the command line - for arg in "$*"; do - case $arg in - "lz4") F_LZ4=1 ;; - "snappy") F_SNAPPY=1 ;; - *) echo "Unknown module: $arg"; exit 1 ;; - esac - done -fi - -################################################################################ - -# Determine how many parallel jobs to use for make based on the number of cores -if [[ "$OSTYPE" =~ ^linux ]]; then - PARALLEL=$(grep -c processor /proc/cpuinfo) -elif [[ "$OSTYPE" == "darwin"* ]]; then - PARALLEL=$(sysctl -n hw.ncpu) -else - echo Unsupported platform $OSTYPE - exit 1 -fi - -mkdir -p "$PREFIX/include" -mkdir -p "$PREFIX/lib" - -# On some systems, autotools installs libraries to lib64 rather than lib. Fix -# this by setting up lib64 as a symlink to lib. We have to do this step first -# to handle cases where one third-party library depends on another. 
-ln -sf lib "$PREFIX/lib64" - -# use the compiled tools -export PATH=$PREFIX/bin:$PATH - -# build snappy -if [ -n "$F_ALL" -o -n "$F_SNAPPY" ]; then - cd $SNAPPY_DIR - ./configure --with-pic --prefix=$PREFIX - make -j$PARALLEL install -fi - -# build lz4 -if [ -n "$F_ALL" -o -n "$F_LZ4" ]; then - cd $LZ4_DIR - CFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX $LZ4_DIR - make -j$PARALLEL install -fi - -echo "---------------------" -echo "Thirdparty dependencies built and installed into $PREFIX successfully" - diff --git a/thirdparty/download_thirdparty.sh b/thirdparty/download_thirdparty.sh deleted file mode 100755 index 9144ac42..00000000 --- a/thirdparty/download_thirdparty.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -x -set -e - -TP_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) -cd $TP_DIR - -source versions.sh - -if [ ! -d snappy-${SNAPPY_VERSION} ]; then - echo "Fetching snappy" - curl -OC - http://snappy.googlecode.com/files/snappy-${SNAPPY_VERSION}.tar.gz - tar xzf snappy-${SNAPPY_VERSION}.tar.gz - rm snappy-${SNAPPY_VERSION}.tar.gz -fi - diff --git a/thirdparty/lz4-svn/CMakeLists.txt b/thirdparty/lz4-svn/CMakeLists.txt deleted file mode 100644 index 712403c6..00000000 --- a/thirdparty/lz4-svn/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -SET(CMAKE_BUILD_TYPE "Release") -set(CMAKE_C_FLAGS_RELEASE "-O3 -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration") - -set(LZ4_SOURCES lz4.c lz4hc.c) -set(LZ4_HEADERS lz4.h lz4hc.h) - -add_library(lz4 STATIC ${LZ4_SOURCES}) -target_link_libraries(lz4) - -install(FILES ${LZ4_HEADERS} DESTINATION include) -install(TARGETS lz4 DESTINATION lib) \ No newline at end of file diff --git a/thirdparty/lz4-svn/lz4.c b/thirdparty/lz4-svn/lz4.c deleted file mode 100644 index 1f2eafde..00000000 --- a/thirdparty/lz4-svn/lz4.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2012, Yann Collet. 
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ - -//************************************** -// Tuning parameters -//************************************** -// MEMORY_USAGE : -// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
-// Increasing memory usage improves compression ratio -// Reduced memory usage can improve speed, due to cache effect -// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache -#define MEMORY_USAGE 14 - -// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. -// You can set this option to 1 in situations where data will remain within closed environment -// This option is useless on Little_Endian CPU (such as x86) -//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 - - - -//************************************** -// CPU Feature Detection -//************************************** -// 32 or 64 bits ? -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -// Little Endian or Big Endian ? -// Overwrite the #define below if you know your architecture endianess -#if defined (__GLIBC__) -# include -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -//************************************** -// Compiler Options -//************************************** -#if __STDC_VERSION__ >= 199901L // C99 -/* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -#ifdef _MSC_VER // Visual Studio -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bit -# pragma intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -#endif - -#ifdef _MSC_VER -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - -#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -//************************************** -// Includes -//************************************** -#include // for malloc -#include // for memset -#include "lz4.h" - - -//************************************** -// Basic Types -//************************************** -#if defined(_MSC_VER) // Visual Studio 
does not support 'stdint' natively -# define BYTE unsigned __int8 -# define U16 unsigned __int16 -# define U32 unsigned __int32 -# define S32 __int32 -# define U64 unsigned __int64 -#else -# include -# define BYTE uint8_t -# define U16 uint16_t -# define U32 uint32_t -# define S32 int32_t -# define U64 uint64_t -#endif - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -# pragma pack(push, 1) -#endif - -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -# pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define MINMATCH 4 - -#define HASH_LOG (MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) - -// NOTCOMPRESSIBLE_DETECTIONLEVEL : -// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" -// This may decrease compression ratio dramatically, but will be faster on incompressible data -// Increasing this value will make the algorithm search more before declaring a segment "incompressible" -// This could improve compression a bit, but will be slower on incompressible data -// The default value (6) is recommended -#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 -#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) -#define STACKLIMIT 13 -#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). 
-#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) -#define MINLENGTH (MFLIMIT+1) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<> ((MINMATCH*8)-HASH_LOG)) -#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); - #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); - #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif -#endif -} - -#else - -static inline int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); - #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); - #else - 
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - #endif -#endif -} - -#endif - - - -//****************************** -// Compression functions -//****************************** - -// LZ4_compressCtx : -// ----------------- -// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. -// If it cannot achieve it, compression will stop, and result of the function will be zero. -// return : the number of bytes written in buffer 'dest', or 0 if the compression fails - -static inline int LZ4_compressCtx(void** ctx, - const char* source, - char* dest, - int isize, - int maxOutputSize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - HTYPE* HashTable; -#else - HTYPE HashTable[HASHTABLESIZE] = {0}; -#endif - - const BYTE* ip = (BYTE*) source; - INITBASE(base); - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - // Init - if (isizehashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); -#else - (void) ctx; -#endif - - - // First Byte - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; forwardH = LZ4_HASH_VALUE(ip); - - // Main Loop - for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while ((ref < ip - 
MAX_DISTANCE) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit -#ifdef _MSC_VER - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<254) - { - do { *op++ = 255; len -= 255; } while (len>254); - *op++ = (BYTE)len; - memcpy(op, anchor, length); - op += length; - goto _next_match; - } - else - *op++ = (BYTE)len; - } - else *token = (length<=(int)RUN_MASK) - { - int len; - *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (length<>8) > oend) return 0; // Check output limit - if (length>=(int)ML_MASK) - { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } - if (length > 254) { length-=255; *op++ = 255; } - *op++ = (BYTE)length; - } - else *token += length; - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; - - // Test next position - ref = base + HashTable[LZ4_HASH_VALUE(ip)]; - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH_VALUE(ip); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<> ((MINMATCH*8)-HASHLOG64K)) -#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) -static inline int LZ4_compress64kCtx(void** ctx, - const char* source, - char* dest, - int 
isize, - int maxOutputSize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - U16* HashTable; -#else - U16 HashTable[HASH64KTABLESIZE] = {0}; -#endif - - const BYTE* ip = (BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const base = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - // Init - if (isizehashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); -#else - (void) ctx; -#endif - - - // First Byte - ip++; forwardH = LZ4_HASH64K_VALUE(ip); - - // Main Loop - for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if (forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH64K_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = (U16)(ip - base); - - } while (A32(ref) != A32(ip)); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit -#ifdef _MSC_VER - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<254) - { - do { *op++ = 255; len -= 255; } while (len>254); - *op++ = (BYTE)len; - memcpy(op, anchor, length); - op += length; - goto _next_match; - } - else - *op++ = (BYTE)len; - } - else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } - else *token = (length<>8) > oend) return 0; // Check output limit - if (len>=(int)ML_MASK) { *token+=ML_MASK; 
len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } - else *token += len; - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base); - - // Test next position - ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; - HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base); - if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<>ML_BITS)) == RUN_MASK) { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; } - - // copy literals - cpy = op+length; - if (cpy>oend-COPYLENGTH) - { - if (cpy != oend) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals - memcpy(op, ip, length); - ip += length; - break; // EOF - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } - - // copy repeated sequence - if unlikely((op-ref)oend-(COPYLENGTH)-(STEPSIZE-4)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(op>ML_BITS)) == RUN_MASK) - { - int s=255; - while (likely(ipoend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) - { - if (cpy > oend) goto _output_error; // Error : writes beyond output buffer - if 
(ip+length != iend) goto _output_error; // Error : LZ4 format requires to consume all input at this stage (no match within the last 11 bytes, and at least 8 remaining input bytes for another match+literals) - memcpy(op, ip, length); - op += length; - break; // Necessarily EOF, due to parsing restrictions - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside of destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) - { - while likely(ipoend-(COPYLENGTH+(STEPSIZE-4))) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(op -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -//************************************** -// Compiler Options -//************************************** -#if __STDC_VERSION__ >= 199901L // C99 - /* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#ifdef _MSC_VER -# define inline __inline // Visual is not C99, but supports some kind of inline -# define forceinline __forceinline -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bit -# pragma intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -#else -# ifdef __GNUC__ -# define forceinline inline __attribute__((always_inline)) -# else -# define forceinline inline -# endif -#endif - -#ifdef _MSC_VER // Visual Studio -#define lz4_bswap16(x) _byteswap_ushort(x) -#else -#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - - -//************************************** -// Includes -//************************************** -#include // calloc, free -#include // memset, memcpy -#include "lz4hc.h" - -#define ALLOCATOR(s) calloc(1,s) -#define FREEMEM free -#define MEM_INIT memset - - -//************************************** -// Basic Types -//************************************** -#if defined(_MSC_VER) // Visual Studio does not 
support 'stdint' natively -#define BYTE unsigned __int8 -#define U16 unsigned __int16 -#define U32 unsigned __int32 -#define S32 __int32 -#define U64 unsigned __int64 -#else -#include -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t -#endif - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(push, 1) -#endif - -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define MINMATCH 4 - -#define DICTIONARY_LOGSIZE 16 -#define MAXD (1<> ((MINMATCH*8)-HASH_LOG)) -#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) -#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) -#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] -#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) - - -//************************************** -// Private functions -//************************************** -#if LZ4_ARCH64 - -inline static int LZ4_NbCommonBytes (register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); - #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return 
(__builtin_ctzll(val) >> 3); - #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif -#endif -} - -#else - -inline static int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanReverse( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); - #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); - #else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - #endif -#endif -} - -#endif - - -inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base) -{ - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = base + LZ4_ARCH64; - hc4->base = base; - return 1; -} - - -inline static void* LZ4HC_Create (const BYTE* base) -{ - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); - - LZ4HC_Init ((LZ4HC_Data_Structure*)hc4, base); - return hc4; -} - - -inline static int LZ4HC_Free (void** LZ4HC_Data) -{ - FREEMEM(*LZ4HC_Data); - *LZ4HC_Data = NULL; - return (1); -} - - -// Update chains up to 
ip (excluded) -forceinline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) -{ - U16* chainTable = hc4->chainTable; - HTYPE* HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - - while(hc4->nextToUpdate < ip) - { - const BYTE* p = hc4->nextToUpdate; - size_t delta = (p) - HASH_POINTER(p); - if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; - DELTANEXT(p) = (U16)delta; - HashTable[HASH_VALUE(p)] = (p) - base; - hc4->nextToUpdate++; - } -} - - -forceinline static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) -{ - const BYTE* p1t = p1; - - while (p1tchainTable; - HTYPE* const HashTable = hc4->hashTable; - const BYTE* ref; - INITBASE(base,hc4->base); - int nbAttempts=MAX_NB_ATTEMPTS; - size_t repl=0, ml=0; - U16 delta; - - // HC4 match finder - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - -#define REPEAT_OPTIMIZATION -#ifdef REPEAT_OPTIMIZATION - // Detect repetitive sequences of length <= 4 - if (ref >= ip-4) // potential repetition - { - if (A32(ref) == A32(ip)) // confirmed - { - delta = (U16)(ip-ref); - repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - *matchpos = ref; - } - ref = GETNEXT(ref); - } -#endif - - while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(ref+ml) == *(ip+ml)) - if (A32(ref) == A32(ip)) - { - size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = ref; } - } - ref = GETNEXT(ref); - } - -#ifdef REPEAT_OPTIMIZATION - // Complete table - if (repl) - { - const BYTE* ptr = ip; - const BYTE* end; - - end = ip + repl - (MINMATCH-1); - while(ptr < end-delta) - { - DELTANEXT(ptr) = delta; // Pre-Load - ptr++; - } - do - { - DELTANEXT(ptr) = delta; - HashTable[HASH_VALUE(ptr)] = (ptr) - base; // Head of chain - ptr++; - } while(ptr < end); - hc4->nextToUpdate = end; - } -#endif - - return (int)ml; -} - - -forceinline static int LZ4HC_InsertAndGetWiderMatch 
(LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) -{ - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - const BYTE* ref; - int nbAttempts = MAX_NB_ATTEMPTS; - int delta = (int)(ip-startLimit); - - // First Match - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - - while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(startLimit + longest) == *(ref - delta + longest)) - if (A32(ref) == A32(ip)) - { -#if 1 - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - const BYTE* startt = ip; - - while (iptstartLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;} - - if ((ipt-startt) > longest) - { - longest = (int)(ipt-startt); - *matchpos = reft; - *startpos = startt; - } - } - ref = GETNEXT(ref); - } - - return longest; -} - - -forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref) -{ - int length, len; - BYTE* token; - - // Encode Literal length - length = (int)(*ip - *anchor); - token = (*op)++; - if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (length<=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } - else *token += len; - - // Prepare next loop - *ip += ml; - *anchor = *ip; - - return 0; -} - - -//**************************** -// Compression CODE -//**************************** - -int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx, - const char* source, - char* dest, - int isize) -{ - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - 
LASTLITERALS); - - BYTE* op = (BYTE*) dest; - - int ml, ml2, ml3, ml0; - const BYTE* ref=NULL; - const BYTE* start2=NULL; - const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; - const BYTE* start0; - const BYTE* ref0; - - ip++; - - // Main Loop - while (ip < mflimit) - { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); - if (!ml) { ip++; continue; } - - // saved, in case we would skip too much - start0 = ip; - ref0 = ref; - ml0 = ml; - -_Search2: - if (ip+ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); - else ml2 = ml; - - if (ml2 == ml) // No better match - { - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - continue; - } - - if (start0 < ip) - { - if (start2 < ip + ml0) // empirical - { - ip = start0; - ref = ref0; - ml = ml0; - } - } - - // Here, start0==ip - if ((start2 - ip) < 3) // First Match too small : removed - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - // Currently we have : - // ml2 > ml1, and - // ip1+3 <= ip2 (usually < ip1+ml1) - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); - else ml3 = ml2; - - if (ml3 == ml2) // No better match : 2 sequences to encode - { - // ip & ref are known; Now for ml - if (start2 < ip+ml) ml = (int)(start2 - ip); - // Now, encode 2 sequences - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start2; - LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2); - continue; - } - - if 
(start3 < ip+ml+3) // Not enough space for match 2 : remove it - { - if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 - { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - // OK, now we have 3 ascending matches; let's write at least the first one - // ip & ref are known; Now for ml - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); - } - } - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - - } - - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<