Skip to content

Commit

Permalink
Merge branch 'apacheGH-36760-Go]-Adding-avro-ocf-reader---reader' of h…
Browse files Browse the repository at this point in the history
  • Loading branch information
loicalleyne committed Oct 6, 2023
2 parents 9913265 + d39d2cf commit d777c5d
Show file tree
Hide file tree
Showing 329 changed files with 8,629 additions and 1,831 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ jobs:
name: AMD64 Windows 2019 Go ${{ matrix.go }}
runs-on: windows-2019
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 15
timeout-minutes: 25
strategy:
fail-fast: false
matrix:
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/matlab.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ jobs:
run: sudo apt-get install ninja-build
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Install ccache
run: sudo apt-get install ccache
- name: Setup ccache
Expand Down Expand Up @@ -99,6 +101,8 @@ jobs:
run: brew install ninja
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Install ccache
run: brew install ccache
- name: Setup ccache
Expand Down Expand Up @@ -135,6 +139,8 @@ jobs:
fetch-depth: 0
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Download Timezone Database
shell: bash
run: ci/scripts/download_tz_database.sh
Expand Down
17 changes: 8 additions & 9 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3346,7 +3346,7 @@ garrow_set_lookup_options_get_property(GObject *object,
g_value_set_object(value, priv->value_set);
break;
case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS:
g_value_set_boolean(value, options->skip_nulls);
g_value_set_boolean(value, options->skip_nulls.has_value() && options->skip_nulls.value());
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
Expand Down Expand Up @@ -3398,13 +3398,11 @@ garrow_set_lookup_options_class_init(GArrowSetLookupOptionsClass *klass)
*
* Since: 6.0.0
*/
spec = g_param_spec_boolean("skip-nulls",
"Skip NULLs",
"Whether NULLs are skipped or not",
options.skip_nulls,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class,
PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS,
auto skip_nulls = (options.skip_nulls.has_value() && options.skip_nulls.value());
spec =
g_param_spec_boolean("skip-nulls", "Skip NULLs", "Whether NULLs are skipped or not",
skip_nulls, static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS,
spec);
}

Expand Down Expand Up @@ -6458,9 +6456,10 @@ garrow_set_lookup_options_new_raw(
arrow_copied_options.get());
auto value_set =
garrow_datum_new_raw(&(arrow_copied_set_lookup_options->value_set));
auto skip_nulls = (arrow_options->skip_nulls.has_value() && arrow_options->skip_nulls.value());
auto options = g_object_new(GARROW_TYPE_SET_LOOKUP_OPTIONS,
"value-set", value_set,
"skip-nulls", arrow_options->skip_nulls,
"skip-nulls", skip_nulls,
NULL);
return GARROW_SET_LOOKUP_OPTIONS(options);
}
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_archery.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jira
pygit2
pygithub
ruamel.yaml
setuptools_scm
setuptools_scm<8.0.0
toolz

# benchmark
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_crossbow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ jinja2
jira
pygit2
ruamel.yaml
setuptools_scm
setuptools_scm<8.0.0
toolz
2 changes: 1 addition & 1 deletion ci/conda_env_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ pytest-faulthandler
pytest-lazy-fixture
s3fs>=2021.8.0
setuptools
setuptools_scm
setuptools_scm<8.0.0
2 changes: 1 addition & 1 deletion ci/conda_env_sphinx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ breathe
doxygen
ipython
numpydoc
pydata-sphinx-theme==0.8
pydata-sphinx-theme
sphinx-autobuild
sphinx-design
sphinx-copybutton
Expand Down
24 changes: 24 additions & 0 deletions ci/docker/conda-python-cython2.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Build arguments that select the base image; `python` defaults to 3.8 while
# `repo` and `arch` have no default and must be supplied by the caller.
ARG repo
ARG arch
ARG python=3.8
# Start from the matching conda-python image for the chosen repo/arch/python.
FROM ${repo}:${arch}-conda-python-${python}

# Install a Cython release older than 3 (the "cython<3" constraint), then run
# `mamba clean --all` in the same layer.
RUN mamba install -q -y "cython<3" && \
mamba clean --all
19 changes: 19 additions & 0 deletions ci/scripts/go_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,22 @@ pushd ${source_dir}/parquet
go install -v ./...

popd

# Optionally build the Go C Data Interface integration package as a C shared
# library. Only runs when ARROW_GO_INTEGRATION is set to a non-empty value.
if [[ -n "${ARROW_GO_INTEGRATION:-}" ]]; then
    pushd "${source_dir}/arrow/internal/cdata_integration"

    # Choose the output file name of the shared library for the host platform.
    case "$(uname)" in
        Linux|Darwin)
            go_lib="arrow_go_integration.so"
            ;;
        MINGW*)
            go_lib="arrow_go_integration.dll"
            ;;
        *)
            # Without a library name, `-o` would consume the trailing `.`
            # package argument as its output path, so fail loudly instead.
            echo "Unsupported platform for the Go integration build: $(uname)" >&2
            exit 1
            ;;
    esac
    go build -tags cdata_integration,assert -buildmode=c-shared -o "${go_lib}" .

    popd
fi
12 changes: 7 additions & 5 deletions ci/scripts/js_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ yarn lint:ci
yarn build

if [ "${BUILD_DOCS_JS}" == "ON" ]; then
if [ "$(git config --get remote.origin.url)" == "https://github.com/apache/arrow.git" ]; then
yarn doc
elif [ "$(git config --get remote.upstream.url)" == "https://github.com/apache/arrow.git" ]; then
yarn doc --gitRemote upstream
elif [ "$(git config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then
# If apache or upstream are defined use those as remote.
# Otherwise use origin which could be a fork on PRs.
if [ "$(git config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then
yarn doc --gitRemote apache
elif [[ "$(git config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then
yarn doc --gitRemote upstream
elif [[ "$(basename -s .git $(git config --get remote.origin.url))" == "arrow" ]]; then
yarn doc
else
echo "Failed to build docs because the remote is not set correctly. Please set the origin or upstream remote to https://github.com/apache/arrow.git or the apache remote to git@github.com:apache/arrow.git."
exit 0
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ function(arrow_create_merged_static_lib output_target)
if(APPLE)
set(BUNDLE_COMMAND "libtool" "-no_warning_for_no_symbols" "-static" "-o"
${output_lib_path} ${all_library_paths})
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel)$")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel|IntelLLVM)$")
set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar)

file(WRITE ${ar_script_path}.in "CREATE ${output_lib_path}\n")
Expand Down
9 changes: 6 additions & 3 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdate-time")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-deprecated")
Expand Down Expand Up @@ -360,7 +361,8 @@ elseif("${BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
else()
Expand All @@ -383,7 +385,8 @@ else()
OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
else()
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
*out_type = arrow::dictionary(index_type, value_type_);

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand All @@ -299,9 +299,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
}

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,22 @@ std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) cons
return i == -1 ? nullptr : field(i);
}

/// Check that `name` resolves to a field of this struct array.
///
/// Relies on GetFieldByName, which yields null when it cannot resolve the name;
/// that case is reported as Status::Invalid, anything else as Status::OK.
Status StructArray::CanReferenceFieldByName(const std::string& name) const {
  const bool resolvable = GetFieldByName(name) != nullptr;
  if (resolvable) {
    return Status::OK();
  }
  return Status::Invalid("Field named '", name,
                         "' not found or not unique in the struct.");
}

/// Check every entry of `names` with CanReferenceFieldByName.
///
/// Names are checked in order; the first failing status is returned as-is and
/// Status::OK is returned only when all names pass (including an empty list).
Status StructArray::CanReferenceFieldsByNames(
    const std::vector<std::string>& names) const {
  for (auto it = names.cbegin(); it != names.cend(); ++it) {
    ARROW_RETURN_NOT_OK(CanReferenceFieldByName(*it));
  }
  return Status::OK();
}

Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
ArrayVector flattened;
flattened.resize(data_->child_data.size());
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ class ARROW_EXPORT StructArray : public Array {
/// Returns null if name not found
std::shared_ptr<Array> GetFieldByName(const std::string& name) const;

/// \brief Check that the field named `name` can be found unambiguously in the struct
///
/// \param[in] name The field name to look up
/// \return Status::OK if the name resolves to a field, otherwise Status::Invalid
/// (the field was not found or the name is not unique)
Status CanReferenceFieldByName(const std::string& name) const;

/// \brief Check that all fields named in `names` can be found unambiguously in the
/// struct
///
/// \param[in] names The field names to look up, checked in order
/// \return Status::OK if every name resolves to a field, otherwise the error
/// produced for the first name that does not
Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;

/// \brief Flatten this array as a vector of arrays, one for each field
///
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Expand Down
52 changes: 52 additions & 0 deletions cpp/src/arrow/array/array_struct_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,58 @@ TEST(StructArray, FlattenOfSlice) {
ASSERT_OK(arr->ValidateFull());
}

TEST(StructArray, CanReferenceFieldByName) {
  // Four two-element children; the name "f1" is deliberately used twice so
  // that it cannot be referenced unambiguously.
  const std::vector<std::shared_ptr<Array>> columns = {
      ArrayFromJSON(int8(), "[4, 5]"),
      ArrayFromJSON(int16(), "[6, 7]"),
      ArrayFromJSON(int32(), "[8, 9]"),
      ArrayFromJSON(int64(), "[10, 11]"),
  };
  const auto struct_type = struct_({
      field("f0", int8()),
      field("f1", int16()),
      field("f2", int32()),
      field("f1", int64()),
  });
  const auto struct_array = std::make_shared<StructArray>(struct_type, 2, columns);

  // Names that occur exactly once are accepted.
  ASSERT_OK(struct_array->CanReferenceFieldByName("f0"));
  ASSERT_OK(struct_array->CanReferenceFieldByName("f2"));

  // A name that is absent from the struct is rejected.
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldByName("nope"));

  // A name shared by two fields is rejected as well.
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldByName("f1"));
}

TEST(StructArray, CanReferenceFieldsByNames) {
  // Four two-element children; the name "f1" is deliberately used twice so
  // that it cannot be referenced unambiguously.
  const std::vector<std::shared_ptr<Array>> columns = {
      ArrayFromJSON(int8(), "[4, 5]"),
      ArrayFromJSON(int16(), "[6, 7]"),
      ArrayFromJSON(int32(), "[8, 9]"),
      ArrayFromJSON(int64(), "[10, 11]"),
  };
  const auto struct_type = struct_({
      field("f0", int8()),
      field("f1", int16()),
      field("f2", int32()),
      field("f1", int64()),
  });
  const auto struct_array = std::make_shared<StructArray>(struct_type, 2, columns);

  // Unique names are accepted regardless of the order in which they are given.
  ASSERT_OK(struct_array->CanReferenceFieldsByNames({"f0", "f2"}));
  ASSERT_OK(struct_array->CanReferenceFieldsByNames({"f2", "f0"}));

  // One missing name is enough to reject the whole list.
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldsByNames({"nope"}));
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldsByNames({"f0", "nope"}));

  // One duplicated name is enough to reject the whole list.
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldsByNames({"f1"}));
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldsByNames({"f0", "f1"}));

  // A list mixing a valid, a duplicated and a missing name is rejected too.
  ASSERT_RAISES(Invalid, struct_array->CanReferenceFieldsByNames({"f0", "f1", "nope"}));
}

// ----------------------------------------------------------------------------------
// Struct test
class TestStructBuilder : public ::testing::Test {
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/array/builder_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {
enable_if_memoize<T, Status> Visit(const T&) {
using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
start_offset_, out_);
ARROW_ASSIGN_OR_RAISE(*out_, DictionaryTraits<T>::GetDictionaryArrayData(
pool_, value_type_, *memo_table, start_offset_));
return Status::OK();
}
};

Expand Down
Loading

0 comments on commit d777c5d

Please sign in to comment.