diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 15afb1acf67..8042661533e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -470,7 +470,6 @@ set(ARROW_SRCS src/arrow/array.cc src/arrow/builder.cc src/arrow/field.cc - src/arrow/schema.cc src/arrow/type.cc ) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 04f8dd1f908..77326ce38d7 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -30,4 +30,3 @@ install(FILES set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS}) ADD_ARROW_TEST(array-test) -ADD_ARROW_TEST(schema-test) diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt index a401622d2e0..b51258ffd8b 100644 --- a/cpp/src/arrow/table/CMakeLists.txt +++ b/cpp/src/arrow/table/CMakeLists.txt @@ -21,6 +21,8 @@ set(TABLE_SRCS column.cc + schema.cc + table.cc ) set(TABLE_LIBS @@ -37,3 +39,5 @@ install(FILES DESTINATION include/arrow/table) ADD_ARROW_TEST(column-test) +ADD_ARROW_TEST(schema-test) +ADD_ARROW_TEST(table-test) diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc index 15f554f4632..4959b82c6e2 100644 --- a/cpp/src/arrow/table/column-test.cc +++ b/cpp/src/arrow/table/column-test.cc @@ -22,48 +22,29 @@ #include #include "arrow/field.h" -#include "arrow/schema.h" #include "arrow/table/column.h" +#include "arrow/table/schema.h" +#include "arrow/table/test-common.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/types/integer.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" using std::shared_ptr; using std::vector; namespace arrow { -class TestColumn : public ::testing::Test { - public: - void SetUp() { - pool_ = GetDefaultMemoryPool(); - } - - template - std::shared_ptr MakeArray(int32_t length, int32_t null_count = 0) { - auto data = std::make_shared(pool_); - auto nulls = std::make_shared(pool_); - data->Resize(length * sizeof(typename ArrayType::value_type)); - nulls->Resize(util::bytes_for_bits(length)); - return std::make_shared(length, data, 10, nulls); - } - +class TestColumn : public TestBase { protected: - MemoryPool* pool_; - std::shared_ptr data_; std::unique_ptr column_; }; TEST_F(TestColumn, BasicAPI) { ArrayVector arrays; - arrays.push_back(MakeArray(100)); - arrays.push_back(MakeArray(100, 10)); - arrays.push_back(MakeArray(100, 20)); + arrays.push_back(MakePrimitive(100)); + arrays.push_back(MakePrimitive(100, 10)); + arrays.push_back(MakePrimitive(100, 20)); auto field = std::make_shared("c0", INT32); column_.reset(new Column(field, arrays)); @@ -77,15 +58,15 @@ TEST_F(TestColumn, BasicAPI) { TEST_F(TestColumn, ChunksInhomogeneous) { ArrayVector arrays; - arrays.push_back(MakeArray(100)); - arrays.push_back(MakeArray(100, 10)); + arrays.push_back(MakePrimitive(100)); + arrays.push_back(MakePrimitive(100, 10)); auto field = std::make_shared("c0", INT32); column_.reset(new Column(field, arrays)); ASSERT_OK(column_->ValidateData()); - arrays.push_back(MakeArray(100, 10)); + arrays.push_back(MakePrimitive(100, 10)); column_.reset(new Column(field, arrays)); ASSERT_RAISES(Invalid, column_->ValidateData()); } diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc index 82750cf4d43..d68b491fb99 100644 --- a/cpp/src/arrow/table/column.cc +++ b/cpp/src/arrow/table/column.cc @@ -39,6 +39,12 @@ Column::Column(const std::shared_ptr& field, const ArrayVector& chunks) : data_ = std::make_shared(chunks); } +Column::Column(const std::shared_ptr& field, + const std::shared_ptr& data) : + field_(field) { + data_ = std::make_shared(ArrayVector({data})); +} + Column::Column(const std::shared_ptr& field, const std::shared_ptr& data) : field_(field), diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h index 9e9064e8654..64423bf9561 100644 --- a/cpp/src/arrow/table/column.h +++ b/cpp/src/arrow/table/column.h @@ -67,6 +67,8 @@ class Column { Column(const std::shared_ptr& field, const std::shared_ptr& data); + Column(const std::shared_ptr& field, const std::shared_ptr& data); + int64_t length() const { return data_->length(); } diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/table/schema-test.cc similarity index 99% rename from cpp/src/arrow/schema-test.cc rename to cpp/src/arrow/table/schema-test.cc index 7c190d068c2..0cf1b3c5f9a 100644 --- a/cpp/src/arrow/schema-test.cc +++ b/cpp/src/arrow/table/schema-test.cc @@ -21,7 +21,7 @@ #include #include "arrow/field.h" -#include "arrow/schema.h" +#include "arrow/table/schema.h" #include "arrow/type.h" #include "arrow/types/string.h" diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/table/schema.cc similarity index 98% rename from cpp/src/arrow/schema.cc rename to cpp/src/arrow/table/schema.cc index a735fd3d230..fb3b4d6f292 100644 --- a/cpp/src/arrow/schema.cc +++ b/cpp/src/arrow/table/schema.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/schema.h" +#include "arrow/table/schema.h" #include #include diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/table/schema.h similarity index 100% rename from cpp/src/arrow/schema.h rename to cpp/src/arrow/table/schema.h diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc new file mode 100644 index 00000000000..dd4f74cd16f --- /dev/null +++ b/cpp/src/arrow/table/table-test.cc @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include "arrow/field.h" +#include "arrow/table/column.h" +#include "arrow/table/schema.h" +#include "arrow/table/table.h" +#include "arrow/table/test-common.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/types/integer.h" + +using std::shared_ptr; +using std::vector; + +namespace arrow { + +class TestTable : public TestBase { + public: + void MakeExample1(int length) { + auto f0 = std::make_shared("f0", INT32); + auto f1 = std::make_shared("f1", UINT8); + auto f2 = std::make_shared("f2", INT16); + + vector > fields = {f0, f1, f2}; + schema_ = std::make_shared(fields); + + columns_ = { + std::make_shared(schema_->field(0), MakePrimitive(length)), + std::make_shared(schema_->field(1), MakePrimitive(length)), + std::make_shared(schema_->field(2), MakePrimitive(length)) + }; + } + + protected: + std::unique_ptr table_; + shared_ptr schema_; + vector > columns_; +}; + +TEST_F(TestTable, EmptySchema) { + auto empty_schema = shared_ptr(new Schema({})); + table_.reset(new Table("data", empty_schema, columns_)); + ASSERT_OK(table_->ValidateColumns()); + ASSERT_EQ(0, table_->num_rows()); + ASSERT_EQ(0, table_->num_columns()); +} + +TEST_F(TestTable, Ctors) { + int length = 100; + MakeExample1(length); + + std::string name = "data"; + + table_.reset(new Table(name, schema_, columns_)); + ASSERT_OK(table_->ValidateColumns()); + ASSERT_EQ(name, table_->name()); + ASSERT_EQ(length, table_->num_rows()); + ASSERT_EQ(3, table_->num_columns()); + + table_.reset(new Table(name, schema_, columns_, length)); + ASSERT_OK(table_->ValidateColumns()); + ASSERT_EQ(name, table_->name()); + ASSERT_EQ(length, table_->num_rows()); +} + +TEST_F(TestTable, Metadata) { + int length = 100; + MakeExample1(length); + + std::string name = "data"; + table_.reset(new Table(name, schema_, columns_)); + + ASSERT_TRUE(table_->schema()->Equals(schema_)); + + auto col = table_->column(0); + ASSERT_EQ(schema_->field(0)->name, col->name()); + ASSERT_EQ(schema_->field(0)->type, col->type()); +} + +TEST_F(TestTable, InvalidColumns) { + // Check that columns are all the same length + int length = 100; + MakeExample1(length); + + table_.reset(new Table("data", schema_, columns_, length - 1)); + ASSERT_RAISES(Invalid, table_->ValidateColumns()); + + columns_.clear(); + + // Wrong number of columns + table_.reset(new Table("data", schema_, columns_, length)); + ASSERT_RAISES(Invalid, table_->ValidateColumns()); + + columns_ = { + std::make_shared(schema_->field(0), MakePrimitive(length)), + std::make_shared(schema_->field(1), MakePrimitive(length)), + std::make_shared(schema_->field(2), MakePrimitive(length - 1)) + }; + + table_.reset(new Table("data", schema_, columns_, length)); + ASSERT_RAISES(Invalid, table_->ValidateColumns()); +} + +} // namespace arrow diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc new file mode 100644 index 00000000000..4cefc924ed3 --- /dev/null +++ b/cpp/src/arrow/table/table.cc @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/table/table.h" + +#include +#include + +#include "arrow/field.h" +#include "arrow/table/column.h" +#include "arrow/table/schema.h" +#include "arrow/util/status.h" + +namespace arrow { + +Table::Table(const std::string& name, const std::shared_ptr& schema, + const std::vector >& columns) : + name_(name), + schema_(schema), + columns_(columns) { + if (columns.size() == 0) { + num_rows_ = 0; + } else { + num_rows_ = columns[0]->length(); + } +} + +Table::Table(const std::string& name, const std::shared_ptr& schema, + const std::vector >& columns, int64_t num_rows) : + name_(name), + schema_(schema), + columns_(columns), + num_rows_(num_rows) {} + +Status Table::ValidateColumns() const { + if (num_columns() != schema_->num_fields()) { + return Status::Invalid("Number of columns did not match schema"); + } + + if (columns_.size() == 0) { + return Status::OK(); + } + + // Make sure columns are all the same length + for (size_t i = 0; i < columns_.size(); ++i) { + const Column* col = columns_[i].get(); + if (col->length() != num_rows_) { + std::stringstream ss; + ss << "Column " << i << " expected length " + << num_rows_ + << " but got length " + << col->length(); + return Status::Invalid(ss.str()); + } + } + return Status::OK(); +} + +} // namespace arrow diff --git a/cpp/src/arrow/table/table.h b/cpp/src/arrow/table/table.h new file mode 100644 index 00000000000..b0129387b71 --- /dev/null +++ b/cpp/src/arrow/table/table.h @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_TABLE_TABLE_H +#define ARROW_TABLE_TABLE_H + +#include +#include +#include + +namespace arrow { + +class Column; +class Schema; +class Status; + +// Immutable container of fixed-length columns conforming to a particular schema +class Table { + public: + // If columns is zero-length, the table's number of rows is zero + Table(const std::string& name, const std::shared_ptr& schema, + const std::vector >& columns); + + Table(const std::string& name, const std::shared_ptr& schema, + const std::vector >& columns, int64_t num_rows); + + // @returns: the table's name, if any (may be length 0) + const std::string& name() const { + return name_; + } + + // @returns: the table's schema + const std::shared_ptr& schema() const { + return schema_; + } + + // Note: Does not boundscheck + // @returns: the i-th column + const std::shared_ptr& column(int i) const { + return columns_[i]; + } + + // @returns: the number of columns in the table + int num_columns() const { + return columns_.size(); + } + + // @returns: the number of rows (the corresponding length of each column) + int64_t num_rows() const { + return num_rows_; + } + + // After construction, perform any checks to validate the input arguments + Status ValidateColumns() const; + + private: + // The table's name, optional + std::string name_; + + std::shared_ptr schema_; + std::vector > columns_; + + int64_t num_rows_; +}; + +} // namespace arrow + +#endif // ARROW_TABLE_TABLE_H diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h new file mode 100644 index 00000000000..efe2f228cd0 --- /dev/null +++ b/cpp/src/arrow/table/test-common.h @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include "arrow/field.h" +#include "arrow/table/column.h" +#include "arrow/table/schema.h" +#include "arrow/table/table.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/buffer.h" +#include "arrow/util/memory-pool.h" + +namespace arrow { + +class TestBase : public ::testing::Test { + public: + void SetUp() { + pool_ = GetDefaultMemoryPool(); + } + + template + std::shared_ptr MakePrimitive(int32_t length, int32_t null_count = 0) { + auto data = std::make_shared(pool_); + auto nulls = std::make_shared(pool_); + EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type))); + EXPECT_OK(nulls->Resize(util::bytes_for_bits(length))); + return std::make_shared(length, data, 10, nulls); + } + + protected: + MemoryPool* pool_; +}; + +} // namespace arrow