Skip to content

Commit fa0440f

Browse files
update R to changes from ARROW-3144 #4316
1 parent aa18d25 commit fa0440f

File tree

8 files changed

+59
-65
lines changed

8 files changed

+59
-65
lines changed

r/R/RcppExports.R

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/R/dictionary.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,25 @@
3434

3535
active = list(
3636
index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)),
37-
dictionary = function() shared_ptr(`arrow::Array`, DictionaryType__dictionary(self)),
37+
value_type = function() `arrow::DataType`$dispatch(DictionaryType__value_type(self)),
3838
name = function() DictionaryType__name(self),
3939
ordered = function() DictionaryType__ordered(self)
4040
)
4141
)
4242

4343
#' dictionary type factory
4444
#'
45-
#' @param type indices type, e.g. [int32()]
46-
#' @param values values array, typically an arrow array of strings
47-
#' @param ordered Is this an ordered dictionary
45+
#' @param index_type index type, e.g. [int32()]
46+
#' @param value_type value type, probably [utf8()]
47+
#' @param ordered Is this an ordered dictionary?
4848
#'
4949
#' @return a [arrow::DictionaryType][arrow__DictionaryType]
5050
#'
5151
#' @export
52-
dictionary <- function(type, values, ordered = FALSE) {
52+
dictionary <- function(index_type, value_type, ordered = FALSE) {
5353
assert_that(
54-
inherits(type, "arrow::DataType"),
55-
inherits(values, "arrow::Array")
54+
inherits(index_type, "arrow::DataType"),
55+
# value_type is forwarded to DictionaryType__initialize() below, so it must be
# validated as an arrow::DataType just like index_type.
inherits(value_type, "arrow::DataType")
5656
)
57-
shared_ptr(`arrow::DictionaryType`, DictionaryType__initialize(type, values, ordered))
57+
shared_ptr(`arrow::DictionaryType`, DictionaryType__initialize(index_type, value_type, ordered))
5858
}

r/src/RcppExports.cpp

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/array_from_vector.cpp

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,11 @@ std::shared_ptr<Array> MakeFactorArrayImpl(Rcpp::IntegerVector_ factor,
141141
ArrayData::Make(std::make_shared<Type>(), n, std::move(buffers), null_count, 0);
142142
auto array_indices = MakeArray(array_indices_data);
143143

144+
SEXP levels = Rf_getAttrib(factor, R_LevelsSymbol);
145+
auto dict = MakeStringArray(levels);
146+
144147
std::shared_ptr<Array> out;
145-
STOP_IF_NOT_OK(DictionaryArray::FromArrays(type, array_indices, &out));
148+
STOP_IF_NOT_OK(DictionaryArray::FromArrays(type, array_indices, dict, &out));
146149
return out;
147150
}
148151

@@ -741,22 +744,20 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
741744
}
742745

743746
template <typename Type>
744-
std::shared_ptr<arrow::DataType> GetFactorTypeImpl(Rcpp::IntegerVector_ factor) {
745-
auto dict_values = MakeStringArray(Rf_getAttrib(factor, R_LevelsSymbol));
746-
auto dict_type =
747-
dictionary(std::make_shared<Type>(), dict_values, Rf_inherits(factor, "ordered"));
748-
return dict_type;
747+
std::shared_ptr<arrow::DataType> GetFactorTypeImpl(bool ordered) {
748+
return dictionary(std::make_shared<Type>(), arrow::utf8(), ordered);
749749
}
750750

751751
std::shared_ptr<arrow::DataType> GetFactorType(SEXP factor) {
752752
SEXP levels = Rf_getAttrib(factor, R_LevelsSymbol);
753+
bool is_ordered = Rf_inherits(factor, "ordered");
753754
int n = Rf_length(levels);
754755
if (n < 128) {
755-
return GetFactorTypeImpl<arrow::Int8Type>(factor);
756+
return GetFactorTypeImpl<arrow::Int8Type>(is_ordered);
756757
} else if (n < 32768) {
757-
return GetFactorTypeImpl<arrow::Int16Type>(factor);
758+
return GetFactorTypeImpl<arrow::Int16Type>(is_ordered);
758759
} else {
759-
return GetFactorTypeImpl<arrow::Int32Type>(factor);
760+
return GetFactorTypeImpl<arrow::Int32Type>(is_ordered);
760761
}
761762
}
762763

@@ -909,21 +910,7 @@ bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr<arrow::DataType>& typ
909910

910911
arrow::DictionaryType* dict_type =
911912
arrow::checked_cast<arrow::DictionaryType*>(type.get());
912-
auto dictionary = dict_type->dictionary();
913-
if (dictionary->type() != utf8()) return false;
914-
915-
// then compare levels
916-
auto typed_dict = checked_cast<arrow::StringArray*>(dictionary.get());
917-
SEXP levels = Rf_getAttrib(obj, R_LevelsSymbol);
918-
919-
R_xlen_t n = XLENGTH(levels);
920-
if (n != typed_dict->length()) return false;
921-
922-
for (R_xlen_t i = 0; i < n; i++) {
923-
if (typed_dict->GetString(i) != CHAR(STRING_ELT(levels, i))) return false;
924-
}
925-
926-
return true;
913+
// Compare the types structurally. operator== on two shared_ptr<DataType>
// only tests pointer identity, so an equivalent string type held in a
// different instance than the one returned by utf8() would be rejected.
return dict_type->value_type()->Equals(utf8());
927914
}
928915

929916
std::shared_ptr<arrow::Array> Array__from_vector(

r/src/datatype.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,9 @@ arrow::TimeUnit::type TimestampType__unit(
250250

251251
// [[Rcpp::export]]
252252
std::shared_ptr<arrow::DataType> DictionaryType__initialize(
253-
const std::shared_ptr<arrow::DataType>& type,
254-
const std::shared_ptr<arrow::Array>& array, bool ordered) {
255-
return arrow::dictionary(type, array, ordered);
253+
const std::shared_ptr<arrow::DataType>& index_type,
254+
const std::shared_ptr<arrow::DataType>& value_type, bool ordered) {
255+
return arrow::dictionary(index_type, value_type, ordered);
256256
}
257257

258258
// [[Rcpp::export]]
@@ -262,14 +262,14 @@ std::shared_ptr<arrow::DataType> DictionaryType__index_type(
262262
}
263263

264264
// [[Rcpp::export]]
265-
std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>& type) {
266-
return type->name();
265+
std::shared_ptr<arrow::DataType> DictionaryType__value_type(
266+
const std::shared_ptr<arrow::DictionaryType>& type) {
267+
return type->value_type();
267268
}
268269

269270
// [[Rcpp::export]]
270-
std::shared_ptr<arrow::Array> DictionaryType__dictionary(
271-
const std::shared_ptr<arrow::DictionaryType>& type) {
272-
return type->dictionary();
271+
std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>& type) {
272+
return type->name();
273273
}
274274

275275
// [[Rcpp::export]]

r/src/message.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,20 @@ std::shared_ptr<arrow::RecordBatch> ipc___ReadRecordBatch__Message__Schema(
5656
const std::unique_ptr<arrow::ipc::Message>& message,
5757
const std::shared_ptr<arrow::Schema>& schema) {
5858
std::shared_ptr<arrow::RecordBatch> batch;
59-
STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(*message, schema, &batch));
59+
60+
// TODO: perhaps this should come from the R side
61+
arrow::ipc::DictionaryMemo memo;
62+
STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(*message, schema, &memo, &batch));
6063
return batch;
6164
}
6265

6366
// [[Rcpp::export]]
6467
std::shared_ptr<arrow::Schema> ipc___ReadSchema_InputStream(
6568
const std::shared_ptr<arrow::io::InputStream>& stream) {
6669
std::shared_ptr<arrow::Schema> schema;
67-
STOP_IF_NOT_OK(arrow::ipc::ReadSchema(stream.get(), &schema));
70+
// TODO: promote to function argument
71+
arrow::ipc::DictionaryMemo memo;
72+
STOP_IF_NOT_OK(arrow::ipc::ReadSchema(stream.get(), &memo, &schema));
6873
return schema;
6974
}
7075

r/src/recordbatch.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ std::shared_ptr<arrow::RecordBatch> ipc___ReadRecordBatch__InputStream__Schema(
140140
const std::shared_ptr<arrow::io::InputStream>& stream,
141141
const std::shared_ptr<arrow::Schema>& schema) {
142142
std::shared_ptr<arrow::RecordBatch> batch;
143-
STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(schema, stream.get(), &batch));
143+
// TODO: promote to function arg
144+
arrow::ipc::DictionaryMemo memo;
145+
STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(schema, &memo, stream.get(), &batch));
144146
return batch;
145147
}

r/tests/testthat/test-DataType.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,13 +314,13 @@ test_that("struct type works as expected", {
314314
})
315315

316316
test_that("DictionaryType works as expected (ARROW-3355)", {
317-
d <- dictionary(int32(), array(c("foo", "bar", "baz")))
317+
d <- dictionary(int32(), utf8())
318318
expect_equal(d, d)
319319
expect_true(d == d)
320320
expect_false(d == int32())
321321
expect_equal(d$id, Type$DICTIONARY)
322322
expect_equal(d$bit_width, 32L)
323323
expect_equal(d$ToString(), "dictionary<values=string, indices=int32, ordered=0>")
324324
expect_equal(d$index_type, int32())
325-
expect_equal(d$dictionary, array(c("foo", "bar", "baz")))
325+
expect_equal(d$value_type, utf8())
326326
})

0 commit comments

Comments
 (0)