Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions python/pyarrow/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,72 @@ void InitPandasStaticData() {
}
#endif

// Support conversion path for UUID objects
namespace {

// This needs a conditional, because using std::once_flag could introduce
// a deadlock when the GIL is enabled. See
// https://github.com/apache/arrow/commit/f69061935e92e36e25bb891177ca8bc4f463b272 for
// more info.
#ifdef Py_GIL_DISABLED
static std::once_flag uuid_static_initialized;
#else
static bool uuid_static_initialized = false;
#endif

// Once initialized, these variables hold borrowed references to UUID static data.
// We should not use OwnedRef here because Python destructors would be
// called on a finalized interpreter.
static PyObject* uuid_UUID = nullptr;

void GetUUIDStaticSymbols() {
OwnedRef uuid;

// Import uuid
Status s = ImportModule("uuid", &uuid);
if (!s.ok()) {
return;
}

#ifndef Py_GIL_DISABLED
// Since ImportModule can release the GIL, another thread could have
// already initialized the static data.
if (uuid_static_initialized) {
return;
}
#endif

OwnedRef ref;

// Retain reference to uuid.UUID
if (ImportFromModule(uuid.obj(), "UUID", &ref).ok()) {
uuid_UUID = ref.obj();
}

}

} // namespace

// Populate the cached uuid.UUID reference at most once by running
// GetUUIDStaticSymbols().
void InitUUIDStaticData() {
#ifdef Py_GIL_DISABLED
  // Without the GIL, std::call_once serializes concurrent initializers.
  std::call_once(uuid_static_initialized, GetUUIDStaticSymbols);
#else
  // NOTE: This is called with the GIL held. We needn't (and shouldn't,
  // to avoid deadlocks) use an additional C++ lock (ARROW-10519).
  if (!uuid_static_initialized) {
    GetUUIDStaticSymbols();
    // The flag is set even when the lookup failed, so it is not retried.
    uuid_static_initialized = true;
  }
#endif
}

// Return true only when `obj` is an instance of the cached uuid.UUID class.
//
// Returns false when the class has not been cached (uuid_UUID is still null),
// because a null class must not be passed to PyObject_IsInstance.
// PyObject_IsInstance reports failure as -1 with a Python exception set; that
// case is treated as "not a UUID" and the pending exception is cleared, since
// a bool return value cannot propagate it to the caller.
bool IsPyUUID(PyObject* obj) {
  if (uuid_UUID == nullptr) {
    return false;
  }
  const int is_instance = PyObject_IsInstance(obj, uuid_UUID);
  if (is_instance < 0) {
    PyErr_Clear();
    return false;
  }
  return is_instance == 1;
}

bool PandasObjectIsNull(PyObject* obj) {
if (!MayHaveNaN(obj)) {
return false;
Expand Down
5 changes: 5 additions & 0 deletions python/pyarrow/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ inline bool IsPyBinary(PyObject* obj) {
return PyBytes_Check(obj) || PyByteArray_Check(obj) || PyMemoryView_Check(obj);
}

// \brief Import the uuid module and cache a reference to uuid.UUID
//
// Call this before IsPyUUID, which reads the cached class.
void InitUUIDStaticData();

// \brief Check that obj is a uuid.UUID instance
// \param[in] obj A Python object
// \return whether obj is an instance of the cached uuid.UUID class
bool IsPyUUID(PyObject* obj);

// \brief Convert a Python integer into a C integer
// \param[in] obj A Python integer
// \param[out] out A pointer to a C integer to hold the result of the conversion
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/src/arrow/python/inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ class TypeInferrer {
arrow_scalar_count_(0),
numpy_dtype_count_(0),
interval_count_(0),
uuid_count_(0),
max_decimal_metadata_(std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::min()),
decimal_type_() {
Expand Down Expand Up @@ -412,6 +413,8 @@ class TypeInferrer {
++decimal_count_;
} else if (PyObject_IsInstance(obj, interval_types_.obj())) {
++interval_count_;
} else if (internal::IsPyUUID(obj)) {
++uuid_count_;
} else {
return internal::InvalidValue(obj,
"did not recognize Python value type when inferring "
Expand Down Expand Up @@ -541,6 +544,9 @@ class TypeInferrer {
*out = utf8();
} else if (interval_count_) {
*out = month_day_nano_interval();
} else if (uuid_count_) {
// WIP: not binary, how do we set to UUID canonical extension type?
*out = extension::uuid();
} else if (arrow_scalar_count_) {
*out = scalar_type_;
} else {
Expand Down Expand Up @@ -698,6 +704,7 @@ class TypeInferrer {
int64_t arrow_scalar_count_;
int64_t numpy_dtype_count_;
int64_t interval_count_;
int64_t uuid_count_;
std::unique_ptr<TypeInferrer> list_inferrer_;
std::map<std::string, TypeInferrer> struct_inferrers_;
std::shared_ptr<DataType> scalar_type_;
Expand All @@ -721,6 +728,9 @@ Result<std::shared_ptr<DataType>> InferArrowType(PyObject* obj, PyObject* mask,
internal::InitPandasStaticData();
}

// Support conversion path for uuid.UUID objects
internal::InitUUIDStaticData();

std::shared_ptr<DataType> out_type;
TypeInferrer inferrer(pandas_null_sentinels);
RETURN_NOT_OK(inferrer.VisitSequence(obj, mask));
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/src/arrow/python/inference.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <memory>

#include "arrow/extension/uuid.h"
#include "arrow/python/visibility.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1242,6 +1242,13 @@ Result<std::shared_ptr<ChunkedArray>> ConvertPySequence(PyObject* obj, PyObject*
internal::InitPandasStaticData();
}

ARROW_ASSIGN_OR_RAISE(auto is_uuid_imported, internal::IsModuleImported("uuid"));
if (is_uuid_imported) {
// If uuid has already been imported, initialize the static uuid objects to
// support converting uuid.UUID objects
internal::InitUUIDStaticData();
}

int64_t size = options.size;
RETURN_NOT_OK(ConvertToSequenceAndInferSize(obj, &seq, &size));
tmp_seq_nanny.reset(seq);
Expand Down
5 changes: 5 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4202,3 +4202,8 @@ def test_non_cpu_array():
arr.tolist()
with pytest.raises(NotImplementedError):
arr.validate(full=True)

def test_array_from_uuid():
    """pa.array accepts a list of uuid.UUID objects."""
    import uuid

    uuid_values = []
    for _ in range(10):
        uuid_values.append(uuid.uuid4())

    arr = pa.array(uuid_values)
    assert len(arr) == 10