Skip to content

Commit

Permalink
Fix/2089 support sets for pyarrow backend (#2090)
Browse files Browse the repository at this point in the history
* support sets for pyarrow
  • Loading branch information
karakanb authored Nov 27, 2024
1 parent aa80667 commit 6f146d1
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dlt/common/libs/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,15 +620,15 @@ def row_tuples_to_arrow(
)
float_array = pa.array(columnar_known_types[field.name], type=pa.float64())
columnar_known_types[field.name] = float_array.cast(field.type, safe=False)
- if issubclass(py_type, (dict, list)):
+ if issubclass(py_type, (dict, list, set)):
logger.warning(
f"Field {field.name} was reflected as JSON type and needs to be serialized back to"
" string to be placed in arrow table. This will slow data extraction down. You"
" should cast JSON field to STRING in your database system ie. by creating and"
" extracting an SQL VIEW that selects with cast."
)
json_str_array = pa.array(
- [None if s is None else json.dumps(s) for s in columnar_known_types[field.name]]
+ [None if s is None else json.dumps(s) if not issubclass(type(s), set) else json.dumps(list(s)) for s in columnar_known_types[field.name]]
)
columnar_known_types[field.name] = json_str_array

Expand Down

0 comments on commit 6f146d1

Please sign in to comment.