Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(presto): Handle ROW data stored as string #10456

Merged
merged 5 commits into from
Jul 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from contextlib import closing
from datetime import datetime
from distutils.version import StrictVersion
from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING
from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
from urllib import parse

import pandas as pd
Expand All @@ -40,6 +40,7 @@
from superset.exceptions import SupersetTemplateException
from superset.models.sql_lab import Query
from superset.models.sql_types.presto_sql_types import type_map as presto_type_map
from superset.result_set import destringify
from superset.sql_parse import ParsedQuery
from superset.utils import core as utils

Expand Down Expand Up @@ -568,7 +569,7 @@ def get_all_datasource_names(
return datasource_names

@classmethod
def expand_data( # pylint: disable=too-many-locals
def expand_data( # pylint: disable=too-many-locals,too-many-branches
cls, columns: List[Dict[Any, Any]], data: List[Dict[Any, Any]]
) -> Tuple[List[Dict[Any, Any]], List[Dict[Any, Any]], List[Dict[Any, Any]]]:
"""
Expand Down Expand Up @@ -616,6 +617,7 @@ def expand_data( # pylint: disable=too-many-locals
current_array_level = level

name = column["name"]
values: Optional[Union[str, List[Any]]]

if column["type"].startswith("ARRAY("):
# keep processing array children; we append to the right so that
Expand All @@ -627,6 +629,8 @@ def expand_data( # pylint: disable=too-many-locals
while i < len(data):
row = data[i]
values = row.get(name)
if isinstance(values, str):
row[name] = values = destringify(values)
if values:
# how many extra rows we need to unnest the data?
extra_rows = len(values) - 1
Expand All @@ -653,12 +657,15 @@ def expand_data( # pylint: disable=too-many-locals
# expand columns; we append them to the left so they are added
# immediately after the parent
expanded = get_children(column)
to_process.extendleft((column, level) for column in expanded)
to_process.extendleft((column, level) for column in expanded[::-1])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reversed `expanded` so that sub-columns are appended to the deque in the correct order; without the reversal, `extendleft` (which prepends items one at a time) reverses them, so:

a ROW(b, c) => a, a.c, a.b  (instead of the expected a, a.b, a.c)

expanded_columns.extend(expanded)

# expand row objects into new columns
for row in data:
for value, col in zip(row.get(name) or [], expanded):
values = row.get(name) or []
if isinstance(values, str):
row[name] = values = cast(List[Any], destringify(values))
for value, col in zip(values, expanded):
row[col["name"]] = value

data = [
Expand Down
4 changes: 4 additions & 0 deletions superset/result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ def stringify_values(array: np.ndarray) -> np.ndarray:
return vstringify(array)


def destringify(obj: str) -> Any:
    """
    Parse a JSON-encoded string back into the Python value it represents.

    Some engines (e.g. Presto) return complex column values such as ROW or
    ARRAY data serialized as JSON strings; this reverses that serialization.

    :param obj: a JSON document as a string
    :return: the deserialized value (list, dict, str, number, bool, or None)
    :raises json.JSONDecodeError: if ``obj`` is not valid JSON
    """
    return json.loads(obj)


class SupersetResultSet:
def __init__( # pylint: disable=too-many-locals,too-many-branches
self,
Expand Down
59 changes: 58 additions & 1 deletion tests/db_engine_specs/presto_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,9 @@ def test_presto_expand_data_with_complex_row_columns(self):
"name": "row_column",
"type": "ROW(NESTED_OBJ1 VARCHAR, NESTED_ROW ROW(NESTED_OBJ2 VARCHAR))",
},
{"name": "row_column.nested_obj1", "type": "VARCHAR"},
{"name": "row_column.nested_row", "type": "ROW(NESTED_OBJ2 VARCHAR)"},
{"name": "row_column.nested_row.nested_obj2", "type": "VARCHAR"},
{"name": "row_column.nested_obj1", "type": "VARCHAR"},
]
expected_data = [
{
Expand Down Expand Up @@ -433,3 +433,60 @@ def test_query_cost_formatter(self):
}
]
self.assertEqual(formatted_cost, expected)

@mock.patch.dict(
    "superset.extensions.feature_flag_manager._feature_flags",
    {"PRESTO_EXPAND_DATA": True},
    clear=True,
)
def test_presto_expand_data_array(self):
    """ROW values arriving as JSON strings are parsed and expanded."""
    scalar_cols = [
        {"name": "event_id", "type": "VARCHAR", "is_date": False},
        {"name": "timestamp", "type": "BIGINT", "is_date": False},
    ]
    row_col = {
        "name": "user",
        "type": "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)",
        "is_date": False,
    }
    cols = scalar_cols + [row_col]
    # The ROW value is stored as a JSON string rather than a native list.
    data = [
        {
            "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
            "timestamp": "1595895506219",
            "user": '[1, "JOHN", "DOE"]',
        }
    ]

    actual_cols, actual_data, actual_expanded_cols = PrestoEngineSpec.expand_data(
        cols, data
    )

    child_cols = [
        {"name": "user.id", "type": "BIGINT"},
        {"name": "user.first_name", "type": "VARCHAR"},
        {"name": "user.last_name", "type": "VARCHAR"},
    ]
    expected_cols = scalar_cols + [row_col] + child_cols
    expected_data = [
        {
            "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
            "timestamp": "1595895506219",
            "user": [1, "JOHN", "DOE"],
            "user.id": 1,
            "user.first_name": "JOHN",
            "user.last_name": "DOE",
        }
    ]

    self.assertEqual(actual_cols, expected_cols)
    self.assertEqual(actual_data, expected_data)
    self.assertEqual(actual_expanded_cols, child_cols)