fixup! Table.from_table: Obey is_sparse when returning subarrays

biolab · Nov 10, 2017 · 4410596 · 4410596
1 parent 20ae6d6
commit 4410596
Showing 1 changed file with 39 additions and 50 deletions.
diff --git a/Orange/data/table.py b/Orange/data/table.py
@@ -280,44 +280,32 @@ def from_table(cls, domain, source, row_indices=...):
 
         global _conversion_cache
 
-        def array_transform(x, to_sparse):
-            """ Assure that array x is sparse (when to_sparse=True) or dense (when to_sparse=False).
-
-            Args:
-                x (np.ndarray, scipy.sparse): either sparse or dense two-dimensional data
-
-            Returns:
-                array of correct density, as indicated by to_sparse flag.
-            """
-            if to_sparse == sp.issparse(x):
-                return x
-            if to_sparse:
+        def assure_array_dense(a):
+            if sp.issparse(a):
+                a = a.toarray()
+            return a
+
+        def assure_array_sparse(a):
+            if not sp.issparse(a):
                 # since x can be a list, cast to np.array
                 # since x can come from metas with string, cast to float
-                x = np.asarray(x).astype(np.float)
-                return sp.csc_matrix(x)
-            return x.toarray()
-
-        def column_transform(x, to_sparse):
-            """ Assure that column x is sparse (when to_sparse=True) or dense (when to_sparse=False).
-
-            Args:
-                x (np.ndarray, scipy.sparse): either sparse or dense one-dimensional data
-
-            Returns:
-                array of correct density, as indicated by to_sparse flag.
-            """
-            r = array_transform(x, to_sparse)
-            if sp.issparse(r):
-                # if x of shape (n, ) is passed to csc_matrix constructor,
-                # the resulting matrix is of shape (1, n) and hence we
-                # need to transpose it to make it a column
-                if r.shape[0] == 1:
-                    r = r.T
-                return r
-            else:
-                # column assignments must be of shape (n,) and not (n, 1)
-                return np.ravel(r)
+                a = np.asarray(a).astype(np.float)
+                return sp.csc_matrix(a)
+            return a
+
+        def assure_column_sparse(a):
+            a = assure_array_sparse(a)
+            # if an array `a` of shape (n,) is passed to the csc_matrix
+            # constructor, the resulting matrix has shape (1, n), so we
+            # need to transpose it to make it a column
+            if a.shape[0] == 1:
+                a = a.T
+            return a
+
+        def assure_column_dense(a):
+            a = assure_array_dense(a)
+            # column assignments must be of shape (n,) and not (n, 1)
+            return np.ravel(a)
 
         def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False):
             if not len(src_cols):
@@ -326,30 +314,31 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False
                 else:
                     return np.zeros((n_rows, 0), dtype=source.X.dtype)
 
-            nonlocal array_transform, column_transform
-            match_array = partial(array_transform, to_sparse=is_sparse)
-            match_column = partial(column_transform, to_sparse=is_sparse)
-
+            # match density for subarrays
+            match_density = assure_array_sparse if is_sparse else assure_array_dense
             n_src_attrs = len(source.domain.attributes)
             if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
                    for x in src_cols):
-                return match_array(_subarray(source.X, row_indices, src_cols))
+                return match_density(_subarray(source.X, row_indices, src_cols))
             if all(isinstance(x, Integral) and x < 0 for x in src_cols):
-                arr = match_array(_subarray(source.metas, row_indices,
+                arr = match_density(_subarray(source.metas, row_indices,
                                             [-1 - x for x in src_cols]))
                 if arr.dtype != dtype:
                     return arr.astype(dtype)
                 return arr
             if all(isinstance(x, Integral) and x >= n_src_attrs
                    for x in src_cols):
-                return match_array(_subarray(
+                return match_density(_subarray(
                     source._Y, row_indices,
                     [x - n_src_attrs for x in src_cols]))
 
+            # initialize final array & set `match_density` for columns
             if is_sparse:
                 a = sp.dok_matrix((n_rows, len(src_cols)), dtype=dtype)
+                match_density = assure_column_sparse
             else:
                 a = np.empty((n_rows, len(src_cols)), dtype=dtype)
+                match_density = assure_column_dense
 
             shared_cache = _conversion_cache
             for i, col in enumerate(src_cols):
@@ -362,22 +351,22 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False
                                 col.compute_shared(source)
                         shared = shared_cache[id(col.compute_shared), id(source)]
                         if row_indices is not ...:
-                            a[:, i] = match_column(
+                            a[:, i] = match_density(
                                 col(source, shared_data=shared)[row_indices])
                         else:
-                            a[:, i] = match_column(
+                            a[:, i] = match_density(
                                 col(source, shared_data=shared))
                     else:
                         if row_indices is not ...:
-                            a[:, i] = match_column(col(source)[row_indices])
+                            a[:, i] = match_density(col(source)[row_indices])
                         else:
-                            a[:, i] = match_column(col(source))
+                            a[:, i] = match_density(col(source))
                 elif col < 0:
-                    a[:, i] = match_column(source.metas[row_indices, -1 - col])
+                    a[:, i] = match_density(source.metas[row_indices, -1 - col])
                 elif col < n_src_attrs:
-                    a[:, i] = match_column(source.X[row_indices, col])
+                    a[:, i] = match_density(source.X[row_indices, col])
                 else:
-                    a[:, i] = match_column(
+                    a[:, i] = match_density(
                         source._Y[row_indices, col - n_src_attrs])
 
             if is_sparse: