Skip to content

Commit

Permalink
fixup! Table.from_table: Obey is_sparse when returning subarrays
Browse files Browse the repository at this point in the history
  • Loading branch information
nikicc committed Nov 10, 2017
1 parent 20ae6d6 commit 4410596
Showing 1 changed file with 39 additions and 50 deletions.
89 changes: 39 additions & 50 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,44 +280,32 @@ def from_table(cls, domain, source, row_indices=...):

global _conversion_cache

def array_transform(x, to_sparse):
""" Assure that array x is sparse (when to_sparse=True) or dense (when to_sparse=False).
Args:
x (np.ndarray, scipy.sparse): either sparse or dense two-dimensional data
Returns:
array of correct density, as indicated by to_sparse flag.
"""
if to_sparse == sp.issparse(x):
return x
if to_sparse:
def assure_array_dense(a):
    """Return `a` in dense form.

    A scipy sparse matrix is converted with ``toarray()``; any other
    input is handed back unchanged.
    """
    return a.toarray() if sp.issparse(a) else a

def assure_array_sparse(a):
if not sp.issparse(a):
# since x can be a list, cast to np.array
# since x can come from metas with string, cast to float
x = np.asarray(x).astype(np.float)
return sp.csc_matrix(x)
return x.toarray()

def column_transform(x, to_sparse):
    """Convert a single column to the density requested by `to_sparse`.

    Args:
        x (np.ndarray, scipy.sparse): either sparse or dense one-dimensional data
        to_sparse (bool): True for a sparse result, False for a dense one
    Returns:
        the result of array_transform, shaped as a column: a sparse result
        with a single row is transposed, a dense result is flattened.
    """
    converted = array_transform(x, to_sparse)
    if not sp.issparse(converted):
        # column assignments must be of shape (n,) and not (n, 1)
        return np.ravel(converted)
    # data of shape (n, ) passed to the csc_matrix constructor yields a
    # matrix of shape (1, n), which has to be transposed into a column
    return converted.T if converted.shape[0] == 1 else converted
a = np.asarray(a).astype(np.float)
return sp.csc_matrix(a)
return a

def assure_column_sparse(a):
    """Return `a` as a sparse column.

    The data is first passed through assure_array_sparse; a result with a
    single row is then transposed.
    """
    column = assure_array_sparse(a)
    # data of shape (n, ) passed to the csc_matrix constructor yields a
    # matrix of shape (1, n), which has to be transposed into a column
    return column.T if column.shape[0] == 1 else column

def assure_column_dense(a):
    """Return `a` as a flat dense array.

    The data is first passed through assure_array_dense and then
    flattened, because column assignments must be of shape (n,) and
    not (n, 1).
    """
    return np.ravel(assure_array_dense(a))

def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False):
if not len(src_cols):
Expand All @@ -326,30 +314,31 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False
else:
return np.zeros((n_rows, 0), dtype=source.X.dtype)

nonlocal array_transform, column_transform
match_array = partial(array_transform, to_sparse=is_sparse)
match_column = partial(column_transform, to_sparse=is_sparse)

# match density for subarrays
match_density = assure_array_sparse if is_sparse else assure_array_dense
n_src_attrs = len(source.domain.attributes)
if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
for x in src_cols):
return match_array(_subarray(source.X, row_indices, src_cols))
return match_density(_subarray(source.X, row_indices, src_cols))
if all(isinstance(x, Integral) and x < 0 for x in src_cols):
arr = match_array(_subarray(source.metas, row_indices,
arr = match_density(_subarray(source.metas, row_indices,
[-1 - x for x in src_cols]))
if arr.dtype != dtype:
return arr.astype(dtype)
return arr
if all(isinstance(x, Integral) and x >= n_src_attrs
for x in src_cols):
return match_array(_subarray(
return match_density(_subarray(
source._Y, row_indices,
[x - n_src_attrs for x in src_cols]))

# initialize final array & set `match_density` for columns
if is_sparse:
a = sp.dok_matrix((n_rows, len(src_cols)), dtype=dtype)
match_density = assure_column_sparse
else:
a = np.empty((n_rows, len(src_cols)), dtype=dtype)
match_density = assure_column_dense

shared_cache = _conversion_cache
for i, col in enumerate(src_cols):
Expand All @@ -362,22 +351,22 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, is_sparse=False
col.compute_shared(source)
shared = shared_cache[id(col.compute_shared), id(source)]
if row_indices is not ...:
a[:, i] = match_column(
a[:, i] = match_density(
col(source, shared_data=shared)[row_indices])
else:
a[:, i] = match_column(
a[:, i] = match_density(
col(source, shared_data=shared))
else:
if row_indices is not ...:
a[:, i] = match_column(col(source)[row_indices])
a[:, i] = match_density(col(source)[row_indices])
else:
a[:, i] = match_column(col(source))
a[:, i] = match_density(col(source))
elif col < 0:
a[:, i] = match_column(source.metas[row_indices, -1 - col])
a[:, i] = match_density(source.metas[row_indices, -1 - col])
elif col < n_src_attrs:
a[:, i] = match_column(source.X[row_indices, col])
a[:, i] = match_density(source.X[row_indices, col])
else:
a[:, i] = match_column(
a[:, i] = match_density(
source._Y[row_indices, col - n_src_attrs])

if is_sparse:
Expand Down

0 comments on commit 4410596

Please sign in to comment.