Skip to content

Commit

Permalink
PERF-modin-project#6378: use numpy.array operations in internals of i…
Browse files Browse the repository at this point in the history
…loc/loc operation

Co-authored-by: Anatoly Myachev <anatoly.myachev@intel.com>
Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
  • Loading branch information
vnlitvinov and anmyachev committed Jul 17, 2023
1 parent d6a6cf3 commit 691fffd
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 31 deletions.
38 changes: 15 additions & 23 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,18 +1051,14 @@ def _take_2d_positional(
return self._maybe_reorder_labels(
intermediate,
row_positions,
sorted_row_positions,
col_positions,
sorted_col_positions,
)

def _maybe_reorder_labels(
self,
intermediate: "PandasDataframe",
row_positions,
sorted_row_positions,
col_positions,
sorted_col_positions,
) -> "PandasDataframe":
"""
Call re-order labels on take_2d_labels_or_positional result if necessary.
Expand All @@ -1072,17 +1068,18 @@ def _maybe_reorder_labels(
intermediate : PandasDataframe
row_positions : list-like of ints, optional
The row positions to extract.
sorted_row_positions : list-like of ints, optional
Sorted version of row_positions.
col_positions : list-like of ints, optional
The column positions to extract.
sorted_col_positions : list-like of ints, optional
Sorted version of col_positions.
Returns
-------
PandasDataframe
"""
# ensure that `row_positions` and `col_positions` are either None or np.array for efficiency
if row_positions is not None:
row_positions = np.asarray(row_positions, dtype=np.intp)
if col_positions is not None:
col_positions = np.asarray(col_positions, dtype=np.intp)
# Check if monotonically increasing, return if it is. Fast track code path for
# common case to keep it fast.
if (
Expand All @@ -1103,23 +1100,18 @@ def _maybe_reorder_labels(
# The new labels are often smaller than the old labels, so we can't reuse the
# original order values because those were mapped to the original data. We have
# to reorder here based on the expected order from within the data.
# We create a dictionary mapping the position of the numeric index with respect
# to all others, then recreate that order by mapping the new order values from
# the old. This information is sent to `_reorder_labels`.
# To do so, we "unsort" the indices by using np.argsort() twice, as inspired by
# https://stackoverflow.com/questions/2483696/undo-or-reverse-argsort-python,
# meaning that `new_row_order` must be such that `np.sort(row_positions)[new_row_order] == row_positions`.
# This is achieved by first calculating the indices which would sort `row_positions`,
# and then by calculating new indices that would sort "sorting indices" themselves.
# The first argsort brings us to the proper "index space" (according to the smaller label count),
# and the second re-orders them to match the original data.
new_row_order, new_col_order = None, None
if row_positions is not None:
row_order_mapping = dict(
zip(sorted_row_positions, range(len(row_positions)))
)
new_row_order = [row_order_mapping[idx] for idx in row_positions]
else:
new_row_order = None
new_row_order = np.argsort(np.argsort(row_positions))
if col_positions is not None:
col_order_mapping = dict(
zip(sorted_col_positions, range(len(col_positions)))
)
new_col_order = [col_order_mapping[idx] for idx in col_positions]
else:
new_col_order = None
new_col_order = np.argsort(np.argsort(col_positions))
return intermediate._reorder_labels(
row_positions=new_row_order, col_positions=new_col_order
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,16 +272,8 @@ def take_2d_labels_or_positional(
new_dtypes,
)

sorted_row_positions = sorted_col_positions = None
if row_positions is not None:
sorted_row_positions = sorted(row_positions)
if col_positions is not None:
sorted_col_positions = sorted(col_positions)

return self._maybe_reorder_labels(
intermediate,
row_positions,
sorted_row_positions,
col_positions,
sorted_col_positions,
)

0 comments on commit 691fffd

Please sign in to comment.