-
Couldn't load subscription status.
- Fork 64
feat: Allow iloc to support lists of negative indices #1497
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
d6bc14f
d57ed9b
d839ce8
26f565f
ea61d07
fade680
fc9ca42
7f58504
c74c586
130fde3
1f0318a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| import bigframes.core.guid as guid | ||
| import bigframes.core.indexes as indexes | ||
| import bigframes.core.scalar | ||
| import bigframes.core.window_spec as windows | ||
| import bigframes.dataframe | ||
| import bigframes.dtypes | ||
| import bigframes.exceptions as bfe | ||
|
|
@@ -477,6 +478,24 @@ def _iloc_getitem_series_or_dataframe( | |
| Union[bigframes.dataframe.DataFrame, bigframes.series.Series], | ||
| series_or_dataframe.iloc[0:0], | ||
| ) | ||
|
|
||
| # Check if both positive index and negative index are necessary | ||
| if isinstance(key, bigframes.series.Series): | ||
| # Avoid data download | ||
| is_key_unisigned = False | ||
| elif "shape" in series_or_dataframe._block.__dict__: | ||
| # If there is a cache, we convert all indices to positive. | ||
| row_count = series_or_dataframe._block.shape[0] | ||
| key = [k if k >= 0 else row_count + k for k in key] | ||
| is_key_unisigned = True | ||
|
||
| else: | ||
| first_sign = key[0] >= 0 | ||
| is_key_unisigned = True | ||
| for k in key: | ||
| if (k >= 0) != first_sign: | ||
| is_key_unisigned = False | ||
| break | ||
|
|
||
| if isinstance(series_or_dataframe, bigframes.series.Series): | ||
| original_series_name = series_or_dataframe.name | ||
| series_name = ( | ||
|
|
@@ -497,7 +516,23 @@ def _iloc_getitem_series_or_dataframe( | |
| block = df._block | ||
| # explicitly set index to offsets, reset_index may not generate offsets in some modes | ||
| block, offsets_id = block.promote_offsets("temp_iloc_offsets_") | ||
| block = block.set_index([offsets_id]) | ||
| pos_block = block.set_index([offsets_id]) | ||
|
|
||
| if not is_key_unisigned or key[0] < 0: | ||
| neg_block, _ = block.apply_window_op( | ||
| offsets_id, | ||
| ops.aggregations.ReverseRowNumberOp(), | ||
|
||
| window_spec=windows.rows(), | ||
| result_label="Reversed_Index", | ||
| ) | ||
|
|
||
| neg_block = neg_block.set_index([neg_block.value_columns[-1]]) | ||
|
|
||
| if is_key_unisigned: | ||
| block = pos_block if key[0] >= 0 else neg_block | ||
| else: | ||
| block = pos_block.concat([neg_block], how="inner") | ||
|
|
||
| df = bigframes.dataframe.DataFrame(block) | ||
|
|
||
| result = df.loc[key] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4409,7 +4409,7 @@ def test_loc_list_multiindex(scalars_dfs_maybe_ordered): | |
|
|
||
|
|
||
| def test_iloc_list(scalars_df_index, scalars_pandas_df_index): | ||
| index_list = [0, 0, 0, 5, 4, 7] | ||
| index_list = [0, 0, 0, 5, 4, 7, -2, -5, 3] | ||
|
||
|
|
||
| bf_result = scalars_df_index.iloc[index_list] | ||
| pd_result = scalars_pandas_df_index.iloc[index_list] | ||
|
|
@@ -4423,7 +4423,7 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index): | |
| def test_iloc_list_partial_ordering( | ||
| scalars_df_partial_ordering, scalars_pandas_df_index | ||
| ): | ||
| index_list = [0, 0, 0, 5, 4, 7] | ||
| index_list = [0, 0, 0, 5, 4, 7, -2, -5, 3] | ||
|
|
||
| bf_result = scalars_df_partial_ordering.iloc[index_list] | ||
| pd_result = scalars_pandas_df_index.iloc[index_list] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might need to check for bigframes.Index as well? Or maybe we should have a helper that identifies a "remote" or "large" object we don't want to iterate over.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to also check index type.
For large objects it may not be necessary: on Cloudtop, I tried 1 million keys (which have the same sign), and this process took 0.03s.