
Commit bcc0005

Fix test failures (#3362)
1 parent 0a42ba8 commit bcc0005

File tree

21 files changed, +66 -55 lines

.github/workflows/benchmark-ci.yml

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ jobs:
       git fetch upstream
       git merge upstream/master
       asv machine --yes
-      asv continuous -e -f 1.1 --strict upstream/master HEAD
+      asv continuous -e -f 1.1 upstream/master HEAD
     if: ${{ steps.build.outcome == 'success' }}

   - name: Publish benchmarks artifact

.github/workflows/platform-ci.yml

Lines changed: 4 additions & 3 deletions
@@ -89,7 +89,9 @@ jobs:
       ./ci/install-hadoop.sh
       echo "import coverage; coverage.process_startup()" > \
         $(python -c "import site; print(site.getsitepackages()[-1])")/coverage.pth
-      conda install -n test --quiet --yes -c conda-forge python=$PYTHON skein libffi conda-pack
+      sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
+      sudo apt install -y g++-11
+      conda install -n test --quiet --yes -c conda-forge python=$PYTHON skein libffi conda-pack "grpcio<1.54"
     fi
     if [ -n "$WITH_VINEYARD" ]; then
       pip install vineyard -i https://pypi.org/simple
@@ -104,8 +106,7 @@ jobs:
       rm -fr /tmp/etcd-$ETCD_VER-linux-amd64.tar.gz /tmp/etcd-download-test
     fi
     if [ -n "$WITH_RAY" ] || [ -n "$WITH_RAY_DAG" ] || [ -n "$WITH_RAY_DEPLOY" ]; then
-      pip install "ray>=1.8.0,<2.4.0"
-      pip install "xgboost_ray<0.1.14" "protobuf<4"
+      pip install "ray>=1.8.0,<2.4.0" "xgboost<2" "xgboost_ray<0.1.14" "protobuf<4"
       # Ray Datasets need pyarrow>=6.0.1
       pip install "pyarrow>=6.0.1"
       pip install lightgbm

mars/dataframe/contrib/raydataset/dataset.py

Lines changed: 4 additions & 2 deletions
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import operator
 from functools import reduce

@@ -55,8 +56,9 @@ def __getstate__():
         state.pop("dataframe", None)
         return state

-    # `dataframe` is not serializable by ray.
-    dataset.__getstate__ = __getstate__
+    if not hasattr(type(dataset), "__getstate__"):
+        # if `dataframe` is not serializable by ray, patch our implementation
+        dataset.__getstate__ = __getstate__
     return dataset

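Note: the new guard keeps Mars from overriding a __getstate__ that newer Ray Dataset classes already define; the instance is only patched when its class has no pickling hook of its own. A minimal, self-contained sketch of the same pattern (illustrative only, not Mars code; Holder and drop_unpicklable are made-up names):

# Illustrative sketch: patch an instance-level __getstate__ so that
# serialization skips an attribute pickle cannot handle.
class Holder:
    def __init__(self, payload):
        self.payload = payload
        self.heavy = open(__file__)  # a file handle; plain pickle rejects these


def drop_unpicklable(obj, attr):
    def __getstate__():
        state = obj.__dict__.copy()
        state.pop(attr, None)            # the offending attribute
        state.pop("__getstate__", None)  # the patch itself is a closure, drop it too
        return state

    obj.__getstate__ = __getstate__
    return obj


h = drop_unpicklable(Holder("ok"), "heavy")
print("heavy" in h.__getstate__())    # False: the handle is left out of the state
print("payload" in h.__getstate__())  # True: everything else survives
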
mars/dataframe/contrib/raydataset/mldataset.py

Lines changed: 3 additions & 2 deletions
@@ -131,6 +131,7 @@ def __getstate__():
         state.pop("dataframe", None)
         return state

-    # `dataframe` is not serializable by ray.
-    dataset.__getstate__ = __getstate__
+    if not hasattr(dataset, "__getstate__"):
+        # `dataframe` is not serializable by ray.
+        dataset.__getstate__ = __getstate__
     return dataset
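
Note: this file guards on the instance (hasattr(dataset, "__getstate__")) while dataset.py above guards on the class (hasattr(type(dataset), "__getstate__")). The two checks differ once an attribute has been patched onto a single object, as this small illustration shows (not Mars code):

# Illustrative only: an attribute set on one instance is visible to
# hasattr(obj, ...) but not to hasattr(type(obj), ...).
class Plain:
    pass


a, b = Plain(), Plain()
a.tag = "patched"  # instance-level attribute, comparable to dataset.__getstate__ = ...

print(hasattr(a, "tag"), hasattr(type(a), "tag"))  # True False
print(hasattr(b, "tag"), hasattr(type(b), "tag"))  # False False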

mars/dataframe/contrib/raydataset/tests/test_mldataset.py

Lines changed: 1 addition & 5 deletions
@@ -28,11 +28,7 @@

 ray = lazy_import("ray")
 ml_dataset = lazy_import("ray.util.data", rename="ml_dataset")
-
-try:
-    import xgboost_ray
-except ImportError:  # pragma: no cover
-    xgboost_ray = None
+xgboost_ray = lazy_import("xgboost_ray")
 try:
     import sklearn
 except ImportError:  # pragma: no cover

mars/dataframe/datasource/read_raydataset.py

Lines changed: 12 additions & 0 deletions
@@ -123,9 +123,21 @@ def read_ray_dataset(ds, columns=None, incremental_index=False, **kwargs):
         from ray.data.impl.pandas_block import PandasBlockSchema
     except ImportError:  # pragma: no cover
         PandasBlockSchema = type(None)
+    try:
+        from ray.data.dataset import Schema as RayDatasetSchema
+    except ImportError:
+        RayDatasetSchema = type(None)

     if isinstance(schema, PandasBlockSchema):
         dtypes = pd.Series(schema.types, index=schema.names)
+    elif isinstance(schema, RayDatasetSchema):
+        dtypes = pd.Series(
+            [
+                t.to_pandas_dtype() if t is not object else np.dtype("O")
+                for t in schema.types
+            ],
+            index=schema.names,
+        )
     elif isinstance(schema, pa.Schema):
         dtypes = schema.empty_table().to_pandas().dtypes
     else:
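
The new branch covers the newer ray.data.dataset.Schema, whose types are pyarrow DataTypes (or plain object for untyped columns); each type is mapped to a NumPy dtype via to_pandas_dtype(). A standalone sketch of that conversion (assumes pyarrow is installed; FakeSchema is a stand-in for illustration, not Ray's class):

# Standalone sketch of the dtype conversion above (FakeSchema is made up).
from collections import namedtuple

import numpy as np
import pandas as pd
import pyarrow as pa

FakeSchema = namedtuple("FakeSchema", ["names", "types"])
schema = FakeSchema(names=["a", "b", "c"], types=[pa.int64(), pa.string(), object])

dtypes = pd.Series(
    [t.to_pandas_dtype() if t is not object else np.dtype("O") for t in schema.types],
    index=schema.names,
)
print(dtypes)  # one NumPy dtype per column; exact repr depends on pyarrow/pandas versions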

mars/dataframe/datasource/tests/test_datasource_execution.py

Lines changed: 0 additions & 4 deletions
@@ -1288,10 +1288,6 @@ def test_read_raydataset(ray_start_regular, ray_create_mars_cluster):
         pdf2,
     )

-    # Test simple datasets
-    with pytest.raises(NotImplementedError):
-        ray.data.range(10).to_mars()
-

 @require_ray
 @pytest.mark.skipif(

mars/dataframe/merge/tests/test_merge_execution.py

Lines changed: 19 additions & 13 deletions
@@ -312,11 +312,15 @@ def test_join_on(setup):
     expected4.set_index("a2", inplace=True)
     result4.set_index("a2", inplace=True)
     pd.testing.assert_frame_equal(
-        sort_dataframe_inplace(expected4, 0), sort_dataframe_inplace(result4, 0)
+        sort_dataframe_inplace(expected4, 0, kind="mergesort"),
+        sort_dataframe_inplace(result4, 0, kind="mergesort"),
     )


 def test_merge_one_chunk(setup):
+    def sort_by_col1(df):
+        return df.sort_values(by=df.columns[1], kind="mergesort")
+
     df1 = pd.DataFrame(
         {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]},
         index=["a1", "a2", "a3", "a4"],
@@ -348,8 +352,8 @@ def test_merge_one_chunk(setup):
     result = jdf.execute().fetch()

     pd.testing.assert_frame_equal(
-        expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
-        result.sort_values(by=result.columns[1]).reset_index(drop=True),
+        sort_by_col1(expected).reset_index(drop=True),
+        sort_by_col1(result).reset_index(drop=True),
     )

     # right have one chunk
@@ -361,8 +365,8 @@ def test_merge_one_chunk(setup):
     result = jdf.execute().fetch()

     pd.testing.assert_frame_equal(
-        expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
-        result.sort_values(by=result.columns[1]).reset_index(drop=True),
+        sort_by_col1(expected).reset_index(drop=True),
+        sort_by_col1(result).reset_index(drop=True),
     )

     # left have one chunk and how="left", then one chunk tile
@@ -377,8 +381,8 @@ def test_merge_one_chunk(setup):
     result = jdf.execute().fetch()

     pd.testing.assert_frame_equal(
-        expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
-        result.sort_values(by=result.columns[1]).reset_index(drop=True),
+        sort_by_col1(expected).reset_index(drop=True),
+        sort_by_col1(result).reset_index(drop=True),
     )


@@ -418,7 +422,8 @@ def test_broadcast_merge(setup):
     expected.set_index("key", inplace=True)
     result.set_index("key", inplace=True)
     pd.testing.assert_frame_equal(
-        sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
+        sort_dataframe_inplace(expected, 0, kind="mergesort"),
+        sort_dataframe_inplace(result, 0, kind="mergesort"),
     )

     # test broadcast right and how="left"
@@ -438,8 +443,8 @@ def test_broadcast_merge(setup):
     expected.set_index("key", inplace=True)
     result.set_index("key", inplace=True)
     pd.testing.assert_frame_equal(
-        expected.sort_values(by=["key", "value_x"]),
-        result.sort_values(by=["key", "value_x"]),
+        expected.sort_values(by=["key", "value_x"], kind="mergesort"),
+        result.sort_values(by=["key", "value_x"], kind="mergesort"),
     )

     # test broadcast left
@@ -459,7 +464,8 @@ def test_broadcast_merge(setup):
     expected.set_index("key", inplace=True)
     result.set_index("key", inplace=True)
     pd.testing.assert_frame_equal(
-        sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
+        sort_dataframe_inplace(expected, 0, kind="mergesort"),
+        sort_dataframe_inplace(result, 0, kind="mergesort"),
     )

     # test broadcast left and how="right"
@@ -479,8 +485,8 @@ def test_broadcast_merge(setup):
     expected.set_index("key", inplace=True)
     result.set_index("key", inplace=True)
     pd.testing.assert_frame_equal(
-        expected.sort_values(by=["key", "value_x"]),
-        result.sort_values(by=["key", "value_x"]),
+        expected.sort_values(by=["key", "value_x"], kind="mergesort"),
+        result.sort_values(by=["key", "value_x"], kind="mergesort"),
     )

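All of these assertions now sort with kind="mergesort", pandas' stable sorting algorithm, so rows that tie on the sort key keep their relative order and the expected and actual frames can be compared cell by cell. A small illustration of the difference (not taken from the test suite):

# With duplicate keys, only a stable sort guarantees the order of tied rows.
import pandas as pd

df = pd.DataFrame({"key": ["b", "a", "a"], "value": [1, 2, 3]})

stable = df.sort_values(by="key", kind="mergesort")
print(stable["value"].tolist())  # [2, 3, 1] -- the two "a" rows keep their order

default = df.sort_values(by="key")  # quicksort-based; tie order is not guaranteed
print(default["value"].tolist())
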
mars/dataframe/utils.py

Lines changed: 2 additions & 2 deletions
@@ -106,9 +106,9 @@ def hash_dtypes(dtypes, size):
     return [dtypes[index] for index in hashed_indexes]


-def sort_dataframe_inplace(df, *axis):
+def sort_dataframe_inplace(df, *axis, **kw):
     for ax in axis:
-        df.sort_index(axis=ax, inplace=True)
+        df.sort_index(axis=ax, inplace=True, **kw)
     return df

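sort_dataframe_inplace now forwards any extra keyword arguments straight to DataFrame.sort_index, which is what lets the merge tests above request a stable sort. A short usage sketch:

# Usage sketch: keyword arguments are passed through to sort_index.
import pandas as pd

from mars.dataframe.utils import sort_dataframe_inplace

df = pd.DataFrame({"b": [2, 1], "a": [3, 4]}, index=[1, 0])
sort_dataframe_inplace(df, 0, 1, kind="mergesort")  # sort rows, then columns, stably
print(df)
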
mars/learn/contrib/lightgbm/_predict.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def __call__(self):
         elif hasattr(self.model, "classes_"):
             dtype = np.array(self.model.classes_).dtype
         else:
-            dtype = getattr(self.model, "out_dtype_", np.dtype("float"))
+            dtype = getattr(self.model, "out_dtype_", [np.dtype("float")])[0]

         if self.output_types[0] == OutputType.tensor:
             # tensor
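
The fallback now treats out_dtype_ as a sequence and takes its first element, so the default also has to be wrapped in a list before indexing. A tiny illustration (FakeModel is a stand-in, not the lightgbm model):

# Illustrative only: why the default is wrapped in a list before indexing.
import numpy as np


class FakeModel:  # stand-in without an out_dtype_ attribute
    pass


model = FakeModel()

# Old form: returns a bare dtype, so appending [0] to the expression would fail.
dtype_old = getattr(model, "out_dtype_", np.dtype("float"))

# New form: whether out_dtype_ exists (a sequence) or the default kicks in,
# the expression always indexes a sequence and yields a single dtype.
dtype_new = getattr(model, "out_dtype_", [np.dtype("float")])[0]
print(dtype_old, dtype_new)  # float64 float64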
