dmlc · hcho3 · May 23, 2024 · May 16, 2024 · May 23, 2024 · jameslamb
diff --git a/.gitignore b/.gitignore
@@ -71,4 +71,5 @@ __pycache__
 /python/.idea/
 /tests/python/.idea/
 /.idea/
+/.hypothesis
 /lint.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -64,3 +64,8 @@ repos:
   hooks:
     - id: mypy
       additional_dependencies: [types-setuptools, numpy]
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.4.4
+  hooks:
+    - id: ruff
+      args: ["--config", "python/pyproject.toml"]
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -43,3 +43,15 @@ testing = ["scikit-learn", "pytest", "hypothesis", "pandas"]
 plugins = "numpy.typing.mypy_plugin"
 
 [tool.hatch.build.targets.wheel.hooks.custom]
+
+[tool.ruff]
+line-length = 120
+
+# this should be set to the oldest version of Python that treelite supports
+target-version = "py38"
+
+[tool.ruff.lint]
+select = [
+    # NumPy 2.0 deprecations/removals
+    "NPY201",
+]
diff --git a/python/treelite/gtil/gtil.py b/python/treelite/gtil/gtil.py
@@ -163,8 +163,8 @@ def _predict_impl(
     is_dense = isinstance(data, np.ndarray)
 
     if is_dense:
-        data = np.array(
-            data, copy=False, dtype=typestr_to_numpy_type(model.input_type), order="C"
+        data = np.asarray(
+            data, dtype=typestr_to_numpy_type(model.input_type), order="C"
         )
         if data.shape[1] < model.num_feature:
             # Pad missing features with NAs
@@ -177,14 +177,13 @@ def _predict_impl(
             assert data.shape[1] == model.num_feature
     else:
         assert isinstance(data, csr_matrix)
-        elems = np.array(
+        elems = np.asarray(
             data.data,
-            copy=False,
             dtype=typestr_to_numpy_type(model.input_type),
             order="C",
         )
-        col_ind = np.array(data.indices, copy=False, dtype=np.uint64, order="C")
-        row_ptr = np.array(data.indptr, copy=False, dtype=np.uint64, order="C")
+        col_ind = np.asarray(data.indices, dtype=np.uint64, order="C")
+        row_ptr = np.asarray(data.indptr, dtype=np.uint64, order="C")
     output_shape_ptr = ctypes.POINTER(ctypes.c_uint64)()
     output_ndim = ctypes.c_uint64()
     _check_call(

diff --git a/python/treelite/sklearn/importer.py b/python/treelite/sklearn/importer.py
@@ -46,7 +46,7 @@ def add(self, array, *, expected_shape=None):
             assert (
                 array.shape == expected_shape
             ), f"Expected shape: {expected_shape}, Got shape {array.shape}"
-        v = np.array(array, copy=False, dtype=self.dtype, order="C")
+        v = np.asarray(array, dtype=self.dtype, order="C")
         self.collection.append(v)
 
     def as_c_array(self):

diff --git a/tests/python/test_lightgbm_integration.py b/tests/python/test_lightgbm_integration.py
@@ -237,7 +237,7 @@ def test_lightgbm_sparse_ranking_model(tmpdir):
 
     lgb_model_path = pathlib.Path(tmpdir) / "sparse_ranking_lightgbm.txt"
 
-    dtrain = lgb.Dataset(X, label=y, group=[X.shape[0]])
+    dtrain = lgb.Dataset(X, label=y, group=np.array([X.shape[0]], dtype=np.int32))
     lgb_model = lgb.train(params, dtrain, num_boost_round=1)
     lgb_out = lgb_model.predict(X).reshape((-1, 1, 1))
     lgb_model.save_model(lgb_model_path)