[SPARK-54632][INFRA][FOLLOW-UP] Reenable ruff on our CI and lint-python

gaogaotiantian · HyukjinKwon · commit 4c58ecb872f9 · 2025-12-12T09:42:46.000+09:00
### What changes were proposed in this pull request? Re-enable the ruff check from #53412 and fix the resulting lint errors. ### Why are the changes needed? Same as #53412. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The local ruff check passes. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #53441 from gaogaotiantian/reenable-ruff. Authored-by: Tian Gao <gaogaotiantian@hotmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
diff --git a/dev/lint-python b/dev/lint-python
@@ -79,6 +79,7 @@ if [[ -z "$COMPILE_TEST$BLACK_TEST$PYSPARK_CUSTOM_ERRORS_CHECK_TEST$FLAKE8_TEST$
   BLACK_TEST=true
   PYSPARK_CUSTOM_ERRORS_CHECK_TEST=true
   FLAKE8_TEST=true
+  RUFF_TEST=true
   MYPY_TEST=true
   MYPY_EXAMPLES_TEST=true
   MYPY_DATA_TEST=true
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
@@ -80,6 +80,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN python3.11 -m pip install \
     'black==23.12.1' \
     'flake8==3.9.0' \
+    'ruff==0.14.8' \
     'googleapis-common-protos-stubs==2.2.0' \
     'grpc-stubs==1.24.11' \
     'grpcio-status==1.76.0' \
diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
@@ -1216,7 +1216,6 @@ def load_stream(self, stream):
         Each outer iterator element represents a group, containing an iterator of Series lists
         (one list per batch).
         """
-        import pyarrow as pa
 
         def process_group(batches: "Iterator[pa.RecordBatch]"):
             # Convert each Arrow batch to pandas Series list on-demand, yielding one list per batch
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udf_typehints.py b/python/pyspark/sql/tests/arrow/test_arrow_udf_typehints.py
@@ -461,7 +461,7 @@ def multiply_pandas(a: pd.Series, b: pd.Series) -> pd.Series:
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.arrow.test_arrow_udf_typehints import *  # noqa: #F401
+    from pyspark.sql.tests.arrow.test_arrow_udf_typehints import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py
@@ -444,7 +444,7 @@ def multiply_arrow(a: pa.Array, b: pa.Array) -> pa.Array:
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.pandas.test_pandas_udf_typehints import *  # noqa: #F401
+    from pyspark.sql.tests.pandas.test_pandas_udf_typehints import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py
@@ -367,7 +367,8 @@ def func(col: "Union[pd.Series, pd.DataFrame]", *, col2: "pd.DataFrame") -> "pd.
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations import *  # noqa: #F401
+    # E501: line too long
+    from pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations import *  # noqa: F401, E501
 
     try:
         import xmlrunner