From 8fd0ebb2f2ce756fd66c2ddbcd332d0af7463243 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 22 Jan 2026 15:21:22 +0800 Subject: [PATCH 1/2] [SPARK-55128][INFRA] Restore SQL tests by pin 'pandas==2.3.3' Restore SQL tests by pinning 'pandas==2.3.3' pandas 3 was just released, and it fails the SQL tests: https://github.com/apache/spark/actions/runs/21232213791/job/61092886134 Currently, pandas 3 doesn't affect the Python tests much: 1, in `dev/requirements.txt`, the latest `mlflow==3.8.1` requires: `pandas<3` 2, `pandas==2.3.3` is pinned in most places no ci no Closes #53910 from zhengruifeng/restore_sql. Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8db7538c8318f..fa2e67acbec9b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -360,7 +360,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow 'pandas==2.3.3' scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' python3.11 -m pip list # Run the tests. - name: Run tests From de57980f16daec5bc2bce41665bc63245bed6d64 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 23 Jan 2026 16:07:36 +0800 Subject: [PATCH 2/2] [SPARK-55128][INFRA][FOLLOWUP] Restore SQL tests by pin 'pandas==2.3.3' for maven daily test Similar to https://github.com/apache/spark/pull/53910, this PR pins the pandas version to 2.3.3 to restore the SQL tests for the Maven daily test.
- https://github.com/apache/spark/actions/runs/21249870076/job/61148348328 ``` - udf/postgreSQL/udf-case.sql - Scalar Pandas UDF *** FAILED *** udf/postgreSQL/udf-case.sql - Scalar Pandas UDF Python: 3.11 Pandas: 3.0.0 PyArrow: 23.0.0 Expected Some("struct"), but got Some("struct<>") Schema did not match for query #30 SELECT '' AS `Two`, * FROM CASE_TBL a, CASE2_TBL b WHERE udf(COALESCE(f,b.i) = 2): -- !query SELECT '' AS `Two`, * FROM CASE_TBL a, CASE2_TBL b WHERE udf(COALESCE(f,b.i) = 2) -- !query schema struct<> -- !query output org.apache.spark.SparkRuntimeException { "errorClass" : "CAST_INVALID_INPUT", "sqlState" : "22018", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", "expression" : "'nan'", "sourceType" : "\"STRING\"", "targetType" : "\"BOOLEAN\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 62, "stopIndex" : 85, "fragment" : "udf(COALESCE(f,b.i) = 2)" } ] } (SQLQueryTestSuite.scala:681) ``` No Monitor the Maven daily test after the PR is merged No Closes #53933 from LuciferYang/SPARK-55128-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Ruifeng Zheng --- .github/workflows/maven_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 4ca99bf942bcb..4531da4f7a9f1 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -180,7 +180,7 @@ jobs: - name: Install Python packages (Python 3.11) if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow 'pandas==2.3.3' scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' python3.11 -m pip list # Run the tests. - name: Run tests