diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index dbcfe52e77b6e..03c97b0d0b2a8 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -64,7 +64,6 @@ ) from pyspark.errors.exceptions.captured import install_exception_handler from pyspark.sql.utils import ( - is_timestamp_ntz_preferred, to_str, try_remote_session_classmethod, remote_only, @@ -1050,10 +1049,25 @@ def _inferSchemaFromList( errorClass="CANNOT_INFER_EMPTY_SCHEMA", messageParameters={}, ) - infer_dict_as_struct = self._jconf.inferDictAsStruct() - infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement() - infer_map_from_first_pair = self._jconf.legacyInferMapStructTypeFromFirstItem() - prefer_timestamp_ntz = is_timestamp_ntz_preferred() + + ( + timestampType, + inferDictAsStruct, + legacyInferArrayTypeFromFirstElement, + legacyInferMapStructTypeFromFirstItem, + ) = self._jconf.getConfs( + [ + "spark.sql.timestampType", + "spark.sql.pyspark.inferNestedDictAsStruct.enabled", + "spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled", + "spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled", + ] + ) + prefer_timestamp_ntz = timestampType == "TIMESTAMP_NTZ" + infer_dict_as_struct = inferDictAsStruct == "true" + infer_array_from_first_element = legacyInferArrayTypeFromFirstElement == "true" + infer_map_from_first_pair = legacyInferMapStructTypeFromFirstItem == "true" + schema = reduce( _merge_type, ( @@ -1103,10 +1117,24 @@ def _inferSchema( messageParameters={}, ) - infer_dict_as_struct = self._jconf.inferDictAsStruct() - infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement() - infer_map_from_first_pair = self._jconf.legacyInferMapStructTypeFromFirstItem() - prefer_timestamp_ntz = is_timestamp_ntz_preferred() + ( + timestampType, + inferDictAsStruct, + legacyInferArrayTypeFromFirstElement, + legacyInferMapStructTypeFromFirstItem, + ) = self._jconf.getConfs( + [ + "spark.sql.timestampType", + "spark.sql.pyspark.inferNestedDictAsStruct.enabled", + "spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled", + "spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled", + ] + ) + prefer_timestamp_ntz = timestampType == "TIMESTAMP_NTZ" + infer_dict_as_struct = inferDictAsStruct == "true" + infer_array_from_first_element = legacyInferArrayTypeFromFirstElement == "true" + infer_map_from_first_pair = legacyInferMapStructTypeFromFirstItem == "true" + if samplingRatio is None: schema = _infer_schema( first,