
Commit 8f91c69

xuanyuanking authored and HyukjinKwon committed

[SPARK-24665][PYSPARK] Use SQLConf in PySpark to manage all sql configs

## What changes were proposed in this pull request?

Use SQLConf for PySpark to manage all sql configs, dropping all the hard-coded config keys and manual string parsing.

## How was this patch tested?

Existing UT.

Author: Yuanjian Li <xyliyuanjian@gmail.com>

Closes #21648 from xuanyuanking/SPARK-24665.
1 parent f825847 commit 8f91c69
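
In miniature, the pattern the diffs below apply everywhere (an illustrative sketch distilled from the patch, not a single verbatim excerpt):

```python
# Before: hard-coded key, string default, manual parsing on the Python side.
eager = sqlContext.getConf(
    "spark.sql.repl.eagerEval.enabled", "false").lower() == "true"

# After: one typed getter on the JVM-side SQLConf, reached via py4j.
eager = sqlContext._conf.isReplEagerEvalEnabled()
```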

File tree: 3 files changed (+21 / -32 lines)


python/pyspark/sql/context.py
Lines changed: 5 additions & 0 deletions

```diff
@@ -93,6 +93,11 @@ def _ssql_ctx(self):
         """
         return self._jsqlContext
 
+    @property
+    def _conf(self):
+        """Accessor for the JVM SQL-specific configurations"""
+        return self.sparkSession._jsparkSession.sessionState().conf()
+
     @classmethod
     @since(1.6)
     def getOrCreate(cls, sc):
```
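
The new `_conf` property is just a py4j handle to the session's `org.apache.spark.sql.internal.SQLConf`, so any getter defined on the Scala class becomes callable from Python. A minimal usage sketch (assumes a local session; the printed defaults match the ones the old hard-coded strings used):

```python
from pyspark.sql import SparkSession, SQLContext

spark = SparkSession.builder.master("local[1]").getOrCreate()
sqlContext = SQLContext(spark.sparkContext, sparkSession=spark)

# _conf is a py4j JavaObject, so Scala's no-arg defs are called with parentheses.
conf = sqlContext._conf
print(conf.isReplEagerEvalEnabled())   # False unless explicitly enabled
print(conf.replEagerEvalMaxNumRows())  # 20 by default
```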

python/pyspark/sql/dataframe.py
Lines changed: 10 additions & 32 deletions

```diff
@@ -354,32 +354,12 @@ def show(self, n=20, truncate=True, vertical=False):
         else:
             print(self._jdf.showString(n, int(truncate), vertical))
 
-    @property
-    def _eager_eval(self):
-        """Returns true if the eager evaluation enabled.
-        """
-        return self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.enabled", "false").lower() == "true"
-
-    @property
-    def _max_num_rows(self):
-        """Returns the max row number for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.maxNumRows", "20"))
-
-    @property
-    def _truncate(self):
-        """Returns the truncate length for eager evaluation.
-        """
-        return int(self.sql_ctx.getConf(
-            "spark.sql.repl.eagerEval.truncate", "20"))
-
     def __repr__(self):
-        if not self._support_repr_html and self._eager_eval:
+        if not self._support_repr_html and self.sql_ctx._conf.isReplEagerEvalEnabled():
             vertical = False
             return self._jdf.showString(
-                self._max_num_rows, self._truncate, vertical)
+                self.sql_ctx._conf.replEagerEvalMaxNumRows(),
+                self.sql_ctx._conf.replEagerEvalTruncate(), vertical)
         else:
             return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
 
```

```diff
@@ -391,10 +371,10 @@ def _repr_html_(self):
         import cgi
         if not self._support_repr_html:
             self._support_repr_html = True
-        if self._eager_eval:
-            max_num_rows = max(self._max_num_rows, 0)
+        if self.sql_ctx._conf.isReplEagerEvalEnabled():
+            max_num_rows = max(self.sql_ctx._conf.replEagerEvalMaxNumRows(), 0)
             sock_info = self._jdf.getRowsToPython(
-                max_num_rows, self._truncate)
+                max_num_rows, self.sql_ctx._conf.replEagerEvalTruncate())
             rows = list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
             head = rows[0]
             row_data = rows[1:]
```
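
With the `__repr__` and `_repr_html_` changes above, flipping the eager-eval configs at runtime is enough to change what a bare DataFrame prints. A small sketch, reusing the `spark` session from the sketch above:

```python
spark.conf.set("spark.sql.repl.eagerEval.enabled", "true")
spark.conf.set("spark.sql.repl.eagerEval.maxNumRows", "5")

df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
# Outside a notebook, _support_repr_html stays False, so __repr__ now reads
# the SQLConf getters and returns the rendered table instead of the usual
# "DataFrame[id: bigint, label: string]" schema summary.
print(repr(df))
```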
```diff
@@ -2049,13 +2029,12 @@ def toPandas(self):
 
         import pandas as pd
 
-        if self.sql_ctx.getConf("spark.sql.execution.pandas.respectSessionTimeZone").lower() \
-                == "true":
-            timezone = self.sql_ctx.getConf("spark.sql.session.timeZone")
+        if self.sql_ctx._conf.pandasRespectSessionTimeZone():
+            timezone = self.sql_ctx._conf.sessionLocalTimeZone()
         else:
             timezone = None
 
-        if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", "false").lower() == "true":
+        if self.sql_ctx._conf.arrowEnabled():
             use_arrow = True
             try:
                 from pyspark.sql.types import to_arrow_schema
@@ -2065,8 +2044,7 @@ def toPandas(self):
                 to_arrow_schema(self.schema)
             except Exception as e:
 
-                if self.sql_ctx.getConf("spark.sql.execution.arrow.fallback.enabled", "true") \
-                        .lower() == "true":
+                if self.sql_ctx._conf.arrowFallbackEnabled():
                     msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.enabled' is set to true; however, "
```

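The same accessor drives toPandas()'s Arrow branch. A hedged sketch of exercising it (assumes pyarrow is installed; `spark.sql.execution.arrow.enabled` is the flag's name in this Spark line):

```python
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

df = spark.range(1000).selectExpr("id", "id * 2 AS doubled")
# arrowEnabled() now returns True, so toPandas() attempts the Arrow transfer;
# if schema conversion fails and arrowFallbackEnabled() is True, it warns and
# falls back to the non-Arrow collect path shown in the diff.
pdf = df.toPandas()
print(pdf.head())
```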
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
Lines changed: 6 additions & 0 deletions

```diff
@@ -1728,6 +1728,12 @@ class SQLConf extends Serializable with Logging {
 
   def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
 
+  def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
+
+  def replEagerEvalMaxNumRows: Int = getConf(SQLConf.REPL_EAGER_EVAL_MAX_NUM_ROWS)
+
+  def replEagerEvalTruncate: Int = getConf(SQLConf.REPL_EAGER_EVAL_TRUNCATE)
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */
```
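
These three getters wrap typed `ConfigEntry`s, so the values arrive in Python already parsed as bool/int rather than as strings to be lower()-ed and cast. A minimal sketch of hitting them through the session's internal handle (internal API, shown only for illustration):

```python
jconf = spark._jsparkSession.sessionState().conf()  # same object _conf returns
assert jconf.replEagerEvalTruncate() == 20          # an int, not the string "20"
assert jconf.isReplEagerEvalEnabled() in (True, False)
```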
