Skip to content

Commit d9798c8

Browse files
committed
[SPARK-22313][PYTHON] Mark/print deprecation warnings as DeprecationWarning for deprecated APIs
## What changes were proposed in this pull request? This PR proposes to mark the existing warnings as `DeprecationWarning` and print out warnings for deprecated functions. This could be actually useful for Spark app developers. I use (old) PyCharm and this IDE can detect this specific `DeprecationWarning` in some cases: **Before** <img src="https://user-images.githubusercontent.com/6477701/31762664-df68d9f8-b4f6-11e7-8773-f0468f70a2cc.png" height="45" /> **After** <img src="https://user-images.githubusercontent.com/6477701/31762662-de4d6868-b4f6-11e7-98dc-3c8446a0c28a.png" height="70" /> For console usage, `DeprecationWarning` is usually disabled (see https://docs.python.org/2/library/warnings.html#warning-categories and https://docs.python.org/3/library/warnings.html#warning-categories): ``` >>> import warnings >>> filter(lambda f: f[2] == DeprecationWarning, warnings.filters) [('ignore', <_sre.SRE_Pattern object at 0x10ba58c00>, <type 'exceptions.DeprecationWarning'>, <_sre.SRE_Pattern object at 0x10bb04138>, 0), ('ignore', None, <type 'exceptions.DeprecationWarning'>, None, 0)] ``` so, it won't actually mess up the terminal much unless it is intended. If this is intendedly enabled, it'd should as below: ``` >>> import warnings >>> warnings.simplefilter('always', DeprecationWarning) >>> >>> from pyspark.sql import functions >>> functions.approxCountDistinct("a") .../spark/python/pyspark/sql/functions.py:232: DeprecationWarning: Deprecated in 2.1, use approx_count_distinct instead. "Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning) ... ``` These instances were found by: ``` cd python/pyspark grep -r "Deprecated" . grep -r "deprecated" . grep -r "deprecate" . ``` ## How was this patch tested? Manually tested. Author: hyukjinkwon <gurwls223@gmail.com> Closes #19535 from HyukjinKwon/deprecated-warning.
1 parent 884d4f9 commit d9798c8

File tree

8 files changed

+110
-21
lines changed

8 files changed

+110
-21
lines changed

python/pyspark/ml/util.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,9 @@ def context(self, sqlContext):
175175
176176
.. note:: Deprecated in 2.1 and will be removed in 3.0, use session instead.
177177
"""
178-
warnings.warn("Deprecated in 2.1 and will be removed in 3.0, use session instead.")
178+
warnings.warn(
179+
"Deprecated in 2.1 and will be removed in 3.0, use session instead.",
180+
DeprecationWarning)
179181
self._jwrite.context(sqlContext._ssql_ctx)
180182
return self
181183

@@ -256,7 +258,9 @@ def context(self, sqlContext):
256258
257259
.. note:: Deprecated in 2.1 and will be removed in 3.0, use session instead.
258260
"""
259-
warnings.warn("Deprecated in 2.1 and will be removed in 3.0, use session instead.")
261+
warnings.warn(
262+
"Deprecated in 2.1 and will be removed in 3.0, use session instead.",
263+
DeprecationWarning)
260264
self._jread.context(sqlContext._ssql_ctx)
261265
return self
262266

python/pyspark/mllib/classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
311311
"""
312312
warnings.warn(
313313
"Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
314-
"LogisticRegressionWithLBFGS.")
314+
"LogisticRegressionWithLBFGS.", DeprecationWarning)
315315

316316
def train(rdd, i):
317317
return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),

python/pyspark/mllib/evaluation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def precision(self, label=None):
234234
"""
235235
if label is None:
236236
# note:: Deprecated in 2.0.0. Use accuracy.
237-
warnings.warn("Deprecated in 2.0.0. Use accuracy.")
237+
warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
238238
return self.call("precision")
239239
else:
240240
return self.call("precision", float(label))
@@ -246,7 +246,7 @@ def recall(self, label=None):
246246
"""
247247
if label is None:
248248
# note:: Deprecated in 2.0.0. Use accuracy.
249-
warnings.warn("Deprecated in 2.0.0. Use accuracy.")
249+
warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
250250
return self.call("recall")
251251
else:
252252
return self.call("recall", float(label))
@@ -259,7 +259,7 @@ def fMeasure(self, label=None, beta=None):
259259
if beta is None:
260260
if label is None:
261261
# note:: Deprecated in 2.0.0. Use accuracy.
262-
warnings.warn("Deprecated in 2.0.0. Use accuracy.")
262+
warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
263263
return self.call("fMeasure")
264264
else:
265265
return self.call("fMeasure", label)

python/pyspark/mllib/regression.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,8 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
278278
A condition which decides iteration termination.
279279
(default: 0.001)
280280
"""
281-
warnings.warn("Deprecated in 2.0.0. Use ml.regression.LinearRegression.")
281+
warnings.warn(
282+
"Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning)
282283

283284
def train(rdd, i):
284285
return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -421,7 +422,8 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
421422
"""
422423
warnings.warn(
423424
"Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
424-
"Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.")
425+
"Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.",
426+
DeprecationWarning)
425427

426428
def train(rdd, i):
427429
return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -566,7 +568,7 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
566568
warnings.warn(
567569
"Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
568570
"Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
569-
"LinearRegression.")
571+
"LinearRegression.", DeprecationWarning)
570572

571573
def train(rdd, i):
572574
return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),

python/pyspark/sql/dataframe.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ def registerTempTable(self, name):
130130
131131
.. note:: Deprecated in 2.0, use createOrReplaceTempView instead.
132132
"""
133+
warnings.warn(
134+
"Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning)
133135
self._jdf.createOrReplaceTempView(name)
134136

135137
@since(2.0)
@@ -1308,6 +1310,7 @@ def unionAll(self, other):
13081310
13091311
.. note:: Deprecated in 2.0, use :func:`union` instead.
13101312
"""
1313+
warnings.warn("Deprecated in 2.0, use union instead.", DeprecationWarning)
13111314
return self.union(other)
13121315

13131316
@since(2.3)

python/pyspark/sql/functions.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import math
2222
import sys
2323
import functools
24+
import warnings
2425

2526
if sys.version < "3":
2627
from itertools import imap as map
@@ -44,6 +45,14 @@ def _(col):
4445
return _
4546

4647

48+
def _wrap_deprecated_function(func, message):
49+
""" Wrap the deprecated function to print out deprecation warnings"""
50+
def _(col):
51+
warnings.warn(message, DeprecationWarning)
52+
return func(col)
53+
return functools.wraps(func)(_)
54+
55+
4756
def _create_binary_mathfunction(name, doc=""):
4857
""" Create a binary mathfunction by name"""
4958
def _(col1, col2):
@@ -207,6 +216,12 @@ def _():
207216
"""returns the relative rank (i.e. percentile) of rows within a window partition.""",
208217
}
209218

219+
# Wraps deprecated functions (keys) with the messages (values).
220+
_functions_deprecated = {
221+
'toDegrees': 'Deprecated in 2.1, use degrees instead.',
222+
'toRadians': 'Deprecated in 2.1, use radians instead.',
223+
}
224+
210225
for _name, _doc in _functions.items():
211226
globals()[_name] = since(1.3)(_create_function(_name, _doc))
212227
for _name, _doc in _functions_1_4.items():
@@ -219,6 +234,8 @@ def _():
219234
globals()[_name] = since(1.6)(_create_function(_name, _doc))
220235
for _name, _doc in _functions_2_1.items():
221236
globals()[_name] = since(2.1)(_create_function(_name, _doc))
237+
for _name, _message in _functions_deprecated.items():
238+
globals()[_name] = _wrap_deprecated_function(globals()[_name], _message)
222239
del _name, _doc
223240

224241

@@ -227,6 +244,7 @@ def approxCountDistinct(col, rsd=None):
227244
"""
228245
.. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead.
229246
"""
247+
warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning)
230248
return approx_count_distinct(col, rsd)
231249

232250

python/pyspark/streaming/flume.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,13 @@ def createStream(ssc, hostname, port,
5454
:param bodyDecoder: A function used to decode body (default is utf8_decoder)
5555
:return: A DStream object
5656
57-
.. note:: Deprecated in 2.3.0
57+
.. note:: Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0.
58+
See SPARK-22142.
5859
"""
60+
warnings.warn(
61+
"Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0. "
62+
"See SPARK-22142.",
63+
DeprecationWarning)
5964
jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
6065
helper = FlumeUtils._get_helper(ssc._sc)
6166
jstream = helper.createStream(ssc._jssc, hostname, port, jlevel, enableDecompression)
@@ -82,8 +87,13 @@ def createPollingStream(ssc, addresses,
8287
:param bodyDecoder: A function used to decode body (default is utf8_decoder)
8388
:return: A DStream object
8489
85-
.. note:: Deprecated in 2.3.0
90+
.. note:: Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0.
91+
See SPARK-22142.
8692
"""
93+
warnings.warn(
94+
"Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0. "
95+
"See SPARK-22142.",
96+
DeprecationWarning)
8797
jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
8898
hosts = []
8999
ports = []

python/pyspark/streaming/kafka.py

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# limitations under the License.
1616
#
1717

18+
import warnings
19+
1820
from py4j.protocol import Py4JJavaError
1921

2022
from pyspark.rdd import RDD
@@ -56,8 +58,13 @@ def createStream(ssc, zkQuorum, groupId, topics, kafkaParams=None,
5658
:param valueDecoder: A function used to decode value (default is utf8_decoder)
5759
:return: A DStream object
5860
59-
.. note:: Deprecated in 2.3.0
61+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
62+
See SPARK-21893.
6063
"""
64+
warnings.warn(
65+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
66+
"See SPARK-21893.",
67+
DeprecationWarning)
6168
if kafkaParams is None:
6269
kafkaParams = dict()
6370
kafkaParams.update({
@@ -105,8 +112,13 @@ def createDirectStream(ssc, topics, kafkaParams, fromOffsets=None,
105112
:return: A DStream object
106113
107114
.. note:: Experimental
108-
.. note:: Deprecated in 2.3.0
115+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
116+
See SPARK-21893.
109117
"""
118+
warnings.warn(
119+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
120+
"See SPARK-21893.",
121+
DeprecationWarning)
110122
if fromOffsets is None:
111123
fromOffsets = dict()
112124
if not isinstance(topics, list):
@@ -159,8 +171,13 @@ def createRDD(sc, kafkaParams, offsetRanges, leaders=None,
159171
:return: An RDD object
160172
161173
.. note:: Experimental
162-
.. note:: Deprecated in 2.3.0
174+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
175+
See SPARK-21893.
163176
"""
177+
warnings.warn(
178+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
179+
"See SPARK-21893.",
180+
DeprecationWarning)
164181
if leaders is None:
165182
leaders = dict()
166183
if not isinstance(kafkaParams, dict):
@@ -229,7 +246,8 @@ class OffsetRange(object):
229246
"""
230247
Represents a range of offsets from a single Kafka TopicAndPartition.
231248
232-
.. note:: Deprecated in 2.3.0
249+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
250+
See SPARK-21893.
233251
"""
234252

235253
def __init__(self, topic, partition, fromOffset, untilOffset):
@@ -240,6 +258,10 @@ def __init__(self, topic, partition, fromOffset, untilOffset):
240258
:param fromOffset: Inclusive starting offset.
241259
:param untilOffset: Exclusive ending offset.
242260
"""
261+
warnings.warn(
262+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
263+
"See SPARK-21893.",
264+
DeprecationWarning)
243265
self.topic = topic
244266
self.partition = partition
245267
self.fromOffset = fromOffset
@@ -270,7 +292,8 @@ class TopicAndPartition(object):
270292
"""
271293
Represents a specific topic and partition for Kafka.
272294
273-
.. note:: Deprecated in 2.3.0
295+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
296+
See SPARK-21893.
274297
"""
275298

276299
def __init__(self, topic, partition):
@@ -279,6 +302,10 @@ def __init__(self, topic, partition):
279302
:param topic: Kafka topic name.
280303
:param partition: Kafka partition id.
281304
"""
305+
warnings.warn(
306+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
307+
"See SPARK-21893.",
308+
DeprecationWarning)
282309
self._topic = topic
283310
self._partition = partition
284311

@@ -303,7 +330,8 @@ class Broker(object):
303330
"""
304331
Represent the host and port info for a Kafka broker.
305332
306-
.. note:: Deprecated in 2.3.0
333+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
334+
See SPARK-21893.
307335
"""
308336

309337
def __init__(self, host, port):
@@ -312,6 +340,10 @@ def __init__(self, host, port):
312340
:param host: Broker's hostname.
313341
:param port: Broker's port.
314342
"""
343+
warnings.warn(
344+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
345+
"See SPARK-21893.",
346+
DeprecationWarning)
315347
self._host = host
316348
self._port = port
317349

@@ -323,10 +355,15 @@ class KafkaRDD(RDD):
323355
"""
324356
A Python wrapper of KafkaRDD, to provide additional information on normal RDD.
325357
326-
.. note:: Deprecated in 2.3.0
358+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
359+
See SPARK-21893.
327360
"""
328361

329362
def __init__(self, jrdd, ctx, jrdd_deserializer):
363+
warnings.warn(
364+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
365+
"See SPARK-21893.",
366+
DeprecationWarning)
330367
RDD.__init__(self, jrdd, ctx, jrdd_deserializer)
331368

332369
def offsetRanges(self):
@@ -345,10 +382,15 @@ class KafkaDStream(DStream):
345382
"""
346383
A Python wrapper of KafkaDStream
347384
348-
.. note:: Deprecated in 2.3.0
385+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
386+
See SPARK-21893.
349387
"""
350388

351389
def __init__(self, jdstream, ssc, jrdd_deserializer):
390+
warnings.warn(
391+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
392+
"See SPARK-21893.",
393+
DeprecationWarning)
352394
DStream.__init__(self, jdstream, ssc, jrdd_deserializer)
353395

354396
def foreachRDD(self, func):
@@ -383,10 +425,15 @@ class KafkaTransformedDStream(TransformedDStream):
383425
"""
384426
Kafka specific wrapper of TransformedDStream to transform on Kafka RDD.
385427
386-
.. note:: Deprecated in 2.3.0
428+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
429+
See SPARK-21893.
387430
"""
388431

389432
def __init__(self, prev, func):
433+
warnings.warn(
434+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
435+
"See SPARK-21893.",
436+
DeprecationWarning)
390437
TransformedDStream.__init__(self, prev, func)
391438

392439
@property
@@ -405,7 +452,8 @@ class KafkaMessageAndMetadata(object):
405452
"""
406453
Kafka message and metadata information. Including topic, partition, offset and message
407454
408-
.. note:: Deprecated in 2.3.0
455+
.. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
456+
See SPARK-21893.
409457
"""
410458

411459
def __init__(self, topic, partition, offset, key, message):
@@ -419,6 +467,10 @@ def __init__(self, topic, partition, offset, key, message):
419467
:param message: actual message payload of this Kafka message, the return data is
420468
undecoded bytearray.
421469
"""
470+
warnings.warn(
471+
"Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
472+
"See SPARK-21893.",
473+
DeprecationWarning)
422474
self.topic = topic
423475
self.partition = partition
424476
self.offset = offset

0 commit comments

Comments
 (0)