From 2a05414b5c8dcec1a2d3c2389ecbc1cdd498b6bb Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Wed, 4 May 2016 14:08:12 +0800 Subject: [PATCH 1/3] [SPARK-14772][ML,PySpark]Python ML Params.copy treats uid, paramMaps differently than Scala --- python/pyspark/ml/param/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 40d830062581..8a8126ee0a03 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -357,7 +357,7 @@ def getOrDefault(self, param): return self._defaultParamMap[param] @since("1.4.0") - def extractParamMap(self, extra=None): + def extractParamMap(self, extra=None, default=False): """ Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into @@ -371,7 +371,8 @@ def extractParamMap(self, extra=None): if extra is None: extra = dict() paramMap = self._defaultParamMap.copy() - paramMap.update(self._paramMap) + if not default: + paramMap.update(self._paramMap) paramMap.update(extra) return paramMap @@ -463,7 +464,7 @@ def _setDefault(self, **kwargs): self._defaultParamMap[p] = value return self - def _copyValues(self, to, extra=None): + def _copyValues(self, to, extra=None, default=False): """ Copies param values from this instance to another instance for params shared by them. 
@@ -474,10 +475,11 @@ def _copyValues(self, to, extra=None): """ if extra is None: extra = dict() - paramMap = self.extractParamMap(extra) + paramMap = self.extractParamMap(extra, default) for p in self.params: if p in paramMap and to.hasParam(p.name): to._set(**{p.name: paramMap[p]}) + to.set(**{'uid': self.uid}) return to def _resetUid(self, newUid): From f389ad2aa5cd0a8f6c5c88bfab967b0a9594f7fa Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Fri, 6 May 2016 13:30:56 +0800 Subject: [PATCH 2/3] copy default param map --- python/pyspark/ml/param/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 8a8126ee0a03..c106533b8f9a 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -366,14 +366,15 @@ def extractParamMap(self, extra=None, default=False): user-supplied values < extra. :param extra: extra param values + :param default: if True, use only the default param map (skip user-set values) :return: merged param map """ if extra is None and not default: extra = dict() paramMap = self._defaultParamMap.copy() if not default: paramMap.update(self._paramMap) - paramMap.update(extra) + paramMap.update(extra) return paramMap @since("1.4.0") @@ -471,6 +472,7 @@ def _copyValues(self, to, extra=None, default=False): :param to: the target instance :param extra: extra params to be copied + :param default: if True, copy only the default param map (skip user-set values) :return: the target instance with param values copied """ if extra is None: @@ -479,7 +481,6 @@ def _copyValues(self, to, extra=None, default=False): for p in self.params: if p in paramMap and to.hasParam(p.name): to._set(**{p.name: paramMap[p]}) - to.set(**{'uid': self.uid}) return to def _resetUid(self, newUid): From dc352cd828342e591802372eecef5d8f98f0ce62 Mon Sep 17 00:00:00 2001 From: HuJiayin Date: Fri, 6 May 2016 13:38:24 +0800 Subject: [PATCH 3/3] copy default param map ---
python/pyspark/ml/param/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index c106533b8f9a..dbcf2cf691aa 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -378,7 +378,7 @@ def extractParamMap(self, extra=None, default=False): return paramMap @since("1.4.0") - def copy(self, extra=None): + def copy(self, extra=None, default=False): """ Creates a copy of this instance with the same uid and some extra params. The default implementation creates a @@ -388,13 +388,14 @@ def copy(self, extra=None): is not sufficient. :param extra: Extra parameters to copy to the new instance + :param default: If True, copy only the default param map (skip user-set values) :return: Copy of this instance """ if extra is None: extra = dict() that = copy.copy(self) that._paramMap = {} - return self._copyValues(that, extra) + return self._copyValues(that, extra, default) def _shouldOwn(self, param): """