Commit a69ca83

rename param

1 parent ecf3dfe commit a69ca83

10 files changed: +51 -52 lines changed

This commit renames the shared expert param blockSizeInMB to maxBlockSizeInMB across the Scala and Python ML APIs, simplifies its doc string (the inferred-value wording no longer references dataset statistics), and tidies the blockification loop in Instance.scala.

mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala

Lines changed: 6 additions & 6 deletions
@@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel
 /** Params for linear SVM Classifier. */
 private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam
   with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol
-  with HasAggregationDepth with HasThreshold with HasBlockSizeInMB {
+  with HasAggregationDepth with HasThreshold with HasMaxBlockSizeInMB {

   /**
    * Param for threshold in binary classification prediction.
@@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
     "threshold in binary classification prediction applied to rawPrediction")

   setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6,
-    standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSizeInMB -> 0.0)
+    standardization -> true, threshold -> 0.0, aggregationDepth -> 2, maxBlockSizeInMB -> 0.0)
 }

 /**
@@ -153,13 +153,13 @@ class LinearSVC @Since("2.2.0") (
   def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value)

   /**
-   * Sets the value of param [[blockSizeInMB]].
+   * Sets the value of param [[maxBlockSizeInMB]].
    * Default is 0.0.
    *
    * @group expertSetParam
    */
   @Since("3.1.0")
-  def setBlockSizeInMB(value: Double): this.type = set(blockSizeInMB, value)
+  def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value)

   @Since("2.2.0")
   override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra)
@@ -169,7 +169,7 @@ class LinearSVC @Since("2.2.0") (
     instr.logDataset(dataset)
     instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol,
       regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth,
-      blockSizeInMB)
+      maxBlockSizeInMB)

     if (dataset.storageLevel != StorageLevel.NONE) {
       instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " +
@@ -191,7 +191,7 @@ class LinearSVC @Since("2.2.0") (
     instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString)
     instr.logSumOfWeights(summarizer.weightSum)

-    var actualBlockSizeInMB = $(blockSizeInMB)
+    var actualBlockSizeInMB = $(maxBlockSizeInMB)
     if (actualBlockSizeInMB == 0) {
       actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB
       require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0")
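
For orientation, here is a minimal usage sketch of the renamed setter on the Scala side. It is a sketch only: `training` is an assumed DataFrame with "label" and "features" columns, and 0.25 is an arbitrary value; the setter names otherwise match the diff above.

import org.apache.spark.ml.classification.LinearSVC

// Sketch: `training` is an assumed DataFrame[label: Double, features: Vector].
val lsvc = new LinearSVC()
  .setMaxIter(10)
  .setRegParam(0.1)
  .setMaxBlockSizeInMB(0.25) // formerly setBlockSizeInMB; 0.0 (default) lets Spark infer a size

val model = lsvc.fit(training)
println(s"coefficients=${model.coefficients} intercept=${model.intercept}")

Since maxBlockSizeInMB is an expert param with default 0.0, pipelines that never set it are unaffected by anything here except the rename.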

mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala

Lines changed: 4 additions & 5 deletions
@@ -161,20 +161,19 @@ private[spark] object InstanceBlock {
     var blockMemUsage = 0L

     while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) {
-      val instance: Instance = instanceIterator.next()
+      val instance = instanceIterator.next()
       if (numCols < 0L) numCols = instance.features.size
       require(numCols == instance.features.size)
-      val nnz = instance.features.numNonzeros

       buff += instance
       buffCnt += 1L
-      buffNnz += nnz
+      buffNnz += instance.features.numNonzeros
       buffUnitWeight &&= (instance.weight == 1)
       blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight)
     }

-    // the block mem usage may slightly exceed threshold, not a big issue.
-    // and this ensure even if one row exceed block limit, each block has one row
+    // The block memory usage may slightly exceed the threshold; that is not a big issue.
+    // It also ensures that even if a single row exceeds the block limit, each block still gets one row.
     InstanceBlock.fromInstances(buff.result())
   }
 }
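
For intuition, a self-contained sketch of the blockification pattern this loop implements. `Row` and `estimateMemUsage` are hypothetical stand-ins for Spark's Instance and getBlockMemUsage; only the control flow mirrors the real code.

object BlockifySketch {
  // Hypothetical stand-in for Spark's Instance: a dense row plus a weight.
  final case class Row(values: Array[Double], weight: Double)

  // Crude dense-layout estimate: 8 bytes per value plus 8 bytes for the weight.
  def estimateMemUsage(numCols: Int, numRows: Long): Long =
    numRows * (numCols.toLong * 8L + 8L)

  // Pull rows until the estimated block size crosses the budget. Because the
  // check runs only after a row is appended, every block holds at least one
  // row, even when that single row already exceeds maxMemUsage.
  def nextBlock(it: Iterator[Row], maxMemUsage: Long): Seq[Row] = {
    val buff = Seq.newBuilder[Row]
    var count = 0L
    var memUsage = 0L
    while (it.hasNext && memUsage < maxMemUsage) {
      val row = it.next()
      buff += row
      count += 1L
      memUsage = estimateMemUsage(row.values.length, count)
    }
    buff.result()
  }
}

Calling nextBlock repeatedly over an iterator partitions it into blocks whose estimated footprint stays near the budget; overshooting by at most one row is accepted in exchange for a single-pass loop, which is the trade-off the rewritten comment describes.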

mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala

Lines changed: 4 additions & 4 deletions
@@ -109,10 +109,10 @@ private[shared] object SharedParamsCodeGen {
       "stacked within partitions. If block size is more than remaining data in a partition " +
       "then it is adjusted to the size of this data.",
       isValid = "ParamValidators.gt(0)", isExpertParam = true),
-    ParamDesc[Double]("blockSizeInMB", "Maximum memory in MB for stacking input data " +
-      "in blocks. Data is stacked within partitions. If more than remaining data size in a " +
-      "partition then it is adjusted to the data size. If 0, try to infer an appropriate value " +
-      "based on the statistics of dataset. Must be >= 0.",
+    ParamDesc[Double]("maxBlockSizeInMB", "Maximum memory in MB for stacking input data " +
+      "into blocks. Data is stacked within partitions. If more than remaining data size in a " +
+      "partition then it is adjusted to the data size. If 0, try to infer an appropriate " +
+      "value. Must be >= 0.",
       Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true)
   )

mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala

Lines changed: 6 additions & 6 deletions
@@ -564,20 +564,20 @@ trait HasBlockSize extends Params {
 }

 /**
- * Trait for shared param blockSizeInMB (default: 0.0). This trait may be changed or
+ * Trait for shared param maxBlockSizeInMB (default: 0.0). This trait may be changed or
  * removed between minor versions.
  */
-trait HasBlockSizeInMB extends Params {
+trait HasMaxBlockSizeInMB extends Params {

   /**
-   * Param for Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be &gt;= 0..
+   * Param for Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be &gt;= 0..
    * @group expertParam
    */
-  final val blockSizeInMB: DoubleParam = new DoubleParam(this, "blockSizeInMB", "Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", ParamValidators.gtEq(0.0))
+  final val maxBlockSizeInMB: DoubleParam = new DoubleParam(this, "maxBlockSizeInMB", "Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", ParamValidators.gtEq(0.0))

-  setDefault(blockSizeInMB, 0.0)
+  setDefault(maxBlockSizeInMB, 0.0)

   /** @group expertGetParam */
-  final def getBlockSizeInMB: Double = $(blockSizeInMB)
+  final def getMaxBlockSizeInMB: Double = $(maxBlockSizeInMB)
 }
 // scalastyle:on
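
Every trait in this generated file follows the same three-step shape: declare the Param, register its default, expose a getter. A toy sketch of that shape, using simplified Param/Params stand-ins rather than Spark's real DoubleParam and ParamValidators machinery:

// Simplified stand-ins for Spark's Param/Params machinery; validators omitted.
final case class Param[T](name: String, doc: String)

trait Params {
  private val defaults = scala.collection.mutable.Map.empty[String, Any]
  protected def setDefault[T](param: Param[T], value: T): Unit =
    defaults(param.name) = value
  protected def getOrDefault[T](param: Param[T]): T =
    defaults(param.name).asInstanceOf[T]
}

// The three-step shape SharedParamsCodeGen emits: declare, default, getter.
trait HasMaxBlockSizeInMB extends Params {
  final val maxBlockSizeInMB: Param[Double] = Param(
    "maxBlockSizeInMB",
    "Maximum memory in MB for stacking input data into blocks. Must be >= 0.")

  setDefault(maxBlockSizeInMB, 0.0)

  final def getMaxBlockSizeInMB: Double = getOrDefault(maxBlockSizeInMB)
}

SharedParamsCodeGen emits exactly this pattern from the ParamDesc entry shown in the previous file, which is why the rename touches the codegen source and its generated output in lockstep.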

mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -215,7 +215,7 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest {
       .setMaxIter(5)
     val model = lsvc.fit(dataset)
     Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s =>
-      val model2 = lsvc.setBlockSizeInMB(s).fit(dataset)
+      val model2 = lsvc.setMaxBlockSizeInMB(s).fit(dataset)
       assert(model.intercept ~== model2.intercept relTol 1e-9)
       assert(model.coefficients ~== model2.coefficients relTol 1e-9)
     }

python/pyspark/ml/classification.py

Lines changed: 11 additions & 11 deletions
@@ -26,7 +26,7 @@
 from pyspark.ml import Estimator, Predictor, PredictionModel, Model
 from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \
     HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \
-    HasAggregationDepth, HasThreshold, HasBlockSize, HasBlockSizeInMB, Param, Params, \
+    HasAggregationDepth, HasThreshold, HasBlockSize, HasMaxBlockSizeInMB, Param, Params, \
     TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism
 from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \
     _TreeEnsembleModel, _RandomForestParams, _GBTParams, \
@@ -504,7 +504,7 @@ def recallByThreshold(self):

 class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol,
                        HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold,
-                       HasBlockSizeInMB):
+                       HasMaxBlockSizeInMB):
     """
     Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`.

@@ -521,7 +521,7 @@ def __init__(self, *args):
         super(_LinearSVCParams, self).__init__(*args)
         self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True,
                          standardization=True, threshold=0.0, aggregationDepth=2,
-                         blockSizeInMB=0.0)
+                         maxBlockSizeInMB=0.0)


 @inherit_doc
@@ -565,7 +565,7 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl
     LinearSVCModel...
     >>> model.getThreshold()
     0.5
-    >>> model.getBlockSizeInMB()
+    >>> model.getMaxBlockSizeInMB()
     0.0
     >>> model.coefficients
     DenseVector([0.0, -0.2792, -0.1833])
@@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl
     def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction",
                  fitIntercept=True, standardization=True, threshold=0.0, weightCol=None,
-                 aggregationDepth=2, blockSizeInMB=0.0):
+                 aggregationDepth=2, maxBlockSizeInMB=0.0):
         """
         __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \
                  maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \
                  fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \
-                 aggregationDepth=2, blockSizeInMB=0.0):
+                 aggregationDepth=2, maxBlockSizeInMB=0.0):
         """
         super(LinearSVC, self).__init__()
         self._java_obj = self._new_java_obj(
@@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p
     def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction",
                   maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction",
                   fitIntercept=True, standardization=True, threshold=0.0, weightCol=None,
-                  aggregationDepth=2, blockSizeInMB=0.0):
+                  aggregationDepth=2, maxBlockSizeInMB=0.0):
         """
         setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \
                   maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \
                   fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \
-                  aggregationDepth=2, blockSizeInMB=0.0):
+                  aggregationDepth=2, maxBlockSizeInMB=0.0):
         Sets params for Linear SVM Classifier.
         """
         kwargs = self._input_kwargs
@@ -694,11 +694,11 @@ def setAggregationDepth(self, value):
         return self._set(aggregationDepth=value)

     @since("3.1.0")
-    def setBlockSizeInMB(self, value):
+    def setMaxBlockSizeInMB(self, value):
         """
-        Sets the value of :py:attr:`blockSizeInMB`.
+        Sets the value of :py:attr:`maxBlockSizeInMB`.
         """
-        return self._set(blockSizeInMB=value)
+        return self._set(maxBlockSizeInMB=value)


 class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable,

python/pyspark/ml/classification.pyi

Lines changed: 5 additions & 5 deletions
@@ -26,7 +26,7 @@ from pyspark.ml.base import _PredictorParams
 from pyspark.ml.param.shared import (
     HasAggregationDepth,
     HasBlockSize,
-    HasBlockSizeInMB,
+    HasMaxBlockSizeInMB,
     HasElasticNetParam,
     HasFitIntercept,
     HasMaxIter,
@@ -173,7 +173,7 @@ class _LinearSVCParams(
     HasWeightCol,
     HasAggregationDepth,
     HasThreshold,
-    HasBlockSizeInMB,
+    HasMaxBlockSizeInMB,
 ):
     threshold: Param[float]
     def __init__(self, *args: Any) -> None: ...
@@ -199,7 +199,7 @@ class LinearSVC(
         threshold: float = ...,
         weightCol: Optional[str] = ...,
         aggregationDepth: int = ...,
-        blockSizeInMB: float = ...
+        maxBlockSizeInMB: float = ...
     ) -> None: ...
     def setParams(
         self,
@@ -216,7 +216,7 @@ class LinearSVC(
         threshold: float = ...,
         weightCol: Optional[str] = ...,
         aggregationDepth: int = ...,
-        blockSizeInMB: float = ...
+        maxBlockSizeInMB: float = ...
     ) -> LinearSVC: ...
     def setMaxIter(self, value: int) -> LinearSVC: ...
     def setRegParam(self, value: float) -> LinearSVC: ...
@@ -226,7 +226,7 @@ class LinearSVC(
     def setThreshold(self, value: float) -> LinearSVC: ...
     def setWeightCol(self, value: str) -> LinearSVC: ...
     def setAggregationDepth(self, value: int) -> LinearSVC: ...
-    def setBlockSizeInMB(self, value: float) -> LinearSVC: ...
+    def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ...

 class LinearSVCModel(
     _JavaClassificationModel[Vector],

python/pyspark/ml/param/_shared_params_code_gen.py

Lines changed: 3 additions & 3 deletions
@@ -166,10 +166,10 @@ def get$Name(self):
     ("blockSize", "block size for stacking input data in matrices. Data is stacked within "
      "partitions. If block size is more than remaining data in a partition then it is "
      "adjusted to the size of this data.", None, "TypeConverters.toInt"),
-    ("blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is " +
+    ("maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is " +
      "stacked within partitions. If more than remaining data size in a partition then it " +
-     "is adjusted to the data size. If 0, try to infer an appropriate value based on the " +
-     "statistics of dataset. Must be >= 0.", "0.0", "TypeConverters.toFloat")]
+     "is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.",
+     "0.0", "TypeConverters.toFloat")]

 code = []
 for name, doc, defaultValueStr, typeConverter in shared:

python/pyspark/ml/param/shared.py

Lines changed: 8 additions & 8 deletions
@@ -599,19 +599,19 @@ def getBlockSize(self):
         return self.getOrDefault(self.blockSize)


-class HasBlockSizeInMB(Params):
+class HasMaxBlockSizeInMB(Params):
     """
-    Mixin for param blockSizeInMB: maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.
+    Mixin for param maxBlockSizeInMB: maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.
     """

-    blockSizeInMB = Param(Params._dummy(), "blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", typeConverter=TypeConverters.toFloat)
+    maxBlockSizeInMB = Param(Params._dummy(), "maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", typeConverter=TypeConverters.toFloat)

     def __init__(self):
-        super(HasBlockSizeInMB, self).__init__()
-        self._setDefault(blockSizeInMB=0.0)
+        super(HasMaxBlockSizeInMB, self).__init__()
+        self._setDefault(maxBlockSizeInMB=0.0)

-    def getBlockSizeInMB(self):
+    def getMaxBlockSizeInMB(self):
         """
-        Gets the value of blockSizeInMB or its default value.
+        Gets the value of maxBlockSizeInMB or its default value.
         """
-        return self.getOrDefault(self.blockSizeInMB)
+        return self.getOrDefault(self.maxBlockSizeInMB)

python/pyspark/ml/param/shared.pyi

Lines changed: 3 additions & 3 deletions
@@ -186,7 +186,7 @@ class HasBlockSize(Params):
     def __init__(self) -> None: ...
     def getBlockSize(self) -> int: ...

-class HasBlockSizeInMB(Params):
-    blockSizeInMB: Param[float]
+class HasMaxBlockSizeInMB(Params):
+    maxBlockSizeInMB: Param[float]
     def __init__(self) -> None: ...
-    def getBlockSizeInMB(self) -> float: ...
+    def getMaxBlockSizeInMB(self) -> float: ...
