From f66fe6b2dfe34190fd7bdc31ecbdd37f1be08730 Mon Sep 17 00:00:00 2001 From: WilliamZhu Date: Thu, 23 Dec 2021 14:48:54 +0800 Subject: [PATCH] convert options in include statement to set statements (#1633) * convert options in include statement to set statements * add test and fix exception thrown when no suffix in include statement * ignore result check in pr_1633.mlsql revise the code example add checking multi group params revise the reminder message when setting multi-group params in the Discretizer et rename the checking functions --- .../dsl/mmlib/algs/SQLDiscretizer.scala | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/streamingpro-mlsql/src/main/java/streaming/dsl/mmlib/algs/SQLDiscretizer.scala b/streamingpro-mlsql/src/main/java/streaming/dsl/mmlib/algs/SQLDiscretizer.scala index 54d9f9d3e..f8fb95f8d 100644 --- a/streamingpro-mlsql/src/main/java/streaming/dsl/mmlib/algs/SQLDiscretizer.scala +++ b/streamingpro-mlsql/src/main/java/streaming/dsl/mmlib/algs/SQLDiscretizer.scala @@ -87,8 +87,17 @@ class SQLDiscretizer(override val uid: String) extends SQLAlg with Functions wit return metas } + def checkWithoutGroupParams(params: Map[String, String]): Unit = { + val group = params.map(p => { + val key = p._1 + key.contains(DiscretizerParamsConstrant.PARAMS_PREFIX + ".") + }).filter(_ == true) + require(group.size == 0, "The multi-group params are not available for the Discretizer Et!") + } + def trainWithoutGroup(df: DataFrame, params: Map[String, String], _method: String, metaPath: String): DataFrame = { val dfWithId = df.withColumn("id", monotonically_increasing_id) + checkWithoutGroupParams(params) var transformedDF = dfWithId // we need save metadatas with index, because we need index val metas: Array[(Int, DiscretizerTrainData)] = { @@ -220,10 +229,14 @@ class SQLDiscretizer(override val uid: String) extends SQLAlg with Functions wit | |train data1 as Discretizer.`/tmp/model` |where method="bucketizer" - |and 
`fitParam.0.inputCol`="a" - |and `fitParam.0.splitArray`="-inf,0.0,1.0,inf" - |and `fitParam.1.inputCol`="b" - |and `fitParam.1.splitArray`="-inf,0.0,1.0,inf"; + |and `inputCol`="a" + |and `splits`="-inf,0.0,1.0,inf"; + | + |train data1 as Discretizer.`/tmp/discretizer` + |where method="quantile" + |and `inputCol`="a" + |and `outputCol`="a_v" + |and `numBuckets`="3"; |; """.stripMargin)