From fa5cd8c286cf31f5723df8979dae68cf80c05688 Mon Sep 17 00:00:00 2001
From: Mark
Date: Wed, 24 Aug 2022 14:44:07 -0400
Subject: [PATCH] chore: improve test coverage

---
 .../synapse/ml/cognitive/ComputerVision.scala |  5 ++
 .../MultivariateAnomalyDetection.scala        | 14 +--
 .../split1/ComputerVisionSuite.scala          | 20 ++++-
 .../cognitive/split1/TextAnalyticsSuite.scala | 23 +++++
 .../MultivariateAnamolyDetectionSuite.scala   |  6 ++
 .../synapse/ml/codegen/DefaultParamInfo.scala | 86 ++++++++++++-------
 .../azure/synapse/ml/codegen/Wrappable.scala  | 11 +--
 .../ml/featurize/text/PageSplitter.scala      |  2 +-
 .../ml/recommendation/RankingAdapter.scala    | 27 +++---
 .../ml/stages/StratifiedRepartition.scala     |  2 +-
 .../ml/core/test/fuzzing/Fuzzing.scala        | 70 +++++++++++++--
 .../recommendation/RankingAdapterSpec.scala   |  1 +
 .../synapse/ml/recommendation/SARSpec.scala   | 36 ++++----
 .../ml/stages/TextPreprocessorSuite.scala     |  5 ++
 .../azure/synapse/ml/onnx/ONNXModel.scala     |  4 +-
 .../synapse/ml/onnx/ONNXModelSuite.scala      |  5 ++
 pipeline.yaml                                 |  6 +-
 .../ml/core/test/fuzzing/FuzzingTest.scala    |  5 ++
 .../ml/vw/VowpalWabbitInteractions.scala      |  2 +-
 19 files changed, 243 insertions(+), 87 deletions(-)

diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala
index a70bdfb7d2..97f93c02b9 100644
--- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala
+++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala
@@ -73,6 +73,11 @@ trait HasImageInput extends HasImageUrl
   override protected def inputFunc(schema: StructType): Row => Option[HttpRequestBase] = {
     val rowToUrl = prepareUrl
     val rowToEntity = prepareEntity;
+    if (get(imageUrl).orElse(get(imageBytes)).isEmpty){
+      throw new IllegalArgumentException("Please set one of the" +
+        " imageUrl, imageUrlCol, imageBytes, imageBytesCol parameters.")
+    }
+
     { row: Row =>
       if (shouldSkip(row)) {
         None
diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/MultivariateAnomalyDetection.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/MultivariateAnomalyDetection.scala
index 49f95c8c3a..f7517d9a3f 100644
--- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/MultivariateAnomalyDetection.scala
+++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/MultivariateAnomalyDetection.scala
@@ -390,7 +390,7 @@ class FitMultivariateAnomaly(override val uid: String) extends Estimator[DetectM
     }
   }

-  def getSlidingWindow: Option[Int] = get(slidingWindow)
+  def getSlidingWindow: Int = $(slidingWindow)

   val alignMode = new Param[String](this, "alignMode", "An optional field, indicates how " +
     "we align different variables into the same time-range which is required by the model.{Inner, Outer}")
@@ -403,7 +403,7 @@ class FitMultivariateAnomaly(override val uid: String) extends Estimator[DetectM
     }
   }

-  def getAlignMode: Option[String] = get(alignMode)
+  def getAlignMode: String = $(alignMode)

   val fillNAMethod = new Param[String](this, "fillNAMethod", "An optional field, indicates how missed " +
    "values will be filled with. Can not be set to NotFill, when alignMode is Outer.{Previous, Subsequent," +
@@ -417,7 +417,7 @@ class FitMultivariateAnomaly(override val uid: String) extends Estimator[DetectM
     }
   }

-  def getFillNAMethod: Option[String] = get(fillNAMethod)
+  def getFillNAMethod: String = $(fillNAMethod)

   val paddingValue = new IntParam(this, "paddingValue", "optional field, is only useful" +
     " if FillNAMethod is set to Fixed.")
@@ -439,10 +439,10 @@ class FitMultivariateAnomaly(override val uid: String) extends Estimator[DetectM
           source,
           getStartTime,
           getEndTime,
-          getSlidingWindow,
+          get(slidingWindow),
           Option(AlignPolicy(
-            getAlignMode,
-            getFillNAMethod,
+            get(alignMode),
+            get(fillNAMethod),
             get(paddingValue))),
           get(displayName)
         ).toJson.compactPrint, ContentType.APPLICATION_JSON))
@@ -480,7 +480,7 @@ class FitMultivariateAnomaly(override val uid: String) extends Estimator[DetectM
     })
   }

-  override def copy(extra: ParamMap): Estimator[DetectMultivariateAnomaly] = defaultCopy(extra)
+  override def copy(extra: ParamMap): FitMultivariateAnomaly = defaultCopy(extra)

   override def transformSchema(schema: StructType): StructType = {
     schema.add(getErrorCol, DMAError.schema)
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala
index f409566785..e86a5d43a6 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala
@@ -7,10 +7,10 @@
 import com.microsoft.azure.synapse.ml.Secrets
 import com.microsoft.azure.synapse.ml.cognitive._
 import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._
 import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase}
-import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import com.microsoft.azure.synapse.ml.core.test.fuzzing.{GetterSetterFuzzing, TestObject, TransformerFuzzing}
 import org.apache.spark.ml.NamespaceInjections.pipelineModel
 import org.apache.spark.ml.util.MLReadable
-import org.apache.spark.sql.functions.typedLit
+import org.apache.spark.sql.functions.{col, typedLit}
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.scalactic.Equality
@@ -87,7 +87,8 @@
   override def reader: MLReadable[_] = OCR
 }

-class AnalyzeImageSuite extends TransformerFuzzing[AnalyzeImage] with CognitiveKey with Flaky {
+class AnalyzeImageSuite extends TransformerFuzzing[AnalyzeImage]
+  with CognitiveKey with Flaky with GetterSetterFuzzing[AnalyzeImage] {

   import spark.implicits._
@@ -97,6 +98,12 @@
     ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
   ).toDF("url", "language")

+  lazy val nullDf: DataFrame = Seq(
+    ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
+    ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", null), //scalastyle:ignore null
+    (null, "en")
+  ).toDF("url", "language")
+
   def baseAI: AnalyzeImage = new AnalyzeImage()
     .setSubscriptionKey(cognitiveKey)
     .setLocation("eastus")
@@ -118,6 +125,13 @@
   def bytesAI: AnalyzeImage = baseAI
     .setImageBytesCol("imageBytes")

+  test("Null handling"){
+    assertThrows[IllegalArgumentException]{
+      baseAI.transform(nullDf)
+    }
+    assert(ai.transform(nullDf).where(col("features").isNull).count() == 1)
+  }
+
   test("full parametrization") {
     val row = (Seq("Categories"), "en", Seq("Celebrities"),
       "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg")
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala
index 759b1b86d6..728e1b9f90 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala
@@ -7,9 +7,14 @@ import com.microsoft.azure.synapse.ml.Secrets
 import com.microsoft.azure.synapse.ml.cognitive._
 import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
 import com.microsoft.azure.synapse.ml.stages.{FixedMiniBatchTransformer, FlattenBatch}
+import org.apache.spark.SparkException
 import org.apache.spark.ml.util.MLReadable
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.{DataFrame, Row}

+import java.util.concurrent.TimeoutException
+
+
 trait TextEndpoint {
   lazy val textKey: String = sys.env.getOrElse("TEXT_API_KEY", Secrets.CognitiveApiKey)
   lazy val textApiLocation: String = sys.env.getOrElse("TEXT_API_LOCATION", "eastus")
@@ -327,6 +332,24 @@
     assert(results(24).get.sentimentAnalysis.get.document.get.sentiment == "positive")
   }

+  test("Exceeded Retries Info") {
+    val badModel = model
+      .setPollingDelay(0)
+      .setInitialPollingDelay(0)
+      .setMaxPollingRetries(1)
+
+    val results = badModel
+      .setSuppressMaxRetriesException(true)
+      .transform(df.coalesce(1))
+    assert(results.where(!col("error").isNull).count() > 0)
+
+    assertThrows[SparkException] {
+      badModel.setSuppressMaxRetriesException(false)
+        .transform(df.coalesce(1))
+        .collect()
+    }
+  }
+
   override def testObjects(): Seq[TestObject[TextAnalyze]] =
     Seq(new TestObject[TextAnalyze](model, df))
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split4/MultivariateAnamolyDetectionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split4/MultivariateAnamolyDetectionSuite.scala
index 7faf47299b..5a770931a3 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split4/MultivariateAnamolyDetectionSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split4/MultivariateAnamolyDetectionSuite.scala
@@ -9,6 +9,7 @@ import com.microsoft.azure.synapse.ml.cognitive.split1.AnomalyKey
 import com.microsoft.azure.synapse.ml.core.test.base.TestBase
 import com.microsoft.azure.synapse.ml.core.test.benchmarks.DatasetUtils
 import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject}
+import org.apache.spark.ml.param.{Param, ParamPair}
 import org.apache.spark.ml.util.MLReadable
 import org.apache.spark.sql.DataFrame
 import spray.json.{DefaultJsonProtocol, _}
@@ -231,6 +232,11 @@ class FitMultivariateAnomalySuite extends EstimatorFuzzing[FitMultivariateAnomal

   }

+  override def getterSetterParamExamples(p: FitMultivariateAnomaly): Map[Param[_],Any] = Map(
+    (p.alignMode, "Inner"),
+    (p.fillNAMethod, "Zero")
+  )
+
   override def testSerialization(): Unit = {
     println("ignore the Serialization Fuzzing test because fitting process takes more than 3 minutes")
   }
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala
index a0bd602d9a..14137cdcd6 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala
@@ -5,49 +5,65 @@ package com.microsoft.azure.synapse.ml.codegen

 import com.microsoft.azure.synapse.ml.param._
 import org.apache.spark.internal.Logging
+import org.apache.spark.ml.PipelineStage
 import org.apache.spark.ml.param._

 import scala.reflect.ClassTag

 case class ParamInfo[T <: Param[_]: ClassTag](pyType: String,
-                                              pyTypeConverter: Option[String],
-                                              rTypeConverter: Option[String],
-                                              dotnetType: String) {
+                                              pyTypeConverter: Option[String],
+                                              rTypeConverter: Option[String],
+                                              dotnetType: String,
+                                              example: Any) {

-  def this(pyType: String, typeConverterArg: String, rTypeConverterArg: String, dotnetType: String) = {
-    this(pyType, Some(typeConverterArg), Some(rTypeConverterArg), dotnetType)
+  def this(pyType: String, typeConverterArg: String, rTypeConverterArg: String, dotnetType: String, example: Any) = {
+    this(pyType, Some(typeConverterArg), Some(rTypeConverterArg), dotnetType, example)
   }

-  def this(pyType: String, dotnetType: String) = {
-    this(pyType, None, None, dotnetType)
+  def this(pyType: String, dotnetType: String, example: Any) = {
+    this(pyType, None, None, dotnetType, example)
   }
 }

 object DefaultParamInfo extends Logging {

-  val BooleanInfo = new ParamInfo[BooleanParam]("bool", "TypeConverters.toBoolean", "as.logical", "bool")
-  val IntInfo = new ParamInfo[IntParam]("int", "TypeConverters.toInt", "as.integer", "int")
-  val LongInfo = new ParamInfo[LongParam]("long", None, Some("as.integer"), "long")
-  val FloatInfo = new ParamInfo[FloatParam]("float", "TypeConverters.toFloat", "as.double", "float")
-  val DoubleInfo = new ParamInfo[DoubleParam]("float", "TypeConverters.toFloat", "as.double", "double")
-  val StringInfo = new ParamInfo[Param[String]]("str", Some("TypeConverters.toString"), None, "string")
-  val StringArrayInfo = new ParamInfo[StringArrayParam]("list", "TypeConverters.toListString",
-    "as.array", "string[]")
-  val DoubleArrayInfo = new ParamInfo[DoubleArrayParam]("list", "TypeConverters.toListFloat",
-    "as.array", "double[]")
-  val IntArrayInfo = new ParamInfo[IntArrayParam]("list", "TypeConverters.toListInt",
-    "as.array", "int[]")
-  val ByteArrayInfo = new ParamInfo[ByteArrayParam]("list", "byte[]")
-  val DoubleArrayArrayInfo = new ParamInfo[DoubleArrayArrayParam]("object", "double[][]")
-  val StringStringMapInfo = new ParamInfo[StringStringMapParam]("dict", "Dictionary")
-  val StringIntMapInfo = new ParamInfo[StringIntMapParam]("dict", "Dictionary")
-  val ArrayMapInfo = new ParamInfo[ArrayMapParam]("object", "Dictionary[]")
-  val TypedIntArrayInfo = new ParamInfo[TypedIntArrayParam]("object", "int[]")
-  val TypedDoubleArrayInfo = new ParamInfo[TypedDoubleArrayParam]("object", "double[]")
-  val UntypedArrayInfo = new ParamInfo[UntypedArrayParam]("object", "object[]")
-
-  val UnknownInfo = new ParamInfo[Param[_]]("object", "object")
+  val BooleanInfo = new ParamInfo[BooleanParam](
+    "bool", "TypeConverters.toBoolean", "as.logical", "bool", true)
+  val IntInfo = new ParamInfo[IntParam](
+    "int", "TypeConverters.toInt", "as.integer", "int", 1)
+  val LongInfo = new ParamInfo[LongParam](
+    "long", None, Some("as.integer"), "long", 1L)
+  val FloatInfo = new ParamInfo[FloatParam](
+    "float", "TypeConverters.toFloat", "as.double", "float", 1.0)
+  val DoubleInfo = new ParamInfo[DoubleParam](
+    "float", "TypeConverters.toFloat", "as.double", "double", 1.0)
+  val StringInfo = new ParamInfo[Param[String]](
+    "str", Some("TypeConverters.toString"), None, "string", "foo")
+  val StringArrayInfo = new ParamInfo[StringArrayParam](
+    "list", "TypeConverters.toListString", "as.array", "string[]", Array("foo", "bar"))
+  val DoubleArrayInfo = new ParamInfo[DoubleArrayParam](
+    "list", "TypeConverters.toListFloat", "as.array", "double[]", Array(1.0, 2.0))
+  val IntArrayInfo = new ParamInfo[IntArrayParam](
+    "list", "TypeConverters.toListInt", "as.array", "int[]", Array(1, 2))
+  val ByteArrayInfo = new ParamInfo[ByteArrayParam](
+    "list", "byte[]", Array(1.toByte, 0.toByte))
+  val DoubleArrayArrayInfo = new ParamInfo[DoubleArrayArrayParam](
+    "object", "double[][]", Array(Array(1.0, 2.0)))
+  val StringStringMapInfo = new ParamInfo[StringStringMapParam](
+    "dict", "Dictionary", Map("foo" -> "bar"))
+  val StringIntMapInfo = new ParamInfo[StringIntMapParam](
+    "dict", "Dictionary", Map("foo" -> 1))
+  val ArrayMapInfo = new ParamInfo[ArrayMapParam](
+    "object", "Dictionary[]", Array(Map("foo" -> 1)))
+  val TypedIntArrayInfo = new ParamInfo[TypedIntArrayParam](
+    "object", "int[]", Array(1, 2))
+  val TypedDoubleArrayInfo = new ParamInfo[TypedDoubleArrayParam](
+    "object", "double[]", Array(1.0, 2.0))
+  val UntypedArrayInfo = new ParamInfo[UntypedArrayParam](
+    "object", "object[]", Array(1.0, 2.0))
+  val UnknownInfo = new ParamInfo[Param[_]](
+    "object", "object", null)

   //noinspection ScalaStyle
   def getGeneralParamInfo(dataType: Param[_]): ParamInfo[_] = {
@@ -75,4 +91,16 @@
     }
   }

+  def defaultGetParamInfo(stage: Params, p: Param[_]): ParamInfo[_] = {
+    try {
+      stage.getClass.getMethod(p.name)
+        .getAnnotatedReturnType.getType.toString match {
+        case "org.apache.spark.ml.param.Param" => StringInfo
+        case _ => getGeneralParamInfo(p)
+      }
+    } catch {
+      case _: Exception => getGeneralParamInfo(p)
+    }
+  }
+
 }
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala
index d055657782..1f7d6b78e0 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala
@@ -46,16 +46,7 @@ trait BaseWrappable extends Params {
   }

   def getParamInfo(p: Param[_]): ParamInfo[_] = {
-    try {
-      thisStage.getClass.getMethod(p.name)
-        .getAnnotatedReturnType.getType.toString match {
-        case "org.apache.spark.ml.param.Param" => StringInfo
-        case _ => getGeneralParamInfo(p)
-      }
-    } catch {
-      case _: Exception => getGeneralParamInfo(p)
-    }
-
+    DefaultParamInfo.defaultGetParamInfo(thisStage, p)
   }

 }
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala
index 43532b6bf1..c20313edf5 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala
@@ -98,7 +98,7 @@ class PageSplitter(override val uid: String)
     )
   }

-  override def copy(extra: ParamMap): MultiNGram =
+  override def copy(extra: ParamMap): PageSplitter =
     defaultCopy(extra)

   def transformSchema(schema: StructType): StructType = {
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala
index 9d9c103933..757a860456 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala
@@ -74,16 +74,16 @@ class RankingAdapter(override val uid: String)

   def this() = this(Identifiable.randomUID("RecommenderAdapter"))

-  /** @group getParam */
-  override def getUserCol: String = getRecommender.asInstanceOf[Estimator[_] with RecommendationParams].getUserCol
-
-  /** @group getParam */
-  override def getItemCol: String = getRecommender.asInstanceOf[Estimator[_] with RecommendationParams].getItemCol
-
-  /** @group getParam */
-  override def getRatingCol: String = getRecommender.asInstanceOf[Estimator[_] with RecommendationParams].getRatingCol
+  override def setRecommender(value: Estimator[_ <: Model[_]]): this.type = {
+    val v = value.asInstanceOf[Estimator[_ <: Model[_]] with RecommendationParams]
+    v.get(v.userCol).map(setUserCol)
+    v.get(v.ratingCol).map(setRatingCol)
+    v.get(v.itemCol).map(setItemCol)
+    super.setRecommender(v)
+  }

   def fit(dataset: Dataset[_]): RankingAdapterModel = {
+    transformSchema(dataset.schema)
     logFit({
       new RankingAdapterModel()
         .setRecommenderModel(getRecommender.fit(dataset))
@@ -96,6 +96,13 @@ class RankingAdapter(override val uid: String)
     })
   }

+  override def transformSchema(schema: StructType): StructType = {
+    // Trigger the updating of parameters
+    // in case python parameterizes the call
+    get(recommender).foreach(setRecommender)
+    super.transformSchema(schema)
+  }
+
   override def copy(extra: ParamMap): RankingAdapter = {
     defaultCopy(extra)
   }
@@ -138,8 +145,8 @@ class RankingAdapterModel private[ml](val uid: String)
     val recs = getMode match {
       case "allUsers" =>
         this.getRecommenderModel match {
-          case als: ALSModel => als.asInstanceOf[ALSModel].recommendForAllUsers(getK)
-          case sar: SARModel => sar.asInstanceOf[SARModel].recommendForAllUsers(getK)
+          case als: ALSModel => als.recommendForAllUsers(getK)
+          case sar: SARModel => sar.recommendForAllUsers(getK)
         }
       case "normal" => SparkHelpers.flatten(getRecommenderModel.transform(dataset), getK, getItemCol, getUserCol)
     }
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala
index 5f9768ec5e..be768a62e0 100644
--- a/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala
+++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala
@@ -80,5 +80,5 @@ class StratifiedRepartition(val uid: String) extends Transformer with Wrappable

   def transformSchema(schema: StructType): StructType = schema

-  def copy(extra: ParamMap): DropColumns = defaultCopy(extra)
+  def copy(extra: ParamMap): StratifiedRepartition = defaultCopy(extra)
 }
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala
index f92c9ef7d5..8caeed9995 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala
@@ -3,7 +3,7 @@

 package com.microsoft.azure.synapse.ml.core.test.fuzzing

-import com.microsoft.azure.synapse.ml.codegen.CodegenConfig
+import com.microsoft.azure.synapse.ml.codegen.{CodegenConfig, DefaultParamInfo}
 import com.microsoft.azure.synapse.ml.codegen.GenerationUtils._
 import com.microsoft.azure.synapse.ml.core.env.FileUtilities
 import com.microsoft.azure.synapse.ml.core.test.base.TestBase
@@ -173,7 +173,8 @@ trait DotnetTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEqual
     saveDotnetTestData(conf)
     val testDataDirString = dotnetTestDataDir(conf).toString
     val generatedTests = dotnetTestObjects().zipWithIndex.map { case (to, i) =>
-      makeDotnetTests(to, i, testDataDirString) }
+      makeDotnetTests(to, i, testDataDirString)
+    }
     val stage = dotnetTestObjects().head.stage
     val importPath = stage.getClass.getName.split(".".toCharArray).dropRight(1)
     val importPathString = importPath.mkString(".")
@@ -518,8 +519,8 @@ trait SerializationFuzzing[S <: PipelineStage with MLWritable] extends TestBase
   val retrySerializationFuzzing = false

   test("Serialization Fuzzing") {
-    if (retrySerializationFuzzing){
-      tryWithRetries() {() =>
+    if (retrySerializationFuzzing) {
+      tryWithRetries() { () =>
         testSerialization()
       }
     } else {
@@ -529,8 +530,65 @@ trait SerializationFuzzing[S <: PipelineStage with MLWritable] extends TestBase

 }

+trait GetterSetterFuzzing[S <: PipelineStage with Params] extends TestBase with DataFrameEquality {
+  def getterSetterTestObject(): TestObject[S]
+
+  def getterSetterParamExamples(pipelineStage: S): Map[Param[_], Any] = Map()
+
+  def getterSetterParamExample(pipelineStage: S, p: Param[_]): Option[Any] = {
+    pipelineStage
+      .get(p).orElse(pipelineStage.getDefault(p))
+      .orElse(getterSetterParamExamples(pipelineStage).get(p))
+      .orElse {
+        Option(DefaultParamInfo.defaultGetParamInfo(pipelineStage, p).example)
+      }
+  }
+
+  def testGettersAndSetters(): Unit = {
+    val pipelineStage = getterSetterTestObject().stage.copy(new ParamMap()).asInstanceOf[S]
+    val methods = pipelineStage.getClass.getMethods
+    pipelineStage.params.foreach { p =>
+      val getters = methods.filter(_.getName == s"get${p.name.capitalize}").toSeq
+      val setters = methods.filter(_.getName == s"set${p.name.capitalize}").toSeq
+      val defaultValue = getterSetterParamExample(pipelineStage, p)
+      p match {
+        case sp: ServiceParam[_] =>
+          val colGetters = methods.filter(_.getName == s"get${sp.name.capitalize}Col").toSeq
+          val colSetters = methods.filter(_.getName == s"set${sp.name.capitalize}Col").toSeq
+          (colGetters, colSetters) match {
+            case (Seq(getter), Seq(setter)) =>
+              setter.invoke(pipelineStage, "foo")
+              assert(getter.invoke(pipelineStage) === "foo")
+            case _ =>
+              println(s"Could not test Service parameter column API for: ${sp.name}")
+          }
+          (getters, setters, defaultValue) match {
+            case (Seq(getter), Seq(setter), Some(Left(v))) =>
+              setter.invoke(pipelineStage, v.asInstanceOf[Object])
+              assert(getter.invoke(pipelineStage) === v)
+            case _ =>
+              println(s"Could not test Service parameter value API ${p.name}")
+          }
+        case p: Param[_] =>
+          (getters, setters, defaultValue) match {
+            case (Seq(getter), Seq(setter), Some(v)) =>
+              setter.invoke(pipelineStage, v.asInstanceOf[Object])
+              assert(getter.invoke(pipelineStage) === v)
+            case _ =>
+              println(s"Could not test parameter ${p.name}")
+          }
+      }
+    }
+  }
+
+  test("Getters and Setters work as anticipated") {
+    testGettersAndSetters()
+  }
+
+}
+
 trait Fuzzing[S <: PipelineStage with MLWritable] extends SerializationFuzzing[S]
-  with ExperimentFuzzing[S] with PyTestFuzzing[S] with DotnetTestFuzzing[S] {
+  with ExperimentFuzzing[S] with PyTestFuzzing[S] with DotnetTestFuzzing[S] with GetterSetterFuzzing[S] {

   def testObjects(): Seq[TestObject[S]]

@@ -542,6 +600,8 @@ trait Fuzzing[S <: PipelineStage with MLWritable] extends SerializationFuzzing[S

   def experimentTestObjects(): Seq[TestObject[S]] = testObjects()

+  def getterSetterTestObject(): TestObject[S] = testObjects().head
+
 }

 trait TransformerFuzzing[S <: Transformer with MLWritable] extends Fuzzing[S] {
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala
index f4bb36559e..2962ebde9e 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala
@@ -14,6 +14,7 @@ class RankingAdapterSpec extends RankingTestBase with EstimatorFuzzing[RankingAd
   override def reader: MLReadable[_] = RankingAdapter

   override def modelReader: MLReadable[_] = RankingAdapterModel
+
 }

 class RankingAdapterModelSpec extends RankingTestBase with TransformerFuzzing[RankingAdapterModel] {
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala
index e7635b3822..8bd733b614 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala
@@ -80,31 +80,31 @@ class SARSpec extends RankingTestBase with EstimatorFuzzing[SAR] {
     .csv(testFile).na.drop.cache

   test("tlc test sim count1")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 1, "cooc", simCount1, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 1, "cooc", simCount1, userAff))

   test("tlc test sim lift1")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 1, "lift", simLift1, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 1, "lift", simLift1, userAff))

   test("tlc test sim jac1")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 1, "jaccard", simJac1, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 1, "jaccard", simJac1, userAff))

   test("tlc test sim count3")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 3, "cooc", simCount3, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 3, "cooc", simCount3, userAff))

   test("tlc test sim lift3")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 3, "lift", simLift3, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 3, "lift", simLift3, userAff))

   test("tlc test sim jac3")(
-    SarTLCSpec.test_affinity_matrices(tlcSampleData, 3, "jaccard", simJac3, userAff))
+    SarTLCSpec.testAffinityMatrices(tlcSampleData, 3, "jaccard", simJac3, userAff))

   test("tlc test userpred count3 userid only")(
-    SarTLCSpec.test_product_recommendations(tlcSampleData, 3, "cooc", simCount3, userAff, userpredCount3))
+    SarTLCSpec.testProductRecommendations(tlcSampleData, 3, "cooc", simCount3, userAff, userpredCount3))

   test("tlc test userpred lift3 userid only")(
-    SarTLCSpec.test_product_recommendations(tlcSampleData, 3, "lift", simLift3, userAff, userpredLift3))
+    SarTLCSpec.testProductRecommendations(tlcSampleData, 3, "lift", simLift3, userAff, userpredLift3))

   test("tlc test userpred jac3 userid only")(
-    SarTLCSpec.test_product_recommendations(tlcSampleData, 3, "jaccard", simJac3, userAff, userpredJac3))
+    SarTLCSpec.testProductRecommendations(tlcSampleData, 3, "jaccard", simJac3, userAff, userpredJac3))

 }

@@ -132,8 +132,11 @@ object SarTLCSpec extends RankingTestBase {
   override lazy val itemColIndex = "itemID"
   //scalastyle:on field.name

-  def test_affinity_matrices(tlcSampleData: DataFrame, threshold: Int, similarityFunction: String, simFile: String,
-                             user_aff: String):
+  def testAffinityMatrices(tlcSampleData: DataFrame,
+                           threshold: Int,
+                           similarityFunction: String,
+                           simFile: String,
+                           user_aff: String):
   (SARModel, RecommendationIndexerModel) = {

     val ratings = tlcSampleData
@@ -169,11 +172,14 @@ object SarTLCSpec extends RankingTestBase {
     (model, recommendationIndexerModel)
   }

-  def test_product_recommendations(tlcSampleData: DataFrame, threshold: Int, similarityFunction: String,
-                                   simFile: String, user_aff: String,
-                                   userPredFile: String): Unit = {
+  def testProductRecommendations(tlcSampleData: DataFrame,
+                                 threshold: Int,
+                                 similarityFunction: String,
+                                 simFile: String,
+                                 user_aff: String,
+                                 userPredFile: String): Unit = {

-    val (model, recommendationIndexerModel) = test_affinity_matrices(tlcSampleData, threshold, similarityFunction,
+    val (model, recommendationIndexerModel) = testAffinityMatrices(tlcSampleData, threshold, similarityFunction,
       simFile, user_aff)
diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala
index 06cca3b2fa..4ea19e407f 100644
--- a/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala
+++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala
@@ -5,6 +5,7 @@ package com.microsoft.azure.synapse.ml.stages

 import com.microsoft.azure.synapse.ml.core.test.base.TestBase
 import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import org.apache.spark.ml.param.Param
 import org.apache.spark.ml.util.MLReadable

 class TextPreprocessorSuite extends TestBase with TransformerFuzzing[TextPreprocessor] {
@@ -118,6 +119,10 @@ class TextPreprocessorSuite extends TestBase with TransformerFuzzing[TextPreproc
     assert(verifyResult(result, expectedResult))
   }

+  override def getterSetterParamExamples(pipelineStage: TextPreprocessor): Map[Param[_], Any] = Map(
+    (pipelineStage.normFunc, "identity")
+  )
+
   def testObjects(): Seq[TestObject[TextPreprocessor]] = List(new TestObject(
     new TextPreprocessor().setInputCol("words").setOutputCol("out"), makeBasicDF()))
diff --git a/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala
index cff78d35fe..8ff15b6ff9 100644
--- a/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala
+++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala
@@ -84,7 +84,7 @@ trait ONNXModelParams extends Params with HasMiniBatcher with HasFeedFetchDicts
     "deviceType",
     "Specify a device type the model inference runs on. Supported types are: CPU or CUDA."
+ "If not specified, auto detection will be used.", - ParamValidators.inArray(Array("CPU", "CUDA")) + {x => Set("CPU", "CUDA")(x.toUpperCase())} ) def getDeviceType: String = $(deviceType) @@ -295,7 +295,7 @@ object ONNXModel extends ComplexParamsReadable[ONNXModel] with Logging { } private[onnx] def selectGpuDevice(deviceType: Option[String]): Option[Int] = { - deviceType match { + deviceType.map(_.toUpperCase) match { case None | Some("CUDA") => val gpuNum = TaskContext.get().resources().get("gpu").flatMap(_.addresses.map(_.toInt).headOption) gpuNum diff --git a/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala index a614e83434..d80f067a9f 100644 --- a/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala @@ -15,6 +15,7 @@ import org.apache.commons.io.FileUtils import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.image.ImageSchema import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors} +import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions._ @@ -280,6 +281,10 @@ class ONNXModelSuite extends TestBase imageTransformer.transform(imageDf).cache() } + override def getterSetterParamExamples(pipelineStage: ONNXModel): Map[Param[_], Any] = Map( + (pipelineStage.deviceType, "cpu") + ) + test("ONNXModel can infer for resnet50 model") { val (probability, prediction) = onnxResNet50.transform(testDfResNet50) .select("probability", "prediction") diff --git a/pipeline.yaml b/pipeline.yaml index efe8ed4327..387b30fabd 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -327,7 +327,7 @@ jobs: scriptType: bash inlineScript: | source activate synapseml - sbt getDatasets installPipPackage + sbt coverage getDatasets installPipPackage sbt publishM2 sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y sudo apt-get update @@ -398,9 +398,9 @@ jobs: inlineScript: | echo "SPARK_HOME=$SPARK_HOME" echo "DOTNET_WORKER_DIR=$DOTNET_WORKER_DIR" - sbt publishDotnetTestBase + sbt coverage publishDotnetTestBase sbt publishLocal - sbt "project $(PACKAGE)" publishDotnet + sbt "project $(PACKAGE)" coverage publishDotnet export SBT_OPTS="-XX:+UseG1GC" echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) diff --git a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala index 987d19885b..0e96325f2e 100644 --- a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala +++ b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala @@ -175,6 +175,11 @@ class FuzzingTest extends TestBase { assertOrLog(classesWithoutFuzzers.isEmpty, classesWithoutFuzzers.mkString("\n")) } + ignore("Quick analysis of all getters and setters") { + JarLoadingUtils.instantiateServices[GetterSetterFuzzing[_ <: PipelineStage]]() + .foreach(_.testGettersAndSetters()) + } + // TODO verify that model UIDs match the class names, perhaps use a Trait test("Verify all pipeline stages don't have exotic characters") { diff --git 
index 1c1495e071..2d232536d6 100644
--- a/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitInteractions.scala
+++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitInteractions.scala
@@ -92,5 +92,5 @@ class VowpalWabbitInteractions(override val uid: String) extends Transformer
     schema.add(StructField(getOutputCol, VectorType, true))
   }

-  override def copy(extra: ParamMap): VowpalWabbitFeaturizer = defaultCopy(extra)
+  override def copy(extra: ParamMap): VowpalWabbitInteractions = defaultCopy(extra)
 }
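
Reviewer note (illustrative sketch, not part of the patch): because GetterSetterFuzzing is mixed into Fuzzing[S], every suite extending TransformerFuzzing or EstimatorFuzzing picks up the "Getters and Setters work as anticipated" test automatically, using testObjects().head as its stage. A suite only needs to override getterSetterParamExamples when a param has no value the generic ParamInfo examples can supply, mirroring the TextPreprocessorSuite hunk above. MyStage and its funcName param below are hypothetical stand-ins:

    import com.microsoft.azure.synapse.ml.core.test.base.TestBase
    import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
    import org.apache.spark.ml.param.Param
    import org.apache.spark.ml.util.MLReadable

    class MyStageSuite extends TestBase with TransformerFuzzing[MyStage] {
      // Seed a value for a param the reflection-based fuzzer cannot invent
      // (e.g. a function-valued param); all other params fall back to the
      // examples declared in DefaultParamInfo.
      override def getterSetterParamExamples(pipelineStage: MyStage): Map[Param[_], Any] = Map(
        (pipelineStage.funcName, "identity")
      )

      override def testObjects(): Seq[TestObject[MyStage]] = Seq(new TestObject(
        new MyStage().setInputCol("words").setOutputCol("out"), makeBasicDF()))

      override def reader: MLReadable[_] = MyStage
    }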