diff --git a/build.sbt b/build.sbt
index ea6ee5d4a53..9d8b4b14893 100644
--- a/build.sbt
+++ b/build.sbt
@@ -24,7 +24,7 @@ libraryDependencies ++= Seq(
   "com.jcraft" % "jsch" % "0.1.54",
   "com.microsoft.cognitiveservices.speech" % "client-sdk" % "1.10.0",
   "org.apache.httpcomponents" % "httpclient" % "4.5.6",
-  "com.microsoft.ml.lightgbm" % "lightgbmlib" % "2.3.150",
+  "com.microsoft.ml.lightgbm" % "lightgbmlib" % "2.3.170",
   "com.github.vowpalwabbit" % "vw-jni" % "8.7.0.3",
   "com.linkedin.isolation-forest" %% "isolation-forest_2.4.3" % "0.3.2",
   "org.apache.spark" %% "spark-avro" % sparkVersion
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDataset.scala b/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDataset.scala
index 07dabbfff2b..f4a4eeaa5e9 100644
--- a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDataset.scala
+++ b/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDataset.scala
@@ -10,9 +10,6 @@ import com.microsoft.ml.lightgbm._
   * @param dataset The native representation of the dataset.
   */
 class LightGBMDataset(val dataset: SWIGTYPE_p_void) extends AutoCloseable {
-  var featureNames: Option[SWIGTYPE_p_p_char] = None
-  var featureNamesOpt2: Option[Array[String]] = None
-
   def validateDataset(): Unit = {
     // Validate num rows
     val numDataPtr = lightgbmlib.new_intp()
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala b/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
index 6948fbfe041..08e87d6e3c0 100644
--- a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
+++ b/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
@@ -33,6 +33,13 @@ object LightGBMUtils {
     }
   }
 
+  def validateArray(result: SWIGTYPE_p_void, component: String): Unit = {
+    if (result == null) {
+      throw new Exception(component + " call failed in LightGBM with error: " +
+        lightgbmlib.LGBM_GetLastError())
+    }
+  }
+
   /** Loads the native shared object binaries lib_lightgbm.so and lib_lightgbm_swig.so
     */
   def initializeNativeLibrary(): Unit = {
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala b/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
index 6755435f9a8..40a9458bd79 100644
--- a/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
+++ b/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
@@ -180,13 +180,12 @@ private object TrainUtils extends Serializable {
 
   def getEvalNames(boosterPtr: Option[SWIGTYPE_p_void]): Array[String] = {
     // Need to keep track of best scores for each metric, see callback.py in lightgbm for reference
-    val evalCountsPtr = lightgbmlib.new_intp()
-    val resultCounts = lightgbmlib.LGBM_BoosterGetEvalCounts(boosterPtr.get, evalCountsPtr)
-    LightGBMUtils.validate(resultCounts, "Booster Get Eval Counts")
-    val evalCounts = lightgbmlib.intp_value(evalCountsPtr)
-    // For debugging, can get metric names:
-    val evalNamesPtr = lightgbmlib.LGBM_BoosterGetEvalNamesSWIG(boosterPtr.get, evalCounts)
-    (0 until evalCounts).map(lightgbmlib.stringArray_getitem(evalNamesPtr, _)).toArray
+    // For debugging, can get metric names
+    val stringArrayHandle = lightgbmlib.LGBM_BoosterGetEvalNamesSWIG(boosterPtr.get)
+    LightGBMUtils.validateArray(stringArrayHandle, "Booster Get Eval Names")
+    val evalNames = lightgbmlib.StringArrayHandle_get_strings(stringArrayHandle)
+    lightgbmlib.StringArrayHandle_free(stringArrayHandle)
+    evalNames
   }
 
   def trainCore(trainParams: TrainParams, boosterPtr: Option[SWIGTYPE_p_void],
diff --git a/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala b/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
index 22b630a3ff0..260a97a7e35 100644
--- a/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
+++ b/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
@@ -309,9 +309,9 @@ class VerifyLightGBMClassifier extends Benchmarks with EstimatorFuzzing[LightGBM
     // If the max delta step is specified, assert AUC differs (assert parameter works)
     // Note: the final max output of leaves is learning_rate * max_delta_step, so param should reduce the effect
     val Array(train, test) = taskDF.randomSplit(Array(0.8, 0.2), seed)
-    val baseModelWithLR = baseModel.setLearningRate(0.9).setNumIterations(100)
+    val baseModelWithLR = baseModel.setLearningRate(0.9).setNumIterations(200)
     val scoredDF1 = baseModelWithLR.fit(train).transform(test)
-    val scoredDF2 = baseModelWithLR.setMaxDeltaStep(0.1).fit(train).transform(test)
+    val scoredDF2 = baseModelWithLR.setMaxDeltaStep(0.5).fit(train).transform(test)
     assertBinaryImprovement(scoredDF1, scoredDF2)
   }
 
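For reference, a minimal sketch of how the handle-based eval-names API from the
TrainUtils.scala hunk above is consumed. The object name EvalNamesSketch and the
standalone function shape are illustrative only, not part of the patch; the
lightgbmlib calls (LGBM_BoosterGetEvalNamesSWIG, StringArrayHandle_get_strings,
StringArrayHandle_free, LGBM_GetLastError) are the ones the patch itself uses.

    import com.microsoft.ml.lightgbm._

    // Sketch only: mirrors the new getEvalNames flow from the TrainUtils.scala hunk.
    // The caller must supply a live booster handle created elsewhere via lightgbmlib.
    object EvalNamesSketch {
      def evalNames(boosterPtr: SWIGTYPE_p_void): Array[String] = {
        // The SWIG wrapper returns an opaque string-array handle, or null on failure
        val handle = lightgbmlib.LGBM_BoosterGetEvalNamesSWIG(boosterPtr)
        if (handle == null) {
          throw new Exception("Booster Get Eval Names call failed in LightGBM with error: " +
            lightgbmlib.LGBM_GetLastError())
        }
        try {
          // Copy the native strings into a JVM array while the handle is still alive
          lightgbmlib.StringArrayHandle_get_strings(handle)
        } finally {
          // The handle owns native memory and must be released explicitly
          lightgbmlib.StringArrayHandle_free(handle)
        }
      }
    }

One design note: wrapping extraction in try/finally (rather than freeing after
extraction, as the patched getEvalNames does) would also release the native
handle if StringArrayHandle_get_strings were to throw.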