Added Regularizers support #90


Merged · 14 commits · Jun 3, 2021
@@ -172,19 +172,31 @@ public abstract class GraphTrainableModel(vararg layers: Layer) : TrainableModel
)

yPredOp = forward(xOp, inputLayer)
lossOp = loss.apply(tf, yPredOp, yTrueOp, numberOfLossesOp)
lossOp = buildLossFunction(loss)
targets = optimizer.prepareTargets(kGraph, tf, lossOp)

predictionOp = when (loss) {
is SoftmaxCrossEntropyWithLogits -> tf.withName(OUTPUT_NAME).nn.softmax(yPredOp)
else -> tf.withName(OUTPUT_NAME).identity(yPredOp)
}

metricOp = metric.apply(tf, predictionOp, yTrueOp, numberOfLossesOp)

isModelCompiled = true
}

private fun buildLossFunction(loss: LossFunction): Operand<Float> {
val basicLoss = loss.apply(tf, yPredOp, yTrueOp, numberOfLossesOp)
var totalLoss = basicLoss
// TODO: the regularization term should probably be divided by numberOfLossesOp and scaled together with the loss before averaging
kGraph.variableRegularizers.forEach { (variable, regularizer) ->
totalLoss = tf.math.add(totalLoss, regularizer.apply(tf, variable))
}
return tf.withName(TRAINING_LOSS).identity(totalLoss)
}
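Note: each registered penalty is whatever regularizer.apply(tf, variable) returns, summed onto the base loss above. A minimal sketch of an L2-style implementation, assuming Regularizer exposes a single apply(tf: Ops, input: Operand<Float>): Operand<Float> method returning a scalar (the exact base-class shape is not visible in this diff):

import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops

/** Hypothetical L2-style regularizer: returns l2 * sum(input^2) as a scalar penalty. */
public class SimpleL2(private val l2: Float = 0.01f) : Regularizer() {
    override fun apply(tf: Ops, input: Operand<Float>): Operand<Float> {
        // Reduce over all axes so a variable of any rank yields a scalar penalty.
        val allAxes = tf.range(tf.constant(0), tf.rank(input), tf.constant(1))
        return tf.math.mul(tf.constant(l2), tf.reduceSum(tf.math.square(input), allAxes))
    }
}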


override fun compile(optimizer: Optimizer, loss: Losses, metric: Metric, callback: Callback) {
compile(optimizer, Losses.convert(loss), metric, callback)
}
Expand Down
17 changes: 16 additions & 1 deletion api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/KGraph.kt
@@ -5,6 +5,7 @@

package org.jetbrains.kotlinx.dl.api.core

import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Graph
import org.tensorflow.GraphOperation
import org.tensorflow.Operand
@@ -35,6 +36,9 @@ public class KGraph(graphDef: ByteArray, prefix: String) : AutoCloseable {
/** A list of variables to train. */
private val layerVariables: MutableMap<Variable<Float>, Boolean> = mutableMapOf()

/** A map of layer variables to the regularizers applied to them. */
internal val variableRegularizers: MutableMap<Variable<Float>, Regularizer> = mutableMapOf()

/** A list of optimizers' variables. */
private val optimizerVariables: MutableList<Variable<Float>> = mutableListOf()

@@ -82,6 +86,17 @@ public class KGraph(graphDef: ByteArray, prefix: String) : AutoCloseable {
layerVariables[variable] = isTrainable
}

/**
* Registers the regularizer applied to a layer variable, so that its penalty can be added to the training loss.
*
* @param variable Variable tracked in KGraph.
* @param regularizer Regularizer applied to the variable.
*/
public fun addVariableRegularizer(variable: Variable<Float>, regularizer: Regularizer) {
check(!variableRegularizers.contains(variable)) { "$variable already has a registered regularizer. Analyze and fix the static graph building process." }
variableRegularizers[variable] = regularizer
}

/**
* Adds a variable used in optimizer to the pool of tracked variables.
*
@@ -199,4 +214,4 @@
runner.run()
}
}
}
}
@@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer
import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.TrainableModel
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.extension.convertTensorToMultiDimArray
import org.tensorflow.Operand
@@ -119,14 +120,16 @@ public abstract class Layer(public var name: String) {
kGraph: KGraph,
name: String,
variable: Variable<Float>,
initializer: Initializer
initializer: Initializer,
regularizer: Regularizer? = null
): Variable<Float> {
// require(fanIn != Int.MIN_VALUE) { "fanIn should be calculated before initialization for variable $name" }
// require(fanOut != Int.MIN_VALUE) { "fanOut should be calculated before initialization for variable $name" }

val initOp = initializer.apply(fanIn, fanOut, tf, variable, name)
kGraph.addLayerVariable(variable, isTrainable)
kGraph.addInitializer(name, initOp)
if (regularizer != null) kGraph.addVariableRegularizer(variable, regularizer)
return variable
}
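The intended call pattern, as it appears in the convolution and dense layers further down this diff: create the variable first, then register it together with its initializer and optional regularizer.

// Inside a layer's variable-creation step (see Conv2DImpl and Dense below):
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer, kernelRegularizer)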

@@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze
@@ -43,6 +44,9 @@ private const val EXTRA_DIM = 1L
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel.
* @property [biasInitializer] An initializer for the bias vector.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [name] Custom layer name.
* @property [useBias] If true the layer uses a bias vector.
@@ -58,6 +62,9 @@ public class Conv1D(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = "",
@@ -69,6 +76,9 @@
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
kernelRegularizerInternal = kernelRegularizer,
biasRegularizerInternal = biasRegularizer,
activityRegularizerInternal = activityRegularizer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
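A usage sketch for the new constructor parameters. L2 here is assumed to be a Regularizer implementation shipped alongside this change (only the base type appears in this diff), and filters/kernelSize are Keras-style parameter names not shown above:

import org.jetbrains.kotlinx.dl.api.core.regularizer.L2

// Hypothetical example: penalize large kernel and bias weights in a Conv1D layer.
val conv = Conv1D(
    filters = 32,
    kernelSize = 3,
    kernelRegularizer = L2(0.001f), // summed into the training loss by buildLossFunction
    biasRegularizer = L2(0.001f)
)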
@@ -11,7 +11,11 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.jetbrains.kotlinx.dl.api.core.shape.numElements
import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
import org.jetbrains.kotlinx.dl.api.core.util.conv2dBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.conv2dKernelVarName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
@@ -21,7 +25,6 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Variable
import org.tensorflow.op.nn.Conv2d
import org.tensorflow.op.nn.Conv2d.dilations
import java.lang.IllegalArgumentException
import kotlin.math.roundToInt

private const val KERNEL_VARIABLE_NAME = "conv2d_kernel"
@@ -51,6 +54,9 @@ private const val BIAS_VARIABLE_NAME = "conv2d_bias"
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel.
* @property [biasInitializer] An initializer for the bias vector.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [name] Custom layer name.
* @property [useBias] If true the layer uses a bias vector.
@@ -64,6 +70,9 @@ public class Conv2D(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -75,6 +84,9 @@
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
kernelRegularizerInternal = kernelRegularizer,
biasRegularizerInternal = biasRegularizer,
activityRegularizerInternal = activityRegularizer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
@@ -102,6 +114,9 @@ public abstract class Conv2DImpl(
private val activationInternal: Activations,
private val kernelInitializerInternal: Initializer,
private val biasInitializerInternal: Initializer,
private val kernelRegularizerInternal: Regularizer? = null,
private val biasRegularizerInternal: Regularizer? = null,
private val activityRegularizerInternal: Regularizer? = null,
private val paddingInternal: ConvPadding,
private val useBiasInternal: Boolean,
private val kernelVariableName: String,
@@ -205,8 +220,9 @@
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal)
if (useBiasInternal) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal)
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal, kernelRegularizerInternal)
if (useBiasInternal) bias =
addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal)
}
}

@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dKernelVarName
@@ -44,6 +45,9 @@ private const val BIAS_VARIABLE_NAME = "depthwise_conv2d_bias"
* The total number of depthwise convolution output channels will be equal to `numberOfChannels * depthMultiplier`.
* @property [depthwiseInitializer] An initializer for the depthwise kernel matrix.
* @property [biasInitializer] An initializer for the bias vector.
* @property [depthwiseRegularizer] Regularizer function applied to the depthwise kernel matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
@@ -58,6 +62,9 @@ public class DepthwiseConv2D(
public val depthMultiplier: Int = 1,
public val depthwiseInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val depthwiseRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -111,8 +118,15 @@ public class DepthwiseConv2D(
depthwiseKernel = tf.withName(kernelVariableName).variable(depthwiseKernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

depthwiseKernel = addWeight(tf, kGraph, kernelVariableName, depthwiseKernel, depthwiseInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
depthwiseKernel = addWeight(
tf,
kGraph,
kernelVariableName,
depthwiseKernel,
depthwiseInitializer,
depthwiseRegularizer
)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dBiasVarName
@@ -51,6 +52,10 @@ private const val BIAS_VARIABLE_NAME = "separable_conv2d_bias"
* @property [depthwiseInitializer] An initializer for the depthwise kernel matrix.
* @property [pointwiseInitializer] An initializer for the pointwise kernel matrix.
* @property [biasInitializer] An initializer for the bias vector.
* @property [depthwiseRegularizer] Regularizer function applied to the `depthwise` kernel matrix.
* @property [pointwiseRegularizer] Regularizer function applied to the `pointwise` kernel matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
@@ -68,6 +73,10 @@ public class SeparableConv2D(
public val depthwiseInitializer: Initializer = HeNormal(),
public val pointwiseInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val depthwiseRegularizer: Regularizer? = null,
public val pointwiseRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -137,9 +146,23 @@
pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

depthwiseKernel = addWeight(tf, kGraph, depthwiseKernelVariableName, depthwiseKernel, depthwiseInitializer)
pointwiseKernel = addWeight(tf, kGraph, pointwiseKernelVariableName, pointwiseKernel, pointwiseInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
depthwiseKernel = addWeight(
tf,
kGraph,
depthwiseKernelVariableName,
depthwiseKernel,
depthwiseInitializer,
depthwiseRegularizer
)
pointwiseKernel = addWeight(
tf,
kGraph,
pointwiseKernelVariableName,
pointwiseKernel,
pointwiseInitializer,
pointwiseRegularizer
)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
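Since a separable convolution factors into two kernels, their penalties can be tuned independently. A sketch under the same assumptions as the Conv1D example above (L2 and the filters parameter are not part of this diff):

// Hypothetical example: regularize the spatial and channel-mixing kernels separately.
val separable = SeparableConv2D(
    filters = 64,
    depthwiseRegularizer = L2(0.001f),  // per-channel spatial kernel
    pointwiseRegularizer = L2(0.0001f), // 1x1 channel-mixing kernel
    biasRegularizer = L2(0.001f)
)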
@@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.numElements
import org.jetbrains.kotlinx.dl.api.core.util.denseBiasVarName
@@ -37,8 +38,11 @@ private const val BIAS_VARIABLE_NAME = "dense_bias"
*
* @property [outputSize] Dimensionality of the output space.
* @property [activation] Activation function.
* @property [kernelInitializer] Initializer function for the weight matrix.
* @property [kernelInitializer] Initializer function for the `kernel` weights matrix.
* @property [biasInitializer] Initializer function for the bias.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
* @constructor Creates [Dense] object.
@@ -48,6 +52,9 @@ public class Dense(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val useBias: Boolean = true,
name: String = ""
) : Layer(name) {
@@ -67,7 +74,7 @@
fanOut = outputSize

val (kernelVariableName, biasVariableName) = defineVariableNames()
createDepthwiseConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
createDenseVariables(tf, kernelVariableName, biasVariableName, kGraph)
}

private fun defineVariableNames(): Pair<String, String> {
@@ -78,7 +85,7 @@
}
}

private fun createDepthwiseConv2DVariables(
private fun createDenseVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
@@ -87,8 +94,8 @@
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias, biasInitializer)
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer, kernelRegularizer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
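Putting it together: every penalty registered through addWeight is summed into the loss inside buildLossFunction, so the value reported under TRAINING_LOSS already includes regularization. A sketch of the end-to-end flow; the L2 class, the layer import paths, and the 'train' dataset are assumptions outside this diff:

import org.jetbrains.kotlinx.dl.api.core.Sequential
import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense // layer paths assumed; may differ
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.loss.Losses
import org.jetbrains.kotlinx.dl.api.core.metric.Metrics
import org.jetbrains.kotlinx.dl.api.core.optimizer.Adam
import org.jetbrains.kotlinx.dl.api.core.regularizer.L2

val model = Sequential.of(
    Input(784),
    Dense(outputSize = 128, kernelRegularizer = L2(0.001f)),
    Dense(outputSize = 10, kernelRegularizer = L2(0.001f), biasRegularizer = L2(0.001f))
)

model.use {
    it.compile(optimizer = Adam(), loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS, metric = Metrics.ACCURACY)
    // fit() minimizes the base loss plus the sum of all registered regularization penalties.
    it.fit(dataset = train, epochs = 5, batchSize = 100)
}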