Added Regularizers support #90


Merged · 14 commits · Jun 3, 2021
@@ -172,19 +172,31 @@ public abstract class GraphTrainableModel(vararg layers: Layer) : TrainableModel
)

yPredOp = forward(xOp, inputLayer)
lossOp = loss.apply(tf, yPredOp, yTrueOp, numberOfLossesOp)
lossOp = buildLossFunction(loss)
targets = optimizer.prepareTargets(kGraph, tf, lossOp)

predictionOp = when (loss) {
is SoftmaxCrossEntropyWithLogits -> tf.withName(OUTPUT_NAME).nn.softmax(yPredOp)
else -> tf.withName(OUTPUT_NAME).identity(yPredOp)
}

metricOp = metric.apply(tf, predictionOp, yTrueOp, numberOfLossesOp)

isModelCompiled = true
}

private fun buildLossFunction(loss: LossFunction): Operand<Float> {
val basicLoss = loss.apply(tf, yPredOp, yTrueOp, numberOfLossesOp)
var totalLoss = basicLoss
// TODO: the regularization term should probably be divided by numberOfLossesOp and scaled together with the loss before averaging
kGraph.variableRegularizers.forEach { (variable, regularizer) ->
totalLoss = tf.math.add(totalLoss, regularizer.apply(tf, variable))
}
return tf.withName(TRAINING_LOSS).identity(totalLoss)
}
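Note: each registered penalty is whatever regularizer.apply(tf, variable) returns, summed onto the base loss above. A minimal sketch of an L2-style implementation, assuming Regularizer exposes a single apply(tf: Ops, input: Operand<Float>): Operand<Float> method returning a scalar (the exact base-class shape is not visible in this diff):

import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops

/** Hypothetical L2-style regularizer: returns l2 * sum(input^2) as a scalar penalty. */
public class SimpleL2(private val l2: Float = 0.01f) : Regularizer() {
    override fun apply(tf: Ops, input: Operand<Float>): Operand<Float> {
        // Reduce over all axes so a variable of any rank yields a scalar penalty.
        val allAxes = tf.range(tf.constant(0), tf.rank(input), tf.constant(1))
        return tf.math.mul(tf.constant(l2), tf.reduceSum(tf.math.square(input), allAxes))
    }
}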


override fun compile(optimizer: Optimizer, loss: Losses, metric: Metric, callback: Callback) {
compile(optimizer, Losses.convert(loss), metric, callback)
}
Expand Down
17 changes: 16 additions & 1 deletion api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/KGraph.kt
@@ -5,6 +5,7 @@

package org.jetbrains.kotlinx.dl.api.core

import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Graph
import org.tensorflow.GraphOperation
import org.tensorflow.Operand
@@ -35,6 +36,9 @@ public class KGraph(graphDef: ByteArray, prefix: String) : AutoCloseable {
/** A list of variables to train. */
private val layerVariables: MutableMap<Variable<Float>, Boolean> = mutableMapOf()

/** A map of layer variables to the regularizers applied to them. */
internal val variableRegularizers: MutableMap<Variable<Float>, Regularizer> = mutableMapOf()

/** A list of optimizers' variables. */
private val optimizerVariables: MutableList<Variable<Float>> = mutableListOf()

@@ -82,6 +86,17 @@ public class KGraph(graphDef: ByteArray, prefix: String) : AutoCloseable {
layerVariables[variable] = isTrainable
}

/**
* Registers the regularizer applied to a layer variable, so that its penalty can be added to the training loss.
*
* @param variable Variable tracked in KGraph.
* @param regularizer Regularizer applied to the variable.
*/
public fun addVariableRegularizer(variable: Variable<Float>, regularizer: Regularizer) {
check(!variableRegularizers.contains(variable)) { "$variable already has a registered regularizer. Analyze and fix the static graph building process." }
variableRegularizers[variable] = regularizer
}

/**
* Adds a variable used in optimizer to the pool of tracked variables.
*
@@ -199,4 +214,4 @@
runner.run()
}
}
}
}
@@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer
import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.TrainableModel
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.extension.convertTensorToMultiDimArray
import org.tensorflow.Operand
@@ -119,14 +120,16 @@ public abstract class Layer(public var name: String) {
kGraph: KGraph,
name: String,
variable: Variable<Float>,
initializer: Initializer
initializer: Initializer,
regularizer: Regularizer? = null
): Variable<Float> {
// require(fanIn != Int.MIN_VALUE) { "fanIn should be calculated before initialization for variable $name" }
// require(fanOut != Int.MIN_VALUE) { "fanOut should be calculated before initialization for variable $name" }

val initOp = initializer.apply(fanIn, fanOut, tf, variable, name)
kGraph.addLayerVariable(variable, isTrainable)
kGraph.addInitializer(name, initOp)
if (regularizer != null) kGraph.addVariableRegularizer(variable, regularizer)
return variable
}
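The intended call pattern, as it appears in the convolution and dense layers further down this diff: create the variable first, then register it together with its initializer and optional regularizer.

// Inside a layer's variable-creation step (see Conv2DImpl and Dense below):
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer, kernelRegularizer)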

@@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze
@@ -43,6 +44,9 @@ private const val EXTRA_DIM = 1L
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel.
* @property [biasInitializer] An initializer for the bias vector.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [name] Custom layer name.
* @property [useBias] If true the layer uses a bias vector.
@@ -58,6 +62,9 @@ public class Conv1D(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = "",
@@ -69,6 +76,9 @@
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
kernelRegularizerInternal = kernelRegularizer,
biasRegularizerInternal = biasRegularizer,
activityRegularizerInternal = activityRegularizer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
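A usage sketch for the new constructor parameters. L2 here is assumed to be a Regularizer implementation shipped alongside this change (only the base type appears in this diff), and filters/kernelSize are Keras-style parameter names not shown above:

import org.jetbrains.kotlinx.dl.api.core.regularizer.L2

// Hypothetical example: penalize large kernel and bias weights in a Conv1D layer.
val conv = Conv1D(
    filters = 32,
    kernelSize = 3,
    kernelRegularizer = L2(0.001f), // summed into the training loss by buildLossFunction
    biasRegularizer = L2(0.001f)
)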
@@ -11,7 +11,11 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.jetbrains.kotlinx.dl.api.core.shape.numElements
import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
import org.jetbrains.kotlinx.dl.api.core.util.conv2dBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.conv2dKernelVarName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
@@ -21,7 +25,6 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Variable
import org.tensorflow.op.nn.Conv2d
import org.tensorflow.op.nn.Conv2d.dilations
import java.lang.IllegalArgumentException
import kotlin.math.roundToInt

private const val KERNEL_VARIABLE_NAME = "conv2d_kernel"
@@ -51,6 +54,9 @@ private const val BIAS_VARIABLE_NAME = "conv2d_bias"
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel.
* @property [biasInitializer] An initializer for the bias vector.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [name] Custom layer name.
* @property [useBias] If true the layer uses a bias vector.
@@ -64,6 +70,9 @@ public class Conv2D(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -75,6 +84,9 @@
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
kernelRegularizerInternal = kernelRegularizer,
biasRegularizerInternal = biasRegularizer,
activityRegularizerInternal = activityRegularizer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
@@ -102,6 +114,9 @@ public abstract class Conv2DImpl(
private val activationInternal: Activations,
private val kernelInitializerInternal: Initializer,
private val biasInitializerInternal: Initializer,
private val kernelRegularizerInternal: Regularizer? = null,
private val biasRegularizerInternal: Regularizer? = null,
private val activityRegularizerInternal: Regularizer? = null,
private val paddingInternal: ConvPadding,
private val useBiasInternal: Boolean,
private val kernelVariableName: String,
@@ -205,8 +220,9 @@
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal)
if (useBiasInternal) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal)
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal, kernelRegularizerInternal)
if (useBiasInternal) bias =
addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal)
}
}

@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dKernelVarName
@@ -44,6 +45,9 @@ private const val BIAS_VARIABLE_NAME = "depthwise_conv2d_bias"
* The total number of depthwise convolution output channels will be equal to `numberOfChannels * depthMultiplier`.
* @property [depthwiseInitializer] An initializer for the depthwise kernel matrix.
* @property [biasInitializer] An initializer for the bias vector.
* @property [depthwiseRegularizer] Regularizer function applied to the depthwise kernel matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
@@ -58,6 +62,9 @@ public class DepthwiseConv2D(
public val depthMultiplier: Int = 1,
public val depthwiseInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val depthwiseRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -111,8 +118,15 @@ public class DepthwiseConv2D(
depthwiseKernel = tf.withName(kernelVariableName).variable(depthwiseKernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

depthwiseKernel = addWeight(tf, kGraph, kernelVariableName, depthwiseKernel, depthwiseInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
depthwiseKernel = addWeight(
tf,
kGraph,
kernelVariableName,
depthwiseKernel,
depthwiseInitializer,
depthwiseRegularizer
)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.*
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dBiasVarName
@@ -51,6 +52,10 @@ private const val BIAS_VARIABLE_NAME = "separable_conv2d_bias"
* @property [depthwiseInitializer] An initializer for the depthwise kernel matrix.
* @property [pointwiseInitializer] An initializer for the pointwise kernel matrix.
* @property [biasInitializer] An initializer for the bias vector.
* @property [depthwiseRegularizer] Regularizer function applied to the `depthwise` kernel matrix.
* @property [pointwiseRegularizer] Regularizer function applied to the `pointwise` kernel matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [padding] The padding method, either 'valid' or 'same' or 'full'.
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
@@ -68,6 +73,10 @@ public class SeparableConv2D(
public val depthwiseInitializer: Initializer = HeNormal(),
public val pointwiseInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val depthwiseRegularizer: Regularizer? = null,
public val pointwiseRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
@@ -137,9 +146,23 @@
pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

depthwiseKernel = addWeight(tf, kGraph, depthwiseKernelVariableName, depthwiseKernel, depthwiseInitializer)
pointwiseKernel = addWeight(tf, kGraph, pointwiseKernelVariableName, pointwiseKernel, pointwiseInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
depthwiseKernel = addWeight(
tf,
kGraph,
depthwiseKernelVariableName,
depthwiseKernel,
depthwiseInitializer,
depthwiseRegularizer
)
pointwiseKernel = addWeight(
tf,
kGraph,
pointwiseKernelVariableName,
pointwiseKernel,
pointwiseInitializer,
pointwiseRegularizer
)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
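Since a separable convolution factors into two kernels, their penalties can be tuned independently. A sketch under the same assumptions as the Conv1D example above (L2 and the filters parameter are not part of this diff):

// Hypothetical example: regularize the spatial and channel-mixing kernels separately.
val separable = SeparableConv2D(
    filters = 64,
    depthwiseRegularizer = L2(0.001f),  // per-channel spatial kernel
    pointwiseRegularizer = L2(0.0001f), // 1x1 channel-mixing kernel
    biasRegularizer = L2(0.001f)
)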
@@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.numElements
import org.jetbrains.kotlinx.dl.api.core.util.denseBiasVarName
@@ -37,8 +38,11 @@ private const val BIAS_VARIABLE_NAME = "dense_bias"
*
* @property [outputSize] Dimensionality of the output space.
* @property [activation] Activation function.
* @property [kernelInitializer] Initializer function for the weight matrix.
* @property [kernelInitializer] Initializer function for the `kernel` weights matrix.
* @property [biasInitializer] Initializer function for the bias.
* @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix.
* @property [biasRegularizer] Regularizer function applied to the `bias` vector.
* @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation").
* @property [useBias] If true the layer uses a bias vector.
* @property [name] Custom layer name.
* @constructor Creates [Dense] object.
@@ -48,6 +52,9 @@ public class Dense(
public val activation: Activations = Activations.Relu,
public val kernelInitializer: Initializer = HeNormal(),
public val biasInitializer: Initializer = HeUniform(),
public val kernelRegularizer: Regularizer? = null,
public val biasRegularizer: Regularizer? = null,
public val activityRegularizer: Regularizer? = null,
public val useBias: Boolean = true,
name: String = ""
) : Layer(name) {
@@ -67,7 +74,7 @@
fanOut = outputSize

val (kernelVariableName, biasVariableName) = defineVariableNames()
createDepthwiseConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
createDenseVariables(tf, kernelVariableName, biasVariableName, kGraph)
}

private fun defineVariableNames(): Pair<String, String> {
@@ -78,7 +85,7 @@
}
}

private fun createDepthwiseConv2DVariables(
private fun createDenseVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
@@ -87,8 +94,8 @@
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias, biasInitializer)
kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer, kernelRegularizer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias, biasInitializer, biasRegularizer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
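Putting it together: every penalty registered through addWeight is summed into the loss inside buildLossFunction, so the value reported under TRAINING_LOSS already includes regularization. A sketch of the end-to-end flow; the L2 class, the layer import paths, and the 'train' dataset are assumptions outside this diff:

import org.jetbrains.kotlinx.dl.api.core.Sequential
import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense // layer paths assumed; may differ
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.loss.Losses
import org.jetbrains.kotlinx.dl.api.core.metric.Metrics
import org.jetbrains.kotlinx.dl.api.core.optimizer.Adam
import org.jetbrains.kotlinx.dl.api.core.regularizer.L2

val model = Sequential.of(
    Input(784),
    Dense(outputSize = 128, kernelRegularizer = L2(0.001f)),
    Dense(outputSize = 10, kernelRegularizer = L2(0.001f), biasRegularizer = L2(0.001f))
)

model.use {
    it.compile(optimizer = Adam(), loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS, metric = Metrics.ACCURACY)
    // fit() minimizes the base loss plus the sum of all registered regularization penalties.
    it.fit(dataset = train, epochs = 5, batchSize = 100)
}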