@@ -6,12 +6,9 @@
package org.jetbrains.kotlinx.dl.api.core.optimizer

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Operand
import org.tensorflow.Output
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Constant
import org.tensorflow.op.core.Gradients
import org.tensorflow.op.core.Variable
import org.tensorflow.op.train.ApplyAdadelta
@@ -51,9 +48,6 @@ public class AdaDelta(
private val epsilon: Float = 1e-8f,
clipGradient: ClipGradientAction = NoClipGradient()
) : Optimizer(clipGradient) {
private lateinit var epsilonConstant: Constant<Float>
private lateinit var learningRateConst: Constant<Float>
private lateinit var rhoConst: Constant<Float>

init {
require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -67,18 +61,16 @@
weights: List<Variable<Float>>,
gradients: Gradients
): List<Operand<Float>> {
val targets: MutableList<Operand<Float>> =
ArrayList()
rhoConst = tf.constant(rho, getDType())
learningRateConst = tf.constant(learningRate, getDType())
epsilonConstant = tf.constant(epsilon, getDType())
val targets = mutableListOf<Operand<Float>>()

for (i in weights.indices) {
val variable = weights[i]
val varName = variable.ref().op().name()
val rhoConst = tf.constant(rho, getDType())
val learningRateConst = tf.constant(learningRate, getDType())
val epsilonConstant = tf.constant(epsilon, getDType())

val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
val accumUpdateSlot: Variable<Float> = getSlot(varName, ACCUMULATOR_UPDATE)
for ((i, variable) in weights.withIndex()) {
val output = variable.asOutput()
val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
val accumUpdateSlot = createSlot(ACCUMULATOR_UPDATE, output, tf, graph)

targets.add(
tf.train.applyAdadelta(
@@ -95,24 +87,6 @@
return targets
}

private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
val accumulatorInitializer = tf.withName(accumInitializerName)
.fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer)

val accumUpdateInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR_UPDATE))
val updateInitializer: Operand<Float> = tf.withName(accumUpdateInitializerName)
.fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
createSlot(graph, tf, v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer)
}

override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
for (v in variables) {
createAdaDeltaSlot(graph, tf, v.asOutput())
}
}

override val optimizerName: String get() = "Adadelta"

override val isRunningOnGPU: Boolean get() = true
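Note: the hunks above replace AdaDelta's `createSlots`/`createAdaDeltaSlot` overrides (and the `lateinit` constant fields) with constants and slots created directly inside `applyGradients` via a shared `createSlot` helper. That helper itself is not part of the visible diff; the following is a minimal sketch of what it might look like on the `Optimizer` base class, inferred only from the call sites such as `createSlot(ACCUMULATOR, output, tf, graph)` and AdaGrad's `initialValue = initialAccumulatorValue` below. The slot-naming scheme and the body are assumptions, not the PR's actual code.

```kotlin
import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.util.defaultAssignOpName
import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Operand
import org.tensorflow.Output
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Variable

// Sketch of the shared slot-creation helper that the call sites above assume
// lives on the Optimizer base class; written here as a free function so the
// snippet stands alone. Names and body are inferred, not copied from the PR.
internal fun createSlot(
    name: String,
    variable: Output<Float>,
    tf: Ops,
    graph: KGraph,
    initialValue: Float = 0.0f
): Variable<Float> {
    // Derive a unique slot name from the variable name, e.g. "dense_1_kernel-accumulator".
    val slotName = defaultOptimizerVariableName("${variable.op().name()}-$name")
    val slot: Variable<Float> = tf.withName(slotName).variable(variable.shape(), getDType())

    // Build a fill initializer with the requested value (0.0f for most slots,
    // initialAccumulatorValue for AdaGrad's accumulator) and register it on the graph.
    val initializer: Operand<Float> = tf.withName(defaultInitializerOpName(slotName))
        .fill(tf.shape(variable), tf.constant(initialValue, getDType()))
    graph.addOptimizerVariableInitializer(
        tf.withName(defaultAssignOpName(slotName)).assign(slot, initializer)
    )
    return slot
}
```

Under a helper of this shape, each call creates the slot variable and registers its initializer on the `KGraph` when gradients are applied, which is what allows the explicit `createSlots` overrides to be deleted.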
@@ -6,12 +6,9 @@
package org.jetbrains.kotlinx.dl.api.core.optimizer

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Operand
import org.tensorflow.Output
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Constant
import org.tensorflow.op.core.Gradients
import org.tensorflow.op.core.Variable
import org.tensorflow.op.train.ApplyAdagrad
@@ -44,8 +41,6 @@ public class AdaGrad(
private val initialAccumulatorValue: Float = 0.01f,
clipGradient: ClipGradientAction = NoClipGradient()
) : Optimizer(clipGradient) {
private lateinit var initialAccumulatorValueConstant: Constant<Float>
private lateinit var learningRateConst: Constant<Float>

init {
require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -58,17 +53,12 @@
weights: List<Variable<Float>>,
gradients: Gradients
): List<Operand<Float>> {
val targets: MutableList<Operand<Float>> =
ArrayList()
val targets = mutableListOf<Operand<Float>>()

initialAccumulatorValueConstant = tf.constant(initialAccumulatorValue, getDType())
learningRateConst = tf.constant(learningRate, getDType())
val learningRateConst = tf.constant(learningRate, getDType())

for (i in weights.indices) {
val variable = weights[i]
val varName = variable.ref().op().name()

val slot: Variable<Float> = getSlot(varName, ACCUMULATOR)
for ((i, variable) in weights.withIndex()) {
val slot = createSlot(ACCUMULATOR, variable.asOutput(), tf, graph, initialValue = initialAccumulatorValue)

targets.add(
tf.train.applyAdagrad(
@@ -84,20 +74,6 @@
return targets
}

private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))

val initializer: Operand<Float> = tf.withName(accumInitializerName)
.fill(tf.shape(v), tf.constant(initialAccumulatorValue))
createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
}

override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
for (v in variables) {
createAdaGradSlot(graph, tf, v.asOutput())
}
}

override val optimizerName: String get() = "Adagrad"

override val isRunningOnGPU: Boolean get() = true
@@ -11,11 +11,9 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Operand
import org.tensorflow.Output
import org.tensorflow.Shape
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Assign
import org.tensorflow.op.core.Constant
import org.tensorflow.op.core.Gradients
import org.tensorflow.op.core.Variable
import org.tensorflow.op.train.ApplyAdagradDa
@@ -52,10 +50,6 @@ public class AdaGradDA(
private val l2Strength: Float = 0.01f,
clipGradient: ClipGradientAction = NoClipGradient()
) : Optimizer(clipGradient) {
private lateinit var learningRateConst: Constant<Float>
private lateinit var l1StrengthConst: Constant<Float>
private lateinit var l2StrengthConst: Constant<Float>
private lateinit var globalStep: Variable<Float>

init {
require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -70,19 +64,22 @@
weights: List<Variable<Float>>,
gradients: Gradients
): List<Operand<Float>> {
val targets: MutableList<Operand<Float>> =
ArrayList()
learningRateConst = tf.constant(learningRate, getDType())
l1StrengthConst = tf.constant(l1Strength, getDType())
l2StrengthConst = tf.constant(l2Strength, getDType())
val targets = mutableListOf<Operand<Float>>()

for (i in weights.indices) {
val variable = weights[i]
val varName = variable.ref().op().name()
val learningRateConst = tf.constant(learningRate, getDType())
val l1StrengthConst = tf.constant(l1Strength, getDType())
val l2StrengthConst = tf.constant(l2Strength, getDType())

val gradSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
val gradSquaredSlot: Variable<Float> = getSlot(varName, SQUARED_ACCUMULATOR)
val globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
.assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
graph.addOptimizerVariableInitializer(globalStepInit)

for ((i, variable) in weights.withIndex()) {
val output = variable.asOutput()
val gradSlot = createSlot(ACCUMULATOR, output, tf, graph)
val gradSquaredSlot = createSlot(SQUARED_ACCUMULATOR, output, tf, graph)
targets.add(
tf.train.applyAdagradDa(
variable,
@@ -104,30 +101,6 @@
return targets
}

private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output<Float>) {
val accumulatorInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
val accumInitializer: Operand<Float> = tf.withName(accumulatorInitializerName)
.fill(tf.shape(v), tf.constant(0.0f))
createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)

val squareAccumInitializerName = defaultInitializerOpName(createName(v, SQUARED_ACCUMULATOR))
val sqInitializer: Operand<Float> = tf.withName(squareAccumInitializerName)
.fill(tf.shape(v), tf.constant(initialAccumulatorValue))

createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
}

override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
for (v in variables) {
createAdaGradDASlot(graph, tf, v.asOutput())
}
globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
.assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
graph.addOptimizerVariableInitializer(globalStepInit)
}

override val optimizerName: String get() = "AdaGradDA"

override val isRunningOnGPU: Boolean get() = true
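The AdaGradDA hunks above now create the `GLOBAL_STEP` variable inside `applyGradients` using the same three steps that Adam (below) uses for its beta-power variables: declare a named scalar variable, build a named constant initializer, and register the assign op on the graph. Purely as an illustration of that repeated pattern (this helper is not part of the PR), it can be written as a standalone function using only calls already visible in these hunks:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.util.defaultAssignOpName
import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Shape
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Assign
import org.tensorflow.op.core.Variable

// Sketch: create a named scalar optimizer variable (e.g. the global step or a
// beta-power accumulator) and register its initializer with the graph.
internal fun createScalarOptimizerVariable(
    name: String,
    initialValue: Float,
    tf: Ops,
    graph: KGraph
): Variable<Float> {
    val variable: Variable<Float> = tf.withName(name).variable(Shape.scalar(), getDType())
    val initializer: Assign<*> = tf.withName(defaultAssignOpName(name))
        .assign(variable, tf.withName(defaultInitializerOpName(name)).constant(initialValue, getDType()))
    graph.addOptimizerVariableInitializer(initializer)
    return variable
}

// Hypothetical call sites mirroring the diff:
//   val globalStep   = createScalarOptimizerVariable(GLOBAL_STEP, 0.0f, tf, graph)
//   val betaOnePower = createScalarOptimizerVariable(FIRST_BETA_POWER_NAME, beta1, tf, graph)
```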
@@ -11,11 +11,9 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
import org.jetbrains.kotlinx.dl.api.core.util.getDType
import org.tensorflow.Operand
import org.tensorflow.Output
import org.tensorflow.Shape
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Assign
import org.tensorflow.op.core.Constant
import org.tensorflow.op.core.Gradients
import org.tensorflow.op.core.Variable
import org.tensorflow.op.train.ApplyAdam
@@ -53,13 +51,6 @@ public class Adam(
clipGradient: ClipGradientAction = NoClipGradient()
) : Optimizer(clipGradient) {

private lateinit var epsilonConstant: Constant<Float>
private lateinit var learningRateConst: Constant<Float>
private lateinit var betaOneConst: Constant<Float>
private lateinit var betaTwoConst: Constant<Float>
private lateinit var betaOnePower: Variable<Float>
private lateinit var betaTwoPower: Variable<Float>

init {
require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
require(beta1 > 0.0f && beta1 < 1.0f) { "Beta1 $beta1 should be in range (0.0; 1.0)." }
@@ -73,22 +64,35 @@
weights: List<Variable<Float>>,
gradients: Gradients
): List<Operand<Float>> {
val targets: MutableList<Operand<Float>> =
ArrayList()

betaOneConst = tf.constant(beta1, getDType())
betaTwoConst = tf.constant(beta2, getDType())
learningRateConst = tf.constant(learningRate, getDType())
epsilonConstant = tf.constant(epsilon, getDType())
val targets = mutableListOf<Operand<Float>>()

for (i in weights.indices) {
val betaOneConst = tf.constant(beta1, getDType())
val betaTwoConst = tf.constant(beta2, getDType())
val learningRateConst = tf.constant(learningRate, getDType())
val epsilonConstant = tf.constant(epsilon, getDType())

val variable = weights[i]
val varName = variable.ref().op().name()
val betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
.assign(
betaOnePower,
tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
)
graph.addOptimizerVariableInitializer(betaOnePowerInit)

val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
val secondMomentSlot: Variable<Float> = getSlot(varName, SECOND_MOMENT)
val betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
.assign(
betaTwoPower,
tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
)
graph.addOptimizerVariableInitializer(betaTwoPowerInit)

for ((i, variable) in weights.withIndex()) {
val output = variable.asOutput()
val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
targets.add(
tf.train.applyAdam(
variable,
@@ -122,44 +126,6 @@
return targets
}

private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
val firstMomentInitializer =
tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)

val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
val secondMomentInitializer =
tf.withName(secondMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
}

override fun createSlots(graph: KGraph, tf: Ops, variables: List<Output<Float>>) {
for (v in variables) {
createAdamSlot(graph, tf, v.asOutput())
}
betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())

val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
.assign(
betaOnePower,
tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
)
graph.addOptimizerVariableInitializer(betaOnePowerInit)


betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())

val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
.assign(
betaTwoPower,
tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
)
graph.addOptimizerVariableInitializer(betaTwoPowerInit)
}

override val optimizerName: String get() = "Adam"

override val isRunningOnGPU: Boolean get() = true
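None of these changes touch the optimizers' public constructors, so user-facing code is unaffected. A hedged usage sketch follows; the `Sequential`/`Dense`/`Losses`/`Metrics` names and import paths are assumed from KotlinDL's public examples and may differ between versions:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.Sequential
import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.loss.Losses
import org.jetbrains.kotlinx.dl.api.core.metric.Metrics
import org.jetbrains.kotlinx.dl.api.core.optimizer.Adam

fun main() {
    val model = Sequential.of(
        Input(4),
        Dense(16),
        Dense(3)
    )
    model.use {
        it.compile(
            // Same constructor as before; slot and beta-power variables are now
            // created when gradients are applied rather than in createSlots.
            optimizer = Adam(learningRate = 0.001f),
            loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS,
            metric = Metrics.ACCURACY
        )
        // it.fit(...) / it.evaluate(...) proceed as before.
    }
}
```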