diff --git a/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp b/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp index ef6c1dc830..b4e4f46dca 100644 --- a/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp +++ b/newton-4.00/applications/ndSandbox/toolbox/ndTestDeepBrain.cpp @@ -517,32 +517,35 @@ static void MnistTrainingSet() if (trainingLabels && trainingDigits) { ndBrain brain; - ndInt32 neuronsPerLayers = 64; + ndFixSizeArray layers; #ifdef D_USE_CONVOLUTIONAL_LAYERS - //layers.PushBack(new ndBrainLayerLinear(trainingDigits->GetColumns(), neuronsPerLayers)); - ndInt32 width = trainingDigits->GetColumns() / 28; ndInt32 height = 28; + ndInt32 width = trainingDigits->GetColumns() / height; ndAssert((height * width) == trainingDigits->GetColumns()); - layers.PushBack(new ndBrainLayerConvolutional(28, width, height, 1, 5, 16)); - //layers.PushBack(new ndBrainLayerConvolutional(28, 3, 3, 3, 2, 2)); + layers.PushBack(new ndBrainLayerConvolutional(width, width, height, 1, 5, 1)); + layers.PushBack(new ndBrainLayerReluActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + + layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), trainingLabels->GetColumns())); + layers.PushBack(new ndBrainLayerCategoricalSoftmaxActivation(layers[layers.GetCount() - 1]->GetOutputSize())); - //layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); #else + ndInt32 neuronsPerLayers = 64; + layers.PushBack(new ndBrainLayerLinear(trainingDigits->GetColumns(), neuronsPerLayers)); layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + + layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), neuronsPerLayers)); + layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + + layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), neuronsPerLayers)); + layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); + + layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), trainingLabels->GetColumns())); + layers.PushBack(new ndBrainLayerCategoricalSoftmaxActivation(layers[layers.GetCount() - 1]->GetOutputSize())); #endif - //layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), neuronsPerLayers)); - //layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); - // - //layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), neuronsPerLayers)); - //layers.PushBack(new ndBrainLayerApproximateTanhActivation(layers[layers.GetCount() - 1]->GetOutputSize())); - // - //layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), trainingLabels->GetColumns())); - ////layers.PushBack(new ndBrainLayerSoftmaxActivation(layers[layers.GetCount() - 1]->GetOutputSize())); - //layers.PushBack(new ndBrainLayerCategoricalSoftmaxActivation(layers[layers.GetCount() - 1]->GetOutputSize())); for (ndInt32 i = 0; i < layers.GetCount(); ++i) { diff --git a/newton-4.00/sdk/dBrain/ndBrainLayer.h b/newton-4.00/sdk/dBrain/ndBrainLayer.h index bfe33ba13c..87b9998b18 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayer.h +++ b/newton-4.00/sdk/dBrain/ndBrainLayer.h @@ -44,17 +44,13 @@ class ndBrainLayer : public ndClassAlloc virtual ndInt32 GetInputSize() const; virtual ndInt32 GetOutputSize() const; - virtual void Blend(const ndBrainLayer& src, ndBrainFloat blend); - - //virtual ndBrainVector* GetBias(); - //virtual ndBrainMatrix* GetWeights(); - virtual void Clear(); virtual void FlushToZero(); virtual void Scale(ndBrainFloat scale); virtual void Set(const ndBrainLayer& src); virtual void Add(const ndBrainLayer& src); virtual void Mul(const ndBrainLayer& src); + virtual void Blend(const ndBrainLayer& src, ndBrainFloat blend); virtual void ScaleAdd(const ndBrainLayer& src, ndBrainFloat scale); virtual void InitWeightsXavierMethod(); diff --git a/newton-4.00/sdk/dBrain/ndBrainLayerActivation.h b/newton-4.00/sdk/dBrain/ndBrainLayerActivation.h index 2e5c1a3e2e..a0d61f4593 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayerActivation.h +++ b/newton-4.00/sdk/dBrain/ndBrainLayerActivation.h @@ -39,8 +39,17 @@ class ndBrainLayerActivation : public ndBrainLayer virtual ndInt32 GetOutputSize() const; virtual ndInt32 GetInputSize() const; virtual const char* GetLabelId() const; - virtual void Blend(const ndBrainLayer& src, ndBrainFloat blend); + protected: + void Clear(); + void FlushToZero(); + void Scale(ndBrainFloat scale); + void Set(const ndBrainLayer& src); + void Add(const ndBrainLayer& src); + void Mul(const ndBrainLayer& src); + void ScaleAdd(const ndBrainLayer& src, ndBrainFloat scale); + virtual void Blend(const ndBrainLayer& src, ndBrainFloat blend); + virtual void InitWeightsXavierMethod(); virtual void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance); @@ -51,18 +60,10 @@ class ndBrainLayerActivation : public ndBrainLayer const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, ndBrainVector& inputGradient, ndBrainLayer* const gradientOut) const; - virtual void Save(const ndBrainSave* const loadSave) const; + void AdamUpdate(const ndBrainLayer& u, const ndBrainLayer& v, ndBrainFloat epsilon); - void Clear(); - void FlushToZero(); - void Scale(ndBrainFloat scale); - void Set(const ndBrainLayer& src); - void Add(const ndBrainLayer& src); - void Mul(const ndBrainLayer& src); - void ScaleAdd(const ndBrainLayer& src, ndBrainFloat scale); + virtual void Save(const ndBrainSave* const loadSave) const; - void AdamUpdate(const ndBrainLayer& u, const ndBrainLayer& v, ndBrainFloat epsilon); - protected: ndInt32 m_neurons; }; diff --git a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.cpp b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.cpp index afefd3592e..61fd24dbe0 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.cpp +++ b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.cpp @@ -38,14 +38,12 @@ ndBrainLayerConvolutional::ndBrainLayerConvolutional(ndInt32 inputStride, ndInt3 m_outputHeight = m_inputHeight - m_kernelSize + 1; m_bias.SetCount(m_numberOfKernels); - for (ndInt32 i = 0; i < m_numberOfKernels; ++i) - { - m_bias[i] = new ndBrainMatrix(m_outputWidth, m_outputHeight); - } + //for (ndInt32 i = 0; i < m_numberOfKernels; ++i) + //{ + // m_bias[i] = new ndBrainMatrix(m_outputWidth, m_outputHeight); + //} - ndAssert(0); - //m_weights.SetCount(m_numberOfKernels * m_inputDepth); - //m_weights.SetCount(m_numberOfKernels * (m_inputDepth * ); + m_weights.SetCount(m_numberOfKernels * m_inputDepth); for (ndInt32 i = 0; i < m_numberOfKernels * m_inputDepth; ++i) { m_weights[i] = new ndBrainMatrix(m_kernelSize, m_kernelSize); @@ -66,17 +64,16 @@ ndBrainLayerConvolutional::ndBrainLayerConvolutional(const ndBrainLayerConvoluti ,m_outputHeight(src.m_outputHeight) { m_bias.SetCount(m_numberOfKernels); - for (ndInt32 i = 0; i < m_numberOfKernels; ++i) - { - //m_bias[i] = new ndBrainMatrix(width, height); - m_bias[i] = new ndBrainMatrix(m_kernelSize, m_kernelSize); - m_bias[i]->Set(*src.m_bias[i]); - } + //for (ndInt32 i = 0; i < m_numberOfKernels; ++i) + //{ + // //m_bias[i] = new ndBrainMatrix(width, height); + // m_bias[i] = new ndBrainMatrix(m_kernelSize, m_kernelSize); + // m_bias[i]->Set(*src.m_bias[i]); + //} m_weights.SetCount(m_numberOfKernels * m_inputDepth); for (ndInt32 i = 0; i < m_numberOfKernels * m_inputDepth; ++i) { - //m_weights[i] = new ndBrainMatrix(width, height); m_weights[i] = new ndBrainMatrix(m_kernelSize, m_kernelSize); m_weights[i]->Set(*src.m_weights[i]); } @@ -85,10 +82,10 @@ ndBrainLayerConvolutional::ndBrainLayerConvolutional(const ndBrainLayerConvoluti ndBrainLayerConvolutional::~ndBrainLayerConvolutional() { ndAssert(0); - for (ndInt32 i = 0; i < m_bias.GetCount(); ++i) - { - delete (m_bias[i]); - } + //for (ndInt32 i = 0; i < m_bias.GetCount(); ++i) + //{ + // delete (m_bias[i]); + //} for (ndInt32 i = 0; i < m_weights.GetCount(); ++i) { @@ -148,14 +145,15 @@ void ndBrainLayerConvolutional::InitWeightsXavierMethod() void ndBrainLayerConvolutional::InitGaussianBias(ndBrainFloat variance) { - for (ndInt32 i = m_bias.GetCount() - 1; i >= 0; --i) - { - ndBrainMatrix& biasMatrix = *m_bias[i]; - for (ndInt32 j = biasMatrix.GetRows() - 1; j >= 0 ; --j) - { - biasMatrix[j].InitGaussianWeights(variance); - } - } + //for (ndInt32 i = m_bias.GetCount() - 1; i >= 0; --i) + //{ + // ndBrainMatrix& biasMatrix = *m_bias[i]; + // for (ndInt32 j = biasMatrix.GetRows() - 1; j >= 0 ; --j) + // { + // biasMatrix[j].InitGaussianWeights(variance); + // } + //} + m_bias.InitGaussianWeights(variance); } void ndBrainLayerConvolutional::InitGaussianWeights(ndBrainFloat variance) @@ -185,11 +183,12 @@ void ndBrainLayerConvolutional::Set(const ndBrainLayer& src) //m_bias.Set(linearSrc.m_bias); //m_weights.Set(linearSrc.m_weights); - for (ndInt32 i = 0; i < m_numberOfKernels; ++i) - { - m_bias[i]->Set(*convSrc.m_bias[i]); - } + //for (ndInt32 i = 0; i < m_numberOfKernels; ++i) + //{ + // m_bias[i]->Set(*convSrc.m_bias[i]); + //} + m_bias.Set(convSrc.m_bias); for (ndInt32 i = 0; i < m_numberOfKernels * m_inputDepth; ++i) { m_weights[i]->Set(*convSrc.m_weights[i]); @@ -210,27 +209,6 @@ void ndBrainLayerConvolutional::InputDerivative(const ndBrainVector&, const ndBr ndAssert(0); } -//void ndBrainLayerConvolutional::CalculateParamGradients(const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, -// ndBrainVector& inputGradient, ndBrainVector& biasGradient, ndBrainMatrix& weightGradient) -//{ -// //ndAssert(biasGradient.GetCount() == outputDerivative.GetCount()); -// //biasGradient.Set(outputDerivative); -// //for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0 ; --i) -// //{ -// // ndBrainFloat value = outputDerivative[i]; -// // weightGradient[i].ScaleSet(input, value); -// //} -// //InputDerivative(output, outputDerivative, inputGradient); -// ndAssert(0); -//} - -void ndBrainLayerConvolutional::CalculateParamGradients( - const ndBrainVector& input, const ndBrainVector& output, - const ndBrainVector& outputDerivative, ndBrainVector& inputGradient, ndBrainLayer* const gradientOut) const -{ - ndAssert(0); -} - void ndBrainLayerConvolutional::Save(const ndBrainSave* const loadSave) const { //char buffer[1024]; @@ -305,49 +283,68 @@ ndBrainLayer* ndBrainLayerConvolutional::Load(const ndBrainLoad* const loadSave) return nullptr; } +void ndBrainLayerConvolutional::CalculateParamGradients( + const ndBrainVector& input, const ndBrainVector& output, + const ndBrainVector& outputDerivative, ndBrainVector& inputGradient, ndBrainLayer* const gradientOut) const +{ + ndAssert(0); + //ndAssert(!strcmp(GetLabelId(), gradientOut->GetLabelId())); + //ndBrainLayerLinear* const gradients = (ndBrainLayerLinear*)gradientOut; + //ndAssert(gradients->m_bias.GetCount() == outputDerivative.GetCount()); + //gradients->m_bias.Set(outputDerivative); + //for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0; --i) + //{ + // ndBrainFloat value = outputDerivative[i]; + // gradients->m_weights[i].ScaleSet(input, value); + //} + //InputDerivative(output, outputDerivative, inputGradient); +} + void ndBrainLayerConvolutional::MakePrediction(const ndBrainVector& input, ndBrainVector& output) const { //m_weights.Mul(input, output); //output.Add(m_bias); - output.Set(ndBrainFloat(ndFloat32(0.0f))); + //output.Set(ndBrainFloat(ndFloat32(0.0f))); //ndInt32 inputStart = 0; - ndInt32 outputStart = 0; -float xxxxx = 0; - for (ndInt32 i = 0; i < m_numberOfKernels; ++i) - { - const ndBrainMatrix& bias = *m_bias[i]; - for (ndInt32 j = 0; j < m_outputHeight; ++j) - { - ndBrainMemVector out(&output[outputStart], m_outputWidth); - out.Set(bias[j]); - - for (ndInt32 k = 0; k < m_outputWidth; ++k) - { - out[k] = xxxxx; - xxxxx += 1.0f; - } - outputStart += m_outputWidth; - } - - //for (ndInt32 j = 0; j < m_inputDepth; ++j) - //{ - // const ndBrainMatrix& weight = *m_weights[j]; - // - // for (ndInt32 k = 0; k < m_outputWidth; ++k) - // { - // for (ndInt32 n = 0; n < m_outputHeight; ++n) - // { - // - // } - // } - // - // //for (ndInt32 k = 0; k < m_kernelSize; ++k) - // //{ - // // const ndBrainVector& kernelRow = weight[j]; - // // const ndBrainMemVector in(&input[inputStart], m_inputWidth); - // // //inputStart += m_inputStride; - // //} - //} - } + //ndInt32 outputStart = 0; + ndAssert(0); + output.InitGaussianWeights(0.1f); +//float xxxxx = 0; +// for (ndInt32 i = 0; i < m_numberOfKernels; ++i) +// { +// const ndBrainMatrix& bias = *m_bias[i]; +// for (ndInt32 j = 0; j < m_outputHeight; ++j) +// { +// ndBrainMemVector out(&output[outputStart], m_outputWidth); +// out.Set(bias[j]); +// +// for (ndInt32 k = 0; k < m_outputWidth; ++k) +// { +// out[k] = xxxxx; +// xxxxx += 1.0f; +// } +// outputStart += m_outputWidth; +// } +// +// //for (ndInt32 j = 0; j < m_inputDepth; ++j) +// //{ +// // const ndBrainMatrix& weight = *m_weights[j]; +// // +// // for (ndInt32 k = 0; k < m_outputWidth; ++k) +// // { +// // for (ndInt32 n = 0; n < m_outputHeight; ++n) +// // { +// // +// // } +// // } +// // +// // //for (ndInt32 k = 0; k < m_kernelSize; ++k) +// // //{ +// // // const ndBrainVector& kernelRow = weight[j]; +// // // const ndBrainMemVector in(&input[inputStart], m_inputWidth); +// // // //inputStart += m_inputStride; +// // //} +// //} +// } } \ No newline at end of file diff --git a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.h b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.h index b247975663..e382a71459 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.h +++ b/newton-4.00/sdk/dBrain/ndBrainLayerConvolutional.h @@ -61,7 +61,8 @@ class ndBrainLayerConvolutional : public ndBrainLayer void InitGaussianBias(ndBrainFloat variance); void InitGaussianWeights(ndBrainFloat variance); - ndArray m_bias; + //ndArray m_bias; + ndBrainVector m_bias; ndArray m_weights; ndInt32 m_inputWidth; @@ -75,7 +76,5 @@ class ndBrainLayerConvolutional : public ndBrainLayer ndInt32 m_outputHeight; }; - - #endif diff --git a/newton-4.00/sdk/dBrain/ndBrainLayerLinear.cpp b/newton-4.00/sdk/dBrain/ndBrainLayerLinear.cpp index 805d1afede..389b7bbedd 100644 --- a/newton-4.00/sdk/dBrain/ndBrainLayerLinear.cpp +++ b/newton-4.00/sdk/dBrain/ndBrainLayerLinear.cpp @@ -198,35 +198,6 @@ void ndBrainLayerLinear::InputDerivative(const ndBrainVector&, const ndBrainVect m_weights.TransposeMul(outputDerivative, inputDerivative); } -//void ndBrainLayerLinear::CalculateParamGradients(const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, -// ndBrainVector& inputGradient, ndBrainVector& biasGradient, ndBrainMatrix& weightGradient) -//{ -// ndAssert(biasGradient.GetCount() == outputDerivative.GetCount()); -// biasGradient.Set(outputDerivative); -// for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0 ; --i) -// { -// ndBrainFloat value = outputDerivative[i]; -// weightGradient[i].ScaleSet(input, value); -// } -// InputDerivative(output, outputDerivative, inputGradient); -//} - -void ndBrainLayerLinear::CalculateParamGradients( - const ndBrainVector& input, const ndBrainVector& output, - const ndBrainVector& outputDerivative, ndBrainVector& inputGradient, ndBrainLayer* const gradientOut) const -{ - ndAssert(!strcmp(GetLabelId(), gradientOut->GetLabelId())); - ndBrainLayerLinear* const gradients = (ndBrainLayerLinear*)gradientOut; - ndAssert(gradients->m_bias.GetCount() == outputDerivative.GetCount()); - gradients->m_bias.Set(outputDerivative); - for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0; --i) - { - ndBrainFloat value = outputDerivative[i]; - gradients->m_weights[i].ScaleSet(input, value); - } - InputDerivative(output, outputDerivative, inputGradient); -} - void ndBrainLayerLinear::Save(const ndBrainSave* const loadSave) const { char buffer[1024]; @@ -295,3 +266,33 @@ ndBrainLayer* ndBrainLayerLinear::Load(const ndBrainLoad* const loadSave) loadSave->ReadString(buffer); return layer; } + + +//void ndBrainLayerLinear::CalculateParamGradients(const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, +// ndBrainVector& inputGradient, ndBrainVector& biasGradient, ndBrainMatrix& weightGradient) +//{ +// ndAssert(biasGradient.GetCount() == outputDerivative.GetCount()); +// biasGradient.Set(outputDerivative); +// for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0 ; --i) +// { +// ndBrainFloat value = outputDerivative[i]; +// weightGradient[i].ScaleSet(input, value); +// } +// InputDerivative(output, outputDerivative, inputGradient); +//} + +void ndBrainLayerLinear::CalculateParamGradients( + const ndBrainVector& input, const ndBrainVector& output, + const ndBrainVector& outputDerivative, ndBrainVector& inputGradient, ndBrainLayer* const gradientOut) const +{ + ndAssert(!strcmp(GetLabelId(), gradientOut->GetLabelId())); + ndBrainLayerLinear* const gradients = (ndBrainLayerLinear*)gradientOut; + ndAssert(gradients->m_bias.GetCount() == outputDerivative.GetCount()); + gradients->m_bias.Set(outputDerivative); + for (ndInt32 i = outputDerivative.GetCount() - 1; i >= 0; --i) + { + ndBrainFloat value = outputDerivative[i]; + gradients->m_weights[i].ScaleSet(input, value); + } + InputDerivative(output, outputDerivative, inputGradient); +}