removed unneeded interface function (wip)
Julio Jerez committed Oct 20, 2023
1 parent a1a9979 commit 1a026a7
Showing 12 changed files with 68 additions and 82 deletions.
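At a glance: this commit deletes the AddExploration entry point from the ndBrainAgent interface, so agents that never explore no longer carry an ndAssert(0) stub, and the off-policy trainers (DDPG, TD3) rename their AddExploration to SelectAction and clamp the noisy action to [-1, 1] instead of leaving it unbounded. A minimal sketch of the shape of the change, written against the C++ standard library with simplified stand-in classes (ndBrainAgentSketch, ndTrainerSketch, and the noise value are illustrative assumptions, not Newton code):

#include <algorithm>
#include <random>

using ndBrainFloat = float;

// Stand-in for ndBrainAgent: AddExploration used to be a pure virtual here
// and is gone after this commit; ApplyActions is all an agent must provide.
class ndBrainAgentSketch
{
	public:
	virtual ~ndBrainAgentSketch() {}
	virtual void ApplyActions(ndBrainFloat* const actions) const = 0;
};

// Stand-in for a trainer: exploration now lives on the trainer side only,
// sampling around the policy output and clamping to the action range.
class ndTrainerSketch : public ndBrainAgentSketch
{
	public:
	void ApplyActions(ndBrainFloat* const) const override {}

	void SelectAction(ndBrainFloat* const actions, int actionDim) const
	{
		static thread_local std::mt19937 gen(0);
		for (int i = actionDim - 1; i >= 0; --i)
		{
			// m_actionNoiseVariance is used as the Gaussian sigma, mirroring
			// how the trainers pass it to ndGaussianRandom.
			std::normal_distribution<float> noise(actions[i], m_actionNoiseVariance);
			actions[i] = std::clamp(noise(gen), -1.0f, 1.0f);
		}
	}

	private:
	ndBrainFloat m_actionNoiseVariance = ndBrainFloat(0.05f);
};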
26 changes: 13 additions & 13 deletions newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
@@ -350,19 +350,19 @@ namespace ndQuadruped_1
}

//#pragma optimize( "", off ) //for debugging purpose
void AddExploration(ndBrainFloat* const actions)
{
m_explorationProbability = ndMax(m_explorationProbability - m_explorationAnneliningRate, m_minExplorationProbability);
ndFloat32 explore = ndRand();
if (explore <= m_explorationProbability)
{
for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
actions[i] = actionNoise;
}
}
}
//void AddExploration(ndBrainFloat* const actions)
//{
// m_explorationProbability = ndMax(m_explorationProbability - m_explorationAnneliningRate, m_minExplorationProbability);
// ndFloat32 explore = ndRand();
// if (explore <= m_explorationProbability)
// {
// for (ndInt32 i = 0; i < m_actionsSize; ++i)
// {
// ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
// actions[i] = actionNoise;
// }
// }
//}

//#pragma optimize( "", off ) //for debugging purpose
ndBrainFloat GetReward() const
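For reference, the scheme being commented out above anneals an exploration probability toward a floor on each call and, when a uniform draw falls below that probability, replaces every action with a Gaussian sample centered on it. A standalone sketch of that annealed-epsilon exploration, using std::random in place of Newton's ndRand/ndGaussianRandom (the member names come from the demo, the constants are placeholders):

#include <algorithm>
#include <random>

struct ExplorationSketch
{
	float m_explorationProbability = 1.0f;       // starts fully exploratory
	float m_minExplorationProbability = 0.1f;    // floor the annealing stops at (placeholder)
	float m_explorationAnnealingRate = 1.0e-4f;  // subtracted every call (placeholder)
	float m_actionNoiseVariance = 0.125f;        // used as the Gaussian sigma (placeholder)

	void AddExploration(float* const actions, int actionsSize)
	{
		static std::mt19937 gen(std::random_device{}());
		std::uniform_real_distribution<float> uniform(0.0f, 1.0f);

		// anneal, then decide whether this step explores at all
		m_explorationProbability = std::max(m_explorationProbability - m_explorationAnnealingRate, m_minExplorationProbability);
		if (uniform(gen) <= m_explorationProbability)
		{
			for (int i = 0; i < actionsSize; ++i)
			{
				// resample each action around its current value
				std::normal_distribution<float> noise(actions[i], m_actionNoiseVariance);
				actions[i] = noise(gen);
			}
		}
	}
};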
48 changes: 37 additions & 11 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
@@ -188,7 +188,42 @@ namespace ndUnicycle
}

#ifndef D_USE_VANILLA_POLICY_GRAD
void AddExploration(ndBrainFloat* const actions)
//void AddExploration(ndBrainFloat* const actions)
//{
// if (GetEpisodeFrames() >= 15000)
// {
// for (ndInt32 i = 0; i < m_actionsSize; ++i)
// {
// ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(2.0f)));
// ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
// actions[i] = clippiedNoisyAction;
// }
// }
// else if (GetEpisodeFrames() >= 10000)
// {
// for (ndInt32 i = 0; i < m_actionsSize; ++i)
// {
// ndReal gaussianNoise = ndReal(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(1.0f)));
// ndReal clippiedNoisyAction = ndClamp(gaussianNoise, ndReal(-1.0f), ndReal(1.0f));
// actions[i] = clippiedNoisyAction;
// }
// }
//
// for (ndInt32 i = 0; i < m_actionsSize; ++i)
// {
// ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
// actions[i] = actionNoise;
// }
//}
#endif

#ifdef D_USE_VANILLA_POLICY_GRAD
virtual void ApplyActions(ndBrainFloat* const actions) const
{
m_model->ApplyActions(actions);
}
#else
virtual void ApplyActions(ndBrainFloat* const actions) const
{
if (GetEpisodeFrames() >= 15000)
{
@@ -209,18 +244,9 @@ namespace ndUnicycle
}
}

for (ndInt32 i = 0; i < m_actionsSize; ++i)
{
ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
actions[i] = actionNoise;
}
}
#endif

virtual void ApplyActions(ndBrainFloat* const actions) const
{
m_model->ApplyActions(actions);
}
#endif

void GetObservation(ndBrainFloat* const state) const
{
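With AddExploration gone, the unicycle demo moves its exploration schedule straight into ApplyActions: the D_USE_VANILLA_POLICY_GRAD build forwards the actions to the model untouched, while the other build injects wider Gaussian noise the longer the episode has been running (the 10000/15000 frame thresholds and the 1.0/2.0 sigmas appear in the demo; the rest of this sketch, including the function names, is a simplified stand-in):

#include <algorithm>
#include <random>

// Sigma of the extra noise as a function of how long the episode has lasted.
static float NoiseSigmaForFrame(int episodeFrames)
{
	if (episodeFrames >= 15000) return 2.0f;   // very long episode: strong kick
	if (episodeFrames >= 10000) return 1.0f;   // long episode: moderate kick
	return 0.0f;                               // normal operation: no extra noise
}

static void ApplyActionsSketch(float* const actions, int actionsSize, int episodeFrames)
{
	static std::mt19937 gen(12345);
	const float sigma = NoiseSigmaForFrame(episodeFrames);
	if (sigma > 0.0f)
	{
		for (int i = 0; i < actionsSize; ++i)
		{
			std::normal_distribution<float> noise(actions[i], sigma);
			actions[i] = std::clamp(noise(gen), -1.0f, 1.0f);
		}
	}
	// ...the real demo then hands the (possibly noisy) actions to m_model->ApplyActions(actions)
}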
1 change: 0 additions & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgent.h
@@ -41,7 +41,6 @@ class ndBrainAgent: public ndClassAlloc

virtual void InitWeights() = 0;
virtual bool IsTrainer() const = 0;
virtual void AddExploration(ndBrainFloat* const actions) = 0;
virtual void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance) = 0;

protected:
8 changes: 0 additions & 8 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG.h
@@ -44,8 +44,6 @@ class ndBrainAgentContinueVPG: public ndBrainAgent
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
void AddExploration(ndBrainFloat* const actions);

void Save(ndBrainSave* const loadSave) const;

void InitWeights();
@@ -91,12 +89,6 @@ bool ndBrainAgentContinueVPG<statesDim, actionDim>::IsTerminal() const
return false;
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainFloat ndBrainAgentContinueVPG<statesDim, actionDim>::GetReward() const
{
9 changes: 1 addition & 8 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG_Trainer.h
@@ -118,7 +118,6 @@ class ndBrainAgentContinueVPG_Trainer : public ndBrainAgent, public ndBrainThreadPool
bool IsSampling() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
void AddExploration(ndBrainFloat* const actions);

private:
void Optimize();
@@ -388,16 +387,10 @@ void ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::Optimize()
BackPropagate();
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::SelectAction(ndBrainVector& probabilities) const
{
// for now use a constant deviations until teh algorism is stable
// for now use a constant deviations until the algorism is stable
for (ndInt32 i = actionDim - 1; i >= 0; --i)
{
ndBrainFloat sample = ndGaussianRandom(probabilities[i], SIGMA);
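The comment corrected above sits in the trainer's SelectAction, which for now draws each action from a Gaussian centered on the policy output with a fixed SIGMA instead of a learned standard deviation. A standalone sketch of that sampling step (SIGMA's real value is defined elsewhere in the header, and the final clamp is an assumption borrowed from the DDPG/TD3 trainers later in this commit):

#include <algorithm>
#include <random>
#include <vector>

static const float SIGMA = 0.2f;  // placeholder for the constant deviation used by the trainer

// Sketch of ndBrainAgentContinueVPG_Trainer::SelectAction: treat each entry
// of 'probabilities' as the mean of a Gaussian and sample an action from it.
static void SelectActionSketch(std::vector<float>& probabilities)
{
	static std::mt19937 gen(42);
	for (int i = int(probabilities.size()) - 1; i >= 0; --i)
	{
		std::normal_distribution<float> sample(probabilities[i], SIGMA);
		// clamping is assumed here; the visible part of the hunk only shows the draw
		probabilities[i] = std::clamp(sample(gen), -1.0f, 1.0f);
	}
}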
7 changes: 0 additions & 7 deletions newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
@@ -43,7 +43,6 @@ class ndBrainAgentDDPG: public ndBrainAgent
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
void Save(ndBrainSave* const loadSave) const;
void AddExploration(ndBrainFloat* const actions);

void InitWeights();
void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance);
@@ -78,12 +77,6 @@ ndBrainFloat ndBrainAgentDDPG<statesDim, actionDim>::GetReward() const
return ndBrainFloat(0.0f);
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG<statesDim, actionDim>::ResetModel() const
{
14 changes: 8 additions & 6 deletions newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h
@@ -97,14 +97,14 @@ class ndBrainAgentDDPG_Trainer: public ndBrainAgent, public ndBrainThreadPool
bool IsTerminal() const;
ndBrainFloat GetReward() const;
void SetBufferSize(ndInt32 size);
void AddExploration(ndBrainFloat* const actions);
void BackPropagateActor(const ndUnsigned32* const bashIndex);
void BackPropagateCritic(const ndUnsigned32* const bashIndex);

void InitWeights();
void InitWeights(ndBrainFloat weighVariance, ndBrainFloat biasVariance);

void BackPropagate();
void SelectAction(ndBrainFloat* const actions) const;

void CalculateQvalue(const ndBrainVector& state, const ndBrainVector& actions);

@@ -479,12 +479,14 @@ ndBrainFloat ndBrainAgentDDPG_Trainer<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::AddExploration(ndBrainFloat* const actions)
void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::SelectAction(ndBrainFloat* const actions) const
{
for (ndInt32 i = 0; i < actionDim; ++i)
for (ndInt32 i = actionDim - 1; i >= 0; --i)
{
ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
actions[i] = actionNoise;
ndBrainFloat sample = ndGaussianRandom(actions[i], m_actionNoiseVariance);
//ndBrainFloat squashSample(ndTanh(sample));
ndBrainFloat squashSample = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
actions[i] = squashSample;
}
}

@@ -518,7 +520,7 @@ void ndBrainAgentDDPG_Trainer<statesDim, actionDim>::Step()
m_actor.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);

// explore environment
AddExploration(&m_currentTransition.m_action[0]);
SelectAction(&m_currentTransition.m_action[0]);
ApplyActions(&m_currentTransition.m_action[0]);

m_currentTransition.m_reward = GetReward();
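The DDPG change keeps both post-noise treatments visible: the tanh squash stays commented out and a hard clamp to [-1, 1] is used instead. A tiny self-contained comparison of the two, so the trade-off is easy to see (everything here is scaffolding around std::normal_distribution, not Newton API):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>

int main()
{
	std::mt19937 gen(7);
	const float mean = 0.9f;    // pretend policy output near the edge of the range
	const float sigma = 0.25f;  // stands in for m_actionNoiseVariance
	std::normal_distribution<float> noise(mean, sigma);
	const float sample = noise(gen);

	// Option left commented out in the trainer: smooth squashing, never quite
	// saturates at +/-1 and also bends values that were already in range.
	const float squashed = std::tanh(sample);
	// Option actually used: hard clamp, leaves in-range samples untouched and
	// pins outliers exactly to the limit.
	const float clamped = std::clamp(sample, -1.0f, 1.0f);

	std::printf("sample=%f tanh=%f clamp=%f\n", sample, squashed, clamped);
	return 0;
}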
7 changes: 0 additions & 7 deletions newton-4.00/sdk/dBrain/ndBrainAgentDQN.h
@@ -43,7 +43,6 @@ class ndBrainAgentDQN: public ndBrainAgent
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
void AddExploration(ndBrainFloat* const actions);

void Save(ndBrainSave* const loadSave) const;

@@ -112,12 +111,6 @@ void ndBrainAgentDQN<statesDim, actionDim>::Save(ndBrainSave* const) const
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDQN<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDQN<statesDim, actionDim>::OptimizeStep()
{
2 changes: 1 addition & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgentDQN_Trainer.h
@@ -92,13 +92,13 @@ class ndBrainAgentDQN_Trainer: public ndBrainAgent, public ndBrainThreadPool
bool IsSampling() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
void AddExploration(ndBrainFloat* const actions);

private:
void Optimize();
void BackPropagate();
void PopulateReplayBuffer();
void SetBufferSize(ndInt32 size);
void AddExploration(ndBrainFloat* const actions);

protected:
ndBrain m_actor;
7 changes: 0 additions & 7 deletions newton-4.00/sdk/dBrain/ndBrainAgentDiscreteVPG.h
@@ -44,7 +44,6 @@ class ndBrainAgentDiscreteVPG: public ndBrainAgent
bool IsTerminal() const;
ndBrainFloat GetReward() const;
ndInt32 GetEpisodeFrames() const;
void AddExploration(ndBrainFloat* const actions);

void Save(ndBrainSave* const loadSave) const;

@@ -91,12 +90,6 @@ bool ndBrainAgentDiscreteVPG<statesDim, actionDim>::IsTerminal() const
return false;
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDiscreteVPG<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainFloat ndBrainAgentDiscreteVPG<statesDim, actionDim>::GetReward() const
{
7 changes: 0 additions & 7 deletions newton-4.00/sdk/dBrain/ndBrainAgentDiscreteVPG_Trainer.h
@@ -116,7 +116,6 @@ class ndBrainAgentDiscreteVPG_Trainer : public ndBrainAgent, public ndBrainThreadPool
bool IsSampling() const;
bool IsTerminal() const;
ndBrainFloat GetReward() const;
void AddExploration(ndBrainFloat* const actions);

private:
void Optimize();
@@ -396,12 +395,6 @@ void ndBrainAgentDiscreteVPG_Trainer<statesDim, actionDim>::Optimize()
BackPropagate();
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentDiscreteVPG_Trainer<statesDim, actionDim>::AddExploration(ndBrainFloat* const)
{
ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainFloat ndBrainAgentDiscreteVPG_Trainer<statesDim, actionDim>::SelectAction(const ndBrainVector& probabilities) const
{
14 changes: 8 additions & 6 deletions newton-4.00/sdk/dBrain/ndBrainAgentTD3_Trainer.h
@@ -100,7 +100,7 @@ class ndBrainAgentTD3_Trainer : public ndBrainAgent, public ndBrainThreadPool
void Save(ndBrainSave* const loadSave) const;

void BackPropagate();
void AddExploration(ndBrainFloat* const actions);
void SelectAction(ndBrainFloat* const actions) const;
void BackPropagateActor(const ndUnsigned32* const bashIndex);
void BackPropagateCritic(const ndUnsigned32* const bashIndex);
void CalculateQvalue(const ndBrainVector& state, const ndBrainVector& actions);
@@ -349,12 +349,14 @@ ndBrainFloat ndBrainAgentTD3_Trainer<statesDim, actionDim>::GetReward() const
}

template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentTD3_Trainer<statesDim, actionDim>::AddExploration(ndBrainFloat* const actions)
void ndBrainAgentTD3_Trainer<statesDim, actionDim>::SelectAction(ndBrainFloat* const actions) const
{
for (ndInt32 i = 0; i < actionDim; ++i)
for (ndInt32 i = actionDim - 1; i >= 0; --i)
{
ndBrainFloat actionNoise = ndBrainFloat(ndGaussianRandom(ndFloat32(actions[i]), ndFloat32(m_actionNoiseVariance)));
actions[i] = actionNoise;
ndBrainFloat sample = ndGaussianRandom(actions[i], m_actionNoiseVariance);
//ndBrainFloat squashSample(ndTanh(sample));
ndBrainFloat squashSample = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
actions[i] = squashSample;
}
}

@@ -555,7 +557,7 @@ void ndBrainAgentTD3_Trainer<statesDim, actionDim>::Step()
m_actor.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);

// explore environment
AddExploration(&m_currentTransition.m_action[0]);
SelectAction(&m_currentTransition.m_action[0]);
ApplyActions(&m_currentTransition.m_action[0]);

m_currentTransition.m_reward = GetReward();
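After the rename, the per-step flow in both off-policy trainers reads: predict the deterministic action with the actor, let SelectAction perturb and clamp it, apply it to the environment, then collect the reward for the stored transition. A condensed sketch of that Step order (the bodies are stand-ins; only the call sequence mirrors the diff):

#include <algorithm>
#include <random>
#include <vector>

struct StepSketch
{
	float m_actionNoiseVariance = 0.05f;  // placeholder sigma
	std::mt19937 m_gen{1};

	// stand-in for m_actor.MakePrediction(observation, action)
	void MakePrediction(const std::vector<float>& observation, std::vector<float>& action)
	{
		action.assign(observation.size(), 0.0f);
	}

	// exploration: Gaussian noise around the prediction, clamped to the action range
	void SelectAction(float* const actions, int actionDim)
	{
		for (int i = actionDim - 1; i >= 0; --i)
		{
			std::normal_distribution<float> noise(actions[i], m_actionNoiseVariance);
			actions[i] = std::clamp(noise(m_gen), -1.0f, 1.0f);
		}
	}

	void Step(const std::vector<float>& observation)
	{
		std::vector<float> action;
		MakePrediction(observation, action);               // deterministic policy output
		SelectAction(action.data(), int(action.size()));   // explore the environment
		// ApplyActions(action.data());                    // drive the model / joints
		// m_currentTransition.m_reward = GetReward();     // then store the transition
	}
};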
