more calibration of continue VPG (wip)
Julio Jerez committed Oct 19, 2023
1 parent ee9062d commit 2628d26
Showing 5 changed files with 13 additions and 18 deletions.
@@ -23,6 +23,7 @@
namespace ndCarpole_1
{
#define D_TRAIN_AGENT

#define D_USE_VANILLA_POLICY_GRAD
//#define D_USE_PROXIMA_POLICY_GRAD

@@ -61,8 +62,8 @@ namespace ndCarpole_1
public:
#ifdef D_USE_VANILLA_POLICY_GRAD
ndCartpoleAgent(ndSharedPtr<ndBrain>& actor)
- : ndBrainAgentDiscreteVPG<m_stateSize, m_actionsSize>(actor)
- , m_model(nullptr)
+ :ndBrainAgentDiscreteVPG<m_stateSize, m_actionsSize>(actor)
+ ,m_model(nullptr)
{
}
#else
@@ -102,7 +103,7 @@ namespace ndCarpole_1
,m_timer(ndGetTimeInMicroseconds())
,m_maxGain(ndFloat32(-1.0e10f))
,m_maxFrames(5000)
- ,m_stopTraining(500000)
+ ,m_stopTraining(5000000)
{
m_outFile = fopen("cartpole-DDPG.csv", "wb");
fprintf(m_outFile, "ddpg\n");
1 change: 0 additions & 1 deletion newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG.h
@@ -133,7 +133,6 @@ void ndBrainAgentContinueVPG<statesDim, actionDim>::OptimizeStep()
template<ndInt32 statesDim, ndInt32 actionDim>
void ndBrainAgentContinueVPG<statesDim, actionDim>::Step()
{
- ndAssert(0);
ndBrainFixSizeVector<actionDim> actions;
ndBrainFixSizeVector<statesDim> observations;

17 changes: 7 additions & 10 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinueVPG_Trainer.h
@@ -36,7 +36,7 @@ template<ndInt32 statesDim, ndInt32 actionDim>
class ndBrainAgentContinueVPG_Trainer : public ndBrainAgent, public ndBrainThreadPool
{
public:
- #define SIGMA ndBrainFloat(0.25f)
+ #define SIGMA ndBrainFloat(0.5f)

class HyperParameters
{
@@ -145,8 +145,8 @@ class ndBrainAgentContinueVPG_Trainer : public ndBrainAgent, public ndBrainThrea
ndInt32 m_bashBufferSize;
ndInt32 m_maxTrajectorySteps;
ndInt32 m_extraTrajectorySteps;
- ndMovingAverage<256> m_averageQvalue;
- ndMovingAverage<256> m_averageFramesPerEpisodes;
+ ndMovingAverage<128> m_averageQvalue;
+ ndMovingAverage<128> m_averageFramesPerEpisodes;
};

template<ndInt32 statesDim, ndInt32 actionDim>
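Both trainers halve the window of the logging averages from 256 to 128 samples, so the reported Q value and episode length react faster to recent episodes. Assuming ndMovingAverage<N> is a fixed-size running average over the last N samples (an assumption based on its name and usage here, not something shown in this diff), the idea is roughly the following standalone sketch:

// Illustrative sketch only; the real ndMovingAverage may differ in detail.
template<int windowSize>
class MovingAverageSketch
{
	public:
	MovingAverageSketch()
		:m_index(0)
		,m_count(0)
	{
		for (int i = 0; i < windowSize; ++i)
		{
			m_samples[i] = 0.0f;
		}
	}

	float Update(float sample)
	{
		// overwrite the oldest sample and average what has been seen so far
		m_samples[m_index] = sample;
		m_index = (m_index + 1) % windowSize;
		m_count = (m_count < windowSize) ? m_count + 1 : windowSize;
		float sum = 0.0f;
		for (int i = 0; i < m_count; ++i)
		{
			sum += m_samples[i];
		}
		return sum / float(m_count);
	}

	private:
	float m_samples[windowSize];
	int m_index;
	int m_count;
};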
@@ -295,14 +295,10 @@ void ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::BackPropagate()
{
const ndBrainVector& rewards = m_agent->m_rewards;
const ndBrainVector& actions = m_agent->m_trajectory[m_index].m_actions;
- //ndInt32 actionIndex = ndInt32 (m_agent->m_trajectory[m_index].m_action);
- //loss.Set(ndBrainFloat(0.0f));
- //ndBrainFloat negLogProb = -ndLog(output[actionIndex]);
- //loss[actionIndex] = negLogProb * rewards[m_index];
- ndBrainFloat avantage = -rewards[m_index] / (SIGMA * SIGMA);
+ ndBrainFloat negLogProbAdvantage = -rewards[m_index] / (SIGMA * SIGMA);
for (ndInt32 i = actionDim - 1; i >= 0; --i)
{
- loss[i] = avantage * (output[i] - actions[i]);
+ loss[i] = negLogProbAdvantage * (output[i] - actions[i]);
}
}
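For context on the renamed term: with a fixed-sigma Gaussian policy whose mean is the network output, the derivative of the log-likelihood of a sampled action with respect to the mean is (action - mean) / (SIGMA * SIGMA), and a REINFORCE-style update weights that score by the step reward. A minimal standalone sketch of that quantity follows (illustrative only, not part of this commit; it assumes the SDK's ndBrainFloat type is in scope, and it matches negLogProbAdvantage * (output[i] - actions[i]) above up to the sign convention the trainer uses when applying the gradient):

// Hypothetical helper for illustration: reward-weighted score of a
// Gaussian policy N(mean, sigma) with sigma held constant.
// d/dMean log N(action; mean, sigma) = (action - mean) / (sigma * sigma)
ndBrainFloat ReinforceGaussianTerm(ndBrainFloat action, ndBrainFloat mean,
	ndBrainFloat sigma, ndBrainFloat reward)
{
	return reward * (action - mean) / (sigma * sigma);
}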

@@ -405,7 +401,8 @@ void ndBrainAgentContinueVPG_Trainer<statesDim, actionDim>::SelectAction(ndBrain
for (ndInt32 i = actionDim - 1; i >= 0; --i)
{
ndBrainFloat sample = ndGaussianRandom(probabilities[i], SIGMA);
- ndBrainFloat squashSample(ndTanh(sample));
+ //ndBrainFloat squashSample(ndTanh(sample));
+ ndBrainFloat squashSample = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
probabilities[i] = squashSample;
}
}
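The exploration change above keeps the Gaussian perturbation around the actor output but replaces the tanh squashing with a hard clamp to the [-1, 1] action range, so samples inside the range are no longer distorted and only out-of-range samples are truncated. A minimal sketch of the two variants (illustrative only; the helper names are hypothetical, and it assumes the SDK's ndGaussianRandom, ndTanh and ndClamp functions used above plus the ndBrainFloat type):

// Hypothetical helpers for illustration: the old and new ways of
// mapping a Gaussian exploration sample into the [-1, 1] action range.
ndBrainFloat SampleActionTanh(ndBrainFloat mean, ndBrainFloat sigma)
{
	// smooth squashing: never leaves (-1, 1) but bends the sample everywhere
	return ndTanh(ndGaussianRandom(mean, sigma));
}

ndBrainFloat SampleActionClamp(ndBrainFloat mean, ndBrainFloat sigma)
{
	// hard clamp: passes the sample through unchanged inside the range
	// and truncates it only at the bounds
	return ndClamp(ndGaussianRandom(mean, sigma), ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
}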
2 changes: 0 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentDiscreteVPG.h
@@ -59,13 +59,11 @@ ndBrainAgentDiscreteVPG<statesDim, actionDim>::ndBrainAgentDiscreteVPG(const ndS
:ndBrainAgent()
,m_actor(actor)
{
- ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
ndBrainAgentDiscreteVPG<statesDim, actionDim>::~ndBrainAgentDiscreteVPG()
{
- ndAssert(0);
}

template<ndInt32 statesDim, ndInt32 actionDim>
4 changes: 2 additions & 2 deletions newton-4.00/sdk/dBrain/ndBrainAgentDiscreteVPG_Trainer.h
@@ -143,8 +143,8 @@ class ndBrainAgentDiscreteVPG_Trainer : public ndBrainAgent, public ndBrainThrea
ndInt32 m_bashBufferSize;
ndInt32 m_maxTrajectorySteps;
ndInt32 m_extraTrajectorySteps;
- ndMovingAverage<256> m_averageQvalue;
- ndMovingAverage<256> m_averageFramesPerEpisodes;
+ ndMovingAverage<128> m_averageQvalue;
+ ndMovingAverage<128> m_averageFramesPerEpisodes;
};

template<ndInt32 statesDim, ndInt32 actionDim>
