#include "capy.h"
#ifndef FIXTURE
#define FIXTURE
#define MCE_N 20

// Environment for the mountain car problem
// The state has two features: vals[0] is the speed of the car and vals[1]
// is its position
typedef struct MountainCarEnvironment {

  // Inherits CapyPGEnvironment (the functions of this file access the
  // inherited members curState, rng, paramAction and paramValue directly)
  struct CapyPGEnvironmentDef;

  // Destructor for the parent class, called by
  // DestructMountainCarEnvironment
  void (*destructCapyPGEnvironment)(void);
} MountainCarEnvironment;

// Reset the environment to a start state
// Output:
//   The current state is set to an initial state: the speed is zero and the
//   position is -0.6 plus 0.2 times the value drawn from the environment's
//   random number generator
static void SetToInitialStateMountainCarEnvironment(void) {
  MountainCarEnvironment* env = (MountainCarEnvironment*)capyThat;

  // Draw the random offset of the start position
  double const draw = $(&(env->rng), getDouble)();

  // vals[1] is the position, vals[0] is the speed
  env->curState.vals[1] = -0.6 + 0.2 * draw;
  env->curState.vals[0] = 0.0;
}

// Advance the environment by one step
// Input:
//   action: the applied action (0: push with -1, 1: no push, any other
//           value: push with +1)
// Output:
//   Update the current state according to the action, and return the
//   transition (state before, action, reward, state after). The reward is
//   1.0 if the new position is above 0.5, else -0.001.
static CapyPGTransition StepMountainCarEnvironment(size_t const action) {
  MountainCarEnvironment* env = (MountainCarEnvironment*)capyThat;

  // Record where the step starts from and which action is applied
  CapyPGTransition result = CapyPGTransitionCreate(env->curState.dim);
  CapyVecCopy(&(env->curState), &(result.fromState));
  result.action = action;

  // Convert the action index into a push
  double push = 1.0;
  if(action == 0) push = -1.0;
  else if(action == 1) push = 0.0;

  // State before the step
  double const speed = env->curState.vals[0];
  double const position = env->curState.vals[1];

  // New speed, kept in [-0.07, 0.07]
  double nextSpeed = speed + 0.001 * push - 0.0025 * cos(3.0 * position);
  if(nextSpeed < -0.07) nextSpeed = -0.07;
  if(nextSpeed > 0.07) nextSpeed = 0.07;

  // New position, calculated from the speed before the step. The car stops
  // when it reaches the lower bound of the position range.
  double nextPosition = speed + position;
  if(nextPosition < -1.2) {
    nextPosition = -1.2;
    nextSpeed = 0.0;
  }

  // Commit the new state
  env->curState.vals[0] = nextSpeed;
  env->curState.vals[1] = nextPosition;

  // Reward reaching the goal, slightly penalise any other step
  if(env->curState.vals[1] > 0.5) {
    result.reward = 1.0;
  } else {
    result.reward = -0.001;
  }
  CapyVecCopy(&(env->curState), &(result.toState));
  return result;
}

// Tell whether the episode is over
// Output:
//   Return true if the current state is an end state (the position is
//   above 0.5), else false
static bool IsEndStateMountainCarEnvironment(void) {
  MountainCarEnvironment* env = (MountainCarEnvironment*)capyThat;
  return (env->curState.vals[1] > 0.5);
}

// Release the resources of a MountainCarEnvironment
// Output:
//   The destructor of the parent class is called; the environment holds no
//   other resource of its own
static void DestructMountainCarEnvironment(void) {
  MountainCarEnvironment* env = (MountainCarEnvironment*)capyThat;
  $(env, destructCapyPGEnvironment)();
}

// Convert a state into the index of its cell in the discretised state space
// Input:
//   state: the state to convert (vals[0] is the speed, vals[1] is the
//          position)
// Output:
//   Return the index of the cell. The speed range [-0.07, 0.07] and the
//   position range [-1.2, 0.5] are each mapped with CapyLerp to
//   [0, MCE_N-1], clamped to that range and truncated to an integer. The
//   index is MCE_N times the speed cell plus the position cell.
static size_t StateToIdxMountainCarEnvironment(CapyVec const* const state) {
  double const lastCell = (double)(MCE_N-1);
  CapyRangeDouble cellRange = {.min = 0, .max = MCE_N - 1};
  CapyRangeDouble speedRange = {.min = -0.07, .max = 0.07};
  CapyRangeDouble positionRange = {.min = -1.2, .max = 0.5};

  // Cell along the speed axis
  double speedCell = CapyLerp(state->vals[0], &speedRange, &cellRange);
  if(speedCell < 0.0) speedCell = 0.0;
  else if(speedCell > lastCell) speedCell = lastCell;

  // Cell along the position axis
  double positionCell = CapyLerp(state->vals[1], &positionRange, &cellRange);
  if(positionCell < 0.0) positionCell = 0.0;
  else if(positionCell > lastCell) positionCell = lastCell;

  return (MCE_N * (size_t)speedCell) + (size_t)positionCell;
}

// Evaluate the action probabilities
// Input:
//   state: the state used for evaluation
//   actionsProb: the evaluated actions probability
// Output:
//   'actionsProb' is updated. The parameters are stored as one table of
//   MCE_N * MCE_N values per action; the value of each of the 3 actions for
//   the cell containing 'state' is read from its table and the result is
//   normalised with CapyVecSoftmax.
static void GetActionsProbMountainCarEnvironment(
  CapyVec const* const state,
        CapyVec* const actionsProb) {
  MountainCarEnvironment* that = (MountainCarEnvironment*)capyThat;

  // The cell depends only on the state, so it is calculated once for all
  // the actions
  size_t const idx = StateToIdxMountainCarEnvironment(state);
  loop(iAction, (size_t)3) {
    actionsProb->vals[iAction] =
      that->paramAction.vals[iAction * MCE_N * MCE_N + idx];
  }
  CapyVecSoftmax(actionsProb, 1.0);
}

// Evaluate the value of a state
// Input:
//   state: the state used for evaluation
// Output:
//   Return the evaluated value, which is the value parameter stored for the
//   cell containing 'state'
static double GetValueMountainCarEnvironment(CapyVec const* const state) {
  MountainCarEnvironment* env = (MountainCarEnvironment*)capyThat;
  return env->paramValue.vals[StateToIdxMountainCarEnvironment(state)];
}

// Build a MountainCarEnvironment
// Input:
//   seed: seed for the random number generator
// Output:
//   Return a MountainCarEnvironment with 2 features, 3 actions, one action
//   parameter per action and per cell of the MCE_N x MCE_N grid, and one
//   value parameter per cell
static MountainCarEnvironment MountainCarEnvironmentCreate(
  CapyRandomSeed_t const seed) {
  MountainCarEnvironment that = {0};

  // Dimensions of the problem
  size_t const nbCell = MCE_N * MCE_N;
  size_t const nbFeature = 2;
  size_t const nbAction = 3;

  // Initialise the parent class
  CapyInherits(
    that, CapyPGEnvironment,
    (nbFeature, nbAction, nbAction * nbCell, nbCell, seed)
  );

  // Install the methods specific to the mountain car
  that.destruct = DestructMountainCarEnvironment;
  that.setToInitialState = SetToInitialStateMountainCarEnvironment;
  that.isEndState = IsEndStateMountainCarEnvironment;
  that.step = StepMountainCarEnvironment;
  that.getValue = GetValueMountainCarEnvironment;
  that.getActionsProb = GetActionsProbMountainCarEnvironment;
  return that;
}

// Save images showing the learned value and policy of a mountain car
// environment
// Input:
//   that: the environment to visualise
//   iStep: index used to number the output files
// Output:
//   Two 400x400 images are saved in ./UnitTests/TestPolicyGradient/:
//   mountainCarValue<iStep>.png shows the value of each state and the
//   trajectory obtained by always applying the best action, and
//   mountainCarAction<iStep>.png shows the action probabilities of each
//   state. The current state of 'that' is overwritten.
static void VisualiseMountainCarEnvironment(
  MountainCarEnvironment* const that,
                      int const iStep) {
  CapyImg* img = CapyImgAlloc(capyImgMode_rgb, capyImgDims_400x400);
  CapyImg* imgB = CapyImgAlloc(capyImgMode_rgb, capyImgDims_400x400);

  // Mapping between pixel coordinates and states: the horizontal axis is
  // the speed, the vertical axis is the position (0.5 on the first row)
  CapyRangeDouble fromX = {.min = 0, .max = img->dims.width};
  CapyRangeDouble fromY = {.min = 0, .max = img->dims.height};
  CapyRangeDouble toX = {.min = -0.07, .max = 0.07};
  CapyRangeDouble toY = {.min = 0.5, .max = -1.2};

  // Value image: the value clamped to [0, 1] goes to the red channel and
  // its complement to the blue channel
  loop(x, img->dims.width) loop(y, img->dims.height) {
    that->curState.vals[0] = CapyLerp(x, &fromX, &toX);
    that->curState.vals[1] = CapyLerp(y, &fromY, &toY);
    double out = $(that, getValue)(&(that->curState));
    if(out < 0.0) out = 0.0; else if(out > 1.0) out = 1.0;
    CapyColorData color = {.RGBA = {out, 0.0, 1.0 - out, 1.0}};
    CapyImgPos pos = {.coords = {(CapyImgPos_t)x, (CapyImgPos_t)y}};
    $(img, setColor)(&pos, &color);
  }

  // Action image: the probabilities of the three actions go to the red,
  // green and blue channels
  loop(x, img->dims.width) loop(y, img->dims.height) {
    that->curState.vals[0] = CapyLerp(x, &fromX, &toX);
    that->curState.vals[1] = CapyLerp(y, &fromY, &toY);
    $(that, getActionsProb)(&(that->curState), &(that->actionsProb));
    CapyColorData color = {
      .RGBA = {
        that->actionsProb.vals[0],
        that->actionsProb.vals[1],
        that->actionsProb.vals[2], 1.0
      }
    };
    CapyImgPos pos = {.coords = {(CapyImgPos_t)x, (CapyImgPos_t)y}};
    $(imgB, setColor)(&pos, &color);
  }

  // Run one episode of at most 1000 steps with the best action, drawing
  // each visited state as a white point on the value image
  $(that, setToInitialState)();
  bool isEndState = $(that, isEndState)();
  size_t nbStep = 0;
  CapyPen pen = CapyPenCreate();
  pen.size = 3.0;
  pen.color = capyColorRGBAWhite;
  while(isEndState == false && nbStep < 1000) {

    // Convert the state back to pixel coordinates
    double in[2] = {
      CapyLerp(that->curState.vals[0], &toX, &fromX),
      CapyLerp(that->curState.vals[1], &toY, &fromY)
    };
    $(&pen, drawPoint)(in, img);
    size_t const action = $(that, getBestAction)(&(that->curState));

    // The transition is not used here, only the update of the current state
    CapyPGTransition transition = $(that, step)(action);
    CapyPGTransitionDestruct(&transition);
    isEndState = $(that, isEndState)();
    nbStep += 1;
  }
  $(&pen, destruct)();

  // Save both images and free the memory
  char* path = strCreate(
    "./UnitTests/TestPolicyGradient/mountainCarValue%03d.png", iStep);
  $(img, saveToPath)(path);
  free(path);
  path = strCreate(
    "./UnitTests/TestPolicyGradient/mountainCarAction%03d.png", iStep);
  $(imgB, saveToPath)(path);
  free(path);
  CapyImgFree(&img);
  CapyImgFree(&imgB);
}

// Derivative function for the cartpole simulation
typedef struct CartpoleDerivative {
  // Inherits CapyMathFun
  struct CapyMathFunDef;
  // Force applied to the cart, read by CartpoleDerivativeEval and set by
  // StepCartpoleEnvironment before each step
  double force;
  // Destructor for the parent class, called by DestructCartpoleDerivative
  void (*destructCapyMathFun)(void);
} CartpoleDerivative;

// Release the resources of a CartpoleDerivative
// Output:
//   The destructor of the parent class is called; the derivative holds no
//   other resource of its own
static void DestructCartpoleDerivative(void) {
  CartpoleDerivative* fun = (CartpoleDerivative*)capyThat;
  $(fun, destructCapyMathFun)();
}

// Evaluate the equations of motion of the cartpole
// Input:
//   in: the current values; only in[1] (pole angle) and in[3] (angular
//       speed of the pole) are read
//   out: the result of the evaluation
// Output:
//   'out' is updated. out[0] and out[1] are presumably the angular
//   acceleration of the pole and the acceleration of the cart (to be
//   confirmed against the reference below). The applied force is read from
//   the 'force' member of the instance.
static void CartpoleDerivativeEval(
  double const* const in,
        double* const out) {
  CartpoleDerivative* fun = (CartpoleDerivative*)capyThat;

  // Physical constants of the simulation
  double const cartMass = 1.0;
  double const poleMass = 0.1;
  double const gravity = 9.8;
  double const poleLength = 1.0;
  double const force = fun->force;

  // Terms shared by both equations
  double const sinTheta = sin(in[1]);
  double const cosTheta = cos(in[1]);
  double const omega = in[3];
  double const massTerm = cartMass + poleMass * (1.0 - cosTheta * cosTheta);

  // https://courses.ece.ucsb.edu/ECE594/594D_W10Byl/hw/cartpole_eom.pdf
  double const numAngle =
    -poleMass * poleLength * sinTheta * cosTheta * omega * omega +
    (cartMass + poleMass) * gravity * sinTheta + cosTheta * force;
  double const numCart =
    -poleMass * poleLength * sinTheta * omega * omega +
    poleMass * gravity * sinTheta * cosTheta + force;
  out[0] = numAngle / (massTerm * poleLength);
  out[1] = numCart / massTerm;
}

// Build a CartpoleDerivative
// Output:
//   Return a CartpoleDerivative whose parent class is initialised with the
//   arguments (5, 2) and whose applied force is zero
static CartpoleDerivative CartpoleDerivativeCreate(void) {
  CartpoleDerivative that = {0};

  // Initialise the parent class
  CapyInherits(that, CapyMathFun, (5, 2));

  // Install the methods specific to the cartpole
  that.destruct = DestructCartpoleDerivative;
  that.eval = CartpoleDerivativeEval;
  return that;
}

// Allocate a new CartpoleDerivative
// Output:
//   Return a pointer to a newly allocated and initialised
//   CartpoleDerivative, to be freed with CartpoleDerivativeFree, or NULL if
//   the pointer is still null after the allocation
static CartpoleDerivative* CartpoleDerivativeAlloc(void) {

  // Start from a defined value so that the check below is meaningful
  // whatever safeMalloc does on failure
  CartpoleDerivative* that = NULL;
  safeMalloc(that, 1);

  // Never write through a null pointer
  if(that == NULL) return NULL;
  *that = CartpoleDerivativeCreate();
  return that;
}

// Free the memory used by a CartpoleDerivative
// Input:
//   that: address of the pointer to the instance to free
// Output:
//   The instance is destructed and freed, and '*that' is reset to NULL so
//   that no dangling pointer is left behind. Nothing happens if 'that' or
//   '*that' is NULL.
static void CartpoleDerivativeFree(CartpoleDerivative** that) {
  if(that == NULL || *that == NULL) return;
  $(*that, destruct)();
  free(*that);
  *that = NULL;
}

// Environment for the cartpole problem
// The state has five features: t, theta, x, deltaTheta, deltaX
typedef struct CartpoleEnvironment {

  // Inherits CapyPGEnvironment
  struct CapyPGEnvironmentDef;

  // RK4 instance for step calculation (its derivative is a
  // CartpoleDerivative owned by this environment)
  CapyRungeKutta* rk;

  // Activation function for neural networks (shared by both networks)
  CapyNNActivationReLU* activation;

  // Neural network for action evaluation (4 inputs, 3 outputs)
  CapyNeuralNetwork* nnAction;

  // Neural network for value evaluation (4 inputs, 1 output)
  CapyNeuralNetwork* nnValue;

  // Destructor for the parent class
  void (*destructCapyPGEnvironment)(void);
} CartpoleEnvironment;

// Set the current state to an initial state
// Output:
//   The current state is set to an intial state
static void SetToInitialStateCartpoleEnvironment(void) {
  MountainCarEnvironment* that = (MountainCarEnvironment*)capyThat;

  // t, theta, x, deltaTheta, deltaX
  that->curState.vals[0] = 0.0;
  that->curState.vals[1] = -0.1 + 0.2 * $(&(that->rng), getDouble)();
  that->curState.vals[2] = 0.0;
  that->curState.vals[3] = 0.0;
  that->curState.vals[4] = 0.0;
}

// Step the environment
// Input:
//   action: the applied action
// Output:
//   Update the current state according to the action, and return the transition
static CapyPGTransition StepCartpoleEnvironment(size_t const action) {
  CartpoleEnvironment* that = (CartpoleEnvironment*)capyThat;
  CapyPGTransition transition = CapyPGTransitionCreate(that->curState.dim);
  CapyVecCopy(&(that->curState), &(transition.fromState));
  transition.action = action;
  CapyVecCopy(&(that->curState), &(that->rk->initVal));
  CartpoleDerivative* cartpoleDerivative =
    (CartpoleDerivative*)(that->rk->derivative);
  cartpoleDerivative->force = 2.0 * ((double)action - 1.0);
  $(that->rk, step)(that->rk->initVal.vals);
  CapyVecCopy(&(that->rk->initVal), &(that->curState));
  transition.reward = 1.0;
  CapyVecCopy(&(that->curState), &(transition.toState));
  return transition;
}

// Tell whether the episode is over
// Output:
//   Return true if the current state is an end state (the absolute pole
//   angle is above 0.2095 or the absolute cart position is above 2.4),
//   else false
static bool IsEndStateCartpoleEnvironment(void) {
  CartpoleEnvironment* env = (CartpoleEnvironment*)capyThat;
  double const absAngle = fabs(env->curState.vals[1]);
  double const absPosition = fabs(env->curState.vals[2]);
  return (absAngle > 0.2095 || absPosition > 2.4);
}

// Free the memory used by a CartpoleEnvironment
// Output:
//   The derivative, the Runge-Kutta instance, both neural networks and the
//   activation function are freed, then the destructor of the parent class
//   is called
static void DestructCartpoleEnvironment(void) {
  CartpoleEnvironment* that = (CartpoleEnvironment*)capyThat;

  // The derivative is reached through the Runge-Kutta instance, so it must
  // be freed before that instance
  CartpoleDerivativeFree((CartpoleDerivative**)&(that->rk->derivative));
  CapyRungeKuttaFree(&(that->rk));

  // The activation function was given to both networks at creation, so it
  // is freed after them
  CapyNeuralNetworkFree(&(that->nnAction));
  CapyNeuralNetworkFree(&(that->nnValue));
  CapyNNActivationReLUFree(&(that->activation));
  $(that, destructCapyPGEnvironment)();
}

// Evaluate the action probabilities with the action neural network
// Input:
//   state: the state used for evaluation
//   actionsProb: the evaluated actions probability
// Output:
//   'actionsProb' is updated. The parameters of the network are overwritten
//   with the action parameters of the environment.
static void GetActionsProbCartpoleEnvironment(
  CapyVec const* const state,
        CapyVec* const actionsProb) {
  CartpoleEnvironment* env = (CartpoleEnvironment*)capyThat;
  CapyNeuralNetwork* net = env->nnAction;

  // Load the parameters of the environment into the network through a
  // vector view of the network's parameter array
  CapyVec netParams = {.dim = env->paramAction.dim, .vals = net->params};
  CapyVecCopy(&(env->paramAction), &netParams);

  // The network takes the last four features of the state as input (the
  // first one, the time, is skipped)
  double features[4];
  for(size_t iFeature = 0; iFeature < 4; ++iFeature) {
    features[iFeature] = state->vals[iFeature + 1];
  }

  // Evaluate the network and normalise its output into probabilities
  $(net, eval)(features, actionsProb->vals);
  CapyVecSoftmax(actionsProb, 1.0);
}

// Evaluate the value of a state with the value neural network
// Input:
//   state: the state used for evaluation
// Output:
//   Return the evaluated value. The parameters of the network are
//   overwritten with the value parameters of the environment.
static double GetValueCartpoleEnvironment(CapyVec const* const state) {
  CartpoleEnvironment* env = (CartpoleEnvironment*)capyThat;
  CapyNeuralNetwork* net = env->nnValue;

  // The network takes the last four features of the state as input (the
  // first one, the time, is skipped)
  double features[4];
  for(size_t iFeature = 0; iFeature < 4; ++iFeature) {
    features[iFeature] = state->vals[iFeature + 1];
  }

  // Load the parameters of the environment into the network through a
  // vector view of the network's parameter array
  CapyVec netParams = {.dim = env->paramValue.dim, .vals = net->params};
  CapyVecCopy(&(env->paramValue), &netParams);

  // The network has a single output, the value
  double value = 0.0;
  $(net, eval)(features, &value);
  return value;
}

// Create a new CartpoleEnvironment
// Input:
//   seed: seed for the random number generator
// Output:
//   Return a CartpoleEnvironment with 5 features and 3 actions. It owns a
//   Runge-Kutta instance, a ReLU activation and two neural networks, all
//   released by the 'destruct' method. The action and value parameters are
//   initialised with random values.
static CartpoleEnvironment CartpoleEnvironmentCreate(
  CapyRandomSeed_t const seed) {
  CartpoleEnvironment that = {0};

  // Both networks use two hidden layers of 8 nodes sharing the same
  // activation function
  CapyNNActivationReLU* activation = CapyNNActivationReLUAlloc();
  CapyNNModel layerDef = {
    .nbLayer = 2,
    .layers = (CapyNNLayerDef[]){
      {.nbNode = 8, .activation = (CapyNNActivationFun*)activation},
      {.nbNode = 8, .activation = (CapyNNActivationFun*)activation},
    },
  };

  // 4 inputs (the state without the time), 3 outputs for the actions and 1
  // output for the value
  CapyNeuralNetwork* nnAction =
    CapyNeuralNetworkAllocFullyConnected(4, &layerDef, 3);
  CapyNeuralNetwork* nnValue =
    CapyNeuralNetworkAllocFullyConnected(4, &layerDef, 1);

  // The parameters of the environment are the parameters of the networks
  size_t const nbFeature = 5;
  size_t const nbAction = 3;
  size_t const nbParamAction = nnAction->nbParam;
  size_t const nbParamValue = nnValue->nbParam;
  CapyInherits(
    that, CapyPGEnvironment,
    (nbFeature, nbAction, nbParamAction, nbParamValue, seed)
  );

  // Integrator used to step the simulation
  size_t const order = 2;
  CartpoleDerivative* cartpoleDerivative = CartpoleDerivativeAlloc();
  that.rk = CapyRungeKuttaAlloc((CapyMathFun*)cartpoleDerivative, order);
  that.rk->deltaT = 0.01;
  that.activation = activation;
  that.nnAction = nnAction;
  that.nnValue = nnValue;

  // Install the methods specific to the cartpole
  that.step = StepCartpoleEnvironment;
  that.setToInitialState = SetToInitialStateCartpoleEnvironment;
  that.isEndState = IsEndStateCartpoleEnvironment;
  that.destruct = DestructCartpoleEnvironment;
  that.getActionsProb = GetActionsProbCartpoleEnvironment;
  that.getValue = GetValueCartpoleEnvironment;

  // Initialise the parameters with 0.5 minus a value drawn from the random
  // number generator (action parameters first, then value parameters; the
  // order of the draws affects the reference values of the tests)
  loop(i, nbParamAction) {
    that.paramAction.vals[i] = 0.5 - $(&(that.rng), getDouble)();
  }
  loop(i, nbParamValue) {
    that.paramValue.vals[i] = 0.5 - $(&(that.rng), getDouble)();
  }
  return that;
}

// Save images showing the behaviour of a cartpole environment
// Input:
//   that: the environment to visualise
// Output:
//   10 episodes of at most 5000 steps are run, always applying the best
//   action. Every 20th step of an episode an image showing the pole as a
//   white line on a black background is saved to
//   ./UnitTests/TestPolicyGradient/cartpole<index>.png, the index
//   increasing across all the episodes. The current state of 'that' is
//   modified.
static void VisualiseCartpoleEnvironment(CartpoleEnvironment* const that) {
  CapyImg* img = CapyImgAlloc(capyImgMode_rgb, capyImgDims_400x400);

  // Mapping from the simulation coordinates to pixel coordinates (the
  // vertical axis is flipped: 1.5 is on the first row)
  CapyRangeDouble toX = {.min = 0, .max = img->dims.width};
  CapyRangeDouble toY = {.min = 0, .max = img->dims.height};
  CapyRangeDouble fromX = {.min = -2.4, .max = 2.4};
  CapyRangeDouble fromY = {.min = 1.5, .max = -0.1};
  CapyPen pen = CapyPenCreate();
  pen.color = capyColorRGBAWhite;
  pen.size = 3.0;

  // Index of the next image. It is an int to match the %05d conversion
  // used to create the file name.
  int iImg = 0;
  loop(iTry, 10) {
    $(that, setToInitialState)();
    bool isEndState = $(that, isEndState)();
    size_t nbStep = 0;
    while(isEndState == false && nbStep < 5000) {
      nbStep += 1;
      if((nbStep % 20) == 0) {
        $(img, fillWithColor)(&capyColorRGBABlack);

        // The pole is drawn from the cart position to the tip of a pole of
        // unit length tilted by the pole angle
        double from[2] = {
          CapyLerp(that->curState.vals[2], &fromX, &toX),
          CapyLerp(0.0, &fromY, &toY),
        };
        double to[2] = {
          CapyLerp(
            that->curState.vals[2] - sin(that->curState.vals[1]), &fromX, &toX),
          CapyLerp(cos(that->curState.vals[1]), &fromY, &toY),
        };
        $(&pen, drawLine)(from, to, img);
        char* path = strCreate(
          "./UnitTests/TestPolicyGradient/cartpole%05d.png", iImg);
        iImg += 1;
        $(img, saveToPath)(path);
        free(path);
      }
      size_t const action = $(that, getBestAction)(&(that->curState));

      // The transition is not used here, only the update of the current
      // state
      CapyPGTransition transition = $(that, step)(action);
      CapyPGTransitionDestruct(&transition);
      isEndState = $(that, isEndState)();
    }
  }
  CapyImgFree(&img);
  $(&pen, destruct)();
}

#endif
// Regression test: train the mountain car environment with the 'reinforce'
// method and compare the average number of steps after each training run
// with reference values
CUTEST(test001, "Mountain car example (Reinforce with baseline)") {
  CapyRandomSeed_t const seed = 0;
  MountainCarEnvironment env = MountainCarEnvironmentCreate(seed);
  CapyPolicyGradient* pg = CapyPolicyGradientAlloc((CapyPGEnvironment*)&env);
  pg->learnRateAction = 0.001;
  pg->learnRateState = 0.001;
  pg->discount = 0.999;
  pg->nbMaxStep = 10000;
  size_t nbEpisode = 50;

  // Reference values of pg->avgNbStep after each successive training run.
  // Only the first 5 values are checked by the loop below.
  double const checkAvgNbSteps[50] = {
    1147.580000, 716.300000, 535.620000, 440.620000, 390.380000,
    344.480000, 318.100000, 307.860000, 291.160000, 284.200000,
    273.400000, 257.820000, 264.780000, 250.900000, 256.500000,
    252.280000, 254.600000, 251.500000, 239.980000, 238.460000,
    239.540000, 237.840000, 241.540000, 236.280000, 220.700000,
    226.880000, 202.920000, 199.820000, 173.320000, 177.240000,
    176.700000, 168.060000, 168.200000, 168.660000, 166.320000,
    164.620000, 162.840000, 166.840000, 161.160000, 163.700000,
    158.100000, 158.820000, 162.120000, 159.040000, 165.220000,
    159.540000, 160.340000, 160.860000, 159.740000, 160.320000,
  };
  bool isOk = true;
  loop(iStep, 5) {
    $(pg, reinforce)(nbEpisode);

    // The visualisation is disabled; replace the condition to save the
    // images
    if(0) VisualiseMountainCarEnvironment(&env, iStep);
    isOk &= (fabs(pg->avgNbStep - checkAvgNbSteps[iStep]) < 1e-3);
  }
  CUTEST_ASSERT(isOk, "unexpected results");
  CapyPolicyGradientFree(&pg);
  $(&env, destruct)();
}

// Regression test: train the mountain car environment with the
// 'proximalPolicyOptimisation' method and compare the average number of
// steps after each training run with reference values
CUTEST(test002, "Mountain car example (PPO)") {
  CapyRandomSeed_t const seed = 0;
  MountainCarEnvironment env = MountainCarEnvironmentCreate(seed);
  CapyPolicyGradient* pg = CapyPolicyGradientAlloc((CapyPGEnvironment*)&env);
  pg->learnRateAction = 0.001;
  pg->learnRateState = 0.001;
  pg->discount = 0.999;
  pg->nbMaxStep = 10000;
  size_t nbEpisode = 50;

  // Reference values of pg->avgNbStep after each successive training run.
  // Only the first 5 values are checked by the loop below.
  double const checkAvgNbSteps[50] = {
    895.440000, 650.260000, 548.160000, 419.820000, 367.260000,
    321.740000, 316.260000, 285.600000, 246.820000, 240.440000,
    204.580000, 204.500000, 199.120000, 196.480000, 183.280000,
    184.720000, 180.440000, 180.620000, 177.160000, 176.260000,
    171.200000, 174.260000, 174.600000, 168.260000, 167.440000,
    166.580000, 164.900000, 162.180000, 164.860000, 162.760000,
    162.820000, 161.580000, 158.160000, 158.080000, 160.180000,
    158.560000, 154.660000, 153.220000, 156.920000, 156.540000,
    156.340000, 154.960000, 153.780000, 153.320000, 151.420000,
    154.580000, 156.220000, 155.580000, 154.040000, 151.520000,
  };
  bool isOk = true;
  loop(iStep, 5) {
    $(pg, proximalPolicyOptimisation)(nbEpisode);

    // The visualisation is disabled; replace the condition to save the
    // images
    if(0) VisualiseMountainCarEnvironment(&env, iStep);
    isOk &= (fabs(pg->avgNbStep - checkAvgNbSteps[iStep]) < 1e-3);
  }
  CUTEST_ASSERT(isOk, "unexpected results");
  CapyPolicyGradientFree(&pg);
  $(&env, destruct)();
}

// Regression test: train the cartpole environment with the 'reinforce'
// method and compare the average number of steps after each training run
// with reference values
CUTEST(test003, "Cartpole example (Reinforce with baseline)") {
  CapyRandomSeed_t const seed = 0;
  CartpoleEnvironment env = CartpoleEnvironmentCreate(seed);
  CapyPolicyGradient* pg = CapyPolicyGradientAlloc((CapyPGEnvironment*)&env);
  pg->learnRateAction = 0.0001;
  pg->learnRateState = 0.0001;
  pg->discount = 0.999;
  pg->nbMaxStep = 1000;
  size_t nbEpisode = 100;

  // Reference values of pg->avgNbStep after each successive training run.
  // Only the first 5 values are checked by the loop below.
  double const checkAvgNbSteps[25] = {
    73.290000, 71.620000, 74.860000, 77.370000, 83.090000,
    84.800000, 102.850000, 127.400000, 146.250000, 152.070000,
    225.530000, 306.410000, 358.360000, 403.780000, 411.980000,
    516.640000, 627.870000, 792.550000, 887.460000, 932.620000,
    949.990000, 956.680000, 955.980000, 938.670000, 985.400000,
  };
  bool isOk = true;
  loop(iStep, 5) {
    $(pg, reinforce)(nbEpisode);
    isOk &= (fabs(pg->avgNbStep - checkAvgNbSteps[iStep]) < 1e-3);
  }

  // The visualisation is disabled; replace the condition to save the images
  if(0) VisualiseCartpoleEnvironment(&env);
  CUTEST_ASSERT(isOk, "unexpected results");
  CapyPolicyGradientFree(&pg);
  $(&env, destruct)();
}

// Regression test: train the cartpole environment with the
// 'proximalPolicyOptimisation' method and compare the average number of
// steps after each training run with reference values
CUTEST(test004, "Cartpole example (PPO)") {
  CapyRandomSeed_t const seed = 0;
  CartpoleEnvironment env = CartpoleEnvironmentCreate(seed);
  CapyPolicyGradient* pg = CapyPolicyGradientAlloc((CapyPGEnvironment*)&env);
  pg->learnRateAction = 0.0001;
  pg->learnRateState = 0.0001;
  pg->discount = 0.999;
  pg->nbMaxStep = 1000;
  size_t nbEpisode = 100;

  // Reference values of pg->avgNbStep after each successive training run.
  // Only the first 5 values are checked by the loop below.
  double const checkAvgNbSteps[25] = {
    68.710000, 72.770000, 71.830000, 78.780000, 84.770000,
    88.550000, 109.770000, 134.270000, 178.160000, 218.490000,
    285.250000, 326.750000, 404.960000, 478.100000, 621.210000,
    781.020000, 906.260000, 947.280000, 917.450000, 938.080000,
    919.690000, 931.580000, 975.950000, 968.790000, 991.580000,
  };
  bool isOk = true;
  loop(iStep, 5) {
    $(pg, proximalPolicyOptimisation)(nbEpisode);
    isOk &= (fabs(pg->avgNbStep - checkAvgNbSteps[iStep]) < 1e-3);
  }

  // The visualisation is disabled; replace the condition to save the images
  if(0) VisualiseCartpoleEnvironment(&env);
  CUTEST_ASSERT(isOk, "unexpected results");
  CapyPolicyGradientFree(&pg);
  $(&env, destruct)();
}
