// -------------------------- predictor.c -------------------------
/*
    LibCapy - a general purpose library of C functions and data structures
    Copyright (C) 2021-2025 Pascal Baillehache baillehache.pascal@gmail.com
    https://baillehachepascal.dev
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "predictor.h"

// Read one of the accuracy measures stored in the evaluation
// Input:
//   type: the accuracy measure to read (used as index into 'accuracies')
// Output:
//   Return the value stored for that measure.
static double GetAccuracy(CapyPredictorAccuracyMeasure const type) {
  methodOf(CapyPredictorEvaluation);
  double const accuracy = that->accuracies[type];
  return accuracy;
}

// Release the memory owned by a CapyPredictorEvaluation
// Output:
//   The confusion matrix is freed and its pointer is reset to NULL.
static void PredictorEvaluationDestruct(void) {
  methodOf(CapyPredictorEvaluation);

  // free(NULL) is a no-op, hence no need to test the pointer beforehand
  free(that->confusionMatrix);
  that->confusionMatrix = NULL;
}

// Create a CapyPredictorEvaluation
// Output:
//   Return a CapyPredictorEvaluation
CapyPredictorEvaluation CapyPredictorEvaluationCreate(void) {
  CapyPredictorEvaluation that = {
    .confusionMatrix = NULL,
    .destruct = PredictorEvaluationDestruct,
    .getAccuracy = GetAccuracy,
  };
  loop(iAccType, capyPredictorAccuracyMeasure_nb) {
    that.accuracies[iAccType] = 0.0;
  }
  return that;
}

// Allocate memory for a new CapyPredictorEvaluation and create it
// Output:
//   Return a pointer to the new CapyPredictorEvaluation, or NULL if the
//   allocation left the pointer null.
// Exception:
//   May raise CapyExc_MallocFailed.
CapyPredictorEvaluation* CapyPredictorEvaluationAlloc(void) {
  CapyPredictorEvaluation* evaluation = NULL;
  safeMalloc(evaluation, 1);

  // Initialise the instance only if the memory is available
  if(evaluation != NULL) *evaluation = CapyPredictorEvaluationCreate();
  return evaluation;
}

// Destruct and free a CapyPredictorEvaluation, then reset '*that' to NULL
// Input:
//   that: a pointer to the CapyPredictorEvaluation to free (nothing is done
//         if 'that' or '*that' is NULL)
void CapyPredictorEvaluationFree(CapyPredictorEvaluation** const that) {
  if(that != NULL && *that != NULL) {

    // Release what the instance owns before releasing the instance itself
    $(*that, destruct)();
    free(*that);
    *that = NULL;
  }
}

// Train the predictor on a dataset
// This is the implementation installed as 'train' by CapyPredictorCreate().
// It does not train anything and only raises an exception.
// Input:
//   dataset: the dataset (unused)
// Output:
//   None, the predictor is not modified.
// Exception:
//   Always raises CapyExc_UndefinedExecution
static void Train(CapyDataset const* const dataset) {
  (void)dataset;
  raiseExc(CapyExc_UndefinedExecution);
}

// Classify an input
// This is the implementation installed as 'predict' by
// CapyPredictorCreate(). It does not predict anything and only raises an
// exception.
// Input:
//   inp: the input (unused)
// Output:
//   Return a zero-initialised CapyPredictorPrediction.
// Exception:
//   Always raises CapyExc_UndefinedExecution
static CapyPredictorPrediction Predict(CapyVec const* const inp) {
  (void)inp;
  raiseExc(CapyExc_UndefinedExecution);
  return (CapyPredictorPrediction){0};
}

// Convert a CapyDataset into a CapyMat usable by the predictor
// This is the implementation installed as 'cvtDatasetToMat' by
// CapyPredictorCreate(). It does not convert anything and only raises an
// exception.
// Input:
//   dataset: the dataset to be converted (unused)
// Output:
//   Return a zero-initialised CapyMat.
// Exception:
//   Always raises CapyExc_UndefinedExecution
static CapyMat CvtDatasetToMat(CapyDataset const* const dataset) {
  (void)dataset;
  raiseExc(CapyExc_UndefinedExecution);
  return (CapyMat){0};
}

// Evaluate the predictor on a dataset (version for categorical)
// Input:
//   that: the predictor to evaluate
//   dataset: the dataset
// Output:
//   Return a newly allocated evaluation holding the confusion matrix
//   (indexed as [expected category * nbCategory + predicted category]) and
//   the overall accuracy, or NULL if memory could not be allocated. The
//   result can be released with CapyPredictorEvaluationFree().
// Exception:
//   Raises CapyExc_UndefinedExecution if the expected category of a row is
//   negative, or if the expected or predicted category does not fit in the
//   confusion matrix. May also raise the exceptions of the allocations.
static CapyPredictorEvaluation* EvaluateCategorical(
      CapyPredictor* const that,
  CapyDataset const* const dataset) {

  // Variable to memorise the results
  CapyPredictorEvaluation* eval = CapyPredictorEvaluationAlloc();
  if(eval == NULL) return NULL;

  // Convert the dataset into a matrix
  CapyMat mat = $(that, cvtDatasetToMat)(dataset);

  // Allocate memory for the confusion matrix
  size_t nbCategory = $(dataset, getNbValOutputField)(0);
  safeMalloc(eval->confusionMatrix, nbCategory * nbCategory);
  if(eval->confusionMatrix == NULL) {

    // Release what has been allocated so far to avoid leaking it
    CapyMatDestruct(&mat);
    CapyPredictorEvaluationFree(&eval);
    return NULL;
  }
  loop(i, nbCategory * nbCategory) eval->confusionMatrix[i] = 0;

  // Get the number of input fields
  size_t nbInput = $(dataset, getNbInput)();

  // Loop on the rows
  loop(iRow, dataset->nbRow) {

    // Cast the row into an input vector (the first 'nbInput' values of the
    // row, no copy is made)
    CapyVec inpPredict = {
      .dim = nbInput,
      .vals = mat.vals + iRow * mat.nbCol
    };

    // Predict the category for that row
    CapyPredictorPrediction resPredict = $(that, predict)(&inpPredict);

    // The expected category is read from the last value of the row
    double const expectedVal = mat.vals[(iRow + 1) * mat.nbCol - 1];

    // If the expected category is valid
    if(expectedVal >= 0.0) {

      // Get the expected and predicted categories
      size_t expectedCategory = (size_t)lround(expectedVal);
      size_t predictedCategory = resPredict.category;

      // Update the confusion matrix, only if both categories are inside it
      // to avoid writing out of the allocated memory
      if(expectedCategory < nbCategory && predictedCategory < nbCategory) {
        size_t idx = expectedCategory * nbCategory + predictedCategory;
        eval->confusionMatrix[idx] += 1;
      } else {
        raiseExc(CapyExc_UndefinedExecution);
      }

    // Else the expected category is not valid
    } else {
      raiseExc(CapyExc_UndefinedExecution);
    }
  }

  // Calculate the overall accuracy from the confusion matrix (number of
  // predictions on the diagonal divided by the number of rows)
  size_t nbCorrectPred = 0;
  loop(i, nbCategory) {
    nbCorrectPred += eval->confusionMatrix[i * nbCategory + i];
  }
  eval->accuracies[capyPredictorAccuracyMeasure_accuracy] =
    (double)nbCorrectPred / (double)dataset->nbRow;

  // Free memory
  CapyMatDestruct(&mat);

  // Return the results
  return eval;
}

// Evaluate the predictor on a dataset (version for numerical)
// Input:
//   that: the predictor to evaluate
//   dataset: the dataset
// Output:
//   Return a newly allocated evaluation holding the MAE, RMSE and R^2 of
//   the predictions over the rows of the dataset, or NULL if the evaluation
//   could not be allocated. The result can be released with
//   CapyPredictorEvaluationFree().
static CapyPredictorEvaluation* EvaluateNumerical(
      CapyPredictor* const that,
  CapyDataset const* const dataset) {

  // Variable to memorise the results
  CapyPredictorEvaluation* eval = CapyPredictorEvaluationAlloc();
  if(eval == NULL) return NULL;

  // Convert the dataset into a matrix
  CapyMat mat = $(that, cvtDatasetToMat)(dataset);

  // Get the number of input fields
  size_t nbInput = $(dataset, getNbInput)();

  // Vector of absolute error
  CapyVec absErr = CapyVecCreate(dataset->nbRow);

  // Vector of true value
  CapyVec trueVal = CapyVecCreate(dataset->nbRow);

  // Loop on the rows
  loop(iRow, dataset->nbRow) {

    // Cast the row into an input vector (the first 'nbInput' values of the
    // row, no copy is made)
    CapyVec inpPredict = {
      .dim = nbInput,
      .vals = mat.vals + iRow * mat.nbCol
    };

    // Predict the value for that row
    CapyPredictorPrediction resPredict = $(that, predict)(&inpPredict);

    // The true value is read from the last value of the row
    double const expectedVal = mat.vals[(iRow + 1) * mat.nbCol - 1];

    // Update the vectors of absolute error and true value
    absErr.vals[iRow] = fabs(expectedVal - resPredict.val);
    trueVal.vals[iRow] = expectedVal;
  }

  // Update MAE
  eval->accuracies[capyPredictorAccuracyMeasure_mae] =
    CapyVecGetMoment(&absErr, 0, 1);

  // Update RMSE
  double sumSquareErr = 0.0;
  loop(i, absErr.dim) {
    sumSquareErr += absErr.vals[i] * absErr.vals[i];
  }
  eval->accuracies[capyPredictorAccuracyMeasure_rmse] =
    sqrt(sumSquareErr / ((double)(absErr.dim)));

  // Update R^2 (one minus the sum of squared errors over the sum of squared
  // deviations of the true values from their average; the divisor is zero
  // when all the true values are equal)
  double const avgTrueVal = CapyVecGetMoment(&trueVal, 0, 1);
  double sumSquareMean = 0.0;
  loop(i, absErr.dim) {
    double const d = trueVal.vals[i] - avgTrueVal;
    sumSquareMean += d * d;
  }
  eval->accuracies[capyPredictorAccuracyMeasure_rSquared] =
    1.0 - sumSquareErr / sumSquareMean;

  // Free memory
  CapyVecDestruct(&absErr);
  CapyVecDestruct(&trueVal);
  CapyMatDestruct(&mat);

  // Return the results
  return eval;
}

// Evaluate the predictor on a dataset
// Input:
//   dataset: the dataset
// Output:
//   Return the evaluation produced by the version matching the type of the
//   predictor (categorical or numerical), or NULL for any other type.
// Exception:
//   Raises CapyExc_UndefinedExecution if the type of predictor is neither
//   categorical nor numerical.
static CapyPredictorEvaluation* Evaluate(CapyDataset const* const dataset) {
  methodOf(CapyPredictor);
  CapyPredictorEvaluation* evaluation = NULL;

  // Dispatch on the type of predictor
  switch(that->type) {
    case capyPredictorType_categorical:
      evaluation = EvaluateCategorical(that, dataset);
      break;
    case capyPredictorType_numerical:
      evaluation = EvaluateNumerical(that, dataset);
      break;
    default:
      raiseExc(CapyExc_UndefinedExecution);
      break;
  }
  return evaluation;
}

// Clone a predictor
// Output:
//   Return a newly allocated copy of the predictor, or NULL if memory could
//   not be allocated. All the fields are copied by value, except the array
//   of scaling ranges 'scalingFrom' which is duplicated so that the clone
//   owns its own array.
// Exception:
//   May raise the exceptions of safeMalloc.
static void* Clone(void) {
  methodOf(CapyPredictor);
  CapyPredictor* clone = NULL;
  safeMalloc(clone, 1);
  if(!clone) return NULL;

  // Copy all the fields by value
  *clone = *that;

  // Duplicate the scaling ranges if there are any
  if(that->scalingFrom != NULL) {

    // Forget the copied pointer first, so that the clone never refers to
    // the array of the original, even if the allocation below fails
    clone->scalingFrom = NULL;
    safeMalloc(clone->scalingFrom, that->nbInput);
    if(clone->scalingFrom == NULL) {
      free(clone);
      return NULL;
    }
    loop(i, that->nbInput) clone->scalingFrom[i] = that->scalingFrom[i];
  }
  return clone;
}

// Export the predictor as a C function
// This is the implementation installed as 'exportToCFun' by
// CapyPredictorCreate(). It does not write anything and only raises an
// exception.
// Input:
//   stream: the stream where to export (unused)
//   name: the name of the function (unused)
//   dataset: the training dataset (unused)
// Output:
//   None, nothing is written on the stream.
// Exception:
//   Always raises CapyExc_UndefinedExecution
static void ExportToCFun(
               FILE* const stream,
         char const* const name,
  CapyDataset const* const dataset) {
  (void)stream;
  (void)name;
  (void)dataset;
  raiseExc(CapyExc_UndefinedExecution);
}

// Save the predictor to a stream
// This is the implementation installed as 'save' by CapyPredictorCreate().
// It does not save anything and only raises an exception.
// Input:
//   stream: the stream on which to save (unused)
// Output:
//   None, nothing is written on the stream.
// Exception:
//   Always raises CapyExc_UndefinedExecution
static void Save(FILE* const stream) {
  (void)stream;
  raiseExc(CapyExc_UndefinedExecution);
}

// Export the predictor as a HTML web app
// Input:
//   stream: the stream where to export
//   title: the title of the web app
//   dataset: the training dataset
//   expectedAccuracy: the expected accuracy of the predictor (printed as a
//                     percentage for a categorical predictor, as a MAE
//                     otherwise)
// Output:
//   The page (head, inputs, outputs, disclaimer) and a script defining
//   getInput() are written on the stream. The script refers to the
//   functions elem() and predict(), which are not written by this function.
//   The title and the labels of the dataset are written as they are,
//   without any HTML escaping.
//   'that->scalingFrom' must hold one range per input.
// Exception:
//   Raises CapyExc_UndefinedExecution if the type of predictor is neither
//   categorical nor numerical.
static void ExportToHtml(
               FILE* const stream,
         char const* const title,
  CapyDataset const* const dataset,
              double const expectedAccuracy) {
  methodOf(CapyPredictor);

  // Write the head of the page (up to <body>)
  fprintf(
    stream,
    "<!-- This web app has been automatically generated by LibCapy, "
    "more info here: https://baillehachepascal.dev/ -->\n");
  fprintf(stream, "<head>\n");
  fprintf(stream, "<meta charset=\"UTF-8\">\n");
  fprintf(
    stream,
    "<meta name=\"viewport\" "
    "content=\"width=device-width,initial-scale=1\">\n");
  fprintf(stream, "<style>\n");
  fprintf(stream, "html { width: 100%%; height: 100%%; }\n");
  fprintf(stream, "body { ");
  fprintf(stream, "background-color: #aaaaaa; color: #433126;");
  fprintf(stream, "font-family:  serif; font-size: 20px; ");
  fprintf(stream, "width: 100%%; height: 100%%; margin: 0; ");
  fprintf(stream, "text-align: center; }\n");
  fprintf(
    stream,
    "#divTitle {  padding: 10px; text-align:center; display: inline-block; "
    "width: 100%%; font-size: 30px; }\n");
  fprintf(stream, ".divTile { ");
  fprintf(stream, "padding: 5px; text-align:center; display: inline-block; ");
  fprintf(stream, "vertical-align: top; width: 45%%; min-width: 500px; }\n");
  fprintf(
    stream,
    ".divRow { margin: auto; padding: 5px; width: 95%%; "
    "text-align: left; }\n");
  fprintf(
    stream,
    "select { background-color: #fecb5e; box-shadow: 2px 2px 10px #888888; "
    "color: #624838; margin: 2px 5px; padding: 2px 4px; max-width: 290px; "
    "font: 13px sans-serif; text-decoration: none; font: 13px sans-serif; "
    "border: 1px solid #fee9aa; border-radius: 5px; width: 150px; }\n");
  fprintf(
    stream,
    "#divPredTitle { display: inline-block; text-align: center; }\n");
  fprintf(stream, "input[type=\"range\"] { width: 150px; }\n");
  fprintf(stream, ".divInputLbl { min-width: 150px; display:inline-block;}\n");
  fprintf(stream, ".divInputVal { width: 150px; display:inline-block;}\n");
  fprintf(
    stream,
    "#divPrediction { display: inline-block; text-align: left; }\n");
  fprintf(
    stream,
    "#divConfidence { width: 300px; display: inline-block; "
    "text-align: left; margin-top: 5px; }\n");
  fprintf(
    stream,
    "#divWarning { width: 50%%; min-width: 150px; display: inline-block; "
    "text-align: left; margin-top: 5px; }\n");
  fprintf(stream, "</style>\n");
  fprintf(stream, "</head>\n");
  fprintf(stream, "<title> %s </title>\n", title);

  // Write the body head and title
  fprintf(stream, "<body>\n");
  fprintf(stream, "<div id=\"divTitle\"> %s </div>\n", title);

  // Write the inputs (a slider for a numerical field, a drop-down list
  // otherwise)
  fprintf(stream, "<div class=\"divTile\">\n");
  size_t nbInput = $(dataset, getNbInput)();
  loop(iInput, nbInput) {
    size_t iFieldInput = $(dataset, getIdxInputField)(iInput);
    fprintf(
      stream,
      "<div class=\"divRow\"><div class=\"divInputLbl\">[%s]: </div><br>\n",
      dataset->fields[iFieldInput].label);
    if(dataset->fields[iFieldInput].type == capyDatasetFieldType_num) {

      // The step of the slider is the power of ten equal to or just below
      // one hundredth of the range of the field
      double x =
        0.01 * (
          dataset->fields[iFieldInput].range.max -
          dataset->fields[iFieldInput].range.min
        );
      double stepRange = 1.0;

      // Search the step only for a strictly positive range: for a null
      // range the search would end with a step of 0, and for a negative
      // range it would never end
      if(x > 0.0) {
        while(stepRange < x) stepRange *= 10.0;
        while(stepRange > x) stepRange *= 0.1;
      }
      fprintf(
        stream,
        "<input type=\"range\" id=\"input%zu\" min=%lf max=%lf "
        "value=%lf step=%lf oninput=\"predict();\">\n"
        "<div class=\"divInputVal\" id=\"divInputVal%zu\"> </div>\n",
        iInput,
        dataset->fields[iFieldInput].range.min,
        dataset->fields[iFieldInput].range.max,
        dataset->fields[iFieldInput].range.min,
        stepRange,
        iInput);
    } else {
      fprintf(
        stream,
        "<select id=\"input%zu\" oninput=\"predict();\">",
        iInput);
      loop(iVal, dataset->fields[iFieldInput].nbCategoryVal) {
        fprintf(
          stream,
          "<option value=\"%zu\">%s</option>",
          iVal,
          dataset->fields[iFieldInput].categoryVals[iVal]);
      }
      fprintf(stream, "</select>\n");
    }
    fprintf(stream, "</div>\n");
  }
  fprintf(stream, "</div>\n");

  // Write the outputs
  fprintf(stream, "<div class=\"divTile\">\n");
  size_t iFieldOutput =
    $(dataset, getIdxOutputField)(that->iOutput);
  char const* lblPredCat = dataset->fields[iFieldOutput].label;
  fprintf(
    stream,
    "<div><div id=\"divPredTitle\"> Predicted [%s] is:</div><br>\n",
    lblPredCat);
  fprintf(stream, "<div><div id=\"divPrediction\">\n");
  if(that->type == capyPredictorType_categorical) {
    size_t nbValOutput =
      $(dataset, getNbValOutputField)(that->iOutput);
    loop(iValOutput, nbValOutput) {
      char const* lblPredVal =
        dataset->fields[iFieldOutput].categoryVals[iValOutput];
      fprintf(
        stream,
        "<input type=\"radio\" id=\"pred%zu\"> %s<br>\n",
        iValOutput, lblPredVal);
    }
  } else if(that->type == capyPredictorType_numerical) {
    fprintf(
      stream,
      "<input type=\"text\" id=\"pred\"><br>\n");
  } else raiseExc(CapyExc_UndefinedExecution);
  fprintf(stream, "</div><br>\n");
  if(that->type == capyPredictorType_categorical) {
    fprintf(
      stream,
      "<div id=\"divConfidence\">confidence:"
      "<span id=\"spanConfidence\"></span><br>"
      "(higher is more confident)</div></div>\n");
  } else {

    // Close the div wrapping 'divPrediction' (for a categorical predictor
    // it is closed at the end of the confidence block above), otherwise
    // the tags of the page are unbalanced
    fprintf(stream, "</div>\n");
  }
  fprintf(stream, "<div id=\"divWarning\">\n");
  fprintf(
    stream,
    "Disclaimer:<br>"
    "This prediction is the result of an algorithm which reached an expected ");
  if(that->type == capyPredictorType_categorical) {
    fprintf(stream, "accuracy of %.2lf%% ", expectedAccuracy * 100.0);
  } else {
    fprintf(stream, "MAE of %e ", expectedAccuracy);
  }
  fprintf(
    stream,
    "on training data. However, there is absolutely no "
    "guaranty on the level of accuracy for predictions from data input by the "
    "user. The user of the prediction must ensure its accuracy using third "
    "party means before taking action or making decision based on the "
    "prediction. Any consequence of such action or decision is the entire "
    "responsibility of the user.\n");
  fprintf(stream, "</div></div>\n");
  fprintf(stream, "</div>\n");

  // Write the body tail
  fprintf(stream, "</body>\n");

  // Write the conversion function for the input values (it reads the value
  // of each input and applies the same linear scaling, from 'scalingFrom'
  // to 'scalingTo', as the predictor)
  fprintf(stream, "<script>\n");
  fprintf(stream, "function getInput() {\n");
  fprintf(stream, "  let u = [];\n");
  loop(iInput, nbInput) {
    size_t iFieldInput = $(dataset, getIdxInputField)(iInput);
    if(dataset->fields[iFieldInput].type == capyDatasetFieldType_num) {
      fprintf(
        stream,
        "  elem(\"divInputVal%zu\").innerHTML = elem(\"input%zu\").value;\n",
        iInput, iInput);
    }
    fprintf(stream, "  u[%zu] = elem(\"input%zu\").value;\n", iInput, iInput);
    fprintf(
      stream,
      "  u[%zu] = %.9lf + (u[%zu] - %.9lf) * %.9lf;\n",
      iInput, that->scalingTo.min, iInput, that->scalingFrom[iInput].min,
      (that->scalingTo.max - that->scalingTo.min) /
      (that->scalingFrom[iInput].max - that->scalingFrom[iInput].min));
  }
  fprintf(stream, "  return u;\n");
  fprintf(stream, "}\n");
  fprintf(stream, "window.onload = function() {predict();}\n");
  fprintf(stream, "</script>\n");
}

// Preprocess the input features in the training data
// Input:
//   mat: the training data (one row per sample, the input features are the
//        first values of each row)
//   dataset: the training dataset
// Output:
//   The scaling ranges of the predictor ('scalingFrom' for each input and
//   'scalingTo') are set according to 'featureScaling', then the input
//   features of each row of 'mat' are scaled in place.
//   'that->scalingFrom' must already hold 'that->nbInput' ranges.
// Exception:
//   Raises CapyExc_UndefinedExecution if the type of feature scaling is
//   unknown.
static void ScaleTrainingInputFeatures(
            CapyMat* const mat,
  CapyDataset const* const dataset) {
  methodOf(CapyPredictor);

  // Set the scaling ranges according to the type of feature scaling.
  // The range of the i-th input is read from the field given by
  // getIdxInputField(i), as the input fields are not necessarily the first
  // fields of the dataset.
  if(that->featureScaling == capyPredictorFeatureScaling_none) {

    // Same values (0 and 1) in the origin and destination ranges
    loop(i, that->nbInput) loop(j, 2) that->scalingFrom[i].vals[j] = j;
    that->scalingTo.min = 0.0;
    that->scalingTo.max = 1.0;
  } else if(
    that->featureScaling == capyPredictorFeatureScaling_minMaxNormalization
  ) {

    // [min, max] of the field -> [0, 1]
    loop(i, that->nbInput) {
      size_t iField = $(dataset, getIdxInputField)(i);
      that->scalingFrom[i] = dataset->fields[iField].range;
    }
    that->scalingTo.min = 0.0;
    that->scalingTo.max = 1.0;
  } else if(
    that->featureScaling == capyPredictorFeatureScaling_minMaxNormalizationSym
  ) {

    // [min, max] of the field -> [-1, 1]
    loop(i, that->nbInput) {
      size_t iField = $(dataset, getIdxInputField)(i);
      that->scalingFrom[i] = dataset->fields[iField].range;
    }
    that->scalingTo.min = -1.0;
    that->scalingTo.max = 1.0;
  } else if(
    that->featureScaling == capyPredictorFeatureScaling_meanNormalization
  ) {

    // [avg - (max - min), avg + (max - min)] -> [-1, 1]
    loop(i, that->nbInput) {
      size_t iField = $(dataset, getIdxInputField)(i);
      double avg = CapyMatGetMomentCol(mat, i, 0, 1);
      double r =
        dataset->fields[iField].range.max - dataset->fields[iField].range.min;
      that->scalingFrom[i].min = avg - r;
      that->scalingFrom[i].max = avg + r;
    }
    that->scalingTo.min = -1.0;
    that->scalingTo.max = 1.0;
  } else if(
    that->featureScaling == capyPredictorFeatureScaling_standardization
  ) {

    // [avg - sigma, avg + sigma] -> [-1, 1]
    loop(i, that->nbInput) {
      double avg = CapyMatGetMomentCol(mat, i, 0, 1);
      double sigma = sqrt(CapyMatGetMomentCol(mat, i, avg, 2));
      that->scalingFrom[i].min = avg - sigma;
      that->scalingFrom[i].max = avg + sigma;
    }
    that->scalingTo.min = -1.0;
    that->scalingTo.max = 1.0;
  } else raiseExc(CapyExc_UndefinedExecution);

  // Preprocess the training data, one row at a time (each row is scaled in
  // place through a vector pointing to its input values)
  size_t nbInput = $(dataset, getNbInput)();
  loop(iRow, mat->nbRow) {
    CapyVec inp = {.dim = nbInput, .vals = mat->vals + iRow * mat->nbCol};
    $(that, scaleInputFeatures)(&inp);
  }
}

// Scale the input features of an input vector
// Input:
//   inp: the input vector
// Output:
//   Each value of 'inp' is replaced by the result of CapyLerp() applied to
//   it with the range 'scalingFrom' of same index and the range 'scalingTo'.
static void ScaleInputFeatures(CapyVec* const inp) {
  methodOf(CapyPredictor);
  loop(iFeature, inp->dim) {
    double const scaled =
      CapyLerp(
        inp->vals[iFeature], that->scalingFrom + iFeature, &(that->scalingTo));
    inp->vals[iFeature] = scaled;
  }
}

// Export the input feature scaling as C code
// Input:
//   stream: the stream on which the code is exported
// Output:
//   The scaling code is written to the stream (to be used by the
//   exportToCFun method). It declares an array 'w' of 'nbInput' values and
//   sets each of them to the scaled value of the element of same index in
//   an array 'u', which the surrounding exported code must provide.
static void ExportScaleInputToCFun(FILE* const stream) {
  methodOf(CapyPredictor);

  // Initialise with '{0}' rather than '{}': an empty initializer is not
  // valid C before C23, so the exported code would not be portable
  fprintf(stream, "  double w[%zu] = {0};\n", that->nbInput);

  // The constants are written as hexadecimal floating-point literals (%a)
  loop(i, that->nbInput) {
    fprintf(
      stream,
      "  w[%zu] = (double)%a + (u[%zu] - (double)%a) * (double)%a;\n",
      i, that->scalingTo.min, i, that->scalingFrom[i].min,
      (that->scalingTo.max - that->scalingTo.min) /
      (that->scalingFrom[i].max - that->scalingFrom[i].min));
  }
}

// Free the memory used by a CapyPredictor
// Output:
//   The scaling ranges are destructed, the array 'scalingFrom' is freed and
//   its pointer is reset to NULL.
static void Destruct(void) {
  methodOf(CapyPredictor);

  // Skip the array of ranges if it was never allocated, and forget the
  // pointer once freed so that it can't be used or freed again
  if(that->scalingFrom != NULL) {
    loop(i, that->nbInput) $(that->scalingFrom + i, destruct)();
    free(that->scalingFrom);
    that->scalingFrom = NULL;
  }
  $(&(that->scalingTo), destruct)();
}

// Create a CapyPredictor
// Input:
//   type: type of predictor
// Output:
//   Return a CapyPredictor of the given type, with no input, no feature
//   scaling, no scaling ranges allocated, and the methods defined in this
//   file.
CapyPredictor CapyPredictorCreate(CapyPredictorType const type) {
  CapyPredictor that = {

    // Properties
    .type = type,
    .nbInput = 0,
    .iOutput = 0,
    .featureScaling = capyPredictorFeatureScaling_none,
    .scalingFrom = NULL,
    .scalingTo = CapyRangeDoubleCreate(0.0, 1.0),

    // Life cycle
    .destruct = Destruct,
    .clone = Clone,

    // Training and prediction
    .train = Train,
    .predict = Predict,
    .evaluate = Evaluate,
    .cvtDatasetToMat = CvtDatasetToMat,
    .scaleTrainingInputFeatures = ScaleTrainingInputFeatures,
    .scaleInputFeatures = ScaleInputFeatures,

    // Export and save
    .exportToCFun = ExportToCFun,
    .exportScaleInputToCFun = ExportScaleInputToCFun,
    .exportToHtml = ExportToHtml,
    .save = Save,
  };
  return that;
}

// Allocate memory for a new CapyPredictor and create it
// Input:
//   type: type of predictor
// Output:
//   Return a pointer to the new CapyPredictor, or NULL if the allocation
//   left the pointer null.
// Exception:
//   May raise CapyExc_MallocFailed.
CapyPredictor* CapyPredictorAlloc(CapyPredictorType const type) {
  CapyPredictor* predictor = NULL;
  safeMalloc(predictor, 1);

  // Initialise the instance only if the memory is available
  if(predictor != NULL) *predictor = CapyPredictorCreate(type);
  return predictor;
}

// Destruct and free a CapyPredictor, then reset '*that' to NULL
// Input:
//   that: a pointer to the CapyPredictor to free (nothing is done if 'that'
//         or '*that' is NULL)
void CapyPredictorFree(CapyPredictor** const that) {
  if(that != NULL && *that != NULL) {

    // Release what the instance owns before releasing the instance itself
    $(*that, destruct)();
    free(*that);
    *that = NULL;
  }
}
