// ------------------------------ math.c ------------------------------
/*
    LibCapy - a general purpose library of C functions and data structures
    Copyright (C) 2021-2025 Pascal Baillehache baillehache.pascal@gmail.com
    https://baillehachepascal.dev
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "capymath.h"

// Greatest common divisor of two unsigned integers (binary GCD, aka
// Stein's algorithm)
// Input:
//   a, b: the two integers
// Output:
//   Return gcd(a, b). If one of the inputs is null the other one is
//   returned (hence gcd(0, 0) = 0).
uint64_t CapyGcd(
  uint64_t a,
  uint64_t b) {

  // Trivial cases
  if(a == 0) return b;
  if(b == 0) return a;

  // Strip the factors 2 shared by both values and count them
  uint8_t nbCommonTwos = 0;
  for(; ((a | b) & 1) == 0; a >>= 1, b >>= 1) ++nbCommonTwos;

  // Make 'a' odd, its remaining factors 2 can't be part of the gcd
  while((a & 1) == 0) a >>= 1;

  // Here b is never null on entry, and a stays odd all along
  while(b != 0) {

    // Make 'b' odd too
    while((b & 1) == 0) b >>= 1;

    // Ensure a <= b, then replace b with the (even) difference
    if(a > b) {
      uint64_t const swapped = a;
      a = b;
      b = swapped;
    }
    b -= a;
  }

  // Put back the shared factors 2
  return a << nbCommonTwos;
}

// Bezout decomposition of two signed integers (extended Euclidean
// algorithm, iterative formulation)
// Input:
//   a, b: the two integers
//   x, y: the two result coefficients
// Output:
//   Update x and y such that ax+by=gcd(a,b), and return gcd(a,b). If b is
//   null the result is x=1, y=0 and 'a' is returned as is.
int64_t CapyGcdDecomposition(
   int64_t a,
   int64_t b,
  int64_t* x,
  int64_t* y) {

  // Invariant: remPrev = coefXPrev*a + coefYPrev*b and
  //            remCur = coefXCur*a + coefYCur*b
  int64_t remPrev = a;
  int64_t remCur = b;
  int64_t coefXPrev = 1;
  int64_t coefXCur = 0;
  int64_t coefYPrev = 0;
  int64_t coefYCur = 1;
  while(remCur != 0) {
    int64_t const quotient = remPrev / remCur;
    int64_t const remNext = remPrev % remCur;
    int64_t const coefXNext = coefXPrev - quotient * coefXCur;
    int64_t const coefYNext = coefYPrev - quotient * coefYCur;
    remPrev = remCur;
    remCur = remNext;
    coefXPrev = coefXCur;
    coefXCur = coefXNext;
    coefYPrev = coefYCur;
    coefYCur = coefYNext;
  }
  *x = coefXPrev;
  *y = coefYPrev;
  return remPrev;
}

// Bezout decomposition of two unsigned integers (extended Euclidean
// algorithm)
// Input:
//   a, b: the two integers
//   x, y: the two result (signed) coefficients
// Output:
//   Update x and y such that ax+by=gcd(a,b), and return gcd(a,b). If b is
//   null the result is x=1, y=0 and 'a' is returned as is.
uint64_t CapyGcdDecompositionUnsignedInput(
  uint64_t a,
  uint64_t b,
  int64_t* x,
  int64_t* y) {

  // Trivial case
  if(b == 0) {
    *x = 1;
    *y = 0;
    return a;
  }

  // Invariant: remPrev = coefXPrev*a + coefYPrev*b and
  //            remCur = coefXCur*a + coefYCur*b
  uint64_t remPrev = a;
  uint64_t remCur = b;
  int64_t coefXPrev = 1;
  int64_t coefYPrev = 0;
  int64_t coefXCur = 0;
  int64_t coefYCur = 1;
  for(;;) {
    uint64_t const quotient = remPrev / remCur;
    uint64_t const remNext = remPrev - quotient * remCur;

    // The last non null remainder is the gcd
    if(remNext == 0) break;
    int64_t const coefXNext = coefXPrev - ((int64_t)quotient) * coefXCur;
    int64_t const coefYNext = coefYPrev - ((int64_t)quotient) * coefYCur;
    coefXPrev = coefXCur;
    coefYPrev = coefYCur;
    remPrev = remCur;
    coefXCur = coefXNext;
    coefYCur = coefYNext;
    remCur = remNext;
  }
  *x = coefXCur;
  *y = coefYCur;
  return remCur;
}

// Get the LCM of two positive integers
// Input:
//   a, b: the two integers
// Output:
//   Return the lowest common multiple using the formula
//   lcm(a, b) = (a / gcd(a, b)) * b
//   The division is performed before the multiplication to avoid the
//   overflow of the intermediate product a*b when the result itself fits
//   in an uint64_t. If a or b is null, return 0 (this also avoids the
//   division by gcd(0, 0) = 0).
uint64_t CapyLcm(
  uint64_t a,
  uint64_t b) {
  if(a == 0 || b == 0) return 0;

  // gcd(a, b) divides a exactly, so no precision is lost here
  return (a / CapyGcd(a, b)) * b;
}

// Multiplication of two integers with the peasant (shift and add) method
// Input:
//   a, b: the two integers to multiply
// Output:
//   Return a*b (modulo 2^64 as for any uint64_t arithmetic).
uint64_t CapyPeasantMul(
  uint64_t a,
  uint64_t b) {
  uint64_t product = 0;

  // Consume b bit by bit, doubling a at each step
  for(; b != 0; b >>= 1, a <<= 1) {
    if((b & 1) != 0) product += a;
  }
  return product;
}

// Calculate ab/c for a,b,c positive integers while avoiding eventual
// intermediate overflow using the peasant multiplication
// Input:
//   a, b, c: the three integers
// Output:
//   Return the value of ab/c as an uint64_t for the integer part and
//   a CapyRatio for the remaining fractional part, or {0, capyRatioNaN}
//   if the calculation overflows or if c is null.
//   Principle: a is split as a = (a/c)*c + r, so that
//   ab/c = (a/c)*b + rb/c. The first term goes directly into the integer
//   part, the second one is accumulated with a peasant multiplication of
//   r by b in which every multiple of c is moved into the integer part as
//   soon as it appears.
CapyPeasantMulDivRes CapyPeasantMulDiv(
  uint64_t a,
  uint64_t b,
  uint64_t c) {

  // Eliminate trivial cases
  if(c == 0) {
    return (CapyPeasantMulDivRes){0, capyRatioNaN};
  }
  if(a == 0 || b == 0) {
    return (CapyPeasantMulDivRes){0, capyRatioZero};
  }
  if(a == c) {
    return (CapyPeasantMulDivRes){b, capyRatioZero};
  }
  if(b == c) {
    return (CapyPeasantMulDivRes){a, capyRatioZero};
  }

  // NOTE(review): in the two cases below the whole value b/c (resp. a/c)
  // is handed to CapyRatioReduce with a null base, presumably
  // CapyRatioReduce moves the integer part into the ratio's base - confirm.
  if(a == 1) {
    return (CapyPeasantMulDivRes){0, CapyRatioReduce((CapyRatio){0, b, c})};
  }
  if(b == 1) {
    return (CapyPeasantMulDivRes){0, CapyRatioReduce((CapyRatio){0, a, c})};
  }

  // Constant used during calculation
  uint64_t const half_uint64_max = UINT64_MAX / 2;

  // Initialise the result value with the result of the integer division
  CapyPeasantMulDivRes res = {0, capyRatioZero};
  res.frac.den = c;
  res.base = a / c;

  // Check that (a/c)*b fits in an uint64_t before calculating it
  if(res.base > UINT64_MAX / b) {
    return (CapyPeasantMulDivRes){0, capyRatioNaN};
  }
  res.base *= b;

  // Variable to memorise the remainder from the integer division which
  // we have to multiply by b and add to the result value
  uint64_t r = a % c;

  // Peasant multiplication algorithm to calculate res.frac.num=rb,
  // modified to update res.base when accounting for the division of
  // res.frac.num by c.
  // Loop until b has been consumed, or the remainder is null.
  while(b > 0 && r > 0) {

    // If b is odd, the remainder is added to the result of the
    // multiplication.
    if(b & 1) {

      // Add the current remainder to the numerator of the fractional
      // part. If by adding it the numerator overflows, we are sure
      // that it will be larger than c. To avoid the overflow we can
      // first increment the numerator up to c, equivalent to reset it
      // to 0 and increment the base by one, and then update the
      // numerator with the remainder decreased by the initial numerator
      // minus c (which can't overflow anymore).
      uint64_t t = r;
      if(res.frac.num > UINT64_MAX - t) {
        t -= c - res.frac.num;
        if(res.base == UINT64_MAX) {
          return (CapyPeasantMulDivRes){0, capyRatioNaN};
        }
        res.base++;
        res.frac.num = 0;
      }
      res.frac.num += t;

      // If the numerator becomes larger than c it means the current
      // value of res.frac.num/c is larger than 1, increment res.base
      // by one and decrement res.frac.num by c. This helps avoiding
      // overflows of a*b.
      while(res.frac.num >= c) {
        res.frac.num -= c;
        if(res.base == UINT64_MAX) {
          return (CapyPeasantMulDivRes){0, capyRatioNaN};
        }
        res.base++;
      }
    }

    // Update the multiplication.
    b /= 2;

    // Avoid updating the remainder if b reaches 0, we don't need it
    // anymore anyway so it avoids useless calculation.
    if(b > 0) {

      // If updating the remainder would overflow
      if(r > half_uint64_max) {

        // Increment the base and update the remainder accordingly:
        // 2r is replaced by 2r-c, and the c taken out is worth b units
        // in the integer part (as the remainder is still to be
        // multiplied by the halved b).
        if(res.base > UINT64_MAX - b) {
          return (CapyPeasantMulDivRes){0, capyRatioNaN};
        }
        res.base += b;
        r -= c - r;

      // Update the remainder normally.
      } else r *= 2;

      // If the remainder becomes larger than divisor we can jump
      // forward by increment of b to go faster and avoid overflow on
      // the remainder.
      while(r >= c) {
        r -= c;
        if(res.base > UINT64_MAX - b) {
          return (CapyPeasantMulDivRes){0, capyRatioNaN};
        }
        res.base += b;
      }
    }
  }

  // Reduce the fraction
  res.frac = CapyRatioReduce(res.frac);

  // Return the result
  return res;
}

// Generic smooth step function.
// Input:
//   x: input, in [0.0, 1.0]
//   a: smoothing coefficient in ]0.0, +inf]
// Output:
//   Return the smoothed value of x. If a equals 1.0, it's x itself. As
//   a gets lower than 1.0, the smoothed value varies following the
//   pattern fast-slow-fast. As a gets greater than 1.0, the smoothed
//   value varies following the pattern slow-fast-slow. Continuous but
//   not necessary derivable at x=0.0 or x=1.0.
double CapySmoothStep(
  double const x,
  double const a) {
  return pow(x, a) / (pow(x, a) + pow(1.0 - x, a));
}

// Smoother step function, 6x^5 - 15x^4 + 10x^3 (in Horner form)
// Inputs:
//   x: the input value in [0,1]
// Output:
//   Return the smoothed value, in [0, 1]
double CapySmootherStep(double const x) {
  double const cube = x * x * x;
  double const poly = x * (x * 6.0 - 15.0) + 10.0;
  return cube * poly;
}

// Power function for an integer value and integer exponent
// (exponentiation by squaring)
// Input:
//   x: the value
//   n: the power
// Output:
//   Return x^n (1 if n is null, whatever x)
int64_t CapyPowi(
   int64_t const x,
  uint64_t const n) {

  // Trivial case
  if(n == 0) return 1;

  // 'squared' holds x^(2^k) at the k-th step, 'oddPart' collects the
  // factors matching the bits set in the lower part of n
  int64_t squared = x;
  int64_t oddPart = 1;
  for(uint64_t exponent = n; exponent > 1; exponent >>= 1) {
    if((exponent & 1) != 0) oddPart *= squared;
    squared *= squared;
  }

  // The highest bit of n is always set, account for it here
  return squared * oddPart;
}

// Power function for a real value and integer exponent
// (exponentiation by squaring)
// Input:
//   x: the value
//   n: the power
// Output:
//   Return x^n (1.0 if n is null, whatever x)
double CapyPowf(
    double const x,
  uint64_t const n) {

  // Trivial case
  if(n == 0) return 1.0;

  // 'squared' holds x^(2^k) at the k-th step, 'oddPart' collects the
  // factors matching the bits set in the lower part of n
  double squared = x;
  double oddPart = 1;
  for(uint64_t exponent = n; exponent > 1; exponent >>= 1) {
    if((exponent & 1) != 0) oddPart *= squared;
    squared *= squared;
  }

  // The highest bit of n is always set, account for it here
  return squared * oddPart;
}

// LERP function, map a double value linearly from a range to another
// Input:
//      x: the input
//   from: range of the input
//     to: range of the output
// Output:
//   Return the mapped input. 'from->min' maps to 'to->min' and 'from->max'
//   maps to 'to->max'; x is not clamped to 'from'.
double CapyLerp(
                  double const x,
  CapyRangeDouble const* const from,
  CapyRangeDouble const* const to) {
  double const fromSpan = from->max - from->min;
  double const toSpan = to->max - to->min;

  // Relative position of x in the input range
  double const ratio = (x - from->min) / fromSpan;
  return to->min + ratio * toSpan;
}

// LERP function, map a double value linearly from [0, 1] to the range
// described by an array of two values
// Input:
//      x: the input
//     to: array of two values (value for x=0, value for x=1)
// Output:
//   Return the mapped input
double CapyLerpNorm2Arr(
  double const x,
  double const to[2]) {
  double const origin = to[0];
  double const span = to[1] - origin;
  return origin + x * span;
}

// Comparator functions for basic types (to be used with the qsort
// function)
// Comparator for char in increasing order: 1 if *a>*b, 0 if equal, else -1
int CapyCmpCharInc(
  char const* a,
  char const* b) {
  char const lhs = *a;
  char const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for int8_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpInt8Inc(
  int8_t const* a,
  int8_t const* b) {
  int8_t const lhs = *a;
  int8_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for uint8_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpUInt8Inc(
  uint8_t const* a,
  uint8_t const* b) {
  uint8_t const lhs = *a;
  uint8_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for int16_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpInt16Inc(
  int16_t const* a,
  int16_t const* b) {
  int16_t const lhs = *a;
  int16_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for uint16_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpUInt16Inc(
  uint16_t const* a,
  uint16_t const* b) {
  uint16_t const lhs = *a;
  uint16_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for int32_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpInt32Inc(
  int32_t const* a,
  int32_t const* b) {
  int32_t const lhs = *a;
  int32_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for uint32_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpUInt32Inc(
  uint32_t const* a,
  uint32_t const* b) {
  uint32_t const lhs = *a;
  uint32_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for int64_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpInt64Inc(
  int64_t const* a,
  int64_t const* b) {
  int64_t const lhs = *a;
  int64_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for uint64_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpUInt64Inc(
  uint64_t const* a,
  uint64_t const* b) {
  uint64_t const lhs = *a;
  uint64_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for float in increasing order: 1 if *a>*b, 0 if equal,
// else -1 (which includes the case where one of the values is NaN)
int CapyCmpFloatInc(
  float const* a,
  float const* b) {
  if(*a > *b) return 1;
  if(*a == *b) return 0;
  return -1;
}

// Comparator for double in increasing order: 1 if *a>*b, 0 if equal,
// else -1 (which includes the case where one of the values is NaN)
int CapyCmpDoubleInc(
  double const* a,
  double const* b) {
  if(*a > *b) return 1;
  if(*a == *b) return 0;
  return -1;
}

// Comparator for size_t in increasing order: 1 if *a>*b, 0 if equal,
// else -1
int CapyCmpSizeInc(
  size_t const* a,
  size_t const* b) {
  size_t const lhs = *a;
  size_t const rhs = *b;
  return (lhs > rhs) - (lhs < rhs);
}

// Comparator for char in decreasing order: 1 if *a<*b, 0 if equal, else -1
int CapyCmpCharDec(
  char const* a,
  char const* b) {
  char const lhs = *a;
  char const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for int8_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpInt8Dec(
  int8_t const* a,
  int8_t const* b) {
  int8_t const lhs = *a;
  int8_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for uint8_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpUInt8Dec(
  uint8_t const* a,
  uint8_t const* b) {
  uint8_t const lhs = *a;
  uint8_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for int16_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpInt16Dec(
  int16_t const* a,
  int16_t const* b) {
  int16_t const lhs = *a;
  int16_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for uint16_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpUInt16Dec(
  uint16_t const* a,
  uint16_t const* b) {
  uint16_t const lhs = *a;
  uint16_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for int32_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpInt32Dec(
  int32_t const* a,
  int32_t const* b) {
  int32_t const lhs = *a;
  int32_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for uint32_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpUInt32Dec(
  uint32_t const* a,
  uint32_t const* b) {
  uint32_t const lhs = *a;
  uint32_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for int64_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpInt64Dec(
  int64_t const* a,
  int64_t const* b) {
  int64_t const lhs = *a;
  int64_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for uint64_t in decreasing order: 1 if *a<*b, 0 if equal,
// else -1
int CapyCmpUInt64Dec(
  uint64_t const* a,
  uint64_t const* b) {
  uint64_t const lhs = *a;
  uint64_t const rhs = *b;
  return (lhs < rhs) - (lhs > rhs);
}

// Comparator for float in decreasing order: 1 if *a<*b, 0 if equal,
// else -1 (which includes the case where one of the values is NaN)
int CapyCmpFloatDec(
  float const* a,
  float const* b) {
  if(*a < *b) return 1;
  if(*a == *b) return 0;
  return -1;
}

// Comparator for double in decreasing order: 1 if *a<*b, 0 if equal,
// else -1 (which includes the case where one of the values is NaN)
int CapyCmpDoubleDec(
  double const* a,
  double const* b) {
  if(*a < *b) return 1;
  if(*a == *b) return 0;
  return -1;
}

// Predefined unit vector along the X axis in 2D, (1, 0)
CapyVec const capyXAxis2D = {
  .dim = 2,
  .vals = (double[2]){1.0, 0.0},
};

// Predefined unit vector along the Y axis in 2D, (0, 1)
CapyVec const capyYAxis2D = {
  .dim = 2,
  .vals = (double[2]){0.0, 1.0},
};

// Predefined unit vector along the X axis in 3D, (1, 0, 0)
CapyVec const capyXAxis3D = {
  .dim = 3,
  .vals = (double[3]){1.0, 0.0, 0.0},
};

// Predefined unit vector along the Y axis in 3D, (0, 1, 0)
CapyVec const capyYAxis3D = {
  .dim = 3,
  .vals = (double[3]){0.0, 1.0, 0.0},
};

// Predefined unit vector along the Z axis in 3D, (0, 0, 1)
CapyVec const capyZAxis3D = {
  .dim = 3,
  .vals = (double[3]){0.0, 0.0, 1.0},
};

// Allocate an array of CapyVec
// Input:
//   dim: the dimension of the vectors
//    nb: the size of the array
// Output:
//   Return a newly allocated array of CapyVec (values initialised to 0.0)
// Exceptions:
//   May raise CapyExc_MallocFailed
CapyVec* CapyVecAllocArr(
  size_t dim,
  size_t nb) {
  CapyVec* that = NULL;
  safeMalloc(that, nb);
  if(!that) return NULL;
  loop(i, nb) that[i] = CapyVecCreate(dim);
  return that;
}

// Free an array of CapyVec
// Input:
//   that: pointer to the array of CapyVec
//     nb: the size of the array
// Output:
//   Each vector of the array is destructed, the array is freed and *that
//   is reset to NULL. Does nothing if 'that' or '*that' is NULL.
void CapyVecFreeArr(
  CapyVec** const that,
           size_t nb) {
  if(that != NULL && *that != NULL) {
    CapyVec* const arr = *that;
    loop(iVec, nb) CapyVecDestruct(arr + iVec);
    free(arr);
    *that = NULL;
  }
}

// Create a CapyVec
// Input:
//   dim: the dimension of the vector
// Output:
//   Return a CapyVec (values initialised to 0.0)
// Exceptions:
//   May raise CapyExc_MallocFailed
CapyVec CapyVecCreate(size_t dim) {
  CapyVec that = {.dim = dim};
  safeMalloc(that.vals, dim);
  if(that.vals) loop(i, dim) that.vals[i] = 0.0;
  return that;
}

// Free a CapyVec
// Input:
//   that: the CapyVec to free
// Output:
//   The values of the vector are freed and the pointer to them is reset
//   to NULL. Does nothing if 'that' is NULL or already has no values.
void CapyVecDestruct(CapyVec* const that) {
  if(that == NULL || that->vals == NULL) return;
  free(that->vals);
  that->vals = NULL;
}

// Get a 3D vector orthogonal to another 3D vector
// Input:
//   u: the input vector (array of 3 values)
//   v: the output vector (array of 3 values), orthogonal to u
// Output:
//   v is updated. It is not normalised, and it is the null vector if u is
//   the null vector.
void CapyVec3DGetOrtho(
  double const* const u,
        double* const v) {
  int const hasXYComponent = (fabs(u[0]) > 0.0 || fabs(u[1]) > 0.0);

  // u is along the z axis (or null): swap x and z instead of x and y
  if(!hasXYComponent) {
    v[0] = u[2];
    v[1] = 0.0;
    v[2] = -u[0];
    return;
  }

  // General case: rotate the xy projection of u by a quarter turn
  v[0] = u[1];
  v[1] = -u[0];
  v[2] = 0.0;
}

// Create a CapyMat
// Input:
//   nbCol: the number of columns of the matrix
//   nbRow: the number of rows of the matrix
// Output:
//   Return a CapyVec (values initialised to 0.0)
// Exceptions:
//   May raise CapyExc_MallocFailed
CapyMat CapyMatCreate(
  size_t const nbCol,
  size_t const nbRow) {
  CapyMat that = {.nbCol = nbCol, .nbRow = nbRow};
  safeMalloc(that.vals, nbRow * nbCol);
  if(that.vals) loop(i, nbRow * nbCol) that.vals[i] = 0.0;
  return that;
}

// Create the 3x3 CapyMat for the rotation matrix around the i-th axis and
// given angle (same handedness for the rotation as for the coordinates
// system)
// Input:
//   iAxis: the axis index (0: x, 1: y, 2: z)
//   theta: the angle in radians
// Output:
//   Return a CapyMat (stored row by row). If iAxis is invalid
//   CapyExc_UndefinedExecution is raised and the matrix is left null. If
//   the values of the matrix could not be allocated the matrix is returned
//   as is (with vals equal to NULL), consistently with CapyMatCreate.
// Exceptions:
//   May raise CapyExc_MallocFailed, CapyExc_UndefinedExecution
CapyMat CapyMatCreateRotMat(
  size_t const iAxis,
  double const theta) {
  CapyMat that = CapyMatCreate(3, 3);

  // Reject invalid axis
  if(iAxis > 2) {
    raiseExc(CapyExc_UndefinedExecution);
    return that;
  }

  // Guard against a failed allocation before writing into the values
  if(that.vals == NULL) return that;

  // Fill in the non null coefficients
  double const cosTheta = cos(theta);
  double const sinTheta = sin(theta);
  switch(iAxis) {
    case 0:
      that.vals[0] = 1.0;
      that.vals[4] = cosTheta;
      that.vals[5] = -sinTheta;
      that.vals[7] = sinTheta;
      that.vals[8] = cosTheta;
      break;
    case 1:
      that.vals[0] = cosTheta;
      that.vals[2] = sinTheta;
      that.vals[4] = 1.0;
      that.vals[6] = -sinTheta;
      that.vals[8] = cosTheta;
      break;
    default:
      that.vals[0] = cosTheta;
      that.vals[1] = -sinTheta;
      that.vals[3] = sinTheta;
      that.vals[4] = cosTheta;
      that.vals[8] = 1.0;
      break;
  }
  return that;
}

// Free a CapyMat
// Input:
//   that: the CapyMat to free
// Output:
//   The values of the matrix are freed and the pointer to them is reset
//   to NULL. Does nothing if 'that' is NULL or already has no values.
void CapyMatDestruct(CapyMat* const that) {
  if(that == NULL || that->vals == NULL) return;
  free(that->vals);
  that->vals = NULL;
}

// Add two vectors
// Input:
//   a: first vector
//   b: second vector
//   c: result vector, c=a+b, (c can be a or b)
// Output:
//   The first a->dim values of c are updated.
void CapyVecAdd(
  CapyVec const* const a,
  CapyVec const* const b,
        CapyVec* const c) {
  double const* const lhs = a->vals;
  double const* const rhs = b->vals;
  double* const out = c->vals;
  loop(iVal, a->dim) {
    out[iVal] = lhs[iVal] + rhs[iVal];
  }
}

// Subtract two vectors
// Input:
//   a: first vector
//   b: second vector
//   c: result vector, c=a-b
// Output:
//   The first a->dim values of c are updated.
void CapyVecSub(
  CapyVec const* const a,
  CapyVec const* const b,
        CapyVec* const c) {
  double const* const lhs = a->vals;
  double const* const rhs = b->vals;
  double* const out = c->vals;
  loop(iVal, a->dim) {
    out[iVal] = lhs[iVal] - rhs[iVal];
  }
}

// Dot product of two vectors
// Input:
//   a: first vector
//   b: second vector
//   c: result scalar, c=a.b
// Output:
//   *c is reset then updated with the sum of the products of the first
//   a->dim values of a and b.
void CapyVecDot(
  CapyVec const* const a,
  CapyVec const* const b,
         double* const c) {
  double const* const lhs = a->vals;
  double const* const rhs = b->vals;
  *c = 0.0;
  loop(iVal, a->dim) {
    *c += lhs[iVal] * rhs[iVal];
  }
}

// Cross product of two vectors of dimension 3
// Input:
//   a: first vector
//   b: second vector
//   c: result vector, c=a*b (c can be a or b)
// Output:
//   The three first values of c are updated. The components are all
//   calculated before being stored, so that the result stays correct when
//   c is the same vector as a or b.
void CapyVecCross(
  CapyVec const* const a,
  CapyVec const* const b,
        CapyVec* const c) {
  double const x = a->vals[1] * b->vals[2] - a->vals[2] * b->vals[1];
  double const y = a->vals[2] * b->vals[0] - a->vals[0] * b->vals[2];
  double const z = a->vals[0] * b->vals[1] - a->vals[1] * b->vals[0];
  c->vals[0] = x;
  c->vals[1] = y;
  c->vals[2] = z;
}

// Product of a vector by a scalar
// Input:
//   a: vector
//   b: scalar
//   c: result vector, c=a*b
// Output:
//   The first a->dim values of c are updated.
void CapyVecMul(
  CapyVec const* const a,
          double const b,
        CapyVec* const c) {
  double const* const in = a->vals;
  double* const out = c->vals;
  loop(iVal, a->dim) {
    out[iVal] = b * in[iVal];
  }
}

// Copy the values of a vector to another
// Input:
//   a: from vector
//   b: to vector
// Output:
//   The first a->dim values of b are set to those of a (b->dim is not
//   modified).
void CapyVecCopy(
  CapyVec const* const a,
        CapyVec* const b) {
  double const* const src = a->vals;
  double* const dst = b->vals;
  loop(iVal, a->dim) {
    dst[iVal] = src[iVal];
  }
}

// Normalise the vector
// Input:
//   a: the vector
// Output:
//   The values of the vector are divided by its norm (no particular
//   handling of the null vector).
void CapyVecNormalise(CapyVec* const a) {
  double* const vals = a->vals;
  double squaredNorm = 0.0;
  loop(iVal, a->dim) {
    squaredNorm += vals[iVal] * vals[iVal];
  }
  double const scale = 1.0 / sqrt(squaredNorm);
  loop(iVal, a->dim) {
    vals[iVal] *= scale;
  }
}

// Normalise the vector using the fast inverse square root
// Input:
//   a: the vector
// Output:
//   The values of the vector are multiplied by
//   CapyFastInverseSquareRoot(squared norm). The squared norm is
//   accumulated in single precision.
void CapyVecNormaliseFast(CapyVec* const a) {
  double* const vals = a->vals;
  float squaredNorm = 0.0;
  loop(iVal, a->dim) {
    squaredNorm += (float)(vals[iVal] * vals[iVal]);
  }
  double const scale = CapyFastInverseSquareRoot(squaredNorm);
  loop(iVal, a->dim) {
    vals[iVal] *= scale;
  }
}

// Get the (Euclidean) norm of the vector
// Input:
//   a: the vector
// Output:
//   Return the square root of the sum of the squared values
double CapyVecGetNorm(CapyVec const* const a) {
  double const* const vals = a->vals;
  double squaredNorm = 0.0;
  loop(iVal, a->dim) {
    squaredNorm += vals[iVal] * vals[iVal];
  }
  return sqrt(squaredNorm);
}

// Get the cosine-similarity of two vectors
// Input:
//   a: the first vector
//   b: the second vector
// Output:
//   Return the dot product of a and b divided by the product of their
//   norms
double CapyVecGetCosineSimilarity(
  CapyVec const* const a,
  CapyVec const* const b) {
  double dot = 0.0;
  CapyVecDot(a, b, &dot);
  double const normProduct = CapyVecGetNorm(a) * CapyVecGetNorm(b);
  return dot / normProduct;
}

// Get the approximated norm of a 2D vector
// Input:
//   u: the 2D vector
// Output:
//   Return the approximated norm of the vector, equal to 0.96x+0.4y where
//   x is the largest and y the smallest of the absolute values of the two
//   components (stated as accurate within 4%)
double CapyVec2DApproxNorm(CapyVec* const u) {
  double const absX = fabs(u->vals[0]);
  double const absY = fabs(u->vals[1]);
  int const xIsMajor = (absX > absY);
  double const major = (xIsMajor ? absX : absY);
  double const minor = (xIsMajor ? absY : absX);
  return 0.96 * major + 0.4 * minor;
}

// Get the angle between two 2D vectors
// Input:
//   u: the first 2D vector
//   v: the second 2D vector
// Output:
//   Return the angle between the 2 vectors in [-M_PI, M_PI], calculated
//   as -atan2(cross(u,v), dot(u,v))
double CapyVec2DGetAngle(
  CapyVec const* const u,
  CapyVec const* const v) {
  double const cross = u->vals[0] * v->vals[1] - u->vals[1] * v->vals[0];
  double const dot = u->vals[0] * v->vals[0] + u->vals[1] * v->vals[1];
  return -atan2(cross, dot);
}

// Set the angle of a 2D vector
// Input:
//   a: vector
//   b: angle in radians
//   c: result vector
// Output:
//   Set the angle to 'b' (relative to x, ccw) while conserving the norm
//   of 'a' and store the result in 'c' (which can be 'a')
void CapyVec2DSetAngle(
  CapyVec const* const a,
          double const b,
        CapyVec* const c) {

  // The norm is read before writing into c, so c can be a
  double const norm = CapyVecGetNorm(a);
  double* const out = c->vals;
  out[0] = norm * cos(b);
  out[1] = norm * sin(b);
}

// Rotate a 2D vector by a given angle
// Input:
//   a: vector to rotate
//   theta: angle in radians
//   b: result vector
// Output:
//   Rotate CCW the vector 'a' by 'theta' and store the result in 'b'
//   (which can be 'a')
void CapyVec2DRotate(
  CapyVec const* const a,
          double const theta,
        CapyVec* const b) {

  // Both input components are read before writing into b, so b can be a
  double const inX = a->vals[0];
  double const inY = a->vals[1];
  double const c = cos(theta);
  double const s = sin(theta);
  double const outX = inX * c - inY * s;
  double const outY = inY * c + inX * s;
  b->vals[0] = outX;
  b->vals[1] = outY;
}

// Get the squared norm of the vector
// Input:
//   a: the vector
// Output:
//   Return the sum of the squared values of the vector
double CapyVecGetSquaredNorm(CapyVec* const a) {
  double const* const vals = a->vals;
  double squaredNorm = 0.0;
  loop(iVal, a->dim) {
    squaredNorm += vals[iVal] * vals[iVal];
  }
  return squaredNorm;
}

// Get the moment of a vector's values
// Input:
//   u: the vector
//   c: the center of the moment
//   n: the order of the moment
// Output:
//   Return the moment. For n equal to 0 it is the sum of u's values, aka
//   total mass (c is ignored). Else it is the mean of (u_i-c)^n.
//   CapyVecGetMoment(u, 0, 1) is the mean of u's values, aka first raw
//   moment. CapyVecGetMoment(u, mean(u), 2) is the variance of u's values,
//   aka second centered moment, or square of the standard deviation sigma.
//   CapyVecGetMoment(u, mean(u), 3) and CapyVecGetMoment(u, mean(u), 4)
//   are the third and fourth centered moments (from which skewness and
//   kurtosis are obtained by standardisation).
//   Standardized moment of order n (aka normalized n-th central moment) is
//   equal to:
//   CapyVecGetMoment(u, mean(u), n) / sqrt(CapyVecGetMoment(u, mean(u), 2))^n
double CapyVecGetMoment(
  CapyVec const* const u,
          double const c,
         uint8_t const n) {
  double moment = 0.0;

  // Order 0: plain sum of the values
  if(n == 0) {
    loop(iVal, u->dim) moment += u->vals[iVal];
    return moment;
  }

  // Other orders: mean of the n-th power of the centered values
  double const invDim = 1.0 / (double)(u->dim);
  loop(iVal, u->dim) {
    moment += pow(u->vals[iVal] - c, n) * invDim;
  }
  return moment;
}

// Get the k-th smallest number in a CapyVec (using quickselect)
// Input:
//   u: the vector
//   k: the 0-based rank (in increasing order) of the element to find
// Output:
//   Return the value which would be at index k if u's values were sorted
//   in increasing order, or NAN if u is empty. Can also be used to get the
//   median value without sorting (CapyVecQuickSelect(u, u->dim/2)).
//   NaN values are ranked after all the other values. u is not modified.
double CapyVecQuickSelect(
  CapyVec const* const u,
          size_t const k) {

  // If there is no element, return NAN
  if(u->dim == 0) return NAN;

  // If there is only one element, it is the result
  if(u->dim == 1) return u->vals[0];

  // Select the pivot (arbitrarily choosen)
  double const pivot = u->vals[u->dim / 2];
  int const pivotIsNan = isnan(pivot);

  // Temporary vectors to partition the values. The values equal to the
  // pivot only need to be counted.
  CapyVec lows = CapyVecCreate(u->dim);
  size_t nbLow = 0;
  CapyVec highs = CapyVecCreate(u->dim);
  size_t nbHigh = 0;
  size_t nbPivot = 0;

  // Partition on the pivot
  loop(i, u->dim) {
    double const val = u->vals[i];
    if(pivotIsNan) {

      // A NaN pivot compares false with everything (itself included),
      // which would send all the values to 'highs' and recurse forever.
      // Rank NaN as the largest value instead: other NaN are 'equal' to
      // the pivot and everything else is lower.
      if(isnan(val)) {
        nbPivot += 1;
      } else {
        lows.vals[nbLow] = val;
        nbLow += 1;
      }
    } else if(val == pivot) {
      nbPivot += 1;
    } else if(val < pivot) {
      lows.vals[nbLow] = val;
      nbLow += 1;
    } else {

      // Values larger than the pivot, and NaN values
      highs.vals[nbHigh] = val;
      nbHigh += 1;
    }
  }

  // If we have found the k-th element return it, else continue the
  // recursion in the partition containing it
  double res = pivot;
  if(k < nbLow) {
    lows.dim = nbLow;
    res = CapyVecQuickSelect(&lows, k);
  } else if(k >= nbLow + nbPivot) {
    highs.dim = nbHigh;
    res = CapyVecQuickSelect(&highs, k - nbLow - nbPivot);
  }

  // Free memory
  CapyVecDestruct(&lows);
  CapyVecDestruct(&highs);

  // Return the result
  return res;
}

// Get the median value of vector's values (using quickselect)
// Input:
//   u the vector
// Output:
//   Return the median value (threshold which split the vector values into
//   two sets ('smaller than threshold' and 'larger than threshold') of same
//   size). For an odd number of values it is the middle one, for an even
//   number of values it is the average of the two middle ones.
double CapyVecGetMedian(CapyVec const* const u) {
  size_t const iMid = u->dim / 2;
  int const hasOddDim = ((u->dim & 1) == 1);
  if(hasOddDim) return CapyVecQuickSelect(u, iMid);
  double const lowerMid = CapyVecQuickSelect(u, iMid - 1);
  double const upperMid = CapyVecQuickSelect(u, iMid);
  return 0.5 * (lowerMid + upperMid);
}

// Get the covariance of two vectors' values. The two vectors must have same
// dimension.
// Input:
//   u: the first vector
//   v: the second vector
// Output:
//   Return the covariance, equal to E[(u-E(u))(v-E(v))].
// Exception:
//   May raise CapyExc_InvalidParameters.
double CapyVecGetCovariance(
  CapyVec const* const u,
  CapyVec const* const v) {
  if(u->dim != v->dim) raiseExc(CapyExc_InvalidParameters);

  // Means of each vector (first raw moments)
  double const meanU = CapyVecGetMoment(u, 0, 1);
  double const meanV = CapyVecGetMoment(v, 0, 1);

  // Products of the centered values, the covariance is their mean
  CapyVec products = CapyVecCreate(u->dim);
  loop(iVal, products.dim) {
    products.vals[iVal] = (u->vals[iVal] - meanU) * (v->vals[iVal] - meanV);
  }
  double const covariance = CapyVecGetMoment(&products, 0, 1);
  CapyVecDestruct(&products);
  return covariance;
}

// Get the Pearson correlation of two vector's values. The two vectors must
// have same dimension.
// Input:
//   u: the first vector
//   v: the second vector
// Output:
//   Return the correlation (in [-1,1]), equal to
//   cov(u, v)/(sigma(u)*sigma(v)).
// Exception:
//   May raise CapyExc_InvalidParameters.
double CapyVecGetPearsonCorrelation(
  CapyVec const* const u,
  CapyVec const* const v) {
  double const covariance = CapyVecGetCovariance(u, v);

  // Variances (second centered moments) of each vector
  double const varianceU = CapyVecGetMoment(u, CapyVecGetMoment(u, 0, 1), 2);
  double const varianceV = CapyVecGetMoment(v, CapyVecGetMoment(v, 0, 1), 2);
  return covariance / sqrt(varianceU * varianceV);
}

// Get the distance covariance of two vector's values (seen as univariate
// variables). The two vectors must have same dimension.
// Input:
//   u: the first vector
//   v: the second vector
// Output:
//   Return the distance covariance, calculated as the mean over all pairs
//   (r,c) of the product of the doubly centered distances
//   |u_r-u_c| and |v_r-v_c|.
//   NOTE(review): this quantity matches what is usually called the
//   *squared* sample distance covariance - confirm the intended
//   convention.
// Exception:
//   May raise CapyExc_InvalidParameters.
double CapyVecGetDistanceCovariance(
  CapyVec const* const u,
  CapyVec const* const v) {
  if(u->dim != v->dim) raiseExc(CapyExc_InvalidParameters);

  // x[0] and x[1] give access to u and v by index
  CapyVec const* x[2] = {u, v};

  // means[i]: mean of all the pairwise distances in x[i]
  // hmeans[i][0][r]: mean of the distances on row r for x[i]
  // hmeans[i][1][c]: mean of the distances on column c for x[i]
  double means[2] = {0.0, 0.0};
  double* hmeans[2][2] = {{NULL, NULL}, {NULL, NULL}};
  loop(i, 2) loop(j, 2) safeMalloc(hmeans[i][j], u->dim);
  loop(i, 2) loop(j, 2) loop(k, u->dim) hmeans[i][j][k] = 0.0;
  double invDim = 1.0 / (double)(u->dim);
  double invDimDim = 1.0 / (double)(u->dim * u->dim);

  // Accumulate the row, column and grand means of the pairwise distances
  loop(i, 2) loop(r, u->dim) loop(c, u->dim) {
    double val = fabs(x[i]->vals[r] - x[i]->vals[c]);
    means[i] += val * invDimDim;
    hmeans[i][0][r] += val * invDim;
    hmeans[i][1][c] += val * invDim;
  }

  // Mean of the products of the doubly centered distances. The row means
  // are indexed by c and the column means by r here, which is equivalent
  // as the distance |x_r-x_c| is symmetric.
  double covariance = 0.0;
  loop(r, u->dim) loop(c, u->dim) {
    double val = 1.0;
    loop(i, 2) {
      val *=
        fabs(x[i]->vals[r] - x[i]->vals[c]) -
        hmeans[i][0][c] - hmeans[i][1][r] + means[i];
    }
    val *= invDimDim;
    covariance += val;
  }

  // Free the temporary means
  loop(i, 2) loop(j, 2) free(hmeans[i][j]);
  return covariance;
}

// Get the distance correlation of two vector's values (seen as univariate
// variables). The two vectors must have same dimension.
// Input:
//   u: the first vector
//   v: the second vector
// Output:
//   Return the distance correlation (in [0,1]), equal to
//   dCov(u,v)/sqrt(dCov(u,u)*dCov(v,v)) where dCov is
//   CapyVecGetDistanceCovariance.
// Exception:
//   May raise CapyExc_InvalidParameters.
double CapyVecGetDistanceCorrelation(
  CapyVec const* const u,
  CapyVec const* const v) {
  if(u->dim != v->dim) raiseExc(CapyExc_InvalidParameters);
  double const covarianceUV = CapyVecGetDistanceCovariance(u, v);
  double const varianceU = CapyVecGetDistanceCovariance(u, u);
  double const varianceV = CapyVecGetDistanceCovariance(v, v);
  return covarianceUV / sqrt(varianceU * varianceV);
}

// Apply the softmax function to a vector
// Input:
//   u: the vector
//   t: 'temperature'
// Output:
//   The vector is updated. The temperature must be >0.0. A temperature of
//   1.0 gives the standard softmax function. The higher the temperature the
//   more uniformly distributed the result vector is. A temperature value
//   infinitely small produces a vector with value 1.0 for the max value of
//   the input, and 0.0 for all other values.
//   The largest scaled value is subtracted from all the values before
//   applying the exponential. This doesn't change the result
//   mathematically but avoids the overflow of exp() (hence NaN results)
//   for large inputs or small temperatures.
void CapyVecSoftmax(
  CapyVec* const u,
    double const t) {

  // Nothing to do for an empty vector
  if(u->dim == 0) return;

  // Scale the values by the temperature and search the largest one
  double const invT = 1.0 / t;
  double maxVal = u->vals[0] * invT;
  loop(i, u->dim) {
    u->vals[i] *= invT;
    if(u->vals[i] > maxVal) maxVal = u->vals[i];
  }

  // A non finite shift would turn the values into NaN (inf-inf), fall
  // back to the unshifted calculation in that case
  if(!isfinite(maxVal)) maxVal = 0.0;

  // Exponentiate the shifted values and accumulate their sum
  double sum = 0.0;
  loop(i, u->dim) {
    u->vals[i] = exp(u->vals[i] - maxVal);
    sum += u->vals[i];
  }

  // Normalise
  if(fabs(sum) > 1e-12) sum = 1.0 / sum;
  loop(i, u->dim) u->vals[i] *= sum;
}

// Get the (Euclidean) distance between two vectors. The two vectors must
// have same dimension.
// Input:
//   u: the first vector
//   v: the second vector
// Output:
//   Return the norm of the difference of the two vectors.
// Exception:
//   May raise CapyExc_InvalidParameters.
double CapyVecGetDistance(
  CapyVec const* const u,
  CapyVec const* const v) {
  if(u->dim != v->dim) raiseExc(CapyExc_InvalidParameters);
  double squaredDist = 0.0;
  loop(iVal, u->dim) {
    double const delta = u->vals[iVal] - v->vals[iVal];
    squaredDist += delta * delta;
  }
  return sqrt(squaredDist);
}

// Apply lerp to a vector components
// Input:
//   start: start vector
//   end: end vector
//   res: result vector (can be the same as 'start' or 'end')
//   t: the coefficient (in [0,1])
// Output:
//   The res->dim values of 'res' are set to start+t*(end-start)
void CapyVecLerp(
  CapyVec const* const start,
  CapyVec const* const end,
        CapyVec* const res,
          double const t) {
  loop(iVal, res->dim) {

    // Both inputs are read before writing, so res can alias one of them
    double const from = start->vals[iVal];
    double const to = end->vals[iVal];
    res->vals[iVal] = from + t * (to - from);
  }
}

// Apply easing to a vector components
// Input:
//   start: start vector
//   end: end vector
//   res: result vector (can be the same as 'start' or 'end')
//   t: the coefficient (in [0,1])
//   easing: the easing function, called as easing(startVal, endVal, t)
// Output:
//   The res->dim values of 'res' are updated with the result of the easing
//   function applied component-wise
void CapyVecEasing(
  CapyVec const* const start,
  CapyVec const* const end,
        CapyVec* const res,
          double const t,
      CapyVecEasingFun easing) {
  double* const out = res->vals;
  loop(iVal, res->dim) {
    out[iVal] = easing(start->vals[iVal], end->vals[iVal], t);
  }
}

// Product matrix vector
// Input:
//   a: matrix (values stored row by row)
//   b: vector (of dimension a->nbCol)
//   c: result vector (of dimension a->nbRow), c=a*b
// Output:
//   c is updated.
void CapyMatProdVec(
  CapyMat const* const a,
  CapyVec const* const b,
        CapyVec* const c) {
  loop(iRow, a->nbRow) {
    size_t const iRowStart = iRow * a->nbCol;
    c->vals[iRow] = 0.0;
    loop(iCol, a->nbCol) {
      c->vals[iRow] += a->vals[iRowStart + iCol] * b->vals[iCol];
    }
  }
}

// Product transposed matrix vector
// Input:
//   a: matrix (values stored row by row)
//   b: vector (of dimension a->nbRow)
//   c: result vector (of dimension a->nbCol), c=a^t*b
// Output:
//   c is updated.
void CapyMatTransProdVec(
  CapyMat const* const a,
  CapyVec const* const b,
        CapyVec* const c) {
  loop(iCol, a->nbCol) {
    c->vals[iCol] = 0.0;
    loop(iRow, a->nbRow) {
      size_t const iVal = iRow * a->nbCol + iCol;
      c->vals[iCol] += a->vals[iVal] * b->vals[iRow];
    }
  }
}

// Product matrix matrix
// Input:
//   a: matrix
//   b: matrix
//   c: result matrix, c=a*b
// Output:
//   c is updated. Its dimensions are used as is (they are expected to be
//   a->nbRow rows and b->nbCol columns).
void CapyMatProdMat(
  CapyMat const* const a,
  CapyMat const* const b,
        CapyMat* const c) {
  loop(iRow, c->nbRow) {
    loop(iCol, c->nbCol) {
      size_t const iOut = iRow * c->nbCol + iCol;
      c->vals[iOut] = 0.0;
      loop(k, a->nbCol) {
        size_t const iLeft = iRow * a->nbCol + k;
        size_t const iRight = k * b->nbCol + iCol;
        c->vals[iOut] += a->vals[iLeft] * b->vals[iRight];
      }
    }
  }
}

// Product scalar matrix
// Input:
//   a: matrix
//   b: scalar
//   c: result matrix, c=a*b
// Output:
//   The c->nbRow*c->nbCol values of c are updated.
void CapyMatProdScalar(
  CapyMat const* const a,
          double const b,
        CapyMat* const c) {
  size_t const nbVal = c->nbRow * c->nbCol;
  loop(iVal, nbVal) {
    c->vals[iVal] = a->vals[iVal] * b;
  }
}

// Add matrix matrix
// Input:
//   a: matrix
//   b: matrix
//   c: result matrix, c=a+b
// Output:
//   The c->nbRow*c->nbCol values of c are updated.
void CapyMatAddMat(
  CapyMat const* const a,
  CapyMat const* const b,
        CapyMat* const c) {
  size_t const nbVal = c->nbRow * c->nbCol;
  loop(iVal, nbVal) {
    c->vals[iVal] = a->vals[iVal] + b->vals[iVal];
  }
}

// Transpose a matrix
// Input:
//   a: the matrix to transpose
//   b: the result matrix, b=a^t (written using its own number of columns
//      as row stride)
void CapyMatTransp(
  CapyMat const* const a,
        CapyMat* const b) {
  loop(iRow, a->nbRow) {
    loop(iCol, a->nbCol) {

      // Value (iRow, iCol) of 'a' becomes value (iCol, iRow) of 'b'
      b->vals[iCol * b->nbCol + iRow] = a->vals[iRow * a->nbCol + iCol];
    }
  }
}

// Get the determinant of a matrix
// Input:
//   a: the matrix (only its number of columns is inspected to select the
//      formula, values are read as a row-major square matrix)
//   b: the result determinant
// Output:
//   'b' is updated for dimensions 1, 2 and 3. Larger dimensions are not
//   implemented and trigger an assertion.
void CapyMatDet(
  CapyMat const* const a,
         double* const b) {
  double const* const m = a->vals;
  switch(a->nbCol) {
    case 1:
      *b = m[0];
      break;
    case 2:
      *b = m[0] * m[3] - m[1] * m[2];
      break;
    case 3:

      // Cofactor expansion along the first column
      *b =
        m[0] * (m[4] * m[8] - m[5] * m[7]) -
        m[3] * (m[1] * m[8] - m[2] * m[7]) +
        m[6] * (m[1] * m[5] - m[2] * m[4]);
      break;
    default:
      assert(false && "CapyMatDet not implemented for dim > 3");

      // TODO cf https://en.wikipedia.org/wiki/Determinant
      break;
  }
}

// Pseudo inverse matrix (Moore-Penrose inverse), computed as
// (a^t * a)^-1 * a^t
// Input:
//   a: matrix
//   b: result pseudo inverse matrix
// Exceptions:
//   May raise CapyExc_MatrixInversionFailed, CapyExc_MallocFailed
void CapyMatPseudoInv(
  CapyMat const* const a,
        CapyMat* const b) {

  // cf https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse
  // Temporary matrices start empty so that they can be destructed
  // unconditionally after the try block
  CapyMat aT = {.dims = {0, 0}, .vals = NULL};
  CapyMat c = {.dims = {0, 0}, .vals = NULL};
  CapyMat cInv = {.dims = {0, 0}, .vals = NULL};
  try {

    // aT = a^t
    // NOTE(review): CapyMatCreate appears to take (nbCol, nbRow) in that
    // order, judging from its other uses in this file - confirm
    aT = CapyMatCreate(a->nbRow, a->nbCol);
    CapyMatTransp(a, &aT);

    // c = a^t * a, a square matrix of dimension a->nbCol
    c = CapyMatCreate(a->nbCol, a->nbCol);
    CapyMatProdMat(&aT, a, &c);

    // cInv = (a^t * a)^-1 ('c' is square, hence CapyMatInv does not call
    // back CapyMatPseudoInv)
    cInv = CapyMatCreate(a->nbCol, a->nbCol);
    CapyMatInv(&c, &cInv);

    // b = (a^t * a)^-1 * a^t
    CapyMatProdMat(&cInv, &aT, b);
  } endCatch;

  // Free the temporary matrices, then forward any exception caught above
  CapyMatDestruct(&aT);
  CapyMatDestruct(&c);
  CapyMatDestruct(&cInv);
  CapyForwardExc();
}

// Inverse matrix (if the matrix is not square the result is the pseudo
// inverse)
// Input:
//   a: matrix
//   b: result inverse matrix
// Output:
//   'b' is updated with the inverse of 'a'. Dimensions 1, 2 and 3 use the
//   closed form (adjugate divided by determinant), larger dimensions use an
//   in-place pivoting algorithm.
// Exceptions:
//   May raise CapyExc_MatrixInversionFailed (singular matrix),
//   CapyExc_MallocFailed
void CapyMatInv(
  CapyMat const* const a,
        CapyMat* const b) {
  if(a->nbCol != a->nbRow) {
    CapyMatPseudoInv(a, b);
  } else if(a->nbCol == 1) {

    // 1.0 / 0.0 is infinite, not NaN: check for any non finite result to
    // detect a singular matrix
    double const inv = 1.0 / a->vals[0];
    if(!isfinite(inv)) {
      raiseExc(CapyExc_MatrixInversionFailed);
      return;
    }
    b->vals[0] = inv;
  } else if(a->nbCol == 2) {
    double det;
    CapyMatDet(a, &det);
    double invDet = 1.0 / det;
    if(!isfinite(invDet)) {
      raiseExc(CapyExc_MatrixInversionFailed);
      return;
    }
    b->vals[0] = a->vals[3] * invDet;
    b->vals[1] = -1.0 * a->vals[1] * invDet;
    b->vals[2] = -1.0 * a->vals[2] * invDet;
    b->vals[3] = a->vals[0] * invDet;
  } else if(a->nbCol == 3) {
    double det;
    CapyMatDet(a, &det);
    double invDet = 1.0 / det;
    if(!isfinite(invDet)) {
      raiseExc(CapyExc_MatrixInversionFailed);
      return;
    }

    // Adjugate matrix (transposed cofactors) scaled by 1/det
    b->vals[0] =
      (a->vals[4] * a->vals[8] - a->vals[5] * a->vals[7]) * invDet;
    b->vals[1] =
      -(a->vals[1] * a->vals[8] - a->vals[2] * a->vals[7]) * invDet;
    b->vals[2] =
      (a->vals[1] * a->vals[5] - a->vals[2] * a->vals[4]) * invDet;
    b->vals[3] =
      -(a->vals[3] * a->vals[8] - a->vals[5] * a->vals[6]) * invDet;
    b->vals[4] =
      (a->vals[0] * a->vals[8] - a->vals[2] * a->vals[6]) * invDet;
    b->vals[5] =
      -(a->vals[0] * a->vals[5] - a->vals[2] * a->vals[3]) * invDet;
    b->vals[6] =
      (a->vals[3] * a->vals[7] - a->vals[4] * a->vals[6]) * invDet;
    b->vals[7] =
      -(a->vals[0] * a->vals[7] - a->vals[1] * a->vals[6]) * invDet;
    b->vals[8] =
      (a->vals[0] * a->vals[4] - a->vals[1] * a->vals[3]) * invDet;
  } else {

    // Farooq Hamid algorithm (modified to handle some matrix with null
    // values on the diagonal)
    // https://www.researchgate.net/publication/
    //   220337322_An_Efficient_and_Simple_Algorithm_for_Matrix_Inversion
    // The inversion is performed in place in 'b', starting from a copy of 'a'
    loop(i, a->nbCol * a->nbRow) b->vals[i] = a->vals[i];
    bool flagHasChanged = true;
    size_t nbRemaining = a->nbCol;

    // hasPivotChanged[p] is true once the p-th diagonal value has been used
    // as pivot
    bool* hasPivotChanged = NULL;
    safeMalloc(hasPivotChanged, a->nbCol);
    if(!hasPivotChanged) return;
    loop(i, a->nbCol) hasPivotChanged[i] = false;

    // Sweep the diagonal until all the pivots have been processed or a
    // full sweep could not process any of the remaining ones
    while(flagHasChanged && nbRemaining > 0) {
      flagHasChanged = false;
      loop(p, b->nbCol) {
        double pivot = b->vals[p * b->nbCol + p];
        if(!equal(pivot, 0.0) && !(hasPivotChanged[p])) {
          flagHasChanged = true;
          --nbRemaining;
          hasPivotChanged[p] = true;

          // Scale the pivot column by -1/pivot
          loop(i, b->nbCol) {
            b->vals[i * b->nbCol + p] =
              -1.0 * b->vals[i * b->nbCol + p] / pivot;
          }

          // Update all the values outside of the pivot row and column
          loop(i, b->nbCol) if(i != p) loop(j, b->nbCol) if(j != p) {
            b->vals[i * b->nbCol + j] =
              b->vals[i * b->nbCol + j] +
              b->vals[p * b->nbCol + j] *
              b->vals[i * b->nbCol + p];
          }

          // Scale the pivot row by 1/pivot and set the pivot value
          loop(j, b->nbCol) {
            b->vals[p * b->nbCol + j] = b->vals[p * b->nbCol + j] / pivot;
          }
          b->vals[p * b->nbCol + p] = 1.0 / pivot;
        }
      }
    }
    free(hasPivotChanged);

    // Some pivots could not be processed: the inversion failed
    if(nbRemaining > 0) raiseExc(CapyExc_MatrixInversionFailed);
  }
}

// Get the QR decomposition of a matrix using Householder reflections.
// cf http://madrury.github.io/
// jekyll/update/statistics/2017/10/04/qr-algorithm.html
// Input:
//   m: the matrix to decompose (nbRow>=nbCol)
//   q: the result Q matrix (same dimensions as m)
//   r: the result R matrix (dimensions: m.nbRow, m.nbRow)
// Output:
//   'q' and 'r' are updated. If a column to be reflected only contains
//   values smaller than 1e-6 in absolute value, the decomposition is
//   aborted and 'q' and 'r' are left untouched.
// Exception:
//   May raise CapyExc_QRDecompositionFailed, CapyExc_MallocFailed.
// NOTE(review): 'r' is filled by reading the working copy of 'm' with the
// column/row counts of 'r' itself; if 'r' has more columns than 'm' the
// reads go past the end of a row of the working copy - confirm the expected
// dimensions of 'r' for non square 'm'.
void CapyMatGetQR(
  CapyMat const* const m,
        CapyMat* const q,
        CapyMat* const r) {

  // Variables to perform the decomposition
  // QQtilde accumulates the product of the reflectors, it is initialised
  // by setting its diagonal values to 1.0
  CapyMat QQtilde = CapyMatCreate(m->nbRow, m->nbRow);
  loop(i, m->nbRow) QQtilde.vals[i * (1 + m->nbRow)] = 1.0;

  // A is a working copy of 'm', progressively reduced to R
  CapyMat A = CapyMatCreate(m->nbCol, m->nbRow);
  memcpy(A.vals, m->vals, sizeof(A.vals[0]) * m->nbCol * m->nbRow);
  CapyVec w = CapyVecCreate(m->nbRow);
  CapyVec v = CapyVecCreate(m->nbRow);
  CapyMat reflector = CapyMatCreate(m->nbRow, m->nbRow);

  // M and Mp receive the products before they are copied back to A and
  // QQtilde
  CapyMat M = CapyMatCreate(m->nbCol, m->nbRow);
  CapyMat Mp = CapyMatCreate(m->nbRow, m->nbRow);

  // Householder algorithm
  loop(k, m->nbCol) {

    // w is the part of the k-th column of A on and below the diagonal
    size_t dim = m->nbRow - k;
    bool flagNull = true;
    double norm = 0.0;
    loop(i, dim) {
      w.vals[i] = A.vals[(k + i) * A.nbCol + k];
      norm += w.vals[i] * w.vals[i];
      if(fabs(w.vals[i]) > 1e-6) flagNull = false;
    }

    // A (nearly) null column can't be reflected: free everything and give up
    if(flagNull) {
      CapyMatDestruct(&A);
      CapyMatDestruct(&QQtilde);
      CapyMatDestruct(&reflector);
      CapyMatDestruct(&M);
      CapyMatDestruct(&Mp);
      CapyVecDestruct(&w);
      CapyVecDestruct(&v);
      raiseExc(CapyExc_QRDecompositionFailed);
      return;
    }

    // Add the norm of w to its first value, with the same sign as that
    // value so that its magnitude can only increase
    double sign = (w.vals[0] >= 0.0 ? 1.0 : -1.0);
    w.vals[0] += sign * sqrt(norm);

    // v is w normalised
    norm = 0.0;
    loop(i, dim) {
      v.vals[i] = w.vals[i];
      norm += v.vals[i] * v.vals[i];
    }
    norm = sqrt(norm);
    loop(i, dim) v.vals[i] /= norm;

    // Build the reflector: identity outside of the lower right block
    // starting at (k, k), and I - 2*v*v^t inside of it
    loop(i, reflector.nbCol) {
      loop(j, reflector.nbRow) {
        if(i >= k && j >= k) {
          reflector.vals[j * reflector.nbCol + i] =
            -2.0 * v.vals[i - k] * v.vals[j - k];
          if (i == j) reflector.vals[j * reflector.nbCol + i] += 1.0;
        } else if(i == j) reflector.vals[j * reflector.nbCol + i] = 1.0;
        else reflector.vals[j * reflector.nbCol + i] = 0.0;
      }
    }

    // A = reflector * A
    CapyMatProdMat(&reflector, &A, &M);
    loop(i, A.nbCol) {
      loop(j, A.nbRow) {
        A.vals[j * A.nbCol + i] = M.vals[j * M.nbCol + i];
      }
    }

    // QQtilde = QQtilde * reflector
    CapyMatProdMat(&QQtilde, &reflector, &Mp);
    loop(i, QQtilde.nbCol) {
      loop(j, QQtilde.nbRow) {
        QQtilde.vals[j * QQtilde.nbCol + i] = Mp.vals[j * Mp.nbCol + i];
      }
    }
  }

  // Extract R from the final A and Q from the final QQtilde
  // (the loops run over the dimensions of the result matrices)
  loop(i, r->nbCol) {
    loop(j, r->nbRow) {
      r->vals[j * r->nbCol + i] = A.vals[j * A.nbCol + i];
    }
  }
  loop(i, q->nbCol) {
    loop(j, q->nbRow) {
      q->vals[j * q->nbCol + i] = QQtilde.vals[j * QQtilde.nbCol + i];
    }
  }

  // Free memory
  CapyMatDestruct(&A);
  CapyMatDestruct(&QQtilde);
  CapyMatDestruct(&reflector);
  CapyMatDestruct(&M);
  CapyMatDestruct(&Mp);
  CapyVecDestruct(&w);
  CapyVecDestruct(&v);
}

// Set a matrix to the identity.
// Input:
//   m: the matrix (may be rectangular)
// Output:
//   Values with equal row and column indices are set to 1.0, all the other
//   values are set to 0.0.
void CapyMatSetToIdentity(CapyMat* const m) {
  loop(iRow, m->nbRow) {
    loop(iCol, m->nbCol) {
      m->vals[iRow * m->nbCol + iCol] = (iRow == iCol ? 1.0 : 0.0);
    }
  }
}

// Copy the values of a matrix to another.
// Input:
//    src: the matrix to be copied
//   dest: the matrix updated
// Output:
//   The values of 'src' are copied to 'dest'. The number of copied values
//   is given by the dimensions of 'src'; the dimensions of 'dest' are
//   neither checked nor modified.
void CapyMatCopy(
  CapyMat const* const src,
        CapyMat* const dest) {
  size_t const nbByte = sizeof(src->vals[0]) * src->nbCol * src->nbRow;
  memcpy(dest->vals, src->vals, nbByte);
}

// Get the moment of a column of a matrix
// Input:
//   m: the matrix
//   iCol: the column index
//   c: the center of the moment
//   n: the order of the moment
// Output:
//   Return the moment (cf CapyVecGetMoment) of the vector made of the values
//   of the requested column.
double CapyMatGetMomentCol(
  CapyMat const* const m,
          size_t const iCol,
          double const c,
         uint8_t const n) {

  // Extract the column into a temporary vector
  CapyVec column = CapyVecCreate(m->nbRow);
  loop(iRow, m->nbRow) {
    column.vals[iRow] = m->vals[iRow * m->nbCol + iCol];
  }

  // Delegate the calculation to the vector implementation
  double const res = CapyVecGetMoment(&column, c, n);
  CapyVecDestruct(&column);
  return res;
}

// Get the Eigen values and vectors of a matrix using the QR algorithm
// cf http://madrury.github.io/
// jekyll/update/statistics/2017/10/04/qr-algorithm.html
// Input:
//   m: the matrix (nbRow==nbCol)
//   eigenVal: the Eigen values (from largest to smallest in
//             absolute value, dim==m.nbCol)
//   eigenVec: the Eigen vectors (same order as Eigen values, same dimension as
//             m, one per column)
// Output:
//   eigenVec and eigenVal are updated with the result.
// Exception:
//   May raise CapyExc_QRDecompositionFailed, CapyExc_MallocFailed.
void CapyMatGetEigen(
  CapyMat const* const m,
        CapyVec* const eigenVal,
        CapyMat* const eigenVec) {

  // Variables to perform the calculation
  // A is the iterated matrix (starts as a copy of 'm'), Q and R receive its
  // QR decomposition, prod is a buffer for the matrix products
  CapyMat A = CapyMatCreate(m->nbCol, m->nbRow);
  CapyMatCopy(m, &A);
  CapyMat Q = CapyMatCreate(m->nbCol, m->nbRow);
  CapyMat R = CapyMatCreate(m->nbRow, m->nbRow);
  CapyMat prod = CapyMatCreate(m->nbCol, m->nbRow);

  // Calculate the Eigen vectors and values
  CapyMatSetToIdentity(eigenVec);

  // NOTE(review): presumably volatile because it is modified inside the
  // try block - confirm against the implementation of try/endCatch
  volatile double residual = 0.0;
  try {
    do {

      // One iteration: A = R*Q where Q*R is the decomposition of the
      // previous A, and eigenVec accumulates the product of the Q matrices
      CapyMatGetQR(&A, &Q, &R);
      CapyMatProdMat(&R, &Q, &prod);
      CapyMatCopy(&prod, &A);
      CapyMatProdMat(eigenVec, &Q, &prod);
      CapyMatCopy(&prod, eigenVec);

      // The residual is the largest absolute value outside of the diagonal
      double newResidual = 0.0;
      loop(i, A.nbCol) {
        loop(j, A.nbRow) {
          if (i != j && newResidual < fabs(A.vals[j * A.nbCol + i])) {
            newResidual = fabs(A.vals[j * A.nbCol + i]);
          }
        }
      }

      // If the residual is the same as in the previous iteration (according
      // to equald), force the end of the iterations
      if (equald(newResidual, residual)) newResidual = 0.0;
      residual = newResidual;
    } while(residual > DBL_EPSILON);

    // The Eigen values are the diagonal values of the final A
    loop(i, A.nbCol) eigenVal->vals[i] = A.vals[i * A.nbCol + i];
  } endCatch;

  // Free memory
  CapyMatDestruct(&A);
  CapyMatDestruct(&Q);
  CapyMatDestruct(&R);
  CapyMatDestruct(&prod);

  // Forward eventual exceptions
  CapyForwardExc();
}

// Calculate (a*b)%c using the double and add algorithm, without overflow
// of the intermediate values
// Input:
//   a,b,c: the value of a, b and c in (a*b)%c ('c' must not be null)
// Output:
//   Return (a*b)%c, exact for any 64 bits values of 'a', 'b' and 'c'
uint64_t CapyMultMod(
  uint64_t a,
  uint64_t b,
  uint64_t c) {
  uint64_t res = 0;
  a = a % c;

  // Invariant: 'res' and 'a' are always in [0, c[. Sums are reduced by
  // comparing to (c - a) instead of adding first, because (res + a) and
  // (a + a) may not fit in 64 bits when 'c' is larger than 2^63.
  while (b > 0) {
    if (b & 1) {
      uint64_t const gap = c - a;
      res = (res >= gap ? res - gap : res + a);
    }
    uint64_t const gap = c - a;
    a = (a >= gap ? a - gap : a + a);
    b >>= 1;
  }
  return res;
}

// Calculate (a^b)%c using the square and multiply algorithm
// Input:
//   a,b,c: the value of a, b and c in (a^b)%c ('c' must not be null)
// Output:
//   Return (a^b)%c. A null exponent gives 1%c.
uint64_t CapyPowMod(
  uint64_t a,
  uint64_t b,
  uint64_t c) {
  uint64_t res = 1 % c;

  // Search the most significant bit set in the exponent. The test on 'mask'
  // stops the search when the exponent is null (no bit set).
  uint64_t mask = 0x8000000000000000;
  while(mask && !(b & mask)) mask >>= 1;

  // Process the bits of the exponent from the most to the least significant:
  // square for each bit, and multiply by 'a' for each bit set
  while(mask) {
    res = CapyMultMod(res, res, c);
    if(b & mask) {
      res = CapyMultMod(res, a, c);
    }
    mask >>= 1;
  }
  return res;
}

// Return the n-th Fibonacci number.
// Input:
//   n: the index of the element (starting from 0)
// Output:
//   Return the n-th Fibonacci number of the sequence 1, 1, 2, 3, 5, ...
//   The result is exact up to n=92 (largest value fitting in 64 bits), and
//   is given modulo 2^64 beyond.
uint64_t CapyGetFibonacciNumber(uint64_t const n) {

  // Fast doubling on the standard sequence F(0)=0, F(1)=1:
  //   F(2k)   = F(k) * (2 * F(k+1) - F(k))
  //   F(2k+1) = F(k)^2 + F(k+1)^2
  // (fk, fk1) holds (F(k), F(k+1)) where k is made of the bits of 'n'
  // processed so far. Integer arithmetic is used instead of the closed form
  // with floating point values, which loses accuracy for large indices.
  uint64_t fk = 0;
  uint64_t fk1 = 1;
  for(int iBit = 63; iBit >= 0; --iBit) {
    uint64_t const f2k = fk * (2 * fk1 - fk);
    uint64_t const f2k1 = fk * fk + fk1 * fk1;
    if((n >> iBit) & 1) {
      fk = f2k1;
      fk1 = f2k + f2k1;
    } else {
      fk = f2k;
      fk1 = f2k1;
    }
  }

  // The sequence of this library is shifted by one: its n-th value is F(n+1)
  return fk1;
}

// Return the Fibonacci sequence up to the 'n'-th element.
// Input:
//   n: the Fibonacci number (starting from 0)
// Output:
//   Return the Fibonnaci sequence in newly allocated array of 'n' elements.
uint64_t* CapyFibonacciSeq(uint64_t const n) {
  uint64_t* seq = NULL;
  safeMalloc(seq, n + 1);
  if(!seq) return NULL;
  loop(i, n + 1) {
    if(i < 2) seq[i] = 1;
    else seq[i] = seq[i - 1] + seq[i - 2];
  }
  return seq;
}

// Return the index in the Fibonacci sequence of the smallest value greater
// or equal than a given value.
// Input:
//   val: the value
// Output:
//   Return the index in the Fibonacci sequence. (i.e: CapyFibonacciIdx(10)=6)
//   If 'val' is larger than the largest Fibonacci number fitting in 64 bits
//   the returned index is the one of the next number in the sequence (93).
uint64_t CapyFibonacciIdx(uint64_t const val) {
  if(val < 2) return 0;

  // 'cur' is the idx-th value of the sequence and 'prev' the one before
  uint64_t prev = 1;
  uint64_t cur = 2;
  uint64_t idx = 2;
  while(cur < val) {
    ++idx;

    // If the next value doesn't fit in 64 bits it is necessarily larger
    // than 'val': stop here instead of wrapping around
    if(cur > UINT64_MAX - prev) break;
    uint64_t const next = prev + cur;
    prev = cur;
    cur = next;
  }
  return idx;
}

// Free a CapyFiboLattice
// Input:
//   that: the CapyFiboLattice to free (may be NULL)
// Output:
//   The points of the lattice are freed and the lattice is reset to an
//   empty one, hence destructing it again is harmless.
void CapyFiboLatticeDestruct(CapyFiboLattice* const that) {
  if(that == NULL) return;
  free(that->points);

  // Avoid leaving a dangling pointer behind
  that->points = NULL;
  that->nbPoints = 0;
}

// Return the Fibonacci grid lattice for the 'n'-th Fibonacci number
// Input:
//   n: the index of the Fibonacci number (starting from 0)
// Output:
//   Return the lattice. It contains as many points as the 'n'-th Fibonacci
//   number, each stored as two consecutive values in [0,1[. If the Fibonacci
//   sequence can't be allocated the returned lattice is empty.
CapyFiboLattice CapyFibonacciGridLattice(uint64_t const n) {

  // Declare the result lattice
  CapyFiboLattice lattice = {0};

  // Get the Fibonacci sequence, give up if it could not be allocated
  uint64_t* seq = CapyFibonacciSeq(n);
  if(seq == NULL) return lattice;

  // Update the number of points
  lattice.nbPoints = seq[n];

  // Allocate memory for the result
  safeMalloc(lattice.points, 2 * (lattice.nbPoints));

  // Generate the lattice points: the i-th point is
  // (i / F(n), i * F(n-1) / F(n)) with both coordinates taken modulo 1
  if(lattice.points) loop(iPoint, lattice.nbPoints) {
    lattice.points[iPoint * 2] = fmod((double)iPoint / (double)seq[n], 1.0);
    double x = (double)iPoint * (double)seq[n > 0 ? n - 1 : 0] / (double)seq[n];
    lattice.points[iPoint * 2 + 1] = fmod(x, 1.0);
  }

  // Free memory
  free(seq);

  // Return the lattice
  return lattice;
}

// Return the Fibonacci polar lattice for the 'n'-th Fibonacci number
// Input:
//   n: the Fibonacci number
// Output:
//   Return the lattice.
CapyFiboLattice CapyFibonacciPolarLattice(uint64_t const n) {

  // Get the grid lattice
  CapyFiboLattice lattice = CapyFibonacciGridLattice(n);

  // Convert each points to polar coordinates
  loop(iPoint, lattice.nbPoints) {
    lattice.points[iPoint * 2] = sqrt(lattice.points[iPoint * 2]);
    lattice.points[iPoint * 2 + 1] =
      2.0 * M_PI * lattice.points[iPoint * 2 + 1];
  }

  // Return the lattice
  return lattice;
}

// Get the polar coordinates of n points distributed on a demi-sphere
// using the golden angle
// Input:
//        n: the number of points
// Output:
//   Return the lattice.
//   points[2i] is acos(z) where z decreases from 1.0 by steps of 1/n, and
//   points[2i+1] is an angle in [0,2pi[ increased by the golden angle at
//   each point.
CapyFiboLattice CapyFibonacciDemiSphereLattice(size_t const n) {

  // Result lattice, two values per point
  CapyFiboLattice lattice = {0};
  lattice.nbPoints = n;
  safeMalloc(lattice.points, 2 * (lattice.nbPoints));

  // Steps between two consecutive points
  double const stepPhi = M_PI * (3.0 - sqrt(5.0));
  double const stepZ = 1.0 / (double)n;

  // Nothing to calculate if the allocation failed
  if(!(lattice.points)) return lattice;

  // Calculate the points polar coordinates
  double z = 1.0;
  double phi = 0.0;
  loop(iPoint, n) {
    lattice.points[iPoint * 2] = acos(z);
    lattice.points[iPoint * 2 + 1] = phi;
    z -= stepZ;
    phi += stepPhi;

    // Keep the angle below a full turn
    if(phi >= 2.0 * M_PI) phi -= 2.0 * M_PI;
  }
  return lattice;
}

// Get the polar coordinates of n points distributed on a sphere
// using the golden angle
// Input:
//        n: the number of points
// Output:
//   Return the lattice.
//   points[2i] is acos(z) where z decreases from 1.0 by steps of 2/n, and
//   points[2i+1] is an angle in [0,2pi[ increased by the golden angle at
//   each point.
CapyFiboLattice CapyFibonacciSphereLattice(size_t const n) {

  // Result lattice, two values per point
  CapyFiboLattice lattice = {0};
  lattice.nbPoints = n;
  safeMalloc(lattice.points, 2 * (lattice.nbPoints));

  // Steps between two consecutive points
  double const stepPhi = M_PI * (3.0 - sqrt(5.0));
  double const stepZ = 2.0 / (double)n;

  // Nothing to calculate if the allocation failed
  if(!(lattice.points)) return lattice;

  // Calculate the points polar coordinates
  double z = 1.0;
  double phi = 0.0;
  loop(iPoint, n) {
    lattice.points[iPoint * 2] = acos(z);
    lattice.points[iPoint * 2 + 1] = phi;
    z -= stepZ;
    phi += stepPhi;

    // Keep the angle below a full turn
    if(phi >= 2.0 * M_PI) phi -= 2.0 * M_PI;
  }
  return lattice;
}

// Solve the quadratic equation a+bx+cx^2=0
// Input:
//   coeffs: the coefficients of the equation (in order a,b,...)
//    roots: array of size 2 to memorise the roots
// Output:
//   Return true and update 'roots' (sorted by increasing values) if there is a
//   solution, else return false and leave 'roots' unchanged. If there are less
//   roots than the maximum possible number, the smallest root is repeated to
//   fill in 'roots'.
bool CapySolveQuadratic(
  double const* const coeffs,
        double* const roots) {
  if(fabs(coeffs[2]) > DBL_EPSILON) {
    double det = coeffs[1] * coeffs[1] - 4.0 * coeffs[2] * coeffs[0];
    if(det < -DBL_EPSILON) return false;
    else if(det > DBL_EPSILON) {
      det = sqrt(det);
      double f = 1.0 / (2.0 * coeffs[2]);
      if(f > 0.0) {
        roots[0] = (-coeffs[1] - det) * f;
        roots[1] = (-coeffs[1] + det) * f;
      } else {
        roots[1] = (-coeffs[1] - det) * f;
        roots[0] = (-coeffs[1] + det) * f;
      }
    } else {
      roots[0] = roots[1] = -coeffs[1] / (2.0 * coeffs[2]);
    }
  } else if(fabs(coeffs[1]) > DBL_EPSILON) {
    roots[0] = roots[1] = -coeffs[0] / coeffs[1];
  } else if(fabs(coeffs[0]) < DBL_EPSILON) {
    roots[0] = roots[1] = 0.0;
  } else return false;
  return true;
}

// Solve the cubic equation a+bx+cx^2+dx^3=0
// Input:
//   coeffs: the coefficients of the equation (in order a,b,c,d)
//    roots: array of size 3 to memorise the roots
// Output:
//   Return true and update 'roots' (sorted by increasing values) if there is a
//   solution, else return false and leave 'roots' unchanged. If there are less
//   roots than the maximum possible number, the smallest root is repeated to
//   fill in 'roots'.
bool CapySolveCubic(
  double const* const coeffs,
        double* const roots) {

  // Referring to https://github.com/erich666/GraphicsGems/blob/
  // master/gems/Roots3And4.c
  if(fabs(coeffs[3]) < DBL_EPSILON) {

    // Null cubic coefficient: solve as a quadratic into the two last roots
    // and repeat the smallest one
    bool ret = CapySolveQuadratic(coeffs, roots + 1);
    if(ret) roots[0] = roots[1];
    return ret;
  } else {

    // Normalise to x^3+ba*x^2+ca*x+da=0, then substitute x=y-ba/3 to get
    // the reduced form y^3+3*p*y+2*q=0
    double ba = coeffs[2] / coeffs[3];
    double ca = coeffs[1] / coeffs[3];
    double da = coeffs[0] / coeffs[3];
    double ba2 = ba * ba;
    double p = (-ba2 / 3.0 + ca) / 3.0;
    double q = 0.5 * (2.0 / 27.0 * ba * ba2 - 1.0 / 3.0 * ba * ca + da);
    double p3 = p * p * p;

    // The sign of q^2+p^3 gives the number of real roots
    double qqp3 = q * q + p3;
    if(fabs(qqp3) <= DBL_EPSILON) {

      // Null value: one triple root (0.0) or one single and one double root
      // (2u and -u), stored with the smallest one repeated
      if(fabs(q) < 1e-9) roots[0] = roots[1] = roots[2] = 0.0;
      else {
        double u = cbrt(-q);
        if(u < 0.0) {
          roots[0] = roots[1] = 2.0 * u;
          roots[2] = -u;
        } else {
          roots[0] = roots[1] = -u;
          roots[2] = 2.0 * u;
        }
      }
    } else if(qqp3 > DBL_EPSILON) {

      // Positive value: one real root
      double sqqp3 = sqrt(qqp3);
      double u = cbrt(sqqp3 - q);
      double v = -cbrt(sqqp3 + q);
      roots[0] = roots[1] = roots[2] = u + v;
    } else {

      // Negative value: three real roots, obtained with the trigonometric
      // form
      if(fabs(p3) < DBL_EPSILON) return false;
      double phi = acos(-q / sqrt(-p3)) / 3.0;
      double t = 2.0 * sqrt(-p);
      roots[0] = t * cos(phi);
      roots[1] = -t * cos(phi + M_PI / 3.0);
      roots[2] = -t * cos(phi - M_PI / 3.0);

      // Sort the roots by increasing values
      loop(k, 3) loop(j, k) if(roots[j] > roots[k]) {
        double tmp = roots[k];
        roots[k] = roots[j];
        roots[j] = tmp;
      }

      // If two consecutive roots are equal, shift the smaller roots up so
      // that the repeated root is the smallest one
      loop(k, 2) if(fabs(roots[k] - roots[k + 1]) < DBL_EPSILON) {
        loop(j, k) roots[k - j] = roots[k - j - 1];
      }
    }

    // Undo the substitution x=y-ba/3
    double sub = ba / 3.0;
    loop(i, 3) roots[i] -= sub;
  }
  return true;
}

// Solve the quartic equation a+bx+cx^2+dx^3+ex^4=0
// Input:
//   coeffs: the coefficients of the equation (in order a,b,c,d,e)
//    roots: array of size 4 to memorise the roots
// Output:
//   Return true and update 'roots' (sorted by increasing values) if there is a
//   solution, else return false (in that case the content of 'roots' may have
//   been partially overwritten). If there are less roots than the maximum
//   possible number, the smallest root is repeated to fill in 'roots'.
bool CapySolveQuartic(
  double const* const coeffs,
        double* const roots) {

  // Referring to https://github.com/erich666/GraphicsGems/blob/
  // master/gems/Roots3And4.c
  if(fabs(coeffs[4]) < DBL_EPSILON) {

    // Null quartic coefficient: solve as a cubic into the three last roots
    // and repeat the smallest one
    bool ret = CapySolveCubic(coeffs, roots + 1);
    if(ret) roots[0] = roots[1];
    return ret;
  } else {

    // Normalise to x^4+A*x^3+B*x^2+C*x+D=0, then substitute x=y-A/4 to get
    // the reduced form y^4+p*y^2+q*y+r=0
    double A = coeffs[3] / coeffs[4];
    double B = coeffs[2] / coeffs[4];
    double C = coeffs[1] / coeffs[4];
    double D = coeffs[0] / coeffs[4];
    double sq_A = A * A;
    double p = -3.0 / 8.0 * sq_A + B;
    double q = 0.125 * sq_A * A - 0.5 * A * B + C;
    double r =
      -3.0 / 256.0 * sq_A * sq_A + 0.0625 * sq_A * B - 0.25 * A * C + D;
    if(fabs(r) < DBL_EPSILON) {

      // No constant term: y*(y^3+p*y+q)=0, the roots are y=0 and the roots
      // of the cubic factor
      double c[4] = {q, p, 0.0, 1.0};
      bool ret = CapySolveCubic(c, roots + 1);
      if(ret == false) return false;
      else {

        // The cubic roots are in roots[1..3], add the root y=0
        roots[0] = 0.0;

        // Sort the roots by increasing values
        loop(k, 4) loop(j, k) if(roots[j] > roots[k]) {
          double tmp = roots[k];
          roots[k] = roots[j];
          roots[j] = tmp;
        }

        // If two consecutive roots are equal, shift the smaller roots up so
        // that the repeated root is the smallest one
        loop(k, 3) if(roots[k] == roots[k + 1]) {
          loop(j, k) roots[k - j] = roots[k - j - 1];
        }
      }
    } else {

      // Solve the resolvent cubic and take one of its roots
      double cc[4] = {0.5 * r * p - 0.125 * q * q, -r, -.5 * p, 1.0};
      bool ret = CapySolveCubic(cc, roots + 1);
      if(ret == false) return false;
      roots[0] = roots[1];
      double z = roots[0];

      // Build the two quadratic factors from that root, there is no real
      // solution if one of the square roots doesn't exist
      double u = z * z - r;
      if(fabs(u) < 1e-9) u = 0.0;
      else if(u < -1e-9) return false;
      else u = sqrt(u);
      double v = 2.0 * z - p;
      if(fabs(v) < 1e-9) v = 0.0;
      else if(v < -1e-9) return false;
      else v = sqrt(v);

      // Solve the two quadratic factors, the first into roots[0..1] and the
      // second into roots[2..3]
      double cqa[3] = {z - u, (q < 0.0 ? -v: v), 1.0};
      bool retA = CapySolveQuadratic(cqa, roots);
      double cqb[3] = {z + u, (q < 0.0 ? v: -v), 1.0};
      bool retB = CapySolveQuadratic(cqb, roots + 2);
      if(!retA && !retB) return false;
      else {
        if(retA && !retB) {

          // Only the first factor has roots: repeat its smallest root
          roots[3] = roots[1];
          roots[2] = roots[1] = roots[0];
        } else if(!retA && retB) {

          // Only the second factor has roots: repeat its smallest root
          roots[0] = roots[1] = roots[2];
        } else {

          // Both factors have roots: sort them and move the repeated roots
          // to the smallest one
          loop(k, 4) loop(j, k) if(roots[j] > roots[k]) {
            double tmp = roots[k];
            roots[k] = roots[j];
            roots[j] = tmp;
          }
          loop(k, 3) if(roots[k] == roots[k + 1]) {
            loop(j, k) roots[k - j] = roots[k - j - 1];
          }
        }
      }
    }

    // Undo the substitution x=y-A/4
    double sub = 0.25 * A;
    loop(i, 4) roots[i] -= sub;
  }
  return true;
}

// Get the approximated inverse square root of a number using the Quake
// algorithm (cf https://en.wikipedia.org/wiki/Fast_inverse_square_root)
// Input:
//   x: the number
// Output:
//   Return an approximation of 1/sqrt(x).
float CapyFastInverseSquareRoot(float x) {

  // Reinterpret the bits of the float as an integer to build the first
  // estimate
  union {
    float f;
    uint32_t i;
  } bits = {.f = x};
  bits.i = 0x5f3759df - (bits.i >> 1);

  // Refine the estimate with one iteration of Newton's method
  float const halfX = x * 0.5F;
  float estimate = bits.f;
  estimate = estimate * (1.5F - (halfX * estimate * estimate));
  return estimate;
}

// Get the approximated exponential of a number using a piecewise linear
// approximation of 2^x built directly in the bits of a float
// (cf https://specbranch.com/posts/fast-exp/)
// Input:
//   x: the number (the result is only meaningful while exp(x) is in the
//      range of normalised 32 bits floats, i.e. |x| smaller than around 87)
// Output:
//   Return an approximation of exp(x).
float CapyFastExponential(float x) {

  // Change based on desired error metric
  // Max relative error: 2.98%
  // uint32_t const K_TUNED_MINIMAX = 366393;
  // Mean relative error: 1.48%
  // uint32_t const K_TUNED_L1 = 545948;
  // Root mean squared relative error: 1.77%
  // uint32_t const K_TUNED_L2 = 486412;
  uint32_t const K = 366393;

  // Log2(e) * 2^(23) for e^x in a format with 23 mantissa bits
  float const BASE = 1.44269504089f * 8388608.0f;

  // Multiply by the base and convert to Q9.23 fixed point, truncating.
  // The conversion goes through a signed integer: converting a negative
  // floating point value directly to an unsigned type is undefined
  // behaviour, while converting the signed integer to unsigned is well
  // defined (two's complement wrap around).
  float x_base = x * BASE;
  uint32_t i_base = (uint32_t)(int32_t)x_base;

  // Perform a piecewise linear approximation of 2^x
  // (127 << 23) is the exponent bias of 32-bit float and K is a tuned constant
  uint32_t bias = 127;
  union {uint32_t i; float f;} result;
  result.i = i_base + (bias << 23) - K;
  return result.f;
}

// Convert an angle from degrees to radians
// Input:
//   theta: the angle in degrees
// Output:
//   Return the angle in radians.
double CapyDegToRad(double const theta) {

  // Number of radians per degree (pi / 180)
  double const radPerDeg = 0.017453292519943295;
  return theta * radPerDeg;
}

// Convert an angle from radians to degrees
// Input:
//   theta: the angle in radians
// Output:
//   Return the angle in degrees.
double CapyRadToDeg(double const theta) {

  // Number of degrees per radian (180 / pi)
  double const degPerRad = 57.29577951308232;
  return theta * degPerRad;
}

// ----- PIECEWISE GAUSSIAN -----

// Piecewise Gaussian evaluation
// Input:
//   gauss: the Gaussian
//   x: argument of the Gaussian
// Output:
//   Return the value of the piecewise Gaussian at the requested argument:
//   base + amp * exp(-0.5 * (x - mean)^2 / sig^2), where sig is 'sig1' for
//   arguments below the mean and 'sig2' otherwise
double CapyPiecewiseGaussianEval(
  CapyPiecewiseGaussian const* const gauss,
                        double const x) {

  // Select the deviation of the side of the mean the argument is on
  double const sig = (x < gauss->mean ? gauss->sig1 : gauss->sig2);
  double const exponent =
    -0.5 * (x - gauss->mean) * (x - gauss->mean) / (sig * sig);
  return gauss->base + gauss->amp * exp(exponent);
}

// Ackley's function
// Input:
//   in: 2D input
//   out: 1D output
// Output:
//   'out' is updated. Cf https://en.wikipedia.org/wiki/Ackley_function
void CapyAckley(
  double const* const in,
        double* const out) {
  out[0] =
    -20.0 * exp(-0.2 * sqrt(0.5 * (in[0] * in[0] + in[1] * in[1]))) -
    exp(0.5 * (cos(2.0 * 3.1415926 * in[0]) + cos(2.0 * 3.1415926 * in[1]))) +
    20.0 + 2.71828;
}

// Himmelblau's function
// Input:
//   in: 2D input (x, y)
//   out: 1D output
// Output:
//   'out' is updated with (x^2+y-11)^2+(x+y^2-7)^2.
//   Cf https://en.wikipedia.org/wiki/Himmelblau%27s_function
void CapyHimmelblau(
  double const* const in,
        double* const out) {
  double const termA = in[0] * in[0] + in[1] - 11.0;
  double const termB = in[0] + in[1] * in[1] - 7.0;
  double const sqTermA = termA * termA;
  double const sqTermB = termB * termB;
  out[0] = sqTermA + sqTermB;
}

// Check if a position is inside an ellipse (aligned with cooridnate system)
// Input:
//   pos: the position to check
//   center: the center of the ellipse
//   dims: the dimensions of the ellise
// Output:
//   Return true if pos is inside the ellipse, false else
bool CapyIsInsideEllipse(
  double const* const pos,
  double const* const center,
  double const* const dims) {
  double v[2] = {0.0};
  loop(i, 2) {
    v[i] = (pos[i] - center[i]) * (pos[i] - center[i]) / (dims[i] * dims[i]);
  }
  return v[0] + v[1] <= 1.0;
}

// Calculate the value of a cubic Bezier curve by successive interpolations
// of consecutive pairs of values
// Inputs:
//   t: argument of the function, in [0, 1]
//   params: the 4 control values of the Bezier
// Output:
//   Return the value of the Bezier
double CapyCubicBezierEval(
  double const t,
  double const params[4]) {

  // First level: interpolate the three pairs of consecutive control values
  double first[3] = {0.0};
  first[0] = CapyLerpNorm2Arr(t, params);
  first[1] = CapyLerpNorm2Arr(t, params + 1);
  first[2] = CapyLerpNorm2Arr(t, params + 2);

  // Second level: interpolate the two pairs of consecutive first level
  // values
  double second[2] = {0.0};
  second[0] = CapyLerpNorm2Arr(t, first);
  second[1] = CapyLerpNorm2Arr(t, first + 1);

  // Last level: interpolate the two second level values
  return CapyLerpNorm2Arr(t, second);
}

// Quadratic easing, accelerating from the start value
// Input:
//   from: start value
//   to: end value
//   t: control (in [0,1])
// Output:
//   Return the eased value, from + t^2 * (to - from)
double CapyEaseQuadraticAccel(
  double const from,
  double const to,
  double const t) {
  double const weight = t * t;
  return from + weight * (to - from);
}

// Quadratic easing, decelerating toward the end value
// Input:
//   from: start value
//   to: end value
//   t: control (in [0,1])
// Output:
//   Return the eased value, from - t * (t - 2) * (to - from)
double CapyEaseQuadraticDecel(
  double const from,
  double const to,
  double const t) {
  double const weight = t * (t - 2.0);
  return from - weight * (to - from);
}

// Quadratic easing, acceleration during the first half and deceleration
// during the second half
// Input:
//   from: start value
//   to: end value
//   t: control (in [0,1])
// Output:
//   Return the eased value
double CapyEaseQuadraticAccelDecel(
  double const from,
  double const to,
  double const t) {

  // Each half covers half of the range, with its control rescaled to [0,1]
  double const middle = 0.5 * (from + to);
  bool const isFirstHalf = (t < 0.5);
  return (
    isFirstHalf ?
    CapyEaseQuadraticAccel(from, middle, t * 2.0) :
    CapyEaseQuadraticDecel(middle, to, (t - 0.5) * 2.0));
}

// Quadratic easing, deceleration during the first half and acceleration
// during the second half
// Input:
//   from: start value
//   to: end value
//   t: control (in [0,1])
// Output:
//   Return the eased value
double CapyEaseQuadraticDecelAccel(
  double const from,
  double const to,
  double const t) {

  // Each half covers half of the range, with its control rescaled to [0,1]
  double const middle = 0.5 * (from + to);
  bool const isFirstHalf = (t < 0.5);
  return (
    isFirstHalf ?
    CapyEaseQuadraticDecel(from, middle, t * 2.0) :
    CapyEaseQuadraticAccel(middle, to, (t - 0.5) * 2.0));
}

// Conversion from acre to square meter
// Input:
//   areaAcre: the area in acre
// Output:
//   Return the area in square meter
double CapyConvAcreSquareMeter(double const areaAcre) {

  // Number of square meters in one acre
  double const squareMeterPerAcre = 4046.8564224;
  return squareMeterPerAcre * areaAcre;
}

// Conversion from square meter to acre
// Input:
//   areaSquareMeter: the area in square meter
// Output:
//   Return the area in acre
double CapyConvSquareMeterAcre(double const areaSquareMeter) {

  // Number of square meters in one acre
  double const squareMeterPerAcre = 4046.8564224;
  return areaSquareMeter / squareMeterPerAcre;
}

// Flory decomposition (aka Volumetric-deviatoric decomposition) of a matrix
// Input:
//   mat: the matrix to decompose
//   vol: the volumetric matrix
//   dev: the deviatoric matrix
// Output:
//   'vol' and 'dev' are updated. 'vol' represent the change in volume
//   (identity is no change) and 'dev' represent the change in shape
//   (identity is no change). All matrices must be square and have the same
//   dimensions.
// Exceptions:
//   May raise CapyExc_InvalidParameters (in which case 'vol' and 'dev' are
//   left unchanged)
void CapyFloryDecomposition(
  CapyMat const* const mat,
        CapyMat* const vol,
        CapyMat* const dev) {

  // All the matrices must be square, and the results must have the same
  // dimensions as the input because they are filled according to it
  if(
    mat->dims[0] != mat->dims[1] ||
    vol->dims[0] != vol->dims[1] ||
    dev->dims[0] != dev->dims[1] ||
    vol->dims[0] != mat->dims[0] ||
    dev->dims[0] != mat->dims[0]
  ) {
    raiseExc(CapyExc_InvalidParameters);
    return;
  }

  // c = det(mat)^(1/3)
  // NOTE(review): the cube root matches the decomposition of 3x3 matrices;
  // confirm the intended exponent for other dimensions. A negative
  // determinant gives NaN.
  double det = 0.0;
  CapyMatDet(mat, &det);
  double const c = pow(det, 1.0 / 3.0);

  // vol = c * identity
  CapyMatSetToIdentity(vol);
  loop(i, vol->dims[0]) vol->vals[vol->dims[0] * i + i] *= c;

  // dev = mat / c
  CapyMatCopy(mat, dev);
  loop(i, dev->dims[0] * dev->dims[1]) dev->vals[i] /= c;
}
