libslope/hybrid__cd_8h_source.html

#pragma once


#include "../clusters.h"

#include "../math.h"

#include "slope_threshold.h"

#include <Eigen/Core>

#include <iostream>

#include <vector>


namespace slope {


/**
 * Computes the weighted gradient and Hessian terms for a single column of
 * the design matrix, applying centering and/or scaling on the fly.
 *
 * @tparam T Matrix type; only `x.col(k)` is used, combined with the dense
 *   vectors `w` and `residual`.
 * @param x Design matrix.
 * @param k Index of the column to evaluate.
 * @param w Weight vector.
 * @param residual Residual vector.
 * @param x_centers Column centers; entry `k` is read only when centering is
 *   requested (`Both` or `Center`).
 * @param x_scales Column scales; entry `k` is read only when scaling is
 *   requested (`Both` or `Scale`).
 * @param s Multiplier applied to the gradient (the caller in this file passes
 *   the sign of the coefficient).
 * @param jit_normalization Which normalization to apply on the fly.
 * @param n Divisor for both terms (the caller in this file passes the number
 *   of rows of `x`).
 * @return The pair `{gradient, hessian}`. Both are zero when
 *   `jit_normalization` is not one of the four known enumerators.
 */
template<typename T>
std::pair<double, double>
computeGradientAndHessian(const T& x,
                          const int k,
                          const Eigen::VectorXd& w,
                          const Eigen::VectorXd& residual,
                          const Eigen::VectorXd& x_centers,
                          const Eigen::VectorXd& x_scales,
                          const double s,
                          const JitNormalization jit_normalization,
                          const int n)
{
  const bool centered = jit_normalization == JitNormalization::Both ||
                        jit_normalization == JitNormalization::Center;
  const bool scaled = jit_normalization == JitNormalization::Both ||
                      jit_normalization == JitNormalization::Scale;

  if (!centered && !scaled && jit_normalization != JitNormalization::None) {
    // Unknown normalization mode: nothing is computed.
    return { 0.0, 0.0 };
  }

  // Uncentered contributions, shared by every mode.
  double gradient_numerator = x.col(k).cwiseProduct(w).dot(residual);
  double hessian_numerator = x.col(k).cwiseAbs2().dot(w);

  if (centered) {
    // Correct both terms for the (implicit) subtraction of the column center.
    gradient_numerator = gradient_numerator - w.dot(residual) * x_centers(k);
    hessian_numerator = hessian_numerator -
                        2 * x_centers(k) * x.col(k).dot(w) +
                        std::pow(x_centers(k), 2) * w.sum();
  }

  // Scaling divides the gradient by the scale and the Hessian by its square.
  const double gradient_denominator = scaled ? n * x_scales(k) : n;
  const double hessian_denominator =
    scaled ? std::pow(x_scales(k), 2) * n : n;

  return { s * gradient_numerator / gradient_denominator,
           hessian_numerator / hessian_denominator };
}


/**
 * Dense-matrix overload: computes the gradient and Hessian terms for a whole
 * cluster of coefficients (declaration only; defined outside this header).
 *
 * @param x Dense design matrix.
 * @param j Index of the cluster (coordinateDescent passes its cluster loop
 *   index).
 * @param s Signs of the coefficients in the cluster, one entry per member
 *   (coordinateDescent fills it in cluster iteration order).
 * @param clusters Cluster structure that `j` refers to.
 * @param w Weight vector.
 * @param residual Residual vector.
 * @param x_centers Column centers of `x`.
 * @param x_scales Column scales of `x`.
 * @param jit_normalization Which normalization to apply on the fly.
 * @return A pair of doubles. NOTE(review): coordinateDescent unpacks it as
 *   (hessian, gradient), i.e. the reverse of computeGradientAndHessian;
 *   confirm against the definition.
 */
std::pair<double, double>

computeClusterGradientAndHessian(const Eigen::MatrixXd& x,

                                 const int j,

                                 const std::vector<int>& s,

                                 const Clusters& clusters,

                                 const Eigen::VectorXd& w,

                                 const Eigen::VectorXd& residual,

                                 const Eigen::VectorXd& x_centers,

                                 const Eigen::VectorXd& x_scales,

                                 const JitNormalization jit_normalization);


/**
 * Sparse-matrix overload: computes the gradient and Hessian terms for a whole
 * cluster of coefficients (declaration only; defined outside this header).
 *
 * @param x Sparse design matrix.
 * @param j Index of the cluster (coordinateDescent passes its cluster loop
 *   index).
 * @param s Signs of the coefficients in the cluster, one entry per member
 *   (coordinateDescent fills it in cluster iteration order).
 * @param clusters Cluster structure that `j` refers to.
 * @param w Weight vector.
 * @param residual Residual vector.
 * @param x_centers Column centers of `x`.
 * @param x_scales Column scales of `x`.
 * @param jit_normalization Which normalization to apply on the fly.
 * @return A pair of doubles. NOTE(review): coordinateDescent unpacks it as
 *   (hessian, gradient), i.e. the reverse of computeGradientAndHessian;
 *   confirm against the definition.
 */
std::pair<double, double>

computeClusterGradientAndHessian(const Eigen::SparseMatrix<double>& x,

                                 const int j,

                                 const std::vector<int>& s,

                                 const Clusters& clusters,

                                 const Eigen::VectorXd& w,

                                 const Eigen::VectorXd& residual,

                                 const Eigen::VectorXd& x_centers,

                                 const Eigen::VectorXd& x_scales,

                                 const JitNormalization jit_normalization);


/**
 * Runs one pass of coordinate descent over all non-zero clusters, updating
 * the coefficients, the residual and (optionally) the cluster structure in
 * place, followed by an optional intercept update.
 *
 * @tparam T Matrix type of `x`; must be accepted by
 *   computeGradientAndHessian and by one of the
 *   computeClusterGradientAndHessian overloads.
 * @param beta0 Intercept; only element 0 is modified, and only when
 *   `intercept` is true.
 * @param beta Coefficient vector; entries belonging to an updated cluster are
 *   overwritten with the new magnitude times their previous sign.
 * @param residual Residual vector, kept in sync with every coefficient and
 *   intercept change.
 * @param clusters Cluster structure; each visited cluster gets its new
 *   coefficient via `update()` or `setCoeff()`.
 * @param lambda Regularization sequence. If `lambda(0) == 0` the thresholding
 *   step is skipped and a plain Newton step is taken.
 * @param x Design matrix.
 * @param w Weight vector.
 * @param x_centers Column centers of `x` (used when centering on the fly).
 * @param x_scales Column scales of `x` (used when scaling on the fly).
 * @param intercept Whether to update the intercept after the cluster pass.
 * @param jit_normalization Which normalization to apply on the fly.
 * @param update_clusters If true, clusters may be reordered/merged through
 *   `Clusters::update()`; otherwise only the coefficient is stored.
 * @return The largest absolute gradient seen among the visited clusters.
 */
template<typename T>
double
coordinateDescent(Eigen::VectorXd& beta0,
                  Eigen::VectorXd& beta,
                  Eigen::VectorXd& residual,
                  Clusters& clusters,
                  const Eigen::ArrayXd& lambda,
                  const T& x,
                  const Eigen::VectorXd& w,
                  const Eigen::VectorXd& x_centers,
                  const Eigen::VectorXd& x_scales,
                  const bool intercept,
                  const JitNormalization jit_normalization,
                  const bool update_clusters)
{
  const int n = x.rows();

  double max_abs_gradient = 0;

  for (int j = 0; j < clusters.n_clusters(); ++j) {
    const double c_old = clusters.coeff(j);

    if (c_old == 0) {
      // We do not update the zero cluster because it can be very large, but
      // often does not change.
      continue;
    }

    const int cluster_size = clusters.cluster_size(j);

    // Signs of the cluster's coefficients, in cluster iteration order.
    std::vector<int> s;
    s.reserve(cluster_size);

    for (auto c_it = clusters.cbegin(j); c_it != clusters.cend(j); ++c_it) {
      s.emplace_back(sign(beta(*c_it)));
    }

    double hessian_j = 1;
    double gradient_j = 0;

    if (cluster_size == 1) {
      const int k = *clusters.cbegin(j);
      std::tie(gradient_j, hessian_j) = computeGradientAndHessian(
        x, k, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
    } else {
      // NOTE(review): the cluster helper's result is unpacked as
      // (hessian, gradient), the reverse of the single-column helper above.
      // Its definition is not visible here; confirm the order matches.
      std::tie(hessian_j, gradient_j) = computeClusterGradientAndHessian(
        x, j, s, clusters, w, residual, x_centers, x_scales, jit_normalization);
    }

    max_abs_gradient = std::max(max_abs_gradient, std::abs(gradient_j));

    // Unpenalized Newton step; kept as-is when there is no regularization.
    double c_tilde = c_old - gradient_j / hessian_j;
    int new_index = j;

    if (lambda(0) != 0) {
      std::tie(c_tilde, new_index) =
        slopeThreshold(c_tilde, j, lambda / hessian_j, clusters);
    }

    const double c_diff = c_old - c_tilde;

    if (c_diff != 0) {
      auto s_it = s.cbegin();
      auto c_it = clusters.cbegin(j);

      for (; c_it != clusters.cend(j); ++c_it, ++s_it) {
        const int k = *c_it;
        const double s_k = *s_it;

        // Update coefficient
        beta(k) = c_tilde * s_k;

        // Update residual, undoing the column's normalization on the fly
        switch (jit_normalization) {
          case JitNormalization::Both:
            residual -= x.col(k) * (s_k * c_diff / x_scales(k));
            residual.array() += x_centers(k) * s_k * c_diff / x_scales(k);
            break;

          case JitNormalization::Center:
            residual -= x.col(k) * (s_k * c_diff);
            residual.array() += x_centers(k) * s_k * c_diff;
            break;

          case JitNormalization::Scale:
            residual -= x.col(k) * (s_k * c_diff / x_scales(k));
            break;

          case JitNormalization::None:
            residual -= x.col(k) * (s_k * c_diff);
            break;
        }
      }
    }

    if (update_clusters) {
      clusters.update(j, new_index, std::abs(c_tilde));
    } else {
      clusters.setCoeff(j, std::abs(c_tilde));
    }
  }

  if (intercept) {
    // Shift the intercept by the weighted residual sum divided by n and
    // remove the same amount from every residual entry.
    const double beta0_update = residual.dot(w) / n;
    residual.array() -= beta0_update;
    beta0(0) -= beta0_update;
  }

  return max_abs_gradient;
}


} // namespace slope

slope::Clusters
Representation of the clusters in SLOPE.
Definition clusters.h:18

slope::Clusters::update
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
Definition clusters.cpp:164

slope::Clusters::coeff
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
Definition clusters.cpp:117

slope::Clusters::cluster_size
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
Definition clusters.cpp:78

slope::Clusters::setCoeff
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
Definition clusters.cpp:127

slope::Clusters::n_clusters
int n_clusters() const
Returns the number of clusters.
Definition clusters.cpp:103

slope::Clusters::cend
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
Definition clusters.cpp:48

slope::Clusters::cbegin
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Definition clusters.cpp:38

slope
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5

slope::coordinateDescent
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::VectorXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda, const T &x, const Eigen::VectorXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters)
Definition hybrid_cd.h:188

slope::sign
int sign(T val)
Returns the sign of a given value.
Definition math.h:35

slope::computeClusterGradientAndHessian
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixXd &x, const int j, const std::vector< int > &s, const Clusters &clusters, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition hybrid_cd.cpp:6

slope::JitNormalization
JitNormalization
Enums to control predictor standardization behavior.
Definition jit_normalization.h:13

slope::JitNormalization::Both
@ Both
Both.

slope::JitNormalization::Center
@ Center
Center JIT.

slope::JitNormalization::None
@ None
No JIT normalization.

slope::JitNormalization::Scale
@ Scale
Scale JIT.

slope::slopeThreshold
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd lambdas, const Clusters &clusters)
Definition slope_threshold.cpp:8

slope::computeGradientAndHessian
std::pair< double, double > computeGradientAndHessian(const T &x, const int k, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
Definition hybrid_cd.h:42

slope_threshold.h
The declaration of the slopeThreshold function.