9#include "../eigen_compat.h"
10#include "../clusters.h"
42std::pair<double, double>
45 const Eigen::MatrixXd& w,
46 const Eigen::MatrixXd& residual,
47 const Eigen::VectorXd& x_centers,
48 const Eigen::VectorXd& x_scales,
53 double gradient = 0.0;
58 auto [k, j] = std::div(ind, p);
61 Eigen::VectorXd residual_v = residual.col(k);
62 Eigen::VectorXd w_v = w.col(k);
64 switch (jit_normalization) {
67 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
68 w_v.dot(residual_v) * x_centers(j)) /
71 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
72 std::pow(x_centers(j), 2) * w_v.sum()) /
73 (std::pow(x_scales(j), 2) * n);
78 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
79 w_v.dot(residual_v) * x_centers(j)) /
82 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
83 std::pow(x_centers(j), 2) * w_v.sum()) /
89 s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / (n * x_scales(j));
90 hessian = x.col(j).cwiseAbs2().dot(w_v) / (std::pow(x_scales(j), 2) * n);
94 gradient = s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / n;
95 hessian = x.col(j).cwiseAbs2().dot(w_v) / n;
99 return { gradient, hessian };
126std::pair<double, double>
129 const std::vector<int>& s,
131 const Eigen::MatrixXd& w,
132 const Eigen::MatrixXd& residual,
133 const Eigen::VectorXd& x_centers,
134 const Eigen::VectorXd& x_scales,
139 int m = residual.cols();
141 Eigen::MatrixXd x_s = Eigen::MatrixXd::Zero(n, m);
143 auto s_it = s.cbegin();
144 auto c_it = clusters.
cbegin(c_ind);
146 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
148 auto [k, j] = std::div(ind, p);
151 switch (jit_normalization) {
153 x_s.col(k) += x.col(j) * (s / x_scales(j));
154 x_s.col(k).array() -= x_centers(j) * s / x_scales(j);
158 x_s.col(k) += x.col(j) * s;
159 x_s.col(k).array() -= x_centers(j) * s;
163 x_s.col(k) += x.col(j) * (s / x_scales(j));
167 x_s.col(k) += x.col(j) * s;
175 for (
int k = 0; k < m; ++k) {
176 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
177 grad += x_s.col(k).cwiseProduct(w.col(k)).dot(residual.col(k)) / n;
180 return { hess, grad };
206std::pair<double, double>
209 const std::vector<int>& s,
211 const Eigen::MatrixXd& w,
212 const Eigen::MatrixXd& residual,
213 const Eigen::VectorXd& x_centers,
214 const Eigen::VectorXd& x_scales,
219 int m = residual.cols();
221 std::vector<Eigen::Triplet<double>> triplets;
223 Eigen::SparseMatrix<double> x_s(n, m);
224 Eigen::ArrayXd offset = Eigen::ArrayXd::Zero(m);
226 auto s_it = s.cbegin();
227 auto c_it = clusters.
cbegin(c_ind);
229 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
231 auto [k, j] = std::div(ind, p);
232 double s_ind = *s_it;
234 switch (jit_normalization) {
236 offset(k) += x_centers(j) * s_ind;
239 offset(k) += x_centers(j) * s_ind / x_scales(j);
249 for (
typename T::InnerIterator it(x.derived(), j); it; ++it) {
250 switch (jit_normalization) {
254 v = it.value() * s_ind;
260 v = it.value() * s_ind / x_scales(j);
264 triplets.emplace_back(it.row(), k, v);
268 x_s.setFromTriplets(triplets.begin(), triplets.end());
274 for (
int k = 0; k < m; ++k) {
275 Eigen::VectorXd weighted_residual = w.col(k).cwiseProduct(residual.col(k));
277 switch (jit_normalization) {
281 hess += (x_s.col(k).cwiseAbs2().dot(w.col(k)) -
282 2 * offset(k) * x_s.col(k).dot(w.col(k)) +
283 std::pow(offset(k), 2) * w.col(k).sum()) /
285 grad += x_s.col(k).dot(weighted_residual) / n -
286 offset(k) * weighted_residual.sum() / n;
292 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
293 grad += x_s.col(k).dot(weighted_residual) / n;
299 return { hess, grad };
333 Eigen::VectorXd& beta,
334 Eigen::MatrixXd& residual,
336 const Eigen::ArrayXd& lambda_cumsum,
338 const Eigen::MatrixXd& w,
339 const Eigen::VectorXd& x_centers,
340 const Eigen::VectorXd& x_scales,
341 const bool intercept,
343 const bool update_clusters,
345 const std::string& cd_type =
"cyclical")
347 using namespace Eigen;
349 const int n = x.rows();
350 const int p = x.cols();
351 const int m = residual.cols();
353 double max_abs_gradient = 0;
356 std::vector<int> indices;
357 indices.reserve(clusters.
size());
358 for (
int i = 0; i < clusters.
size(); ++i) {
359 if (clusters.
coeff(i) != 0) {
360 indices.push_back(i);
364 if (cd_type ==
"permuted") {
365 std::shuffle(indices.begin(), indices.end(), rng);
368 for (
int c_ind : indices) {
370 if (c_ind >= clusters.
size()) {
374 double c_old = clusters.
coeff(c_ind);
384 s.reserve(cluster_size);
386 for (
auto c_it = clusters.
cbegin(c_ind); c_it != clusters.
cend(c_ind);
389 assert(ind >= 0 && ind < beta.size() &&
"Invalid index in cluster");
390 double s_ind =
sign(beta(ind));
391 s.emplace_back(s_ind);
398 if (cluster_size == 1) {
399 int ind = *clusters.
cbegin(c_ind);
401 x, ind, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
403 std::tie(hess, grad) =
415 max_abs_gradient = std::max(max_abs_gradient, std::abs(grad));
421 c_old - grad / hess, c_ind, lambda_cumsum / hess, clusters);
423 assert(c_tilde == 0 || new_index < clusters.
size());
424 assert(new_index >= 0 && new_index <= clusters.
size());
426 double c_diff = c_old - c_tilde;
429 auto s_it = s.cbegin();
430 auto c_it = clusters.
cbegin(c_ind);
431 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
433 auto [k, j] = std::div(ind, p);
434 double s_ind = *s_it;
437 beta(ind) = c_tilde * s_ind;
440 switch (jit_normalization) {
442 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
443 residual.col(k).array() +=
444 x_centers(j) * s_ind * c_diff / x_scales(j);
448 residual.col(k) -= x.col(j) * (s_ind * c_diff);
449 residual.col(k).array() += x_centers(j) * s_ind * c_diff;
453 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
457 residual.col(k) -= x.col(j) * (s_ind * c_diff);
463 if (update_clusters) {
464 clusters.
update(c_ind, new_index, std::abs(c_tilde));
466 clusters.
setCoeff(c_ind, std::abs(c_tilde));
471 for (
int k = 0; k < residual.cols(); ++k) {
472 double beta0_update = residual.col(k).dot(w.col(k)) / n;
473 residual.col(k).array() -= beta0_update;
474 beta0(k) -= beta0_update;
478 return max_abs_gradient;
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
std::size_t size()
Returns the number of clusters.
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Namespace containing SLOPE regression implementation.
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd &lambda_cumsum, const Clusters &clusters)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
int sign(T val)
Returns the sign of a given value.
std::pair< double, double > computeGradientAndHessian(const T &x, const int ind, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixBase< T > &x, const int c_ind, const std::vector< int > &s, const Clusters &clusters, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::Index nonZeros(const Eigen::SparseMatrixBase< Derived > &x)
The declaration of the slopeThreshold function.