9#include "../eigen_compat.h"
10#include "../clusters.h"
43std::pair<double, double>
46 const Eigen::MatrixXd& w,
47 const Eigen::MatrixXd& residual,
48 const Eigen::VectorXd& x_centers,
49 const Eigen::VectorXd& x_scales,
54 double gradient = 0.0;
59 auto [k, j] = std::div(ind, p);
62 Eigen::VectorXd residual_v = residual.col(k);
63 Eigen::VectorXd w_v = w.col(k);
65 switch (jit_normalization) {
68 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
69 w_v.dot(residual_v) * x_centers(j)) /
72 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
73 std::pow(x_centers(j), 2) * w_v.sum()) /
74 (std::pow(x_scales(j), 2) * n);
79 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
80 w_v.dot(residual_v) * x_centers(j)) /
83 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
84 std::pow(x_centers(j), 2) * w_v.sum()) /
90 s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / (n * x_scales(j));
91 hessian = x.col(j).cwiseAbs2().dot(w_v) / (std::pow(x_scales(j), 2) * n);
95 gradient = s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / n;
96 hessian = x.col(j).cwiseAbs2().dot(w_v) / n;
100 return { gradient, hessian };
127std::pair<double, double>
130 const std::vector<int>& s,
132 const Eigen::MatrixXd& w,
133 const Eigen::MatrixXd& residual,
134 const Eigen::VectorXd& x_centers,
135 const Eigen::VectorXd& x_scales,
140 int m = residual.cols();
142 Eigen::MatrixXd x_s = Eigen::MatrixXd::Zero(n, m);
144 auto s_it = s.cbegin();
145 auto c_it = clusters.
cbegin(c_ind);
147 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
149 auto [k, j] = std::div(ind, p);
152 switch (jit_normalization) {
154 x_s.col(k) += x.col(j) * (s / x_scales(j));
155 x_s.col(k).array() -= x_centers(j) * s / x_scales(j);
159 x_s.col(k) += x.col(j) * s;
160 x_s.col(k).array() -= x_centers(j) * s;
164 x_s.col(k) += x.col(j) * (s / x_scales(j));
168 x_s.col(k) += x.col(j) * s;
176 for (
int k = 0; k < m; ++k) {
177 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
178 grad += x_s.col(k).cwiseProduct(w.col(k)).dot(residual.col(k)) / n;
181 return { hess, grad };
207std::pair<double, double>
210 const std::vector<int>& s,
212 const Eigen::MatrixXd& w,
213 const Eigen::MatrixXd& residual,
214 const Eigen::VectorXd& x_centers,
215 const Eigen::VectorXd& x_scales,
220 int m = residual.cols();
222 std::vector<Eigen::Triplet<double>> triplets;
224 Eigen::SparseMatrix<double> x_s(n, m);
225 Eigen::ArrayXd offset = Eigen::ArrayXd::Zero(m);
227 auto s_it = s.cbegin();
228 auto c_it = clusters.
cbegin(c_ind);
230 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
232 auto [k, j] = std::div(ind, p);
233 double s_ind = *s_it;
235 switch (jit_normalization) {
237 offset(k) += x_centers(j) * s_ind;
240 offset(k) += x_centers(j) * s_ind / x_scales(j);
250 for (
typename T::InnerIterator it(x.derived(), j); it; ++it) {
251 switch (jit_normalization) {
255 v = it.value() * s_ind;
261 v = it.value() * s_ind / x_scales(j);
265 triplets.emplace_back(it.row(), k, v);
269 x_s.setFromTriplets(triplets.begin(), triplets.end());
275 for (
int k = 0; k < m; ++k) {
276 Eigen::VectorXd weighted_residual = w.col(k).cwiseProduct(residual.col(k));
278 switch (jit_normalization) {
282 hess += (x_s.col(k).cwiseAbs2().dot(w.col(k)) -
283 2 * offset(k) * x_s.col(k).dot(w.col(k)) +
284 std::pow(offset(k), 2) * w.col(k).sum()) /
286 grad += x_s.col(k).dot(weighted_residual) / n -
287 offset(k) * weighted_residual.sum() / n;
293 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
294 grad += x_s.col(k).dot(weighted_residual) / n;
300 return { hess, grad };
334 Eigen::VectorXd& beta,
335 Eigen::MatrixXd& residual,
337 const Eigen::ArrayXd& lambda_cumsum,
339 const Eigen::MatrixXd& w,
340 const Eigen::VectorXd& x_centers,
341 const Eigen::VectorXd& x_scales,
342 const bool intercept,
344 const bool update_clusters,
346 const std::string& cd_type =
"cyclical")
348 using namespace Eigen;
350 const int n = x.rows();
351 const int p = x.cols();
352 const int m = residual.cols();
354 double max_abs_gradient = 0;
357 std::vector<int> indices;
358 indices.reserve(clusters.
size());
359 for (
int i = 0; i < clusters.
size(); ++i) {
360 if (clusters.
coeff(i) != 0) {
361 indices.push_back(i);
365 if (cd_type ==
"permuted") {
366 std::shuffle(indices.begin(), indices.end(), rng);
369 for (
int c_ind : indices) {
371 if (c_ind >= clusters.
size()) {
375 double c_old = clusters.
coeff(c_ind);
385 s.reserve(cluster_size);
387 for (
auto c_it = clusters.
cbegin(c_ind); c_it != clusters.
cend(c_ind);
390 assert(ind >= 0 && ind < beta.size() &&
"Invalid index in cluster");
391 double s_ind =
sign(beta(ind));
392 s.emplace_back(s_ind);
399 if (cluster_size == 1) {
400 int ind = *clusters.
cbegin(c_ind);
402 x, ind, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
404 std::tie(hess, grad) =
416 max_abs_gradient = std::max(max_abs_gradient, std::abs(grad));
422 c_old - grad / hess, c_ind, lambda_cumsum / hess, clusters);
424 assert(c_tilde == 0 || new_index < clusters.
size());
425 assert(new_index >= 0 && new_index <= clusters.
size());
427 double c_diff = c_old - c_tilde;
430 auto s_it = s.cbegin();
431 auto c_it = clusters.
cbegin(c_ind);
432 for (; c_it != clusters.
cend(c_ind); ++c_it, ++s_it) {
434 auto [k, j] = std::div(ind, p);
435 double s_ind = *s_it;
438 beta(ind) = c_tilde * s_ind;
441 switch (jit_normalization) {
443 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
444 residual.col(k).array() +=
445 x_centers(j) * s_ind * c_diff / x_scales(j);
449 residual.col(k) -= x.col(j) * (s_ind * c_diff);
450 residual.col(k).array() += x_centers(j) * s_ind * c_diff;
454 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
458 residual.col(k) -= x.col(j) * (s_ind * c_diff);
464 if (update_clusters) {
465 clusters.
update(c_ind, new_index, std::abs(c_tilde));
467 clusters.
setCoeff(c_ind, std::abs(c_tilde));
472 for (
int k = 0; k < residual.cols(); ++k) {
473 double beta0_update = residual.col(k).dot(w.col(k)) / n;
474 residual.col(k).array() -= beta0_update;
475 beta0(k) -= beta0_update;
479 return max_abs_gradient;
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
std::size_t size()
Returns the number of clusters.
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Namespace containing SLOPE regression implementation.
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd &lambda_cumsum, const Clusters &clusters)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
int sign(T val)
Returns the sign of a given value.
std::pair< double, double > computeGradientAndHessian(const T &x, const int ind, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixBase< T > &x, const int c_ind, const std::vector< int > &s, const Clusters &clusters, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::Index nonZeros(const Eigen::SparseMatrixBase< Derived > &x)
The declaration of the slopeThreshold function.