slope 6.2.1
Loading...
Searching...
No Matches
hybrid_cd.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "../eigen_compat.h"
10#include "../clusters.h"
11#include "../math.h"
12#include "slope_threshold.h"
13#include <Eigen/Core>
14#include <random>
15#include <vector>
16
17namespace slope {
18
41template<typename T>
42std::pair<double, double>
44 const int ind,
45 const Eigen::MatrixXd& w,
46 const Eigen::MatrixXd& residual,
47 const Eigen::VectorXd& x_centers,
48 const Eigen::VectorXd& x_scales,
49 const double s,
50 const JitNormalization jit_normalization,
51 const int n)
52{
53 double gradient = 0.0;
54 double hessian = 0.0;
55
56 int p = x.cols();
57
58 auto [k, j] = std::div(ind, p);
59
60 // TODO: Avoid these copies
61 Eigen::VectorXd residual_v = residual.col(k);
62 Eigen::VectorXd w_v = w.col(k);
63
64 switch (jit_normalization) {
66 gradient = s *
67 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
68 w_v.dot(residual_v) * x_centers(j)) /
69 (n * x_scales(j));
70 hessian =
71 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
72 std::pow(x_centers(j), 2) * w_v.sum()) /
73 (std::pow(x_scales(j), 2) * n);
74 break;
75
77 gradient = s *
78 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
79 w_v.dot(residual_v) * x_centers(j)) /
80 n;
81 hessian =
82 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
83 std::pow(x_centers(j), 2) * w_v.sum()) /
84 n;
85 break;
86
88 gradient =
89 s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / (n * x_scales(j));
90 hessian = x.col(j).cwiseAbs2().dot(w_v) / (std::pow(x_scales(j), 2) * n);
91 break;
92
94 gradient = s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / n;
95 hessian = x.col(j).cwiseAbs2().dot(w_v) / n;
96 break;
97 }
98
99 return { gradient, hessian };
100}
101
125template<typename T>
126std::pair<double, double>
127computeClusterGradientAndHessian(const Eigen::MatrixBase<T>& x,
128 const int c_ind,
129 const std::vector<int>& s,
130 const Clusters& clusters,
131 const Eigen::MatrixXd& w,
132 const Eigen::MatrixXd& residual,
133 const Eigen::VectorXd& x_centers,
134 const Eigen::VectorXd& x_scales,
135 const JitNormalization jit_normalization)
136{
137 int n = x.rows();
138 int p = x.cols();
139 int m = residual.cols();
140
141 Eigen::MatrixXd x_s = Eigen::MatrixXd::Zero(n, m);
142
143 auto s_it = s.cbegin();
144 auto c_it = clusters.cbegin(c_ind);
145
146 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
147 int ind = *c_it;
148 auto [k, j] = std::div(ind, p);
149 double s = *s_it;
150
151 switch (jit_normalization) {
153 x_s.col(k) += x.col(j) * (s / x_scales(j));
154 x_s.col(k).array() -= x_centers(j) * s / x_scales(j);
155 break;
156
158 x_s.col(k) += x.col(j) * s;
159 x_s.col(k).array() -= x_centers(j) * s;
160 break;
161
163 x_s.col(k) += x.col(j) * (s / x_scales(j));
164 break;
165
167 x_s.col(k) += x.col(j) * s;
168 break;
169 }
170 }
171
172 double hess = 0;
173 double grad = 0;
174
175 for (int k = 0; k < m; ++k) {
176 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
177 grad += x_s.col(k).cwiseProduct(w.col(k)).dot(residual.col(k)) / n;
178 }
179
180 return { hess, grad };
181}
182
205template<typename T>
206std::pair<double, double>
207computeClusterGradientAndHessian(const Eigen::SparseMatrixBase<T>& x,
208 const int c_ind,
209 const std::vector<int>& s,
210 const Clusters& clusters,
211 const Eigen::MatrixXd& w,
212 const Eigen::MatrixXd& residual,
213 const Eigen::VectorXd& x_centers,
214 const Eigen::VectorXd& x_scales,
215 const JitNormalization jit_normalization)
216{
217 int n = x.rows();
218 int p = x.cols();
219 int m = residual.cols();
220
221 std::vector<Eigen::Triplet<double>> triplets;
222
223 Eigen::SparseMatrix<double> x_s(n, m);
224 Eigen::ArrayXd offset = Eigen::ArrayXd::Zero(m);
225
226 auto s_it = s.cbegin();
227 auto c_it = clusters.cbegin(c_ind);
228
229 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
230 int ind = *c_it;
231 auto [k, j] = std::div(ind, p);
232 double s_ind = *s_it;
233
234 switch (jit_normalization) {
236 offset(k) += x_centers(j) * s_ind;
237 break;
239 offset(k) += x_centers(j) * s_ind / x_scales(j);
240 break;
242 break;
244 break;
245 }
246
247 double v = 0;
248
249 for (typename T::InnerIterator it(x.derived(), j); it; ++it) {
250 switch (jit_normalization) {
252 [[fallthrough]];
254 v = it.value() * s_ind;
255 break;
256
258 [[fallthrough]];
260 v = it.value() * s_ind / x_scales(j);
261 break;
262 }
263
264 triplets.emplace_back(it.row(), k, v);
265 }
266 }
267
268 x_s.setFromTriplets(triplets.begin(), triplets.end());
269 assert(slope::nonZeros(x_s) > 0);
270
271 double hess = 0;
272 double grad = 0;
273
274 for (int k = 0; k < m; ++k) {
275 Eigen::VectorXd weighted_residual = w.col(k).cwiseProduct(residual.col(k));
276
277 switch (jit_normalization) {
279 [[fallthrough]];
281 hess += (x_s.col(k).cwiseAbs2().dot(w.col(k)) -
282 2 * offset(k) * x_s.col(k).dot(w.col(k)) +
283 std::pow(offset(k), 2) * w.col(k).sum()) /
284 n;
285 grad += x_s.col(k).dot(weighted_residual) / n -
286 offset(k) * weighted_residual.sum() / n;
287 break;
288
290 [[fallthrough]];
292 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
293 grad += x_s.col(k).dot(weighted_residual) / n;
294
295 break;
296 }
297 }
298
299 return { hess, grad };
300}
301
330template<typename T>
331double
332coordinateDescent(Eigen::VectorXd& beta0,
333 Eigen::VectorXd& beta,
334 Eigen::MatrixXd& residual,
335 Clusters& clusters,
336 const Eigen::ArrayXd& lambda_cumsum,
337 const T& x,
338 const Eigen::MatrixXd& w,
339 const Eigen::VectorXd& x_centers,
340 const Eigen::VectorXd& x_scales,
341 const bool intercept,
342 const JitNormalization jit_normalization,
343 const bool update_clusters,
344 std::mt19937& rng,
345 const std::string& cd_type = "cyclical")
346{
347 using namespace Eigen;
348
349 const int n = x.rows();
350 const int p = x.cols();
351 const int m = residual.cols();
352
353 double max_abs_gradient = 0;
354
355 // Create a vector of indices to process
356 std::vector<int> indices;
357 indices.reserve(clusters.size());
358 for (int i = 0; i < clusters.size(); ++i) {
359 if (clusters.coeff(i) != 0) { // Skip zero cluster
360 indices.push_back(i);
361 }
362 }
363
364 if (cd_type == "permuted") {
365 std::shuffle(indices.begin(), indices.end(), rng);
366 }
367
368 for (int c_ind : indices) {
369 // Skip if index is no longer valid due to cluster updates
370 if (c_ind >= clusters.size()) {
371 continue;
372 }
373
374 double c_old = clusters.coeff(c_ind);
375
376 if (c_old == 0) {
377 // We do not update the zero cluster because it can be very large, but
378 // often does not change.
379 continue;
380 }
381
382 int cluster_size = clusters.cluster_size(c_ind);
383 std::vector<int> s;
384 s.reserve(cluster_size);
385
386 for (auto c_it = clusters.cbegin(c_ind); c_it != clusters.cend(c_ind);
387 ++c_it) {
388 int ind = *c_it;
389 assert(ind >= 0 && ind < beta.size() && "Invalid index in cluster");
390 double s_ind = sign(beta(ind));
391 s.emplace_back(s_ind);
392 }
393
394 double hess = 1;
395 double grad = 0;
396 VectorXd x_s(n);
397
398 if (cluster_size == 1) {
399 int ind = *clusters.cbegin(c_ind);
400 std::tie(grad, hess) = computeGradientAndHessian(
401 x, ind, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
402 } else {
403 std::tie(hess, grad) =
405 c_ind,
406 s,
407 clusters,
408 w,
409 residual,
410 x_centers,
411 x_scales,
412 jit_normalization);
413 }
414
415 max_abs_gradient = std::max(max_abs_gradient, std::abs(grad));
416
417 double c_tilde;
418 int new_index;
419
420 std::tie(c_tilde, new_index) = slopeThreshold(
421 c_old - grad / hess, c_ind, lambda_cumsum / hess, clusters);
422
423 assert(c_tilde == 0 || new_index < clusters.size());
424 assert(new_index >= 0 && new_index <= clusters.size());
425
426 double c_diff = c_old - c_tilde;
427
428 if (c_diff != 0) {
429 auto s_it = s.cbegin();
430 auto c_it = clusters.cbegin(c_ind);
431 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
432 int ind = *c_it;
433 auto [k, j] = std::div(ind, p);
434 double s_ind = *s_it;
435
436 // Update coefficient
437 beta(ind) = c_tilde * s_ind;
438
439 // Update residual
440 switch (jit_normalization) {
442 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
443 residual.col(k).array() +=
444 x_centers(j) * s_ind * c_diff / x_scales(j);
445 break;
446
448 residual.col(k) -= x.col(j) * (s_ind * c_diff);
449 residual.col(k).array() += x_centers(j) * s_ind * c_diff;
450 break;
451
453 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
454 break;
455
457 residual.col(k) -= x.col(j) * (s_ind * c_diff);
458 break;
459 }
460 }
461 }
462
463 if (update_clusters) {
464 clusters.update(c_ind, new_index, std::abs(c_tilde));
465 } else {
466 clusters.setCoeff(c_ind, std::abs(c_tilde));
467 }
468 }
469
470 if (intercept) {
471 for (int k = 0; k < residual.cols(); ++k) {
472 double beta0_update = residual.col(k).dot(w.col(k)) / n;
473 residual.col(k).array() -= beta0_update;
474 beta0(k) -= beta0_update;
475 }
476 }
477
478 return max_abs_gradient;
479}
480
481} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
std::size_t size()
Returns the number of clusters.
Definition clusters.h:35
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd &lambda_cumsum, const Clusters &clusters)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
Definition hybrid_cd.h:332
int sign(T val)
Returns the sign of a given value.
Definition math.h:37
std::pair< double, double > computeGradientAndHessian(const T &x, const int ind, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
Definition hybrid_cd.h:43
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixBase< T > &x, const int c_ind, const std::vector< int > &s, const Clusters &clusters, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition hybrid_cd.h:127
Eigen::Index nonZeros(const Eigen::SparseMatrixBase< Derived > &x)
Definition utils.h:33
The declaration of the slopeThreshold function.