slope 6.5.0
Loading...
Searching...
No Matches
hybrid_cd.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "../eigen_compat.h"
10#include "../clusters.h"
11#include "../math.h"
12#include "slope_threshold.h"
13#include <Eigen/Core>
14#include <cassert>
15#include <random>
16#include <vector>
17
18namespace slope {
19
42template<typename T>
43std::pair<double, double>
45 const int ind,
46 const Eigen::MatrixXd& w,
47 const Eigen::MatrixXd& residual,
48 const Eigen::VectorXd& x_centers,
49 const Eigen::VectorXd& x_scales,
50 const double s,
51 const JitNormalization jit_normalization,
52 const int n)
53{
54 double gradient = 0.0;
55 double hessian = 0.0;
56
57 int p = x.cols();
58
59 auto [k, j] = std::div(ind, p);
60
61 // TODO: Avoid these copies
62 Eigen::VectorXd residual_v = residual.col(k);
63 Eigen::VectorXd w_v = w.col(k);
64
65 switch (jit_normalization) {
67 gradient = s *
68 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
69 w_v.dot(residual_v) * x_centers(j)) /
70 (n * x_scales(j));
71 hessian =
72 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
73 std::pow(x_centers(j), 2) * w_v.sum()) /
74 (std::pow(x_scales(j), 2) * n);
75 break;
76
78 gradient = s *
79 (x.col(j).cwiseProduct(w_v).dot(residual_v) -
80 w_v.dot(residual_v) * x_centers(j)) /
81 n;
82 hessian =
83 (x.col(j).cwiseAbs2().dot(w_v) - 2 * x_centers(j) * x.col(j).dot(w_v) +
84 std::pow(x_centers(j), 2) * w_v.sum()) /
85 n;
86 break;
87
89 gradient =
90 s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / (n * x_scales(j));
91 hessian = x.col(j).cwiseAbs2().dot(w_v) / (std::pow(x_scales(j), 2) * n);
92 break;
93
95 gradient = s * (x.col(j).cwiseProduct(w_v).dot(residual_v)) / n;
96 hessian = x.col(j).cwiseAbs2().dot(w_v) / n;
97 break;
98 }
99
100 return { gradient, hessian };
101}
102
126template<typename T>
127std::pair<double, double>
128computeClusterGradientAndHessian(const Eigen::MatrixBase<T>& x,
129 const int c_ind,
130 const std::vector<int>& s,
131 const Clusters& clusters,
132 const Eigen::MatrixXd& w,
133 const Eigen::MatrixXd& residual,
134 const Eigen::VectorXd& x_centers,
135 const Eigen::VectorXd& x_scales,
136 const JitNormalization jit_normalization)
137{
138 int n = x.rows();
139 int p = x.cols();
140 int m = residual.cols();
141
142 Eigen::MatrixXd x_s = Eigen::MatrixXd::Zero(n, m);
143
144 auto s_it = s.cbegin();
145 auto c_it = clusters.cbegin(c_ind);
146
147 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
148 int ind = *c_it;
149 auto [k, j] = std::div(ind, p);
150 double s = *s_it;
151
152 switch (jit_normalization) {
154 x_s.col(k) += x.col(j) * (s / x_scales(j));
155 x_s.col(k).array() -= x_centers(j) * s / x_scales(j);
156 break;
157
159 x_s.col(k) += x.col(j) * s;
160 x_s.col(k).array() -= x_centers(j) * s;
161 break;
162
164 x_s.col(k) += x.col(j) * (s / x_scales(j));
165 break;
166
168 x_s.col(k) += x.col(j) * s;
169 break;
170 }
171 }
172
173 double hess = 0;
174 double grad = 0;
175
176 for (int k = 0; k < m; ++k) {
177 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
178 grad += x_s.col(k).cwiseProduct(w.col(k)).dot(residual.col(k)) / n;
179 }
180
181 return { hess, grad };
182}
183
206template<typename T>
207std::pair<double, double>
208computeClusterGradientAndHessian(const Eigen::SparseMatrixBase<T>& x,
209 const int c_ind,
210 const std::vector<int>& s,
211 const Clusters& clusters,
212 const Eigen::MatrixXd& w,
213 const Eigen::MatrixXd& residual,
214 const Eigen::VectorXd& x_centers,
215 const Eigen::VectorXd& x_scales,
216 const JitNormalization jit_normalization)
217{
218 int n = x.rows();
219 int p = x.cols();
220 int m = residual.cols();
221
222 std::vector<Eigen::Triplet<double>> triplets;
223
224 Eigen::SparseMatrix<double> x_s(n, m);
225 Eigen::ArrayXd offset = Eigen::ArrayXd::Zero(m);
226
227 auto s_it = s.cbegin();
228 auto c_it = clusters.cbegin(c_ind);
229
230 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
231 int ind = *c_it;
232 auto [k, j] = std::div(ind, p);
233 double s_ind = *s_it;
234
235 switch (jit_normalization) {
237 offset(k) += x_centers(j) * s_ind;
238 break;
240 offset(k) += x_centers(j) * s_ind / x_scales(j);
241 break;
243 break;
245 break;
246 }
247
248 double v = 0;
249
250 for (typename T::InnerIterator it(x.derived(), j); it; ++it) {
251 switch (jit_normalization) {
253 [[fallthrough]];
255 v = it.value() * s_ind;
256 break;
257
259 [[fallthrough]];
261 v = it.value() * s_ind / x_scales(j);
262 break;
263 }
264
265 triplets.emplace_back(it.row(), k, v);
266 }
267 }
268
269 x_s.setFromTriplets(triplets.begin(), triplets.end());
270 assert(slope::nonZeros(x_s) > 0);
271
272 double hess = 0;
273 double grad = 0;
274
275 for (int k = 0; k < m; ++k) {
276 Eigen::VectorXd weighted_residual = w.col(k).cwiseProduct(residual.col(k));
277
278 switch (jit_normalization) {
280 [[fallthrough]];
282 hess += (x_s.col(k).cwiseAbs2().dot(w.col(k)) -
283 2 * offset(k) * x_s.col(k).dot(w.col(k)) +
284 std::pow(offset(k), 2) * w.col(k).sum()) /
285 n;
286 grad += x_s.col(k).dot(weighted_residual) / n -
287 offset(k) * weighted_residual.sum() / n;
288 break;
289
291 [[fallthrough]];
293 hess += x_s.col(k).cwiseAbs2().dot(w.col(k)) / n;
294 grad += x_s.col(k).dot(weighted_residual) / n;
295
296 break;
297 }
298 }
299
300 return { hess, grad };
301}
302
331template<typename T>
332double
333coordinateDescent(Eigen::VectorXd& beta0,
334 Eigen::VectorXd& beta,
335 Eigen::MatrixXd& residual,
336 Clusters& clusters,
337 const Eigen::ArrayXd& lambda_cumsum,
338 const T& x,
339 const Eigen::MatrixXd& w,
340 const Eigen::VectorXd& x_centers,
341 const Eigen::VectorXd& x_scales,
342 const bool intercept,
343 const JitNormalization jit_normalization,
344 const bool update_clusters,
345 std::mt19937& rng,
346 const std::string& cd_type = "cyclical")
347{
348 using namespace Eigen;
349
350 const int n = x.rows();
351 const int p = x.cols();
352 const int m = residual.cols();
353
354 double max_abs_gradient = 0;
355
356 // Create a vector of indices to process
357 std::vector<int> indices;
358 indices.reserve(clusters.size());
359 for (int i = 0; i < clusters.size(); ++i) {
360 if (clusters.coeff(i) != 0) { // Skip zero cluster
361 indices.push_back(i);
362 }
363 }
364
365 if (cd_type == "permuted") {
366 std::shuffle(indices.begin(), indices.end(), rng);
367 }
368
369 for (int c_ind : indices) {
370 // Skip if index is no longer valid due to cluster updates
371 if (c_ind >= clusters.size()) {
372 continue;
373 }
374
375 double c_old = clusters.coeff(c_ind);
376
377 if (c_old == 0) {
378 // We do not update the zero cluster because it can be very large, but
379 // often does not change.
380 continue;
381 }
382
383 int cluster_size = clusters.cluster_size(c_ind);
384 std::vector<int> s;
385 s.reserve(cluster_size);
386
387 for (auto c_it = clusters.cbegin(c_ind); c_it != clusters.cend(c_ind);
388 ++c_it) {
389 int ind = *c_it;
390 assert(ind >= 0 && ind < beta.size() && "Invalid index in cluster");
391 double s_ind = sign(beta(ind));
392 s.emplace_back(s_ind);
393 }
394
395 double hess = 1;
396 double grad = 0;
397 VectorXd x_s(n);
398
399 if (cluster_size == 1) {
400 int ind = *clusters.cbegin(c_ind);
401 std::tie(grad, hess) = computeGradientAndHessian(
402 x, ind, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
403 } else {
404 std::tie(hess, grad) =
406 c_ind,
407 s,
408 clusters,
409 w,
410 residual,
411 x_centers,
412 x_scales,
413 jit_normalization);
414 }
415
416 max_abs_gradient = std::max(max_abs_gradient, std::abs(grad));
417
418 double c_tilde;
419 int new_index;
420
421 std::tie(c_tilde, new_index) = slopeThreshold(
422 c_old - grad / hess, c_ind, lambda_cumsum / hess, clusters);
423
424 assert(c_tilde == 0 || new_index < clusters.size());
425 assert(new_index >= 0 && new_index <= clusters.size());
426
427 double c_diff = c_old - c_tilde;
428
429 if (c_diff != 0) {
430 auto s_it = s.cbegin();
431 auto c_it = clusters.cbegin(c_ind);
432 for (; c_it != clusters.cend(c_ind); ++c_it, ++s_it) {
433 int ind = *c_it;
434 auto [k, j] = std::div(ind, p);
435 double s_ind = *s_it;
436
437 // Update coefficient
438 beta(ind) = c_tilde * s_ind;
439
440 // Update residual
441 switch (jit_normalization) {
443 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
444 residual.col(k).array() +=
445 x_centers(j) * s_ind * c_diff / x_scales(j);
446 break;
447
449 residual.col(k) -= x.col(j) * (s_ind * c_diff);
450 residual.col(k).array() += x_centers(j) * s_ind * c_diff;
451 break;
452
454 residual.col(k) -= x.col(j) * (s_ind * c_diff / x_scales(j));
455 break;
456
458 residual.col(k) -= x.col(j) * (s_ind * c_diff);
459 break;
460 }
461 }
462 }
463
464 if (update_clusters) {
465 clusters.update(c_ind, new_index, std::abs(c_tilde));
466 } else {
467 clusters.setCoeff(c_ind, std::abs(c_tilde));
468 }
469 }
470
471 if (intercept) {
472 for (int k = 0; k < residual.cols(); ++k) {
473 double beta0_update = residual.col(k).dot(w.col(k)) / n;
474 residual.col(k).array() -= beta0_update;
475 beta0(k) -= beta0_update;
476 }
477 }
478
479 return max_abs_gradient;
480}
481
482} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
std::size_t size()
Returns the number of clusters.
Definition clusters.h:35
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd &lambda_cumsum, const Clusters &clusters)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
Definition hybrid_cd.h:333
int sign(T val)
Returns the sign of a given value.
Definition math.h:38
std::pair< double, double > computeGradientAndHessian(const T &x, const int ind, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
Definition hybrid_cd.h:44
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixBase< T > &x, const int c_ind, const std::vector< int > &s, const Clusters &clusters, const Eigen::MatrixXd &w, const Eigen::MatrixXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition hybrid_cd.h:128
Eigen::Index nonZeros(const Eigen::SparseMatrixBase< Derived > &x)
Definition utils.h:34
The declaration of the slopeThreshold function.