slope 0.31.0
Loading...
Searching...
No Matches
hybrid_cd.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "../clusters.h"
10#include "../math.h"
11#include "slope_threshold.h"
12#include <Eigen/Core>
13#include <iostream>
14#include <vector>
15
16namespace slope {
17
40template<typename T>
41std::pair<double, double>
43 const int k,
44 const Eigen::VectorXd& w,
45 const Eigen::VectorXd& residual,
46 const Eigen::VectorXd& x_centers,
47 const Eigen::VectorXd& x_scales,
48 const double s,
49 const JitNormalization jit_normalization,
50 const int n)
51{
52 double gradient = 0.0;
53 double hessian = 0.0;
54
55 switch (jit_normalization) {
57 gradient = s *
58 (x.col(k).cwiseProduct(w).dot(residual) -
59 w.dot(residual) * x_centers(k)) /
60 (n * x_scales(k));
61 hessian =
62 (x.col(k).cwiseAbs2().dot(w) - 2 * x_centers(k) * x.col(k).dot(w) +
63 std::pow(x_centers(k), 2) * w.sum()) /
64 (std::pow(x_scales(k), 2) * n);
65 break;
66
68 gradient = s *
69 (x.col(k).cwiseProduct(w).dot(residual) -
70 w.dot(residual) * x_centers(k)) /
71 n;
72 hessian =
73 (x.col(k).cwiseAbs2().dot(w) - 2 * x_centers(k) * x.col(k).dot(w) +
74 std::pow(x_centers(k), 2) * w.sum()) /
75 n;
76 break;
77
79 gradient =
80 s * (x.col(k).cwiseProduct(w).dot(residual)) / (n * x_scales(k));
81 hessian = x.col(k).cwiseAbs2().dot(w) / (std::pow(x_scales(k), 2) * n);
82 break;
83
85 gradient = s * (x.col(k).cwiseProduct(w).dot(residual)) / n;
86 hessian = x.col(k).cwiseAbs2().dot(w) / n;
87 break;
88 }
89
90 return { gradient, hessian };
91}
92
116std::pair<double, double>
117computeClusterGradientAndHessian(const Eigen::MatrixXd& x,
118 const int j,
119 const std::vector<int>& s,
120 const Clusters& clusters,
121 const Eigen::VectorXd& w,
122 const Eigen::VectorXd& residual,
123 const Eigen::VectorXd& x_centers,
124 const Eigen::VectorXd& x_scales,
125 const JitNormalization jit_normalization);
126
149std::pair<double, double>
150computeClusterGradientAndHessian(const Eigen::SparseMatrix<double>& x,
151 const int j,
152 const std::vector<int>& s,
153 const Clusters& clusters,
154 const Eigen::VectorXd& w,
155 const Eigen::VectorXd& residual,
156 const Eigen::VectorXd& x_centers,
157 const Eigen::VectorXd& x_scales,
158 const JitNormalization jit_normalization);
159
186template<typename T>
187double
188coordinateDescent(Eigen::VectorXd& beta0,
189 Eigen::VectorXd& beta,
190 Eigen::VectorXd& residual,
191 Clusters& clusters,
192 const Eigen::ArrayXd& lambda,
193 const T& x,
194 const Eigen::VectorXd& w,
195 const Eigen::VectorXd& x_centers,
196 const Eigen::VectorXd& x_scales,
197 const bool intercept,
198 const JitNormalization jit_normalization,
199 const bool update_clusters)
200{
201 using namespace Eigen;
202
203 const int n = x.rows();
204
205 double max_abs_gradient = 0;
206
207 for (int j = 0; j < clusters.n_clusters(); ++j) {
208 double c_old = clusters.coeff(j);
209
210 if (c_old == 0) {
211 // We do not update the zero cluster because it can be very large, but
212 // often does not change.
213 continue;
214 }
215
216 int cluster_size = clusters.cluster_size(j);
217 std::vector<int> s;
218 s.reserve(cluster_size);
219
220 for (auto c_it = clusters.cbegin(j); c_it != clusters.cend(j); ++c_it) {
221 double s_k = sign(beta(*c_it));
222 s.emplace_back(s_k);
223 }
224
225 double hessian_j = 1;
226 double gradient_j = 0;
227 VectorXd x_s(n);
228
229 if (cluster_size == 1) {
230 int k = *clusters.cbegin(j);
231 std::tie(gradient_j, hessian_j) = computeGradientAndHessian(
232 x, k, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
233 } else {
234 std::tie(hessian_j, gradient_j) = computeClusterGradientAndHessian(
235 x, j, s, clusters, w, residual, x_centers, x_scales, jit_normalization);
236 }
237
238 max_abs_gradient = std::max(max_abs_gradient, std::abs(gradient_j));
239
240 double c_tilde;
241 int new_index;
242
243 if (lambda(0) == 0) {
244 // No regularization
245 c_tilde = c_old - gradient_j / hessian_j;
246 new_index = j;
247 } else {
248 std::tie(c_tilde, new_index) = slopeThreshold(
249 c_old - gradient_j / hessian_j, j, lambda / hessian_j, clusters);
250 }
251
252 double c_diff = c_old - c_tilde;
253
254 if (c_diff != 0) {
255 auto s_it = s.cbegin();
256 auto c_it = clusters.cbegin(j);
257 for (; c_it != clusters.cend(j); ++c_it, ++s_it) {
258 int k = *c_it;
259 double s_k = *s_it;
260
261 // Update coefficient
262 beta(k) = c_tilde * s_k;
263
264 // Update residual
265 switch (jit_normalization) {
267 residual -= x.col(k) * (s_k * c_diff / x_scales(k));
268 residual.array() += x_centers(k) * s_k * c_diff / x_scales(k);
269 break;
270
272 residual -= x.col(k) * (s_k * c_diff);
273 residual.array() += x_centers(k) * s_k * c_diff;
274 break;
275
277 residual -= x.col(k) * (s_k * c_diff / x_scales(k));
278 break;
279
281 residual -= x.col(k) * (s_k * c_diff);
282 break;
283 }
284 }
285 }
286
287 if (update_clusters) {
288 clusters.update(j, new_index, std::abs(c_tilde));
289 } else {
290 clusters.setCoeff(j, std::abs(c_tilde));
291 }
292 }
293
294 if (intercept) {
295 double beta0_update = residual.dot(w) / n;
296 residual.array() -= beta0_update;
297 beta0(0) -= beta0_update;
298 }
299
300 return max_abs_gradient;
301}
302
303} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
Definition clusters.cpp:164
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
Definition clusters.cpp:117
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
Definition clusters.cpp:78
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
Definition clusters.cpp:127
int n_clusters() const
Returns the number of clusters.
Definition clusters.cpp:103
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
Definition clusters.cpp:48
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Definition clusters.cpp:38
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::VectorXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda, const T &x, const Eigen::VectorXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters)
Definition hybrid_cd.h:188
int sign(T val)
Returns the sign of a given value.
Definition math.h:35
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixXd &x, const int j, const std::vector< int > &s, const Clusters &clusters, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition hybrid_cd.cpp:6
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd lambdas, const Clusters &clusters)
std::pair< double, double > computeGradientAndHessian(const T &x, const int k, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
Definition hybrid_cd.h:42
The declaration of the slopeThreshold function.