slope 0.29.0
Loading...
Searching...
No Matches
hybrid_cd.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "../clusters.h"
10#include "../math.h"
11#include "slope_threshold.h"
12#include <Eigen/Core>
13#include <vector>
14
15namespace slope {
16
39template<typename T>
40std::pair<double, double>
42 const int k,
43 const Eigen::VectorXd& w,
44 const Eigen::VectorXd& residual,
45 const Eigen::VectorXd& x_centers,
46 const Eigen::VectorXd& x_scales,
47 const double s,
48 const JitNormalization jit_normalization,
49 const int n)
50{
51 double gradient = 0.0;
52 double hessian = 0.0;
53
54 switch (jit_normalization) {
56 gradient = s *
57 (x.col(k).cwiseProduct(w).dot(residual) -
58 w.dot(residual) * x_centers(k)) /
59 (n * x_scales(k));
60 hessian =
61 (x.col(k).cwiseAbs2().dot(w) - 2 * x_centers(k) * x.col(k).dot(w) +
62 std::pow(x_centers(k), 2) * w.sum()) /
63 (std::pow(x_scales(k), 2) * n);
64 break;
65
67 gradient = s *
68 (x.col(k).cwiseProduct(w).dot(residual) -
69 w.dot(residual) * x_centers(k)) /
70 n;
71 hessian =
72 (x.col(k).cwiseAbs2().dot(w) - 2 * x_centers(k) * x.col(k).dot(w) +
73 std::pow(x_centers(k), 2) * w.sum()) /
74 n;
75 break;
76
78 gradient =
79 s * (x.col(k).cwiseProduct(w).dot(residual)) / (n * x_scales(k));
80 hessian = x.col(k).cwiseAbs2().dot(w) / (std::pow(x_scales(k), 2) * n);
81 break;
82
84 gradient = s * (x.col(k).cwiseProduct(w).dot(residual)) / n;
85 hessian = x.col(k).cwiseAbs2().dot(w) / n;
86 break;
87 }
88
89 return { gradient, hessian };
90}
91
115std::pair<double, double>
116computeClusterGradientAndHessian(const Eigen::MatrixXd& x,
117 const int j,
118 const std::vector<int>& s,
119 const Clusters& clusters,
120 const Eigen::VectorXd& w,
121 const Eigen::VectorXd& residual,
122 const Eigen::VectorXd& x_centers,
123 const Eigen::VectorXd& x_scales,
124 const JitNormalization jit_normalization);
125
148std::pair<double, double>
149computeClusterGradientAndHessian(const Eigen::SparseMatrix<double>& x,
150 const int j,
151 const std::vector<int>& s,
152 const Clusters& clusters,
153 const Eigen::VectorXd& w,
154 const Eigen::VectorXd& residual,
155 const Eigen::VectorXd& x_centers,
156 const Eigen::VectorXd& x_scales,
157 const JitNormalization jit_normalization);
158
185template<typename T>
186void
187coordinateDescent(Eigen::VectorXd& beta0,
188 Eigen::VectorXd& beta,
189 Eigen::VectorXd& residual,
190 Clusters& clusters,
191 const Eigen::ArrayXd& lambda,
192 const T& x,
193 const Eigen::VectorXd& w,
194 const Eigen::VectorXd& x_centers,
195 const Eigen::VectorXd& x_scales,
196 const bool intercept,
197 const JitNormalization jit_normalization,
198 const bool update_clusters)
199{
200 using namespace Eigen;
201
202 const int n = x.rows();
203
204 for (int j = 0; j < clusters.n_clusters(); ++j) {
205 double c_old = clusters.coeff(j);
206
207 if (c_old == 0) {
208 // We do not update the zero cluster because it can be very large, but
209 // often does not change.
210 continue;
211 }
212
213 int cluster_size = clusters.cluster_size(j);
214 std::vector<int> s;
215 s.reserve(cluster_size);
216
217 for (auto c_it = clusters.cbegin(j); c_it != clusters.cend(j); ++c_it) {
218 double s_k = sign(beta(*c_it));
219 s.emplace_back(s_k);
220 }
221
222 double hessian_j = 1;
223 double gradient_j = 0;
224 VectorXd x_s(n);
225
226 if (cluster_size == 1) {
227 int k = *clusters.cbegin(j);
228 std::tie(gradient_j, hessian_j) = computeGradientAndHessian(
229 x, k, w, residual, x_centers, x_scales, s[0], jit_normalization, n);
230 } else {
231 std::tie(hessian_j, gradient_j) = computeClusterGradientAndHessian(
232 x, j, s, clusters, w, residual, x_centers, x_scales, jit_normalization);
233 }
234
235 auto [c_tilde, new_index] = slopeThreshold(
236 c_old - gradient_j / hessian_j, j, lambda / hessian_j, clusters);
237
238 double c_diff = c_old - c_tilde;
239
240 if (c_diff != 0) {
241 auto s_it = s.cbegin();
242 auto c_it = clusters.cbegin(j);
243 for (; c_it != clusters.cend(j); ++c_it, ++s_it) {
244 int k = *c_it;
245 double s_k = *s_it;
246
247 // Update coefficient
248 beta(k) = c_tilde * s_k;
249
250 // Update residual
251 switch (jit_normalization) {
253 residual -= x.col(k) * (s_k * c_diff / x_scales(k));
254 residual.array() += x_centers(k) * s_k * c_diff / x_scales(k);
255 break;
256
258 residual -= x.col(k) * (s_k * c_diff);
259 residual.array() += x_centers(k) * s_k * c_diff;
260 break;
261
263 residual -= x.col(k) * (s_k * c_diff / x_scales(k));
264 break;
265
267 residual -= x.col(k) * (s_k * c_diff);
268 break;
269 }
270 }
271 }
272
273 if (update_clusters) {
274 clusters.update(j, new_index, std::abs(c_tilde));
275 } else {
276 clusters.setCoeff(j, std::abs(c_tilde));
277 }
278 }
279
280 if (intercept) {
281 double beta0_update = residual.dot(w) / n;
282 residual.array() -= beta0_update;
283 beta0(0) -= beta0_update;
284 }
285}
286
287} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
Definition clusters.cpp:164
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
Definition clusters.cpp:117
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
Definition clusters.cpp:78
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
Definition clusters.cpp:127
int n_clusters() const
Returns the number of clusters.
Definition clusters.cpp:103
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
Definition clusters.cpp:48
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Definition clusters.cpp:38
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
int sign(T val)
Returns the sign of a given value.
Definition math.h:31
std::pair< double, double > computeClusterGradientAndHessian(const Eigen::MatrixXd &x, const int j, const std::vector< int > &s, const Clusters &clusters, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition hybrid_cd.cpp:6
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
std::tuple< double, int > slopeThreshold(const double x, const int j, const Eigen::ArrayXd lambdas, const Clusters &clusters)
void coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::VectorXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda, const T &x, const Eigen::VectorXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters)
Definition hybrid_cd.h:187
std::pair< double, double > computeGradientAndHessian(const T &x, const int k, const Eigen::VectorXd &w, const Eigen::VectorXd &residual, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const double s, const JitNormalization jit_normalization, const int n)
Definition hybrid_cd.h:41
The declaration of the slopeThreshold function.