slope 6.2.1
Loading...
Searching...
No Matches
hybrid.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "../clusters.h"
10#include "../losses/loss.h"
11#include "../sorted_l1_norm.h"
12#include "hybrid_cd.h"
13#include "pgd.h"
14#include "solver.h"
15#include <memory>
16#include <optional>
17
18namespace slope {
19
36class Hybrid : public SolverBase
37{
38public:
49 bool intercept,
50 bool update_clusters,
51 int cd_iterations,
52 const std::string& cd_type,
53 std::optional<int> random_seed = std::nullopt)
55 , update_clusters(update_clusters)
56 , cd_iterations(cd_iterations)
57 , cd_type(cd_type)
58 , rng(random_seed.has_value() ? std::mt19937(*random_seed)
59 : std::mt19937(std::random_device{}()))
60 {
61 }
62
64 void run(Eigen::VectorXd& beta0,
65 Eigen::VectorXd& beta,
66 Eigen::MatrixXd& eta,
67 const Eigen::ArrayXd& lambda,
68 const std::unique_ptr<Loss>& loss,
69 const SortedL1Norm& penalty,
70 const Eigen::VectorXd& gradient,
71 const std::vector<int>& working_set,
72 const Eigen::MatrixXd& x,
73 const Eigen::VectorXd& x_centers,
74 const Eigen::VectorXd& x_scales,
75 const Eigen::MatrixXd& y) override;
76
78 void run(Eigen::VectorXd& beta0,
79 Eigen::VectorXd& beta,
80 Eigen::MatrixXd& eta,
81 const Eigen::ArrayXd& lambda,
82 const std::unique_ptr<Loss>& loss,
83 const SortedL1Norm& penalty,
84 const Eigen::VectorXd& gradient,
85 const std::vector<int>& working_set,
86 const Eigen::SparseMatrix<double>& x,
87 const Eigen::VectorXd& x_centers,
88 const Eigen::VectorXd& x_scales,
89 const Eigen::MatrixXd& y) override;
90
92 void run(Eigen::VectorXd& beta0,
93 Eigen::VectorXd& beta,
94 Eigen::MatrixXd& eta,
95 const Eigen::ArrayXd& lambda,
96 const std::unique_ptr<Loss>& loss,
97 const SortedL1Norm& penalty,
98 const Eigen::VectorXd& gradient,
99 const std::vector<int>& working_set,
100 const Eigen::Map<Eigen::MatrixXd>& x,
101 const Eigen::VectorXd& x_centers,
102 const Eigen::VectorXd& x_scales,
103 const Eigen::MatrixXd& y) override;
104
106 void run(Eigen::VectorXd& beta0,
107 Eigen::VectorXd& beta,
108 Eigen::MatrixXd& eta,
109 const Eigen::ArrayXd& lambda,
110 const std::unique_ptr<Loss>& loss,
111 const SortedL1Norm& penalty,
112 const Eigen::VectorXd& gradient,
113 const std::vector<int>& working_set,
114 const Eigen::Map<Eigen::SparseMatrix<double>>& x,
115 const Eigen::VectorXd& x_centers,
116 const Eigen::VectorXd& x_scales,
117 const Eigen::MatrixXd& y) override;
118
119private:
134 template<typename MatrixType>
135 void runImpl(Eigen::VectorXd& beta0,
136 Eigen::VectorXd& beta,
137 Eigen::MatrixXd& eta,
138 const Eigen::ArrayXd& lambda,
139 const std::unique_ptr<Loss>& loss,
140 const SortedL1Norm& penalty,
141 const Eigen::VectorXd& gradient_in,
142 const std::vector<int>& working_set,
143 const MatrixType& x,
144 const Eigen::VectorXd& x_centers,
145 const Eigen::VectorXd& x_scales,
146 const Eigen::MatrixXd& y)
147 {
148 using Eigen::MatrixXd;
149 using Eigen::VectorXd;
150
151 const int n = x.rows();
152 const int m = eta.cols();
153
154 PGD pgd_solver(jit_normalization, intercept, "pgd");
155
156 // Run proximal gradient descent
157 pgd_solver.run(beta0,
158 beta,
159 eta,
160 lambda,
161 loss,
162 penalty,
163 gradient_in,
164 working_set,
165 x,
166 x_centers,
167 x_scales,
168 y);
169
170 Clusters clusters(beta);
171
172 // TODO: Make these parameters and initialize once
173 MatrixXd w = MatrixXd::Ones(n, m);
174 MatrixXd z = y;
175
176 loss->updateWeightsAndWorkingResponse(w, z, eta, y);
177
178 MatrixXd residual = eta - z;
179
180 Eigen::ArrayXd lambda_cumsum(lambda.size() + 1);
181 lambda_cumsum(0) = 0.0;
182 std::partial_sum(lambda.begin(), lambda.end(), lambda_cumsum.begin() + 1);
183
184 for (int it = 0; it < this->cd_iterations; ++it) {
185 double old_obj =
186 computeObjective(penalty, beta, residual, w, lambda, working_set);
187
188 // Store old values to revert if no progress is made
189 Clusters old_clusters = clusters;
190 Eigen::MatrixXd old_residual = residual;
191 Eigen::VectorXd old_beta = beta;
192 Eigen::VectorXd old_beta0 = beta0;
193
194 coordinateDescent(beta0,
195 beta,
196 residual,
197 clusters,
198 lambda_cumsum,
199 x,
200 w,
201 x_centers,
202 x_scales,
203 this->intercept,
204 this->jit_normalization,
205 this->update_clusters,
206 rng,
207 this->cd_type);
208
209 double new_obj =
210 computeObjective(penalty, beta, residual, w, lambda, working_set);
211
212 if (!std::isfinite(new_obj) || new_obj > old_obj) {
213 // No progress, revert to previous state
214 clusters = old_clusters;
215 residual = old_residual;
216 beta = old_beta;
217 beta0 = old_beta0;
218
219 break;
220 }
221 }
222
223 // The residual is kept up to date, but not eta. So we need to compute
224 // it here.
225 eta = residual + z;
226 // TODO: register convergence status
227 }
228
229 double computeObjective(const SortedL1Norm& penalty,
230 const Eigen::VectorXd& beta,
231 const Eigen::MatrixXd& residual,
232 const Eigen::MatrixXd& w,
233 const Eigen::ArrayXd& lambda,
234 const std::vector<int>& working_set)
235 {
236 double val =
237 0.5 * (residual.array().square() * w.array()).sum() / residual.rows() +
238 penalty.eval(beta(working_set), lambda.head(working_set.size()));
239
240 return val;
241 }
242
243 // TODO: These should be used in the PGD solver and taken as arguments to the
244 // Hybrid solver and not just set and ignored here.
245 double pgd_learning_rate =
246 1.0;
247 double pgd_learning_rate_decr =
248 0.5;
249
250 bool update_clusters = false;
251 int cd_iterations = 10;
252 std::string cd_type =
253 "cyclical";
254 std::mt19937 rng{
255 std::random_device{}()
256 };
257};
258
259} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
Hybrid CD-PGD solver for SLOPE.
Definition hybrid.h:37
void run(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &eta, const Eigen::ArrayXd &lambda, const std::unique_ptr< Loss > &loss, const SortedL1Norm &penalty, const Eigen::VectorXd &gradient, const std::vector< int > &working_set, const Eigen::SparseMatrix< double > &x, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::MatrixXd &y) override
Pure virtual function defining the solver's optimization routine.
void run(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &eta, const Eigen::ArrayXd &lambda, const std::unique_ptr< Loss > &loss, const SortedL1Norm &penalty, const Eigen::VectorXd &gradient, const std::vector< int > &working_set, const Eigen::MatrixXd &x, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::MatrixXd &y) override
Pure virtual function defining the solver's optimization routine.
void run(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &eta, const Eigen::ArrayXd &lambda, const std::unique_ptr< Loss > &loss, const SortedL1Norm &penalty, const Eigen::VectorXd &gradient, const std::vector< int > &working_set, const Eigen::Map< Eigen::SparseMatrix< double > > &x, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::MatrixXd &y) override
Pure virtual function defining the solver's optimization routine.
Hybrid(JitNormalization jit_normalization, bool intercept, bool update_clusters, int cd_iterations, const std::string &cd_type, std::optional< int > random_seed=std::nullopt)
Constructs Hybrid solver for SLOPE optimization.
Definition hybrid.h:48
void run(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &eta, const Eigen::ArrayXd &lambda, const std::unique_ptr< Loss > &loss, const SortedL1Norm &penalty, const Eigen::VectorXd &gradient, const std::vector< int > &working_set, const Eigen::Map< Eigen::MatrixXd > &x, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::MatrixXd &y) override
Pure virtual function defining the solver's optimization routine.
Proximal Gradient Descent solver for SLOPE optimization.
Definition pgd.h:29
Abstract base class for SLOPE optimization solvers.
Definition solver.h:30
JitNormalization jit_normalization
JIT feature normalization strategy.
Definition solver.h:165
bool intercept
If true, fits intercept term.
Definition solver.h:166
Class representing the Sorted L1 Norm.
An implementation of the coordinate descent step in the hybrid algorithm for solving SLOPE.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
Definition hybrid_cd.h:332
JitNormalization
Enums to control predictor standardization behavior.
Proximal Gradient Descent solver implementation for SLOPE.
Numerical solver class for SLOPE (Sorted L-One Penalized Estimation)