slope 0.29.0
Loading...
Searching...
No Matches
pgd.h
Go to the documentation of this file.
1
#pragma once

#include "../losses/loss.h"
#include "../math.h"
#include "../sorted_l1_norm.h"
#include "solver.h"

#include <Eigen/Dense>
#include <Eigen/SparseCore>
#include <cmath>
#include <memory>
#include <string>
#include <vector>
15
16namespace slope {
17
25class PGD : public SolverBase
26{
27public:
35 bool intercept,
36 const std::string& update_type)
38 , learning_rate(1.0)
39 , learning_rate_decr(0.5)
40 , update_type{ update_type }
41 , t(1.0)
42 {
43 }
44
46 void run(Eigen::VectorXd& beta0,
47 Eigen::VectorXd& beta,
48 Eigen::MatrixXd& eta,
49 const Eigen::ArrayXd& lambda,
50 const std::unique_ptr<Loss>& loss,
51 const SortedL1Norm& penalty,
52 const Eigen::VectorXd& gradient,
53 const std::vector<int>& working_set,
54 const Eigen::MatrixXd& x,
55 const Eigen::VectorXd& x_centers,
56 const Eigen::VectorXd& x_scales,
57 const Eigen::MatrixXd& y) override;
58
60 void run(Eigen::VectorXd& beta0,
61 Eigen::VectorXd& beta,
62 Eigen::MatrixXd& eta,
63 const Eigen::ArrayXd& lambda,
64 const std::unique_ptr<Loss>& loss,
65 const SortedL1Norm& penalty,
66 const Eigen::VectorXd& gradient,
67 const std::vector<int>& working_set,
68 const Eigen::SparseMatrix<double>& x,
69 const Eigen::VectorXd& x_centers,
70 const Eigen::VectorXd& x_scales,
71 const Eigen::MatrixXd& y) override;
72
73private:
74 template<typename MatrixType>
75 void runImpl(Eigen::VectorXd& beta0,
76 Eigen::VectorXd& beta,
77 Eigen::MatrixXd& eta,
78 const Eigen::ArrayXd& lambda,
79 const std::unique_ptr<Loss>& loss,
80 const SortedL1Norm& penalty,
81 const Eigen::VectorXd& gradient,
82 const std::vector<int>& working_set,
83 const MatrixType& x,
84 const Eigen::VectorXd& x_centers,
85 const Eigen::VectorXd& x_scales,
86 const Eigen::MatrixXd& y)
87 {
88 using Eigen::all;
89 using Eigen::MatrixXd;
90 using Eigen::VectorXd;
91
92 int n_working = working_set.size();
93
94 Eigen::VectorXd beta_old = beta(working_set);
95 Eigen::VectorXd beta_diff(n_working);
96 Eigen::VectorXd beta0_old = beta0;
97
98 double g_old = loss->loss(eta, y);
99 double t_old = t;
100
101 Eigen::MatrixXd residual = loss->residual(eta, y);
102 Eigen::VectorXd intercept_grad = residual.colwise().mean();
103 Eigen::VectorXd grad_working = gradient(working_set);
104
105 int line_search_iter = 0;
106 const int max_line_search_iter = 100; // maximum iterations before exit
107
108 while (true) {
109 if (intercept) {
110 beta0 = beta0_old - this->learning_rate * intercept_grad;
111 }
112
113 beta(working_set) =
114 penalty.prox(beta_old - this->learning_rate * grad_working,
115 this->learning_rate * lambda.head(n_working));
116
117 beta_diff = beta(working_set) - beta_old;
118 double beta_diff_norm =
119 beta_diff.dot(grad_working) +
120 (1.0 / (2 * this->learning_rate)) * beta_diff.squaredNorm();
121
122 if (intercept) {
123 Eigen::VectorXd beta0_diff = beta0 - beta0_old;
124
125 beta_diff_norm +=
126 intercept_grad.dot(beta0_diff) +
127 (1.0 / (2 * this->learning_rate)) * beta0_diff.squaredNorm();
128 }
129
130 eta = linearPredictor(x,
131 working_set,
132 beta0,
133 beta,
134 x_centers,
135 x_scales,
137 intercept);
138
139 double g = loss->loss(eta, y);
140 double q = g_old + beta_diff_norm;
141
142 if (q >= g * (1 - 1e-12) || this->learning_rate < 1e-12) {
143 this->learning_rate *= 1.1;
144 break;
145 } else {
146 this->learning_rate *= this->learning_rate_decr;
147 }
148
149 line_search_iter++;
150 if (line_search_iter >= max_line_search_iter) {
151 break;
152 }
153 }
154
155 if (update_type == "fista") {
156 if (beta_prev.size() == 0) {
157 beta_prev = beta;
158 }
159
160 this->t = 0.5 * (1.0 + std::sqrt(1.0 + 4.0 * t_old * t_old));
161 Eigen::VectorXd beta_current = beta(working_set);
162
163 beta(working_set) +=
164 (beta_current - beta_prev(working_set)) * (t_old - 1.0) / this->t;
165 beta_prev(working_set) = beta_current;
166
167 eta = linearPredictor(x,
168 working_set,
169 beta0,
170 beta,
171 x_centers,
172 x_scales,
174 intercept);
175 }
176 }
177
178 double learning_rate;
179 double learning_rate_decr;
180 std::string update_type;
181 double t;
182 Eigen::VectorXd beta_prev;
183};
184
185} // namespace slope
Proximal Gradient Descent solver for SLOPE optimization.
Definition pgd.h:26
void run(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &eta, const Eigen::ArrayXd &lambda, const std::unique_ptr< Loss > &loss, const SortedL1Norm &penalty, const Eigen::VectorXd &gradient, const std::vector< int > &working_set, const Eigen::MatrixXd &x, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::MatrixXd &y) override
Pure virtual function defining the solver's optimization routine.
Definition pgd.cpp:17
PGD(JitNormalization jit_normalization, bool intercept, const std::string &update_type)
Constructs Proximal Gradient Descent solver for SLOPE optimization.
Definition pgd.h:34
Abstract base class for SLOPE optimization solvers.
Definition solver.h:30
JitNormalization jit_normalization
JIT feature normalization strategy.
Definition solver.h:107
bool intercept
If true, fits intercept term.
Definition solver.h:108
Class representing the Sorted L1 Norm.
Eigen::MatrixXd prox(const Eigen::VectorXd &beta, const Eigen::ArrayXd &lambda) const
Computes the proximal operator of the Sorted L1 Norm.
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
JitNormalization
Enums to control predictor standardization behavior.
Eigen::MatrixXd linearPredictor(const T &x, const std::vector< int > &active_set, const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization, const bool intercept)
Definition math.h:142
Numerical solver class for SLOPE (Sorted L-One Penalized Estimation)