18#include <Eigen/SparseCore>
28 const Eigen::MatrixXd& y_in,
30 Eigen::ArrayXd lambda)
32 using Eigen::MatrixXd;
33 using Eigen::VectorXd;
35 const int n = x.rows();
36 const int p = x.cols();
38 if (n != y_in.rows()) {
39 throw std::invalid_argument(
"x and y_in must have the same number of rows");
49 std::unique_ptr<Loss> loss =
setupLoss(this->loss_type);
51 MatrixXd y = loss->preprocessResponse(y_in);
53 const int m = y.cols();
55 std::vector<int> full_set(p * m);
56 std::iota(full_set.begin(), full_set.end(), 0);
58 VectorXd beta0 = VectorXd::Zero(m);
59 VectorXd beta = VectorXd::Zero(p * m);
61 MatrixXd eta = MatrixXd::Zero(n, m);
63 if (this->intercept) {
64 beta0 = loss->link(y.colwise().mean()).transpose();
65 eta.rowwise() = beta0.transpose();
68 MatrixXd residual = loss->residual(eta, y);
69 VectorXd gradient(beta.size());
72 bool user_alpha = alpha.size() > 0;
73 bool user_lambda = lambda.size() > 0;
77 p * m, this->q, this->lambda_type, n, this->theta1, this->theta2);
79 if (lambda.size() != beta.size()) {
80 throw std::invalid_argument(
81 "lambda must be the same length as the number of coefficients");
83 if (lambda.minCoeff() < 0) {
84 throw std::invalid_argument(
"lambda must be non-negative");
86 if (!lambda.isFinite().all()) {
87 throw std::invalid_argument(
"lambda must be finite");
99 this->update_clusters,
100 this->cd_iterations);
108 Eigen::VectorXd::Ones(n),
111 int alpha_max_ind =
whichMax(gradient.cwiseAbs());
112 double alpha_max = sl1_norm.
dualNorm(gradient, lambda);
114 if (alpha_type ==
"path") {
115 if (alpha_min_ratio < 0) {
116 alpha_min_ratio = n > gradient.size() ? 1e-4 : 1e-2;
120 path_length = alpha.size();
122 if (loss_type !=
"quadratic") {
123 throw std::invalid_argument(
124 "Automatic alpha estimation is only available for the quadratic loss");
130 std::unique_ptr<ScreeningRule> screening_rule =
132 std::vector<int> working_set =
133 screening_rule->initialize(full_set, alpha_max_ind);
136 double null_deviance = loss->deviance(eta, y);
137 double dev_prev = null_deviance;
141 double alpha_prev = std::max(alpha_max, alpha(0));
143 std::vector<SlopeFit> fits;
146 for (
int path_step = 0; path_step < this->path_length; ++path_step) {
147 double alpha_curr = alpha(path_step);
149 assert(alpha_curr <= alpha_prev &&
"Alpha must be decreasing");
151 Eigen::ArrayXd lambda_curr = alpha_curr * lambda;
152 Eigen::ArrayXd lambda_prev = alpha_prev * lambda;
154 std::vector<double> duals, primals, time;
166 Eigen::VectorXd::Ones(x.rows()),
169 working_set = screening_rule->screen(
170 gradient, lambda_curr, lambda_prev, beta, full_set);
173 for (; it < this->max_it; ++it) {
175 residual = loss->residual(eta, y);
182 Eigen::VectorXd::Ones(n),
187 sl1_norm.
eval(beta(working_set), lambda_curr.head(working_set.size()));
189 MatrixXd theta = residual;
192 VectorXd dual_gradient = gradient;
197 if (this->intercept) {
198 VectorXd theta_mean = theta.colwise().mean();
199 theta.rowwise() -= theta_mean.transpose();
211 double dual_norm = sl1_norm.
dualNorm(
212 dual_gradient(working_set), lambda_curr.head(working_set.size()));
213 theta.array() /= std::max(1.0, dual_norm);
215 double dual = loss->dual(theta, y, Eigen::VectorXd::Ones(n));
217 if (collect_diagnostics) {
232 time.emplace_back(timer.
elapsed());
233 primals.emplace_back(primal);
234 duals.emplace_back(true_dual);
237 double dual_gap = primal - dual;
239 assert(dual_gap > -1e-6 &&
"Dual gap should be positive");
243 if (dual_gap <= tol_scaled) {
245 screening_rule->checkKktViolations(gradient,
274 if (it == this->max_it) {
277 "Maximum number of iterations reached at step = " +
278 std::to_string(path_step) +
".");
283 beta0, beta, this->x_centers, this->x_scales, this->intercept);
285 alpha_prev = alpha_curr;
288 double dev = loss->deviance(eta, y);
289 double dev_ratio = 1 - dev / null_deviance;
290 double dev_change = path_step == 0 ? 1.0 : 1 - dev / dev_prev;
295 if (return_clusters) {
300 beta0_out, beta_out.sparseView(), clusters, alpha_curr, lambda,
301 dev, null_deviance, primals, duals, time,
302 it, this->centering_type, this->scaling_type
305 fits.emplace_back(std::move(
fit));
308 int n_unique =
unique(beta.cwiseAbs()).size();
309 if (dev_ratio > dev_ratio_tol || dev_change < dev_change_tol ||
310 n_unique >= this->max_clusters.value_or(n + 1)) {
322 const Eigen::MatrixXd& y_in,
324 Eigen::ArrayXd lambda)
326 Eigen::ArrayXd alpha_arr(1);
327 alpha_arr(0) = alpha;
336 validateOption(solver, {
"auto",
"pgd",
"hybrid",
"fista" },
"solver");
337 this->solver_type = solver;
343 this->intercept = intercept;
350 type, {
"standardization",
"min_max",
"max_abs",
"none" },
"type");
352 if (type ==
"standardization") {
353 this->centering_type =
"mean";
354 this->scaling_type =
"sd";
355 }
else if (type ==
"none") {
356 this->centering_type =
"none";
357 this->scaling_type =
"none";
358 }
else if (type ==
"min_max") {
359 this->centering_type =
"min";
360 this->scaling_type =
"range";
361 }
else if (type ==
"max_abs") {
362 this->centering_type =
"none";
363 this->scaling_type =
"max_abs";
371 this->centering_type = type;
377 this->x_centers = x_centers;
378 this->centering_type =
"manual";
385 type, {
"sd",
"l1",
"l2",
"range",
"max_abs",
"none" },
"type");
386 this->scaling_type = type;
392 this->x_scales = x_scales;
393 this->scaling_type =
"manual";
399 this->update_clusters = update_clusters;
405 this->return_clusters = return_clusters;
412 this->alpha_type = alpha_type;
418 if (alpha_min_ratio <= 0 || alpha_min_ratio >= 1) {
419 throw std::invalid_argument(
"alpha_min_ratio must be in (0, 1)");
421 this->alpha_min_ratio = alpha_min_ratio;
427 if (learning_rate_decr <= 0 || learning_rate_decr >= 1) {
428 throw std::invalid_argument(
"learning_rate_decr must be in (0, 1)");
430 this->learning_rate_decr = learning_rate_decr;
436 if (q < 0 || q > 1) {
437 throw std::invalid_argument(
"q must be between 0 and 1");
446 throw std::invalid_argument(
"theta1 must be between 0 and 1");
450 throw std::invalid_argument(
"theta2 must be between 0 and 1");
453 this->theta1 = theta1;
454 this->theta2 = theta2;
461 throw std::invalid_argument(
"tol must be non-negative");
470 throw std::invalid_argument(
"max_it_outer must be >= 1");
472 this->max_it = max_it;
478 if (path_length < 1) {
479 throw std::invalid_argument(
"path_length must be >= 1");
481 this->path_length = path_length;
487 if (cd_iterations < 0) {
488 throw std::invalid_argument(
"cd_iterations must be >= 0");
490 this->cd_iterations = cd_iterations;
497 lambda_type, {
"bh",
"gaussian",
"oscar",
"lasso" },
"lambda_type");
499 this->lambda_type = lambda_type;
506 {
"quadratic",
"logistic",
"poisson",
"multinomial" },
508 this->loss_type = loss_type;
514 validateOption(screening_type, {
"strong",
"none" },
"screening_type");
515 this->screening_type = screening_type;
521 this->modify_x = modify_x;
527 if (dev_change_tol < 0 || dev_change_tol > 1) {
528 throw std::invalid_argument(
"dev_change_tol must be in [0, 1]");
531 this->dev_change_tol = dev_change_tol;
537 if (dev_ratio_tol < 0 || dev_ratio_tol > 1) {
538 throw std::invalid_argument(
"dev_ratio_tol must be in [0, 1]");
541 this->dev_ratio_tol = dev_ratio_tol;
547 if (max_clusters < 1) {
548 throw std::invalid_argument(
"max_clusters must be >= 1");
551 this->max_clusters = max_clusters;
557 this->collect_diagnostics = collect_diagnostics;
563 this->alpha_est_maxit = alpha_est_maxit;
569 return alpha_est_maxit;
587Slope::path<Eigen::MatrixXd>(Eigen::MatrixXd&,
588 const Eigen::MatrixXd&,
593Slope::path<Eigen::SparseMatrix<double>>(Eigen::SparseMatrix<double>&,
594 const Eigen::MatrixXd&,
599Slope::fit<Eigen::MatrixXd>(Eigen::MatrixXd&,
600 const Eigen::MatrixXd&,
605Slope::fit<Eigen::SparseMatrix<double>>(Eigen::SparseMatrix<double>&,
606 const Eigen::MatrixXd&,
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
A class representing the results of SLOPE (Sorted L1 Penalized Estimation) fitting.
Container class for SLOPE regression solution paths.
void setSolver(const std::string &solver)
Sets the numerical solver used to fit the model.
void setAlphaMinRatio(double alpha_min_ratio)
Sets the alpha min ratio.
void setMaxIterations(int max_it)
Sets the maximum number of iterations.
void setAlphaEstimationMaxIterations(const int alpha_est_maxit)
Sets the maximum number of iterations for the alpha estimation procedure.
SlopePath path(T &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Computes SLOPE regression solution path for multiple alpha and lambda values.
void setDevRatioTol(const double dev_ratio_tol)
Sets tolerance in deviance change for early stopping.
void setScaling(const std::string &type)
Sets the scaling type.
SlopeFit fit(T &x, const Eigen::MatrixXd &y_in, const double alpha=1.0, Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Fits a single SLOPE regression model for given alpha and lambda values.
void setDevChangeTol(const double dev_change_tol)
Sets tolerance in deviance change for early stopping.
const std::string & getLossType()
Get currently defined loss type.
void setReturnClusters(const bool return_clusters)
Sets the update clusters flag.
void setMaxClusters(const int max_clusters)
Sets tolerance in deviance change for early stopping.
void setIntercept(bool intercept)
Sets the intercept flag.
bool getFitIntercept() const
Returns the intercept flag.
void setDiagnostics(const bool collect_diagnostics)
Toggles collection of diagnostics.
void setNormalization(const std::string &type)
Sets normalization type for the design matrix.
int getAlphaEstimationMaxIterations() const
Gets the maximum number of iterations allowed for the alpha estimation procedure.
void setScreening(const std::string &screening_type)
Sets the type of feature screening used, which discards predictors that are unlikely to be active.
void setOscarParameters(const double theta1, const double theta2)
Sets OSCAR parameters.
void setLambdaType(const std::string &lambda_type)
Sets the lambda type for regularization weights.
void setAlphaType(const std::string &alpha_type)
Sets the alpha type.
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setCentering(const std::string &type)
Sets the center points for feature normalization.
void setLearningRateDecr(double learning_rate_decr)
Sets the learning rate decrement.
void setLoss(const std::string &loss_type)
Sets the loss function type.
void setPathLength(int path_length)
Sets the path length.
void setHybridCdIterations(int cd_iterations)
Sets the frequence of proximal gradient descent steps.
void setTol(double tol)
Sets the tolerance value.
void setQ(double q)
Sets the q value.
void setUpdateClusters(bool update_clusters)
Sets the update clusters flag.
Class representing the Sorted L1 Norm.
double eval(const Eigen::VectorXd &beta, const Eigen::ArrayXd &lambda) const
Evaluates the Sorted L1 Norm.
double dualNorm(const Eigen::VectorXd &a, const Eigen::ArrayXd &lambda) const
Computes the dual norm of a vector.
Timer class for measuring elapsed time with high resolution.
void start()
Starts the timer by recording the current time point.
void resume()
Resumes the timer after a pause.
double elapsed() const
Returns the elapsed time in seconds since start() was called.
void pause()
Pauses the timer.
static void addWarning(WarningCode code, const std::string &message)
Log a new warning.
The declaration of the Clusters class.
Definitions of constants used in libslope.
Diagnostics for SLOPE optimization.
Functions for estimating noise level and regularization parameter alpha.
Thread-safe warning logging facility for the slope library.
The declartion of the Objctive class and its subclasses, which represent the data-fitting part of the...
Mathematical support functions for the slope package.
constexpr double EPSILON
Small value used for floating-point comparisons to handle precision issues.
Namespace containing SLOPE regression implementation.
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
std::unordered_set< double > unique(const Eigen::MatrixXd &x)
Create a set of unique values from an Eigen matrix.
void validateOption(const std::string &value, const std::set< std::string > &valid_options, const std::string ¶meter_name)
Validates if a given value exists in a set of valid options.
std::unique_ptr< ScreeningRule > createScreeningRule(const std::string &screening_type)
Creates a screening rule based on the provided type.
Eigen::ArrayXd lambdaSequence(const int p, const double q, const std::string &type, const int n, const double theta1, const double theta2)
JitNormalization normalize(Eigen::MatrixXd &x, Eigen::VectorXd &x_centers, Eigen::VectorXd &x_scales, const std::string ¢ering_type, const std::string &scaling_type, const bool modify_x)
double computeDual(const Eigen::VectorXd &beta, const Eigen::MatrixXd &residual, const std::unique_ptr< Loss > &loss, const SortedL1Norm &sl1_norm, const Eigen::ArrayXd &lambda, const MatrixType &x, const Eigen::MatrixXd &y, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization &jit_normalization, const bool intercept)
Computes the dual objective function value for SLOPE optimization.
@ MAXIT_REACHED
Maximum iterations reached without convergence.
void updateGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::MatrixXd &residual, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::VectorXd &w, const JitNormalization jit_normalization)
void offsetGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::VectorXd &offset, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::ArrayXd regularizationPath(const Eigen::ArrayXd &alpha_in, const int path_length, double alpha_min_ratio, double alpha_max)
SlopePath estimateAlpha(MatrixType &x, Eigen::MatrixXd &y, const Slope &model)
Estimates the regularization parameter alpha for SLOPE regression.
std::unique_ptr< SolverBase > setupSolver(const std::string &solver_type, const std::string &loss, JitNormalization jit_normalization, bool intercept, bool update_clusters, int cd_iterations)
Factory function to create and configure a SLOPE solver.
int whichMax(const T &x)
Returns the index of the maximum element in a container.
std::tuple< Eigen::VectorXd, Eigen::MatrixXd > rescaleCoefficients(const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept)
Rescales the coefficients using the given parameters.
Functions to normalize the design matrix and rescale coefficients in case the design was normalized.
Functions for generating regularization sequences for SLOPE.
Screening rules for SLOPE regression optimization.
Factory function to create the appropriate loss function based on.
Factory function to create and configure a SLOPE solver.
SLOPE (Sorted L-One Penalized Estimation) optimization.
The declaration of the SortedL1Norm class.
Simple high-resolution timer class for performance measurements.
Various utility functions.