26#include <Eigen/SparseCore>
389 Eigen::EigenBase<T>& x,
390 const Eigen::MatrixXd& y_in,
391 Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
392 Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
395 using Eigen::MatrixXd;
396 using Eigen::VectorXd;
398 const int n = x.rows();
399 const int p = x.cols();
401 const int INTERRUPT_FREQ = 100;
402 bool interrupt =
false;
404 if (n != y_in.rows()) {
405 throw std::invalid_argument(
406 "x and y_in must have the same number of rows");
409 auto jit_normalization =
normalize(x.derived(),
412 this->centering_type,
416 std::unique_ptr<Loss> loss =
setupLoss(this->loss_type);
418 MatrixXd y = loss->preprocessResponse(y_in);
420 const int m = y.cols();
422 std::vector<int> full_set(p * m);
423 std::iota(full_set.begin(), full_set.end(), 0);
425 VectorXd beta0 = VectorXd::Zero(m);
426 VectorXd beta = VectorXd::Zero(p * m);
428 MatrixXd eta = MatrixXd::Zero(n, m);
430 if (this->intercept) {
431 beta0 = loss->link(y.colwise().mean()).transpose();
432 eta.rowwise() = beta0.transpose();
435 MatrixXd residual = loss->residual(eta, y);
436 VectorXd gradient(beta.size());
439 bool user_alpha = alpha.size() > 0;
440 bool user_lambda = lambda.size() > 0;
444 p * m, this->q, this->lambda_type, n, this->theta1, this->theta2);
446 if (lambda.size() != beta.size()) {
447 throw std::invalid_argument(
448 "lambda must be the same length as the number of coefficients");
450 if (lambda.minCoeff() < 0) {
451 throw std::invalid_argument(
"lambda must be non-negative");
453 if (!lambda.isFinite().all()) {
454 throw std::invalid_argument(
"lambda must be finite");
466 this->update_clusters,
477 Eigen::VectorXd::Ones(n),
480 int alpha_max_ind =
whichMax(gradient.cwiseAbs());
481 double alpha_max = sl1_norm.
dualNorm(gradient, lambda);
483 if (alpha_type ==
"path" ||
484 (alpha_type ==
"estimate" && alpha_estimate != 1)) {
485 if (alpha_min_ratio < 0) {
486 alpha_min_ratio = n > gradient.size() ? 1e-4 : 1e-2;
491 path_length = alpha.size();
492 }
else if (alpha_type ==
"estimate" && alpha_estimate == -1) {
493 if (loss_type !=
"quadratic") {
494 throw std::invalid_argument(
"Automatic alpha estimation is only "
495 "available for the quadratic loss");
500 std::unique_ptr<ScreeningRule> screening_rule =
502 std::vector<int> working_set =
503 screening_rule->initialize(full_set, alpha_max_ind);
506 double null_deviance = loss->deviance(eta, y);
507 double dev_prev = null_deviance;
511 double alpha_prev = std::max(alpha_max, alpha(0));
513 std::vector<SlopeFit> fits;
516 for (
int path_step = 0; path_step < this->path_length; ++path_step) {
518 bool local_interrupt =
false;
520#pragma omp critical(check_interrupt)
523 local_interrupt = check_interrupt();
525 if (local_interrupt) {
530 double alpha_curr = alpha(path_step);
532 assert(alpha_curr <= alpha_prev &&
"Alpha must be decreasing");
534 Eigen::ArrayXd lambda_curr = alpha_curr * lambda;
535 Eigen::ArrayXd lambda_prev = alpha_prev * lambda;
537 std::vector<double> duals, primals, time;
549 Eigen::VectorXd::Ones(x.rows()),
552 working_set = screening_rule->screen(
553 gradient, lambda_curr, lambda_prev, beta, full_set);
557 for (; it < this->max_it; ++it, ++total_it) {
559 residual = loss->residual(eta, y);
566 Eigen::VectorXd::Ones(n),
569 double primal = loss->loss(eta, y) +
570 sl1_norm.
eval(beta(working_set),
571 lambda_curr.head(working_set.size()));
573 MatrixXd theta = residual;
576 VectorXd dual_gradient = gradient;
581 if (this->intercept) {
582 VectorXd theta_mean = theta.colwise().mean();
583 theta.rowwise() -= theta_mean.transpose();
595 double dual_norm = sl1_norm.
dualNorm(
596 dual_gradient(working_set), lambda_curr.head(working_set.size()));
597 theta.array() /= std::max(1.0, dual_norm);
599 double dual = loss->dual(theta, y, Eigen::VectorXd::Ones(n));
601 if (collect_diagnostics) {
616 time.emplace_back(timer.
elapsed());
617 primals.emplace_back(primal);
618 duals.emplace_back(true_dual);
621 double dual_gap = primal - dual;
623 assert(dual_gap > -1e-6 &&
"Dual gap should be positive");
627 if (dual_gap <= tol_scaled || it == this->max_it) {
629 screening_rule->checkKktViolations(gradient,
646 if (it % INTERRUPT_FREQ == 0) {
647 bool local_interrupt =
false;
649#pragma omp critical(check_interrupt)
652 local_interrupt = check_interrupt();
654 if (local_interrupt) {
674 if (it == this->max_it) {
677 "Maximum number of iterations reached at step = " +
678 std::to_string(path_step) +
".");
681 alpha_prev = alpha_curr;
684 double dev = loss->deviance(eta, y);
685 double dev_ratio = 1 - dev / null_deviance;
686 double dev_change = path_step == 0 ? 1.0 : 1 - dev / dev_prev;
691 if (return_clusters) {
696 beta.reshaped(p, m).sparseView(),
706 this->centering_type,
712 fits.emplace_back(std::move(
fit));
719 int n_unique =
unique(beta.cwiseAbs()).size();
720 if (dev_ratio > dev_ratio_tol || dev_change < dev_change_tol ||
721 n_unique >= this->max_clusters.value_or(n + 1)) {
749 Eigen::EigenBase<T>& x,
750 const Eigen::MatrixXd& y_in,
751 const double alpha = 1.0,
752 Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
755 Eigen::ArrayXd alpha_arr(1);
756 alpha_arr(0) = alpha;
757 SlopePath res =
path(x, y_in, alpha_arr, lambda, check_interrupt);
788 Eigen::EigenBase<T>& x,
796 Slope model_copy = *
this;
799 std::vector<int> selected;
800 Eigen::ArrayXd alpha(1);
806 this->alpha_estimate = alpha(0);
808 model_copy.
path(x, y, alpha, Eigen::ArrayXd::Zero(0), check_interrupt);
810 for (
int it = 0; it < this->alpha_est_maxit; ++it) {
811 T x_selected =
subsetCols(x.derived(), selected);
813 std::vector<int> selected_prev = selected;
816 alpha(0) =
estimateNoise(x_selected, y, this->intercept) / n;
817 this->alpha_estimate = alpha(0);
819 result = model_copy.
path(
820 x, y, alpha, Eigen::ArrayXd::Zero(0), check_interrupt);
821 auto coefs = result.
getCoefs().back();
823 for (
typename Eigen::SparseMatrix<double>::InnerIterator it(coefs, 0);
826 selected.emplace_back(it.row());
829 if (selected == selected_prev) {
833 if (
static_cast<int>(selected.size()) >= n + this->intercept) {
834 throw std::runtime_error(
835 "selected >= n - 1 variables, cannot estimate variance");
841 "Maximum iterations reached in alpha estimation");
864 const Eigen::VectorXd& y_in,
865 const double gamma = 0.0,
866 Eigen::VectorXd beta0 = Eigen::VectorXd(0),
867 Eigen::VectorXd beta = Eigen::VectorXd(0))
869 using Eigen::MatrixXd;
870 using Eigen::VectorXd;
875 if (beta0.size() == 0) {
879 if (beta.size() == 0) {
887 std::vector<double> primals, duals, time;
890 auto jit_normalization =
891 normalize(x, x_centers, x_scales, centering_type, scaling_type, modify_x);
893 bool update_clusters =
false;
895 std::unique_ptr<Loss> loss =
setupLoss(this->loss_type);
897 MatrixXd y = loss->preprocessResponse(y_in);
901 Eigen::ArrayXd lambda_cumsum_relax = Eigen::ArrayXd::Zero(p * m + 1);
913 VectorXd gradient = VectorXd::Zero(p * m);
914 MatrixXd residual(n, m);
915 MatrixXd working_residual(n, m);
917 MatrixXd w = MatrixXd::Ones(n, m);
918 MatrixXd w_ones = MatrixXd::Ones(n, m);
925 if (random_seed.has_value()) {
926 rng.seed(*random_seed);
928 rng.seed(std::random_device{}());
933 for (
int irls_it = 0; irls_it < max_it_outer_relax; irls_it++) {
934 residual = loss->residual(eta, y);
936 if (collect_diagnostics) {
937 primals.push_back(loss->loss(eta, y));
938 duals.push_back(0.0);
939 time.push_back(timer.
elapsed());
951 double norm_grad = cluster_gradient.lpNorm<Eigen::Infinity>();
953 if (norm_grad < tol_relax) {
957 loss->updateWeightsAndWorkingResponse(w, z, eta, y);
958 working_residual = eta - z;
960 for (
int inner_it = 0; inner_it < max_it_inner_relax; ++inner_it) {
978 if (max_abs_gradient < tol_relax) {
983 eta = working_residual + z;
985 if (irls_it == max_it_outer_relax) {
987 "Maximum number of IRLS iterations reached.");
991 double dev = loss->deviance(eta, y);
996 beta = (1 - gamma) * beta + gamma * old_coefs;
1000 beta.reshaped(p, m).sparseView(),
1031 template<
typename T>
1034 const Eigen::VectorXd& y,
1035 const double gamma = 0.0)
1037 std::vector<SlopeFit> fits;
1039 for (
size_t i = 0; i <
path.
size(); i++) {
1045 auto relaxed_fit =
relax(
path(i), x, y, gamma);
1047 fits.emplace_back(relaxed_fit);
1055 bool collect_diagnostics =
false;
1056 bool intercept =
true;
1057 bool modify_x =
false;
1058 bool return_clusters =
true;
1059 bool update_clusters =
true;
1060 double alpha_min_ratio = -1;
1061 double dev_change_tol = 1e-5;
1062 double dev_ratio_tol = 0.999;
1063 double learning_rate_decr = 0.5;
1065 double theta1 = 1.0;
1066 double theta2 = 0.5;
1068 double tol_relax = 1e-4;
1069 double alpha_estimate = -1;
1070 int alpha_est_maxit = 1000;
1071 int cd_iterations = 10;
1073 int max_it_inner_relax = 1e5;
1074 int max_it_outer_relax = 50;
1075 int path_length = 100;
1076 std::optional<int> max_clusters = std::nullopt;
1077 std::optional<int> random_seed = 0;
1078 std::string alpha_type =
"path";
1079 std::string cd_type =
"permuted";
1080 std::string centering_type =
"mean";
1081 std::string lambda_type =
"bh";
1082 std::string loss_type =
"quadratic";
1083 std::string scaling_type =
"sd";
1084 std::string screening_type =
"strong";
1085 std::string solver_type =
"auto";
1088 Eigen::VectorXd x_centers;
1089 Eigen::VectorXd x_scales;
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
A class representing the results of SLOPE (Sorted L1 Penalized Estimation) fitting.
Eigen::VectorXd getIntercepts(const bool original_scale=true) const
Gets the intercept terms for this SLOPE fit.
const Eigen::ArrayXd & getLambda() const
Gets the lambda (regularization) parameter used.
double getAlpha() const
Gets the alpha (mixing) parameter used.
Eigen::SparseMatrix< double > getCoefs(const bool original_scale=true) const
Gets the sparse coefficient matrix for this fit.
double getNullDeviance() const
Gets the null model deviance.
Container class for SLOPE regression solution paths.
std::size_t size() const
Gets the number of solutions in the path.
std::vector< Eigen::SparseMatrix< double > > getCoefs(const bool original_scale=true) const
Returns the vector of coefficient matrices for each solution in the path.
void setSolver(const std::string &solver)
Sets the numerical solver used to fit the model.
void setAlphaMinRatio(double alpha_min_ratio)
Sets the alpha min ratio.
void setMaxIterations(int max_it)
Sets the maximum number of iterations.
void setAlphaEstimationMaxIterations(const int alpha_est_maxit)
Sets the maximum number of iterations for the alpha estimation procedure.
void setRelaxMaxInnerIterations(int max_it)
Sets the maximum number of inner iterations for the relaxed solver.
void setRandomSeed(std::optional< int > seed)
Sets the random seed.
int getRandomSeed() const
Gets the random seed.
SlopePath path(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0), std::function< bool()> check_interrupt=defaultInterruptChecker)
Computes SLOPE regression solution path for multiple alpha and lambda values.
SlopePath estimateAlpha(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y, std::function< bool()> check_interrupt=defaultInterruptChecker)
Estimates the regularization parameter alpha for SLOPE regression.
void setDevRatioTol(const double dev_ratio_tol)
Sets tolerance in deviance change for early stopping.
void setScaling(const std::string &type)
Sets the scaling type.
void setRandomSeed(const int seed)
Sets the random seed.
void setDevChangeTol(const double dev_change_tol)
Sets tolerance in deviance change for early stopping.
const std::string & getLossType()
Get currently defined loss type.
void setReturnClusters(const bool return_clusters)
Sets the return clusters flag.
void setRelaxMaxOuterIterations(int max_it)
Sets the maximum number of outer (IRLS) iterations for the relaxed solver.
void setMaxClusters(const int max_clusters)
Sets the maximum number of clusters.
void setIntercept(bool intercept)
Sets the intercept flag.
bool getFitIntercept() const
Returns the intercept flag.
void setDiagnostics(const bool collect_diagnostics)
Toggles collection of diagnostics.
void setNormalization(const std::string &type)
Sets normalization type for the design matrix.
SlopeFit relax(const SlopeFit &fit, T &x, const Eigen::VectorXd &y_in, const double gamma=0.0, Eigen::VectorXd beta0=Eigen::VectorXd(0), Eigen::VectorXd beta=Eigen::VectorXd(0))
Relaxes a fitted SLOPE model.
int getAlphaEstimationMaxIterations() const
Gets the maximum number of iterations allowed for the alpha estimation procedure.
void setScreening(const std::string &screening_type)
Sets the type of feature screening used, which discards predictors that are unlikely to be active.
void setOscarParameters(const double theta1, const double theta2)
Sets OSCAR parameters.
void setLambdaType(const std::string &lambda_type)
Sets the lambda type for regularization weights.
void setAlphaType(const std::string &alpha_type)
Sets the alpha type.
SlopeFit fit(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, const double alpha=1.0, Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0), std::function< bool()> check_interrupt=defaultInterruptChecker)
Fits a single SLOPE regression model for given alpha and lambda values.
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setCentering(const std::string &type)
Sets the center points for feature normalization.
void setHybridCdType(const std::string &cd_type)
Sets the frequence of proximal gradient descent steps.
bool hasRandomSeed() const
Checks if a random seed is set.
void setLearningRateDecr(double learning_rate_decr)
Sets the learning rate decrement.
void setLoss(const std::string &loss_type)
Sets the loss function type.
SlopePath relax(const SlopePath &path, T &x, const Eigen::VectorXd &y, const double gamma=0.0)
Relaxes a fitted SLOPE path.
void setScaling(const Eigen::VectorXd &x_scales)
Sets the scaling factors for feature normalization.
void setRelaxTol(double tol)
Sets the tolerance value for the relaxed SLOPE solver.
void setPathLength(int path_length)
Sets the path length.
void setCentering(const Eigen::VectorXd &x_centers)
Sets the center points for feature normalization.
void setHybridCdIterations(int cd_iterations)
Sets the frequence of proximal gradient descent steps.
void setTol(double tol)
Sets the tolerance value.
void setQ(double q)
Sets the q value.
void setUpdateClusters(bool update_clusters)
Sets the update clusters flag.
Class representing the Sorted L1 Norm.
double eval(const Eigen::VectorXd &beta, const Eigen::ArrayXd &lambda) const
Evaluates the Sorted L1 Norm.
double dualNorm(const Eigen::VectorXd &a, const Eigen::ArrayXd &lambda) const
Computes the dual norm of a vector.
Timer class for measuring elapsed time with high resolution.
void start()
Starts the timer by recording the current time point.
void resume()
Resumes the timer after a pause.
double elapsed() const
Returns the elapsed time in seconds since start() was called.
void pause()
Pauses the timer.
static void addWarning(WarningCode code, const std::string &message)
Log a new warning.
The declaration of the Clusters class.
Definitions of constants used in libslope.
Diagnostics for SLOPE optimization.
Functions for estimating noise level and regularization parameter alpha.
An implementation of the coordinate descent step in the hybrid algorithm for solving SLOPE.
Thread-safe warning logging facility for the slope library.
The declartion of the Objctive class and its subclasses, which represent the data-fitting part of the...
Mathematical support functions for the slope package.
constexpr double EPSILON
Small value used for floating-point comparisons to handle precision issues.
Namespace containing SLOPE regression implementation.
Eigen::ArrayXd regularizationPath(const Eigen::ArrayXd &alpha_in, const int path_length, double alpha_min_ratio, const double alpha_max)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
std::unique_ptr< SolverBase > setupSolver(const std::string &solver_type, const std::string &loss, JitNormalization jit_normalization, bool intercept, bool update_clusters, int cd_iterations, const std::string &cd_type, std::optional< int > random_seed=std::nullopt)
Factory function to create and configure a SLOPE solver.
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
bool defaultInterruptChecker()
Default no-op interrupt checker.
std::unordered_set< double > unique(const Eigen::MatrixXd &x)
Create a set of unique values from an Eigen matrix.
double estimateNoise(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y, const bool fit_intercept)
Estimates noise (standard error) in a linear model using OLS residuals.
T subsetCols(const Eigen::MatrixBase< T > &x, const std::vector< int > &indices)
Extract specified columns from a dense matrix.
std::unique_ptr< ScreeningRule > createScreeningRule(const std::string &screening_type)
Creates a screening rule based on the provided type.
Eigen::MatrixXd linearPredictor(const T &x, const std::vector< int > &active_set, const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization, const bool intercept)
double computeDual(const Eigen::VectorXd &beta, const Eigen::MatrixXd &residual, const std::unique_ptr< Loss > &loss, const SortedL1Norm &sl1_norm, const Eigen::ArrayXd &lambda, const MatrixType &x, const Eigen::MatrixXd &y, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization &jit_normalization, const bool intercept)
Computes the dual objective function value for SLOPE optimization.
@ MAXIT_REACHED
Maximum iterations reached without convergence.
Eigen::ArrayXd lambdaSequence(const int p, const double q, const std::string &type, const int n=-1, const double theta1=1.0, const double theta2=1.0)
void updateGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::MatrixXd &residual, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::VectorXd &w, const JitNormalization jit_normalization)
void offsetGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::VectorXd &offset, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::VectorXd clusterGradient(Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
std::vector< int > activeSet(const Eigen::VectorXd &beta)
Identifies previously active variables.
JitNormalization normalize(Eigen::MatrixBase< T > &x, Eigen::VectorXd &x_centers, Eigen::VectorXd &x_scales, const std::string ¢ering_type, const std::string &scaling_type, const bool modify_x)
int whichMax(const T &x)
Returns the index of the maximum element in a container.
Functions to normalize the design matrix and rescale coefficients in case the design was normalized.
Functions for generating regularization sequences for SLOPE.
Screening rules for SLOPE regression optimization.
Factory function to create the appropriate loss function based on.
Factory function to create and configure a SLOPE solver.
SLOPE (Sorted L-One Penalized Estimation) fitting results.
Defines the SlopePath class for storing and accessing SLOPE regression solution paths.
The declaration of the SortedL1Norm class.
Simple high-resolution timer class for performance measurements.