26#include <Eigen/SparseCore>
389 Eigen::EigenBase<T>& x,
390 const Eigen::MatrixXd& y_in,
391 Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
392 Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
395 using Eigen::MatrixXd;
396 using Eigen::VectorXd;
398 const int n = x.rows();
399 const int p = x.cols();
401 const int INTERRUPT_FREQ = 100;
402 bool interrupt =
false;
404 if (n != y_in.rows()) {
405 throw std::invalid_argument(
406 "x and y_in must have the same number of rows");
410 throw std::invalid_argument(
"x must not contain NA, NaN, or Inf values");
413 if (!y_in.array().isFinite().all()) {
414 throw std::invalid_argument(
"y must not contain NA, NaN, or Inf values");
417 auto jit_normalization =
normalize(x.derived(),
420 this->centering_type,
424 std::unique_ptr<Loss> loss =
setupLoss(this->loss_type);
426 MatrixXd y = loss->preprocessResponse(y_in);
428 const int m = y.cols();
430 std::vector<int> full_set(p * m);
431 std::iota(full_set.begin(), full_set.end(), 0);
433 VectorXd beta0 = VectorXd::Zero(m);
434 VectorXd beta = VectorXd::Zero(p * m);
436 MatrixXd eta = MatrixXd::Zero(n, m);
438 if (this->intercept) {
439 beta0 = loss->link(y.colwise().mean()).transpose();
440 eta.rowwise() = beta0.transpose();
443 MatrixXd residual = loss->residual(eta, y);
444 VectorXd gradient(beta.size());
447 bool user_alpha = alpha.size() > 0;
448 bool user_lambda = lambda.size() > 0;
452 p * m, this->q, this->lambda_type, n, this->theta1, this->theta2);
454 if (lambda.size() != beta.size()) {
455 throw std::invalid_argument(
456 "lambda must be the same length as the number of coefficients");
458 if (lambda.minCoeff() < 0) {
459 throw std::invalid_argument(
"lambda must be non-negative");
461 if (!lambda.isFinite().all()) {
462 throw std::invalid_argument(
"lambda must be finite");
465 for (
int i = 1; i < lambda.size(); ++i) {
466 if (lambda(i) > lambda(i - 1)) {
467 throw std::invalid_argument(
468 "lambda must be in decreasing order");
481 this->update_clusters,
492 Eigen::VectorXd::Ones(n),
495 int alpha_max_ind =
whichMax(gradient.cwiseAbs());
496 double alpha_max = sl1_norm.
dualNorm(gradient, lambda);
498 if (alpha_type ==
"path" ||
499 (alpha_type ==
"estimate" && alpha_estimate != 1)) {
500 if (alpha_min_ratio < 0) {
501 alpha_min_ratio = n > gradient.size() ? 1e-4 : 1e-2;
506 path_length = alpha.size();
507 }
else if (alpha_type ==
"estimate" && alpha_estimate == -1) {
508 if (loss_type !=
"quadratic") {
509 throw std::invalid_argument(
"Automatic alpha estimation is only "
510 "available for the quadratic loss");
515 std::unique_ptr<ScreeningRule> screening_rule =
517 std::vector<int> working_set =
518 screening_rule->initialize(full_set, alpha_max_ind);
521 double null_deviance = loss->deviance(eta, y);
522 double dev_prev = null_deviance;
526 double alpha_prev = std::max(alpha_max, alpha(0));
528 std::vector<SlopeFit> fits;
531 for (
int path_step = 0; path_step < this->path_length; ++path_step) {
533 bool local_interrupt =
false;
535#pragma omp critical(check_interrupt)
538 local_interrupt = check_interrupt();
540 if (local_interrupt) {
545 double alpha_curr = alpha(path_step);
547 assert(alpha_curr <= alpha_prev &&
"Alpha must be decreasing");
549 Eigen::ArrayXd lambda_curr = alpha_curr * lambda;
550 Eigen::ArrayXd lambda_prev = alpha_prev * lambda;
552 std::vector<double> duals, primals, time;
564 Eigen::VectorXd::Ones(x.rows()),
567 working_set = screening_rule->screen(
568 gradient, lambda_curr, lambda_prev, beta, full_set);
572 for (; it < this->max_it; ++it, ++total_it) {
574 residual = loss->residual(eta, y);
581 Eigen::VectorXd::Ones(n),
584 double primal = loss->loss(eta, y) +
585 sl1_norm.
eval(beta(working_set),
586 lambda_curr.head(working_set.size()));
588 MatrixXd theta = residual;
591 VectorXd dual_gradient = gradient;
596 if (this->intercept) {
597 VectorXd theta_mean = theta.colwise().mean();
598 theta.rowwise() -= theta_mean.transpose();
610 double dual_norm = sl1_norm.
dualNorm(
611 dual_gradient(working_set), lambda_curr.head(working_set.size()));
612 theta.array() /= std::max(1.0, dual_norm);
614 double dual = loss->dual(theta, y, Eigen::VectorXd::Ones(n));
616 if (collect_diagnostics) {
631 time.emplace_back(timer.
elapsed());
632 primals.emplace_back(primal);
633 duals.emplace_back(true_dual);
636 double dual_gap = primal - dual;
638 assert(dual_gap > -1e-6 &&
"Dual gap should be positive");
642 if (dual_gap <= tol_scaled || it == this->max_it) {
644 screening_rule->checkKktViolations(gradient,
661 if (it % INTERRUPT_FREQ == 0) {
662 bool local_interrupt =
false;
664#pragma omp critical(check_interrupt)
667 local_interrupt = check_interrupt();
669 if (local_interrupt) {
689 if (it == this->max_it) {
692 "Maximum number of iterations reached at step = " +
693 std::to_string(path_step) +
".");
696 alpha_prev = alpha_curr;
699 double dev = loss->deviance(eta, y);
700 double dev_ratio = 1 - dev / null_deviance;
701 double dev_change = path_step == 0 ? 1.0 : 1 - dev / dev_prev;
706 if (return_clusters) {
711 beta.reshaped(p, m).sparseView(),
721 this->centering_type,
727 fits.emplace_back(std::move(
fit));
734 int n_unique =
unique(beta.cwiseAbs()).size();
735 if (dev_ratio > dev_ratio_tol || dev_change < dev_change_tol ||
736 n_unique >= this->max_clusters.value_or(n + 1)) {
764 Eigen::EigenBase<T>& x,
765 const Eigen::MatrixXd& y_in,
766 const double alpha = 1.0,
767 Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
770 Eigen::ArrayXd alpha_arr(1);
771 alpha_arr(0) = alpha;
772 SlopePath res =
path(x, y_in, alpha_arr, lambda, check_interrupt);
803 Eigen::EigenBase<T>& x,
811 Slope model_copy = *
this;
814 std::vector<int> selected;
815 Eigen::ArrayXd alpha(1);
821 this->alpha_estimate = alpha(0);
823 model_copy.
path(x, y, alpha, Eigen::ArrayXd::Zero(0), check_interrupt);
825 for (
int it = 0; it < this->alpha_est_maxit; ++it) {
826 T x_selected =
subsetCols(x.derived(), selected);
828 std::vector<int> selected_prev = selected;
831 alpha(0) =
estimateNoise(x_selected, y, this->intercept) / n;
832 this->alpha_estimate = alpha(0);
834 result = model_copy.
path(
835 x, y, alpha, Eigen::ArrayXd::Zero(0), check_interrupt);
836 auto coefs = result.
getCoefs().back();
838 for (
typename Eigen::SparseMatrix<double>::InnerIterator it(coefs, 0);
841 selected.emplace_back(it.row());
844 if (selected == selected_prev) {
848 if (
static_cast<int>(selected.size()) >= n + this->intercept) {
849 throw std::runtime_error(
850 "selected >= n - 1 variables, cannot estimate variance");
856 "Maximum iterations reached in alpha estimation");
879 const Eigen::VectorXd& y_in,
880 const double gamma = 0.0,
881 Eigen::VectorXd beta0 = Eigen::VectorXd(0),
882 Eigen::VectorXd beta = Eigen::VectorXd(0))
884 using Eigen::MatrixXd;
885 using Eigen::VectorXd;
890 if (beta0.size() == 0) {
894 if (beta.size() == 0) {
902 std::vector<double> primals, duals, time;
905 auto jit_normalization =
906 normalize(x, x_centers, x_scales, centering_type, scaling_type, modify_x);
908 bool update_clusters =
false;
910 std::unique_ptr<Loss> loss =
setupLoss(this->loss_type);
912 MatrixXd y = loss->preprocessResponse(y_in);
916 Eigen::ArrayXd lambda_cumsum_relax = Eigen::ArrayXd::Zero(p * m + 1);
928 VectorXd gradient = VectorXd::Zero(p * m);
929 MatrixXd residual(n, m);
930 MatrixXd working_residual(n, m);
932 MatrixXd w = MatrixXd::Ones(n, m);
933 MatrixXd w_ones = MatrixXd::Ones(n, m);
940 if (random_seed.has_value()) {
941 rng.seed(*random_seed);
943 rng.seed(std::random_device{}());
948 for (
int irls_it = 0; irls_it < max_it_outer_relax; irls_it++) {
949 residual = loss->residual(eta, y);
951 if (collect_diagnostics) {
952 primals.push_back(loss->loss(eta, y));
953 duals.push_back(0.0);
954 time.push_back(timer.
elapsed());
966 double norm_grad = cluster_gradient.lpNorm<Eigen::Infinity>();
968 if (norm_grad < tol_relax) {
972 loss->updateWeightsAndWorkingResponse(w, z, eta, y);
973 working_residual = eta - z;
975 for (
int inner_it = 0; inner_it < max_it_inner_relax; ++inner_it) {
993 if (max_abs_gradient < tol_relax) {
998 eta = working_residual + z;
1000 if (irls_it == max_it_outer_relax) {
1002 "Maximum number of IRLS iterations reached.");
1006 double dev = loss->deviance(eta, y);
1011 beta = (1 - gamma) * beta + gamma * old_coefs;
1015 beta.reshaped(p, m).sparseView(),
1046 template<
typename T>
1049 const Eigen::VectorXd& y,
1050 const double gamma = 0.0)
1052 std::vector<SlopeFit> fits;
1054 for (
size_t i = 0; i <
path.
size(); i++) {
1060 auto relaxed_fit =
relax(
path(i), x, y, gamma);
1062 fits.emplace_back(relaxed_fit);
1070 bool collect_diagnostics =
false;
1071 bool intercept =
true;
1072 bool modify_x =
false;
1073 bool return_clusters =
true;
1074 bool update_clusters =
true;
1075 double alpha_min_ratio = -1;
1076 double dev_change_tol = 1e-5;
1077 double dev_ratio_tol = 0.999;
1078 double learning_rate_decr = 0.5;
1080 double theta1 = 1.0;
1081 double theta2 = 0.5;
1083 double tol_relax = 1e-4;
1084 double alpha_estimate = -1;
1085 int alpha_est_maxit = 1000;
1086 int cd_iterations = 10;
1088 int max_it_inner_relax = 1e5;
1089 int max_it_outer_relax = 50;
1090 int path_length = 100;
1091 std::optional<int> max_clusters = std::nullopt;
1092 std::optional<int> random_seed = 0;
1093 std::string alpha_type =
"path";
1094 std::string cd_type =
"permuted";
1095 std::string centering_type =
"mean";
1096 std::string lambda_type =
"bh";
1097 std::string loss_type =
"quadratic";
1098 std::string scaling_type =
"sd";
1099 std::string screening_type =
"strong";
1100 std::string solver_type =
"auto";
1103 Eigen::VectorXd x_centers;
1104 Eigen::VectorXd x_scales;
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
A class representing the results of SLOPE (Sorted L1 Penalized Estimation) fitting.
Eigen::VectorXd getIntercepts(const bool original_scale=true) const
Gets the intercept terms for this SLOPE fit.
const Eigen::ArrayXd & getLambda() const
Gets the lambda (regularization) parameter used.
double getAlpha() const
Gets the alpha (mixing) parameter used.
Eigen::SparseMatrix< double > getCoefs(const bool original_scale=true) const
Gets the sparse coefficient matrix for this fit.
double getNullDeviance() const
Gets the null model deviance.
Container class for SLOPE regression solution paths.
std::size_t size() const
Gets the number of solutions in the path.
std::vector< Eigen::SparseMatrix< double > > getCoefs(const bool original_scale=true) const
Returns the vector of coefficient matrices for each solution in the path.
void setSolver(const std::string &solver)
Sets the numerical solver used to fit the model.
void setAlphaMinRatio(double alpha_min_ratio)
Sets the alpha min ratio.
void setMaxIterations(int max_it)
Sets the maximum number of iterations.
void setAlphaEstimationMaxIterations(const int alpha_est_maxit)
Sets the maximum number of iterations for the alpha estimation procedure.
void setRelaxMaxInnerIterations(int max_it)
Sets the maximum number of inner iterations for the relaxed solver.
void setRandomSeed(std::optional< int > seed)
Sets the random seed.
int getRandomSeed() const
Gets the random seed.
SlopePath path(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0), std::function< bool()> check_interrupt=defaultInterruptChecker)
Computes SLOPE regression solution path for multiple alpha and lambda values.
SlopePath estimateAlpha(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y, std::function< bool()> check_interrupt=defaultInterruptChecker)
Estimates the regularization parameter alpha for SLOPE regression.
void setDevRatioTol(const double dev_ratio_tol)
Sets tolerance in deviance change for early stopping.
void setScaling(const std::string &type)
Sets the scaling type.
void setRandomSeed(const int seed)
Sets the random seed.
void setDevChangeTol(const double dev_change_tol)
Sets tolerance in deviance change for early stopping.
const std::string & getLossType()
Get currently defined loss type.
void setReturnClusters(const bool return_clusters)
Sets the return clusters flag.
void setRelaxMaxOuterIterations(int max_it)
Sets the maximum number of outer (IRLS) iterations for the relaxed solver.
void setMaxClusters(const int max_clusters)
Sets the maximum number of clusters.
void setIntercept(bool intercept)
Sets the intercept flag.
bool getFitIntercept() const
Returns the intercept flag.
void setDiagnostics(const bool collect_diagnostics)
Toggles collection of diagnostics.
void setNormalization(const std::string &type)
Sets normalization type for the design matrix.
SlopeFit relax(const SlopeFit &fit, T &x, const Eigen::VectorXd &y_in, const double gamma=0.0, Eigen::VectorXd beta0=Eigen::VectorXd(0), Eigen::VectorXd beta=Eigen::VectorXd(0))
Relaxes a fitted SLOPE model.
int getAlphaEstimationMaxIterations() const
Gets the maximum number of iterations allowed for the alpha estimation procedure.
void setScreening(const std::string &screening_type)
Sets the type of feature screening used, which discards predictors that are unlikely to be active.
void setOscarParameters(const double theta1, const double theta2)
Sets OSCAR parameters.
void setLambdaType(const std::string &lambda_type)
Sets the lambda type for regularization weights.
void setAlphaType(const std::string &alpha_type)
Sets the alpha type.
SlopeFit fit(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, const double alpha=1.0, Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0), std::function< bool()> check_interrupt=defaultInterruptChecker)
Fits a single SLOPE regression model for given alpha and lambda values.
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setCentering(const std::string &type)
Sets the center points for feature normalization.
void setHybridCdType(const std::string &cd_type)
Sets the frequence of proximal gradient descent steps.
bool hasRandomSeed() const
Checks if a random seed is set.
void setLearningRateDecr(double learning_rate_decr)
Sets the learning rate decrement.
void setLoss(const std::string &loss_type)
Sets the loss function type.
SlopePath relax(const SlopePath &path, T &x, const Eigen::VectorXd &y, const double gamma=0.0)
Relaxes a fitted SLOPE path.
void setScaling(const Eigen::VectorXd &x_scales)
Sets the scaling factors for feature normalization.
void setRelaxTol(double tol)
Sets the tolerance value for the relaxed SLOPE solver.
void setPathLength(int path_length)
Sets the path length.
void setCentering(const Eigen::VectorXd &x_centers)
Sets the center points for feature normalization.
void setHybridCdIterations(int cd_iterations)
Sets the frequence of proximal gradient descent steps.
void setTol(double tol)
Sets the tolerance value.
void setQ(double q)
Sets the q value.
void setUpdateClusters(bool update_clusters)
Sets the update clusters flag.
Class representing the Sorted L1 Norm.
double eval(const Eigen::VectorXd &beta, const Eigen::ArrayXd &lambda) const
Evaluates the Sorted L1 Norm.
double dualNorm(const Eigen::VectorXd &a, const Eigen::ArrayXd &lambda) const
Computes the dual norm of a vector.
Timer class for measuring elapsed time with high resolution.
void start()
Starts the timer by recording the current time point.
void resume()
Resumes the timer after a pause.
double elapsed() const
Returns the elapsed time in seconds since start() was called.
void pause()
Pauses the timer.
static void addWarning(WarningCode code, const std::string &message)
Log a new warning.
The declaration of the Clusters class.
Definitions of constants used in libslope.
Diagnostics for SLOPE optimization.
Functions for estimating noise level and regularization parameter alpha.
An implementation of the coordinate descent step in the hybrid algorithm for solving SLOPE.
Thread-safe warning logging facility for the slope library.
The declartion of the Objctive class and its subclasses, which represent the data-fitting part of the...
Mathematical support functions for the slope package.
constexpr double EPSILON
Small value used for floating-point comparisons to handle precision issues.
Namespace containing SLOPE regression implementation.
Eigen::ArrayXd regularizationPath(const Eigen::ArrayXd &alpha_in, const int path_length, double alpha_min_ratio, const double alpha_max)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
std::unique_ptr< SolverBase > setupSolver(const std::string &solver_type, const std::string &loss, JitNormalization jit_normalization, bool intercept, bool update_clusters, int cd_iterations, const std::string &cd_type, std::optional< int > random_seed=std::nullopt)
Factory function to create and configure a SLOPE solver.
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
bool defaultInterruptChecker()
Default no-op interrupt checker.
std::unordered_set< double > unique(const Eigen::MatrixXd &x)
Create a set of unique values from an Eigen matrix.
double estimateNoise(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y, const bool fit_intercept)
Estimates noise (standard error) in a linear model using OLS residuals.
T subsetCols(const Eigen::MatrixBase< T > &x, const std::vector< int > &indices)
Extract specified columns from a dense matrix.
std::unique_ptr< ScreeningRule > createScreeningRule(const std::string &screening_type)
Creates a screening rule based on the provided type.
Eigen::MatrixXd linearPredictor(const T &x, const std::vector< int > &active_set, const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization, const bool intercept)
double computeDual(const Eigen::VectorXd &beta, const Eigen::MatrixXd &residual, const std::unique_ptr< Loss > &loss, const SortedL1Norm &sl1_norm, const Eigen::ArrayXd &lambda, const MatrixType &x, const Eigen::MatrixXd &y, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization &jit_normalization, const bool intercept)
Computes the dual objective function value for SLOPE optimization.
@ MAXIT_REACHED
Maximum iterations reached without convergence.
Eigen::ArrayXd lambdaSequence(const int p, const double q, const std::string &type, const int n=-1, const double theta1=1.0, const double theta2=1.0)
void updateGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::MatrixXd &residual, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::VectorXd &w, const JitNormalization jit_normalization)
void offsetGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::VectorXd &offset, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::VectorXd clusterGradient(Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
std::vector< int > activeSet(const Eigen::VectorXd &beta)
Identifies previously active variables.
JitNormalization normalize(Eigen::MatrixBase< T > &x, Eigen::VectorXd &x_centers, Eigen::VectorXd &x_scales, const std::string ¢ering_type, const std::string &scaling_type, const bool modify_x)
int whichMax(const T &x)
Returns the index of the maximum element in a container.
bool isFinite(const Eigen::DenseBase< Derived > &x)
Check if all elements in a dense matrix are finite.
Functions to normalize the design matrix and rescale coefficients in case the design was normalized.
Functions for generating regularization sequences for SLOPE.
Screening rules for SLOPE regression optimization.
Factory function to create the appropriate loss function based on.
Factory function to create and configure a SLOPE solver.
SLOPE (Sorted L-One Penalized Estimation) fitting results.
Defines the SlopePath class for storing and accessing SLOPE regression solution paths.
The declaration of the SortedL1Norm class.
Simple high-resolution timer class for performance measurements.