26#include <Eigen/SparseCore> 
  377                 const Eigen::MatrixXd& y_in,
 
  378                 Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
 
  379                 Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0))
 
  381    using Eigen::MatrixXd;
 
  382    using Eigen::VectorXd;
 
  384    const int n = x.rows();
 
  385    const int p = x.cols();
 
  387    if (n != y_in.rows()) {
 
  388      throw std::invalid_argument(
 
  389        "x and y_in must have the same number of rows");
 
  392    auto jit_normalization = 
normalize(x.derived(),
 
  395                                       this->centering_type,
 
  399    std::unique_ptr<Loss> loss = 
setupLoss(this->loss_type);
 
  401    MatrixXd y = loss->preprocessResponse(y_in);
 
  403    const int m = y.cols();
 
  405    std::vector<int> full_set(p * m);
 
  406    std::iota(full_set.begin(), full_set.end(), 0);
 
  408    VectorXd beta0 = VectorXd::Zero(m);
 
  409    VectorXd beta = VectorXd::Zero(p * m);
 
  411    MatrixXd eta = MatrixXd::Zero(n, m); 
 
  413    if (this->intercept) {
 
  414      beta0 = loss->link(y.colwise().mean()).transpose();
 
  415      eta.rowwise() = beta0.transpose();
 
  418    MatrixXd residual = loss->residual(eta, y);
 
  419    VectorXd gradient(beta.size());
 
  422    bool user_alpha = alpha.size() > 0;
 
  423    bool user_lambda = lambda.size() > 0;
 
  427        p * m, this->q, this->lambda_type, n, this->theta1, this->theta2);
 
  429      if (lambda.size() != beta.size()) {
 
  430        throw std::invalid_argument(
 
  431          "lambda must be the same length as the number of coefficients");
 
  433      if (lambda.minCoeff() < 0) {
 
  434        throw std::invalid_argument(
"lambda must be non-negative");
 
  436      if (!lambda.isFinite().all()) {
 
  437        throw std::invalid_argument(
"lambda must be finite");
 
  449                              this->update_clusters,
 
  460                   Eigen::VectorXd::Ones(n),
 
  463    int alpha_max_ind = 
whichMax(gradient.cwiseAbs());
 
  464    double alpha_max = sl1_norm.
dualNorm(gradient, lambda);
 
  466    if (alpha_type == 
"path" ||
 
  467        (alpha_type == 
"estimate" && alpha_estimate != 1)) {
 
  468      if (alpha_min_ratio < 0) {
 
  469        alpha_min_ratio = n > gradient.size() ? 1e-4 : 1e-2;
 
  474      path_length = alpha.size();
 
  475    } 
else if (alpha_type == 
"estimate" && alpha_estimate == -1) {
 
  476      if (loss_type != 
"quadratic") {
 
  477        throw std::invalid_argument(
"Automatic alpha estimation is only " 
  478                                    "available for the quadratic loss");
 
  483    std::unique_ptr<ScreeningRule> screening_rule =
 
  485    std::vector<int> working_set =
 
  486      screening_rule->initialize(full_set, alpha_max_ind);
 
  489    double null_deviance = loss->deviance(eta, y);
 
  490    double dev_prev = null_deviance;
 
  494    double alpha_prev = std::max(alpha_max, alpha(0));
 
  496    std::vector<SlopeFit> fits;
 
  499    for (
int path_step = 0; path_step < this->path_length; ++path_step) {
 
  500      double alpha_curr = alpha(path_step);
 
  502      assert(alpha_curr <= alpha_prev && 
"Alpha must be decreasing");
 
  504      Eigen::ArrayXd lambda_curr = alpha_curr * lambda;
 
  505      Eigen::ArrayXd lambda_prev = alpha_prev * lambda;
 
  507      std::vector<double> duals, primals, time;
 
  519                     Eigen::VectorXd::Ones(x.rows()),
 
  522      working_set = screening_rule->screen(
 
  523        gradient, lambda_curr, lambda_prev, beta, full_set);
 
  527      for (; it < this->max_it; ++it, ++total_it) {
 
  529        residual = loss->residual(eta, y);
 
  536                       Eigen::VectorXd::Ones(n),
 
  539        double primal = loss->loss(eta, y) +
 
  540                        sl1_norm.
eval(beta(working_set),
 
  541                                      lambda_curr.head(working_set.size()));
 
  543        MatrixXd theta = residual;
 
  546        VectorXd dual_gradient = gradient;
 
  551        if (this->intercept) {
 
  552          VectorXd theta_mean = theta.colwise().mean();
 
  553          theta.rowwise() -= theta_mean.transpose();
 
  565        double dual_norm = sl1_norm.
dualNorm(
 
  566          dual_gradient(working_set), lambda_curr.head(working_set.size()));
 
  567        theta.array() /= std::max(1.0, dual_norm);
 
  569        double dual = loss->dual(theta, y, Eigen::VectorXd::Ones(n));
 
  571        if (collect_diagnostics) {
 
  586          time.emplace_back(timer.
elapsed());
 
  587          primals.emplace_back(primal);
 
  588          duals.emplace_back(true_dual);
 
  591        double dual_gap = primal - dual;
 
  593        assert(dual_gap > -1e-6 && 
"Dual gap should be positive");
 
  597        if (dual_gap <= tol_scaled || it == this->max_it) {
 
  599            screening_rule->checkKktViolations(gradient,
 
  630      if (it == this->max_it) {
 
  633          "Maximum number of iterations reached at step = " +
 
  634            std::to_string(path_step) + 
".");
 
  637      alpha_prev = alpha_curr;
 
  640      double dev = loss->deviance(eta, y);
 
  641      double dev_ratio = 1 - dev / null_deviance;
 
  642      double dev_change = path_step == 0 ? 1.0 : 1 - dev / dev_prev;
 
  647      if (return_clusters) {
 
  652                    beta.reshaped(p, m).sparseView(),
 
  662                    this->centering_type,
 
  668      fits.emplace_back(std::move(
fit));
 
  671        int n_unique = 
unique(beta.cwiseAbs()).size();
 
  672        if (dev_ratio > dev_ratio_tol || dev_change < dev_change_tol ||
 
  673            n_unique >= this->max_clusters.value_or(n + 1)) {
 
 
  699               const Eigen::MatrixXd& y_in,
 
  700               const double alpha = 1.0,
 
  701               Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0))
 
  703    Eigen::ArrayXd alpha_arr(1);
 
  704    alpha_arr(0) = alpha;
 
 
  740    Slope model_copy = *
this;
 
  743    std::vector<int> selected;
 
  744    Eigen::ArrayXd alpha(1);
 
  750      this->alpha_estimate = alpha(0);
 
  751      result = model_copy.
path(x, y, alpha);
 
  753      for (
int it = 0; it < this->alpha_est_maxit; ++it) {
 
  754        T x_selected = 
subsetCols(x.derived(), selected);
 
  756        std::vector<int> selected_prev = selected;
 
  759        alpha(0) = 
estimateNoise(x_selected, y, this->intercept) / n;
 
  760        this->alpha_estimate = alpha(0);
 
  762        result = model_copy.
path(x, y, alpha);
 
  763        auto coefs = result.
getCoefs().back();
 
  765        for (
typename Eigen::SparseMatrix<double>::InnerIterator it(coefs, 0);
 
  768          selected.emplace_back(it.row());
 
  771        if (selected == selected_prev) {
 
  775        if (
static_cast<int>(selected.size()) >= n + this->intercept) {
 
  776          throw std::runtime_error(
 
  777            "selected >= n - 1 variables, cannot estimate variance");
 
  783        "Maximum iterations reached in alpha estimation");
 
 
  806                 const Eigen::VectorXd& y_in,
 
  807                 const double gamma = 0.0,
 
  808                 Eigen::VectorXd beta0 = Eigen::VectorXd(0),
 
  809                 Eigen::VectorXd beta = Eigen::VectorXd(0))
 
  811    using Eigen::MatrixXd;
 
  812    using Eigen::VectorXd;
 
  817    if (beta0.size() == 0) {
 
  821    if (beta.size() == 0) {
 
  829    std::vector<double> primals, duals, time;
 
  832    auto jit_normalization =
 
  833      normalize(x, x_centers, x_scales, centering_type, scaling_type, modify_x);
 
  835    bool update_clusters = 
false;
 
  837    std::unique_ptr<Loss> loss = 
setupLoss(this->loss_type);
 
  839    MatrixXd y = loss->preprocessResponse(y_in);
 
  843    Eigen::ArrayXd lambda_cumsum_relax = Eigen::ArrayXd::Zero(p * m + 1);
 
  855    VectorXd gradient = VectorXd::Zero(p * m);
 
  856    MatrixXd residual(n, m);
 
  857    MatrixXd working_residual(n, m);
 
  859    MatrixXd w = MatrixXd::Ones(n, m);
 
  860    MatrixXd w_ones = MatrixXd::Ones(n, m);
 
  867    if (random_seed.has_value()) {
 
  868      rng.seed(*random_seed);
 
  870      rng.seed(std::random_device{}());
 
  875    for (
int irls_it = 0; irls_it < max_it_outer_relax; irls_it++) {
 
  876      residual = loss->residual(eta, y);
 
  878      if (collect_diagnostics) {
 
  879        primals.push_back(loss->loss(eta, y));
 
  880        duals.push_back(0.0);
 
  881        time.push_back(timer.
elapsed());
 
  893      double norm_grad = cluster_gradient.lpNorm<Eigen::Infinity>();
 
  895      if (norm_grad < tol_relax) {
 
  899      loss->updateWeightsAndWorkingResponse(w, z, eta, y);
 
  900      working_residual = eta - z;
 
  902      for (
int inner_it = 0; inner_it < max_it_inner_relax; ++inner_it) {
 
  920        if (max_abs_gradient < tol_relax) {
 
  925      eta = working_residual + z;
 
  927      if (irls_it == max_it_outer_relax) {
 
  929                                  "Maximum number of IRLS iterations reached.");
 
  933    double dev = loss->deviance(eta, y);
 
  938      beta = (1 - gamma) * beta + gamma * old_coefs;
 
  942                      beta.reshaped(p, m).sparseView(),
 
 
  976                  const Eigen::VectorXd& y,
 
  977                  const double gamma = 0.0)
 
  979    std::vector<SlopeFit> fits;
 
  981    for (
size_t i = 0; i < 
path.
size(); i++) {
 
  987      auto relaxed_fit = 
relax(
path(i), x, y, gamma);
 
  989      fits.emplace_back(relaxed_fit);
 
 
  997  bool collect_diagnostics = 
false;
 
  998  bool intercept = 
true;
 
  999  bool modify_x = 
false;
 
 1000  bool return_clusters = 
true;
 
 1001  bool update_clusters = 
true;
 
 1002  double alpha_min_ratio = -1;
 
 1003  double dev_change_tol = 1e-5;
 
 1004  double dev_ratio_tol = 0.999;
 
 1005  double learning_rate_decr = 0.5;
 
 1007  double theta1 = 1.0;
 
 1008  double theta2 = 0.5;
 
 1010  double tol_relax = 1e-4;
 
 1011  double alpha_estimate = -1;
 
 1012  int alpha_est_maxit = 1000;
 
 1013  int cd_iterations = 10;
 
 1015  int max_it_inner_relax = 1e5;
 
 1016  int max_it_outer_relax = 50;
 
 1017  int path_length = 100;
 
 1018  std::optional<int> max_clusters = std::nullopt;
 
 1019  std::optional<int> random_seed = 0;
 
 1020  std::string alpha_type = 
"path";
 
 1021  std::string cd_type = 
"permuted";
 
 1022  std::string centering_type = 
"mean";
 
 1023  std::string lambda_type = 
"bh";
 
 1024  std::string loss_type = 
"quadratic";
 
 1025  std::string scaling_type = 
"sd";
 
 1026  std::string screening_type = 
"strong";
 
 1027  std::string solver_type = 
"auto";
 
 1030  Eigen::VectorXd x_centers;
 
 1031  Eigen::VectorXd x_scales;
 
 
Representation of the clusters in SLOPE.
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
A class representing the results of SLOPE (Sorted L1 Penalized Estimation) fitting.
Eigen::VectorXd getIntercepts(const bool original_scale=true) const
Gets the intercept terms for this SLOPE fit.
const Eigen::ArrayXd & getLambda() const
Gets the lambda (regularization) parameter used.
double getAlpha() const
Gets the alpha (mixing) parameter used.
Eigen::SparseMatrix< double > getCoefs(const bool original_scale=true) const
Gets the sparse coefficient matrix for this fit.
double getNullDeviance() const
Gets the null model deviance.
Container class for SLOPE regression solution paths.
std::size_t size() const
Gets the number of solutions in the path.
std::vector< Eigen::SparseMatrix< double > > getCoefs(const bool original_scale=true) const
Returns the vector of coefficient matrices for each solution in the path.
void setSolver(const std::string &solver)
Sets the numerical solver used to fit the model.
void setAlphaMinRatio(double alpha_min_ratio)
Sets the alpha min ratio.
void setMaxIterations(int max_it)
Sets the maximum number of iterations.
void setAlphaEstimationMaxIterations(const int alpha_est_maxit)
Sets the maximum number of iterations for the alpha estimation procedure.
void setRelaxMaxInnerIterations(int max_it)
Sets the maximum number of inner iterations for the relaxed solver.
void setRandomSeed(std::optional< int > seed)
Sets the random seed.
int getRandomSeed() const
Gets the random seed.
void setDevRatioTol(const double dev_ratio_tol)
Sets tolerance in deviance change for early stopping.
void setScaling(const std::string &type)
Sets the scaling type.
void setRandomSeed(const int seed)
Sets the random seed.
void setDevChangeTol(const double dev_change_tol)
Sets tolerance in deviance change for early stopping.
const std::string & getLossType()
Get currently defined loss type.
SlopePath path(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Computes SLOPE regression solution path for multiple alpha and lambda values.
SlopePath estimateAlpha(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y)
Estimates the regularization parameter alpha for SLOPE regression.
void setReturnClusters(const bool return_clusters)
Sets the return clusters flag.
void setRelaxMaxOuterIterations(int max_it)
Sets the maximum number of outer (IRLS) iterations for the relaxed solver.
void setMaxClusters(const int max_clusters)
Sets the maximum number of clusters.
void setIntercept(bool intercept)
Sets the intercept flag.
bool getFitIntercept() const
Returns the intercept flag.
void setDiagnostics(const bool collect_diagnostics)
Toggles collection of diagnostics.
void setNormalization(const std::string &type)
Sets normalization type for the design matrix.
SlopeFit relax(const SlopeFit &fit, T &x, const Eigen::VectorXd &y_in, const double gamma=0.0, Eigen::VectorXd beta0=Eigen::VectorXd(0), Eigen::VectorXd beta=Eigen::VectorXd(0))
Relaxes a fitted SLOPE model.
int getAlphaEstimationMaxIterations() const
Gets the maximum number of iterations allowed for the alpha estimation procedure.
void setScreening(const std::string &screening_type)
Sets the type of feature screening used, which discards predictors that are unlikely to be active.
void setOscarParameters(const double theta1, const double theta2)
Sets OSCAR parameters.
void setLambdaType(const std::string &lambda_type)
Sets the lambda type for regularization weights.
void setAlphaType(const std::string &alpha_type)
Sets the alpha type.
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setCentering(const std::string &type)
Sets the center points for feature normalization.
SlopeFit fit(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, const double alpha=1.0, Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Fits a single SLOPE regression model for given alpha and lambda values.
void setHybridCdType(const std::string &cd_type)
Sets the frequence of proximal gradient descent steps.
bool hasRandomSeed() const
Checks if a random seed is set.
void setLearningRateDecr(double learning_rate_decr)
Sets the learning rate decrement.
void setLoss(const std::string &loss_type)
Sets the loss function type.
SlopePath relax(const SlopePath &path, T &x, const Eigen::VectorXd &y, const double gamma=0.0)
Relaxes a fitted SLOPE path.
void setScaling(const Eigen::VectorXd &x_scales)
Sets the scaling factors for feature normalization.
void setRelaxTol(double tol)
Sets the tolerance value for the relaxed SLOPE solver.
void setPathLength(int path_length)
Sets the path length.
void setCentering(const Eigen::VectorXd &x_centers)
Sets the center points for feature normalization.
void setHybridCdIterations(int cd_iterations)
Sets the frequence of proximal gradient descent steps.
void setTol(double tol)
Sets the tolerance value.
void setQ(double q)
Sets the q value.
void setUpdateClusters(bool update_clusters)
Sets the update clusters flag.
Class representing the Sorted L1 Norm.
double eval(const Eigen::VectorXd &beta, const Eigen::ArrayXd &lambda) const
Evaluates the Sorted L1 Norm.
double dualNorm(const Eigen::VectorXd &a, const Eigen::ArrayXd &lambda) const
Computes the dual norm of a vector.
Timer class for measuring elapsed time with high resolution.
void start()
Starts the timer by recording the current time point.
void resume()
Resumes the timer after a pause.
double elapsed() const
Returns the elapsed time in seconds since start() was called.
void pause()
Pauses the timer.
static void addWarning(WarningCode code, const std::string &message)
Log a new warning.
The declaration of the Clusters class.
Definitions of constants used in libslope.
Diagnostics for SLOPE optimization.
Functions for estimating noise level and regularization parameter alpha.
An implementation of the coordinate descent step in the hybrid algorithm for solving SLOPE.
Thread-safe warning logging facility for the slope library.
The declartion of the Objctive class and its subclasses, which represent the data-fitting part of the...
Mathematical support functions for the slope package.
constexpr double EPSILON
Small value used for floating-point comparisons to handle precision issues.
Namespace containing SLOPE regression implementation.
Eigen::ArrayXd regularizationPath(const Eigen::ArrayXd &alpha_in, const int path_length, double alpha_min_ratio, const double alpha_max)
double coordinateDescent(Eigen::VectorXd &beta0, Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const Eigen::ArrayXd &lambda_cumsum, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept, const JitNormalization jit_normalization, const bool update_clusters, std::mt19937 &rng, const std::string &cd_type="cyclical")
std::unique_ptr< SolverBase > setupSolver(const std::string &solver_type, const std::string &loss, JitNormalization jit_normalization, bool intercept, bool update_clusters, int cd_iterations, const std::string &cd_type, std::optional< int > random_seed=std::nullopt)
Factory function to create and configure a SLOPE solver.
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
std::unordered_set< double > unique(const Eigen::MatrixXd &x)
Create a set of unique values from an Eigen matrix.
double estimateNoise(Eigen::EigenBase< T > &x, Eigen::MatrixXd &y, const bool fit_intercept)
Estimates noise (standard error) in a linear model using OLS residuals.
T subsetCols(const Eigen::MatrixBase< T > &x, const std::vector< int > &indices)
Extract specified columns from a dense matrix.
std::unique_ptr< ScreeningRule > createScreeningRule(const std::string &screening_type)
Creates a screening rule based on the provided type.
Eigen::MatrixXd linearPredictor(const T &x, const std::vector< int > &active_set, const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization, const bool intercept)
double computeDual(const Eigen::VectorXd &beta, const Eigen::MatrixXd &residual, const std::unique_ptr< Loss > &loss, const SortedL1Norm &sl1_norm, const Eigen::ArrayXd &lambda, const MatrixType &x, const Eigen::MatrixXd &y, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization &jit_normalization, const bool intercept)
Computes the dual objective function value for SLOPE optimization.
@ MAXIT_REACHED
Maximum iterations reached without convergence.
Eigen::ArrayXd lambdaSequence(const int p, const double q, const std::string &type, const int n=-1, const double theta1=1.0, const double theta2=1.0)
void updateGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::MatrixXd &residual, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::VectorXd &w, const JitNormalization jit_normalization)
void offsetGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::VectorXd &offset, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Eigen::VectorXd clusterGradient(Eigen::VectorXd &beta, Eigen::MatrixXd &residual, Clusters &clusters, const T &x, const Eigen::MatrixXd &w, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
std::vector< int > activeSet(const Eigen::VectorXd &beta)
Identifies previously active variables.
JitNormalization normalize(Eigen::MatrixBase< T > &x, Eigen::VectorXd &x_centers, Eigen::VectorXd &x_scales, const std::string ¢ering_type, const std::string &scaling_type, const bool modify_x)
int whichMax(const T &x)
Returns the index of the maximum element in a container.
Functions to normalize the design matrix and rescale coefficients in case the design was normalized.
Functions for generating regularization sequences for SLOPE.
Screening rules for SLOPE regression optimization.
Factory function to create the appropriate loss function based on.
Factory function to create and configure a SLOPE solver.
SLOPE (Sorted L-One Penalized Estimation) fitting results.
Defines the SlopePath class for storing and accessing SLOPE regression solution paths.
The declaration of the SortedL1Norm class.
Simple high-resolution timer class for performance measurements.