slope 0.29.0
Loading...
Searching...
No Matches
estimate_alpha.h
Go to the documentation of this file.
1
7#pragma once
8
9#include "slope.h"
10#include "slope/logger.h"
11#include "slope/ols.h"
12#include "utils.h"
13
14namespace slope {
15
30template<typename MatrixType>
31double
32estimateNoise(MatrixType& x, Eigen::MatrixXd& y, const bool fit_intercept)
33{
34 int n = x.rows();
35 int p = x.cols();
36
37 Eigen::VectorXd residuals;
38 int df; // Degrees of freedom
39
40 if (p == 0) {
41 // Handle the case when X has zero columns
42 residuals = y;
43 df = n - 1;
44
45 if (fit_intercept) {
46 y.array() -= y.mean();
47 df -= 1;
48 }
49 } else {
50 // Normal case with predictors
51 auto [ols_intercept, ols_coefs] = fitOls(x, y, fit_intercept);
52 residuals = y - x * ols_coefs;
53
54 if (fit_intercept) {
55 residuals.array() -= ols_intercept;
56 }
57
58 df = n - p - static_cast<int>(fit_intercept) - 1;
59 }
60
61 assert(df > 0);
62
63 return residuals.norm() / std::sqrt(df);
64}
65
90template<typename MatrixType>
91SlopePath
92estimateAlpha(MatrixType& x, Eigen::MatrixXd& y, const Slope& model)
93{
94 int n = x.rows();
95 int p = x.cols();
96
97 int alpha_est_maxit = model.getAlphaEstimationMaxIterations();
98
99 // TODO: Possibly we could just run the path instead. We could use the
100 // selected predictors at its end as the selected set and run these in an OLS.
101 // There would no longer be any problem with cyclic behavior of this
102 // algorithm, but on the other hand the model is likely very unstable.
103 std::vector<int> selected;
104
105 Eigen::ArrayXd alpha(1);
106
107 Slope model_copy = model;
108
109 model_copy.setAlphaType("path"); // Otherwise we would be in an infinite loop
110
111 SlopePath result;
112
113 // Estimate the noise level, if possible
114 if (n >= p + 30) {
115 alpha(0) = estimateNoise(x, y, model_copy.getFitIntercept()) / n;
116 result = model_copy.path(x, y, alpha);
117 } else {
118 for (int it = 0; it < alpha_est_maxit; ++it) {
119
120 MatrixType x_selected = subsetCols(x, selected);
121
122 std::vector<int> selected_prev = selected;
123 selected.clear();
124
125 alpha(0) = estimateNoise(x_selected, y, model_copy.getFitIntercept()) / n;
126
127 // TODO: If changes in alpha are small between two steps, then it should
128 // be easy to screen, but we are not using that possibility here.
129 result = model_copy.path(x, y, alpha);
130 auto coefs = result.getCoefs().back();
131
132 for (typename Eigen::SparseMatrix<double>::InnerIterator it(coefs, 0); it;
133 ++it) {
134 selected.emplace_back(it.row());
135 }
136
137 if (selected == selected_prev) {
138 return result;
139 }
140
141 if (static_cast<int>(selected.size()) >=
142 n + model_copy.getFitIntercept()) {
143 throw std::runtime_error(
144 "selected >= n - 1 variables, cannot estimate variance");
145 }
146 }
147
150 "Maximum iterations reached in alpha estimation");
151 }
152
153 return result;
154}
155} // namespace slope
Container class for SLOPE regression solution paths.
Definition slope_path.h:31
std::vector< Eigen::SparseMatrix< double > > getCoefs() const
Returns the vector of coefficient matrices for each solution in the path.
Definition slope_path.h:95
The SLOPE model.
Definition slope.h:29
SlopePath path(T &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Computes SLOPE regression solution path for multiple alpha and lambda values.
Definition slope.cpp:27
bool getFitIntercept() const
Returns the intercept flag.
Definition slope.cpp:573
int getAlphaEstimationMaxIterations() const
Gets the maximum number of iterations allowed for the alpha estimation procedure.
Definition slope.cpp:567
void setAlphaType(const std::string &alpha_type)
Sets the alpha type.
Definition slope.cpp:409
static void addWarning(WarningCode code, const std::string &message)
Log a new warning.
Definition logger.cpp:31
Thread-safe warning logging facility for the slope library.
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
Eigen::MatrixXd subsetCols(const Eigen::MatrixXd &x, const std::vector< int > &indices)
Extract specified columns from a dense matrix.
Definition utils.cpp:54
double estimateNoise(MatrixType &x, Eigen::MatrixXd &y, const bool fit_intercept)
Estimates noise (standard error) in a linear model using OLS residuals.
@ MAXIT_REACHED
Maximum iterations reached without convergence.
SlopePath estimateAlpha(MatrixType &x, Eigen::MatrixXd &y, const Slope &model)
Estimates the regularization parameter alpha for SLOPE regression.
Ordinary Least Squares (OLS) regression functionality.
SLOPE (Sorted L-One Penalized Estimation) optimization.
Various utility functions.