slope 6.0.1
Loading...
Searching...
No Matches
cv.h
Go to the documentation of this file.
1
11#pragma once
12
13#include "folds.h"
14#include "score.h"
15#include "slope.h"
16#include <vector>
17
18#ifdef _OPENMP
19#include <omp.h>
20#endif
21
22namespace slope {
23
31{
35 Eigen::MatrixXd score;
36
38 std::map<std::string, double> params;
39
41 Eigen::ArrayXd alphas;
42
44 Eigen::ArrayXd mean_scores;
45
48 Eigen::ArrayXd std_errors;
49};
50
59{
62 std::vector<GridResult> results;
63
66 std::map<std::string, double> best_params;
67
69 double best_score;
70
73
77};
78
87{
89 int n_folds = 10;
90
92 int n_repeats = 1;
93
95 std::string metric = "mse";
96
98 uint64_t random_seed = 42;
99
101 bool copy_x = true;
102
104 std::map<std::string, std::vector<double>> hyperparams;
105
107 std::map<std::string, std::vector<double>> default_hyperparams = {
108 { "q", { 0.1 } },
109 { "gamma", { 0.0 } },
110 };
111
113 std::optional<std::vector<std::vector<std::vector<int>>>> predefined_folds;
114};
115
116namespace detail {
117
118std::vector<std::map<std::string, double>>
119createGrid(const std::map<std::string, std::vector<double>>& param_values);
120
121void
122findBestParameters(CvResult& cv_result, const std::unique_ptr<Score>& scorer);
123
124template<typename T>
125Eigen::ArrayXd
126fitToFold(Eigen::MatrixBase<T>& x,
127 const Eigen::MatrixXd& y,
128 const Folds& folds,
129 const std::unique_ptr<Loss>& loss,
130 const std::unique_ptr<Score>& scorer,
131 const Eigen::ArrayXd& alphas,
132 Slope& thread_model,
133 const int fold,
134 const int rep,
135 const double gamma = 0.0,
136 const bool copy_x = true)
137{
138 Eigen::ArrayXd scores = Eigen::ArrayXd::Zero(alphas.size());
139
140 if (copy_x) {
141 thread_model.setModifyX(true);
142
143 auto [x_train, y_train, x_test, y_test] = folds.split(x, y, fold, rep);
144
145 auto path = thread_model.path(x_train, y_train, alphas);
146
147 if (gamma > 0) {
148 path = thread_model.relax(path, x_train, y_train, gamma);
149 }
150
151 for (int j = 0; j < path.size(); ++j) {
152 auto eta = path(j).predict(x_test, "linear");
153 scores(j) = scorer->eval(eta, y_test, loss);
154 }
155
156 } else {
157 thread_model.setModifyX(false);
158
159 auto train_idx = folds.getTrainingIndices(fold, rep);
160 auto test_idx = folds.getTestIndices(fold, rep);
161
162 // Create views
163 auto x_train = x(train_idx, Eigen::all);
164 auto x_test = x(test_idx, Eigen::all);
165
166 Eigen::MatrixXd y_train = y(train_idx, Eigen::all);
167 Eigen::MatrixXd y_test = y(test_idx, Eigen::all);
168
169 auto path = thread_model.path(x_train, y_train, alphas);
170
171 if (gamma > 0) {
172 path = thread_model.relax(path, x_train, y_train, gamma);
173 }
174
175 for (int j = 0; j < path.size(); ++j) {
176 auto eta = path(j).predict(x_test, "linear");
177 scores(j) = scorer->eval(eta, y_test, loss);
178 }
179 }
180
181 return scores;
182}
183
184template<typename T>
185Eigen::ArrayXd
186fitToFold(Eigen::SparseMatrixBase<T>& x,
187 const Eigen::MatrixXd& y,
188 const Folds& folds,
189 const std::unique_ptr<Loss>& loss,
190 const std::unique_ptr<Score>& scorer,
191 const Eigen::ArrayXd& alphas,
192 Slope& thread_model,
193 const int fold,
194 const int rep,
195 const double gamma = 0.0,
196 const bool copy_x = true)
197{
198 thread_model.setModifyX(true);
199
200 auto [x_train, y_train, x_test, y_test] = folds.split(x, y, fold, rep);
201
202 auto path = thread_model.path(x_train, y_train, alphas);
203
204 if (gamma > 0) {
205 path = thread_model.relax(path, x_train, y_train, gamma);
206 }
207
208 Eigen::ArrayXd scores = Eigen::ArrayXd::Zero(path.size());
209
210 for (int j = 0; j < path.size(); ++j) {
211 auto eta = path(j).predict(x_test, "linear");
212 scores(j) = scorer->eval(eta, y_test, loss);
213 }
214
215 return scores;
216}
217
218} // namespace detail
219
243template<typename T>
244CvResult
246 Eigen::EigenBase<T>& x,
247 const Eigen::MatrixXd& y_in,
248 const CvConfig& config = CvConfig())
249{
250 CvResult cv_result;
251
252 int n = y_in.rows();
253
254 auto loss = setupLoss(model.getLossType());
255
256 auto y = loss->preprocessResponse(y_in);
257 auto scorer = Score::create(config.metric);
258
259 auto hyperparams = config.default_hyperparams;
260
261 // Override with user-specified parameters
262 for (const auto& [key, values] : config.hyperparams) {
263 hyperparams[key] = values;
264 }
265
266 auto grid = detail::createGrid(hyperparams);
267
268 // Total number of evaluations (n_repeats * n_folds)
269 Folds folds =
270 config.predefined_folds.has_value()
271 ? Folds(*config.predefined_folds)
272 : Folds(n, config.n_folds, config.n_repeats, config.random_seed);
273
274 int n_evals = folds.numEvals();
275
276 for (const auto& params : grid) {
277 GridResult result;
278 result.params = params;
279
280 double q = params.at("q");
281 double gamma = params.at("gamma");
282
283 model.setQ(q);
284
285 auto initial_path = model.path(x, y);
286
287 result.alphas = initial_path.getAlpha();
288 int n_alpha = result.alphas.size();
289
290 assert((result.alphas > 0).all());
291
292 Eigen::MatrixXd scores = Eigen::MatrixXd::Zero(n_evals, n_alpha);
293
294#ifdef _OPENMP
295 Eigen::setNbThreads(1);
296#endif
297
298 // Thread-safety for exceptions
299 std::vector<std::string> thread_errors(n_evals);
300 bool had_exception = false;
301
302#ifdef _OPENMP
303 omp_set_max_active_levels(1);
304#pragma omp parallel for num_threads(Threads::get()) \
305 shared(scores, thread_errors, had_exception)
306#endif
307 for (int i = 0; i < n_evals; ++i) {
308 try {
309 auto [rep, fold] = std::div(i, folds.numFolds());
310
311 Slope thread_model = model;
312
313 scores.row(i) = detail::fitToFold(x.derived(),
314 y,
315 folds,
316 loss,
317 scorer,
318 result.alphas,
319 thread_model,
320 fold,
321 rep,
322 gamma,
323 config.copy_x);
324
325 } catch (const std::exception& e) {
326 thread_errors[i] = e.what();
327#ifdef _OPENMP
328#pragma omp atomic write
329#endif
330 had_exception = true;
331 } catch (...) {
332 thread_errors[i] = "Unknown exception";
333#ifdef _OPENMP
334#pragma omp atomic write
335#endif
336 had_exception = true;
337 }
338 }
339
340 if (had_exception) {
341 std::string error_message = "Exception(s) during cross-validation:\n";
342 for (int i = 0; i < n_evals; ++i) {
343 if (!thread_errors[i].empty()) {
344 error_message +=
345 "Fold " + std::to_string(i) + ": " + thread_errors[i] + "\n";
346 }
347 }
348 throw std::runtime_error(error_message);
349 }
350
351 result.mean_scores = scores.colwise().mean();
352 result.std_errors = stdDevs(scores).array() / std::sqrt(n_evals);
353 result.score = std::move(scores);
354 cv_result.results.push_back(result);
355 }
356
357#ifdef _OPENMP
358 Eigen::setNbThreads(0);
359#endif
360
361 detail::findBestParameters(cv_result, scorer);
362
363 return cv_result;
364}
365
366} // namespace slope
Manages data partitioning for cross-validation.
Definition folds.h:26
size_t numEvals() const
Get the total number of folds (repetitions * folds)
Definition folds.h:163
size_t numFolds() const
Get the number of folds.
Definition folds.h:149
std::vector< int > getTrainingIndices(size_t fold_idx, size_t rep_idx=0) const
Get training indices for a specific fold and repetition.
const std::vector< int > & getTestIndices(size_t fold_idx, size_t rep_idx=0) const
Get test indices for a specific fold and repetition.
auto split(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y, size_t fold_idx, size_t rep_idx=0) const
Split data into training and test sets for a specific fold and repetition.
Definition folds.h:127
static std::unique_ptr< Score > create(const std::string &metric)
Eigen::MatrixXd predict(Eigen::EigenBase< T > &x, const std::string &type="response") const
Predict the response for a given input matrix.
Definition slope_fit.h:237
The SLOPE model.
Definition slope.h:46
const std::string & getLossType()
Get currently defined loss type.
SlopePath path(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Computes SLOPE regression solution path for multiple alpha and lambda values.
Definition slope.h:376
SlopeFit relax(const SlopeFit &fit, T &x, const Eigen::VectorXd &y_in, const double gamma=0.0, Eigen::VectorXd beta0=Eigen::VectorXd(0), Eigen::VectorXd beta=Eigen::VectorXd(0))
Relaxes a fitted SLOPE model.
Definition slope.h:804
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setQ(double q)
Sets the q value.
Cross-validation fold management for SLOPE models.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
CvResult crossValidate(Slope model, Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, const CvConfig &config=CvConfig())
Performs cross-validation on a SLOPE model to select optimal hyperparameters.
Definition cv.h:245
Eigen::VectorXd stdDevs(const Eigen::SparseMatrixBase< T > &x)
Computes the standard deviation for each column of a matrix.
Definition math.h:616
Scoring metrics for model evaluation.
SLOPE (Sorted L-One Penalized Estimation) optimization.
Configuration settings for cross-validation.
Definition cv.h:87
int n_repeats
Number of times to repeat the cross-validation (default: 1)
Definition cv.h:92
bool copy_x
Whether to copy the design matrix for each fold (default: true)
Definition cv.h:101
std::map< std::string, std::vector< double > > hyperparams
Map of hyperparameter names to vectors of values to evaluate.
Definition cv.h:104
std::optional< std::vector< std::vector< std::vector< int > > > > predefined_folds
Optional user-defined fold assignments for custom cross-validation splits.
Definition cv.h:113
int n_folds
Number of folds for cross-validation (default: 10)
Definition cv.h:89
std::string metric
Evaluation metric used for model assessment (default: "mse")
Definition cv.h:95
std::map< std::string, std::vector< double > > default_hyperparams
Map of hyperparameter names to vectors of values to evaluate.
Definition cv.h:107
uint64_t random_seed
Seed for random number generator to ensure reproducibility (default: 42)
Definition cv.h:98
Contains overall results from a cross-validation process.
Definition cv.h:59
double best_score
The score achieved by the optimal hyperparameter configuration.
Definition cv.h:69
std::map< std::string, double > best_params
Definition cv.h:66
std::vector< GridResult > results
Definition cv.h:62
int best_ind
Index of the best performing configuration in the results vector.
Definition cv.h:72
int best_alpha_ind
Definition cv.h:76
Stores cross-validation results for a specific set of hyperparameters.
Definition cv.h:31
Eigen::MatrixXd score
Definition cv.h:35
Eigen::ArrayXd mean_scores
Array of scores averaged across all folds for each alpha value.
Definition cv.h:44
std::map< std::string, double > params
Map of hyperparameter names to their values for the configuration.
Definition cv.h:38
Eigen::ArrayXd std_errors
Definition cv.h:48
Eigen::ArrayXd alphas
Array of regularization parameters used in the regularization path.
Definition cv.h:41