38 std::map<std::string, double>
params;
109 {
"gamma", { 0.0 } },
118std::vector<std::map<std::string, double>>
119createGrid(
const std::map<std::string, std::vector<double>>& param_values);
122findBestParameters(
CvResult& cv_result,
const std::unique_ptr<Score>& scorer);
126fitToFold(Eigen::MatrixBase<T>& x,
127 const Eigen::MatrixXd& y,
129 const std::unique_ptr<Loss>& loss,
130 const std::unique_ptr<Score>& scorer,
131 const Eigen::ArrayXd& alphas,
135 const double gamma = 0.0,
136 const bool copy_x =
true)
138 Eigen::ArrayXd scores = Eigen::ArrayXd::Zero(alphas.size());
143 auto [x_train, y_train, x_test, y_test] = folds.
split(x, y, fold, rep);
145 auto path = thread_model.
path(x_train, y_train, alphas);
148 path = thread_model.
relax(path, x_train, y_train, gamma);
151 for (
int j = 0; j < path.size(); ++j) {
152 auto eta = path(j).
predict(x_test,
"linear");
153 scores(j) = scorer->eval(eta, y_test, loss);
163 auto x_train = x(train_idx, Eigen::all);
164 auto x_test = x(test_idx, Eigen::all);
166 Eigen::MatrixXd y_train = y(train_idx, Eigen::all);
167 Eigen::MatrixXd y_test = y(test_idx, Eigen::all);
169 auto path = thread_model.
path(x_train, y_train, alphas);
172 path = thread_model.
relax(path, x_train, y_train, gamma);
175 for (
int j = 0; j < path.size(); ++j) {
176 auto eta = path(j).
predict(x_test,
"linear");
177 scores(j) = scorer->eval(eta, y_test, loss);
186fitToFold(Eigen::SparseMatrixBase<T>& x,
187 const Eigen::MatrixXd& y,
189 const std::unique_ptr<Loss>& loss,
190 const std::unique_ptr<Score>& scorer,
191 const Eigen::ArrayXd& alphas,
195 const double gamma = 0.0,
196 const bool copy_x =
true)
198 thread_model.setModifyX(
true);
200 auto [x_train, y_train, x_test, y_test] = folds.split(x, y, fold, rep);
202 auto path = thread_model.path(x_train, y_train, alphas);
205 path = thread_model.relax(path, x_train, y_train, gamma);
208 Eigen::ArrayXd scores = Eigen::ArrayXd::Zero(path.size());
210 for (
int j = 0; j < path.size(); ++j) {
211 auto eta = path(j).predict(x_test,
"linear");
212 scores(j) = scorer->eval(eta, y_test, loss);
246 Eigen::EigenBase<T>& x,
247 const Eigen::MatrixXd& y_in,
256 auto y = loss->preprocessResponse(y_in);
259 auto hyperparams = config.default_hyperparams;
262 for (
const auto& [key, values] : config.hyperparams) {
263 hyperparams[key] = values;
266 auto grid = detail::createGrid(hyperparams);
270 config.predefined_folds.has_value()
271 ?
Folds(*config.predefined_folds)
272 :
Folds(n, config.n_folds, config.n_repeats, config.random_seed);
276 for (
const auto& params : grid) {
280 double q = params.at(
"q");
281 double gamma = params.at(
"gamma");
285 auto initial_path = model.
path(x, y);
287 result.
alphas = initial_path.getAlpha();
288 int n_alpha = result.
alphas.size();
290 assert((result.
alphas > 0).all());
292 Eigen::MatrixXd scores = Eigen::MatrixXd::Zero(n_evals, n_alpha);
295 Eigen::setNbThreads(1);
299 std::vector<std::string> thread_errors(n_evals);
300 bool had_exception =
false;
303 omp_set_max_active_levels(1);
304#pragma omp parallel for num_threads(Threads::get()) \
305 shared(scores, thread_errors, had_exception)
307 for (
int i = 0; i < n_evals; ++i) {
309 auto [rep, fold] = std::div(i, folds.
numFolds());
311 Slope thread_model = model;
313 scores.row(i) = detail::fitToFold(x.derived(),
325 }
catch (
const std::exception& e) {
326 thread_errors[i] = e.what();
328#pragma omp atomic write
330 had_exception =
true;
332 thread_errors[i] =
"Unknown exception";
334#pragma omp atomic write
336 had_exception =
true;
341 std::string error_message =
"Exception(s) during cross-validation:\n";
342 for (
int i = 0; i < n_evals; ++i) {
343 if (!thread_errors[i].empty()) {
345 "Fold " + std::to_string(i) +
": " + thread_errors[i] +
"\n";
348 throw std::runtime_error(error_message);
353 result.
score = std::move(scores);
354 cv_result.
results.push_back(result);
358 Eigen::setNbThreads(0);
361 detail::findBestParameters(cv_result, scorer);
Manages data partitioning for cross-validation.
size_t numEvals() const
Get the total number of fold evaluations (repetitions * folds)
size_t numFolds() const
Get the number of folds.
std::vector< int > getTrainingIndices(size_t fold_idx, size_t rep_idx=0) const
Get training indices for a specific fold and repetition.
const std::vector< int > & getTestIndices(size_t fold_idx, size_t rep_idx=0) const
Get test indices for a specific fold and repetition.
auto split(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y, size_t fold_idx, size_t rep_idx=0) const
Split data into training and test sets for a specific fold and repetition.
static std::unique_ptr< Score > create(const std::string &metric)
Eigen::MatrixXd predict(Eigen::EigenBase< T > &x, const std::string &type="response") const
Predict the response for a given input matrix.
const std::string & getLossType()
Get currently defined loss type.
SlopePath path(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, Eigen::ArrayXd alpha=Eigen::ArrayXd::Zero(0), Eigen::ArrayXd lambda=Eigen::ArrayXd::Zero(0))
Computes SLOPE regression solution path for multiple alpha and lambda values.
SlopeFit relax(const SlopeFit &fit, T &x, const Eigen::VectorXd &y_in, const double gamma=0.0, Eigen::VectorXd beta0=Eigen::VectorXd(0), Eigen::VectorXd beta=Eigen::VectorXd(0))
Relaxes a fitted SLOPE model.
void setModifyX(const bool modify_x)
Controls if x should be modified-in-place.
void setQ(double q)
Sets the q value.
Cross-validation fold management for SLOPE models.
Namespace containing SLOPE regression implementation.
std::unique_ptr< Loss > setupLoss(const std::string &loss)
Factory function to create the appropriate loss function based on the distribution family.
CvResult crossValidate(Slope model, Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y_in, const CvConfig &config=CvConfig())
Performs cross-validation on a SLOPE model to select optimal hyperparameters.
Eigen::VectorXd stdDevs(const Eigen::SparseMatrixBase< T > &x)
Computes the standard deviation for each column of a matrix.
Scoring metrics for model evaluation.
SLOPE (Sorted L-One Penalized Estimation) optimization.
Configuration settings for cross-validation.
int n_repeats
Number of times to repeat the cross-validation (default: 1)
bool copy_x
Whether to copy the design matrix for each fold (default: true)
std::map< std::string, std::vector< double > > hyperparams
Map of hyperparameter names to vectors of values to evaluate.
std::optional< std::vector< std::vector< std::vector< int > > > > predefined_folds
Optional user-defined fold assignments for custom cross-validation splits.
int n_folds
Number of folds for cross-validation (default: 10)
std::string metric
Evaluation metric used for model assessment (default: "mse")
std::map< std::string, std::vector< double > > default_hyperparams
Map of hyperparameter names to default vectors of values; entries in hyperparams with the same name replace these defaults.
uint64_t random_seed
Seed for random number generator to ensure reproducibility (default: 42)
Contains overall results from a cross-validation process.
double best_score
The score achieved by the optimal hyperparameter configuration.
std::map< std::string, double > best_params
std::vector< GridResult > results
int best_ind
Index of the best performing configuration in the results vector.
Stores cross-validation results for a specific set of hyperparameters.
Eigen::ArrayXd mean_scores
Array of scores averaged across all folds for each alpha value.
std::map< std::string, double > params
Map of hyperparameter names to their values for the configuration.
Eigen::ArrayXd std_errors
Eigen::ArrayXd alphas
Array of regularization parameters used in the regularization path.