slope 6.0.1
Loading...
Searching...
No Matches
folds.h
Go to the documentation of this file.
1
9#pragma once
10
11#include "utils.h"
12#include <Eigen/Core>
13#include <cstdint>
14#include <vector>
15
16namespace slope {
17
25class Folds
26{
27public:
34 : n_folds(0)
35 , n_repeats(0)
36 {
37 }
38
51 Folds(int n_samples, int n_folds, int n_repeats = 1, uint64_t seed = 42)
54 {
55 folds.resize(n_repeats);
56 for (int rep = 0; rep < n_repeats; ++rep) {
57 // Use a different seed for each repetition
58 uint64_t rep_seed = seed + rep;
59 folds[rep] = createFolds(n_samples, n_folds, rep_seed);
60 }
61 }
62
70 explicit Folds(const std::vector<std::vector<int>>& user_folds)
71 : n_folds(user_folds.size())
72 , n_repeats(1)
73 {
74 folds.emplace_back(user_folds);
75 }
76
82 explicit Folds(const std::vector<std::vector<std::vector<int>>>& user_folds)
83 : folds(user_folds)
84 , n_folds(user_folds[0].size())
85 , n_repeats(user_folds.size())
86 {
87 }
88
96 const std::vector<int>& getTestIndices(size_t fold_idx,
97 size_t rep_idx = 0) const;
98
107 std::vector<int> getTrainingIndices(size_t fold_idx,
108 size_t rep_idx = 0) const;
109
126 template<typename T>
127 auto split(Eigen::EigenBase<T>& x,
128 const Eigen::MatrixXd& y,
129 size_t fold_idx,
130 size_t rep_idx = 0) const
131 {
132 auto test_idx = getTestIndices(fold_idx, rep_idx);
133 auto train_idx = getTrainingIndices(fold_idx, rep_idx);
134
135 auto x_test = subset(x.derived(), test_idx);
136 Eigen::MatrixXd y_test = y(test_idx, Eigen::all);
137
138 auto x_train = subset(x.derived(), train_idx);
139 Eigen::MatrixXd y_train = y(train_idx, Eigen::all);
140
141 return std::make_tuple(x_train, y_train, x_test, y_test);
142 }
143
149 size_t numFolds() const { return n_folds; }
150
156 size_t numRepetitions() const { return n_repeats; }
157
163 size_t numEvals() const { return n_repeats * n_folds; }
164
165 std::vector<std::vector<std::vector<int>>>
167 std::size_t n_folds;
168 std::size_t n_repeats;
169
170private:
185 static std::vector<std::vector<int>> createFolds(int n_samples,
186 int n_folds,
187 uint64_t seed);
188};
189
190} // namespace slope
Manages data partitioning for cross-validation.
Definition folds.h:26
std::size_t n_folds
Number of folds.
Definition folds.h:167
Folds(const std::vector< std::vector< int > > &user_folds)
Constructor for user-provided folds.
Definition folds.h:70
Folds(int n_samples, int n_folds, int n_repeats=1, uint64_t seed=42)
Constructor for generating random folds with optional repetitions.
Definition folds.h:51
size_t numEvals() const
Get the total number of fold evaluations across all repetitions (repetitions * folds)
Definition folds.h:163
size_t numFolds() const
Get the number of folds.
Definition folds.h:149
std::vector< int > getTrainingIndices(size_t fold_idx, size_t rep_idx=0) const
Get training indices for a specific fold and repetition.
const std::vector< int > & getTestIndices(size_t fold_idx, size_t rep_idx=0) const
Get test indices for a specific fold and repetition.
size_t numRepetitions() const
Get the number of repetitions.
Definition folds.h:156
auto split(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y, size_t fold_idx, size_t rep_idx=0) const
Split data into training and test sets for a specific fold and repetition.
Definition folds.h:127
Folds(const std::vector< std::vector< std::vector< int > > > &user_folds)
Constructor for user-provided repeated folds.
Definition folds.h:82
Folds()
Default constructor.
Definition folds.h:33
std::vector< std::vector< std::vector< int > > > folds
Indices for each fold in each repetition.
Definition folds.h:166
std::size_t n_repeats
Number of repetitions.
Definition folds.h:168
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
T subset(const Eigen::EigenBase< T > &x, const std::vector< int > &indices)
Extract a subset of rows from an Eigen matrix.
Definition utils.h:225
Various utility functions.