slope 6.2.1
Loading...
Searching...
No Matches
folds.h
Go to the documentation of this file.
1
9#pragma once
10
11#include "eigen_compat.h"
12#include "utils.h"
13#include <Eigen/Core>
14#include <cstdint>
15#include <vector>
16
17namespace slope {
18
19using slope::all;
20
28class Folds
29{
30public:
37 : n_folds(0)
38 , n_repeats(0)
39 {
40 }
41
54 Folds(int n_samples, int n_folds, int n_repeats = 1, uint64_t seed = 42)
57 {
58 folds.resize(n_repeats);
59 for (int rep = 0; rep < n_repeats; ++rep) {
60 // Use a different seed for each repetition
61 uint64_t rep_seed = seed + rep;
62 folds[rep] = createFolds(n_samples, n_folds, rep_seed);
63 }
64 }
65
73 explicit Folds(const std::vector<std::vector<int>>& user_folds)
74 : n_folds(user_folds.size())
75 , n_repeats(1)
76 {
77 folds.emplace_back(user_folds);
78 }
79
85 explicit Folds(const std::vector<std::vector<std::vector<int>>>& user_folds)
86 : folds(user_folds)
87 , n_folds(user_folds[0].size())
88 , n_repeats(user_folds.size())
89 {
90 }
91
99 const std::vector<int>& getTestIndices(size_t fold_idx,
100 size_t rep_idx = 0) const;
101
110 std::vector<int> getTrainingIndices(size_t fold_idx,
111 size_t rep_idx = 0) const;
112
129 template<typename T>
130 auto split(Eigen::EigenBase<T>& x,
131 const Eigen::MatrixXd& y,
132 size_t fold_idx,
133 size_t rep_idx = 0) const
134 {
135 auto test_idx = getTestIndices(fold_idx, rep_idx);
136 auto train_idx = getTrainingIndices(fold_idx, rep_idx);
137
138 auto x_test = subset(x.derived(), test_idx);
139 Eigen::MatrixXd y_test = y(test_idx, all);
140
141 auto x_train = subset(x.derived(), train_idx);
142 Eigen::MatrixXd y_train = y(train_idx, all);
143
144 return std::make_tuple(x_train, y_train, x_test, y_test);
145 }
146
152 size_t numFolds() const { return n_folds; }
153
159 size_t numRepetitions() const { return n_repeats; }
160
166 size_t numEvals() const { return n_repeats * n_folds; }
167
168 std::vector<std::vector<std::vector<int>>>
170 std::size_t n_folds;
171 std::size_t n_repeats;
172
173private:
188 static std::vector<std::vector<int>> createFolds(int n_samples,
189 int n_folds,
190 uint64_t seed);
191};
192
193} // namespace slope
Manages data partitioning for cross-validation.
Definition folds.h:29
std::size_t n_folds
Number of folds.
Definition folds.h:170
Folds(const std::vector< std::vector< int > > &user_folds)
Constructor for user-provided folds.
Definition folds.h:73
Folds(int n_samples, int n_folds, int n_repeats=1, uint64_t seed=42)
Constructor for generating random folds with optional repetitions.
Definition folds.h:54
size_t numEvals() const
Get the total number of fold evaluations across all repetitions (repetitions * folds).
Definition folds.h:166
size_t numFolds() const
Get the number of folds.
Definition folds.h:152
std::vector< int > getTrainingIndices(size_t fold_idx, size_t rep_idx=0) const
Get training indices for a specific fold and repetition.
const std::vector< int > & getTestIndices(size_t fold_idx, size_t rep_idx=0) const
Get test indices for a specific fold and repetition.
size_t numRepetitions() const
Get the number of repetitions.
Definition folds.h:159
auto split(Eigen::EigenBase< T > &x, const Eigen::MatrixXd &y, size_t fold_idx, size_t rep_idx=0) const
Split data into training and test sets for a specific fold and repetition.
Definition folds.h:130
Folds(const std::vector< std::vector< std::vector< int > > > &user_folds)
Constructor for user-provided repeated folds.
Definition folds.h:85
Folds()
Default constructor.
Definition folds.h:36
std::vector< std::vector< std::vector< int > > > folds
Indices for each fold in each repetition.
Definition folds.h:169
std::size_t n_repeats
Number of repetitions.
Definition folds.h:171
Eigen compatibility layer for version differences.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
T subset(const Eigen::EigenBase< T > &x, const std::vector< int > &indices)
Extract a subset of rows from an Eigen matrix.
Definition utils.h:257
Various utility functions.