slope 0.29.0
Loading...
Searching...
No Matches
folds.h
Go to the documentation of this file.
1
9#pragma once
10
11#include "utils.h"
12#include <Eigen/Core>
13#include <cstdint>
14#include <vector>
15
16namespace slope {
17
25class Folds
26{
27public:
34 : n_folds(0)
35 , n_repeats(0)
36 {
37 }
38
51 Folds(int n_samples, int n_folds, int n_repeats = 1, uint64_t seed = 42)
54 {
55 folds.resize(n_repeats);
56 for (int rep = 0; rep < n_repeats; ++rep) {
57 // Use a different seed for each repetition
58 uint64_t rep_seed = seed + rep;
59 folds[rep] = createFolds(n_samples, n_folds, rep_seed);
60 }
61 }
62
70 explicit Folds(const std::vector<std::vector<int>>& user_folds)
71 : n_folds(user_folds.size())
72 , n_repeats(1)
73 {
74 folds.emplace_back(user_folds);
75 }
76
82 explicit Folds(const std::vector<std::vector<std::vector<int>>>& user_folds)
83 : folds(user_folds)
84 , n_folds(user_folds[0].size())
85 , n_repeats(user_folds.size())
86 {
87 }
88
96 const std::vector<int>& getTestIndices(size_t fold_idx,
97 size_t rep_idx = 0) const;
98
107 std::vector<int> getTrainingIndices(size_t fold_idx,
108 size_t rep_idx = 0) const;
109
122 template<typename MatrixType>
123 std::tuple<MatrixType, Eigen::MatrixXd, MatrixType, Eigen::MatrixXd> split(
124 MatrixType& x,
125 const Eigen::MatrixXd& y,
126 size_t fold_idx,
127 size_t rep_idx = 0) const
128 {
129 auto test_idx = getTestIndices(fold_idx, rep_idx);
130 auto train_idx = getTrainingIndices(fold_idx, rep_idx);
131
132 MatrixType x_test = subset(x, test_idx);
133 Eigen::MatrixXd y_test = y(test_idx, Eigen::all);
134
135 MatrixType x_train = subset(x, train_idx);
136 Eigen::MatrixXd y_train = y(train_idx, Eigen::all);
137
138 return { x_train, y_train, x_test, y_test };
139 }
140
146 size_t numFolds() const { return n_folds; }
147
153 size_t numRepetitions() const { return n_repeats; }
154
160 size_t numEvals() const { return n_repeats * n_folds; }
161
162 std::vector<std::vector<std::vector<int>>>
164 std::size_t n_folds;
165 std::size_t n_repeats;
166
167private:
182 static std::vector<std::vector<int>> createFolds(int n_samples,
183 int n_folds,
184 uint64_t seed);
185};
186
187} // namespace slope
Manages data partitioning for cross-validation.
Definition folds.h:26
std::size_t n_folds
Number of folds.
Definition folds.h:164
Folds(const std::vector< std::vector< int > > &user_folds)
Constructor for user-provided folds.
Definition folds.h:70
Folds(int n_samples, int n_folds, int n_repeats=1, uint64_t seed=42)
Constructor for generating random folds with optional repetitions.
Definition folds.h:51
size_t numEvals() const
Get the total number of evaluations, i.e. the number of repetitions times the number of folds.
Definition folds.h:160
size_t numFolds() const
Get the number of folds.
Definition folds.h:146
std::vector< int > getTrainingIndices(size_t fold_idx, size_t rep_idx=0) const
Get training indices for a specific fold and repetition.
Definition folds.cpp:14
std::tuple< MatrixType, Eigen::MatrixXd, MatrixType, Eigen::MatrixXd > split(MatrixType &x, const Eigen::MatrixXd &y, size_t fold_idx, size_t rep_idx=0) const
Split data into training and test sets for a specific fold and repetition.
Definition folds.h:123
const std::vector< int > & getTestIndices(size_t fold_idx, size_t rep_idx=0) const
Get test indices for a specific fold and repetition.
Definition folds.cpp:8
size_t numRepetitions() const
Get the number of repetitions.
Definition folds.h:153
Folds(const std::vector< std::vector< std::vector< int > > > &user_folds)
Constructor for user-provided repeated folds.
Definition folds.h:82
Folds()
Default constructor.
Definition folds.h:33
std::vector< std::vector< std::vector< int > > > folds
Indices for each fold in each repetition.
Definition folds.h:163
std::size_t n_repeats
Number of repetitions.
Definition folds.h:165
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
Eigen::MatrixXd subset(const Eigen::MatrixXd &x, const std::vector< int > &indices)
Extract a subset of rows from an Eigen matrix.
Definition utils.cpp:26
Various utility functions.