slope 6.2.1
Loading...
Searching...
No Matches
normalize.h
Go to the documentation of this file.
1
6#pragma once
7
8#include "jit_normalization.h"
9#include "math.h"
10#include <Eigen/SparseCore>
11
12namespace slope {
13
31template<typename T>
32void
33computeCenters(Eigen::VectorXd& x_centers, const T& x, const std::string& type)
34{
35 int p = x.cols();
36
37 if (type == "manual") {
38 if (x_centers.size() != p) {
39 throw std::invalid_argument("Invalid dimensions in centers");
40 }
41
42 if (!x_centers.allFinite()) {
43 throw std::invalid_argument("Centers must be finite");
44 }
45
46 } else if (type == "mean") {
47 x_centers = means(x);
48 } else if (type == "min") {
49 x_centers = mins(x);
50 } else if (type != "none") {
51 throw std::invalid_argument("Invalid centering type");
52 }
53}
54
72template<typename T>
73void
74computeScales(Eigen::VectorXd& x_scales, const T& x, const std::string& type)
75{
76 int p = x.cols();
77
78 if (type == "manual") {
79 if (x_scales.size() != p) {
80 throw std::invalid_argument("Invalid dimensions in scales");
81 }
82 if (!x_scales.allFinite()) {
83 throw std::invalid_argument("Scales must be finite");
84 }
85 } else if (type == "sd") {
86 x_scales = stdDevs(x);
87 } else if (type == "l1") {
88 x_scales = l1Norms(x);
89 } else if (type == "l2") {
90 x_scales = l2Norms(x);
91 } else if (type == "max_abs") {
92 x_scales = maxAbs(x);
93 } else if (type == "range") {
94 x_scales = ranges(x);
95 } else if (type != "none") {
96 throw std::invalid_argument("Invalid scaling type");
97 }
98}
99
121template<typename T>
123normalize(Eigen::MatrixBase<T>& x,
124 Eigen::VectorXd& x_centers,
125 Eigen::VectorXd& x_scales,
126 const std::string& centering_type,
127 const std::string& scaling_type,
128 const bool modify_x)
129{
130 const int p = x.cols();
131
132 computeCenters(x_centers, x, centering_type);
133 computeScales(x_scales, x, scaling_type);
134
135 // Handle zero-variance columns by setting their scale to 1.0
136 // to avoid division by zero during normalization
137 if (scaling_type != "none" && scaling_type != "manual") {
138 for (int j = 0; j < p; ++j) {
139 if (std::abs(x_scales(j)) == 0.0) {
140 x_scales(j) = 1.0;
141 }
142 }
143 }
144
145 bool center = centering_type != "none";
146 bool scale = scaling_type != "none";
147 bool center_jit = center && !modify_x;
148 bool scale_jit = scale && !modify_x;
149
150 JitNormalization jit_normalization;
151
152 if (center_jit && scale_jit) {
153 jit_normalization = JitNormalization::Both;
154 } else if (center_jit) {
155 jit_normalization = JitNormalization::Center;
156 } else if (scale_jit) {
157 jit_normalization = JitNormalization::Scale;
158 } else {
159 jit_normalization = JitNormalization::None;
160 }
161
162 if (modify_x && (center || scale)) {
163 for (int j = 0; j < p; ++j) {
164 if (center) {
165 x.col(j).array() -= x_centers(j);
166 }
167 if (scale) {
168 x.col(j).array() /= x_scales(j);
169 }
170 }
171 }
172
173 return jit_normalization;
174}
175
197template<typename T>
199normalize(Eigen::SparseMatrixBase<T>& x,
200 Eigen::VectorXd& x_centers,
201 Eigen::VectorXd& x_scales,
202 const std::string& centering_type,
203 const std::string& scaling_type,
204 const bool)
205{
206 computeCenters(x_centers, x, centering_type);
207 computeScales(x_scales, x, scaling_type);
208
209 // Handle zero-variance columns by setting their scale to 1.0
210 // to avoid division by zero during normalization
211 const int p = x.cols();
212 if (scaling_type != "none" && scaling_type != "manual") {
213 for (int j = 0; j < p; ++j) {
214 if (std::abs(x_scales(j)) == 0.0) {
215 x_scales(j) = 1.0;
216 }
217 }
218 }
219
220 bool center = centering_type != "none";
221 bool scale = scaling_type != "none";
222 bool center_jit = center;
223 bool scale_jit = scale;
224
225 JitNormalization jit_normalization;
226
227 if (center_jit && scale_jit) {
228 jit_normalization = JitNormalization::Both;
229 } else if (center_jit) {
230 jit_normalization = JitNormalization::Center;
231 } else if (scale_jit) {
232 jit_normalization = JitNormalization::Scale;
233 } else {
234 jit_normalization = JitNormalization::None;
235 }
236
237 // TODO: Implement in-place scaling for sparse matrices.
238 // if (modify_x && scaling_type != "none") {
239 // for (int j = 0; j < x.cols(); ++j) {
240 // for (Eigen::SparseMatrix<double>::InnerIterator it(x, j); it; ++it) {
241 // it.valueRef() = it.value() / x_scales(j);
242 // }
243 // }
244 // }
245
246 return jit_normalization;
247}
248
269std::tuple<Eigen::VectorXd, Eigen::MatrixXd>
270rescaleCoefficients(const Eigen::VectorXd& beta0,
271 const Eigen::SparseMatrix<double>& beta,
272 const Eigen::VectorXd& x_centers,
273 const Eigen::VectorXd& x_scales,
274 const bool intercept);
275
276} // namespace slope
Enums to control predictor standardization behavior.
Mathematical support functions for the slope package.
Namespace containing SLOPE regression implementation.
Definition clusters.h:11
Eigen::VectorXd means(const Eigen::SparseMatrixBase< T > &x)
Computes the arithmetic mean for each column of a sparse matrix.
Definition math.h:572
Eigen::VectorXd l1Norms(const T &x)
Computes the L1 (Manhattan) norms for each column of a matrix.
Definition math.h:453
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
Eigen::VectorXd ranges(const Eigen::SparseMatrixBase< T > &x)
Computes the range (max - min) for each column of a matrix.
Definition math.h:692
std::tuple< Eigen::VectorXd, Eigen::MatrixXd > rescaleCoefficients(const Eigen::VectorXd &beta0, const Eigen::SparseMatrix< double > &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const bool intercept)
Rescales the coefficients using the given parameters.
Eigen::VectorXd mins(const Eigen::SparseMatrixBase< T > &x)
Computes the minimum value for each column of a sparse matrix.
Definition math.h:745
void computeScales(Eigen::VectorXd &x_scales, const T &x, const std::string &type)
Definition normalize.h:74
Eigen::VectorXd stdDevs(const Eigen::SparseMatrixBase< T > &x)
Computes the standard deviation for each column of a matrix.
Definition math.h:618
Eigen::VectorXd l2Norms(const Eigen::SparseMatrixBase< T > &x)
Computes the L2 (Euclidean) norms for each column of a sparse matrix.
Definition math.h:478
JitNormalization normalize(Eigen::MatrixBase< T > &x, Eigen::VectorXd &x_centers, Eigen::VectorXd &x_scales, const std::string &centering_type, const std::string &scaling_type, const bool modify_x)
Definition normalize.h:123
Eigen::VectorXd maxAbs(const Eigen::SparseMatrixBase< T > &x)
Computes the maximum absolute value for each column of a matrix.
Definition math.h:521
void computeCenters(Eigen::VectorXd &x_centers, const T &x, const std::string &type)
Definition normalize.h:33