slope 0.29.0
Loading...
Searching...
No Matches
math.h
Go to the documentation of this file.
1
6#pragma once
7
8#include "jit_normalization.h"
9#include "threads.h"
10#include <Eigen/Core>
11#include <Eigen/SparseCore>
12#include <numeric>
13#include <vector>
14
15namespace slope {
16
29template<typename T>
30int
31sign(T val)
32{
33 return (T(0) < val) - (val < T(0));
34}
35
44template<typename T>
45Eigen::ArrayXd
46cumSum(const T& x)
47{
48 std::vector<double> cum_sum(x.size());
49 std::partial_sum(x.begin(), x.end(), cum_sum.begin(), std::plus<double>());
50
51 Eigen::Map<Eigen::ArrayXd> out(cum_sum.data(), cum_sum.size());
52
53 return out;
54}
55
65template<typename T>
66T
67sigmoid(const T& x)
68{
69 return 1.0 / (1.0 + std::exp(-x));
70}
71
81template<typename T>
82T
83logit(const T& x)
84{
85 assert(x > 0 && x < 1 && "Input must be in (0, 1)");
86
87 return std::log(x) - std::log1p(-x);
88}
89
99template<typename T>
100T
101clamp(const T& x, const T& lo, const T& hi)
102{
103 return x < lo ? lo : x > hi ? hi : x;
104}
105
112Eigen::VectorXd
113logSumExp(const Eigen::MatrixXd& a);
114
123Eigen::MatrixXd
124softmax(const Eigen::MatrixXd& x);
125
140template<typename T>
141Eigen::MatrixXd
143 const std::vector<int>& active_set,
144 const Eigen::VectorXd& beta0,
145 const Eigen::VectorXd& beta,
146 const Eigen::VectorXd& x_centers,
147 const Eigen::VectorXd& x_scales,
148 const JitNormalization jit_normalization,
149 const bool intercept)
150{
151 int n = x.rows();
152 int p = x.cols();
153 int m = beta0.size();
154
155 Eigen::MatrixXd eta = Eigen::MatrixXd::Zero(n, m);
156
157#ifdef _OPENMP
158 bool large_problem = active_set.size() > 100 && n * active_set.size() > 1e7;
159#pragma omp parallel num_threads(Threads::get()) if (large_problem)
160#endif
161 {
162 Eigen::MatrixXd eta_local = Eigen::MatrixXd::Zero(n, m);
163
164#ifdef _OPENMP
165#pragma omp for nowait
166#endif
167 for (size_t i = 0; i < active_set.size(); ++i) {
168 int ind = active_set[i];
169 auto [k, j] = std::div(ind, p);
170
171 switch (jit_normalization) {
173 eta_local.col(k) += x.col(j) * beta(ind) / x_scales(j);
174 eta_local.col(k).array() -= beta(ind) * x_centers(j) / x_scales(j);
175 break;
176
178 eta_local.col(k) += x.col(j) * beta(ind);
179 eta_local.col(k).array() -= beta(ind) * x_centers(j);
180 break;
181
183 eta_local.col(k) += x.col(j) * beta(ind) / x_scales(j);
184 break;
185
187 eta_local.col(k) += x.col(j) * beta(ind);
188 break;
189 }
190 }
191
192#ifdef _OPENMP
193#pragma omp critical
194#endif
195 {
196 eta += eta_local;
197 }
198 }
199
200 if (intercept) {
201 eta.rowwise() += beta0.transpose();
202 }
203
204 return eta;
205}
206
221template<typename T>
222void
223updateGradient(Eigen::VectorXd& gradient,
224 const T& x,
225 const Eigen::MatrixXd& residual,
226 const std::vector<int>& active_set,
227 const Eigen::VectorXd& x_centers,
228 const Eigen::VectorXd& x_scales,
229 const Eigen::VectorXd& w,
230 const JitNormalization jit_normalization)
231{
232 const int n = x.rows();
233 const int p = x.cols();
234 const int m = residual.cols();
235
236 assert(gradient.size() == p * m &&
237 "Gradient matrix has incorrect dimensions");
238
239 Eigen::MatrixXd weighted_residual(n, m);
240 Eigen::ArrayXd wr_sums(m);
241
242#ifdef _OPENMP
243 bool large_problem = active_set.size() > 100 && n * active_set.size() > 1e5;
244#pragma omp parallel for num_threads(Threads::get()) if (large_problem)
245#endif
246 for (int k = 0; k < m; ++k) {
247 weighted_residual.col(k) = residual.col(k).cwiseProduct(w);
248 wr_sums(k) = weighted_residual.col(k).sum();
249 }
250
251#ifdef _OPENMP
252#pragma omp parallel for num_threads(Threads::get()) if (large_problem)
253#endif
254 for (size_t i = 0; i < active_set.size(); ++i) {
255 int ind = active_set[i];
256 auto [k, j] = std::div(ind, p);
257
258 switch (jit_normalization) {
260 gradient(ind) =
261 (x.col(j).dot(weighted_residual.col(k)) - x_centers(j) * wr_sums(k)) /
262 (x_scales(j) * n);
263 break;
265 gradient(ind) =
266 (x.col(j).dot(weighted_residual.col(k)) - x_centers(j) * wr_sums(k)) /
267 n;
268 break;
270 gradient(ind) =
271 x.col(j).dot(weighted_residual.col(k)) / (x_scales(j) * n);
272 break;
274 gradient(ind) = x.col(j).dot(weighted_residual.col(k)) / n;
275 break;
276 }
277 }
278}
279
293template<typename T>
294void
295offsetGradient(Eigen::VectorXd& gradient,
296 const T& x,
297 const Eigen::VectorXd& offset,
298 const std::vector<int>& active_set,
299 const Eigen::VectorXd& x_centers,
300 const Eigen::VectorXd& x_scales,
301 const JitNormalization jit_normalization)
302{
303 const int n = x.rows();
304 const int p = x.cols();
305
306 for (size_t i = 0; i < active_set.size(); ++i) {
307 int ind = active_set[i];
308 auto [k, j] = std::div(ind, p);
309
310 switch (jit_normalization) {
312 gradient(ind) -=
313 offset(k) * (x.col(j).sum() / n - x_centers(j)) / x_scales(j);
314 break;
316 gradient(ind) -= offset(k) * (x.col(j).sum() / n - x_centers(j));
317 break;
319 gradient(ind) -= offset(k) * x.col(j).sum() / (n * x_scales(j));
320 break;
322 gradient(ind) -= offset(k) * x.col(j).sum() / n;
323 break;
324 }
325 }
326}
327
336std::vector<int>
337setUnion(const std::vector<int>& a, const std::vector<int>& b);
338
349std::vector<int>
350setDiff(const std::vector<int>& a, const std::vector<int>& b);
351
363template<typename T>
364int
365whichMax(const T& x)
366{
367 return std::distance(x.begin(), std::max_element(x.begin(), x.end()));
368}
369
381template<typename T>
382int
383whichMin(const T& x)
384{
385 return std::distance(x.begin(), std::min_element(x.begin(), x.end()));
386}
387
401template<typename T, typename Comparator>
402int
403whichBest(const T& x, const Comparator& comp)
404{
405 return std::distance(x.begin(), std::max_element(x.begin(), x.end(), comp));
406}
407
422Eigen::ArrayXd
423geomSpace(const double start, const double end, const int n);
424
437template<typename T>
438Eigen::VectorXd
439l1Norms(const T& x)
440{
441 const int p = x.cols();
442
443 Eigen::VectorXd out(p);
444
445 for (int j = 0; j < p; ++j) {
446 out(j) = x.col(j).cwiseAbs().sum();
447 }
448
449 return out;
450}
451
462Eigen::VectorXd
463l2Norms(const Eigen::SparseMatrix<double>& x);
464
475Eigen::VectorXd
476l2Norms(const Eigen::MatrixXd& x);
477
489Eigen::VectorXd
490maxAbs(const Eigen::SparseMatrix<double>& x);
491
504Eigen::VectorXd
505maxAbs(const Eigen::MatrixXd& x);
506
518Eigen::VectorXd
519means(const Eigen::SparseMatrix<double>& x);
520
532Eigen::VectorXd
533means(const Eigen::MatrixXd& x);
534
547Eigen::VectorXd
548stdDevs(const Eigen::SparseMatrix<double>& x);
549
562Eigen::VectorXd
563stdDevs(const Eigen::MatrixXd& x);
564
575Eigen::VectorXd
576ranges(const Eigen::SparseMatrix<double>& x);
577
591Eigen::VectorXd
592ranges(const Eigen::MatrixXd& x);
593
604Eigen::VectorXd
605mins(const Eigen::SparseMatrix<double>& x);
606
620Eigen::VectorXd
621mins(const Eigen::MatrixXd& x);
622
623} // namespace slope
Enums to control predictor standardization behavior.
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
T clamp(const T &x, const T &lo, const T &hi)
Definition math.h:101
Eigen::VectorXd l1Norms(const T &x)
Computes the L1 (Manhattan) norms for each column of a matrix.
Definition math.h:439
int sign(T val)
Returns the sign of a given value.
Definition math.h:31
Eigen::VectorXd maxAbs(const Eigen::SparseMatrix< double > &x)
Computes the maximum absolute value for each column of a matrix.
Definition math.cpp:126
Eigen::VectorXd means(const Eigen::SparseMatrix< double > &x)
Computes the arithmetic mean for each column of a sparse matrix.
Definition math.cpp:76
int whichMin(const T &x)
Returns the index of the minimum element in a container.
Definition math.h:383
Eigen::ArrayXd cumSum(const T &x)
Definition math.h:46
T sigmoid(const T &x)
Definition math.h:67
JitNormalization
Enums to control predictor standardization behavior.
@ None
No JIT normalization.
Eigen::VectorXd logSumExp(const Eigen::MatrixXd &a)
Definition math.cpp:7
Eigen::MatrixXd softmax(const Eigen::MatrixXd &a)
Definition math.cpp:17
std::vector< int > setDiff(const std::vector< int > &a, const std::vector< int > &b)
Computes the set difference of two sorted integer vectors.
Definition math.cpp:36
Eigen::VectorXd stdDevs(const Eigen::SparseMatrix< double > &x)
Computes the standard deviation for each column of a matrix.
Definition math.cpp:180
Eigen::MatrixXd linearPredictor(const T &x, const std::vector< int > &active_set, const Eigen::VectorXd &beta0, const Eigen::VectorXd &beta, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization, const bool intercept)
Definition math.h:142
std::vector< int > setUnion(const std::vector< int > &a, const std::vector< int > &b)
Computes the union of two sorted integer vectors.
Definition math.cpp:26
int whichBest(const T &x, const Comparator &comp)
Returns the index of the minimum element in a container.
Definition math.h:403
Eigen::VectorXd ranges(const Eigen::SparseMatrix< double > &x)
Computes the range (max - min) for each column of a matrix.
Definition math.cpp:97
void updateGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::MatrixXd &residual, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const Eigen::VectorXd &w, const JitNormalization jit_normalization)
Definition math.h:223
void offsetGradient(Eigen::VectorXd &gradient, const T &x, const Eigen::VectorXd &offset, const std::vector< int > &active_set, const Eigen::VectorXd &x_centers, const Eigen::VectorXd &x_scales, const JitNormalization jit_normalization)
Definition math.h:295
T logit(const T &x)
Definition math.h:83
Eigen::VectorXd l2Norms(const Eigen::SparseMatrix< double > &x)
Computes the L2 (Euclidean) norms for each column of a sparse matrix.
Definition math.cpp:56
Eigen::VectorXd mins(const Eigen::SparseMatrix< double > &x)
Computes the minimum value for each column of a sparse matrix.
Definition math.cpp:153
Eigen::ArrayXd geomSpace(const double start, const double end, const int n)
Creates an array of n numbers in geometric progression from start to end.
Definition math.cpp:46
int whichMax(const T &x)
Returns the index of the maximum element in a container.
Definition math.h:365
Thread management for parallel computations.