slope 0.29.0
Loading...
Searching...
No Matches
multinomial.cpp
1#include "multinomial.h"
2#include "../constants.h"
3#include "../math.h"
4#include "../utils.h"
5#include <unordered_set>
6
7namespace slope {
8
9double
10Multinomial::loss(const Eigen::MatrixXd& eta, const Eigen::MatrixXd& y)
11{
12 int n = y.rows();
13
14 double out = logSumExp(eta).mean();
15
16 assert(out == out && "Loss is NaN");
17
18 out -= (y.array() * eta.array()).sum() / n;
19
20 return out;
21}
22
23double
24Multinomial::dual(const Eigen::MatrixXd& theta,
25 const Eigen::MatrixXd& y,
26 const Eigen::VectorXd&)
27{
28 const Eigen::MatrixXd r = theta + y;
29
30 return -(r.array() * r.array().max(constants::P_MIN).log()).sum() / y.rows();
31}
32
33Eigen::MatrixXd
34Multinomial::residual(const Eigen::MatrixXd& eta, const Eigen::MatrixXd& y)
35{
36 return softmax(eta) - y;
37}
38
39Eigen::MatrixXd
40Multinomial::preprocessResponse(const Eigen::MatrixXd& y)
41{
42 const int n = y.rows();
43 int m = y.cols();
44
45 Eigen::MatrixXd result;
46
47 if (m == 1) {
48 // Y is a column vector, expect integers representing classes
49 m = y.array().maxCoeff() + 1; // Assuming classes are 0-based
50
51 if (m == 1) {
52 throw std::invalid_argument("Only one class found in response");
53 }
54
55 result = Eigen::MatrixXd::Zero(n, m);
56
57 for (int i = 0; i < n; i++) {
58 int class_label = static_cast<int>(y(i, 0));
59 if (class_label < 0) {
60 throw std::invalid_argument(
61 "Class labels must be consecutive integers starting from 0");
62 }
63
64 result(i, class_label) = 1.0;
65 }
66 } else {
67 // Y is a matrix, expect one-hot encoding
68 auto y_unique = unique(y);
69
70 if (y_unique.size() > 2) {
71 throw std::invalid_argument(
72 "Expected binary labels (0/1) but found more than two unique values");
73 }
74
75 for (const auto& val : y_unique) {
76 if (val != 0.0 && val != 1.0) {
77 throw std::invalid_argument(
78 "Expected binary labels with values 0 and 1 only");
79 }
80 }
81
82 result = y;
83 }
84
85 return result;
86}
87
88void
90 Eigen::VectorXd&,
91 const Eigen::VectorXd&,
92 const Eigen::VectorXd&)
93{
94 throw std::runtime_error("Multinomial loss does not currently support IRLS");
95}
96
97Eigen::MatrixXd
98Multinomial::link(const Eigen::MatrixXd& mu)
99{
100 return mu.unaryExpr([](const double& x) {
101 return logit(std::clamp(x, constants::P_MIN, constants::P_MAX));
102 });
103}
104
105Eigen::MatrixXd
106Multinomial::inverseLink(const Eigen::MatrixXd& eta)
107{
108 return softmax(eta);
109}
110
111Eigen::MatrixXd
112Multinomial::predict(const Eigen::MatrixXd& eta)
113{
114 Eigen::MatrixXd prob = inverseLink(eta);
115
116 // Find the class with the highest probability
117 Eigen::VectorXd out(eta.rows());
118
119 for (int i = 0; i < eta.rows(); i++) {
120 out(i) = whichMax(prob.row(i));
121 }
122
123 return out;
124}
125
126// TODO: Consider adjusting the coefficients somehow.
127
128} // namespace slope
double dual(const Eigen::MatrixXd &theta, const Eigen::MatrixXd &y, const Eigen::VectorXd &w)
Calculates the dual for the multinomial loss function.
void updateWeightsAndWorkingResponse(Eigen::VectorXd &w, Eigen::VectorXd &z, const Eigen::VectorXd &eta, const Eigen::VectorXd &y)
Updates the weights and working response for the multinomial loss function. Currently not implemented...
Eigen::MatrixXd inverseLink(const Eigen::MatrixXd &eta)
The inverse link function, also known as the mean function.
Eigen::MatrixXd preprocessResponse(const Eigen::MatrixXd &y)
Preprocesses the response for the Multinomial model.
Eigen::MatrixXd predict(const Eigen::MatrixXd &eta)
Return predicted response, which is an integer class label based on the predicted probabilities.
Eigen::MatrixXd link(const Eigen::MatrixXd &mu)
The link function.
Eigen::MatrixXd residual(const Eigen::MatrixXd &eta, const Eigen::MatrixXd &y)
Calculates the residual for the multinomial loss function.
double loss(const Eigen::MatrixXd &eta, const Eigen::MatrixXd &y)
Calculates the loss for the multinomial loss function.
Multinomial loss function implementation for SLOPE algorithm.
constexpr double P_MIN
Minimum allowed probability value to avoid numerical underflow.
Definition constants.h:30
constexpr double P_MAX
Maximum allowed probability value to avoid numerical issues near 1.0.
Definition constants.h:33
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
std::unordered_set< double > unique(const Eigen::MatrixXd &x)
Create a set of unique values from an Eigen matrix.
Definition utils.h:272
Eigen::VectorXd logSumExp(const Eigen::MatrixXd &a)
Definition math.cpp:7
Eigen::MatrixXd softmax(const Eigen::MatrixXd &a)
Definition math.cpp:17
T logit(const T &x)
Definition math.h:83
int whichMax(const T &x)
Returns the index of the maximum element in a container.
Definition math.h:365