slope 0.29.0
Loading...
Searching...
No Matches
clusters.cpp
1#include "clusters.h"
2#include "math.h"
3#include "utils.h"
4
5namespace slope {
6
7Clusters::Clusters(const Eigen::VectorXd& beta)
8{
9 update(beta);
10}
11
12bool
13Clusters::hasAllZeros() const
14{
15 return !c.empty() && c[0] == 0 && c.size() == 1 && c_ind.empty();
16}
17
18std::vector<int>&
19Clusters::getZeroIndices() const
20{
21 if (!zero_indices_valid) {
22 zero_indices.clear();
23
24 // Find indices not in c_ind (set difference)
25 for (int j = 0; j < p; ++j) {
26 if (std::find(c_ind.begin(), c_ind.end(), j) == c_ind.end()) {
27 zero_indices.push_back(j);
28 }
29 }
30
31 zero_indices_valid = true;
32 }
33
34 return zero_indices;
35}
36
37std::vector<int>::const_iterator
38Clusters::cbegin(const int i) const
39{
40 if (i < static_cast<int>(c.size())) {
41 return c_ind.cbegin() + c_ptr[i];
42 }
43
44 return getZeroIndices().cbegin();
45}
46
47std::vector<int>::const_iterator
48Clusters::cend(const int i) const
49{
50 if (i < static_cast<int>(c.size())) {
51 return c_ind.cbegin() + c_ptr[i + 1];
52 }
53
54 return getZeroIndices().cend();
55}
56
57std::vector<int>::iterator
58Clusters::begin(const int i)
59{
60 if (i < static_cast<int>(c.size())) {
61 return c_ind.begin() + c_ptr[i];
62 }
63
64 return getZeroIndices().begin();
65}
66
67std::vector<int>::iterator
68Clusters::end(const int i)
69{
70 if (i < static_cast<int>(c.size())) {
71 return c_ind.begin() + c_ptr[i + 1];
72 }
73
74 return getZeroIndices().end();
75}
76
77int
78Clusters::cluster_size(const int i) const
79{
80 if (i < static_cast<int>(c.size())) {
81 return c_ptr[i + 1] - c_ptr[i];
82 }
83
84 // For zero cluster, compute the size based on indices not in other clusters
85 if (i == static_cast<int>(c.size()) && p > static_cast<int>(c_ind.size())) {
86 return p - static_cast<int>(c_ind.size());
87 }
88
89 return 0; // Invalid cluster index
90}
91
92int
93Clusters::pointer(const int i) const
94{
95 if (i < static_cast<int>(c_ptr.size())) {
96 return c_ptr[i];
97 }
98
99 return c_ind.size(); // Return end of indices for virtual zero cluster
100}
101
102int
104{
105 // Special case: if we have a coefficient of 0 in c, that means all values are
106 // 0
107 if (hasAllZeros()) {
108 return 1;
109 }
110
111 // Regular case: count non-zero clusters, and add 1 if there are zero
112 // coefficients
113 return c.size() + (p > static_cast<int>(c_ind.size()) ? 1 : 0);
114}
115
116double
117Clusters::coeff(const int i) const
118{
119 if (i < static_cast<int>(c.size())) {
120 return c[i];
121 }
122
123 return 0.0; // Return 0 for the virtual zero cluster
124}
125
126void
127Clusters::setCoeff(const int i, const double x)
128{
129 c[i] = x;
130}
131
132std::vector<double>
134{
135 std::vector<double> result = c;
136
137 // Special case: if we have a coefficient of 0 in c and all values are zeros
138 // don't add an additional zero
139 if (hasAllZeros()) {
140 return result; // Just return {0.0}
141 }
142
143 // Add zero coefficient if there's a zero cluster
144 if (p > static_cast<int>(c_ind.size())) {
145 result.push_back(0.0);
146 }
147
148 return result;
149}
150
151std::vector<int>
153{
154 return c_ind;
155}
156
157std::vector<int>
159{
160 return c_ptr;
161}
162
163void
164Clusters::update(const int old_index, const int new_index, const double c_new)
165{
166 // Invalidate zero indices cache - the cluster structure is changing
167 zero_indices_valid = false;
168
169 auto c_old = coeff(old_index);
170
171 if (c_new != c_old) {
172 if (c_new == coeff(new_index)) {
173 merge(old_index, new_index);
174 } else {
175 setCoeff(old_index, c_new);
176 if (old_index != new_index) {
177 reorder(old_index, new_index);
178 }
179 }
180 }
181}
182
183void
184Clusters::update(const Eigen::VectorXd& beta)
185{
186 using sort_pair = std::pair<double, int>;
187
188 p = beta.size();
189
190 c.clear();
191 c_ind.clear();
192 c_ptr.clear();
193 signs.clear();
194
195 // Invalidate zero indices cache
196 zero_indices_valid = false;
197
198 std::vector<sort_pair> sorted;
199 sorted.reserve(p);
200 signs.reserve(p);
201
202 for (int i = 0; i < beta.size(); ++i) {
203 signs.emplace_back(sign(beta(i)));
204 double abs_val = std::abs(beta(i));
205 if (abs_val > 0) {
206 sorted.emplace_back(abs_val, i);
207 }
208 }
209
210 // Special case: all values are zero
211 if (sorted.empty() && p > 0) {
212 c.push_back(0.0); // Add a single zero coefficient
213 c_ptr.push_back(0);
214 c_ptr.push_back(0);
215
216 return;
217 }
218
219 std::sort(sorted.begin(), sorted.end(), std::greater<sort_pair>());
220
221 c_ind.reserve(sorted.size());
222 for (const auto& sorted_i : sorted) {
223 c_ind.emplace_back(sorted_i.second);
224 }
225
226 // Extract unique coefficients while preserving order
227 std::vector<sort_pair> sorted_unique;
228 sorted_unique.reserve(sorted.size()); // At most, all values could be unique
229
230 for (auto it = sorted.begin(); it != sorted.end();) {
231 const double current_value = it->first;
232 sorted_unique.emplace_back(*it);
233 it = std::find_if(it, sorted.end(), [current_value](const sort_pair& elem) {
234 return elem.first != current_value;
235 });
236 }
237
238 c.reserve(sorted_unique.size());
239 for (const auto& sorted_unique_i : sorted_unique) {
240 c.emplace_back(sorted_unique_i.first);
241 }
242
243 c_ptr.reserve(c.size() + 1);
244 c_ptr.emplace_back(0);
245
246 auto range_start = sorted.begin();
247 for (const auto& c_i : c) {
248 auto range_end =
249 std::find_if(range_start, sorted.end(), [&c_i](const sort_pair& x) {
250 return x.first != c_i;
251 });
252 c_ptr.emplace_back(std::distance(sorted.begin(), range_end));
253 range_start = range_end;
254 }
255}
256
257void
258Clusters::reorder(const int old_index, const int new_index)
259{
260 auto c_size = cluster_size(old_index);
261
262 // update coefficients
263 move_elements(c, old_index, new_index, 1);
264
265 // update indices
266 move_elements(c_ind, pointer(old_index), pointer(new_index), c_size);
267
268 // update pointers
269 if (new_index < old_index) {
270 move_elements(c_ptr, old_index + 1, new_index + 1, 1);
271
272 std::for_each(c_ptr.begin() + new_index + 1,
273 c_ptr.begin() + old_index + 2,
274 [c_size](int& x) { x += c_size; });
275
276 c_ptr[new_index + 1] = c_ptr[new_index] + c_size;
277 } else {
278 move_elements(c_ptr, old_index, new_index, 1);
279
280 std::for_each(c_ptr.begin() + old_index,
281 c_ptr.begin() + new_index,
282 [c_size](int& x) { x -= c_size; });
283 c_ptr[new_index] = c_ptr[new_index + 1] - c_size;
284 }
285}
286
287void
288Clusters::merge(const int old_index, const int new_index)
289{
290 auto c_size = cluster_size(old_index);
291
292 // update coefficients
293 c.erase(c.cbegin() + old_index);
294
295 // update indices
296 move_elements(c_ind, pointer(old_index), pointer(new_index), c_size);
297
298 // update pointers
299 if (new_index < old_index) {
300 std::for_each(c_ptr.begin() + new_index + 1,
301 c_ptr.begin() + old_index + 1,
302 [c_size](int& x) { x += c_size; });
303 } else {
304 std::for_each(c_ptr.begin() + old_index + 1,
305 c_ptr.begin() + new_index + 1,
306 [c_size](int& x) { x -= c_size; });
307 }
308
309 c_ptr.erase(c_ptr.begin() + old_index + 1);
310}
311
312std::vector<std::vector<int>>
314{
315 std::vector<std::vector<int>> clusters;
316 clusters.reserve(n_clusters());
317
318 // Special case for all zeros vector
319 if (hasAllZeros()) {
320 std::vector<int> zero_cluster;
321 for (int i = 0; i < p; ++i) {
322 zero_cluster.push_back(i);
323 }
324 clusters.push_back(zero_cluster);
325 return clusters;
326 }
327
328 for (int i = 0; i < n_clusters(); ++i) {
329 clusters.emplace_back(cbegin(i), cend(i));
330 }
331
332 return clusters;
333}
334
335Eigen::SparseMatrix<int>
337{
338 std::vector<Eigen::Triplet<int>> triplets;
339 triplets.reserve(p);
340
341 int n_cols = 0;
342
343 for (int k = 0; k < n_clusters(); ++k) {
344 if (coeff(k) == 0) {
345 // Skip the zero cluster
346 break;
347 }
348
349 n_cols++;
350
351 for (auto it = cbegin(k); it != cend(k); ++it) {
352 int ind = *it;
353 int s = signs[ind];
354 triplets.emplace_back(ind, k, s);
355 }
356 }
357
358 Eigen::SparseMatrix<int> out(p, n_cols);
359 out.setFromTriplets(triplets.begin(), triplets.end());
360
361 return out;
362}
363
364Eigen::SparseMatrix<int>
365patternMatrix(const Eigen::VectorXd& beta)
366{
367 Clusters clusters(beta);
368 return clusters.patternMatrix();
369}
370
371} // namespace slope
Representation of the clusters in SLOPE.
Definition clusters.h:18
void update(const int old_index, const int new_index, const double c_new)
Updates the cluster structure when an index is changed.
Definition clusters.cpp:164
double coeff(const int i) const
Returns the coefficient of the cluster with the given index.
Definition clusters.cpp:117
Eigen::SparseMatrix< int > patternMatrix() const
Returns the cluster pattern as a sparse matrix.
Definition clusters.cpp:336
int cluster_size(const int i) const
Returns the size of the cluster with the given index.
Definition clusters.cpp:78
int pointer(const int i) const
Returns the pointer of the cluster with the given index.
Definition clusters.cpp:93
std::vector< std::vector< int > > getClusters() const
Returns the clusters as a vector of vectors.
Definition clusters.cpp:313
std::vector< int > indices() const
Returns a vector containing the indices of all clusters.
Definition clusters.cpp:152
void setCoeff(const int i, const double x)
Sets the coefficient of the cluster with the given index.
Definition clusters.cpp:127
int n_clusters() const
Returns the number of clusters.
Definition clusters.cpp:103
std::vector< int >::iterator begin(const int i)
Returns an iterator pointing to the beginning of the cluster with the given index.
Definition clusters.cpp:58
Clusters()=default
Constructs an Clusters object.
std::vector< int >::const_iterator cend(const int i) const
Returns a constant iterator pointing to the end of the cluster with the given index.
Definition clusters.cpp:48
std::vector< int > pointers() const
Returns a vector containing the pointers of all clusters.
Definition clusters.cpp:158
std::vector< int >::iterator end(const int i)
Returns an iterator pointing to the end of the cluster with the given index.
Definition clusters.cpp:68
std::vector< double > coeffs() const
Returns a vector containing the coefficients of all clusters.
Definition clusters.cpp:133
std::vector< int >::const_iterator cbegin(const int i) const
Returns a constant iterator pointing to the beginning of the cluster with the given index.
Definition clusters.cpp:38
The declaration of the Clusters class.
Mathematical support functions for the slope package.
Namespace containing SLOPE regression implementation.
Definition clusters.cpp:5
void move_elements(std::vector< T > &v, const int from, const int to, const int size)
Definition utils.h:176
int sign(T val)
Returns the sign of a given value.
Definition math.h:31
Eigen::SparseMatrix< int > patternMatrix(const Eigen::VectorXd &beta)
Returns the cluster pattern as a sparse matrix.
Definition clusters.cpp:365
Various utility functions.