]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/histogram/detail/fill_n.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / boost / histogram / detail / fill_n.hpp
CommitLineData
92f5a8d4
TL
1// Copyright 2019 Hans Dembinski
2//
3// Distributed under the Boost Software License, Version 1.0.
4// (See accompanying file LICENSE_1_0.txt
5// or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7#ifndef BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
8#define BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
9
10#include <algorithm>
92f5a8d4
TL
11#include <boost/histogram/axis/option.hpp>
12#include <boost/histogram/axis/traits.hpp>
13#include <boost/histogram/detail/axes.hpp>
14#include <boost/histogram/detail/detect.hpp>
15#include <boost/histogram/detail/fill.hpp>
16#include <boost/histogram/detail/linearize.hpp>
20effc67 17#include <boost/histogram/detail/nonmember_container_access.hpp>
92f5a8d4
TL
18#include <boost/histogram/detail/optional_index.hpp>
19#include <boost/histogram/detail/span.hpp>
20#include <boost/histogram/detail/static_if.hpp>
21#include <boost/histogram/fwd.hpp>
22#include <boost/mp11/algorithm.hpp>
23#include <boost/mp11/bind.hpp>
24#include <boost/mp11/utility.hpp>
25#include <boost/throw_exception.hpp>
20effc67
TL
26#include <boost/variant2/variant.hpp>
27#include <cassert>
28#include <initializer_list>
92f5a8d4
TL
29#include <stdexcept>
30#include <type_traits>
31#include <utility>
32
33namespace boost {
34namespace histogram {
35namespace detail {
36
37namespace dtl = boost::histogram::detail;
38
// True if T is convertible to at least one of the value types accepted by the
// axes in Axes; used by fill_n to detect the 1d special case, where a span of
// raw values (instead of a span of value collections) is passed.
template <class Axes, class T>
using is_convertible_to_any_value_type =
    mp11::mp_any_of_q<value_types<Axes>, mp11::mp_bind_front<std::is_convertible, T>>;
42
92f5a8d4
TL
43template <class T>
44auto to_ptr_size(const T& x) {
45 return static_if<std::is_scalar<T>>(
46 [](const auto& x) { return std::make_pair(&x, static_cast<std::size_t>(0)); },
47 [](const auto& x) { return std::make_pair(dtl::data(x), dtl::size(x)); }, x);
48}
49
50template <class F, class V>
51decltype(auto) maybe_visit(F&& f, V&& v) {
52 return static_if<is_variant<std::decay_t<V>>>(
53 [](auto&& f, auto&& v) {
54 return variant2::visit(std::forward<F>(f), std::forward<V>(v));
55 },
56 [](auto&& f, auto&& v) { return std::forward<F>(f)(std::forward<V>(v)); },
57 std::forward<F>(f), std::forward<V>(v));
58}
59
// Visitor that converts the values for one axis into (partial) flat storage
// indices. Index is std::size_t or optional_index; IsGrowing selects the code
// path that supports growing axes. The visitor is applied to either an
// iterable of values or a single value that is broadcast over the buffer.
template <class Index, class Axis, class IsGrowing>
struct index_visitor {
  using index_type = Index;
  using pointer = index_type*;
  using value_type = axis::traits::value_type<Axis>;
  using Opt = axis::traits::get_options<Axis>;

  Axis& axis_;
  const std::size_t stride_, start_, size_; // start and size of value collection
  const pointer begin_;                     // start of the index buffer
  axis::index_type* shift_;                 // accumulates total shift of this axis

  index_visitor(Axis& a, std::size_t& str, const std::size_t& sta, const std::size_t& si,
                const pointer it, axis::index_type* shift)
      : axis_(a), stride_(str), start_(sta), size_(si), begin_(it), shift_(shift) {}

  // growing case: linearize and, if the axis grew at the lower end, patch up
  // the indices computed so far
  template <class T>
  void call_2(std::true_type, pointer it, const T& x) const {
    // must use this code for all axes if one of them is growing
    axis::index_type shift;
    linearize_growth(*it, shift, stride_, axis_,
                     try_cast<value_type, std::invalid_argument>(x));
    if (shift > 0) { // shift previous indices, because axis zero-point has changed
      while (it != begin_) *--it += static_cast<std::size_t>(shift) * stride_;
      *shift_ += shift;
    }
  }

  // non-growing case: plain linearization
  template <class T>
  void call_2(std::false_type, pointer it, const T& x) const {
    // no axis is growing
    linearize(*it, stride_, axis_, try_cast<value_type, std::invalid_argument>(x));
  }

  template <class T>
  void call_1(std::false_type, const T& iterable) const {
    // T is iterable; fill N values
    const auto* tp = dtl::data(iterable) + start_;
    for (auto it = begin_; it != begin_ + size_; ++it) call_2(IsGrowing{}, it, *tp++);
  }

  template <class T>
  void call_1(std::true_type, const T& value) const {
    // T is compatible value; fill single value N times
    // linearize once, then apply the resulting offset to the whole buffer
    index_type idx{*begin_};
    call_2(IsGrowing{}, &idx, value);
    if (is_valid(idx)) {
      const auto delta =
          static_cast<std::intptr_t>(idx) - static_cast<std::intptr_t>(*begin_);
      for (auto&& i : make_span(begin_, size_)) i += delta;
    } else
      std::fill(begin_, begin_ + size_, invalid_index);
  }

  template <class T>
  void operator()(const T& iterable_or_value) const {
    // broadcast single-value path if T converts to the axis value type or is
    // not iterable; otherwise treat T as an iterable of values
    call_1(mp11::mp_bool<(std::is_convertible<T, value_type>::value ||
                          !is_iterable<T>::value)>{},
           iterable_or_value);
  }
};
121
// Compute flat storage indices for `size` input values starting at `start`,
// writing them into `indices`. viter points to one value collection (or
// variant thereof) per axis. If any axis grew, the storage is resized and
// existing cells are moved accordingly.
template <class Index, class S, class Axes, class T>
void fill_n_indices(Index* indices, const std::size_t start, const std::size_t size,
                    const std::size_t offset, S& storage, Axes& axes, const T* viter) {
  // snapshot extents before filling so we can detect growth afterwards
  axis::index_type extents[buffer_size<Axes>::value];
  axis::index_type shifts[buffer_size<Axes>::value];
  for_each_axis(axes, [eit = extents, sit = shifts](const auto& a) mutable {
    *sit++ = 0;
    *eit++ = axis::traits::extent(a);
  }); // LCOV_EXCL_LINE: gcc-8 is missing this line for no reason

  // offset must be zero for growing axes
  using IsGrowing = has_growing_axis<Axes>;
  std::fill(indices, indices + size, IsGrowing::value ? 0 : offset);
  // accumulate each axis' contribution into the index buffer; stride grows by
  // the extent of each processed axis (row-major linearization)
  for_each_axis(axes, [&, stride = static_cast<std::size_t>(1),
                       pshift = shifts](auto& axis) mutable {
    using Axis = std::decay_t<decltype(axis)>;
    maybe_visit(
        index_visitor<Index, Axis, IsGrowing>{axis, stride, start, size, indices, pshift},
        *viter++);
    stride *= static_cast<std::size_t>(axis::traits::extent(axis));
    ++pshift;
  });

  // if any extent changed, the axes grew and the storage must be reshaped
  bool update_needed = false;
  for_each_axis(axes, [&update_needed, eit = extents](const auto& a) mutable {
    update_needed |= *eit++ != axis::traits::extent(a);
  });
  if (update_needed) {
    storage_grower<Axes> g(axes);
    g.from_extents(extents);
    g.apply(storage, shifts);
  }
}
155
// Fill one storage cell at idx with the current sample/extra arguments.
// Each p is a (pointer, length) pair; length 0 marks a broadcast single value
// whose pointer is never advanced.
template <class S, class Index, class... Ts>
void fill_n_storage(S& s, const Index idx, Ts&&... p) noexcept {
  if (is_valid(idx)) { // optional_index may be invalid; then the cell is skipped
    assert(idx < s.size());
    fill_storage_element(s[idx], *p.first...);
  }
  // advance pointers of non-broadcast sequences
  // operator folding emulation
  (void)std::initializer_list<int>{(p.second ? (++p.first, 0) : 0)...};
}
165
// Overload for a leading weight argument: the weight is dereferenced and
// passed as weight(...) to the storage element; its pointer is advanced
// separately, the remaining pairs as in the unweighted overload.
template <class S, class Index, class T, class... Ts>
void fill_n_storage(S& s, const Index idx, weight_type<T>&& w, Ts&&... ps) noexcept {
  if (is_valid(idx)) {
    assert(idx < s.size());
    fill_storage_element(s[idx], weight(*w.value.first), *ps.first...);
  }
  // advance weight pointer unless it is a broadcast single value (length 0)
  if (w.value.second) ++w.value.first;
  // operator folding emulation
  (void)std::initializer_list<int>{(ps.second ? (++ps.first, 0) : 0)...};
}
176
// general Nd treatment: convert values chunk-wise into flat indices, then
// increment the corresponding storage cells
template <class Index, class S, class A, class T, class... Ts>
void fill_n_nd(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
               const T* values, Ts&&... ts) {
  // fixed-size index buffer bounds the stack usage per chunk
  constexpr std::size_t buffer_size = 1ul << 14;
  Index indices[buffer_size];

  /*
    Parallelization options.

    A) Run the whole fill2 method in parallel, each thread fills its own buffer of
    indices, synchronization (atomics) are needed to synchronize the incrementing of
    the storage cells. This leads to a lot of congestion for small histograms.

    B) Run only fill_n_indices in parallel, subsections of the indices buffer
    can be filled by different threads. The final loop that fills the storage runs
    in the main thread, this requires no synchronization for the storage, cells do
    not need to support atomic operations.

    C) Like B), then sort the indices in the main thread and fill the
    storage in parallel, where each thread uses a disjunct set of indices. This
    should create less congestion and requires no synchronization for the storage.

    Note on C): Let's say we have an axis with 5 bins (with *flow to simplify).
    Then after filling 10 values, converting to indices and sorting, the index
    buffer may look like this: 0 0 0 1 2 2 2 4 4 5. Let's use two threads to fill
    the storage. Still in the main thread, we compute an iterator to the middle of
    the index buffer and move it to the right until the pointee changes. Now we have
    two ranges which contain disjunct sets of indices. We pass these ranges to the
    threads which then fill the storage. Since the threads by construction do not
    compete to increment the same cell, no further synchronization is required.

    In all cases, growing axes cannot be parallelized.
  */

  std::size_t start = 0;
  while (start < vsize) {
    const std::size_t chunk = std::min(buffer_size, vsize - start);
    // fill buffer of indices for this chunk...
    fill_n_indices(indices, start, chunk, offset, storage, axes, values);
    // ...and fill the corresponding storage cells
    for (auto&& idx : make_span(indices, chunk))
      fill_n_storage(storage, idx, std::forward<Ts>(ts)...);
    start += buffer_size;
  }
}
221
// Static-axes entry point: whether any axis is non-inclusive is known at
// compile time, so the index type (plain or optional) is selected statically.
template <class S, class... As, class T, class... Us>
void fill_n_1(const std::size_t offset, S& storage, std::tuple<As...>& axes,
              const std::size_t vsize, const T* values, Us&&... us) {
  using index_type =
      mp11::mp_if<has_non_inclusive_axis<std::tuple<As...>>, optional_index, std::size_t>;
  fill_n_nd<index_type>(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
}
229
// Dynamic-axes entry point: inclusiveness must be queried at run time. A rank-1
// histogram is dispatched through axis::visit to the static overload; for
// higher ranks the index type is chosen based on the runtime check.
template <class S, class A, class T, class... Us>
void fill_n_1(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
              const T* values, Us&&... us) {
  bool all_inclusive = true;
  for_each_axis(axes,
                [&](const auto& ax) { all_inclusive &= axis::traits::inclusive(ax); });
  if (axes_rank(axes) == 1) {
    axis::visit(
        [&](auto& ax) {
          // wrap the concrete axis reference in a tuple to reuse the static path
          std::tuple<decltype(ax)> axes{ax};
          fill_n_1(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
        },
        axes[0]);
  } else {
    if (all_inclusive)
      fill_n_nd<std::size_t>(offset, storage, axes, vsize, values,
                             std::forward<Us>(us)...);
    else
      fill_n_nd<optional_index>(offset, storage, axes, vsize, values,
                                std::forward<Us>(us)...);
  }
}
252
// Determine how many entries will be filled: the common length of all iterable
// arguments. Throws std::invalid_argument if iterable arguments have different
// lengths; returns 1 if no argument is iterable (single-value fill).
template <class A, class T, std::size_t N>
std::size_t get_total_size(const A& axes, const dtl::span<const T, N>& values) {
  // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant):
  // - span<CT, N>: for any histogram, N == rank
  // - span<V<T, CT>, N>: for any histogram, N == rank
  assert(axes_rank(axes) == values.size());
  constexpr auto unset = static_cast<std::size_t>(-1);
  std::size_t size = unset;
  for_each_axis(axes, [&size, vit = values.begin()](const auto& ax) mutable {
    using AV = axis::traits::value_type<std::decay_t<decltype(ax)>>;
    maybe_visit(
        [&size](const auto& v) {
          // v is either convertible to value or a sequence of values
          using V = std::remove_const_t<std::remove_reference_t<decltype(v)>>;
          static_if_c<(std::is_convertible<decltype(v), AV>::value ||
                       !is_iterable<V>::value)>(
              [](const auto&) {},  // single value: does not constrain the size
              [&size](const auto& v) {
                const auto n = dtl::size(v);
                // must repeat this here for msvc :(
                constexpr auto unset = static_cast<std::size_t>(-1);
                if (size == unset)
                  size = dtl::size(v);
                else if (size != n)
                  BOOST_THROW_EXCEPTION(
                      std::invalid_argument("spans must have compatible lengths"));
              },
              v);
        },
        *vit++);
  });
  // if all arguments are not iterables, return size of 1
  return size == unset ? 1 : size;
}
287
// recursion terminator: no extra arguments left to check
inline void fill_n_check_extra_args(std::size_t) noexcept {}
289
290template <class T, class... Ts>
291void fill_n_check_extra_args(std::size_t size, T&& x, Ts&&... ts) {
292 // sequences must have same lengths, but sequences of length 0 are broadcast
293 if (x.second != 0 && x.second != size)
294 BOOST_THROW_EXCEPTION(std::invalid_argument("spans must have compatible lengths"));
295 fill_n_check_extra_args(size, std::forward<Ts>(ts)...);
296}
297
// Unwrap a weight argument and check its inner (pointer, length) pair.
template <class T, class... Ts>
void fill_n_check_extra_args(std::size_t size, weight_type<T>&& w, Ts&&... ts) {
  fill_n_check_extra_args(size, w.value, std::forward<Ts>(ts)...);
}
302
// Main fill_n entry: dispatches at compile time between the 1d special case
// (span of raw values) and the generic Nd case (one value collection per axis).
template <class S, class A, class T, std::size_t N, class... Us>
void fill_n(std::true_type, const std::size_t offset, S& storage, A& axes,
            const dtl::span<const T, N> values, Us&&... us) {
  // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant):
  // - span<T, N>: only valid for 1D histogram, N > 1 allowed
  // - span<CT, N>: for any histogram, N == rank
  // - span<V<T, CT>, N>: for any histogram, N == rank
  static_if<is_convertible_to_any_value_type<A, T>>(
      [&](const auto& values, auto&&... us) {
        // T matches one of the axis value types, must be 1D special case
        if (axes_rank(axes) != 1)
          BOOST_THROW_EXCEPTION(
              std::invalid_argument("number of arguments must match histogram rank"));
        fill_n_check_extra_args(values.size(), std::forward<Us>(us)...);
        // pass the span itself as the single value collection for the one axis
        fill_n_1(offset, storage, axes, values.size(), &values, std::forward<Us>(us)...);
      },
      [&](const auto& values, auto&&... us) {
        // generic ND case
        if (axes_rank(axes) != values.size())
          BOOST_THROW_EXCEPTION(
              std::invalid_argument("number of arguments must match histogram rank"));
        const auto vsize = get_total_size(axes, values);
        fill_n_check_extra_args(vsize, std::forward<Us>(us)...);
        fill_n_1(offset, storage, axes, vsize, values.data(), std::forward<Us>(us)...);
      },
      values, std::forward<Us>(us)...);
}
330
// empty implementation for bad arguments to stop compiler from showing internals;
// selected via std::false_type when the argument types are not supported
template <class... Ts>
void fill_n(std::false_type, Ts...) {}
334
335} // namespace detail
336} // namespace histogram
337} // namespace boost
338
339#endif // BOOST_HISTOGRAM_DETAIL_FILL_N_HPP