]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | // Copyright 2019 Hans Dembinski |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. | |
4 | // (See accompanying file LICENSE_1_0.txt | |
5 | // or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | #ifndef BOOST_HISTOGRAM_DETAIL_FILL_N_HPP | |
8 | #define BOOST_HISTOGRAM_DETAIL_FILL_N_HPP | |
9 | ||
10 | #include <algorithm> | |
92f5a8d4 TL |
11 | #include <boost/histogram/axis/option.hpp> |
12 | #include <boost/histogram/axis/traits.hpp> | |
13 | #include <boost/histogram/detail/axes.hpp> | |
14 | #include <boost/histogram/detail/detect.hpp> | |
15 | #include <boost/histogram/detail/fill.hpp> | |
16 | #include <boost/histogram/detail/linearize.hpp> | |
20effc67 | 17 | #include <boost/histogram/detail/nonmember_container_access.hpp> |
92f5a8d4 TL |
18 | #include <boost/histogram/detail/optional_index.hpp> |
19 | #include <boost/histogram/detail/span.hpp> | |
20 | #include <boost/histogram/detail/static_if.hpp> | |
21 | #include <boost/histogram/fwd.hpp> | |
22 | #include <boost/mp11/algorithm.hpp> | |
23 | #include <boost/mp11/bind.hpp> | |
24 | #include <boost/mp11/utility.hpp> | |
25 | #include <boost/throw_exception.hpp> | |
20effc67 TL |
26 | #include <boost/variant2/variant.hpp> |
27 | #include <cassert> | |
28 | #include <initializer_list> | |
92f5a8d4 TL |
29 | #include <stdexcept> |
30 | #include <type_traits> | |
31 | #include <utility> | |
32 | ||
33 | namespace boost { | |
34 | namespace histogram { | |
35 | namespace detail { | |
36 | ||
37 | namespace dtl = boost::histogram::detail; | |
38 | ||
39 | template <class Axes, class T> | |
40 | using is_convertible_to_any_value_type = | |
41 | mp11::mp_any_of_q<value_types<Axes>, mp11::mp_bind_front<std::is_convertible, T>>; | |
42 | ||
92f5a8d4 TL |
43 | template <class T> |
44 | auto to_ptr_size(const T& x) { | |
45 | return static_if<std::is_scalar<T>>( | |
46 | [](const auto& x) { return std::make_pair(&x, static_cast<std::size_t>(0)); }, | |
47 | [](const auto& x) { return std::make_pair(dtl::data(x), dtl::size(x)); }, x); | |
48 | } | |
49 | ||
50 | template <class F, class V> | |
51 | decltype(auto) maybe_visit(F&& f, V&& v) { | |
52 | return static_if<is_variant<std::decay_t<V>>>( | |
53 | [](auto&& f, auto&& v) { | |
54 | return variant2::visit(std::forward<F>(f), std::forward<V>(v)); | |
55 | }, | |
56 | [](auto&& f, auto&& v) { return std::forward<F>(f)(std::forward<V>(v)); }, | |
57 | std::forward<F>(f), std::forward<V>(v)); | |
58 | } | |
59 | ||
60 | template <class Index, class Axis, class IsGrowing> | |
61 | struct index_visitor { | |
62 | using index_type = Index; | |
63 | using pointer = index_type*; | |
64 | using value_type = axis::traits::value_type<Axis>; | |
f67539c2 | 65 | using Opt = axis::traits::get_options<Axis>; |
92f5a8d4 TL |
66 | |
67 | Axis& axis_; | |
68 | const std::size_t stride_, start_, size_; // start and size of value collection | |
69 | const pointer begin_; | |
70 | axis::index_type* shift_; | |
71 | ||
72 | index_visitor(Axis& a, std::size_t& str, const std::size_t& sta, const std::size_t& si, | |
73 | const pointer it, axis::index_type* shift) | |
74 | : axis_(a), stride_(str), start_(sta), size_(si), begin_(it), shift_(shift) {} | |
75 | ||
76 | template <class T> | |
77 | void call_2(std::true_type, pointer it, const T& x) const { | |
78 | // must use this code for all axes if one of them is growing | |
79 | axis::index_type shift; | |
80 | linearize_growth(*it, shift, stride_, axis_, | |
81 | try_cast<value_type, std::invalid_argument>(x)); | |
82 | if (shift > 0) { // shift previous indices, because axis zero-point has changed | |
83 | while (it != begin_) *--it += static_cast<std::size_t>(shift) * stride_; | |
84 | *shift_ += shift; | |
85 | } | |
86 | } | |
87 | ||
88 | template <class T> | |
89 | void call_2(std::false_type, pointer it, const T& x) const { | |
90 | // no axis is growing | |
91 | linearize(*it, stride_, axis_, try_cast<value_type, std::invalid_argument>(x)); | |
92 | } | |
93 | ||
94 | template <class T> | |
95 | void call_1(std::false_type, const T& iterable) const { | |
96 | // T is iterable; fill N values | |
97 | const auto* tp = dtl::data(iterable) + start_; | |
98 | for (auto it = begin_; it != begin_ + size_; ++it) call_2(IsGrowing{}, it, *tp++); | |
99 | } | |
100 | ||
101 | template <class T> | |
102 | void call_1(std::true_type, const T& value) const { | |
103 | // T is compatible value; fill single value N times | |
104 | index_type idx{*begin_}; | |
105 | call_2(IsGrowing{}, &idx, value); | |
106 | if (is_valid(idx)) { | |
107 | const auto delta = | |
108 | static_cast<std::intptr_t>(idx) - static_cast<std::intptr_t>(*begin_); | |
109 | for (auto&& i : make_span(begin_, size_)) i += delta; | |
110 | } else | |
111 | std::fill(begin_, begin_ + size_, invalid_index); | |
112 | } | |
113 | ||
114 | template <class T> | |
115 | void operator()(const T& iterable_or_value) const { | |
116 | call_1(mp11::mp_bool<(std::is_convertible<T, value_type>::value || | |
117 | !is_iterable<T>::value)>{}, | |
118 | iterable_or_value); | |
119 | } | |
120 | }; | |
121 | ||
122 | template <class Index, class S, class Axes, class T> | |
123 | void fill_n_indices(Index* indices, const std::size_t start, const std::size_t size, | |
124 | const std::size_t offset, S& storage, Axes& axes, const T* viter) { | |
125 | axis::index_type extents[buffer_size<Axes>::value]; | |
126 | axis::index_type shifts[buffer_size<Axes>::value]; | |
127 | for_each_axis(axes, [eit = extents, sit = shifts](const auto& a) mutable { | |
128 | *sit++ = 0; | |
129 | *eit++ = axis::traits::extent(a); | |
130 | }); // LCOV_EXCL_LINE: gcc-8 is missing this line for no reason | |
131 | ||
132 | // offset must be zero for growing axes | |
133 | using IsGrowing = has_growing_axis<Axes>; | |
134 | std::fill(indices, indices + size, IsGrowing::value ? 0 : offset); | |
135 | for_each_axis(axes, [&, stride = static_cast<std::size_t>(1), | |
136 | pshift = shifts](auto& axis) mutable { | |
137 | using Axis = std::decay_t<decltype(axis)>; | |
138 | maybe_visit( | |
139 | index_visitor<Index, Axis, IsGrowing>{axis, stride, start, size, indices, pshift}, | |
140 | *viter++); | |
141 | stride *= static_cast<std::size_t>(axis::traits::extent(axis)); | |
142 | ++pshift; | |
143 | }); | |
144 | ||
145 | bool update_needed = false; | |
146 | for_each_axis(axes, [&update_needed, eit = extents](const auto& a) mutable { | |
147 | update_needed |= *eit++ != axis::traits::extent(a); | |
148 | }); | |
149 | if (update_needed) { | |
150 | storage_grower<Axes> g(axes); | |
151 | g.from_extents(extents); | |
152 | g.apply(storage, shifts); | |
153 | } | |
154 | } | |
155 | ||
156 | template <class S, class Index, class... Ts> | |
157 | void fill_n_storage(S& s, const Index idx, Ts&&... p) noexcept { | |
158 | if (is_valid(idx)) { | |
20effc67 | 159 | assert(idx < s.size()); |
f67539c2 | 160 | fill_storage_element(s[idx], *p.first...); |
92f5a8d4 | 161 | } |
20effc67 TL |
162 | // operator folding emulation |
163 | (void)std::initializer_list<int>{(p.second ? (++p.first, 0) : 0)...}; | |
92f5a8d4 TL |
164 | } |
165 | ||
166 | template <class S, class Index, class T, class... Ts> | |
167 | void fill_n_storage(S& s, const Index idx, weight_type<T>&& w, Ts&&... ps) noexcept { | |
168 | if (is_valid(idx)) { | |
20effc67 | 169 | assert(idx < s.size()); |
f67539c2 | 170 | fill_storage_element(s[idx], weight(*w.value.first), *ps.first...); |
92f5a8d4 TL |
171 | } |
172 | if (w.value.second) ++w.value.first; | |
20effc67 TL |
173 | // operator folding emulation |
174 | (void)std::initializer_list<int>{(ps.second ? (++ps.first, 0) : 0)...}; | |
92f5a8d4 TL |
175 | } |
176 | ||
177 | // general Nd treatment | |
178 | template <class Index, class S, class A, class T, class... Ts> | |
179 | void fill_n_nd(const std::size_t offset, S& storage, A& axes, const std::size_t vsize, | |
180 | const T* values, Ts&&... ts) { | |
181 | constexpr std::size_t buffer_size = 1ul << 14; | |
182 | Index indices[buffer_size]; | |
183 | ||
184 | /* | |
185 | Parallelization options. | |
186 | ||
187 | A) Run the whole fill2 method in parallel, each thread fills its own buffer of | |
188 | indices, synchronization (atomics) are needed to synchronize the incrementing of | |
189 | the storage cells. This leads to a lot of congestion for small histograms. | |
190 | ||
191 | B) Run only fill_n_indices in parallel, subsections of the indices buffer | |
192 | can be filled by different threads. The final loop that fills the storage runs | |
193 | in the main thread, this requires no synchronization for the storage, cells do | |
194 | not need to support atomic operations. | |
195 | ||
196 | C) Like B), then sort the indices in the main thread and fill the | |
197 | storage in parallel, where each thread uses a disjunct set of indices. This | |
198 | should create less congestion and requires no synchronization for the storage. | |
199 | ||
200 | Note on C): Let's say we have an axis with 5 bins (with *flow to simplify). | |
201 | Then after filling 10 values, converting to indices and sorting, the index | |
202 | buffer may look like this: 0 0 0 1 2 2 2 4 4 5. Let's use two threads to fill | |
203 | the storage. Still in the main thread, we compute an iterator to the middle of | |
204 | the index buffer and move it to the right until the pointee changes. Now we have | |
205 | two ranges which contain disjunct sets of indices. We pass these ranges to the | |
206 | threads which then fill the storage. Since the threads by construction do not | |
207 | compete to increment the same cell, no further synchronization is required. | |
208 | ||
209 | In all cases, growing axes cannot be parallelized. | |
210 | */ | |
211 | ||
212 | for (std::size_t start = 0; start < vsize; start += buffer_size) { | |
213 | const std::size_t n = std::min(buffer_size, vsize - start); | |
214 | // fill buffer of indices... | |
215 | fill_n_indices(indices, start, n, offset, storage, axes, values); | |
216 | // ...and fill corresponding storage cells | |
217 | for (auto&& idx : make_span(indices, n)) | |
218 | fill_n_storage(storage, idx, std::forward<Ts>(ts)...); | |
219 | } | |
220 | } | |
221 | ||
222 | template <class S, class... As, class T, class... Us> | |
223 | void fill_n_1(const std::size_t offset, S& storage, std::tuple<As...>& axes, | |
224 | const std::size_t vsize, const T* values, Us&&... us) { | |
225 | using index_type = | |
226 | mp11::mp_if<has_non_inclusive_axis<std::tuple<As...>>, optional_index, std::size_t>; | |
227 | fill_n_nd<index_type>(offset, storage, axes, vsize, values, std::forward<Us>(us)...); | |
228 | } | |
229 | ||
230 | template <class S, class A, class T, class... Us> | |
231 | void fill_n_1(const std::size_t offset, S& storage, A& axes, const std::size_t vsize, | |
232 | const T* values, Us&&... us) { | |
233 | bool all_inclusive = true; | |
234 | for_each_axis(axes, | |
235 | [&](const auto& ax) { all_inclusive &= axis::traits::inclusive(ax); }); | |
236 | if (axes_rank(axes) == 1) { | |
237 | axis::visit( | |
238 | [&](auto& ax) { | |
239 | std::tuple<decltype(ax)> axes{ax}; | |
240 | fill_n_1(offset, storage, axes, vsize, values, std::forward<Us>(us)...); | |
241 | }, | |
242 | axes[0]); | |
243 | } else { | |
244 | if (all_inclusive) | |
245 | fill_n_nd<std::size_t>(offset, storage, axes, vsize, values, | |
246 | std::forward<Us>(us)...); | |
247 | else | |
248 | fill_n_nd<optional_index>(offset, storage, axes, vsize, values, | |
249 | std::forward<Us>(us)...); | |
250 | } | |
251 | } | |
252 | ||
253 | template <class A, class T, std::size_t N> | |
254 | std::size_t get_total_size(const A& axes, const dtl::span<const T, N>& values) { | |
255 | // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant): | |
256 | // - span<CT, N>: for any histogram, N == rank | |
257 | // - span<V<T, CT>, N>: for any histogram, N == rank | |
20effc67 | 258 | assert(axes_rank(axes) == values.size()); |
92f5a8d4 TL |
259 | constexpr auto unset = static_cast<std::size_t>(-1); |
260 | std::size_t size = unset; | |
261 | for_each_axis(axes, [&size, vit = values.begin()](const auto& ax) mutable { | |
262 | using AV = axis::traits::value_type<std::decay_t<decltype(ax)>>; | |
263 | maybe_visit( | |
264 | [&size](const auto& v) { | |
265 | // v is either convertible to value or a sequence of values | |
266 | using V = std::remove_const_t<std::remove_reference_t<decltype(v)>>; | |
267 | static_if_c<(std::is_convertible<decltype(v), AV>::value || | |
268 | !is_iterable<V>::value)>( | |
269 | [](const auto&) {}, | |
270 | [&size](const auto& v) { | |
271 | const auto n = dtl::size(v); | |
272 | // must repeat this here for msvc :( | |
273 | constexpr auto unset = static_cast<std::size_t>(-1); | |
274 | if (size == unset) | |
275 | size = dtl::size(v); | |
276 | else if (size != n) | |
277 | BOOST_THROW_EXCEPTION( | |
278 | std::invalid_argument("spans must have compatible lengths")); | |
279 | }, | |
280 | v); | |
281 | }, | |
282 | *vit++); | |
283 | }); | |
284 | // if all arguments are not iterables, return size of 1 | |
285 | return size == unset ? 1 : size; | |
286 | } | |
287 | ||
288 | inline void fill_n_check_extra_args(std::size_t) noexcept {} | |
289 | ||
290 | template <class T, class... Ts> | |
291 | void fill_n_check_extra_args(std::size_t size, T&& x, Ts&&... ts) { | |
292 | // sequences must have same lengths, but sequences of length 0 are broadcast | |
293 | if (x.second != 0 && x.second != size) | |
294 | BOOST_THROW_EXCEPTION(std::invalid_argument("spans must have compatible lengths")); | |
295 | fill_n_check_extra_args(size, std::forward<Ts>(ts)...); | |
296 | } | |
297 | ||
298 | template <class T, class... Ts> | |
299 | void fill_n_check_extra_args(std::size_t size, weight_type<T>&& w, Ts&&... ts) { | |
300 | fill_n_check_extra_args(size, w.value, std::forward<Ts>(ts)...); | |
301 | } | |
302 | ||
303 | template <class S, class A, class T, std::size_t N, class... Us> | |
304 | void fill_n(std::true_type, const std::size_t offset, S& storage, A& axes, | |
305 | const dtl::span<const T, N> values, Us&&... us) { | |
306 | // supported cases (T = value type; CT = containter of T; V<T, CT, ...> = variant): | |
307 | // - span<T, N>: only valid for 1D histogram, N > 1 allowed | |
308 | // - span<CT, N>: for any histogram, N == rank | |
309 | // - span<V<T, CT>, N>: for any histogram, N == rank | |
310 | static_if<is_convertible_to_any_value_type<A, T>>( | |
311 | [&](const auto& values, auto&&... us) { | |
312 | // T matches one of the axis value types, must be 1D special case | |
313 | if (axes_rank(axes) != 1) | |
314 | BOOST_THROW_EXCEPTION( | |
315 | std::invalid_argument("number of arguments must match histogram rank")); | |
316 | fill_n_check_extra_args(values.size(), std::forward<Us>(us)...); | |
317 | fill_n_1(offset, storage, axes, values.size(), &values, std::forward<Us>(us)...); | |
318 | }, | |
319 | [&](const auto& values, auto&&... us) { | |
320 | // generic ND case | |
321 | if (axes_rank(axes) != values.size()) | |
322 | BOOST_THROW_EXCEPTION( | |
323 | std::invalid_argument("number of arguments must match histogram rank")); | |
324 | const auto vsize = get_total_size(axes, values); | |
325 | fill_n_check_extra_args(vsize, std::forward<Us>(us)...); | |
326 | fill_n_1(offset, storage, axes, vsize, values.data(), std::forward<Us>(us)...); | |
327 | }, | |
328 | values, std::forward<Us>(us)...); | |
329 | } | |
330 | ||
331 | // empty implementation for bad arguments to stop compiler from showing internals | |
332 | template <class... Ts> | |
333 | void fill_n(std::false_type, Ts...) {} | |
334 | ||
335 | } // namespace detail | |
336 | } // namespace histogram | |
337 | } // namespace boost | |
338 | ||
339 | #endif // BOOST_HISTOGRAM_DETAIL_FILL_N_HPP |