]>
Commit | Line | Data |
---|---|---|
1 | //---------------------------------------------------------------------------// | |
2 | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP | |
12 | #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP | |
13 | ||
#include <iterator>
#include <limits>

#include <boost/static_assert.hpp>
#include <boost/preprocessor/seq/for_each.hpp>

#include <boost/compute/system.hpp>
#include <boost/compute/functional.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/reduce.hpp>
#include <boost/compute/algorithm/detail/serial_accumulate.hpp>
#include <boost/compute/container/array.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
26 | ||
27 | namespace boost { | |
28 | namespace compute { | |
29 | namespace detail { | |
30 | ||
31 | // Space complexity O(1) | |
32 | template<class InputIterator, class T, class BinaryFunction> | |
33 | inline T generic_accumulate(InputIterator first, | |
34 | InputIterator last, | |
35 | T init, | |
36 | BinaryFunction function, | |
37 | command_queue &queue) | |
38 | { | |
39 | const context &context = queue.get_context(); | |
40 | ||
41 | size_t size = iterator_range_size(first, last); | |
42 | if(size == 0){ | |
43 | return init; | |
44 | } | |
45 | ||
46 | // accumulate on device | |
47 | array<T, 1> device_result(context); | |
48 | detail::serial_accumulate( | |
49 | first, last, device_result.begin(), init, function, queue | |
50 | ); | |
51 | ||
52 | // copy result to host | |
53 | T result; | |
54 | ::boost::compute::copy_n(device_result.begin(), 1, &result, queue); | |
55 | return result; | |
56 | } | |
57 | ||
// Returns true if a call to accumulate() may be carried out with reduce()
// instead. That is only valid when the function is commutative (such as
// addition of integers) and the initial value is the identity value for
// the operation (zero for addition, one for multiplication).
//
// This is the catch-all overload: for an arbitrary value/function pair
// nothing is known, so the answer is always "no".
template<class T, class F>
inline bool can_accumulate_with_reduce(T init, F function)
{
    // both arguments exist only to select the overload
    static_cast<void>(init);
    static_cast<void>(function);

    return false;
}
70 | ||
71 | /// \internal_ | |
72 | #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \ | |
73 | inline bool can_accumulate_with_reduce(type init, plus<type>) \ | |
74 | { \ | |
75 | return init == type(0); \ | |
76 | } \ | |
77 | inline bool can_accumulate_with_reduce(type init, multiplies<type>) \ | |
78 | { \ | |
79 | return init == type(1); \ | |
80 | } | |
81 | ||
82 | BOOST_PP_SEQ_FOR_EACH( | |
83 | BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE, | |
84 | _, | |
85 | (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_) | |
86 | ) | |
87 | ||
88 | template<class T> | |
89 | inline bool can_accumulate_with_reduce(T init, min<T>) | |
90 | { | |
91 | return init == (std::numeric_limits<T>::max)(); | |
92 | } | |
93 | ||
94 | template<class T> | |
95 | inline bool can_accumulate_with_reduce(T init, max<T>) | |
96 | { | |
97 | return init == (std::numeric_limits<T>::min)(); | |
98 | } | |
99 | ||
// the declaration macro is only needed to generate the overloads in this
// header; remove it so it does not leak into code that includes this file
#undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
101 | ||
102 | template<class InputIterator, class T, class BinaryFunction> | |
103 | inline T dispatch_accumulate(InputIterator first, | |
104 | InputIterator last, | |
105 | T init, | |
106 | BinaryFunction function, | |
107 | command_queue &queue) | |
108 | { | |
109 | size_t size = iterator_range_size(first, last); | |
110 | if(size == 0){ | |
111 | return init; | |
112 | } | |
113 | ||
114 | if(can_accumulate_with_reduce(init, function)){ | |
115 | T result; | |
116 | reduce(first, last, &result, function, queue); | |
117 | return result; | |
118 | } | |
119 | else { | |
120 | return generic_accumulate(first, last, init, function, queue); | |
121 | } | |
122 | } | |
123 | ||
124 | } // end detail namespace | |
125 | ||
126 | /// Returns the result of applying \p function to the elements in the | |
127 | /// range [\p first, \p last) and \p init. | |
128 | /// | |
129 | /// If no function is specified, \c plus will be used. | |
130 | /// | |
131 | /// \param first first element in the input range | |
132 | /// \param last last element in the input range | |
133 | /// \param init initial value | |
134 | /// \param function binary reduction function | |
135 | /// \param queue command queue to perform the operation | |
136 | /// | |
137 | /// \return the accumulated result value | |
138 | /// | |
139 | /// In specific situations the call to \c accumulate() can be automatically | |
140 | /// optimized to a call to the more efficient \c reduce() algorithm. This | |
141 | /// occurs when the binary reduction function is recognized as associative | |
142 | /// (such as the \c plus<int> function). | |
143 | /// | |
144 | /// Note that because floating-point addition is not associative, calling | |
145 | /// \c accumulate() with \c plus<float> results in a less efficient serial | |
146 | /// reduction algorithm being executed. If a slight loss in precision is | |
147 | /// acceptable, the more efficient parallel \c reduce() algorithm should be | |
148 | /// used instead. | |
149 | /// | |
150 | /// For example: | |
151 | /// \code | |
152 | /// // with vec = boost::compute::vector<int> | |
153 | /// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast | |
154 | /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast | |
155 | /// | |
156 | /// // with vec = boost::compute::vector<float> | |
157 | /// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow | |
158 | /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast | |
159 | /// \endcode | |
160 | /// | |
161 | /// Space complexity: \Omega(1)<br> | |
162 | /// Space complexity when optimized to \c reduce(): \Omega(n) | |
163 | /// | |
164 | /// \see reduce() | |
165 | template<class InputIterator, class T, class BinaryFunction> | |
166 | inline T accumulate(InputIterator first, | |
167 | InputIterator last, | |
168 | T init, | |
169 | BinaryFunction function, | |
170 | command_queue &queue = system::default_queue()) | |
171 | { | |
172 | BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); | |
173 | ||
174 | return detail::dispatch_accumulate(first, last, init, function, queue); | |
175 | } | |
176 | ||
177 | /// \overload | |
178 | template<class InputIterator, class T> | |
179 | inline T accumulate(InputIterator first, | |
180 | InputIterator last, | |
181 | T init, | |
182 | command_queue &queue = system::default_queue()) | |
183 | { | |
184 | BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); | |
185 | typedef typename std::iterator_traits<InputIterator>::value_type IT; | |
186 | ||
187 | return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue); | |
188 | } | |
189 | ||
190 | } // end compute namespace | |
191 | } // end boost namespace | |
192 | ||
193 | #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP |