]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | //---------------------------------------------------------------------------// |
2 | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP | |
12 | #define BOOST_COMPUTE_ALGORITHM_FILL_HPP | |
13 | ||
14 | #include <iterator> | |
15 | ||
92f5a8d4 | 16 | #include <boost/static_assert.hpp> |
7c673cae FG |
17 | #include <boost/mpl/int.hpp> |
18 | #include <boost/mpl/vector.hpp> | |
19 | #include <boost/mpl/contains.hpp> | |
20 | #include <boost/utility/enable_if.hpp> | |
21 | ||
22 | #include <boost/compute/cl.hpp> | |
23 | #include <boost/compute/system.hpp> | |
24 | #include <boost/compute/command_queue.hpp> | |
25 | #include <boost/compute/algorithm/copy.hpp> | |
26 | #include <boost/compute/async/future.hpp> | |
27 | #include <boost/compute/iterator/constant_iterator.hpp> | |
28 | #include <boost/compute/iterator/discard_iterator.hpp> | |
29 | #include <boost/compute/detail/is_buffer_iterator.hpp> | |
30 | #include <boost/compute/detail/iterator_range_size.hpp> | |
92f5a8d4 TL |
31 | #include <boost/compute/type_traits/is_device_iterator.hpp> |
32 | ||
7c673cae FG |
33 | |
34 | namespace boost { | |
35 | namespace compute { | |
36 | namespace detail { | |
37 | ||
38 | namespace mpl = boost::mpl; | |
39 | ||
40 | // fills the range [first, first + count) with value using copy() | |
41 | template<class BufferIterator, class T> | |
42 | inline void fill_with_copy(BufferIterator first, | |
43 | size_t count, | |
44 | const T &value, | |
45 | command_queue &queue) | |
46 | { | |
47 | ::boost::compute::copy( | |
48 | ::boost::compute::make_constant_iterator(value, 0), | |
49 | ::boost::compute::make_constant_iterator(value, count), | |
50 | first, | |
51 | queue | |
52 | ); | |
53 | } | |
54 | ||
55 | // fills the range [first, first + count) with value using copy_async() | |
56 | template<class BufferIterator, class T> | |
57 | inline future<void> fill_async_with_copy(BufferIterator first, | |
58 | size_t count, | |
59 | const T &value, | |
60 | command_queue &queue) | |
61 | { | |
62 | return ::boost::compute::copy_async( | |
63 | ::boost::compute::make_constant_iterator(value, 0), | |
64 | ::boost::compute::make_constant_iterator(value, count), | |
65 | first, | |
66 | queue | |
67 | ); | |
68 | } | |
69 | ||
b32b8144 | 70 | #if defined(BOOST_COMPUTE_CL_VERSION_1_2) |
7c673cae FG |
71 | |
72 | // meta-function returing true if Iterator points to a range of values | |
73 | // that can be filled using clEnqueueFillBuffer(). to meet this criteria | |
74 | // it must have a buffer accessible through iter.get_buffer() and the | |
75 | // size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}. | |
76 | template<class Iterator> | |
77 | struct is_valid_fill_buffer_iterator : | |
78 | public mpl::and_< | |
79 | is_buffer_iterator<Iterator>, | |
80 | mpl::contains< | |
81 | mpl::vector< | |
82 | mpl::int_<1>, | |
83 | mpl::int_<2>, | |
84 | mpl::int_<4>, | |
85 | mpl::int_<8>, | |
86 | mpl::int_<16>, | |
87 | mpl::int_<32>, | |
88 | mpl::int_<64>, | |
89 | mpl::int_<128> | |
90 | >, | |
91 | mpl::int_< | |
92 | sizeof(typename std::iterator_traits<Iterator>::value_type) | |
93 | > | |
94 | > | |
95 | >::type { }; | |
96 | ||
97 | template<> | |
98 | struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {}; | |
99 | ||
100 | // specialization which uses clEnqueueFillBuffer for buffer iterators | |
101 | template<class BufferIterator, class T> | |
102 | inline void | |
103 | dispatch_fill(BufferIterator first, | |
104 | size_t count, | |
105 | const T &value, | |
106 | command_queue &queue, | |
107 | typename boost::enable_if< | |
108 | is_valid_fill_buffer_iterator<BufferIterator> | |
109 | >::type* = 0) | |
110 | { | |
111 | typedef typename std::iterator_traits<BufferIterator>::value_type value_type; | |
112 | ||
113 | if(count == 0){ | |
114 | // nothing to do | |
115 | return; | |
116 | } | |
117 | ||
118 | // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) | |
119 | if(!queue.check_device_version(1, 2)){ | |
120 | return fill_with_copy(first, count, value, queue); | |
121 | } | |
122 | ||
123 | value_type pattern = static_cast<value_type>(value); | |
124 | size_t offset = static_cast<size_t>(first.get_index()); | |
125 | ||
126 | if(count == 1){ | |
127 | // use clEnqueueWriteBuffer() directly when writing a single value | |
128 | // to the device buffer. this is potentially more efficient and also | |
129 | // works around a bug in the intel opencl driver. | |
130 | queue.enqueue_write_buffer( | |
131 | first.get_buffer(), | |
132 | offset * sizeof(value_type), | |
133 | sizeof(value_type), | |
134 | &pattern | |
135 | ); | |
136 | } | |
137 | else { | |
138 | queue.enqueue_fill_buffer( | |
139 | first.get_buffer(), | |
140 | &pattern, | |
141 | sizeof(value_type), | |
142 | offset * sizeof(value_type), | |
143 | count * sizeof(value_type) | |
144 | ); | |
145 | } | |
146 | } | |
147 | ||
148 | template<class BufferIterator, class T> | |
149 | inline future<void> | |
150 | dispatch_fill_async(BufferIterator first, | |
151 | size_t count, | |
152 | const T &value, | |
153 | command_queue &queue, | |
154 | typename boost::enable_if< | |
155 | is_valid_fill_buffer_iterator<BufferIterator> | |
156 | >::type* = 0) | |
157 | { | |
158 | typedef typename std::iterator_traits<BufferIterator>::value_type value_type; | |
159 | ||
160 | // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) | |
161 | if(!queue.check_device_version(1, 2)){ | |
162 | return fill_async_with_copy(first, count, value, queue); | |
163 | } | |
164 | ||
165 | value_type pattern = static_cast<value_type>(value); | |
166 | size_t offset = static_cast<size_t>(first.get_index()); | |
167 | ||
168 | event event_ = | |
169 | queue.enqueue_fill_buffer(first.get_buffer(), | |
170 | &pattern, | |
171 | sizeof(value_type), | |
172 | offset * sizeof(value_type), | |
173 | count * sizeof(value_type)); | |
174 | ||
175 | return future<void>(event_); | |
176 | } | |
177 | ||
b32b8144 | 178 | #ifdef BOOST_COMPUTE_CL_VERSION_2_0 |
7c673cae FG |
179 | // overloads for svm_ptr<T> |
180 | template<class T> | |
181 | inline void dispatch_fill(svm_ptr<T> first, | |
182 | size_t count, | |
183 | const T &value, | |
184 | command_queue &queue) | |
185 | { | |
186 | if(count == 0){ | |
187 | return; | |
188 | } | |
189 | ||
190 | queue.enqueue_svm_fill( | |
191 | first.get(), &value, sizeof(T), count * sizeof(T) | |
192 | ); | |
193 | } | |
194 | ||
195 | template<class T> | |
196 | inline future<void> dispatch_fill_async(svm_ptr<T> first, | |
197 | size_t count, | |
198 | const T &value, | |
199 | command_queue &queue) | |
200 | { | |
201 | if(count == 0){ | |
202 | return future<void>(); | |
203 | } | |
204 | ||
205 | event event_ = queue.enqueue_svm_fill( | |
206 | first.get(), &value, sizeof(T), count * sizeof(T) | |
207 | ); | |
208 | ||
209 | return future<void>(event_); | |
210 | } | |
b32b8144 | 211 | #endif // BOOST_COMPUTE_CL_VERSION_2_0 |
7c673cae FG |
212 | |
213 | // default implementations | |
214 | template<class BufferIterator, class T> | |
215 | inline void | |
216 | dispatch_fill(BufferIterator first, | |
217 | size_t count, | |
218 | const T &value, | |
219 | command_queue &queue, | |
220 | typename boost::disable_if< | |
221 | is_valid_fill_buffer_iterator<BufferIterator> | |
222 | >::type* = 0) | |
223 | { | |
224 | fill_with_copy(first, count, value, queue); | |
225 | } | |
226 | ||
227 | template<class BufferIterator, class T> | |
228 | inline future<void> | |
229 | dispatch_fill_async(BufferIterator first, | |
230 | size_t count, | |
231 | const T &value, | |
232 | command_queue &queue, | |
233 | typename boost::disable_if< | |
234 | is_valid_fill_buffer_iterator<BufferIterator> | |
235 | >::type* = 0) | |
236 | { | |
237 | return fill_async_with_copy(first, count, value, queue); | |
238 | } | |
239 | #else | |
240 | template<class BufferIterator, class T> | |
241 | inline void dispatch_fill(BufferIterator first, | |
242 | size_t count, | |
243 | const T &value, | |
244 | command_queue &queue) | |
245 | { | |
246 | fill_with_copy(first, count, value, queue); | |
247 | } | |
248 | ||
249 | template<class BufferIterator, class T> | |
250 | inline future<void> dispatch_fill_async(BufferIterator first, | |
251 | size_t count, | |
252 | const T &value, | |
253 | command_queue &queue) | |
254 | { | |
255 | return fill_async_with_copy(first, count, value, queue); | |
256 | } | |
b32b8144 | 257 | #endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2) |
7c673cae FG |
258 | |
259 | } // end detail namespace | |
260 | ||
261 | /// Fills the range [\p first, \p last) with \p value. | |
262 | /// | |
263 | /// \param first first element in the range to fill | |
264 | /// \param last last element in the range to fill | |
265 | /// \param value value to copy to each element | |
266 | /// \param queue command queue to perform the operation | |
267 | /// | |
268 | /// For example, to fill a vector on the device with sevens: | |
269 | /// \code | |
270 | /// // vector on the device | |
271 | /// boost::compute::vector<int> vec(10, context); | |
272 | /// | |
273 | /// // fill vector with sevens | |
274 | /// boost::compute::fill(vec.begin(), vec.end(), 7, queue); | |
275 | /// \endcode | |
276 | /// | |
b32b8144 FG |
277 | /// Space complexity: \Omega(1) |
278 | /// | |
7c673cae FG |
279 | /// \see boost::compute::fill_n() |
280 | template<class BufferIterator, class T> | |
281 | inline void fill(BufferIterator first, | |
282 | BufferIterator last, | |
283 | const T &value, | |
284 | command_queue &queue = system::default_queue()) | |
285 | { | |
92f5a8d4 | 286 | BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value); |
7c673cae FG |
287 | size_t count = detail::iterator_range_size(first, last); |
288 | if(count == 0){ | |
289 | return; | |
290 | } | |
291 | ||
292 | detail::dispatch_fill(first, count, value, queue); | |
293 | } | |
294 | ||
295 | template<class BufferIterator, class T> | |
296 | inline future<void> fill_async(BufferIterator first, | |
297 | BufferIterator last, | |
298 | const T &value, | |
299 | command_queue &queue = system::default_queue()) | |
300 | { | |
92f5a8d4 | 301 | BOOST_STATIC_ASSERT(detail::is_buffer_iterator<BufferIterator>::value); |
7c673cae FG |
302 | size_t count = detail::iterator_range_size(first, last); |
303 | if(count == 0){ | |
304 | return future<void>(); | |
305 | } | |
306 | ||
307 | return detail::dispatch_fill_async(first, count, value, queue); | |
308 | } | |
309 | ||
310 | } // end compute namespace | |
311 | } // end boost namespace | |
312 | ||
313 | #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP |