1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
11 #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
12 #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
16 #include <boost/static_assert.hpp>
17 #include <boost/mpl/int.hpp>
18 #include <boost/mpl/vector.hpp>
19 #include <boost/mpl/contains.hpp>
20 #include <boost/utility/enable_if.hpp>
22 #include <boost/compute/cl.hpp>
23 #include <boost/compute/system.hpp>
24 #include <boost/compute/command_queue.hpp>
25 #include <boost/compute/algorithm/copy.hpp>
26 #include <boost/compute/async/future.hpp>
27 #include <boost/compute/iterator/constant_iterator.hpp>
28 #include <boost/compute/iterator/discard_iterator.hpp>
29 #include <boost/compute/detail/is_buffer_iterator.hpp>
30 #include <boost/compute/detail/iterator_range_size.hpp>
31 #include <boost/compute/type_traits/is_device_iterator.hpp>
38 namespace mpl = boost::mpl;
40 // fills the range [first, first + count) with value using copy()
// the source range handed to copy() is built from two constant iterators
// over value, positioned at 0 and at count.
41 template<class BufferIterator, class T>
42 inline void fill_with_copy(BufferIterator first,
47 ::boost::compute::copy(
48 ::boost::compute::make_constant_iterator(value, 0),
49 ::boost::compute::make_constant_iterator(value, count),
55 // fills the range [first, first + count) with value using copy_async()
// uses the same constant-iterator source range (positions 0 and count) as
// fill_with_copy(); the result of copy_async() is returned to the caller
// as a future<void>.
56 template<class BufferIterator, class T>
57 inline future<void> fill_async_with_copy(BufferIterator first,
62 return ::boost::compute::copy_async(
63 ::boost::compute::make_constant_iterator(value, 0),
64 ::boost::compute::make_constant_iterator(value, count),
70 #if defined(BOOST_COMPUTE_CL_VERSION_1_2)
72 // meta-function returning true if Iterator points to a range of values
73 // that can be filled using clEnqueueFillBuffer(). to meet these criteria
74 // it must have a buffer accessible through iter.get_buffer() and the
75 // size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
76 template<class Iterator>
77 struct is_valid_fill_buffer_iterator :
// first criterion: Iterator is a buffer iterator
79 is_buffer_iterator<Iterator>,
// second criterion: tested against the size of the iterator's value_type
92 sizeof(typename std::iterator_traits<Iterator>::value_type)
// discard_iterator is explicitly excluded from the clEnqueueFillBuffer() path
98 struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
100 // specialization which uses clEnqueueFillBuffer for buffer iterators
// selected (via enable_if) only when is_valid_fill_buffer_iterator holds
// for BufferIterator.
101 template<class BufferIterator, class T>
103 dispatch_fill(BufferIterator first,
106 command_queue &queue,
107 typename boost::enable_if<
108 is_valid_fill_buffer_iterator<BufferIterator>
111 typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
118 // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
// devices failing the 1.2 check fall back to the copy()-based fill
119 if(!queue.check_device_version(1, 2)){
120 return fill_with_copy(first, count, value, queue);
// convert value to the element type so the pattern is one value_type wide
123 value_type pattern = static_cast<value_type>(value);
// first's index; scaled by sizeof(value_type) below to form a byte offset
124 size_t offset = static_cast<size_t>(first.get_index());
127 // use clEnqueueWriteBuffer() directly when writing a single value
128 // to the device buffer. this is potentially more efficient and also
129 // works around a bug in the Intel OpenCL driver.
130 queue.enqueue_write_buffer(
132 offset * sizeof(value_type),
// general case: fill the byte range starting at offset * sizeof(value_type)
// and spanning count * sizeof(value_type) bytes
138 queue.enqueue_fill_buffer(
142 offset * sizeof(value_type),
143 count * sizeof(value_type)
// asynchronous counterpart of the fill-buffer dispatch_fill() specialization:
// selected (via enable_if) when is_valid_fill_buffer_iterator holds for
// BufferIterator, and returns a future<void> instead of void.
148 template<class BufferIterator, class T>
150 dispatch_fill_async(BufferIterator first,
153 command_queue &queue,
154 typename boost::enable_if<
155 is_valid_fill_buffer_iterator<BufferIterator>
158 typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
160 // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
// devices failing the 1.2 check fall back to the copy_async()-based fill
161 if(!queue.check_device_version(1, 2)){
162 return fill_async_with_copy(first, count, value, queue);
// convert value to the element type so the pattern is one value_type wide
165 value_type pattern = static_cast<value_type>(value);
// first's index; scaled by sizeof(value_type) below to form a byte offset
166 size_t offset = static_cast<size_t>(first.get_index());
// fill the buffer behind first: byte offset, then byte length
169 queue.enqueue_fill_buffer(first.get_buffer(),
172 offset * sizeof(value_type),
173 count * sizeof(value_type));
// hand the event back to the caller wrapped in a future
175 return future<void>(event_);
178 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
179 // specializations for svm_ptr<T>
// fills through enqueue_svm_fill(): the pattern is the single value
// (sizeof(T) bytes) and the filled region is count * sizeof(T) bytes
// starting at first.get().
181 inline void dispatch_fill(svm_ptr<T> first,
184 command_queue &queue)
190 queue.enqueue_svm_fill(
191 first.get(), &value, sizeof(T), count * sizeof(T)
// asynchronous svm_ptr<T> fill: enqueues an SVM fill and returns a
// future<void> built from the resulting event.
196 inline future<void> dispatch_fill_async(svm_ptr<T> first,
199 command_queue &queue)
// NOTE(review): early exit with a default-constructed future; presumably
// the empty-range case -- confirm against the guarding condition.
202 return future<void>();
// pattern is the single value (sizeof(T) bytes); region is count * sizeof(T) bytes
205 event event_ = queue.enqueue_svm_fill(
206 first.get(), &value, sizeof(T), count * sizeof(T)
209 return future<void>(event_);
211 #endif // BOOST_COMPUTE_CL_VERSION_2_0
213 // default implementations
// chosen (via disable_if) when is_valid_fill_buffer_iterator does not hold
// for BufferIterator: forwards to the copy()-based fill.
214 template<class BufferIterator, class T>
216 dispatch_fill(BufferIterator first,
219 command_queue &queue,
220 typename boost::disable_if<
221 is_valid_fill_buffer_iterator<BufferIterator>
224 fill_with_copy(first, count, value, queue);
// default asynchronous implementation: chosen (via disable_if) when
// is_valid_fill_buffer_iterator does not hold for BufferIterator; forwards
// to the copy_async()-based fill and returns its result.
227 template<class BufferIterator, class T>
229 dispatch_fill_async(BufferIterator first,
232 command_queue &queue,
233 typename boost::disable_if<
234 is_valid_fill_buffer_iterator<BufferIterator>
237 return fill_async_with_copy(first, count, value, queue);
// dispatch_fill() variant with no fill-buffer path: always forwards to the
// copy()-based fill.
// NOTE(review): appears to be the variant compiled when
// BOOST_COMPUTE_CL_VERSION_1_2 is not defined (per the closing #endif
// comment) -- confirm.
240 template<class BufferIterator, class T>
241 inline void dispatch_fill(BufferIterator first,
244 command_queue &queue)
246 fill_with_copy(first, count, value, queue);
// dispatch_fill_async() variant with no fill-buffer path: always forwards to
// the copy_async()-based fill and returns its future<void>.
// NOTE(review): appears to be the variant compiled when
// BOOST_COMPUTE_CL_VERSION_1_2 is not defined (per the closing #endif
// comment) -- confirm.
249 template<class BufferIterator, class T>
250 inline future<void> dispatch_fill_async(BufferIterator first,
253 command_queue &queue)
255 return fill_async_with_copy(first, count, value, queue);
257 #endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2)
259 } // end detail namespace
261 /// Fills the range [\p first, \p last) with \p value.
263 /// \param first first element in the range to fill
264 /// \param last one past the last element in the range to fill
265 /// \param value value to copy to each element
266 /// \param queue command queue to perform the operation (defaults to system::default_queue())
268 /// For example, to fill a vector on the device with sevens:
270 /// // vector on the device
271 /// boost::compute::vector<int> vec(10, context);
273 /// // fill vector with sevens
274 /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
277 /// Space complexity: \Omega(1)
279 /// \see boost::compute::fill_n()
280 template<class BufferIterator, class T>
281 inline void fill(BufferIterator first,
284 command_queue &queue = system::default_queue())
// compile-time check: BufferIterator must be a device iterator
286 BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value);
// number of elements in [first, last)
287 size_t count = detail::iterator_range_size(first, last);
// overload resolution on the iterator type selects the fill strategy
292 detail::dispatch_fill(first, count, value, queue);
/// Asynchronous variant of fill(): starts filling the range beginning at
/// \p first with \p value on \p queue (defaults to system::default_queue())
/// and returns a future<void> for the operation.
///
/// \see boost::compute::fill()
295 template<class BufferIterator, class T>
296 inline future<void> fill_async(BufferIterator first,
299 command_queue &queue = system::default_queue())
// NOTE(review): this asserts detail::is_buffer_iterator, whereas fill()
// asserts is_device_iterator -- confirm the stricter check is intended.
301 BOOST_STATIC_ASSERT(detail::is_buffer_iterator<BufferIterator>::value);
// number of elements in [first, last)
302 size_t count = detail::iterator_range_size(first, last);
// NOTE(review): early exit with a default-constructed future; presumably
// the empty-range case -- confirm against the guarding condition.
304 return future<void>();
// overload resolution on the iterator type selects the fill strategy
307 return detail::dispatch_fill_async(first, count, value, queue);
310 } // end compute namespace
311 } // end boost namespace
313 #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP