]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/compute/kernel.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / compute / kernel.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #ifndef BOOST_COMPUTE_KERNEL_HPP
12 #define BOOST_COMPUTE_KERNEL_HPP
13
#include <string>
#include <vector>

#include <boost/assert.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/optional.hpp>

#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups

#include <boost/compute/config.hpp>
#include <boost/compute/exception.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/platform.hpp>
#include <boost/compute/type_traits/is_fundamental.hpp>
#include <boost/compute/detail/diagnostic.hpp>
#include <boost/compute/detail/get_object_info.hpp>
#include <boost/compute/detail/assert_cl_success.hpp>
30
31 namespace boost {
32 namespace compute {
namespace detail {

// Forward declaration only: kernel::set_arg() dispatches through this
// functor, whose definition appears later in this header.
template<typename T> struct set_kernel_arg;

} // end detail namespace
38
39 /// \class kernel
40 /// \brief A compute kernel.
41 ///
42 /// \see command_queue, program
43 class kernel
44 {
45 public:
46 /// Creates a null kernel object.
47 kernel()
48 : m_kernel(0)
49 {
50 }
51
52 /// Creates a new kernel object for \p kernel. If \p retain is
53 /// \c true, the reference count for \p kernel will be incremented.
54 explicit kernel(cl_kernel kernel, bool retain = true)
55 : m_kernel(kernel)
56 {
57 if(m_kernel && retain){
58 clRetainKernel(m_kernel);
59 }
60 }
61
62 /// Creates a new kernel object with \p name from \p program.
63 kernel(const program &program, const std::string &name)
64 {
65 cl_int error = 0;
66 m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
67
68 if(!m_kernel){
69 BOOST_THROW_EXCEPTION(opencl_error(error));
70 }
71 }
72
73 /// Creates a new kernel object as a copy of \p other.
74 kernel(const kernel &other)
75 : m_kernel(other.m_kernel)
76 {
77 if(m_kernel){
78 clRetainKernel(m_kernel);
79 }
80 }
81
82 /// Copies the kernel object from \p other to \c *this.
83 kernel& operator=(const kernel &other)
84 {
85 if(this != &other){
86 if(m_kernel){
87 clReleaseKernel(m_kernel);
88 }
89
90 m_kernel = other.m_kernel;
91
92 if(m_kernel){
93 clRetainKernel(m_kernel);
94 }
95 }
96
97 return *this;
98 }
99
100 #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
101 /// Move-constructs a new kernel object from \p other.
102 kernel(kernel&& other) BOOST_NOEXCEPT
103 : m_kernel(other.m_kernel)
104 {
105 other.m_kernel = 0;
106 }
107
108 /// Move-assigns the kernel from \p other to \c *this.
109 kernel& operator=(kernel&& other) BOOST_NOEXCEPT
110 {
111 if(m_kernel){
112 clReleaseKernel(m_kernel);
113 }
114
115 m_kernel = other.m_kernel;
116 other.m_kernel = 0;
117
118 return *this;
119 }
120 #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
121
122 /// Destroys the kernel object.
123 ~kernel()
124 {
125 if(m_kernel){
126 BOOST_COMPUTE_ASSERT_CL_SUCCESS(
127 clReleaseKernel(m_kernel)
128 );
129 }
130 }
131
132 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
133 /// Creates a new kernel object based on a shallow copy of
134 /// the undelying OpenCL kernel object.
135 ///
136 /// \opencl_version_warning{2,1}
137 ///
138 /// \see_opencl21_ref{clCloneKernel}
139 kernel clone()
140 {
141 cl_int ret = 0;
142 cl_kernel k = clCloneKernel(m_kernel, &ret);
143 return kernel(k, false);
144 }
145 #endif // BOOST_COMPUTE_CL_VERSION_2_1
146
147 /// Returns a reference to the underlying OpenCL kernel object.
148 cl_kernel& get() const
149 {
150 return const_cast<cl_kernel &>(m_kernel);
151 }
152
153 /// Returns the function name for the kernel.
154 std::string name() const
155 {
156 return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
157 }
158
159 /// Returns the number of arguments for the kernel.
160 size_t arity() const
161 {
162 return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
163 }
164
165 /// Returns the program for the kernel.
166 program get_program() const
167 {
168 return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
169 }
170
171 /// Returns the context for the kernel.
172 context get_context() const
173 {
174 return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
175 }
176
177 /// Returns information about the kernel.
178 ///
179 /// \see_opencl_ref{clGetKernelInfo}
180 template<class T>
181 T get_info(cl_kernel_info info) const
182 {
183 return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
184 }
185
186 /// \overload
187 template<int Enum>
188 typename detail::get_object_info_type<kernel, Enum>::type
189 get_info() const;
190
191 #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
192 /// Returns information about the argument at \p index.
193 ///
194 /// For example, to get the name of the first argument:
195 /// \code
196 /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
197 /// \endcode
198 ///
199 /// Note, this function requires that the program be compiled with the
200 /// \c "-cl-kernel-arg-info" flag. For example:
201 /// \code
202 /// program.build("-cl-kernel-arg-info");
203 /// \endcode
204 ///
205 /// \opencl_version_warning{1,2}
206 ///
207 /// \see_opencl_ref{clGetKernelArgInfo}
208 template<class T>
209 T get_arg_info(size_t index, cl_kernel_arg_info info) const
210 {
211 return detail::get_object_info<T>(
212 clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
213 );
214 }
215
216 /// \overload
217 template<int Enum>
218 typename detail::get_object_info_type<kernel, Enum>::type
219 get_arg_info(size_t index) const;
220 #endif // BOOST_COMPUTE_CL_VERSION_1_2
221
222 /// Returns work-group information for the kernel with \p device.
223 ///
224 /// \see_opencl_ref{clGetKernelWorkGroupInfo}
225 template<class T>
226 T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
227 {
228 return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
229 }
230
231 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
232 /// Returns sub-group information for the kernel with \p device. Returns a null
233 /// optional if \p device is not 2.1 device, or is not 2.0 device with support
234 /// for cl_khr_subgroups extension.
235 ///
236 /// \opencl_version_warning{2,1}
237 /// \see_opencl21_ref{clGetKernelSubGroupInfo}
238 /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
239 template<class T>
240 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
241 const size_t input_size, const void * input) const
242 {
243 if(device.check_version(2, 1))
244 {
245 return detail::get_object_info<T>(
246 clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
247 );
248 }
249 else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
250 {
251 return boost::optional<T>();
252 }
253 // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
254 // are supported in cl_khr_subgroups extension for 2.0 devices.
255 else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
256 {
257 return boost::optional<T>();
258 }
259
260 BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
261 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
262 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
263 reinterpret_cast<size_t>(
264 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
265 )
266 );
267 BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
268
269 return detail::get_object_info<T>(
270 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
271 );
272 }
273
274 /// \overload
275 template<class T>
276 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
277 {
278 return get_sub_group_info<T>(device, info, 0, 0);
279 }
280
281 /// \overload
282 template<class T>
283 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
284 const size_t input) const
285 {
286 return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
287 }
288 #endif // BOOST_COMPUTE_CL_VERSION_2_1
289
290 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
291 /// Returns sub-group information for the kernel with \p device. Returns a null
292 /// optional if cl_khr_subgroups extension is not supported by \p device.
293 ///
294 /// \opencl_version_warning{2,0}
295 /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
296 template<class T>
297 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
298 const size_t input_size, const void * input) const
299 {
300 if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
301 {
302 return boost::optional<T>();
303 }
304
305 BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
306 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
307 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
308 reinterpret_cast<size_t>(
309 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
310 )
311 );
312 BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
313
314 return detail::get_object_info<T>(
315 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
316 );
317 }
318 #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
319
320 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
321 /// \overload
322 template<class T>
323 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
324 const std::vector<size_t> input) const
325 {
326 BOOST_ASSERT(input.size() > 0);
327 return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
328 }
329 #endif // BOOST_COMPUTE_CL_VERSION_2_0
330
331 /// Sets the argument at \p index to \p value with \p size.
332 ///
333 /// \see_opencl_ref{clSetKernelArg}
334 void set_arg(size_t index, size_t size, const void *value)
335 {
336 BOOST_ASSERT(index < arity());
337
338 cl_int ret = clSetKernelArg(m_kernel,
339 static_cast<cl_uint>(index),
340 size,
341 value);
342 if(ret != CL_SUCCESS){
343 BOOST_THROW_EXCEPTION(opencl_error(ret));
344 }
345 }
346
347 /// Sets the argument at \p index to \p value.
348 ///
349 /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
350 /// calling set_arg(index, sizeof(type), &value).
351 ///
352 /// Additionally, this method is specialized for device memory objects
353 /// such as buffer and image2d. This allows for them to be passed directly
354 /// without having to extract their underlying cl_mem object.
355 ///
356 /// This method is also specialized for device container types such as
357 /// vector<T> and array<T, N>. This allows for them to be passed directly
358 /// as kernel arguments without having to extract their underlying buffer.
359 ///
360 /// For setting local memory arguments (e.g. "__local float *buf"), the
361 /// local_buffer<T> class may be used:
362 /// \code
363 /// // set argument to a local buffer with storage for 32 float's
364 /// kernel.set_arg(0, local_buffer<float>(32));
365 /// \endcode
366 template<class T>
367 void set_arg(size_t index, const T &value)
368 {
369 // if you get a compilation error pointing here it means you
370 // attempted to set a kernel argument from an invalid type.
371 detail::set_kernel_arg<T>()(*this, index, value);
372 }
373
374 /// \internal_
375 void set_arg(size_t index, const cl_mem mem)
376 {
377 set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
378 }
379
380 /// \internal_
381 void set_arg(size_t index, const cl_sampler sampler)
382 {
383 set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
384 }
385
386 /// \internal_
387 void set_arg_svm_ptr(size_t index, void* ptr)
388 {
389 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
390 cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
391 if(ret != CL_SUCCESS){
392 BOOST_THROW_EXCEPTION(opencl_error(ret));
393 }
394 #else
395 (void) index;
396 (void) ptr;
397 BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
398 #endif
399 }
400
401 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
402 /// Sets the arguments for the kernel to \p args.
403 template<class... T>
404 void set_args(T&&... args)
405 {
406 BOOST_ASSERT(sizeof...(T) <= arity());
407
408 _set_args<0>(args...);
409 }
410 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
411
412 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
413 /// Sets additional execution information for the kernel.
414 ///
415 /// \opencl_version_warning{2,0}
416 ///
417 /// \see_opencl2_ref{clSetKernelExecInfo}
418 void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
419 {
420 cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
421 if(ret != CL_SUCCESS){
422 BOOST_THROW_EXCEPTION(opencl_error(ret));
423 }
424 }
425 #endif // BOOST_COMPUTE_CL_VERSION_2_0
426
427 /// Returns \c true if the kernel is the same at \p other.
428 bool operator==(const kernel &other) const
429 {
430 return m_kernel == other.m_kernel;
431 }
432
433 /// Returns \c true if the kernel is different from \p other.
434 bool operator!=(const kernel &other) const
435 {
436 return m_kernel != other.m_kernel;
437 }
438
439 /// \internal_
440 operator cl_kernel() const
441 {
442 return m_kernel;
443 }
444
445 /// \internal_
446 static kernel create_with_source(const std::string &source,
447 const std::string &name,
448 const context &context)
449 {
450 return program::build_with_source(source, context).create_kernel(name);
451 }
452
453 private:
454 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
455 /// \internal_
456 template<size_t N>
457 void _set_args()
458 {
459 }
460
461 /// \internal_
462 template<size_t N, class T, class... Args>
463 void _set_args(T&& arg, Args&&... rest)
464 {
465 set_arg(N, arg);
466 _set_args<N+1>(rest...);
467 }
468 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
469
470 private:
471 cl_kernel m_kernel;
472 };
473
474 inline kernel program::create_kernel(const std::string &name) const
475 {
476 return kernel(*this, name);
477 }
478
/// \internal_ define get_info() specializations for kernel
// NOTE(review): the macro is not defined in this file (presumably it comes
// from detail/get_object_info.hpp). It appears to supply the definitions of
// the kernel::get_info<Enum>() overload declared in the class, pairing each
// listed info enum with its result type -- confirm against the macro.
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
    ((std::string, CL_KERNEL_FUNCTION_NAME))
    ((cl_uint, CL_KERNEL_NUM_ARGS))
    ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
    ((cl_context, CL_KERNEL_CONTEXT))
    ((cl_program, CL_KERNEL_PROGRAM))
)

// CL_KERNEL_ATTRIBUTES is only registered when BOOST_COMPUTE_CL_VERSION_1_2
// is defined.
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
    ((std::string, CL_KERNEL_ATTRIBUTES))
)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
493
/// \internal_ define get_arg_info() specializations for kernel
///
/// For each (result type, info enum) pair the macro specializes
/// detail::get_object_info_type so the enum maps to its result type, and
/// defines kernel::get_arg_info<enum>(index) as a forward to the typed
/// get_arg_info<result type>(index, enum) overload.
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(ResultT, InfoEnum) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, InfoEnum> \
        { \
            typedef ResultT type; \
        }; \
    } \
    template<> \
    inline ResultT kernel::get_arg_info<InfoEnum>(size_t arg_index) const \
    { \
        return get_arg_info<ResultT>(arg_index, InfoEnum); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
510
511 namespace detail {
512
513 // set_kernel_arg implementation for built-in types
514 template<class T>
515 struct set_kernel_arg
516 {
517 typename boost::enable_if<is_fundamental<T> >::type
518 operator()(kernel &kernel_, size_t index, const T &value)
519 {
520 kernel_.set_arg(index, sizeof(T), &value);
521 }
522 };
523
524 // set_kernel_arg specialization for char (different from built-in cl_char)
525 template<>
526 struct set_kernel_arg<char>
527 {
528 void operator()(kernel &kernel_, size_t index, const char c)
529 {
530 kernel_.set_arg(index, sizeof(char), &c);
531 }
532 };
533
534 } // end detail namespace
535 } // end namespace compute
536 } // end namespace boost
537
538 #endif // BOOST_COMPUTE_KERNEL_HPP