]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/compute/kernel.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / compute / kernel.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #ifndef BOOST_COMPUTE_KERNEL_HPP
12 #define BOOST_COMPUTE_KERNEL_HPP
13
#include <string>
#include <vector>

#include <boost/assert.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/optional.hpp>

#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups

#include <boost/compute/config.hpp>
#include <boost/compute/exception.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/platform.hpp>
#include <boost/compute/type_traits/is_fundamental.hpp>
#include <boost/compute/detail/get_object_info.hpp>
#include <boost/compute/detail/assert_cl_success.hpp>
29
30 namespace boost {
31 namespace compute {
namespace detail {

// Functor invoked by kernel::set_arg(index, value) to bind a value of
// type T. Only declared here so the kernel class can name it; the
// definitions follow the class.
template<typename T>
struct set_kernel_arg;

} // end detail namespace
37
38 /// \class kernel
39 /// \brief A compute kernel.
40 ///
41 /// \see command_queue, program
42 class kernel
43 {
44 public:
45 /// Creates a null kernel object.
46 kernel()
47 : m_kernel(0)
48 {
49 }
50
51 /// Creates a new kernel object for \p kernel. If \p retain is
52 /// \c true, the reference count for \p kernel will be incremented.
53 explicit kernel(cl_kernel kernel, bool retain = true)
54 : m_kernel(kernel)
55 {
56 if(m_kernel && retain){
57 clRetainKernel(m_kernel);
58 }
59 }
60
61 /// Creates a new kernel object with \p name from \p program.
62 kernel(const program &program, const std::string &name)
63 {
64 cl_int error = 0;
65 m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
66
67 if(!m_kernel){
68 BOOST_THROW_EXCEPTION(opencl_error(error));
69 }
70 }
71
72 /// Creates a new kernel object as a copy of \p other.
73 kernel(const kernel &other)
74 : m_kernel(other.m_kernel)
75 {
76 if(m_kernel){
77 clRetainKernel(m_kernel);
78 }
79 }
80
81 /// Copies the kernel object from \p other to \c *this.
82 kernel& operator=(const kernel &other)
83 {
84 if(this != &other){
85 if(m_kernel){
86 clReleaseKernel(m_kernel);
87 }
88
89 m_kernel = other.m_kernel;
90
91 if(m_kernel){
92 clRetainKernel(m_kernel);
93 }
94 }
95
96 return *this;
97 }
98
99 #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
100 /// Move-constructs a new kernel object from \p other.
101 kernel(kernel&& other) BOOST_NOEXCEPT
102 : m_kernel(other.m_kernel)
103 {
104 other.m_kernel = 0;
105 }
106
107 /// Move-assigns the kernel from \p other to \c *this.
108 kernel& operator=(kernel&& other) BOOST_NOEXCEPT
109 {
110 if(m_kernel){
111 clReleaseKernel(m_kernel);
112 }
113
114 m_kernel = other.m_kernel;
115 other.m_kernel = 0;
116
117 return *this;
118 }
119 #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
120
121 /// Destroys the kernel object.
122 ~kernel()
123 {
124 if(m_kernel){
125 BOOST_COMPUTE_ASSERT_CL_SUCCESS(
126 clReleaseKernel(m_kernel)
127 );
128 }
129 }
130
131 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
132 /// Creates a new kernel object based on a shallow copy of
133 /// the undelying OpenCL kernel object.
134 ///
135 /// \opencl_version_warning{2,1}
136 ///
137 /// \see_opencl21_ref{clCloneKernel}
138 kernel clone()
139 {
140 cl_int ret = 0;
141 cl_kernel k = clCloneKernel(m_kernel, &ret);
142 return kernel(k, false);
143 }
144 #endif // BOOST_COMPUTE_CL_VERSION_2_1
145
146 /// Returns a reference to the underlying OpenCL kernel object.
147 cl_kernel& get() const
148 {
149 return const_cast<cl_kernel &>(m_kernel);
150 }
151
152 /// Returns the function name for the kernel.
153 std::string name() const
154 {
155 return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
156 }
157
158 /// Returns the number of arguments for the kernel.
159 size_t arity() const
160 {
161 return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
162 }
163
164 /// Returns the program for the kernel.
165 program get_program() const
166 {
167 return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
168 }
169
170 /// Returns the context for the kernel.
171 context get_context() const
172 {
173 return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
174 }
175
176 /// Returns information about the kernel.
177 ///
178 /// \see_opencl_ref{clGetKernelInfo}
179 template<class T>
180 T get_info(cl_kernel_info info) const
181 {
182 return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
183 }
184
185 /// \overload
186 template<int Enum>
187 typename detail::get_object_info_type<kernel, Enum>::type
188 get_info() const;
189
190 #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
191 /// Returns information about the argument at \p index.
192 ///
193 /// For example, to get the name of the first argument:
194 /// \code
195 /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
196 /// \endcode
197 ///
198 /// Note, this function requires that the program be compiled with the
199 /// \c "-cl-kernel-arg-info" flag. For example:
200 /// \code
201 /// program.build("-cl-kernel-arg-info");
202 /// \endcode
203 ///
204 /// \opencl_version_warning{1,2}
205 ///
206 /// \see_opencl_ref{clGetKernelArgInfo}
207 template<class T>
208 T get_arg_info(size_t index, cl_kernel_arg_info info) const
209 {
210 return detail::get_object_info<T>(
211 clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
212 );
213 }
214
215 /// \overload
216 template<int Enum>
217 typename detail::get_object_info_type<kernel, Enum>::type
218 get_arg_info(size_t index) const;
219 #endif // BOOST_COMPUTE_CL_VERSION_1_2
220
221 /// Returns work-group information for the kernel with \p device.
222 ///
223 /// \see_opencl_ref{clGetKernelWorkGroupInfo}
224 template<class T>
225 T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
226 {
227 return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
228 }
229
230 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
231 /// Returns sub-group information for the kernel with \p device. Returns a null
232 /// optional if \p device is not 2.1 device, or is not 2.0 device with support
233 /// for cl_khr_subgroups extension.
234 ///
235 /// \opencl_version_warning{2,1}
236 /// \see_opencl21_ref{clGetKernelSubGroupInfo}
237 /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
238 template<class T>
239 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
240 const size_t input_size, const void * input) const
241 {
242 if(device.check_version(2, 1))
243 {
244 return detail::get_object_info<T>(
245 clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
246 );
247 }
248 else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
249 {
250 return boost::optional<T>();
251 }
252 // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
253 // are supported in cl_khr_subgroups extension for 2.0 devices.
254 else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
255 {
256 return boost::optional<T>();
257 }
258
259 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
260 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
261 reinterpret_cast<size_t>(
262 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
263 )
264 );
265
266 return detail::get_object_info<T>(
267 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
268 );
269 }
270
271 /// \overload
272 template<class T>
273 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
274 {
275 return get_sub_group_info<T>(device, info, 0, 0);
276 }
277
278 /// \overload
279 template<class T>
280 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
281 const size_t input) const
282 {
283 return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
284 }
285 #endif // BOOST_COMPUTE_CL_VERSION_2_1
286
287 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
288 /// Returns sub-group information for the kernel with \p device. Returns a null
289 /// optional if cl_khr_subgroups extension is not supported by \p device.
290 ///
291 /// \opencl_version_warning{2,0}
292 /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
293 template<class T>
294 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
295 const size_t input_size, const void * input) const
296 {
297 if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
298 {
299 return boost::optional<T>();
300 }
301
302 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
303 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
304 reinterpret_cast<size_t>(
305 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
306 )
307 );
308
309 return detail::get_object_info<T>(
310 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
311 );
312 }
313 #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
314
315 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
316 /// \overload
317 template<class T>
318 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
319 const std::vector<size_t> input) const
320 {
321 BOOST_ASSERT(input.size() > 0);
322 return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
323 }
324 #endif // BOOST_COMPUTE_CL_VERSION_2_0
325
326 /// Sets the argument at \p index to \p value with \p size.
327 ///
328 /// \see_opencl_ref{clSetKernelArg}
329 void set_arg(size_t index, size_t size, const void *value)
330 {
331 BOOST_ASSERT(index < arity());
332
333 cl_int ret = clSetKernelArg(m_kernel,
334 static_cast<cl_uint>(index),
335 size,
336 value);
337 if(ret != CL_SUCCESS){
338 BOOST_THROW_EXCEPTION(opencl_error(ret));
339 }
340 }
341
342 /// Sets the argument at \p index to \p value.
343 ///
344 /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
345 /// calling set_arg(index, sizeof(type), &value).
346 ///
347 /// Additionally, this method is specialized for device memory objects
348 /// such as buffer and image2d. This allows for them to be passed directly
349 /// without having to extract their underlying cl_mem object.
350 ///
351 /// This method is also specialized for device container types such as
352 /// vector<T> and array<T, N>. This allows for them to be passed directly
353 /// as kernel arguments without having to extract their underlying buffer.
354 ///
355 /// For setting local memory arguments (e.g. "__local float *buf"), the
356 /// local_buffer<T> class may be used:
357 /// \code
358 /// // set argument to a local buffer with storage for 32 float's
359 /// kernel.set_arg(0, local_buffer<float>(32));
360 /// \endcode
361 template<class T>
362 void set_arg(size_t index, const T &value)
363 {
364 // if you get a compilation error pointing here it means you
365 // attempted to set a kernel argument from an invalid type.
366 detail::set_kernel_arg<T>()(*this, index, value);
367 }
368
369 /// \internal_
370 void set_arg(size_t index, const cl_mem mem)
371 {
372 set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
373 }
374
375 /// \internal_
376 void set_arg(size_t index, const cl_sampler sampler)
377 {
378 set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
379 }
380
381 /// \internal_
382 void set_arg_svm_ptr(size_t index, void* ptr)
383 {
384 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
385 cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
386 if(ret != CL_SUCCESS){
387 BOOST_THROW_EXCEPTION(opencl_error(ret));
388 }
389 #else
390 (void) index;
391 (void) ptr;
392 BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
393 #endif
394 }
395
396 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
397 /// Sets the arguments for the kernel to \p args.
398 template<class... T>
399 void set_args(T&&... args)
400 {
401 BOOST_ASSERT(sizeof...(T) <= arity());
402
403 _set_args<0>(args...);
404 }
405 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
406
407 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
408 /// Sets additional execution information for the kernel.
409 ///
410 /// \opencl_version_warning{2,0}
411 ///
412 /// \see_opencl2_ref{clSetKernelExecInfo}
413 void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
414 {
415 cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
416 if(ret != CL_SUCCESS){
417 BOOST_THROW_EXCEPTION(opencl_error(ret));
418 }
419 }
420 #endif // BOOST_COMPUTE_CL_VERSION_2_0
421
422 /// Returns \c true if the kernel is the same at \p other.
423 bool operator==(const kernel &other) const
424 {
425 return m_kernel == other.m_kernel;
426 }
427
428 /// Returns \c true if the kernel is different from \p other.
429 bool operator!=(const kernel &other) const
430 {
431 return m_kernel != other.m_kernel;
432 }
433
434 /// \internal_
435 operator cl_kernel() const
436 {
437 return m_kernel;
438 }
439
440 /// \internal_
441 static kernel create_with_source(const std::string &source,
442 const std::string &name,
443 const context &context)
444 {
445 return program::build_with_source(source, context).create_kernel(name);
446 }
447
448 private:
449 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
450 /// \internal_
451 template<size_t N>
452 void _set_args()
453 {
454 }
455
456 /// \internal_
457 template<size_t N, class T, class... Args>
458 void _set_args(T&& arg, Args&&... rest)
459 {
460 set_arg(N, arg);
461 _set_args<N+1>(rest...);
462 }
463 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
464
465 private:
466 cl_kernel m_kernel;
467 };
468
469 inline kernel program::create_kernel(const std::string &name) const
470 {
471 return kernel(*this, name);
472 }
473
474 /// \internal_ define get_info() specializations for kernel
475 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
476 ((std::string, CL_KERNEL_FUNCTION_NAME))
477 ((cl_uint, CL_KERNEL_NUM_ARGS))
478 ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
479 ((cl_context, CL_KERNEL_CONTEXT))
480 ((cl_program, CL_KERNEL_PROGRAM))
481 )
482
483 #ifdef BOOST_COMPUTE_CL_VERSION_1_2
484 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
485 ((std::string, CL_KERNEL_ATTRIBUTES))
486 )
487 #endif // BOOST_COMPUTE_CL_VERSION_1_2
488
/// \internal_ define get_arg_info() specializations for kernel
///
/// For every (type, enum) pair the helper macro emits two things: a
/// detail::get_object_info_type<kernel, enum> specialization naming the
/// result type, and a kernel::get_arg_info<enum>(index) specialization that
/// forwards to get_arg_info<type>(index, enum).
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(info_type, info_enum) \
    namespace detail { \
        template<> \
        struct get_object_info_type<kernel, info_enum> \
        { \
            typedef info_type type; \
        }; \
    } \
    template<> \
    inline info_type kernel::get_arg_info<info_enum>(size_t index) const \
    { \
        return get_arg_info<info_type>(index, info_enum); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    std::string, CL_KERNEL_ARG_NAME)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
505
506 namespace detail {
507
508 // set_kernel_arg implementation for built-in types
509 template<class T>
510 struct set_kernel_arg
511 {
512 typename boost::enable_if<is_fundamental<T> >::type
513 operator()(kernel &kernel_, size_t index, const T &value)
514 {
515 kernel_.set_arg(index, sizeof(T), &value);
516 }
517 };
518
519 // set_kernel_arg specialization for char (different from built-in cl_char)
520 template<>
521 struct set_kernel_arg<char>
522 {
523 void operator()(kernel &kernel_, size_t index, const char c)
524 {
525 kernel_.set_arg(index, sizeof(char), &c);
526 }
527 };
528
529 } // end detail namespace
530 } // end namespace compute
531 } // end namespace boost
532
533 #endif // BOOST_COMPUTE_KERNEL_HPP