]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | //---------------------------------------------------------------------------// |
2 | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_KERNEL_HPP | |
12 | #define BOOST_COMPUTE_KERNEL_HPP | |
13 | ||
14 | #include <string> | |
15 | ||
16 | #include <boost/assert.hpp> | |
17 | #include <boost/utility/enable_if.hpp> | |
18 | ||
19 | #include <boost/compute/config.hpp> | |
20 | #include <boost/compute/program.hpp> | |
21 | #include <boost/compute/exception.hpp> | |
22 | #include <boost/compute/type_traits/is_fundamental.hpp> | |
23 | #include <boost/compute/detail/get_object_info.hpp> | |
24 | #include <boost/compute/detail/assert_cl_success.hpp> | |
25 | ||
26 | namespace boost { | |
27 | namespace compute { | |
namespace detail {

// Functor that kernel::set_arg(index, value) delegates to. It is only
// declared at this point; the definition appears after the kernel class
// because it needs the complete kernel type.
template<typename T>
struct set_kernel_arg;

} // end detail namespace
33 | ||
34 | /// \class kernel | |
35 | /// \brief A compute kernel. | |
36 | /// | |
37 | /// \see command_queue, program | |
38 | class kernel | |
39 | { | |
40 | public: | |
41 | /// Creates a null kernel object. | |
42 | kernel() | |
43 | : m_kernel(0) | |
44 | { | |
45 | } | |
46 | ||
47 | /// Creates a new kernel object for \p kernel. If \p retain is | |
48 | /// \c true, the reference count for \p kernel will be incremented. | |
49 | explicit kernel(cl_kernel kernel, bool retain = true) | |
50 | : m_kernel(kernel) | |
51 | { | |
52 | if(m_kernel && retain){ | |
53 | clRetainKernel(m_kernel); | |
54 | } | |
55 | } | |
56 | ||
57 | /// Creates a new kernel object with \p name from \p program. | |
58 | kernel(const program &program, const std::string &name) | |
59 | { | |
60 | cl_int error = 0; | |
61 | m_kernel = clCreateKernel(program.get(), name.c_str(), &error); | |
62 | ||
63 | if(!m_kernel){ | |
64 | BOOST_THROW_EXCEPTION(opencl_error(error)); | |
65 | } | |
66 | } | |
67 | ||
68 | /// Creates a new kernel object as a copy of \p other. | |
69 | kernel(const kernel &other) | |
70 | : m_kernel(other.m_kernel) | |
71 | { | |
72 | if(m_kernel){ | |
73 | clRetainKernel(m_kernel); | |
74 | } | |
75 | } | |
76 | ||
77 | /// Copies the kernel object from \p other to \c *this. | |
78 | kernel& operator=(const kernel &other) | |
79 | { | |
80 | if(this != &other){ | |
81 | if(m_kernel){ | |
82 | clReleaseKernel(m_kernel); | |
83 | } | |
84 | ||
85 | m_kernel = other.m_kernel; | |
86 | ||
87 | if(m_kernel){ | |
88 | clRetainKernel(m_kernel); | |
89 | } | |
90 | } | |
91 | ||
92 | return *this; | |
93 | } | |
94 | ||
95 | #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES | |
96 | /// Move-constructs a new kernel object from \p other. | |
97 | kernel(kernel&& other) BOOST_NOEXCEPT | |
98 | : m_kernel(other.m_kernel) | |
99 | { | |
100 | other.m_kernel = 0; | |
101 | } | |
102 | ||
103 | /// Move-assigns the kernel from \p other to \c *this. | |
104 | kernel& operator=(kernel&& other) BOOST_NOEXCEPT | |
105 | { | |
106 | if(m_kernel){ | |
107 | clReleaseKernel(m_kernel); | |
108 | } | |
109 | ||
110 | m_kernel = other.m_kernel; | |
111 | other.m_kernel = 0; | |
112 | ||
113 | return *this; | |
114 | } | |
115 | #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES | |
116 | ||
117 | /// Destroys the kernel object. | |
118 | ~kernel() | |
119 | { | |
120 | if(m_kernel){ | |
121 | BOOST_COMPUTE_ASSERT_CL_SUCCESS( | |
122 | clReleaseKernel(m_kernel) | |
123 | ); | |
124 | } | |
125 | } | |
126 | ||
127 | /// Returns a reference to the underlying OpenCL kernel object. | |
128 | cl_kernel& get() const | |
129 | { | |
130 | return const_cast<cl_kernel &>(m_kernel); | |
131 | } | |
132 | ||
133 | /// Returns the function name for the kernel. | |
134 | std::string name() const | |
135 | { | |
136 | return get_info<std::string>(CL_KERNEL_FUNCTION_NAME); | |
137 | } | |
138 | ||
139 | /// Returns the number of arguments for the kernel. | |
140 | size_t arity() const | |
141 | { | |
142 | return get_info<cl_uint>(CL_KERNEL_NUM_ARGS); | |
143 | } | |
144 | ||
145 | /// Returns the program for the kernel. | |
146 | program get_program() const | |
147 | { | |
148 | return program(get_info<cl_program>(CL_KERNEL_PROGRAM)); | |
149 | } | |
150 | ||
151 | /// Returns the context for the kernel. | |
152 | context get_context() const | |
153 | { | |
154 | return context(get_info<cl_context>(CL_KERNEL_CONTEXT)); | |
155 | } | |
156 | ||
157 | /// Returns information about the kernel. | |
158 | /// | |
159 | /// \see_opencl_ref{clGetKernelInfo} | |
160 | template<class T> | |
161 | T get_info(cl_kernel_info info) const | |
162 | { | |
163 | return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info); | |
164 | } | |
165 | ||
166 | /// \overload | |
167 | template<int Enum> | |
168 | typename detail::get_object_info_type<kernel, Enum>::type | |
169 | get_info() const; | |
170 | ||
171 | #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) | |
172 | /// Returns information about the argument at \p index. | |
173 | /// | |
174 | /// For example, to get the name of the first argument: | |
175 | /// \code | |
176 | /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME); | |
177 | /// \endcode | |
178 | /// | |
179 | /// Note, this function requires that the program be compiled with the | |
180 | /// \c "-cl-kernel-arg-info" flag. For example: | |
181 | /// \code | |
182 | /// program.build("-cl-kernel-arg-info"); | |
183 | /// \endcode | |
184 | /// | |
185 | /// \opencl_version_warning{1,2} | |
186 | /// | |
187 | /// \see_opencl_ref{clGetKernelArgInfo} | |
188 | template<class T> | |
189 | T get_arg_info(size_t index, cl_kernel_arg_info info) const | |
190 | { | |
191 | return detail::get_object_info<T>( | |
192 | clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index) | |
193 | ); | |
194 | } | |
195 | ||
196 | /// \overload | |
197 | template<int Enum> | |
198 | typename detail::get_object_info_type<kernel, Enum>::type | |
199 | get_arg_info(size_t index) const; | |
200 | #endif // CL_VERSION_1_2 | |
201 | ||
202 | /// Returns work-group information for the kernel with \p device. | |
203 | /// | |
204 | /// \see_opencl_ref{clGetKernelWorkGroupInfo} | |
205 | template<class T> | |
206 | T get_work_group_info(const device &device, cl_kernel_work_group_info info) const | |
207 | { | |
208 | return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id()); | |
209 | } | |
210 | ||
211 | /// Sets the argument at \p index to \p value with \p size. | |
212 | /// | |
213 | /// \see_opencl_ref{clSetKernelArg} | |
214 | void set_arg(size_t index, size_t size, const void *value) | |
215 | { | |
216 | BOOST_ASSERT(index < arity()); | |
217 | ||
218 | cl_int ret = clSetKernelArg(m_kernel, | |
219 | static_cast<cl_uint>(index), | |
220 | size, | |
221 | value); | |
222 | if(ret != CL_SUCCESS){ | |
223 | BOOST_THROW_EXCEPTION(opencl_error(ret)); | |
224 | } | |
225 | } | |
226 | ||
227 | /// Sets the argument at \p index to \p value. | |
228 | /// | |
229 | /// For built-in types (e.g. \c float, \c int4_), this is equivalent to | |
230 | /// calling set_arg(index, sizeof(type), &value). | |
231 | /// | |
232 | /// Additionally, this method is specialized for device memory objects | |
233 | /// such as buffer and image2d. This allows for them to be passed directly | |
234 | /// without having to extract their underlying cl_mem object. | |
235 | /// | |
236 | /// This method is also specialized for device container types such as | |
237 | /// vector<T> and array<T, N>. This allows for them to be passed directly | |
238 | /// as kernel arguments without having to extract their underlying buffer. | |
239 | /// | |
240 | /// For setting local memory arguments (e.g. "__local float *buf"), the | |
241 | /// local_buffer<T> class may be used: | |
242 | /// \code | |
243 | /// // set argument to a local buffer with storage for 32 float's | |
244 | /// kernel.set_arg(0, local_buffer<float>(32)); | |
245 | /// \endcode | |
246 | template<class T> | |
247 | void set_arg(size_t index, const T &value) | |
248 | { | |
249 | // if you get a compilation error pointing here it means you | |
250 | // attempted to set a kernel argument from an invalid type. | |
251 | detail::set_kernel_arg<T>()(*this, index, value); | |
252 | } | |
253 | ||
254 | /// \internal_ | |
255 | void set_arg(size_t index, const cl_mem mem) | |
256 | { | |
257 | set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem)); | |
258 | } | |
259 | ||
260 | /// \internal_ | |
261 | void set_arg(size_t index, const cl_sampler sampler) | |
262 | { | |
263 | set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler)); | |
264 | } | |
265 | ||
266 | /// \internal_ | |
267 | void set_arg_svm_ptr(size_t index, void* ptr) | |
268 | { | |
269 | #ifdef CL_VERSION_2_0 | |
270 | cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr); | |
271 | if(ret != CL_SUCCESS){ | |
272 | BOOST_THROW_EXCEPTION(opencl_error(ret)); | |
273 | } | |
274 | #else | |
275 | (void) index; | |
276 | (void) ptr; | |
277 | BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE)); | |
278 | #endif | |
279 | } | |
280 | ||
281 | #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES | |
282 | /// Sets the arguments for the kernel to \p args. | |
283 | template<class... T> | |
284 | void set_args(T&&... args) | |
285 | { | |
286 | BOOST_ASSERT(sizeof...(T) <= arity()); | |
287 | ||
288 | _set_args<0>(args...); | |
289 | } | |
290 | #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES | |
291 | ||
292 | #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) | |
293 | /// Sets additional execution information for the kernel. | |
294 | /// | |
295 | /// \opencl_version_warning{2,0} | |
296 | /// | |
297 | /// \see_opencl2_ref{clSetKernelExecInfo} | |
298 | void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value) | |
299 | { | |
300 | cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value); | |
301 | if(ret != CL_SUCCESS){ | |
302 | BOOST_THROW_EXCEPTION(opencl_error(ret)); | |
303 | } | |
304 | } | |
305 | #endif // CL_VERSION_2_0 | |
306 | ||
307 | /// Returns \c true if the kernel is the same at \p other. | |
308 | bool operator==(const kernel &other) const | |
309 | { | |
310 | return m_kernel == other.m_kernel; | |
311 | } | |
312 | ||
313 | /// Returns \c true if the kernel is different from \p other. | |
314 | bool operator!=(const kernel &other) const | |
315 | { | |
316 | return m_kernel != other.m_kernel; | |
317 | } | |
318 | ||
319 | /// \internal_ | |
320 | operator cl_kernel() const | |
321 | { | |
322 | return m_kernel; | |
323 | } | |
324 | ||
325 | /// \internal_ | |
326 | static kernel create_with_source(const std::string &source, | |
327 | const std::string &name, | |
328 | const context &context) | |
329 | { | |
330 | return program::build_with_source(source, context).create_kernel(name); | |
331 | } | |
332 | ||
333 | private: | |
334 | #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES | |
335 | /// \internal_ | |
336 | template<size_t N> | |
337 | void _set_args() | |
338 | { | |
339 | } | |
340 | ||
341 | /// \internal_ | |
342 | template<size_t N, class T, class... Args> | |
343 | void _set_args(T&& arg, Args&&... rest) | |
344 | { | |
345 | set_arg(N, arg); | |
346 | _set_args<N+1>(rest...); | |
347 | } | |
348 | #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES | |
349 | ||
350 | private: | |
351 | cl_kernel m_kernel; | |
352 | }; | |
353 | ||
354 | inline kernel program::create_kernel(const std::string &name) const | |
355 | { | |
356 | return kernel(*this, name); | |
357 | } | |
358 | ||
359 | /// \internal_ define get_info() specializations for kernel | |
360 | BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, | |
361 | ((std::string, CL_KERNEL_FUNCTION_NAME)) | |
362 | ((cl_uint, CL_KERNEL_NUM_ARGS)) | |
363 | ((cl_uint, CL_KERNEL_REFERENCE_COUNT)) | |
364 | ((cl_context, CL_KERNEL_CONTEXT)) | |
365 | ((cl_program, CL_KERNEL_PROGRAM)) | |
366 | ) | |
367 | ||
368 | #ifdef CL_VERSION_1_2 | |
369 | BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, | |
370 | ((std::string, CL_KERNEL_ATTRIBUTES)) | |
371 | ) | |
372 | #endif // CL_VERSION_1_2 | |
373 | ||
/// \internal_ define get_arg_info() specializations for kernel
///
/// For each (type, enumerator) pair the macro below specializes
/// detail::get_object_info_type<kernel, enumerator> with that type, and
/// specializes kernel::get_arg_info<enumerator>(index) to forward to
/// get_arg_info<type>(index, enumerator).
#ifdef CL_VERSION_1_2
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(info_result, info_enum) \
    namespace detail { \
        template<> \
        struct get_object_info_type<kernel, info_enum> \
        { \
            typedef info_result type; \
        }; \
    } \
    template<> \
    inline info_result kernel::get_arg_info<info_enum>(size_t index) const \
    { \
        return get_arg_info<info_result>(index, info_enum); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER
)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER
)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    std::string, CL_KERNEL_ARG_TYPE_NAME
)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER
)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
    std::string, CL_KERNEL_ARG_NAME
)
#endif // CL_VERSION_1_2
390 | ||
391 | namespace detail { | |
392 | ||
393 | // set_kernel_arg implementation for built-in types | |
394 | template<class T> | |
395 | struct set_kernel_arg | |
396 | { | |
397 | typename boost::enable_if<is_fundamental<T> >::type | |
398 | operator()(kernel &kernel_, size_t index, const T &value) | |
399 | { | |
400 | kernel_.set_arg(index, sizeof(T), &value); | |
401 | } | |
402 | }; | |
403 | ||
404 | // set_kernel_arg specialization for char (different from built-in cl_char) | |
405 | template<> | |
406 | struct set_kernel_arg<char> | |
407 | { | |
408 | void operator()(kernel &kernel_, size_t index, const char c) | |
409 | { | |
410 | kernel_.set_arg(index, sizeof(char), &c); | |
411 | } | |
412 | }; | |
413 | ||
414 | } // end detail namespace | |
415 | } // end namespace compute | |
416 | } // end namespace boost | |
417 | ||
418 | #endif // BOOST_COMPUTE_KERNEL_HPP |