]>
Commit | Line | Data |
---|---|---|
1 | //---------------------------------------------------------------------------// | |
2 | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP | |
12 | #define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP | |
13 | ||
14 | #include <set> | |
15 | #include <string> | |
16 | #include <vector> | |
17 | #include <iomanip> | |
18 | #include <sstream> | |
19 | #include <utility> | |
20 | ||
21 | #include <boost/tuple/tuple.hpp> | |
22 | #include <boost/type_traits.hpp> | |
23 | #include <boost/lexical_cast.hpp> | |
24 | #include <boost/static_assert.hpp> | |
25 | #include <boost/algorithm/string/find.hpp> | |
26 | #include <boost/preprocessor/repetition.hpp> | |
27 | ||
28 | #include <boost/compute/kernel.hpp> | |
29 | #include <boost/compute/closure.hpp> | |
30 | #include <boost/compute/function.hpp> | |
31 | #include <boost/compute/functional.hpp> | |
32 | #include <boost/compute/type_traits.hpp> | |
33 | #include <boost/compute/command_queue.hpp> | |
34 | #include <boost/compute/image/image2d.hpp> | |
35 | #include <boost/compute/image/image_sampler.hpp> | |
36 | #include <boost/compute/memory_object.hpp> | |
37 | #include <boost/compute/memory/svm_ptr.hpp> | |
38 | #include <boost/compute/detail/device_ptr.hpp> | |
39 | #include <boost/compute/detail/sha1.hpp> | |
40 | #include <boost/compute/utility/program_cache.hpp> | |
41 | ||
42 | namespace boost { | |
43 | namespace compute { | |
44 | namespace detail { | |
45 | ||
46 | template<class T> | |
47 | class meta_kernel_variable | |
48 | { | |
49 | public: | |
50 | typedef T result_type; | |
51 | ||
52 | meta_kernel_variable(const std::string &name) | |
53 | : m_name(name) | |
54 | { | |
55 | } | |
56 | ||
57 | meta_kernel_variable(const meta_kernel_variable &other) | |
58 | : m_name(other.m_name) | |
59 | { | |
60 | } | |
61 | ||
62 | meta_kernel_variable& operator=(const meta_kernel_variable &other) | |
63 | { | |
64 | if(this != &other){ | |
65 | m_name = other.m_name; | |
66 | } | |
67 | ||
68 | return *this; | |
69 | } | |
70 | ||
71 | ~meta_kernel_variable() | |
72 | { | |
73 | } | |
74 | ||
75 | std::string name() const | |
76 | { | |
77 | return m_name; | |
78 | } | |
79 | ||
80 | private: | |
81 | std::string m_name; | |
82 | }; | |
83 | ||
84 | template<class T> | |
85 | class meta_kernel_literal | |
86 | { | |
87 | public: | |
88 | typedef T result_type; | |
89 | ||
90 | meta_kernel_literal(const T &value) | |
91 | : m_value(value) | |
92 | { | |
93 | } | |
94 | ||
95 | meta_kernel_literal(const meta_kernel_literal &other) | |
96 | : m_value(other.m_value) | |
97 | { | |
98 | } | |
99 | ||
100 | meta_kernel_literal& operator=(const meta_kernel_literal &other) | |
101 | { | |
102 | if(this != &other){ | |
103 | m_value = other.m_value; | |
104 | } | |
105 | ||
106 | return *this; | |
107 | } | |
108 | ||
109 | ~meta_kernel_literal() | |
110 | { | |
111 | } | |
112 | ||
113 | const T& value() const | |
114 | { | |
115 | return m_value; | |
116 | } | |
117 | ||
118 | private: | |
119 | T m_value; | |
120 | }; | |
121 | ||
122 | struct meta_kernel_stored_arg | |
123 | { | |
124 | meta_kernel_stored_arg() | |
125 | : m_size(0), | |
126 | m_value(0) | |
127 | { | |
128 | } | |
129 | ||
130 | meta_kernel_stored_arg(const meta_kernel_stored_arg &other) | |
131 | : m_size(0), | |
132 | m_value(0) | |
133 | { | |
134 | set_value(other.m_size, other.m_value); | |
135 | } | |
136 | ||
137 | meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other) | |
138 | { | |
139 | if(this != &other){ | |
140 | set_value(other.m_size, other.m_value); | |
141 | } | |
142 | ||
143 | return *this; | |
144 | } | |
145 | ||
146 | template<class T> | |
147 | meta_kernel_stored_arg(const T &value) | |
148 | : m_size(0), | |
149 | m_value(0) | |
150 | { | |
151 | set_value(value); | |
152 | } | |
153 | ||
154 | ~meta_kernel_stored_arg() | |
155 | { | |
156 | if(m_value){ | |
157 | std::free(m_value); | |
158 | } | |
159 | } | |
160 | ||
161 | void set_value(size_t size, const void *value) | |
162 | { | |
163 | if(m_value){ | |
164 | std::free(m_value); | |
165 | } | |
166 | ||
167 | m_size = size; | |
168 | ||
169 | if(value){ | |
170 | m_value = std::malloc(size); | |
171 | std::memcpy(m_value, value, size); | |
172 | } | |
173 | else { | |
174 | m_value = 0; | |
175 | } | |
176 | } | |
177 | ||
178 | template<class T> | |
179 | void set_value(const T &value) | |
180 | { | |
181 | set_value(sizeof(T), boost::addressof(value)); | |
182 | } | |
183 | ||
184 | size_t m_size; | |
185 | void *m_value; | |
186 | }; | |
187 | ||
188 | struct meta_kernel_buffer_info | |
189 | { | |
190 | meta_kernel_buffer_info(const buffer &buffer, | |
191 | const std::string &id, | |
192 | memory_object::address_space addr_space, | |
193 | size_t i) | |
194 | : m_mem(buffer.get()), | |
195 | identifier(id), | |
196 | address_space(addr_space), | |
197 | index(i) | |
198 | { | |
199 | } | |
200 | ||
201 | cl_mem m_mem; | |
202 | std::string identifier; | |
203 | memory_object::address_space address_space; | |
204 | size_t index; | |
205 | }; | |
206 | ||
207 | struct meta_kernel_svm_info | |
208 | { | |
209 | template <class T> | |
210 | meta_kernel_svm_info(const svm_ptr<T> ptr, | |
211 | const std::string &id, | |
212 | memory_object::address_space addr_space, | |
213 | size_t i) | |
214 | : ptr(ptr.get()), | |
215 | identifier(id), | |
216 | address_space(addr_space), | |
217 | index(i) | |
218 | { | |
219 | ||
220 | } | |
221 | ||
222 | void* ptr; | |
223 | std::string identifier; | |
224 | memory_object::address_space address_space; | |
225 | size_t index; | |
226 | }; | |
227 | ||
228 | ||
229 | class meta_kernel; | |
230 | ||
231 | template<class Type> | |
232 | struct inject_type_impl | |
233 | { | |
234 | void operator()(meta_kernel &) | |
235 | { | |
236 | // default implementation does nothing | |
237 | } | |
238 | }; | |
239 | ||
240 | #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \ | |
241 | meta_kernel& operator<<(const type &x) \ | |
242 | { \ | |
243 | m_source << x; \ | |
244 | return *this; \ | |
245 | } | |
246 | ||
247 | #define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \ | |
248 | meta_kernel& operator<<(const type &x) \ | |
249 | { \ | |
250 | m_source << "(" << type_name<type>() << ")"; \ | |
251 | m_source << "("; \ | |
252 | for(size_t i = 0; i < vector_size<type>::value; i++){ \ | |
253 | *this << lit(x[i]); \ | |
254 | \ | |
255 | if(i != vector_size<type>::value - 1){ \ | |
256 | m_source << ","; \ | |
257 | } \ | |
258 | } \ | |
259 | m_source << ")"; \ | |
260 | return *this; \ | |
261 | } | |
262 | ||
263 | #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \ | |
264 | BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \ | |
265 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \ | |
266 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \ | |
267 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \ | |
268 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)) | |
269 | ||
270 | class meta_kernel | |
271 | { | |
272 | public: | |
273 | template<class T> | |
274 | class argument | |
275 | { | |
276 | public: | |
277 | argument(const std::string &name, size_t index) | |
278 | : m_name(name), | |
279 | m_index(index) | |
280 | { | |
281 | } | |
282 | ||
283 | const std::string &name() const | |
284 | { | |
285 | return m_name; | |
286 | } | |
287 | ||
288 | size_t index() const | |
289 | { | |
290 | return m_index; | |
291 | } | |
292 | ||
293 | private: | |
294 | std::string m_name; | |
295 | size_t m_index; | |
296 | }; | |
297 | ||
298 | explicit meta_kernel(const std::string &name) | |
299 | : m_name(name) | |
300 | { | |
301 | } | |
302 | ||
303 | meta_kernel(const meta_kernel &other) | |
304 | { | |
305 | m_source.str(other.m_source.str()); | |
306 | m_options = other.m_options; | |
307 | } | |
308 | ||
309 | meta_kernel& operator=(const meta_kernel &other) | |
310 | { | |
311 | if(this != &other){ | |
312 | m_source.str(other.m_source.str()); | |
313 | m_options = other.m_options; | |
314 | } | |
315 | ||
316 | return *this; | |
317 | } | |
318 | ||
319 | ~meta_kernel() | |
320 | { | |
321 | } | |
322 | ||
323 | std::string name() const | |
324 | { | |
325 | return m_name; | |
326 | } | |
327 | ||
328 | std::string source() const | |
329 | { | |
330 | std::stringstream stream; | |
331 | ||
332 | // add pragmas | |
333 | if(!m_pragmas.empty()){ | |
334 | stream << m_pragmas << "\n"; | |
335 | } | |
336 | ||
337 | // add macros | |
338 | stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n"; | |
339 | stream << "#define boost_pair_get(x, n) (n == 0 ? x.first ## x.second)\n"; | |
340 | stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n"; | |
341 | stream << "#define boost_tuple_get(x, n) (x.v ## n)\n"; | |
342 | ||
343 | // add type declaration source | |
344 | stream << m_type_declaration_source.str() << "\n"; | |
345 | ||
346 | // add external function source | |
347 | stream << m_external_function_source.str() << "\n"; | |
348 | ||
349 | // add kernel source | |
350 | stream << "__kernel void " << m_name | |
351 | << "(" << boost::join(m_args, ", ") << ")\n" | |
352 | << "{\n" << m_source.str() << "\n}\n"; | |
353 | ||
354 | return stream.str(); | |
355 | } | |
356 | ||
357 | kernel compile(const context &context, const std::string &options = std::string()) | |
358 | { | |
359 | // generate the program source | |
360 | std::string source = this->source(); | |
361 | ||
362 | // generate cache key | |
363 | std::string cache_key = "__boost_meta_kernel_" + | |
364 | static_cast<std::string>(detail::sha1(source)); | |
365 | ||
366 | // load program cache | |
367 | boost::shared_ptr<program_cache> cache = | |
368 | program_cache::get_global_cache(context); | |
369 | ||
370 | std::string compile_options = m_options + options; | |
371 | ||
372 | // load (or build) program from cache | |
373 | ::boost::compute::program program = | |
374 | cache->get_or_build(cache_key, compile_options, source, context); | |
375 | ||
376 | // create kernel | |
377 | ::boost::compute::kernel kernel = program.create_kernel(name()); | |
378 | ||
379 | // bind stored args | |
380 | for(size_t i = 0; i < m_stored_args.size(); i++){ | |
381 | const detail::meta_kernel_stored_arg &arg = m_stored_args[i]; | |
382 | ||
383 | if(arg.m_size != 0){ | |
384 | kernel.set_arg(i, arg.m_size, arg.m_value); | |
385 | } | |
386 | } | |
387 | ||
388 | // bind buffer args | |
389 | for(size_t i = 0; i < m_stored_buffers.size(); i++){ | |
390 | const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; | |
391 | ||
392 | kernel.set_arg(bi.index, bi.m_mem); | |
393 | } | |
394 | ||
395 | // bind svm args | |
396 | for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){ | |
397 | const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i]; | |
398 | ||
399 | kernel.set_arg_svm_ptr(spi.index, spi.ptr); | |
400 | } | |
401 | ||
402 | return kernel; | |
403 | } | |
404 | ||
405 | template<class T> | |
406 | size_t add_arg(const std::string &name) | |
407 | { | |
408 | std::stringstream stream; | |
409 | stream << type<T>() << " " << name; | |
410 | ||
411 | // add argument to list | |
412 | m_args.push_back(stream.str()); | |
413 | ||
414 | // return index | |
415 | return m_args.size() - 1; | |
416 | } | |
417 | ||
418 | template<class T> | |
419 | size_t add_arg(memory_object::address_space address_space, | |
420 | const std::string &name) | |
421 | { | |
422 | return add_arg_with_qualifiers<T>(address_space_prefix(address_space), name); | |
423 | } | |
424 | ||
425 | template<class T> | |
426 | void set_arg(size_t index, const T &value) | |
427 | { | |
428 | if(index >= m_stored_args.size()){ | |
429 | m_stored_args.resize(index + 1); | |
430 | } | |
431 | ||
432 | m_stored_args[index] = detail::meta_kernel_stored_arg(value); | |
433 | } | |
434 | ||
435 | void set_arg(size_t index, const memory_object &mem) | |
436 | { | |
437 | set_arg<cl_mem>(index, mem.get()); | |
438 | } | |
439 | ||
440 | void set_arg(size_t index, const image_sampler &sampler) | |
441 | { | |
442 | set_arg<cl_sampler>(index, cl_sampler(sampler)); | |
443 | } | |
444 | ||
445 | template<class T> | |
446 | size_t add_set_arg(const std::string &name, const T &value) | |
447 | { | |
448 | size_t index = add_arg<T>(name); | |
449 | set_arg<T>(index, value); | |
450 | return index; | |
451 | } | |
452 | ||
453 | void add_extension_pragma(const std::string &extension, | |
454 | const std::string &value = "enable") | |
455 | { | |
456 | m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n"; | |
457 | } | |
458 | ||
459 | void add_extension_pragma(const std::string &extension, | |
460 | const std::string &value) const | |
461 | { | |
462 | return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value); | |
463 | } | |
464 | ||
465 | template<class T> | |
466 | std::string type() const | |
467 | { | |
468 | std::stringstream stream; | |
469 | ||
470 | // const qualifier | |
471 | if(boost::is_const<T>::value){ | |
472 | stream << "const "; | |
473 | } | |
474 | ||
475 | // volatile qualifier | |
476 | if(boost::is_volatile<T>::value){ | |
477 | stream << "volatile "; | |
478 | } | |
479 | ||
480 | // type | |
481 | typedef | |
482 | typename boost::remove_cv< | |
483 | typename boost::remove_pointer<T>::type | |
484 | >::type Type; | |
485 | stream << type_name<Type>(); | |
486 | ||
487 | // pointer | |
488 | if(boost::is_pointer<T>::value){ | |
489 | stream << "*"; | |
490 | } | |
491 | ||
492 | // inject type pragmas and/or definitions | |
493 | inject_type<Type>(); | |
494 | ||
495 | return stream.str(); | |
496 | } | |
497 | ||
498 | template<class T> | |
499 | std::string decl(const std::string &name) const | |
500 | { | |
501 | return type<T>() + " " + name; | |
502 | } | |
503 | ||
504 | template<class T, class Expr> | |
505 | std::string decl(const std::string &name, const Expr &init) const | |
506 | { | |
507 | meta_kernel tmp((std::string())); | |
508 | tmp << tmp.decl<T>(name) << " = " << init; | |
509 | return tmp.m_source.str(); | |
510 | } | |
511 | ||
512 | template<class T> | |
513 | detail::meta_kernel_variable<T> var(const std::string &name) const | |
514 | { | |
515 | type<T>(); | |
516 | ||
517 | return make_var<T>(name); | |
518 | } | |
519 | ||
520 | template<class T> | |
521 | detail::meta_kernel_literal<T> lit(const T &value) const | |
522 | { | |
523 | type<T>(); | |
524 | ||
525 | return detail::meta_kernel_literal<T>(value); | |
526 | } | |
527 | ||
528 | template<class T> | |
529 | detail::meta_kernel_variable<T> expr(const std::string &expr) const | |
530 | { | |
531 | type<T>(); | |
532 | ||
533 | return detail::meta_kernel_variable<T>(expr); | |
534 | } | |
535 | ||
536 | // define stream operators for scalar and vector types | |
537 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char) | |
538 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar) | |
539 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short) | |
540 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort) | |
541 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int) | |
542 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint) | |
543 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long) | |
544 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong) | |
545 | BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double) | |
546 | ||
547 | // define stream operators for float scalar and vector types | |
548 | meta_kernel& operator<<(const float &x) | |
549 | { | |
550 | m_source << std::showpoint << x << 'f'; | |
551 | return *this; | |
552 | } | |
553 | ||
554 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_) | |
555 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_) | |
556 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_) | |
557 | BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_) | |
558 | ||
559 | // define stream operators for variable types | |
560 | template<class T> | |
561 | meta_kernel& operator<<(const meta_kernel_variable<T> &variable) | |
562 | { | |
563 | return *this << variable.name(); | |
564 | } | |
565 | ||
566 | // define stream operators for literal types | |
567 | template<class T> | |
568 | meta_kernel& operator<<(const meta_kernel_literal<T> &literal) | |
569 | { | |
570 | return *this << literal.value(); | |
571 | } | |
572 | ||
573 | meta_kernel& operator<<(const meta_kernel_literal<bool> &literal) | |
574 | { | |
575 | return *this << (literal.value() ? "true" : "false"); | |
576 | } | |
577 | ||
578 | meta_kernel& operator<<(const meta_kernel_literal<char> &literal) | |
579 | { | |
580 | const char c = literal.value(); | |
581 | ||
582 | switch(c){ | |
583 | // control characters | |
584 | case '\0': | |
585 | return *this << "'\\0'"; | |
586 | case '\a': | |
587 | return *this << "'\\a'"; | |
588 | case '\b': | |
589 | return *this << "'\\b'"; | |
590 | case '\t': | |
591 | return *this << "'\\t'"; | |
592 | case '\n': | |
593 | return *this << "'\\n'"; | |
594 | case '\v': | |
595 | return *this << "'\\v'"; | |
596 | case '\f': | |
597 | return *this << "'\\f'"; | |
598 | case '\r': | |
599 | return *this << "'\\r'"; | |
600 | ||
601 | // characters which need escaping | |
602 | case '\"': | |
603 | case '\'': | |
604 | case '\?': | |
605 | case '\\': | |
606 | return *this << "'\\" << c << "'"; | |
607 | ||
608 | // all other characters | |
609 | default: | |
610 | return *this << "'" << c << "'"; | |
611 | } | |
612 | } | |
613 | ||
614 | meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal) | |
615 | { | |
616 | return *this << lit<char>(literal.value()); | |
617 | } | |
618 | ||
619 | meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal) | |
620 | { | |
621 | return *this << uint_(literal.value()); | |
622 | } | |
623 | ||
624 | // define stream operators for strings | |
625 | meta_kernel& operator<<(char ch) | |
626 | { | |
627 | m_source << ch; | |
628 | return *this; | |
629 | } | |
630 | ||
631 | meta_kernel& operator<<(const char *string) | |
632 | { | |
633 | m_source << string; | |
634 | return *this; | |
635 | } | |
636 | ||
637 | meta_kernel& operator<<(const std::string &string) | |
638 | { | |
639 | m_source << string; | |
640 | return *this; | |
641 | } | |
642 | ||
643 | template<class T> | |
644 | static detail::meta_kernel_variable<T> make_var(const std::string &name) | |
645 | { | |
646 | return detail::meta_kernel_variable<T>(name); | |
647 | } | |
648 | ||
649 | template<class T> | |
650 | static detail::meta_kernel_literal<T> make_lit(const T &value) | |
651 | { | |
652 | return detail::meta_kernel_literal<T>(value); | |
653 | } | |
654 | ||
655 | template<class T> | |
656 | static detail::meta_kernel_variable<T> make_expr(const std::string &expr) | |
657 | { | |
658 | return detail::meta_kernel_variable<T>(expr); | |
659 | } | |
660 | ||
661 | event exec(command_queue &queue) | |
662 | { | |
663 | return exec_1d(queue, 0, 1); | |
664 | } | |
665 | ||
666 | event exec_1d(command_queue &queue, | |
667 | size_t global_work_offset, | |
668 | size_t global_work_size) | |
669 | { | |
670 | const context &context = queue.get_context(); | |
671 | ||
672 | ::boost::compute::kernel kernel = compile(context); | |
673 | ||
674 | return queue.enqueue_1d_range_kernel( | |
675 | kernel, | |
676 | global_work_offset, | |
677 | global_work_size, | |
678 | 0 | |
679 | ); | |
680 | } | |
681 | ||
682 | event exec_1d(command_queue &queue, | |
683 | size_t global_work_offset, | |
684 | size_t global_work_size, | |
685 | size_t local_work_size) | |
686 | { | |
687 | const context &context = queue.get_context(); | |
688 | ||
689 | ::boost::compute::kernel kernel = compile(context); | |
690 | ||
691 | return queue.enqueue_1d_range_kernel( | |
692 | kernel, | |
693 | global_work_offset, | |
694 | global_work_size, | |
695 | local_work_size | |
696 | ); | |
697 | } | |
698 | ||
699 | template<class T> | |
700 | std::string get_buffer_identifier(const buffer &buffer, | |
701 | const memory_object::address_space address_space = | |
702 | memory_object::global_memory) | |
703 | { | |
704 | // check if we've already seen buffer | |
705 | for(size_t i = 0; i < m_stored_buffers.size(); i++){ | |
706 | const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; | |
707 | ||
708 | if(bi.m_mem == buffer.get() && | |
709 | bi.address_space == address_space){ | |
710 | return bi.identifier; | |
711 | } | |
712 | } | |
713 | ||
714 | // create a new binding | |
715 | std::string identifier = | |
716 | "_buf" + lexical_cast<std::string>(m_stored_buffers.size()); | |
717 | size_t index = add_arg<T *>(address_space, identifier); | |
718 | ||
719 | // store new buffer info | |
720 | m_stored_buffers.push_back( | |
721 | detail::meta_kernel_buffer_info(buffer, identifier, address_space, index)); | |
722 | ||
723 | return identifier; | |
724 | } | |
725 | ||
726 | template<class T> | |
727 | std::string get_svm_identifier(const svm_ptr<T> &svm_ptr, | |
728 | const memory_object::address_space address_space = | |
729 | memory_object::global_memory) | |
730 | { | |
731 | BOOST_ASSERT( | |
732 | (address_space == memory_object::global_memory) | |
733 | || (address_space == memory_object::constant_memory) | |
734 | ); | |
735 | ||
736 | // check if we've already seen this pointer | |
737 | for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){ | |
738 | const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i]; | |
739 | ||
740 | if(spi.ptr == svm_ptr.get() && | |
741 | spi.address_space == address_space){ | |
742 | return spi.identifier; | |
743 | } | |
744 | } | |
745 | ||
746 | // create a new binding | |
747 | std::string identifier = | |
748 | "_svm_ptr" + lexical_cast<std::string>(m_stored_svm_ptrs.size()); | |
749 | size_t index = add_arg<T *>(address_space, identifier); | |
750 | ||
751 | if(m_stored_svm_ptrs.empty()) { | |
752 | m_options += std::string(" -cl-std=CL2.0"); | |
753 | } | |
754 | ||
755 | // store new svm pointer info | |
756 | m_stored_svm_ptrs.push_back( | |
757 | detail::meta_kernel_svm_info( | |
758 | svm_ptr, identifier, address_space, index | |
759 | ) | |
760 | ); | |
761 | ||
762 | return identifier; | |
763 | } | |
764 | ||
765 | std::string get_image_identifier(const char *qualifiers, const image2d &image) | |
766 | { | |
767 | size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image"); | |
768 | ||
769 | set_arg(index, image); | |
770 | ||
771 | return "image"; | |
772 | } | |
773 | ||
774 | std::string get_sampler_identifier(bool normalized_coords, | |
775 | cl_addressing_mode addressing_mode, | |
776 | cl_filter_mode filter_mode) | |
777 | { | |
778 | (void) normalized_coords; | |
779 | (void) addressing_mode; | |
780 | (void) filter_mode; | |
781 | ||
782 | m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n" | |
783 | " CLK_ADDRESS_NONE |\n" | |
784 | " CLK_FILTER_NEAREST;\n"; | |
785 | ||
786 | return "sampler"; | |
787 | } | |
788 | ||
789 | template<class Expr> | |
790 | static std::string expr_to_string(const Expr &expr) | |
791 | { | |
792 | meta_kernel tmp((std::string())); | |
793 | tmp << expr; | |
794 | return tmp.m_source.str(); | |
795 | } | |
796 | ||
797 | template<class Predicate> | |
798 | detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const | |
799 | { | |
800 | return detail::invoked_function<bool, boost::tuple<Predicate> >( | |
801 | "if", std::string(), boost::make_tuple(pred) | |
802 | ); | |
803 | } | |
804 | ||
805 | template<class Predicate> | |
806 | detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const | |
807 | { | |
808 | return detail::invoked_function<bool, boost::tuple<Predicate> >( | |
809 | "else if", std::string(), boost::make_tuple(pred) | |
810 | ); | |
811 | } | |
812 | ||
813 | detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const | |
814 | { | |
815 | return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")"); | |
816 | } | |
817 | ||
818 | void add_function(const std::string &name, const std::string &source) | |
819 | { | |
820 | if(m_external_function_names.count(name)){ | |
821 | return; | |
822 | } | |
823 | ||
824 | m_external_function_names.insert(name); | |
825 | m_external_function_source << source << "\n"; | |
826 | } | |
827 | ||
828 | void add_function(const std::string &name, | |
829 | const std::string &source, | |
830 | const std::map<std::string, std::string> &definitions) | |
831 | { | |
832 | typedef std::map<std::string, std::string>::const_iterator iter; | |
833 | ||
834 | std::stringstream s; | |
835 | ||
836 | // add #define's | |
837 | for(iter i = definitions.begin(); i != definitions.end(); i++){ | |
838 | s << "#define " << i->first; | |
839 | if(!i->second.empty()){ | |
840 | s << " " << i->second; | |
841 | } | |
842 | s << "\n"; | |
843 | } | |
844 | ||
845 | s << source << "\n"; | |
846 | ||
847 | // add #undef's | |
848 | for(iter i = definitions.begin(); i != definitions.end(); i++){ | |
849 | s << "#undef " << i->first << "\n"; | |
850 | } | |
851 | ||
852 | add_function(name, s.str()); | |
853 | } | |
854 | ||
855 | template<class Type> | |
856 | void add_type_declaration(const std::string &declaration) | |
857 | { | |
858 | const char *name = type_name<Type>(); | |
859 | ||
860 | // check if the type has already been declared | |
861 | std::string source = m_type_declaration_source.str(); | |
862 | if(source.find(name) != std::string::npos){ | |
863 | return; | |
864 | } | |
865 | ||
866 | m_type_declaration_source << declaration; | |
867 | } | |
868 | ||
869 | template<class Type> | |
870 | void inject_type() const | |
871 | { | |
872 | inject_type_impl<Type>()(const_cast<meta_kernel &>(*this)); | |
873 | } | |
874 | ||
875 | // the insert_function_call() method inserts a call to a function with | |
876 | // the given name tuple of argument values. | |
877 | template<class ArgTuple> | |
878 | void insert_function_call(const std::string &name, const ArgTuple &args) | |
879 | { | |
880 | *this << name << '('; | |
881 | insert_function_call_args(args); | |
882 | *this << ')'; | |
883 | } | |
884 | ||
885 | // the insert_function_call_args() method takes a tuple of argument values | |
886 | // and inserts them into the source string with a comma in-between each. | |
887 | // this is useful for creating function calls given a tuple of values. | |
888 | void insert_function_call_args(const boost::tuple<>&) | |
889 | { | |
890 | } | |
891 | ||
892 | #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \ | |
893 | inject_type<BOOST_PP_CAT(T, n)>(); | |
894 | ||
895 | #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \ | |
896 | << boost::get<BOOST_PP_DEC(n)>(args) << ", " | |
897 | ||
898 | #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \ | |
899 | template<BOOST_PP_ENUM_PARAMS(n, class T)> \ | |
900 | void insert_function_call_args( \ | |
901 | const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \ | |
902 | ) \ | |
903 | { \ | |
904 | BOOST_PP_REPEAT_FROM_TO( \ | |
905 | 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \ | |
906 | ) \ | |
907 | *this \ | |
908 | BOOST_PP_REPEAT_FROM_TO( \ | |
909 | 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \ | |
910 | ) \ | |
911 | << boost::get<BOOST_PP_DEC(n)>(args); \ | |
912 | } | |
913 | ||
914 | BOOST_PP_REPEAT_FROM_TO( | |
915 | 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~ | |
916 | ) | |
917 | ||
918 | #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE | |
919 | #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG | |
920 | #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS | |
921 | ||
922 | static const char* address_space_prefix(const memory_object::address_space value) | |
923 | { | |
924 | switch(value){ | |
925 | case memory_object::global_memory: return "__global"; | |
926 | case memory_object::local_memory: return "__local"; | |
927 | case memory_object::private_memory: return "__private"; | |
928 | case memory_object::constant_memory: return "__constant"; | |
929 | }; | |
930 | ||
931 | return 0; // unreachable | |
932 | } | |
933 | ||
934 | private: | |
935 | template<class T> | |
936 | size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name) | |
937 | { | |
938 | size_t index = add_arg<T>(name); | |
939 | ||
940 | // update argument type declaration with qualifiers | |
941 | std::stringstream s; | |
942 | s << qualifiers << " " << m_args[index]; | |
943 | m_args[index] = s.str(); | |
944 | ||
945 | return index; | |
946 | } | |
947 | ||
948 | private: | |
949 | std::string m_name; | |
950 | std::stringstream m_source; | |
951 | std::stringstream m_external_function_source; | |
952 | std::stringstream m_type_declaration_source; | |
953 | std::set<std::string> m_external_function_names; | |
954 | std::vector<std::string> m_args; | |
955 | std::string m_pragmas; | |
956 | std::string m_options; | |
957 | std::vector<detail::meta_kernel_stored_arg> m_stored_args; | |
958 | std::vector<detail::meta_kernel_buffer_info> m_stored_buffers; | |
959 | std::vector<detail::meta_kernel_svm_info> m_stored_svm_ptrs; | |
960 | }; | |
961 | ||
962 | template<class ResultType, class ArgTuple> | |
963 | inline meta_kernel& | |
964 | operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr) | |
965 | { | |
966 | if(!expr.source().empty()){ | |
967 | kernel.add_function(expr.name(), expr.source(), expr.definitions()); | |
968 | } | |
969 | ||
970 | kernel.insert_function_call(expr.name(), expr.args()); | |
971 | ||
972 | return kernel; | |
973 | } | |
974 | ||
975 | template<class ResultType, class ArgTuple, class CaptureTuple> | |
976 | inline meta_kernel& | |
977 | operator<<(meta_kernel &kernel, | |
978 | const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr) | |
979 | { | |
980 | if(!expr.source().empty()){ | |
981 | kernel.add_function(expr.name(), expr.source(), expr.definitions()); | |
982 | } | |
983 | ||
984 | kernel << expr.name() << '('; | |
985 | kernel.insert_function_call_args(expr.args()); | |
986 | kernel << ", "; | |
987 | kernel.insert_function_call_args(expr.capture()); | |
988 | kernel << ')'; | |
989 | ||
990 | return kernel; | |
991 | } | |
992 | ||
993 | template<class Arg1, class Arg2, class Result> | |
994 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
995 | const invoked_binary_operator<Arg1, | |
996 | Arg2, | |
997 | Result> &expr) | |
998 | { | |
999 | return kernel << "((" << expr.arg1() << ")" | |
1000 | << expr.op() | |
1001 | << "(" << expr.arg2() << "))"; | |
1002 | } | |
1003 | ||
1004 | template<class T, class IndexExpr> | |
1005 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1006 | const detail::device_ptr_index_expr<T, IndexExpr> &expr) | |
1007 | { | |
1008 | if(expr.m_index == 0){ | |
1009 | return kernel << | |
1010 | kernel.get_buffer_identifier<T>(expr.m_buffer) << | |
1011 | '[' << expr.m_expr << ']'; | |
1012 | } | |
1013 | else { | |
1014 | return kernel << | |
1015 | kernel.get_buffer_identifier<T>(expr.m_buffer) << | |
1016 | '[' << expr.m_index << "+(" << expr.m_expr << ")]"; | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | template<class T1, class T2, class IndexExpr> | |
1021 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1022 | const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr) | |
1023 | { | |
1024 | typedef std::pair<T1, T2> T; | |
1025 | ||
1026 | if(expr.m_index == 0){ | |
1027 | return kernel << | |
1028 | kernel.get_buffer_identifier<T>(expr.m_buffer) << | |
1029 | '[' << expr.m_expr << ']'; | |
1030 | } | |
1031 | else { | |
1032 | return kernel << | |
1033 | kernel.get_buffer_identifier<T>(expr.m_buffer) << | |
1034 | '[' << expr.m_index << "+(" << expr.m_expr << ")]"; | |
1035 | } | |
1036 | } | |
1037 | ||
1038 | // SVM requires OpenCL 2.0 | |
1039 | #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) | |
1040 | template<class T, class IndexExpr> | |
1041 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1042 | const svm_ptr_index_expr<T, IndexExpr> &expr) | |
1043 | { | |
1044 | return kernel << | |
1045 | kernel.get_svm_identifier<T>(expr.m_svm_ptr) << | |
1046 | '[' << expr.m_expr << ']'; | |
1047 | } | |
1048 | #endif | |
1049 | ||
1050 | template<class Predicate, class Arg> | |
1051 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1052 | const invoked_unary_negate_function<Predicate, | |
1053 | Arg> &expr) | |
1054 | { | |
1055 | return kernel << "!(" << expr.pred()(expr.expr()) << ')'; | |
1056 | } | |
1057 | ||
1058 | template<class Predicate, class Arg1, class Arg2> | |
1059 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1060 | const invoked_binary_negate_function<Predicate, | |
1061 | Arg1, | |
1062 | Arg2> &expr) | |
1063 | { | |
1064 | return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')'; | |
1065 | } | |
1066 | ||
1067 | // get<N>() for vector types | |
1068 | template<size_t N, class Arg, class T> | |
1069 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1070 | const invoked_get<N, Arg, T> &expr) | |
1071 | { | |
1072 | BOOST_STATIC_ASSERT(N < 16); | |
1073 | ||
1074 | if(N < 10){ | |
1075 | return kernel << expr.m_arg << ".s" << uint_(N); | |
1076 | } | |
1077 | else if(N < 16){ | |
1078 | #ifdef _MSC_VER | |
1079 | # pragma warning(push) | |
1080 | # pragma warning(disable: 4307) | |
1081 | #endif | |
1082 | return kernel << expr.m_arg << ".s" << char('a' + (N - 10)); | |
1083 | #ifdef _MSC_VER | |
1084 | # pragma warning(pop) | |
1085 | #endif | |
1086 | } | |
1087 | ||
1088 | return kernel; | |
1089 | } | |
1090 | ||
1091 | template<class T, class Arg> | |
1092 | inline meta_kernel& operator<<(meta_kernel &kernel, | |
1093 | const invoked_field<T, Arg> &expr) | |
1094 | { | |
1095 | return kernel << expr.m_arg << "." << expr.m_field; | |
1096 | } | |
1097 | ||
1098 | template<class T, class Arg> | |
1099 | inline meta_kernel& operator<<(meta_kernel &k, | |
1100 | const invoked_as<T, Arg> &expr) | |
1101 | { | |
1102 | return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")"; | |
1103 | } | |
1104 | ||
1105 | template<class T, class Arg> | |
1106 | inline meta_kernel& operator<<(meta_kernel &k, | |
1107 | const invoked_convert<T, Arg> &expr) | |
1108 | { | |
1109 | return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")"; | |
1110 | } | |
1111 | ||
1112 | template<class T, class Arg> | |
1113 | inline meta_kernel& operator<<(meta_kernel &k, | |
1114 | const invoked_identity<T, Arg> &expr) | |
1115 | { | |
1116 | return k << expr.m_arg; | |
1117 | } | |
1118 | ||
1119 | template<> | |
1120 | struct inject_type_impl<double_> | |
1121 | { | |
1122 | void operator()(meta_kernel &kernel) | |
1123 | { | |
1124 | kernel.add_extension_pragma("cl_khr_fp64", "enable"); | |
1125 | } | |
1126 | }; | |
1127 | ||
1128 | template<class Scalar, size_t N> | |
1129 | struct inject_type_impl<vector_type<Scalar, N> > | |
1130 | { | |
1131 | void operator()(meta_kernel &kernel) | |
1132 | { | |
1133 | kernel.inject_type<Scalar>(); | |
1134 | } | |
1135 | }; | |
1136 | ||
1137 | } // end detail namespace | |
1138 | } // end compute namespace | |
1139 | } // end boost namespace | |
1140 | ||
1141 | #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP |