3 // Copyright (c) 2018 Fady Essam
4 // Copyright (c) 2018 Stefan Seefeld
6 // Distributed under the Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt or
8 // copy at http://www.boost.org/LICENSE_1_0.txt)
10 #ifndef boost_numeric_ublas_opencl_elementwise_hpp_
11 #define boost_numeric_ublas_opencl_elementwise_hpp_
13 #include <boost/numeric/ublas/opencl/library.hpp>
14 #include <boost/numeric/ublas/opencl/vector.hpp>
15 #include <boost/numeric/ublas/opencl/matrix.hpp>
17 namespace boost { namespace numeric { namespace ublas { namespace opencl {
19 namespace compute = boost::compute;
20 namespace lambda = boost::compute::lambda;
// Applies the binary functor `op` to two opencl::storage matrices through
// compute::transform, starting at a.begin().
// Preconditions checked with assert: a, b and result report the same device
// as `queue`, and a and b have identical size1()/size2().
// NOTE(review): result's dimensions are not asserted here, unlike the
// matrix/scalar overloads in this header.
// NOTE(review): the compute::transform call is cut off after its first
// argument in this excerpt; the remaining arguments and anything after the
// call are not visible — confirm against the full file.
22 template <typename T, typename L1, typename L2, typename L3, class O>
23 void element_wise(ublas::matrix<T, L1, opencl::storage> const &a,
24 ublas::matrix<T, L2, opencl::storage> const &b,
25 ublas::matrix<T, L3, opencl::storage> &result,
26 O op, compute::command_queue& queue)
28 assert(a.device() == b.device() &&
29 a.device() == result.device() &&
30 a.device() == queue.get_device());
31 assert(a.size1() == b.size1() && a.size2() == b.size2());
33 compute::transform(a.begin(),
// Overload for matrices with an arbitrary storage type A.
// Constructs opencl::storage matrices from (a, queue) and (b, queue),
// allocates an opencl::storage temporary in the queue's context, runs
// element_wise on those three, then copies the temporary into `result`
// with to_host.
// NOTE(review): `op` is used below but its parameter declaration is not
// visible in this excerpt; callers in this header pass the functor between
// `result` and `queue` — confirm.
42 template <typename T, typename L1, typename L2, typename L3, typename A, class O>
43 void element_wise(ublas::matrix<T, L1, A> const &a,
44 ublas::matrix<T, L2, A> const &b,
45 ublas::matrix<T, L3, A> &result,
47 compute::command_queue &queue)
49 ublas::matrix<T, L1, opencl::storage> adev(a, queue);
50 ublas::matrix<T, L2, opencl::storage> bdev(b, queue);
// NOTE(review): the column count is taken from b.size2(), not a.size2();
// the two agree only when a and b have the same shape.
51 ublas::matrix<T, L3, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context());
52 element_wise(adev, bdev, rdev, op, queue);
53 rdev.to_host(result, queue);
// Value-returning form for matrices with storage A: allocates `result` and
// fills it through the out-parameter overload of element_wise.
// NOTE(review): `op` is used below but its parameter declaration is not
// visible in this excerpt; callers pass the functor between `b` and `queue`.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
56 template <typename T, typename L1, typename L2, typename A, typename O>
57 ublas::matrix<T, L1, A> element_wise(ublas::matrix<T, L1, A> const &a,
58 ublas::matrix<T, L2, A> const &b,
60 compute::command_queue &queue)
// NOTE(review): columns come from b.size2(); equal to a.size2() only when
// the operands have matching shapes.
62 ublas::matrix<T, L1, A> result(a.size1(), b.size2());
63 element_wise(a, b, result, op, queue);
// Applies a binary functor to two opencl::storage vectors through
// compute::transform, starting at a.begin().
// Preconditions checked with assert: a, b and result report the same device
// as `queue`, and a.size() == b.size().
// NOTE(review): result.size() is not asserted here, unlike the vector/scalar
// overloads in this header.
// NOTE(review): the functor parameter declaration is not visible in this
// excerpt (template parameter O is otherwise unused in the visible lines),
// and the compute::transform call is cut off after its first argument —
// confirm against the full file.
67 template <typename T, typename O>
68 void element_wise(ublas::vector<T, opencl::storage> const &a,
69 ublas::vector<T, opencl::storage> const &b,
70 ublas::vector<T, opencl::storage> &result,
72 compute::command_queue& queue)
74 assert(a.device() == b.device() &&
75 a.device() == result.device() &&
76 a.device() == queue.get_device());
77 assert(a.size() == b.size());
78 compute::transform(a.begin(),
// Overload for vectors with an arbitrary storage type A.
// Constructs opencl::storage vectors from (a, queue) and (b, queue),
// allocates an opencl::storage temporary of a.size() elements in the queue's
// context, runs element_wise on those three, then copies the temporary into
// `result` with to_host.
// NOTE(review): `op` is used below but its parameter declaration is not
// visible in this excerpt; callers in this header pass the functor between
// `result` and `queue` — confirm.
87 template <typename T, typename A, typename O>
88 void element_wise(ublas::vector<T, A> const &a,
89 ublas::vector<T, A> const &b,
90 ublas::vector<T, A>& result,
92 compute::command_queue &queue)
94 ublas::vector<T, opencl::storage> adev(a, queue);
95 ublas::vector<T, opencl::storage> bdev(b, queue);
96 ublas::vector<T, opencl::storage> rdev(a.size(), queue.get_context());
97 element_wise(adev, bdev, rdev, op, queue);
98 rdev.to_host(result, queue);
// Value-returning form for vectors with storage A: allocates `result` with
// a.size() elements and fills it through the out-parameter overload of
// element_wise.
// NOTE(review): `op` is used below but its parameter declaration is not
// visible in this excerpt; callers pass the functor between `b` and `queue`.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
101 template <typename T, typename A, typename O>
102 ublas::vector<T, A> element_wise(ublas::vector<T, A> const &a,
103 ublas::vector<T, A> const &b,
105 compute::command_queue &queue)
107 ublas::vector<T, A> result(a.size());
108 element_wise(a, b, result, op, queue);
112 template <typename T, typename L1, typename L2, typename L3>
113 void element_add(ublas::matrix<T, L1, opencl::storage> const &a,
114 ublas::matrix<T, L2, opencl::storage> const &b,
115 ublas::matrix<T, L3, opencl::storage> &result,
116 compute::command_queue &queue)
118 element_wise(a, b, result, compute::plus<T>(), queue);
121 template <typename T, typename L1, typename L2, typename L3, typename A>
122 void element_add(ublas::matrix<T, L1, A> const &a,
123 ublas::matrix<T, L2, A> const &b,
124 ublas::matrix<T, L3, A> &result,
125 compute::command_queue &queue)
127 element_wise(a, b, result, compute::plus<T>(), queue);
130 template <typename T, typename L1, typename L2, typename A>
131 ublas::matrix<T, L1, A> element_add(ublas::matrix<T, L1, A> const &a,
132 ublas::matrix<T, L2, A> const &b,
133 compute::command_queue &queue)
135 return element_wise(a, b, compute::plus<T>(), queue);
138 template <typename T>
139 void element_add(ublas::vector<T, opencl::storage> const &a,
140 ublas::vector<T, opencl::storage> const &b,
141 ublas::vector<T, opencl::storage> &result,
142 compute::command_queue& queue)
144 element_wise(a, b, result, compute::plus<T>(), queue);
147 template <typename T, typename A>
148 void element_add(ublas::vector<T, A> const &a,
149 ublas::vector<T, A> const &b,
150 ublas::vector<T, A> &result,
151 compute::command_queue &queue)
153 element_wise(a, b, result, compute::plus<T>(), queue);
156 template <typename T, typename A>
157 ublas::vector<T, A> element_add(ublas::vector<T, A> const &a,
158 ublas::vector<T, A> const &b,
159 compute::command_queue &queue)
161 return element_wise(a, b, compute::plus<T>(), queue);
// Adds the scalar `value` to every element of an opencl::storage matrix,
// writing into `result` via compute::transform with the lambda `_1 + value`.
// Preconditions checked with assert: m and result report the same device as
// `queue`, and have identical size1()/size2().
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
164 template<typename T, typename L>
165 void element_add(ublas::matrix<T, L, opencl::storage> const &m, T value,
166 ublas::matrix<T, L, opencl::storage> &result,
167 compute::command_queue& queue)
169 assert(m.device() == result.device() && m.device() == queue.get_device());
170 assert(m.size1() == result.size1() && m.size2() == result.size2());
171 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 + value, queue);
175 template<typename T, typename L, typename A>
176 void element_add(ublas::matrix<T, L, A> const &m, T value,
177 ublas::matrix<T, L, A> &result,
178 compute::command_queue& queue)
180 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
181 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
182 element_add(mdev, value, rdev, queue);
183 rdev.to_host(result, queue);
// Value-returning form of matrix + scalar for storage A: allocates `result`
// with m's dimensions and fills it through the out-parameter overload of
// element_add.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
186 template<typename T, typename L, typename A>
187 ublas::matrix<T, L, A> element_add(ublas::matrix<T, L, A> const &m, T value,
188 compute::command_queue& queue)
190 ublas::matrix<T, L, A> result(m.size1(), m.size2());
191 element_add(m, value, result, queue);
// Adds the scalar `value` to every element of an opencl::storage vector,
// writing into `result` via compute::transform with the lambda `_1 + value`.
// Preconditions checked with assert: v and result report the same device as
// `queue`, and v.size() == result.size().
// NOTE(review): no template header declaring T is visible before this
// declaration in this excerpt — confirm `template <typename T>` precedes it.
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
196 void element_add(ublas::vector<T, opencl::storage> const &v, T value,
197 ublas::vector<T, opencl::storage> &result,
198 compute::command_queue& queue)
200 assert(v.device() == result.device() && v.device() == queue.get_device());
201 assert(v.size() == result.size());
202 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 + value, queue);
206 template<typename T, typename A>
207 void element_add(ublas::vector<T, A> const &v, T value,
208 ublas::vector<T, A> &result,
209 compute::command_queue& queue)
211 ublas::vector<T, opencl::storage> vdev(v, queue);
212 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
213 element_add(vdev, value, rdev, queue);
214 rdev.to_host(result, queue);
// Value-returning form of vector + scalar for storage A: allocates `result`
// with v.size() elements and fills it through the out-parameter overload of
// element_add.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
217 template <typename T, typename A>
218 ublas::vector<T, A> element_add(ublas::vector<T, A> const &v, T value,
219 compute::command_queue& queue)
221 ublas::vector<T, A> result(v.size());
222 element_add(v, value, result, queue);
226 template <typename T, typename L1, typename L2, typename L3>
227 void element_sub(ublas::matrix<T, L1, opencl::storage> const &a,
228 ublas::matrix<T, L2, opencl::storage> const &b,
229 ublas::matrix<T, L3, opencl::storage> &result,
230 compute::command_queue& queue)
232 element_wise(a, b, compute::minus<T>(), result, queue);
235 template <typename T, typename L1, typename L2, typename L3, typename A>
236 void element_sub(ublas::matrix<T, L1, A> const &a,
237 ublas::matrix<T, L2, A> const &b,
238 ublas::matrix<T, L3, A> &result,
239 compute::command_queue &queue)
241 element_wise(a, b, result, compute::minus<T>(), queue);
244 template <typename T, typename L1, typename L2, typename A>
245 ublas::matrix<T, L1, A> element_sub(ublas::matrix<T, L1, A> const &a,
246 ublas::matrix<T, L2, A> const &b,
247 compute::command_queue &queue)
249 return element_wise(a, b, compute::minus<T>(), queue);
252 template <typename T>
253 void element_sub(ublas::vector<T, opencl::storage> const &a,
254 ublas::vector<T, opencl::storage> const &b,
255 ublas::vector<T, opencl::storage> &result,
256 compute::command_queue& queue)
258 element_wise(a, b, result, compute::minus<T>(), queue);
261 template <typename T, typename A>
262 void element_sub(ublas::vector<T, A> const &a,
263 ublas::vector<T, A> const &b,
264 ublas::vector<T, A> &result,
265 compute::command_queue &queue)
267 element_wise(a, b, result, compute::minus<T>(), queue);
270 template <typename T, typename A>
271 ublas::vector<T, A> element_sub(ublas::vector<T, A> const &a,
272 ublas::vector<T, A> const &b,
273 compute::command_queue &queue)
275 return element_wise(a, b, compute::minus<T>(), queue);
// Subtracts the scalar `value` from every element of an opencl::storage
// matrix, writing into `result` via compute::transform with the lambda
// `_1 - value`.
// Preconditions checked with assert: m and result report the same device as
// `queue`, and have identical size1()/size2().
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
278 template <typename T, typename L>
279 void element_sub(ublas::matrix<T, L, opencl::storage> const &m, T value,
280 ublas::matrix<T, L, opencl::storage> &result,
281 compute::command_queue& queue)
283 assert(m.device() == result.device() && m.device() == queue.get_device());
284 assert(m.size1() == result.size1() && m.size2() == result.size2());
285 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 - value, queue);
289 template <typename T, typename L, typename A>
290 void element_sub(ublas::matrix<T, L, A> const &m, T value,
291 ublas::matrix<T, L, A> &result,
292 compute::command_queue& queue)
294 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
295 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
296 element_sub(mdev, value, rdev, queue);
297 rdev.to_host(result, queue);
// Value-returning form of matrix - scalar for storage A: allocates `result`
// with m's dimensions and fills it through the out-parameter overload of
// element_sub.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
300 template <typename T, typename L, typename A>
301 ublas::matrix<T, L, A> element_sub(ublas::matrix<T, L, A> const &m, T value,
302 compute::command_queue& queue)
304 ublas::matrix<T, L, A> result(m.size1(), m.size2());
305 element_sub(m, value, result, queue);
// Subtracts the scalar `value` from every element of an opencl::storage
// vector, writing into `result` via compute::transform with the lambda
// `_1 - value`.
// Preconditions checked with assert: v and result report the same device as
// `queue`, and v.size() == result.size().
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
309 template <typename T>
310 void element_sub(ublas::vector<T, opencl::storage> const &v, T value,
311 ublas::vector<T, opencl::storage> &result,
312 compute::command_queue& queue)
314 assert(v.device() == result.device() && v.device() == queue.get_device());
315 assert(v.size() == result.size());
316 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 - value, queue);
320 template <typename T, typename A>
321 void element_sub(ublas::vector<T, A> const &v, T value,
322 ublas::vector<T, A> &result,
323 compute::command_queue& queue)
325 ublas::vector<T, opencl::storage> vdev(v, queue);
326 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
327 element_sub(vdev, value, rdev, queue);
328 rdev.to_host(result, queue);
// Value-returning form of vector - scalar for storage A: allocates `result`
// with v.size() elements and fills it through the out-parameter overload of
// element_sub.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
331 template <typename T, typename A>
332 ublas::vector<T, A> element_sub(ublas::vector<T, A> const &v, T value,
333 compute::command_queue& queue)
335 ublas::vector<T, A> result(v.size());
336 element_sub(v, value, result, queue);
340 template <typename T, typename L1, typename L2, typename L3>
341 void element_prod(ublas::matrix<T, L1, opencl::storage> const &a,
342 ublas::matrix<T, L2, opencl::storage> const &b,
343 ublas::matrix<T, L3, opencl::storage> &result,
344 compute::command_queue& queue)
346 element_wise(a, b, result, compute::multiplies<T>(), queue);
349 template <typename T, typename L1, typename L2, typename L3, typename A>
350 void element_prod(ublas::matrix<T, L1, A> const &a,
351 ublas::matrix<T, L2, A> const &b,
352 ublas::matrix<T, L3, A> &result,
353 compute::command_queue &queue)
355 element_wise(a, b, result, compute::multiplies<T>(), queue);
358 template <typename T, typename L1, typename L2, typename A>
359 ublas::matrix<T, L1, A> element_prod(ublas::matrix<T, L1, A> const &a,
360 ublas::matrix<T, L2, A> const &b,
361 compute::command_queue &queue)
363 return element_wise(a, b, compute::multiplies<T>(), queue);
366 template <typename T>
367 void element_prod(ublas::vector<T, opencl::storage> const &a,
368 ublas::vector<T, opencl::storage> const &b,
369 ublas::vector<T, opencl::storage> &result,
370 compute::command_queue& queue)
372 element_wise(a, b, result, compute::multiplies<T>(), queue);
375 template <typename T, typename A>
376 void element_prod(ublas::vector<T, A> const &a,
377 ublas::vector<T, A> const &b,
378 ublas::vector<T, A> &result,
379 compute::command_queue &queue)
381 element_wise(a, b, result, compute::multiplies<T>(), queue);
384 template <typename T, typename A>
385 ublas::vector<T, A> element_prod(ublas::vector<T, A> const &a,
386 ublas::vector<T, A> const &b,
387 compute::command_queue &queue)
389 return element_wise(a, b, compute::multiplies<T>(), queue);
// Multiplies every element of an opencl::storage matrix by the scalar
// `value`, writing into `result` via compute::transform with the lambda
// `_1 * value`.
// Preconditions checked with assert: m and result report the same device as
// `queue`, and have identical size1()/size2().
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
392 template <typename T, typename L>
393 void element_scale(ublas::matrix<T, L, opencl::storage> const &m, T value,
394 ublas::matrix<T, L, opencl::storage> &result,
395 compute::command_queue& queue)
397 assert(m.device() == result.device() && m.device() == queue.get_device());
398 assert(m.size1() == result.size1() && m.size2() == result.size2());
399 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 * value, queue);
403 template <typename T, typename L, typename A>
404 void element_scale(ublas::matrix<T, L, A> const &m, T value,
405 ublas::matrix<T, L, A> &result,
406 compute::command_queue& queue)
408 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
409 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
410 element_scale(mdev, value, rdev, queue);
411 rdev.to_host(result, queue);
// Value-returning form of matrix * scalar for storage A: allocates `result`
// with m's dimensions and fills it through the out-parameter overload of
// element_scale.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
414 template <typename T, typename L, typename A>
415 ublas::matrix<T, L, A> element_scale(ublas::matrix<T, L, A> const &m, T value,
416 compute::command_queue& queue)
418 ublas::matrix<T, L, A> result(m.size1(), m.size2());
419 element_scale(m, value, result, queue);
// Multiplies every element of an opencl::storage vector by the scalar
// `value`, writing into `result` via compute::transform with the lambda
// `_1 * value`.
// Preconditions checked with assert: v and result report the same device as
// `queue`, and v.size() == result.size().
// NOTE(review): the embedded numbering skips a line after the transform
// call; no synchronisation is visible in this excerpt — confirm whether a
// statement follows the call in the full file.
423 template <typename T>
424 void element_scale(ublas::vector<T, opencl::storage> const &v, T value,
425 ublas::vector<T, opencl::storage> &result,
426 compute::command_queue& queue)
428 assert(v.device() == result.device() && v.device() == queue.get_device());
429 assert(v.size() == result.size());
430 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 * value, queue);
434 template <typename T, typename A>
435 void element_scale(ublas::vector<T, A> const &v, T value,
436 ublas::vector<T, A> & result,
437 compute::command_queue& queue)
439 ublas::vector<T, opencl::storage> vdev(v, queue);
440 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
441 element_scale(vdev, value, rdev, queue);
442 rdev.to_host(result, queue);
// Value-returning form of vector * scalar for storage A: allocates `result`
// with v.size() elements and fills it through the out-parameter overload of
// element_scale.
// NOTE(review): no `return result;` is visible in this excerpt although the
// function returns by value — confirm it follows the call.
445 template <typename T, typename A>
446 ublas::vector<T,A> element_scale(ublas::vector<T, A> const &v, T value,
447 compute::command_queue& queue)
449 ublas::vector<T, A> result(v.size());
450 element_scale(v, value, result, queue);
454 template <typename T, typename L1, typename L2, typename L3>
455 void element_div(ublas::matrix<T, L1, opencl::storage> const &a,
456 ublas::matrix<T, L2, opencl::storage> const &b,
457 ublas::matrix<T, L3, opencl::storage> &result,
458 compute::command_queue& queue)
460 element_wise(a, b, result, compute::divides<T>(), queue);
463 template <typename T, typename L1, typename L2, typename L3, typename A>
464 void element_div(ublas::matrix<T, L1, A> const &a,
465 ublas::matrix<T, L2, A> const &b,
466 ublas::matrix<T, L3, A> &result,
467 compute::command_queue &queue)
469 element_wise(a, b, result, compute::divides<T>(), queue);
472 template <typename T, typename L1, typename L2, typename A>
473 ublas::matrix<T, L1, A> element_div(ublas::matrix<T, L1, A> const &a,
474 ublas::matrix<T, L2, A> const &b,
475 compute::command_queue &queue)
477 return element_wise(a, b, compute::divides<T>(), queue);
480 template <typename T>
481 void element_div(ublas::vector<T, opencl::storage> const &a,
482 ublas::vector<T, opencl::storage> const &b,
483 ublas::vector<T, opencl::storage> &result,
484 compute::command_queue& queue)
486 element_wise(a, b, result, compute::divides<T>(), queue);
489 template <typename T, typename A>
490 void element_div(ublas::vector<T, A> const &a,
491 ublas::vector<T, A> const &b,
492 ublas::vector<T, A> &result,
493 compute::command_queue &queue)
495 element_wise(a, b, result, compute::divides<T>(), queue);
498 template <typename T, typename A>
499 ublas::vector<T, A> element_div(ublas::vector<T, A> const &a,
500 ublas::vector<T, A> const &b,
501 compute::command_queue &queue)
503 return element_wise(a, b, compute::divides<T>(), queue);