//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
#define BOOST_COMPUTE_ALGORITHM_COPY_HPP

#include <algorithm>
#include <iterator>

#include <boost/utility/enable_if.hpp>

#include <boost/mpl/and.hpp>
#include <boost/mpl/not.hpp>
#include <boost/mpl/or.hpp>

#include <boost/compute/buffer.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/detail/copy_on_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_host.hpp>
#include <boost/compute/async/future.hpp>
#include <boost/compute/container/mapped_view.hpp>
#include <boost/compute/detail/device_ptr.hpp>
#include <boost/compute/detail/is_contiguous_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/parameter_cache.hpp>
#include <boost/compute/iterator/buffer_iterator.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {
namespace detail {

namespace mpl = boost::mpl;

// meta-function returning true if copy() between InputIterator and
// OutputIterator can be implemented with clEnqueueCopyBuffer().
template<class InputIterator, class OutputIterator>
struct can_copy_with_copy_buffer :
    mpl::and_<
        mpl::or_<
            boost::is_same<
                InputIterator,
                buffer_iterator<typename InputIterator::value_type>
            >,
            boost::is_same<
                InputIterator,
                detail::device_ptr<typename InputIterator::value_type>
            >
        >,
        mpl::or_<
            boost::is_same<
                OutputIterator,
                buffer_iterator<typename OutputIterator::value_type>
            >,
            boost::is_same<
                OutputIterator,
                detail::device_ptr<typename OutputIterator::value_type>
            >
        >,
        boost::is_same<
            typename InputIterator::value_type,
            typename OutputIterator::value_type
        >
    >::type {};

// meta-function returning true if value_types of HostIterator and
// DeviceIterator are same
template<class HostIterator, class DeviceIterator>
struct is_same_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        typename boost::remove_cv<
            typename DeviceIterator::value_type
        >::type
    >::type {};

// meta-function returning true if value_type of HostIterator is bool
template<class HostIterator>
struct is_bool_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        bool
    >::type {};

// host -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            is_same_value_type<InputIterator, OutputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_device_async(first, last, result, queue, events);
}

// host -> device (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                is_same_value_type<InputIterator, OutputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map [first; last) to device and run copy kernel
    // on device for copying & casting
    ::boost::compute::mapped_view<input_type> mapped_host(
        // make sure it's a pointer to constant data
        // to force read only mapping
        const_cast<const input_type*>(
            ::boost::addressof(*first)
        ),
        count,
        context
    );
    return copy_on_device_async(
        mapped_host.begin(), mapped_host.end(), result, queue, events
    );
}

// host -> device
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      is_same_value_type<InputIterator, OutputIterator>,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    return copy_to_device(first, last, result, queue, events);
}

// host -> device
// Type mismatch between InputIterator and OutputIterator value_types
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_same_value_type<InputIterator, OutputIterator>
                      >,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
    // on host and then perform copy_to_device()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<output_type> vector(first, last);
        return copy_to_device(
            vector.begin(), vector.end(), result, queue, events
        );
    }

    // [direct_copy_threshold; inf) -> map [first; last) to device and
    // run copy kernel on device for copying & casting

    // Perform async copy to device, wait for it to be finished and
    // return the result. At this point we are sure that count >= 1
    // (first != last), so the event returned by dispatch_copy_async()
    // must be valid.
    return dispatch_copy_async(first, last, result, queue, events).get();
}

// host -> device
// InputIterator is NOT a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_contiguous_iterator<InputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t input_size = iterator_range_size(first, last);
    size_t input_size_bytes = input_size * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    //
    // if direct_copy_threshold is less than map_copy_threshold
    // copy_to_device_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; inf) -> convert [first; last)
    // on host and then perform copy_to_device()
    std::vector<output_type> vector(first, last);
    return copy_to_device(vector.begin(), vector.end(), result, queue, events);
}

// device -> host (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            is_same_value_type<OutputIterator, InputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_host_async(first, last, result, queue, events);
}

// device -> host (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            mpl::not_<
                                is_same_value_type<OutputIterator, InputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map host memory to device
    buffer mapped_host(
        context,
        count * sizeof(output_type),
        buffer::write_only | buffer::use_host_ptr,
        static_cast<void*>(
            ::boost::addressof(*result)
        )
    );
    // copy async on device
    ::boost::compute::future<buffer_iterator<output_type> > future =
        copy_on_device_async(
            first,
            last,
            make_buffer_iterator<output_type>(mapped_host),
            queue,
            events
        );
    // update host memory asynchronously by mapping and unmapping memory
    event map_event;
    void* ptr = queue.enqueue_map_buffer_async(
        mapped_host,
        CL_MAP_READ,
        0,
        count * sizeof(output_type),
        map_event,
        future.get_event()
    );
    event unmap_event =
        queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
    return make_future(result + count, unmap_event);
}

// device -> host
// OutputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      is_same_value_type<OutputIterator, InputIterator>,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    return copy_to_host(first, last, result, queue, events);
}

// device -> host
// OutputIterator is NOT a contiguous iterator, or value_type of OutputIterator
// is a boolean type (the value_types may or may not match).
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::or_<
                          mpl::not_<
                              is_contiguous_iterator<OutputIterator>
                          >,
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 33554432; // 32 MB
        direct_copy_threshold = 0; // it's never efficient for GPUs
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    //
    // if direct_copy_threshold is less than map_copy_threshold
    // copy_to_host_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; inf) -> copy [first;last) to temporary vector
    // then copy (and convert) to result using std::copy()
    std::vector<input_type> vector(count);
    copy_to_host(first, last, vector.begin(), queue, events);
    return std::copy(vector.begin(), vector.end(), result);
}

// device -> host
// Type mismatch between InputIterator and OutputIterator value_types
// OutputIterator is a contiguous iterator
// value_type of OutputIterator is NOT a boolean type
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::not_<
                          is_same_value_type<OutputIterator, InputIterator>
                      >,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
    // temporary vector then copy (and convert) to result using std::copy()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<input_type> vector(count);
        copy_to_host(first, last, vector.begin(), queue, events);
        return std::copy(vector.begin(), vector.end(), result);
    }

    // [direct_copy_threshold; inf) -> map [result; result + count) to
    // device and run copy kernel on device for copying & casting

    // Perform async copy to host, wait for it to be finished and
    // return the result. At this point we are sure that count >= 1
    // (first != last), so the event returned by dispatch_copy_async()
    // must be valid.
    return dispatch_copy_async(first, last, result, queue, events).get();
}

// device -> device
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          can_copy_with_copy_buffer<
                              InputIterator, OutputIterator
                          >
                      >
                  >
              >::type* = 0)
{
    return copy_on_device(first, last, result, queue, events);
}

// device -> device (specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      can_copy_with_copy_buffer<
                          InputIterator, OutputIterator
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return result;
    }

    queue.enqueue_copy_buffer(first.get_buffer(),
                              result.get_buffer(),
                              first.get_index() * sizeof(value_type),
                              result.get_index() * sizeof(value_type),
                              static_cast<size_t>(n) * sizeof(value_type),
                              events);
    return result + n;
}

// device -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                can_copy_with_copy_buffer<
                                    InputIterator, OutputIterator
                                >
                            >
                        >
                    >::type* = 0)
{
    return copy_on_device_async(first, last, result, queue, events);
}

// device -> device (async, specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            can_copy_with_copy_buffer<
                                InputIterator, OutputIterator
                            >
                        >
                    >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return make_future(result, event());
    }

    event event_ =
        queue.enqueue_copy_buffer(
            first.get_buffer(),
            result.get_buffer(),
            first.get_index() * sizeof(value_type),
            result.get_index() * sizeof(value_type),
            static_cast<size_t>(n) * sizeof(value_type),
            events
        );

    return make_future(result + n, event_);
}

// host -> host
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if_c<
                  !is_device_iterator<InputIterator>::value &&
                  !is_device_iterator<OutputIterator>::value
              >::type* = 0)
{
    (void) queue;
    (void) events;

    return std::copy(first, last, result);
}

} // end detail namespace

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result.
///
/// The generic copy() function can be used for a variety of data
/// transfer tasks and provides a standard interface to the following
/// OpenCL functions:
///
/// \li \c clEnqueueReadBuffer()
/// \li \c clEnqueueWriteBuffer()
/// \li \c clEnqueueCopyBuffer()
///
/// Unlike the aforementioned OpenCL functions, copy() will also work
/// with non-contiguous data-structures (e.g. \c std::list<T>) as
/// well as with "fancy" iterators (e.g. transform_iterator).
///
/// \param first first element in the range to copy
/// \param last last element in the range to copy
/// \param result first element in the result range
/// \param queue command queue to perform the operation
///
/// \return \c OutputIterator to the end of the result range
///
/// For example, to copy an array of \c int values on the host to a vector on
/// the device:
/// \code
/// // array on the host
/// int data[] = { 1, 2, 3, 4 };
///
/// // vector on the device
/// boost::compute::vector<int> vec(4, context);
///
/// // copy values to the device vector
/// boost::compute::copy(data, data + 4, vec.begin(), queue);
/// \endcode
///
/// The copy algorithm can also be used with standard containers such as
/// \c std::vector<T>:
/// \code
/// std::vector<int> host_vector = ...
/// boost::compute::vector<int> device_vector = ...
///
/// // copy from the host to the device
/// boost::compute::copy(
///     host_vector.begin(), host_vector.end(), device_vector.begin(), queue
/// );
///
/// // copy from the device to the host
/// boost::compute::copy(
///     device_vector.begin(), device_vector.end(), host_vector.begin(), queue
/// );
/// \endcode
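///
/// Copies between two ranges that both reside on the device are also
/// supported. When both ranges are plain buffer iterators with the same
/// value type, the copy is carried out with \c clEnqueueCopyBuffer(). A
/// minimal sketch, assuming \c src and \c dst are \c boost::compute::vector<int>
/// containers of equal size:
/// \code
/// // device-to-device copy (dispatched to clEnqueueCopyBuffer())
/// boost::compute::copy(src.begin(), src.end(), dst.begin(), queue);
/// \endcode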
///
/// Space complexity: \Omega(1)
///
/// \see copy_n(), copy_if(), copy_async()
template<class InputIterator, class OutputIterator>
inline OutputIterator copy(InputIterator first,
                           InputIterator last,
                           OutputIterator result,
                           command_queue &queue = system::default_queue(),
                           const wait_list &events = wait_list())
{
    return detail::dispatch_copy(first, last, result, queue, events);
}

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result. The copy is performed asynchronously.
///
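/// For example, to start copying a \c std::vector<int> to the device and
/// block for completion later (a minimal sketch; \c host_vector,
/// \c device_vector and \c queue are assumed to be set up as in the copy()
/// examples above, and the host range must remain valid until the copy
/// has finished):
/// \code
/// boost::compute::future<boost::compute::vector<int>::iterator> f =
///     boost::compute::copy_async(
///         host_vector.begin(), host_vector.end(), device_vector.begin(), queue
///     );
///
/// // ... do other work on the host while the transfer runs ...
///
/// f.wait(); // block until the copy has completed
/// \endcode
///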
/// \see copy()
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
copy_async(InputIterator first,
           InputIterator last,
           OutputIterator result,
           command_queue &queue = system::default_queue(),
           const wait_list &events = wait_list())
{
    return detail::dispatch_copy_async(first, last, result, queue, events);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP