//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
#define BOOST_COMPUTE_ALGORITHM_COPY_HPP

#include <algorithm>
#include <iterator>

#include <boost/utility/enable_if.hpp>

#include <boost/mpl/and.hpp>
#include <boost/mpl/not.hpp>
#include <boost/mpl/or.hpp>

#include <boost/compute/buffer.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/detail/copy_on_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_host.hpp>
#include <boost/compute/async/future.hpp>
#include <boost/compute/container/mapped_view.hpp>
#include <boost/compute/detail/device_ptr.hpp>
#include <boost/compute/detail/is_contiguous_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/parameter_cache.hpp>
#include <boost/compute/iterator/buffer_iterator.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {
namespace detail {

namespace mpl = boost::mpl;

// meta-function returning true if copy() between InputIterator and
// OutputIterator can be implemented with clEnqueueCopyBuffer().
template<class InputIterator, class OutputIterator>
struct can_copy_with_copy_buffer :
    mpl::and_<
        mpl::or_<
            boost::is_same<
                InputIterator,
                buffer_iterator<typename InputIterator::value_type>
            >,
            boost::is_same<
                InputIterator,
                detail::device_ptr<typename InputIterator::value_type>
            >
        >,
        mpl::or_<
            boost::is_same<
                OutputIterator,
                buffer_iterator<typename OutputIterator::value_type>
            >,
            boost::is_same<
                OutputIterator,
                detail::device_ptr<typename OutputIterator::value_type>
            >
        >,
        boost::is_same<
            typename InputIterator::value_type,
            typename OutputIterator::value_type
        >
    >::type {};

// meta-function returning true if the value_types of HostIterator and
// DeviceIterator are the same
template<class HostIterator, class DeviceIterator>
struct is_same_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        typename boost::remove_cv<
            typename DeviceIterator::value_type
        >::type
    >::type {};

// meta-function returning true if the value_type of HostIterator is bool
template<class HostIterator>
struct is_bool_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        bool
    >::type {};

// host -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            is_same_value_type<InputIterator, OutputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_device_async(first, last, result, queue);
}

// host -> device (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                is_same_value_type<InputIterator, OutputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map [first; last) to device and run copy kernel
    // on device for copying & casting
    ::boost::compute::mapped_view<input_type> mapped_host(
        // make sure it's a pointer to constant data
        // to force read only mapping
        const_cast<const input_type*>(
            ::boost::addressof(*first)
        ),
        count,
        context
    );
    return copy_on_device_async(
        mapped_host.begin(), mapped_host.end(), result, queue
    );
}

// host -> device
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      is_same_value_type<InputIterator, OutputIterator>,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    return copy_to_device(first, last, result, queue);
}

// host -> device
// Type mismatch between InputIterator and OutputIterator value_types
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_same_value_type<InputIterator, OutputIterator>
                      >,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue);
    }
    // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
    // on host and then perform copy_to_device()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<output_type> vector(first, last);
        return copy_to_device(vector.begin(), vector.end(), result, queue);
    }

    // [direct_copy_threshold; inf) -> map [first; last) to device and
    // run copy kernel on device for copying & casting.

    // Perform an async copy to device, wait for it to finish and return
    // the result. At this point we are sure that count >= 1 (first != last),
    // so the event returned by dispatch_copy_async() must be valid.
    return dispatch_copy_async(first, last, result, queue).get();
}

// host -> device
// InputIterator is NOT a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_contiguous_iterator<InputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t input_size = iterator_range_size(first, last);
    size_t input_size_bytes = input_size * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    //
    // if direct_copy_threshold is not greater than map_copy_threshold
    // copy_to_device_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue);
    }
    // [map_copy_threshold; inf) -> convert [first; last)
    // on host and then perform copy_to_device()
    std::vector<output_type> vector(first, last);
    return copy_to_device(vector.begin(), vector.end(), result, queue);
}

// device -> host (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            is_same_value_type<OutputIterator, InputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_host_async(first, last, result, queue);
}

// device -> host (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            mpl::not_<
                                is_same_value_type<OutputIterator, InputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map host memory to device
    buffer mapped_host(
        context,
        count * sizeof(output_type),
        buffer::write_only | buffer::use_host_ptr,
        static_cast<void*>(
            ::boost::addressof(*result)
        )
    );
    // copy async on device
    ::boost::compute::future<buffer_iterator<output_type> > future =
        copy_on_device_async(
            first,
            last,
            make_buffer_iterator<output_type>(mapped_host),
            queue
        );
    // update host memory asynchronously by mapping and unmapping memory
    event map_event;
    void* ptr = queue.enqueue_map_buffer_async(
        mapped_host,
        CL_MAP_READ,
        0,
        count * sizeof(output_type),
        map_event,
        future.get_event()
    );
    event unmap_event =
        queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
    return make_future(result + count, unmap_event);
}

// device -> host
// OutputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      is_same_value_type<OutputIterator, InputIterator>,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    return copy_to_host(first, last, result, queue);
}

// device -> host
// OutputIterator is NOT a contiguous iterator or the value_type of
// OutputIterator is a boolean type.
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::or_<
                          mpl::not_<
                              is_contiguous_iterator<OutputIterator>
                          >,
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 33554432; // 32 MB
        direct_copy_threshold = 0; // it's never efficient for GPUs
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    //
    // if direct_copy_threshold is not greater than map_copy_threshold
    // copy_to_host_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue);
    }
    // [map_copy_threshold; inf) -> copy [first; last) to temporary vector
    // then copy (and convert) to result using std::copy()
    std::vector<input_type> vector(count);
    copy_to_host(first, last, vector.begin(), queue);
    return std::copy(vector.begin(), vector.end(), result);
}

// device -> host
// Type mismatch between InputIterator and OutputIterator value_types
// OutputIterator is a contiguous iterator
// value_type of OutputIterator is NOT a boolean type
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::not_<
                          is_same_value_type<OutputIterator, InputIterator>
                      >,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue);
    }
    // [map_copy_threshold; direct_copy_threshold) -> copy [first; last) to
    // temporary vector then copy (and convert) to result using std::copy()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<input_type> vector(count);
        copy_to_host(first, last, vector.begin(), queue);
        return std::copy(vector.begin(), vector.end(), result);
    }

    // [direct_copy_threshold; inf) -> map [result; result + count) to
    // device and run copy kernel on device for copying & casting.

    // Perform an async copy to host, wait for it to finish and return
    // the result. At this point we are sure that count >= 1 (first != last),
    // so the event returned by dispatch_copy_async() must be valid.
    return dispatch_copy_async(first, last, result, queue).get();
}

// device -> device
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          can_copy_with_copy_buffer<
                              InputIterator, OutputIterator
                          >
                      >
                  >
              >::type* = 0)
{
    return copy_on_device(first, last, result, queue);
}

// device -> device (specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      can_copy_with_copy_buffer<
                          InputIterator, OutputIterator
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return result;
    }

    queue.enqueue_copy_buffer(first.get_buffer(),
                              result.get_buffer(),
                              first.get_index() * sizeof(value_type),
                              result.get_index() * sizeof(value_type),
                              static_cast<size_t>(n) * sizeof(value_type));
    return result + n;
}

// device -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                can_copy_with_copy_buffer<
                                    InputIterator, OutputIterator
                                >
                            >
                        >
                    >::type* = 0)
{
    return copy_on_device_async(first, last, result, queue);
}

// device -> device (async, specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            can_copy_with_copy_buffer<
                                InputIterator, OutputIterator
                            >
                        >
                    >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return make_future(result, event());
    }

    event event_ =
        queue.enqueue_copy_buffer(
            first.get_buffer(),
            result.get_buffer(),
            first.get_index() * sizeof(value_type),
            result.get_index() * sizeof(value_type),
            static_cast<size_t>(n) * sizeof(value_type)
        );

    return make_future(result + n, event_);
}

// host -> host
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  !is_device_iterator<InputIterator>::value &&
                  !is_device_iterator<OutputIterator>::value
              >::type* = 0)
{
    (void) queue;

    return std::copy(first, last, result);
}

} // end detail namespace

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result.
///
/// The generic copy() function can be used for a variety of data
/// transfer tasks and provides a standard interface to the following
/// OpenCL functions:
///
/// \li \c clEnqueueReadBuffer()
/// \li \c clEnqueueWriteBuffer()
/// \li \c clEnqueueCopyBuffer()
///
/// Unlike the aforementioned OpenCL functions, copy() will also work
/// with non-contiguous data-structures (e.g. \c std::list<T>) as
/// well as with "fancy" iterators (e.g. transform_iterator).
///
/// \param first first element in the range to copy
/// \param last end of the range to copy
/// \param result first element in the result range
/// \param queue command queue to perform the operation
///
/// \return \c OutputIterator to the end of the result range
///
/// For example, to copy an array of \c int values on the host to a vector on
/// the device:
/// \code
/// // array on the host
/// int data[] = { 1, 2, 3, 4 };
///
/// // vector on the device
/// boost::compute::vector<int> vec(4, context);
///
/// // copy values to the device vector
/// boost::compute::copy(data, data + 4, vec.begin(), queue);
/// \endcode
///
/// The copy algorithm can also be used with standard containers such as
/// \c std::vector<T>:
/// \code
/// std::vector<int> host_vector = ...
/// boost::compute::vector<int> device_vector = ...
///
/// // copy from the host to the device
/// boost::compute::copy(
///     host_vector.begin(), host_vector.end(), device_vector.begin(), queue
/// );
///
/// // copy from the device to the host
/// boost::compute::copy(
///     device_vector.begin(), device_vector.end(), host_vector.begin(), queue
/// );
/// \endcode
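///
/// When both ranges are plain device vectors with the same value type, the
/// copy is dispatched to \c clEnqueueCopyBuffer() and no host round-trip is
/// made. A minimal sketch (the vector names here are illustrative):
/// \code
/// boost::compute::vector<int> src(16, context);
/// boost::compute::vector<int> dst(16, context);
///
/// // copy directly between the two device buffers
/// boost::compute::copy(src.begin(), src.end(), dst.begin(), queue);
/// \endcode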
///
/// Space complexity: \Omega(1)
///
/// \see copy_n(), copy_if(), copy_async()
template<class InputIterator, class OutputIterator>
inline OutputIterator copy(InputIterator first,
                           InputIterator last,
                           OutputIterator result,
                           command_queue &queue = system::default_queue())
{
    return detail::dispatch_copy(first, last, result, queue);
}

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result. The copy is performed asynchronously.
///
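/// For example, to start a host-to-device transfer and block on it later
/// (a minimal sketch; the container names, \c context and \c queue are
/// illustrative, not part of the library):
/// \code
/// std::vector<float> host_data(1024, 1.0f);
/// boost::compute::vector<float> device_vector(1024, context);
///
/// // enqueue the copy and keep the returned future
/// boost::compute::future<boost::compute::vector<float>::iterator> f =
///     boost::compute::copy_async(
///         host_data.begin(), host_data.end(), device_vector.begin(), queue
///     );
///
/// // ... do other work on the host ...
///
/// // wait for the transfer to finish
/// f.wait();
/// \endcode
///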
/// \see copy()
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
copy_async(InputIterator first,
           InputIterator last,
           OutputIterator result,
           command_queue &queue = system::default_queue())
{
    return detail::dispatch_copy_async(first, last, result, queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP