ceph/src/boost/libs/mpi/include/boost/mpi/collectives.hpp

   1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.
   2
   3 // Use, modification and distribution is subject to the Boost Software
   4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
   5 // http://www.boost.org/LICENSE_1_0.txt)
   6
   7 // Message Passing Interface 1.1 -- Section 4. MPI Collectives
   8
   9 /** @file collectives.hpp
  10  *
  11  *  This header contains MPI collective operations, which implement
  12  *  various parallel algorithms that require the coordination of all
  13  *  processes within a communicator. The header @c collectives_fwd.hpp
  14  *  provides forward declarations for each of these operations. To
  15  *  include only specific collective algorithms, use the headers @c
  16  *  boost/mpi/collectives/algorithm_name.hpp.
  17  */
  18 #ifndef BOOST_MPI_COLLECTIVES_HPP
  19 #define BOOST_MPI_COLLECTIVES_HPP
  20
  21 #include <boost/mpi/communicator.hpp>
  22 #include <boost/mpi/inplace.hpp>
  23 #include <vector>
  24
  25 namespace boost { namespace mpi {
  26 /**
  27  *  @brief Gather the values stored at every process into vectors of
  28  *  values from each process.
  29  *
  30  *  @c all_gather is a collective algorithm that collects the values
  31  *  stored at each process into a vector of values indexed by the
  32  *  process number they came from. The type @c T of the values may be
  33  *  any type that is serializable or has an associated MPI data type.
  34  *
  35  *  When the type @c T has an associated MPI data type, this routine
  36  *  invokes @c MPI_Allgather to gather the values.
  37  *
  38  *    @param comm The communicator over which the all-gather will
  39  *    occur.
  40  *
  41  *    @param in_value The value to be transmitted by each process. To
  42  *    gather an array of values, @p in_values points to the @p n local
  43  *    values to be transmitted.
  44  *
  45  *    @param out_values A vector or pointer to storage that will be
  46  *    populated with the values from each process, indexed by the
  47  *    process ID number. If it is a vector, the vector will be resized
  48  *    accordingly.
  49  */
  50 template<typename T>
  51 void
  52 all_gather(const communicator& comm, const T& in_value,
  53            std::vector<T>& out_values);
  54
  55 /**
  56  * \overload
  57  */
  58 template<typename T>
  59 void
  60 all_gather(const communicator& comm, const T& in_value, T* out_values);
  61
  62 /**
  63  * \overload
  64  */
  65 template<typename T>
  66 void
  67 all_gather(const communicator& comm, const T* in_values, int n,
  68            std::vector<T>& out_values);
  69
  70 /**
  71  * \overload
  72  */
  73 template<typename T>
  74 void
  75 all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
  76
  77 /**
  78  *  @brief Combine the values stored by each process into a single
  79  *  value available to all processes.
  80  *
  81  *  @c all_reduce is a collective algorithm that combines the values
  82  *  stored by each process into a single value available to all
  83  *  processes. The values are combined in a user-defined way,
  84  *  specified via a function object. The type @c T of the values may
  85  *  be any type that is serializable or has an associated MPI data
  86  *  type. One can think of this operation as an @c all_gather, followed
  87  *  by an @c std::accumulate() over the gathered values and using the
  88  *  operation @c op.
  89  *
  90  *  When the type @c T has an associated MPI data type, this routine
  91  *  invokes @c MPI_Allreduce to perform the reduction. If possible,
  92  *  built-in MPI operations will be used; otherwise, @c all_reduce()
  93  *  will create a custom MPI_Op for the call to MPI_Allreduce.
  94  *
  95  *    @param comm The communicator over which the reduction will
  96  *    occur.
  97  *    @param value The local value to be combined with the local
  98  *    values of every other process. For reducing arrays, @p value
  99  *    is a pointer to the local values to be reduced and @p n is the
 100  *    number of values to reduce. See @c reduce for more information.
 101  *
 102  *    If wrapped in an @c inplace_t object, @p value serves as both the
 103  *    input and the @c out_value, and the local value will be overwritten
 104  *    (a convenience function @c inplace is provided for the wrapping).
 105  *
 106  *    @param out_value Will receive the result of the reduction
 107  *    operation. If this parameter is omitted, the outgoing value will
 108  *    instead be returned.
 109  *
 110  *    @param op The binary operation that combines two values of type
 111  *    @c T and returns a third value of type @c T. For types @c T that have
 112  *    associated MPI data types, @c op will either be translated into
 113  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 114  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
 115  *    operations.hpp header for more details on this mapping. For any
 116  *    non-built-in operation, commutativity will be determined by the
 117  *    @c is_commutative trait (also in @c operations.hpp): users are
 118  *    encouraged to mark commutative operations as such, because it
 119  *    gives the implementation additional latitude to optimize the
 120  *    reduction operation.
 121  *
 122  *    @param n Indicates the size of the buffers of array type.
 123  *    @returns If no @p out_value parameter is supplied, returns the
 124  *    result of the reduction operation.
 125  */
 126 template<typename T, typename Op>
 127 void
 128 all_reduce(const communicator& comm, const T* value, int n, T* out_value,
 129            Op op);
 130 /**
 131  * \overload
 132  */
 133 template<typename T, typename Op>
 134 void
 135 all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
 136 /**
 137  * \overload
 138  */
 139 template<typename T, typename Op>
 140 T all_reduce(const communicator& comm, const T& value, Op op);
 141
 142 /**
 143  * \overload
 144  */
 145 template<typename T, typename Op>
 146 void
 147 all_reduce(const communicator& comm, inplace_t<T*> value, int n,
 148            Op op);
 149 /**
 150  * \overload
 151  */
 152 template<typename T, typename Op>
 153 void
 154 all_reduce(const communicator& comm, inplace_t<T> value, Op op);
 155
 156 /**
 157  *  @brief Send data from every process to every other process.
 158  *
 159  *  @c all_to_all is a collective algorithm that transmits @c p values
 160  *  from every process to every other process. On process i, the jth value
 161  *  of the @p in_values vector is sent to process j and placed in the
 162  *  ith position of the @p out_values vector in process @p j. The type
 163  *  @c T of the values may be any type that is serializable or has an
 164  *  associated MPI data type. If @c n is provided, then arrays of @p n
 165  *  values will be transferred from one process to another.
 166  *
 167  *  When the type @c T has an associated MPI data type, this routine
 168  *  invokes @c MPI_Alltoall to scatter the values.
 169  *
 170  *    @param comm The communicator over which the all-to-all
 171  *    communication will occur.
 172  *
 173  *    @param in_values A vector or pointer to storage that contains
 174  *    the values to send to each process, indexed by the process ID
 175  *    number.
 176  *
 177  *    @param out_values A vector or pointer to storage that will be
 178  *    updated to contain the values received from other processes. The
 179  *    jth value in @p out_values will come from the process with rank j.
 180  */
 181 template<typename T>
 182 void
 183 all_to_all(const communicator& comm, const std::vector<T>& in_values,
 184            std::vector<T>& out_values);
 185
 186 /**
 187  * \overload
 188  */
 189 template<typename T>
 190 void all_to_all(const communicator& comm, const T* in_values, T* out_values);
 191
 192 /**
 193  * \overload
 194  */
 195 template<typename T>
 196 void
 197 all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
 198            std::vector<T>& out_values);
 199
 200 /**
 201  * \overload
 202  */
 203 template<typename T>
 204 void
 205 all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
 206
 207 /**
 208  * @brief Broadcast a value from a root process to all other
 209  * processes.
 210  *
 211  * @c broadcast is a collective algorithm that transfers a value from
 212  * an arbitrary @p root process to every other process that is part of
 213  * the given communicator. The @c broadcast algorithm can transmit any
 214  * Serializable value, values that have associated MPI data types,
 215  * packed archives, skeletons, and the content of skeletons; see the
 216  * @c send primitive for communicators for a complete list. The type
 217  * @c T shall be the same for all processes that are a part of the
 218  * communicator @p comm, unless packed archives are being transferred:
 219  * with packed archives, the root sends a @c packed_oarchive or @c
 220  * packed_skeleton_oarchive whereas the other processes receive a
 221  * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 222  *
 223  * When the type @c T has an associated MPI data type, this routine
 224  * invokes @c MPI_Bcast to perform the broadcast.
 225  *
 226  *   @param comm The communicator over which the broadcast will
 227  *   occur.
 228  *
 229  *   @param value The value (or values, if @p n is provided) to be
 230  *   transmitted (if the rank of @p comm is equal to @p root) or
 231  *   received (if the rank of @p comm is not equal to @p root). When
 232  *   the @p value is a @c skeleton_proxy, only the skeleton of the
 233  *   object will be broadcast. In this case, the @p root will build a
 234  *   skeleton from the object held in the proxy and all of the
 235  *   non-roots will reshape the objects held in their proxies based on
 236  *   the skeleton sent from the root.
 237  *
 238  *   @param n When supplied, the number of values that the pointer @p
 239  *   values points to, for broadcasting an array of values. The value
 240  *   of @p n must be the same for all processes in @p comm.
 241  *
 242  *   @param root The rank/process ID of the process that will be
 243  *   transmitting the value.
 244  */
 245 template<typename T>
 246 void broadcast(const communicator& comm, T& value, int root);
 247
 248 /**
 249  * \overload
 250  */
 251 template<typename T>
 252 void broadcast(const communicator& comm, T* values, int n, int root);
 253
 254 /**
 255  * \overload
 256  */
 257 template<typename T>
 258 void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
 259
 260 /**
 261  * \overload
 262  */
 263 template<typename T>
 264 void
 265 broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
 266
 267 /**
 268  *  @brief Gather the values stored at every process into a vector at
 269  *  the root process.
 270  *
 271  *  @c gather is a collective algorithm that collects the values
 272  *  stored at each process into a vector of values at the @p root
 273  *  process. This vector is indexed by the process number that the
 274  *  value came from. The type @c T of the values may be any type that
 275  *  is serializable or has an associated MPI data type.
 276  *
 277  *  When the type @c T has an associated MPI data type, this routine
 278  *  invokes @c MPI_Gather to gather the values.
 279  *
 280  *    @param comm The communicator over which the gather will occur.
 281  *
 282  *    @param in_value The value to be transmitted by each process. For
 283  *    gathering arrays of values, @p in_values points to the @p n local
 284  *    values to be transmitted by each process.
 285  *
 286  *    @param out_values A vector or pointer to storage that will be
 287  *    populated with the values from each process, indexed by the
 288  *    process ID number. If it is a vector, it will be resized
 289  *    accordingly. For non-root processes, this parameter may be
 290  *    omitted. If it is still provided, however, it will be unchanged.
 291  *
 292  *    @param root The process ID number that will collect the
 293  *    values. This value must be the same on all processes.
 294  */
 295 template<typename T>
 296 void
 297 gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
 298        int root);
 299
 300 /**
 301  * \overload
 302  */
 303 template<typename T>
 304 void
 305 gather(const communicator& comm, const T& in_value, T* out_values, int root);
 306
 307 /**
 308  * \overload
 309  */
 310 template<typename T>
 311 void gather(const communicator& comm, const T& in_value, int root);
 312
 313 /**
 314  * \overload
 315  */
 316 template<typename T>
 317 void
 318 gather(const communicator& comm, const T* in_values, int n,
 319        std::vector<T>& out_values, int root);
 320
 321 /**
 322  * \overload
 323  */
 324 template<typename T>
 325 void
 326 gather(const communicator& comm, const T* in_values, int n, T* out_values,
 327        int root);
 328
 329 /**
 330  * \overload
 331  */
 332 template<typename T>
 333 void gather(const communicator& comm, const T* in_values, int n, int root);
 334
 335 /**
 336  *  @brief Similar to boost::mpi::gather with the difference that the number
 337  *  of values to be sent by non-root processes can vary.
 338  *
 339  *    @param comm The communicator over which the gather will occur.
 340  *
 341  *    @param in_values The array of values to be transmitted by each process.
 342  *
 343  *    @param in_size For each non-root process this specifies the size
 344  *    of @p in_values.
 345  *
 346  *    @param out_values A pointer to storage that will be populated with
 347  *    the values from each process. For non-root processes, this parameter
 348  *    may be omitted. If it is still provided, however, it will be unchanged.
 349  *
 350  *    @param sizes A vector containing the number of elements each non-root
 351  *    process will send.
 352  *
 353  *    @param displs A vector such that the i-th entry specifies the
 354  *    displacement (relative to @p out_values) at which to place the incoming
 355  *    data at the @p root process. Overloaded versions for which @p displs is
 356  *    omitted assume that the data is to be placed contiguously at the root process.
 357  *
 358  *    @param root The process ID number that will collect the
 359  *    values. This value must be the same on all processes.
 360  */
 361 template<typename T>
 362 void
 363 gatherv(const communicator& comm, const std::vector<T>& in_values,
 364         T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
 365         int root);
 366
 367 /**
 368  * \overload
 369  */
 370 template<typename T>
 371 void
 372 gatherv(const communicator& comm, const T* in_values, int in_size,
 373         T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
 374         int root);
 375
 376 /**
 377  * \overload
 378  */
 379 template<typename T>
 380 void gatherv(const communicator& comm, const std::vector<T>& in_values, int root);
 381
 382 /**
 383  * \overload
 384  */
 385 template<typename T>
 386 void gatherv(const communicator& comm, const T* in_values, int in_size, int root);
 387
 388 /**
 389  * \overload
 390  */
 391 template<typename T>
 392 void
 393 gatherv(const communicator& comm, const T* in_values, int in_size,
 394         T* out_values, const std::vector<int>& sizes, int root);
 395
 396 /**
 397  * \overload
 398  */
 399 template<typename T>
 400 void
 401 gatherv(const communicator& comm, const std::vector<T>& in_values,
 402         T* out_values, const std::vector<int>& sizes, int root);
 403
 404 /**
 405  *  @brief Scatter the values stored at the root to all processes
 406  *  within the communicator.
 407  *
 408  *  @c scatter is a collective algorithm that scatters the values
 409  *  stored in the @p root process (inside a vector) to all of the
 410  *  processes in the communicator. The vector @p in_values (only
 411  *  significant at the @p root) is indexed by the process number to
 412  *  which the corresponding value will be sent. The type @c T of the
 413  *  values may be any type that is serializable or has an associated
 414  *  MPI data type.
 415  *
 416  *  When the type @c T has an associated MPI data type, this routine
 417  *  invokes @c MPI_Scatter to scatter the values.
 418  *
 419  *    @param comm The communicator over which the scatter will occur.
 420  *
 421  *    @param in_values A vector or pointer to storage that will contain
 422  *    the values to send to each process, indexed by the process rank.
 423  *    For non-root processes, this parameter may be omitted. If it is
 424  *    still provided, however, it will be unchanged.
 425  *
 426  *    @param out_value The value received by each process. When
 427  *    scattering an array of values, @p out_values points to the @p n
 428  *    values that will be received by each process.
 429  *
 430  *    @param root The process ID number that will scatter the
 431  *    values. This value must be the same on all processes.
 432  */
 433 template<typename T>
 434 void
 435 scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
 436         int root);
 437
 438 /**
 439  * \overload
 440  */
 441 template<typename T>
 442 void
 443 scatter(const communicator& comm, const T* in_values, T& out_value, int root);
 444
 445 /**
 446  * \overload
 447  */
 448 template<typename T>
 449 void scatter(const communicator& comm, T& out_value, int root);
 450
 451 /**
 452  * \overload
 453  */
 454 template<typename T>
 455 void
 456 scatter(const communicator& comm, const std::vector<T>& in_values,
 457         T* out_values, int n, int root);
 458
 459 /**
 460  * \overload
 461  */
 462 template<typename T>
 463 void
 464 scatter(const communicator& comm, const T* in_values, T* out_values, int n,
 465         int root);
 466
 467 /**
 468  * \overload
 469  */
 470 template<typename T>
 471 void scatter(const communicator& comm, T* out_values, int n, int root);
 472
 473 /**
 474  *  @brief Similar to boost::mpi::scatter with the difference that the number
 475  *  of values stored at the root process does not need to be a multiple of
 476  *  the communicator's size.
 477  *
 478  *    @param comm The communicator over which the scatter will occur.
 479  *
 480  *    @param in_values A vector or pointer to storage that will contain
 481  *    the values to send to each process, indexed by the process rank.
 482  *    For non-root processes, this parameter may be omitted. If it is
 483  *    still provided, however, it will be unchanged.
 484  *
 485  *    @param sizes A vector containing the number of elements each non-root
 486  *    process will receive.
 487  *
 488  *    @param displs A vector such that the i-th entry specifies the
 489  *    displacement (relative to @p in_values) from which to take the outgoing
 490  *    data to process i. Overloaded versions for which @p displs is omitted
 491  *    assume that the data is contiguous at the @p root process.
 492  *
 493  *    @param out_values The array of values received by each process.
 494  *
 495  *    @param out_size For each non-root process this specifies the size
 496  *    of @p out_values.
 497  *
 498  *    @param root The process ID number that will scatter the
 499  *    values. This value must be the same on all processes.
 500  */
 501 template<typename T>
 502 void
 503 scatterv(const communicator& comm, const std::vector<T>& in_values,
 504          const std::vector<int>& sizes, const std::vector<int>& displs,
 505          T* out_values, int out_size, int root);
 506
 507 /**
 508  * \overload
 509  */
 510 template<typename T>
 511 void
 512 scatterv(const communicator& comm, const T* in_values,
 513          const std::vector<int>& sizes, const std::vector<int>& displs,
 514          T* out_values, int out_size, int root);
 515
 516 /**
 517  * \overload
 518  */
 519 template<typename T>
 520 void scatterv(const communicator& comm, T* out_values, int out_size, int root);
 521
 522 /**
 523  * \overload
 524  */
 525 template<typename T>
 526 void
 527 scatterv(const communicator& comm, const T* in_values,
 528          const std::vector<int>& sizes, T* out_values, int root);
 529
 530 /**
 531  * \overload
 532  */
 533 template<typename T>
 534 void
 535 scatterv(const communicator& comm, const std::vector<T>& in_values,
 536          const std::vector<int>& sizes, T* out_values, int root);
 537
 538 /**
 539  *  @brief Combine the values stored by each process into a single
 540  *  value at the root.
 541  *
 542  *  @c reduce is a collective algorithm that combines the values
 543  *  stored by each process into a single value at the @c root. The
 544  *  values can be combined arbitrarily, specified via a function
 545  *  object. The type @c T of the values may be any type that is
 546  *  serializable or has an associated MPI data type. One can think of
 547  *  this operation as a @c gather to the @p root, followed by an @c
 548  *  std::accumulate() over the gathered values and using the operation
 549  *  @c op.
 550  *
 551  *  When the type @c T has an associated MPI data type, this routine
 552  *  invokes @c MPI_Reduce to perform the reduction. If possible,
 553  *  built-in MPI operations will be used; otherwise, @c reduce() will
 554  *  create a custom MPI_Op for the call to MPI_Reduce.
 555  *
 556  *    @param comm The communicator over which the reduction will
 557  *    occur.
 558  *
 559  *    @param in_value The local value to be combined with the local
 560  *    values of every other process. For reducing arrays, @c in_values
 561  *    contains a pointer to the local values. In this case, @c n is
 562  *    the number of values that will be reduced. Reduction occurs
 563  *    independently for each of the @p n values referenced by @p
 564  *    in_values, e.g., calling reduce on an array of @p n values is
 565  *    like calling @c reduce @p n separate times, one for each
 566  *    location in @p in_values and @p out_values.
 567  *
 568  *    @param out_value Will receive the result of the reduction
 569  *    operation, but only for the @p root process. Non-root processes
 570  *    may omit this parameter; if they choose to supply the parameter,
 571  *    it will be unchanged. For reducing arrays, @c out_values
 572  *    contains a pointer to the storage for the output values.
 573  *
 574  *    @param op The binary operation that combines two values of type
 575  *    @c T into a third value of type @c T. For types @c T that have
 576  *    associated MPI data types, @c op will either be translated into
 577  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 578  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
 579  *    operations.hpp header for more details on this mapping. For any
 580  *    non-built-in operation, commutativity will be determined by the
 581  *    @c is_commutative trait (also in @c operations.hpp): users are
 582  *    encouraged to mark commutative operations as such, because it
 583  *    gives the implementation additional latitude to optimize the
 584  *    reduction operation.
 585  *
 586  *    @param root The process ID number that will receive the final,
 587  *    combined value. This value must be the same on all processes.
 588  */
 589 template<typename T, typename Op>
 590 void
 591 reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
 592        int root);
 593
 594 /**
 595  * \overload
 596  */
 597 template<typename T, typename Op>
 598 void reduce(const communicator& comm, const T& in_value, Op op, int root);
 599
 600 /**
 601  * \overload
 602  */
 603 template<typename T, typename Op>
 604 void
 605 reduce(const communicator& comm, const T* in_values, int n, T* out_values,
 606        Op op, int root);
 607
 608 /**
 609  * \overload
 610  */
 611 template<typename T, typename Op>
 612 void
 613 reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
 614
 615 /**
 616  *  @brief Compute a prefix reduction of values from all processes in
 617  *  the communicator.
 618  *
 619  *  @c scan is a collective algorithm that combines the values stored
 620  *  by each process with the values of all processes with a smaller
 621  *  rank. The values can be arbitrarily combined, specified via a
 622  *  function object @p op. The type @c T of the values may be any type
 623  *  that is serializable or has an associated MPI data type. One can
 624  *  think of this operation as a @c gather to some process, followed
 625  *  by an @c std::partial_sum() over the gathered values using the
 626  *  operation @c op. The ith process returns the ith value emitted by
 627  *  @c std::partial_sum().
 628  *
 629  *  When the type @c T has an associated MPI data type, this routine
 630  *  invokes @c MPI_Scan to perform the reduction. If possible,
 631  *  built-in MPI operations will be used; otherwise, @c scan() will
 632  *  create a custom @c MPI_Op for the call to MPI_Scan.
 633  *
 634  *    @param comm The communicator over which the prefix reduction
 635  *    will occur.
 636  *
 637  *    @param in_value The local value to be combined with the local
 638  *    values of other processes. For the array variant, the @c
 639  *    in_values parameter points to the @c n local values that will be
 640  *    combined.
 641  *
 642  *    @param out_value If provided, the ith process will receive the
 643  *    value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 644  *    ... )). For the array variant, @c out_values contains a pointer
 645  *    to storage for the @c n output values. The prefix reduction
 646  *    occurs independently for each of the @p n values referenced by
 647  *    @p in_values, e.g., calling scan on an array of @p n values is
 648  *    like calling @c scan @p n separate times, one for each location
 649  *    in @p in_values and @p out_values.
 650  *
 651  *    @param op The binary operation that combines two values of type
 652  *    @c T into a third value of type @c T. For types @c T that have
 653  *    associated MPI data types, @c op will either be translated into
 654  *    an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 655  *    directly to a built-in MPI operation. See @c is_mpi_op in the @c
 656  *    operations.hpp header for more details on this mapping. For any
 657  *    non-built-in operation, commutativity will be determined by the
 658  *    @c is_commutative trait (also in @c operations.hpp).
 659  *
 660  *    @returns If no @p out_value parameter is provided, returns the
 661  *    result of prefix reduction.
 662  */
 663 template<typename T, typename Op>
 664 void
 665 scan(const communicator& comm, const T& in_value, T& out_value, Op op);
 666
 667 /**
 668  * \overload
 669  */
 670 template<typename T, typename Op>
 671 T
 672 scan(const communicator& comm, const T& in_value, Op op);
 673
 674 /**
 675  * \overload
 676  */
 677 template<typename T, typename Op>
 678 void
 679 scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
 680
 681 } } // end namespace boost::mpi
 682 #endif // BOOST_MPI_COLLECTIVES_HPP
 683
 684 #ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
 685 // Include implementations of each of the collectives
 686 #  include <boost/mpi/collectives/all_gather.hpp>
 687 #  include <boost/mpi/collectives/all_reduce.hpp>
 688 #  include <boost/mpi/collectives/all_to_all.hpp>
 689 #  include <boost/mpi/collectives/broadcast.hpp>
 690 #  include <boost/mpi/collectives/gather.hpp>
 691 #  include <boost/mpi/collectives/gatherv.hpp>
 692 #  include <boost/mpi/collectives/scatter.hpp>
 693 #  include <boost/mpi/collectives/scatterv.hpp>
 694 #  include <boost/mpi/collectives/reduce.hpp>
 695 #  include <boost/mpi/collectives/scan.hpp>
 696 #endif
 697