1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
24 #include "arrow/device.h"
25 #include "arrow/result.h"
26 #include "arrow/util/visibility.h"
// Forward declarations of the CUDA types defined below / elsewhere in Arrow.
class CudaDeviceManager;
class CudaIpcMemHandle;
class CudaMemoryManager;
// XXX Should CudaContext be merged into CudaMemoryManager?

/// \brief Entry point for enumerating CUDA devices and obtaining their
/// cached driver contexts.
///
/// NOTE(review): access specifiers and the class' closing brace appear to be
/// elided from this excerpt -- confirm the full declaration upstream.
class ARROW_EXPORT CudaDeviceManager {
  /// \brief Return the process-wide CudaDeviceManager instance
  static Result<CudaDeviceManager*> Instance();

  /// \brief Get a CudaDevice instance for a particular device
  /// \param[in] device_number the CUDA device number
  Result<std::shared_ptr<CudaDevice>> GetDevice(int device_number);

  /// \brief Get the CUDA driver context for a particular device
  /// \param[in] device_number the CUDA device number
  /// \return cached context
  Result<std::shared_ptr<CudaContext>> GetContext(int device_number);

  /// \brief Get the shared CUDA driver context for a particular device
  /// \param[in] device_number the CUDA device number
  /// \param[in] handle CUDA context handle created by another library
  /// \return shared context
  Result<std::shared_ptr<CudaContext>> GetSharedContext(int device_number, void* handle);

  /// \brief Allocate host memory with fast access to given GPU device
  /// \param[in] device_number the CUDA device number
  /// \param[in] nbytes number of bytes
  /// \return Host buffer or Status
  Result<std::shared_ptr<CudaHostBuffer>> AllocateHost(int device_number, int64_t nbytes);

  /// \brief Free host memory
  ///
  /// The given memory pointer must have been allocated with AllocateHost.
  /// \param[in] data address previously returned by AllocateHost
  /// \param[in] nbytes size of the original allocation, in bytes
  Status FreeHost(void* data, int64_t nbytes);

  /// \brief Return the number of CUDA devices
  int num_devices() const;

  // Storage for the singleton handed out by Instance().
  static std::unique_ptr<CudaDeviceManager> instance_;

  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;

  friend class CudaContext;
  friend class CudaDevice;
/// \brief Device implementation for CUDA
///
/// Each CudaDevice instance is tied to a particular CUDA device
/// (identified by its logical device number).
class ARROW_EXPORT CudaDevice : public Device {
  // Overrides of the arrow::Device interface.
  const char* type_name() const override;
  std::string ToString() const override;
  bool Equals(const Device&) const override;
  std::shared_ptr<MemoryManager> default_memory_manager() override;

  /// \brief Return a CudaDevice instance for a particular device
  /// \param[in] device_number the CUDA device number
  static Result<std::shared_ptr<CudaDevice>> Make(int device_number);

  /// \brief Return the device logical number
  int device_number() const;

  /// \brief Return the GPU model name
  std::string device_name() const;

  /// \brief Return total memory on this device
  int64_t total_memory() const;

  /// \brief Return a raw CUDA device handle
  ///
  /// The returned value can be used to expose this device to other libraries.
  /// It should be interpreted as `CUdevice`.
  // NOTE(review): the declaration this comment documents (handle()) appears
  // to be elided from this excerpt -- confirm upstream.

  /// \brief Get a CUDA driver context for this device
  ///
  /// The returned context is associated with the primary CUDA context for the
  /// device. This is the recommended way of getting a context for a device,
  /// as it allows interoperating transparently with any library using the
  /// primary CUDA context API.
  Result<std::shared_ptr<CudaContext>> GetContext();

  /// \brief Get a CUDA driver context for this device, using an existing handle
  ///
  /// The handle is not owned: it will not be released when the CudaContext
  /// is destroyed. This function should only be used if you need interoperation
  /// with a library that uses a non-primary context.
  ///
  /// \param[in] handle CUDA context handle created by another library
  Result<std::shared_ptr<CudaContext>> GetSharedContext(void* handle);

  /// \brief Allocate a host-residing, GPU-accessible buffer
  ///
  /// The buffer is allocated using this device's primary context.
  ///
  /// \param[in] size The buffer size in bytes
  Result<std::shared_ptr<CudaHostBuffer>> AllocateHostBuffer(int64_t size);

  friend class CudaContext;
  // (note: emits warning on Doxygen < 1.8.15)
  friend class CudaDeviceManager::Impl;

  // Internal constructor, reachable only through the friends declared above.
  explicit CudaDevice(Impl);
  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;
/// \brief Return whether a device instance is a CudaDevice
/// \param[in] device the device to inspect
// NOTE(review): an ARROW_EXPORT marker may have been elided from these two
// free-function declarations in this excerpt -- confirm upstream.
bool IsCudaDevice(const Device& device);

/// \brief Cast a device instance to a CudaDevice
///
/// An error is returned if the device is not a CudaDevice.
/// \param[in] device the device to cast
Result<std::shared_ptr<CudaDevice>> AsCudaDevice(const std::shared_ptr<Device>& device);
/// \brief MemoryManager implementation for CUDA
///
/// NOTE(review): access specifiers and the class' closing brace appear to be
/// elided from this excerpt -- confirm the full declaration upstream.
class ARROW_EXPORT CudaMemoryManager : public MemoryManager {
  /// \brief Return a RandomAccessFile giving read access to the given buffer
  Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
      std::shared_ptr<Buffer> buf) override;
  /// \brief Return an OutputStream giving write access to the given buffer
  Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
      std::shared_ptr<Buffer> buf) override;

  /// \brief Allocate a buffer of `size` bytes
  Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) override;

  /// \brief The CudaDevice instance tied to this MemoryManager
  ///
  /// This is a useful shorthand returning a concrete-typed pointer, avoiding
  /// having to cast the `device()` result.
  std::shared_ptr<CudaDevice> cuda_device() const;

  // Inherit the base-class constructors.
  using MemoryManager::MemoryManager;

  /// \brief Create a CudaMemoryManager for the given device
  static std::shared_ptr<CudaMemoryManager> Make(const std::shared_ptr<Device>& device);

  // Overrides of the MemoryManager buffer copy/view entry points, moving or
  // viewing buffers between this manager and another one.
  Result<std::shared_ptr<Buffer>> CopyBufferFrom(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& from) override;
  Result<std::shared_ptr<Buffer>> CopyBufferTo(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& to) override;
  Result<std::shared_ptr<Buffer>> ViewBufferFrom(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& from) override;
  Result<std::shared_ptr<Buffer>> ViewBufferTo(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& to) override;

  friend class CudaDevice;
/// \brief Return whether a MemoryManager instance is a CudaMemoryManager
/// \param[in] mm the MemoryManager to inspect
// NOTE(review): an ARROW_EXPORT marker may have been elided from these two
// free-function declarations in this excerpt -- confirm upstream.
bool IsCudaMemoryManager(const MemoryManager& mm);

/// \brief Cast a MemoryManager instance to a CudaMemoryManager
///
/// An error is returned if the MemoryManager is not a CudaMemoryManager.
/// \param[in] mm the MemoryManager to cast
Result<std::shared_ptr<CudaMemoryManager>> AsCudaMemoryManager(
    const std::shared_ptr<MemoryManager>& mm);
/// \class CudaContext
/// \brief Object-oriented interface to the low-level CUDA driver API
///
/// NOTE(review): this class declaration continues past the end of this
/// excerpt (no closing brace visible) -- confirm the remainder upstream.
class ARROW_EXPORT CudaContext : public std::enable_shared_from_this<CudaContext> {
  /// \brief Allocate CUDA memory on GPU device for this context
  /// \param[in] nbytes number of bytes
  /// \return the allocated buffer
  Result<std::shared_ptr<CudaBuffer>> Allocate(int64_t nbytes);

  /// \brief Release CUDA memory on GPU device for this context
  /// \param[in] device_ptr the buffer address
  /// \param[in] nbytes number of bytes
  Status Free(void* device_ptr, int64_t nbytes);

  /// \brief Create a view of CUDA memory on GPU device of this context
  /// \param[in] data the starting device address
  /// \param[in] nbytes number of bytes
  /// \return the view buffer
  ///
  /// \note The caller is responsible for allocating and freeing the
  /// memory as well as ensuring that the memory belongs to the CUDA
  /// context that this CudaContext instance holds.
  Result<std::shared_ptr<CudaBuffer>> View(uint8_t* data, int64_t nbytes);

  /// \brief Open existing CUDA IPC memory handle
  /// \param[in] ipc_handle opaque pointer to CUipcMemHandle (driver API)
  /// \return a CudaBuffer referencing the IPC segment
  Result<std::shared_ptr<CudaBuffer>> OpenIpcBuffer(const CudaIpcMemHandle& ipc_handle);

  /// \brief Close memory mapped with IPC buffer
  /// \param[in] buffer a CudaBuffer referencing the IPC segment to close
  Status CloseIpcBuffer(CudaBuffer* buffer);

  /// \brief Block until the all device tasks are completed.
  Status Synchronize(void);

  /// \brief Return the number of bytes allocated through this context
  int64_t bytes_allocated() const;

  /// \brief Expose CUDA context handle to other libraries
  void* handle() const;

  /// \brief Return the default memory manager tied to this context's device
  std::shared_ptr<CudaMemoryManager> memory_manager() const;

  /// \brief Return the device instance associated with this context
  std::shared_ptr<CudaDevice> device() const;

  /// \brief Return the logical device number
  int device_number() const;

  /// \brief Return the device address that is reachable from kernels
  /// running in the context
  /// \param[in] addr device or host memory address
  /// \return the device address
  ///
  /// The device address is defined as a memory address accessible by
  /// device. While it is often a device memory address, it can be
  /// also a host memory address, for instance, when the memory is
  /// allocated as host memory (using cudaMallocHost or cudaHostAlloc)
  /// or as managed memory (using cudaMallocManaged) or the host
  /// memory is page-locked (using cudaHostRegister).
  Result<uintptr_t> GetDeviceAddress(uint8_t* addr);
  /// \brief Overload of GetDeviceAddress taking an integer address
  Result<uintptr_t> GetDeviceAddress(uintptr_t addr);

  /// \brief Export device memory as a CUDA IPC handle
  // NOTE(review): this declaration is visibly truncated in this excerpt
  // (trailing comma, missing remaining parameter(s) and closing paren) --
  // confirm the full signature upstream.
  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(const void* data,

  // Copy helpers between host and device memory, each taking
  // (destination, source, byte count), in both pointer and
  // integer-address flavors.
  Status CopyHostToDevice(void* dst, const void* src, int64_t nbytes);
  Status CopyHostToDevice(uintptr_t dst, const void* src, int64_t nbytes);
  Status CopyDeviceToHost(void* dst, const void* src, int64_t nbytes);
  Status CopyDeviceToHost(void* dst, uintptr_t src, int64_t nbytes);
  Status CopyDeviceToDevice(void* dst, const void* src, int64_t nbytes);
  Status CopyDeviceToDevice(uintptr_t dst, uintptr_t src, int64_t nbytes);
  /// \brief Copy device memory into a buffer belonging to another
  /// (destination) context
  Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx, void* dst,
                                   const void* src, int64_t nbytes);
  Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx,
                                   uintptr_t dst, uintptr_t src, int64_t nbytes);

  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;

  friend class CudaBuffer;
  friend class CudaBufferReader;
  friend class CudaBufferWriter;
  friend class CudaDevice;
  friend class CudaMemoryManager;
  // (note: emits warning on Doxygen < 1.8.15)
  friend class CudaDeviceManager::Impl;