1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
24 #include "arrow/device.h"
25 #include "arrow/result.h"
26 #include "arrow/util/visibility.h"
// Forward declarations of the CUDA types defined below / elsewhere in Arrow.
class CudaDeviceManager;
class CudaIpcMemHandle;
class CudaMemoryManager;
// XXX Should CudaContext be merged into CudaMemoryManager?

/// \brief Entry point for enumerating CUDA devices and obtaining their
/// cached driver contexts.
///
/// NOTE(review): access specifiers and the class' closing brace appear to be
/// elided from this excerpt -- confirm the full declaration upstream.
class ARROW_EXPORT CudaDeviceManager {
  /// \brief Return the process-wide CudaDeviceManager instance
  static Result<CudaDeviceManager*> Instance();

  /// \brief Get a CudaDevice instance for a particular device
  /// \param[in] device_number the CUDA device number
  Result<std::shared_ptr<CudaDevice>> GetDevice(int device_number);

  /// \brief Get the CUDA driver context for a particular device
  /// \param[in] device_number the CUDA device number
  /// \return cached context
  Result<std::shared_ptr<CudaContext>> GetContext(int device_number);

  /// \brief Get the shared CUDA driver context for a particular device
  /// \param[in] device_number the CUDA device number
  /// \param[in] handle CUDA context handle created by another library
  /// \return shared context
  Result<std::shared_ptr<CudaContext>> GetSharedContext(int device_number, void* handle);

  /// \brief Allocate host memory with fast access to given GPU device
  /// \param[in] device_number the CUDA device number
  /// \param[in] nbytes number of bytes
  /// \return Host buffer or Status
  Result<std::shared_ptr<CudaHostBuffer>> AllocateHost(int device_number, int64_t nbytes);

  /// \brief Free host memory
  ///
  /// The given memory pointer must have been allocated with AllocateHost.
  /// \param[in] data address previously returned by AllocateHost
  /// \param[in] nbytes size of the original allocation, in bytes
  Status FreeHost(void* data, int64_t nbytes);

  /// \brief Return the number of CUDA devices
  int num_devices() const;

  // Storage for the singleton handed out by Instance().
  static std::unique_ptr<CudaDeviceManager> instance_;

  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;

  friend class CudaContext;
  friend class CudaDevice;
/// \brief Device implementation for CUDA
///
/// Each CudaDevice instance is tied to a particular CUDA device
/// (identified by its logical device number).
class ARROW_EXPORT CudaDevice : public Device {
  // Overrides of the arrow::Device interface.
  const char* type_name() const override;
  std::string ToString() const override;
  bool Equals(const Device&) const override;
  std::shared_ptr<MemoryManager> default_memory_manager() override;

  /// \brief Return a CudaDevice instance for a particular device
  /// \param[in] device_number the CUDA device number
  static Result<std::shared_ptr<CudaDevice>> Make(int device_number);

  /// \brief Return the device logical number
  int device_number() const;

  /// \brief Return the GPU model name
  std::string device_name() const;

  /// \brief Return total memory on this device
  int64_t total_memory() const;

  /// \brief Return a raw CUDA device handle
  ///
  /// The returned value can be used to expose this device to other libraries.
  /// It should be interpreted as `CUdevice`.
  // NOTE(review): the declaration this comment documents (handle()) appears
  // to be elided from this excerpt -- confirm upstream.

  /// \brief Get a CUDA driver context for this device
  ///
  /// The returned context is associated with the primary CUDA context for the
  /// device. This is the recommended way of getting a context for a device,
  /// as it allows interoperating transparently with any library using the
  /// primary CUDA context API.
  Result<std::shared_ptr<CudaContext>> GetContext();

  /// \brief Get a CUDA driver context for this device, using an existing handle
  ///
  /// The handle is not owned: it will not be released when the CudaContext
  /// is destroyed. This function should only be used if you need interoperation
  /// with a library that uses a non-primary context.
  ///
  /// \param[in] handle CUDA context handle created by another library
  Result<std::shared_ptr<CudaContext>> GetSharedContext(void* handle);

  /// \brief Allocate a host-residing, GPU-accessible buffer
  ///
  /// The buffer is allocated using this device's primary context.
  ///
  /// \param[in] size The buffer size in bytes
  Result<std::shared_ptr<CudaHostBuffer>> AllocateHostBuffer(int64_t size);

  friend class CudaContext;
  // (note: emits warning on Doxygen < 1.8.15)
  friend class CudaDeviceManager::Impl;

  // Internal constructor, reachable only through the friends declared above.
  explicit CudaDevice(Impl);
  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;
/// \brief Return whether a device instance is a CudaDevice
/// \param[in] device the device to inspect
// NOTE(review): an ARROW_EXPORT marker may have been elided from these two
// free-function declarations in this excerpt -- confirm upstream.
bool IsCudaDevice(const Device& device);

/// \brief Cast a device instance to a CudaDevice
///
/// An error is returned if the device is not a CudaDevice.
/// \param[in] device the device to cast
Result<std::shared_ptr<CudaDevice>> AsCudaDevice(const std::shared_ptr<Device>& device);
/// \brief MemoryManager implementation for CUDA
///
/// NOTE(review): access specifiers and the class' closing brace appear to be
/// elided from this excerpt -- confirm the full declaration upstream.
class ARROW_EXPORT CudaMemoryManager : public MemoryManager {
  /// \brief Return a RandomAccessFile giving read access to the given buffer
  Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
      std::shared_ptr<Buffer> buf) override;
  /// \brief Return an OutputStream giving write access to the given buffer
  Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
      std::shared_ptr<Buffer> buf) override;

  /// \brief Allocate a buffer of `size` bytes
  Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) override;

  /// \brief The CudaDevice instance tied to this MemoryManager
  ///
  /// This is a useful shorthand returning a concrete-typed pointer, avoiding
  /// having to cast the `device()` result.
  std::shared_ptr<CudaDevice> cuda_device() const;

  // Inherit the base-class constructors.
  using MemoryManager::MemoryManager;

  /// \brief Create a CudaMemoryManager for the given device
  static std::shared_ptr<CudaMemoryManager> Make(const std::shared_ptr<Device>& device);

  // Overrides of the MemoryManager buffer copy/view entry points, moving or
  // viewing buffers between this manager and another one.
  Result<std::shared_ptr<Buffer>> CopyBufferFrom(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& from) override;
  Result<std::shared_ptr<Buffer>> CopyBufferTo(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& to) override;
  Result<std::shared_ptr<Buffer>> ViewBufferFrom(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& from) override;
  Result<std::shared_ptr<Buffer>> ViewBufferTo(
      const std::shared_ptr<Buffer>& buf,
      const std::shared_ptr<MemoryManager>& to) override;

  friend class CudaDevice;
/// \brief Return whether a MemoryManager instance is a CudaMemoryManager
/// \param[in] mm the MemoryManager to inspect
// NOTE(review): an ARROW_EXPORT marker may have been elided from these two
// free-function declarations in this excerpt -- confirm upstream.
bool IsCudaMemoryManager(const MemoryManager& mm);

/// \brief Cast a MemoryManager instance to a CudaMemoryManager
///
/// An error is returned if the MemoryManager is not a CudaMemoryManager.
/// \param[in] mm the MemoryManager to cast
Result<std::shared_ptr<CudaMemoryManager>> AsCudaMemoryManager(
    const std::shared_ptr<MemoryManager>& mm);
/// \class CudaContext
/// \brief Object-oriented interface to the low-level CUDA driver API
///
/// NOTE(review): this class declaration continues past the end of this
/// excerpt (no closing brace visible) -- confirm the remainder upstream.
class ARROW_EXPORT CudaContext : public std::enable_shared_from_this<CudaContext> {
  /// \brief Allocate CUDA memory on GPU device for this context
  /// \param[in] nbytes number of bytes
  /// \return the allocated buffer
  Result<std::shared_ptr<CudaBuffer>> Allocate(int64_t nbytes);

  /// \brief Release CUDA memory on GPU device for this context
  /// \param[in] device_ptr the buffer address
  /// \param[in] nbytes number of bytes
  Status Free(void* device_ptr, int64_t nbytes);

  /// \brief Create a view of CUDA memory on GPU device of this context
  /// \param[in] data the starting device address
  /// \param[in] nbytes number of bytes
  /// \return the view buffer
  ///
  /// \note The caller is responsible for allocating and freeing the
  /// memory as well as ensuring that the memory belongs to the CUDA
  /// context that this CudaContext instance holds.
  Result<std::shared_ptr<CudaBuffer>> View(uint8_t* data, int64_t nbytes);

  /// \brief Open existing CUDA IPC memory handle
  /// \param[in] ipc_handle opaque pointer to CUipcMemHandle (driver API)
  /// \return a CudaBuffer referencing the IPC segment
  Result<std::shared_ptr<CudaBuffer>> OpenIpcBuffer(const CudaIpcMemHandle& ipc_handle);

  /// \brief Close memory mapped with IPC buffer
  /// \param[in] buffer a CudaBuffer referencing the IPC segment to close
  Status CloseIpcBuffer(CudaBuffer* buffer);

  /// \brief Block until the all device tasks are completed.
  Status Synchronize(void);

  /// \brief Return the number of bytes allocated through this context
  int64_t bytes_allocated() const;

  /// \brief Expose CUDA context handle to other libraries
  void* handle() const;

  /// \brief Return the default memory manager tied to this context's device
  std::shared_ptr<CudaMemoryManager> memory_manager() const;

  /// \brief Return the device instance associated with this context
  std::shared_ptr<CudaDevice> device() const;

  /// \brief Return the logical device number
  int device_number() const;

  /// \brief Return the device address that is reachable from kernels
  /// running in the context
  /// \param[in] addr device or host memory address
  /// \return the device address
  ///
  /// The device address is defined as a memory address accessible by
  /// device. While it is often a device memory address, it can be
  /// also a host memory address, for instance, when the memory is
  /// allocated as host memory (using cudaMallocHost or cudaHostAlloc)
  /// or as managed memory (using cudaMallocManaged) or the host
  /// memory is page-locked (using cudaHostRegister).
  Result<uintptr_t> GetDeviceAddress(uint8_t* addr);
  /// \brief Overload of GetDeviceAddress taking an integer address
  Result<uintptr_t> GetDeviceAddress(uintptr_t addr);

  /// \brief Export device memory as a CUDA IPC handle
  // NOTE(review): this declaration is visibly truncated in this excerpt
  // (trailing comma, missing remaining parameter(s) and closing paren) --
  // confirm the full signature upstream.
  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(const void* data,

  // Copy helpers between host and device memory, each taking
  // (destination, source, byte count), in both pointer and
  // integer-address flavors.
  Status CopyHostToDevice(void* dst, const void* src, int64_t nbytes);
  Status CopyHostToDevice(uintptr_t dst, const void* src, int64_t nbytes);
  Status CopyDeviceToHost(void* dst, const void* src, int64_t nbytes);
  Status CopyDeviceToHost(void* dst, uintptr_t src, int64_t nbytes);
  Status CopyDeviceToDevice(void* dst, const void* src, int64_t nbytes);
  Status CopyDeviceToDevice(uintptr_t dst, uintptr_t src, int64_t nbytes);
  /// \brief Copy device memory into a buffer belonging to another
  /// (destination) context
  Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx, void* dst,
                                   const void* src, int64_t nbytes);
  Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx,
                                   uintptr_t dst, uintptr_t src, int64_t nbytes);

  // Pimpl (implementation details kept out of this public header).
  std::unique_ptr<Impl> impl_;

  friend class CudaBuffer;
  friend class CudaBufferReader;
  friend class CudaBufferWriter;
  friend class CudaDevice;
  friend class CudaMemoryManager;
  // (note: emits warning on Doxygen < 1.8.15)
  friend class CudaDeviceManager::Impl;