// ceph/src/arrow/cpp/src/arrow/gpu/cuda_context.h
// (Apache Arrow C++ GPU support, vendored into Ceph 17.2.0 "quincy")
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
17
#pragma once

#include <cstdint>
#include <memory>
#include <string>

#include "arrow/device.h"
#include "arrow/result.h"
#include "arrow/util/visibility.h"

28 namespace arrow {
29 namespace cuda {
30
31 // Forward declaration
32 class CudaContext;
33 class CudaDevice;
34 class CudaDeviceManager;
35 class CudaBuffer;
36 class CudaHostBuffer;
37 class CudaIpcMemHandle;
38 class CudaMemoryManager;
39
40 // XXX Should CudaContext be merged into CudaMemoryManager?
41
42 class ARROW_EXPORT CudaDeviceManager {
43 public:
44 static Result<CudaDeviceManager*> Instance();
45
46 /// \brief Get a CudaDevice instance for a particular device
47 /// \param[in] device_number the CUDA device number
48 Result<std::shared_ptr<CudaDevice>> GetDevice(int device_number);
49
50 /// \brief Get the CUDA driver context for a particular device
51 /// \param[in] device_number the CUDA device number
52 /// \return cached context
53 Result<std::shared_ptr<CudaContext>> GetContext(int device_number);
54
55 /// \brief Get the shared CUDA driver context for a particular device
56 /// \param[in] device_number the CUDA device number
57 /// \param[in] handle CUDA context handle created by another library
58 /// \return shared context
59 Result<std::shared_ptr<CudaContext>> GetSharedContext(int device_number, void* handle);
60
61 /// \brief Allocate host memory with fast access to given GPU device
62 /// \param[in] device_number the CUDA device number
63 /// \param[in] nbytes number of bytes
64 /// \return Host buffer or Status
65 Result<std::shared_ptr<CudaHostBuffer>> AllocateHost(int device_number, int64_t nbytes);
66
67 /// \brief Free host memory
68 ///
69 /// The given memory pointer must have been allocated with AllocateHost.
70 Status FreeHost(void* data, int64_t nbytes);
71
72 int num_devices() const;
73
74 private:
75 CudaDeviceManager();
76 static std::unique_ptr<CudaDeviceManager> instance_;
77
78 class Impl;
79 std::unique_ptr<Impl> impl_;
80
81 friend class CudaContext;
82 friend class CudaDevice;
83 };
84
85 /// \brief Device implementation for CUDA
86 ///
87 /// Each CudaDevice instance is tied to a particular CUDA device
88 /// (identified by its logical device number).
89 class ARROW_EXPORT CudaDevice : public Device {
90 public:
91 const char* type_name() const override;
92 std::string ToString() const override;
93 bool Equals(const Device&) const override;
94 std::shared_ptr<MemoryManager> default_memory_manager() override;
95
96 /// \brief Return a CudaDevice instance for a particular device
97 /// \param[in] device_number the CUDA device number
98 static Result<std::shared_ptr<CudaDevice>> Make(int device_number);
99
100 /// \brief Return the device logical number
101 int device_number() const;
102
103 /// \brief Return the GPU model name
104 std::string device_name() const;
105
106 /// \brief Return total memory on this device
107 int64_t total_memory() const;
108
109 /// \brief Return a raw CUDA device handle
110 ///
111 /// The returned value can be used to expose this device to other libraries.
112 /// It should be interpreted as `CUdevice`.
113 int handle() const;
114
115 /// \brief Get a CUDA driver context for this device
116 ///
117 /// The returned context is associated with the primary CUDA context for the
118 /// device. This is the recommended way of getting a context for a device,
119 /// as it allows interoperating transparently with any library using the
120 /// primary CUDA context API.
121 Result<std::shared_ptr<CudaContext>> GetContext();
122
123 /// \brief Get a CUDA driver context for this device, using an existing handle
124 ///
125 /// The handle is not owned: it will not be released when the CudaContext
126 /// is destroyed. This function should only be used if you need interoperation
127 /// with a library that uses a non-primary context.
128 ///
129 /// \param[in] handle CUDA context handle created by another library
130 Result<std::shared_ptr<CudaContext>> GetSharedContext(void* handle);
131
132 /// \brief Allocate a host-residing, GPU-accessible buffer
133 ///
134 /// The buffer is allocated using this device's primary context.
135 ///
136 /// \param[in] size The buffer size in bytes
137 Result<std::shared_ptr<CudaHostBuffer>> AllocateHostBuffer(int64_t size);
138
139 protected:
140 struct Impl;
141
142 friend class CudaContext;
143 /// \cond FALSE
144 // (note: emits warning on Doxygen < 1.8.15)
145 friend class CudaDeviceManager::Impl;
146 /// \endcond
147
148 explicit CudaDevice(Impl);
149 std::unique_ptr<Impl> impl_;
150 };
151
152 /// \brief Return whether a device instance is a CudaDevice
153 ARROW_EXPORT
154 bool IsCudaDevice(const Device& device);
155
156 /// \brief Cast a device instance to a CudaDevice
157 ///
158 /// An error is returned if the device is not a CudaDevice.
159 ARROW_EXPORT
160 Result<std::shared_ptr<CudaDevice>> AsCudaDevice(const std::shared_ptr<Device>& device);
161
162 /// \brief MemoryManager implementation for CUDA
163 class ARROW_EXPORT CudaMemoryManager : public MemoryManager {
164 public:
165 Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
166 std::shared_ptr<Buffer> buf) override;
167 Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
168 std::shared_ptr<Buffer> buf) override;
169
170 Result<std::shared_ptr<Buffer>> AllocateBuffer(int64_t size) override;
171
172 /// \brief The CudaDevice instance tied to this MemoryManager
173 ///
174 /// This is a useful shorthand returning a concrete-typed pointer, avoiding
175 /// having to cast the `device()` result.
176 std::shared_ptr<CudaDevice> cuda_device() const;
177
178 protected:
179 using MemoryManager::MemoryManager;
180 static std::shared_ptr<CudaMemoryManager> Make(const std::shared_ptr<Device>& device);
181
182 Result<std::shared_ptr<Buffer>> CopyBufferFrom(
183 const std::shared_ptr<Buffer>& buf,
184 const std::shared_ptr<MemoryManager>& from) override;
185 Result<std::shared_ptr<Buffer>> CopyBufferTo(
186 const std::shared_ptr<Buffer>& buf,
187 const std::shared_ptr<MemoryManager>& to) override;
188 Result<std::shared_ptr<Buffer>> ViewBufferFrom(
189 const std::shared_ptr<Buffer>& buf,
190 const std::shared_ptr<MemoryManager>& from) override;
191 Result<std::shared_ptr<Buffer>> ViewBufferTo(
192 const std::shared_ptr<Buffer>& buf,
193 const std::shared_ptr<MemoryManager>& to) override;
194
195 friend class CudaDevice;
196 };
197
198 /// \brief Return whether a MemoryManager instance is a CudaMemoryManager
199 ARROW_EXPORT
200 bool IsCudaMemoryManager(const MemoryManager& mm);
201
202 /// \brief Cast a MemoryManager instance to a CudaMemoryManager
203 ///
204 /// An error is returned if the MemoryManager is not a CudaMemoryManager.
205 ARROW_EXPORT
206 Result<std::shared_ptr<CudaMemoryManager>> AsCudaMemoryManager(
207 const std::shared_ptr<MemoryManager>& mm);
208
209 /// \class CudaContext
210 /// \brief Object-oriented interface to the low-level CUDA driver API
211 class ARROW_EXPORT CudaContext : public std::enable_shared_from_this<CudaContext> {
212 public:
213 ~CudaContext();
214
215 Status Close();
216
217 /// \brief Allocate CUDA memory on GPU device for this context
218 /// \param[in] nbytes number of bytes
219 /// \return the allocated buffer
220 Result<std::shared_ptr<CudaBuffer>> Allocate(int64_t nbytes);
221
222 /// \brief Release CUDA memory on GPU device for this context
223 /// \param[in] device_ptr the buffer address
224 /// \param[in] nbytes number of bytes
225 /// \return Status
226 Status Free(void* device_ptr, int64_t nbytes);
227
228 /// \brief Create a view of CUDA memory on GPU device of this context
229 /// \param[in] data the starting device address
230 /// \param[in] nbytes number of bytes
231 /// \return the view buffer
232 ///
233 /// \note The caller is responsible for allocating and freeing the
234 /// memory as well as ensuring that the memory belongs to the CUDA
235 /// context that this CudaContext instance holds.
236 Result<std::shared_ptr<CudaBuffer>> View(uint8_t* data, int64_t nbytes);
237
238 /// \brief Open existing CUDA IPC memory handle
239 /// \param[in] ipc_handle opaque pointer to CUipcMemHandle (driver API)
240 /// \return a CudaBuffer referencing the IPC segment
241 Result<std::shared_ptr<CudaBuffer>> OpenIpcBuffer(const CudaIpcMemHandle& ipc_handle);
242
243 /// \brief Close memory mapped with IPC buffer
244 /// \param[in] buffer a CudaBuffer referencing
245 /// \return Status
246 Status CloseIpcBuffer(CudaBuffer* buffer);
247
248 /// \brief Block until the all device tasks are completed.
249 Status Synchronize(void);
250
251 int64_t bytes_allocated() const;
252
253 /// \brief Expose CUDA context handle to other libraries
254 void* handle() const;
255
256 /// \brief Return the default memory manager tied to this context's device
257 std::shared_ptr<CudaMemoryManager> memory_manager() const;
258
259 /// \brief Return the device instance associated with this context
260 std::shared_ptr<CudaDevice> device() const;
261
262 /// \brief Return the logical device number
263 int device_number() const;
264
265 /// \brief Return the device address that is reachable from kernels
266 /// running in the context
267 /// \param[in] addr device or host memory address
268 /// \return the device address
269 ///
270 /// The device address is defined as a memory address accessible by
271 /// device. While it is often a device memory address, it can be
272 /// also a host memory address, for instance, when the memory is
273 /// allocated as host memory (using cudaMallocHost or cudaHostAlloc)
274 /// or as managed memory (using cudaMallocManaged) or the host
275 /// memory is page-locked (using cudaHostRegister).
276 Result<uintptr_t> GetDeviceAddress(uint8_t* addr);
277 Result<uintptr_t> GetDeviceAddress(uintptr_t addr);
278
279 private:
280 CudaContext();
281
282 Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(const void* data,
283 int64_t size);
284 Status CopyHostToDevice(void* dst, const void* src, int64_t nbytes);
285 Status CopyHostToDevice(uintptr_t dst, const void* src, int64_t nbytes);
286 Status CopyDeviceToHost(void* dst, const void* src, int64_t nbytes);
287 Status CopyDeviceToHost(void* dst, uintptr_t src, int64_t nbytes);
288 Status CopyDeviceToDevice(void* dst, const void* src, int64_t nbytes);
289 Status CopyDeviceToDevice(uintptr_t dst, uintptr_t src, int64_t nbytes);
290 Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx, void* dst,
291 const void* src, int64_t nbytes);
292 Status CopyDeviceToAnotherDevice(const std::shared_ptr<CudaContext>& dst_ctx,
293 uintptr_t dst, uintptr_t src, int64_t nbytes);
294
295 class Impl;
296 std::unique_ptr<Impl> impl_;
297
298 friend class CudaBuffer;
299 friend class CudaBufferReader;
300 friend class CudaBufferWriter;
301 friend class CudaDevice;
302 friend class CudaMemoryManager;
303 /// \cond FALSE
304 // (note: emits warning on Doxygen < 1.8.15)
305 friend class CudaDeviceManager::Impl;
306 /// \endcond
307 };
308
} // namespace cuda
} // namespace arrow