/* SPDX-License-Identifier: MIT */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"

#include "syscall.h"

/* Unmap the SQ ring, and the CQ ring too if it was mapped separately. */
static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	munmap(sq->ring_ptr, sq->ring_sz);
	if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr)
		munmap(cq->ring_ptr, cq->ring_sz);
}

/*
 * Map the SQ and CQ rings plus the SQE array, and set up the pointers into
 * the shared memory. If the kernel advertises IORING_FEAT_SINGLE_MMAP, both
 * rings live in a single mapping, so size both to the larger of the two and
 * map the region once.
 */
static int io_uring_mmap(int fd, struct io_uring_params *p,
			 struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	int ret;

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);

	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		if (cq->ring_sz > sq->ring_sz)
			sq->ring_sz = cq->ring_sz;
		cq->ring_sz = sq->ring_sz;
	}
	sq->ring_ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (sq->ring_ptr == MAP_FAILED)
		return -errno;

	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		cq->ring_ptr = sq->ring_ptr;
	} else {
		cq->ring_ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
				    MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
		if (cq->ring_ptr == MAP_FAILED) {
			cq->ring_ptr = NULL;
			ret = -errno;
			goto err;
		}
	}

	sq->khead = sq->ring_ptr + p->sq_off.head;
	sq->ktail = sq->ring_ptr + p->sq_off.tail;
	sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
	sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
	sq->kflags = sq->ring_ptr + p->sq_off.flags;
	sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
	sq->array = sq->ring_ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd,
			IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		io_uring_unmap_rings(sq, cq);
		return ret;
	}

	cq->khead = cq->ring_ptr + p->cq_off.head;
	cq->ktail = cq->ring_ptr + p->cq_off.tail;
	cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
	cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
	cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
	cq->cqes = cq->ring_ptr + p->cq_off.cqes;
	if (p->cq_off.flags)
		cq->kflags = cq->ring_ptr + p->cq_off.flags;
	return 0;
}

/*
 * For users that want to specify sq_thread_cpu or sq_thread_idle, this
 * interface is a convenient helper for mmap()ing the rings.
 * Returns -errno on error, or zero on success. On success, 'ring'
 * contains the necessary information to read/write to the rings.
 */
int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring)
{
	int ret;

	memset(ring, 0, sizeof(*ring));
	ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
	if (!ret) {
		ring->flags = p->flags;
		ring->ring_fd = fd;
	}
	return ret;
}
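
/*
 * Usage sketch (illustrative, not part of this file): io_uring_queue_mmap()
 * is for callers that perform the io_uring_setup(2) syscall themselves and
 * only need the rings mapped. Using this file's raw syscall wrapper:
 *
 *	struct io_uring_params p = { };
 *	struct io_uring ring;
 *	int fd, ret;
 *
 *	fd = __sys_io_uring_setup(8, &p);
 *	if (fd < 0)
 *		return -errno;
 *	ret = io_uring_queue_mmap(fd, &p, &ring);
 *	if (ret < 0) {
 *		close(fd);
 *		return ret;
 *	}
 */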

/*
 * Ensure that the mmap'ed rings aren't available to a child after a fork(2).
 * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges.
 */
int io_uring_ring_dontfork(struct io_uring *ring)
{
	size_t len;
	int ret;

	if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr)
		return -EINVAL;

	len = *ring->sq.kring_entries * sizeof(struct io_uring_sqe);
	ret = madvise(ring->sq.sqes, len, MADV_DONTFORK);
	if (ret == -1)
		return -errno;

	len = ring->sq.ring_sz;
	ret = madvise(ring->sq.ring_ptr, len, MADV_DONTFORK);
	if (ret == -1)
		return -errno;

	if (ring->cq.ring_ptr != ring->sq.ring_ptr) {
		len = ring->cq.ring_sz;
		ret = madvise(ring->cq.ring_ptr, len, MADV_DONTFORK);
		if (ret == -1)
			return -errno;
	}

	return 0;
}
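
/*
 * Usage sketch (illustrative, not part of this file): call this once after
 * ring setup if the process will fork() and the child must not inherit the
 * ring mappings:
 *
 *	struct io_uring ring;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return;
 *	io_uring_ring_dontfork(&ring);
 *	if (fork() == 0) {
 *		// child: the ring pages are no longer mapped here
 *		_exit(0);
 *	}
 */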

int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
			       struct io_uring_params *p)
{
	int fd, ret;

	fd = __sys_io_uring_setup(entries, p);
	if (fd < 0)
		return -errno;

	ret = io_uring_queue_mmap(fd, p, ring);
	if (ret)
		close(fd);

	return ret;
}
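
/*
 * Usage sketch (illustrative): this entry point exists so callers can pass
 * their own io_uring_params, e.g. to request an SQPOLL thread:
 *
 *	struct io_uring_params p;
 *	struct io_uring ring;
 *
 *	memset(&p, 0, sizeof(p));
 *	p.flags = IORING_SETUP_SQPOLL;
 *	p.sq_thread_idle = 2000;	// kernel thread idles after 2000 msec
 *	if (io_uring_queue_init_params(8, &ring, &p) < 0)
 *		return;
 */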

/*
 * Returns -errno on error, or zero on success. On success, 'ring'
 * contains the necessary information to read/write to the rings.
 */
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags;

	return io_uring_queue_init_params(entries, ring, &p);
}

void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	struct io_uring_cq *cq = &ring->cq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	io_uring_unmap_rings(sq, cq);
	close(ring->ring_fd);
}
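
/*
 * A minimal end-to-end sketch of the init/exit pair above (illustrative,
 * not part of the library): queue a no-op, reap its completion, tear down:
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_submit(&ring);
 *	if (io_uring_wait_cqe(&ring, &cqe) == 0)
 *		io_uring_cqe_seen(&ring, cqe);
 *	io_uring_queue_exit(&ring);
 */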

/* Probe the kernel via an existing ring; caller frees the result with free(). */
struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
{
	struct io_uring_probe *probe;
	size_t len;
	int r;

	len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op);
	probe = malloc(len);
	if (!probe)
		return NULL;
	memset(probe, 0, len);

	r = io_uring_register_probe(ring, probe, 256);
	if (r < 0)
		goto fail;

	return probe;
fail:
	free(probe);
	return NULL;
}

struct io_uring_probe *io_uring_get_probe(void)
{
	struct io_uring ring;
	struct io_uring_probe *probe = NULL;
	int r;

	r = io_uring_queue_init(2, &ring, 0);
	if (r < 0)
		return NULL;

	probe = io_uring_get_probe_ring(&ring);
	io_uring_queue_exit(&ring);
	return probe;
}
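
/*
 * Usage sketch (illustrative): probes report which opcodes the running
 * kernel supports. The result is malloc()ed above, so plain free()
 * releases it; the io_uring_opcode_supported() helper from liburing.h is
 * assumed available:
 *
 *	struct io_uring_probe *probe = io_uring_get_probe();
 *
 *	if (probe) {
 *		if (io_uring_opcode_supported(probe, IORING_OP_NOP))
 *			puts("NOP supported");
 *		free(probe);
 *	}
 */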