/* SPDX-License-Identifier: MIT */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"

#include "syscall.h"

static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	munmap(sq->ring_ptr, sq->ring_sz);
	if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr)
		munmap(cq->ring_ptr, cq->ring_sz);
}

static int io_uring_mmap(int fd, struct io_uring_params *p,
			 struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	int ret;

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);

	/*
	 * With IORING_FEAT_SINGLE_MMAP the kernel serves both rings from a
	 * single mapping, so size both to the larger of the two and map once.
	 */
	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		if (cq->ring_sz > sq->ring_sz)
			sq->ring_sz = cq->ring_sz;
		cq->ring_sz = sq->ring_sz;
	}
	sq->ring_ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (sq->ring_ptr == MAP_FAILED)
		return -errno;

	if (p->features & IORING_FEAT_SINGLE_MMAP) {
		cq->ring_ptr = sq->ring_ptr;
	} else {
		cq->ring_ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
				    MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
		if (cq->ring_ptr == MAP_FAILED) {
			cq->ring_ptr = NULL;
			ret = -errno;
			goto err;
		}
	}

	sq->khead = sq->ring_ptr + p->sq_off.head;
	sq->ktail = sq->ring_ptr + p->sq_off.tail;
	sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
	sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
	sq->kflags = sq->ring_ptr + p->sq_off.flags;
	sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
	sq->array = sq->ring_ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd,
			IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		io_uring_unmap_rings(sq, cq);
		return ret;
	}

	cq->khead = cq->ring_ptr + p->cq_off.head;
	cq->ktail = cq->ring_ptr + p->cq_off.tail;
	cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
	cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
	cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
	cq->cqes = cq->ring_ptr + p->cq_off.cqes;
	if (p->cq_off.flags)
		cq->kflags = cq->ring_ptr + p->cq_off.flags;
	return 0;
}
83 | ||
84 | /* | |
85 | * For users that want to specify sq_thread_cpu or sq_thread_idle, this | |
86 | * interface is a convenient helper for mmap()ing the rings. | |
87 | * Returns -errno on error, or zero on success. On success, 'ring' | |
88 | * contains the necessary information to read/write to the rings. | |
89 | */ | |
90 | int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring) | |
91 | { | |
92 | int ret; | |
93 | ||
94 | memset(ring, 0, sizeof(*ring)); | |
95 | ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); | |
96 | if (!ret) { | |
97 | ring->flags = p->flags; | |
98 | ring->ring_fd = fd; | |
99 | } | |
100 | return ret; | |
101 | } | |
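
/*
 * Illustrative sketch of the intended use (not part of this file): fill in
 * io_uring_params by hand, e.g. to run SQPOLL with a chosen idle timeout,
 * perform the setup syscall, and let io_uring_queue_mmap() map the rings.
 * The example reuses the same __sys_io_uring_setup() wrapper this file uses
 * below; error handling is kept minimal.
 *
 *	struct io_uring_params p;
 *	struct io_uring ring;
 *	int fd, ret;
 *
 *	memset(&p, 0, sizeof(p));
 *	p.flags = IORING_SETUP_SQPOLL;
 *	p.sq_thread_idle = 2000;	// msec before the SQ thread idles off
 *
 *	fd = __sys_io_uring_setup(8, &p);
 *	if (fd < 0)
 *		return -errno;
 *	ret = io_uring_queue_mmap(fd, &p, &ring);
 *	if (ret) {
 *		close(fd);
 *		return ret;
 *	}
 */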
102 | ||
103 | /* | |
104 | * Ensure that the mmap'ed rings aren't available to a child after a fork(2). | |
105 | * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges. | |
106 | */ | |
107 | int io_uring_ring_dontfork(struct io_uring *ring) | |
108 | { | |
109 | size_t len; | |
110 | int ret; | |
111 | ||
112 | if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr) | |
113 | return -EINVAL; | |
114 | ||
115 | len = *ring->sq.kring_entries * sizeof(struct io_uring_sqe); | |
116 | ret = madvise(ring->sq.sqes, len, MADV_DONTFORK); | |
117 | if (ret == -1) | |
118 | return -errno; | |
119 | ||
120 | len = ring->sq.ring_sz; | |
121 | ret = madvise(ring->sq.ring_ptr, len, MADV_DONTFORK); | |
122 | if (ret == -1) | |
123 | return -errno; | |
124 | ||
125 | if (ring->cq.ring_ptr != ring->sq.ring_ptr) { | |
126 | len = ring->cq.ring_sz; | |
127 | ret = madvise(ring->cq.ring_ptr, len, MADV_DONTFORK); | |
128 | if (ret == -1) | |
129 | return -errno; | |
130 | } | |
131 | ||
132 | return 0; | |
133 | } | |
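
/*
 * Illustrative sketch (not part of this file): a process that fork()s workers
 * after setting up a ring can mark the mappings with io_uring_ring_dontfork()
 * so children never inherit them. Error handling is abbreviated.
 *
 *	struct io_uring ring;
 *	pid_t pid;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return 1;
 *	if (io_uring_ring_dontfork(&ring) < 0)
 *		return 1;
 *
 *	pid = fork();
 *	if (pid == 0) {
 *		// child: the ring pages are not mapped here
 *		_exit(0);
 *	}
 *	// parent continues to use 'ring' as before
 */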
134 | ||
135 | int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, | |
136 | struct io_uring_params *p) | |
137 | { | |
138 | int fd, ret; | |
139 | ||
140 | fd = __sys_io_uring_setup(entries, p); | |
141 | if (fd < 0) | |
142 | return -errno; | |
143 | ||
144 | ret = io_uring_queue_mmap(fd, p, ring); | |
145 | if (ret) | |
146 | close(fd); | |
147 | ||
148 | return ret; | |
149 | } | |
150 | ||
151 | /* | |
152 | * Returns -errno on error, or zero on success. On success, 'ring' | |
153 | * contains the necessary information to read/write to the rings. | |
154 | */ | |
155 | int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) | |
156 | { | |
157 | struct io_uring_params p; | |
158 | ||
159 | memset(&p, 0, sizeof(p)); | |
160 | p.flags = flags; | |
161 | ||
162 | return io_uring_queue_init_params(entries, ring, &p); | |
163 | } | |
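
/*
 * Illustrative sketch (not part of this file): the usual init/submit/wait
 * cycle built on io_uring_queue_init(). The submission and completion
 * helpers come from liburing.h; a NOP request is used so the example has
 * no file dependencies.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return 1;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_submit(&ring);
 *
 *	if (io_uring_wait_cqe(&ring, &cqe) == 0)
 *		io_uring_cqe_seen(&ring, cqe);
 *	io_uring_queue_exit(&ring);
 */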
164 | ||
165 | void io_uring_queue_exit(struct io_uring *ring) | |
166 | { | |
167 | struct io_uring_sq *sq = &ring->sq; | |
168 | struct io_uring_cq *cq = &ring->cq; | |
169 | ||
170 | munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); | |
171 | io_uring_unmap_rings(sq, cq); | |
172 | close(ring->ring_fd); | |
173 | } | |
174 | ||
struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
{
	struct io_uring_probe *probe;
	size_t len;
	int r;

	len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op);
	probe = malloc(len);
	if (!probe)
		return NULL;
	memset(probe, 0, len);

	r = io_uring_register_probe(ring, probe, 256);
	if (r < 0)
		goto fail;

	return probe;
fail:
	free(probe);
	return NULL;
}
192 | ||
193 | struct io_uring_probe *io_uring_get_probe(void) | |
194 | { | |
195 | struct io_uring ring; | |
196 | struct io_uring_probe* probe = NULL; | |
197 | ||
198 | int r = io_uring_queue_init(2, &ring, 0); | |
199 | if (r < 0) | |
200 | return NULL; | |
201 | ||
202 | probe = io_uring_get_probe_ring(&ring); | |
203 | io_uring_queue_exit(&ring); | |
204 | return probe; | |
205 | } |
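
/*
 * Illustrative sketch (not part of this file): probing which opcodes the
 * running kernel supports, assuming the io_uring_opcode_supported() helper
 * from liburing.h. The caller releases the probe with free(), matching the
 * malloc() in io_uring_get_probe_ring() above.
 *
 *	struct io_uring_probe *probe = io_uring_get_probe();
 *
 *	if (probe) {
 *		if (io_uring_opcode_supported(probe, IORING_OP_READV))
 *			puts("IORING_OP_READV is supported");
 *		free(probe);
 *	}
 */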