ceph/src/spdk/dpdk/lib/librte_vhost/fd_man.c (imported with the 15.2.0 Octopus source)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>

#include "fd_man.h"


#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1

#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)

static int
get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
{
	int i;

	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
		;

	return i;
}

static void
fdset_move(struct fdset *pfdset, int dst, int src)
{
	pfdset->fd[dst] = pfdset->fd[src];
	pfdset->rwfds[dst] = pfdset->rwfds[src];
}

static void
fdset_shrink_nolock(struct fdset *pfdset)
{
	int i;
	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);

	for (i = 0; i < last_valid_idx; i++) {
		if (pfdset->fd[i].fd != -1)
			continue;

		fdset_move(pfdset, i, last_valid_idx);
		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
	}
	pfdset->num = last_valid_idx + 1;
}

/*
 * Find deleted fd entries and remove them
 */
static void
fdset_shrink(struct fdset *pfdset)
{
	pthread_mutex_lock(&pfdset->fd_mutex);
	fdset_shrink_nolock(pfdset);
	pthread_mutex_unlock(&pfdset->fd_mutex);
}
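
/*
 * Illustrative trace of the compaction (not part of the original file):
 * with entries { 3, -1, 7, -1, 9 } and num == 5, fdset_shrink_nolock()
 * moves the last valid entry (fd 9) into the first hole, leaving
 * { 3, 9, 7 } with num == 3.  The relative order of surviving fds is
 * not preserved, which is fine because the dispatch loop treats the
 * array as an unordered set.
 */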

/**
 * Returns the index in the fdset for a given fd.
 * @return
 *   index for the fd, or -1 if fd isn't in the fdset.
 */
static int
fdset_find_fd(struct fdset *pfdset, int fd)
{
	int i;

	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
		;

	return i == pfdset->num ? -1 : i;
}

static void
fdset_add_fd(struct fdset *pfdset, int idx, int fd,
	fd_cb rcb, fd_cb wcb, void *dat)
{
	struct fdentry *pfdentry = &pfdset->fd[idx];
	struct pollfd *pfd = &pfdset->rwfds[idx];

	pfdentry->fd = fd;
	pfdentry->rcb = rcb;
	pfdentry->wcb = wcb;
	pfdentry->dat = dat;

	pfd->fd = fd;
	pfd->events = rcb ? POLLIN : 0;
	pfd->events |= wcb ? POLLOUT : 0;
	pfd->revents = 0;
}

void
fdset_init(struct fdset *pfdset)
{
	int i;

	if (pfdset == NULL)
		return;

	for (i = 0; i < MAX_FDS; i++) {
		pfdset->fd[i].fd = -1;
		pfdset->fd[i].dat = NULL;
	}
	pfdset->num = 0;
}
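
/*
 * Usage sketch (illustrative, not part of this file): fdset_init() only
 * clears the entries; it does not initialize fd_mutex or
 * fd_pooling_mutex.  The vhost socket code therefore sets up its fdset
 * with a static initializer along these lines (variable name is
 * hypothetical):
 *
 *	static struct fdset my_fdset = {
 *		.fd = { [0 ... MAX_FDS - 1] = { -1, NULL, NULL, NULL, 0 } },
 *		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
 *		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
 *		.num = 0,
 *	};
 */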

/**
 * Register the fd in the fdset with read/write handler and context.
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	int i;

	if (pfdset == NULL || fd == -1)
		return -1;

	pthread_mutex_lock(&pfdset->fd_mutex);
	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
	if (i == -1) {
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		fdset_shrink_nolock(pfdset);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
		if (i == -1) {
			pthread_mutex_unlock(&pfdset->fd_mutex);
			return -2;
		}
	}

	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
	pthread_mutex_unlock(&pfdset->fd_mutex);

	return 0;
}
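
/*
 * Illustrative caller (a sketch, not part of this file; names are
 * hypothetical): a listening socket is typically registered with only a
 * read callback, and the poll loop is then woken up so the new fd is
 * picked up on the next iteration:
 *
 *	static void
 *	accept_cb(int fd, void *dat, int *remove)
 *	{
 *		// accept(fd, ...) and register the connection fd here;
 *		// set *remove = 1 to drop this entry from the set.
 *	}
 *
 *	if (fdset_add(&my_fdset, listen_fd, accept_cb, NULL, ctx) < 0)
 *		goto err;	// -1: bad arguments, -2: fdset is full
 *	fdset_pipe_notify(&my_fdset);
 */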

/**
 * Unregister the fd from the fdset.
 * Returns the context of the given fd, or NULL.
 */
void *
fdset_del(struct fdset *pfdset, int fd)
{
	int i;
	void *dat = NULL;

	if (pfdset == NULL || fd == -1)
		return NULL;

	do {
		pthread_mutex_lock(&pfdset->fd_mutex);

		i = fdset_find_fd(pfdset, fd);
		if (i != -1 && pfdset->fd[i].busy == 0) {
			/* busy indicates r/wcb is executing! */
			dat = pfdset->fd[i].dat;
			pfdset->fd[i].fd = -1;
			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
			pfdset->fd[i].dat = NULL;
			i = -1;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
	} while (i != -1);

	return dat;
}
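
/*
 * Teardown sketch (illustrative, not part of this file): because
 * fdset_del() spins until any in-flight callback on the fd has
 * finished, the caller may safely close the fd and release its context
 * afterwards:
 *
 *	void *ctx = fdset_del(&my_fdset, conn_fd);
 *	close(conn_fd);
 *	free(ctx);	// assuming the context was heap-allocated
 */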

/**
 * Unregister the fd from the fdset.
 *
 * Returns -2 if the parameters are invalid and -1 if the fd is busy
 * (a callback is currently running on it); otherwise the fd, if
 * present, is removed from the fdset and 0 is returned.
 */
int
fdset_try_del(struct fdset *pfdset, int fd)
{
	int i;

	if (pfdset == NULL || fd == -1)
		return -2;

	pthread_mutex_lock(&pfdset->fd_mutex);
	i = fdset_find_fd(pfdset, fd);
	if (i != -1 && pfdset->fd[i].busy) {
		pthread_mutex_unlock(&pfdset->fd_mutex);
		return -1;
	}

	if (i != -1) {
		pfdset->fd[i].fd = -1;
		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
		pfdset->fd[i].dat = NULL;
	}

	pthread_mutex_unlock(&pfdset->fd_mutex);
	return 0;
}

/**
 * This function runs in an infinite blocking loop; it calls the
 * corresponding read/write handler whenever there is an event on an fd
 * in pfdset.
 *
 * Before a callback is invoked, the entry's flag is set to busy; if
 * another thread (currently rte_vhost_driver_unregister) calls fdset_del
 * concurrently, it will wait until the flag is reset to zero (which
 * indicates the callback has finished), and only then free the context
 * after fdset_del returns.
 */
void *
fdset_event_dispatch(void *arg)
{
	int i;
	struct pollfd *pfd;
	struct fdentry *pfdentry;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;
	int need_shrink;
	struct fdset *pfdset = arg;
	int val;

	if (pfdset == NULL)
		return NULL;

	while (1) {

		/*
		 * While poll is blocked, other threads might unregister
		 * listen fds from the fdset and register new ones into it.
		 * When poll returns, the entries for those listen fds might
		 * have been updated. It is OK if a spurious callback is
		 * invoked for a newly registered listen fd.
		 */
		pthread_mutex_lock(&pfdset->fd_mutex);
		numfds = pfdset->num;
		pthread_mutex_unlock(&pfdset->fd_mutex);

		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		if (val < 0)
			continue;

		need_shrink = 0;
		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			pfdentry = &pfdset->fd[i];
			fd = pfdentry->fd;
			pfd = &pfdset->rwfds[i];

			if (fd < 0) {
				need_shrink = 1;
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			if (!pfd->revents) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			pthread_mutex_unlock(&pfdset->fd_mutex);

			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
				rcb(fd, dat, &remove1);
			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check the busy flag, so the
			 * callback must not call fdset_del directly.
			 */
			/*
			 * When the callback asks us to clean up the fd, the
			 * fd has already been closed inside the callback and
			 * its value may be reused by a new listen fd created
			 * in another thread, so we cannot call fdset_del
			 * here; just mark the entry as free instead.
			 */
			if (remove1 || remove2) {
				pfdentry->fd = -1;
				need_shrink = 1;
			}
		}

		if (need_shrink)
			fdset_shrink(pfdset);
	}

	return NULL;
}
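
/*
 * Illustrative sketch (not part of this file): the dispatch loop is
 * meant to run on its own thread, taking the fdset as the thread
 * argument, e.g.:
 *
 *	pthread_t tid;
 *	if (pthread_create(&tid, NULL, fdset_event_dispatch, &my_fdset) != 0)
 *		handle_error();
 *
 * In the vhost library this thread is typically created through
 * rte_ctrl_thread_create() so it gets the control-thread CPU affinity.
 */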

static void
fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
		   int *remove __rte_unused)
{
	char charbuf[16];
	int r = read(readfd, charbuf, sizeof(charbuf));
	/*
	 * The pipe is only used to wake up the poll loop; we don't care
	 * whether read() fails, so explicitly ignore its return value to
	 * keep the compiler happy.
	 */
	RTE_SET_USED(r);
}

void
fdset_pipe_uninit(struct fdset *fdset)
{
	fdset_del(fdset, fdset->u.readfd);
	close(fdset->u.readfd);
	close(fdset->u.writefd);
}

int
fdset_pipe_init(struct fdset *fdset)
{
	int ret;

	if (pipe(fdset->u.pipefd) < 0) {
		RTE_LOG(ERR, VHOST_FDMAN,
			"failed to create pipe for vhost fdset\n");
		return -1;
	}

	ret = fdset_add(fdset, fdset->u.readfd,
			fdset_pipe_read_cb, NULL, NULL);

	if (ret < 0) {
		RTE_LOG(ERR, VHOST_FDMAN,
			"failed to add pipe readfd %d into vhost server fdset\n",
			fdset->u.readfd);

		fdset_pipe_uninit(fdset);
		return -1;
	}

	return 0;
}

void
fdset_pipe_notify(struct fdset *fdset)
{
	int r = write(fdset->u.writefd, "1", 1);
	/*
	 * The write only serves to wake up the poll loop; we don't care
	 * whether it fails, so explicitly ignore its return value to
	 * keep the compiler happy.
	 */
	RTE_SET_USED(r);
}
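
/*
 * Self-pipe wake-up sketch (illustrative, not part of this file): the
 * dispatch thread can sit in poll() for up to a second, so after
 * changing the set from another thread the caller nudges it through
 * the pipe:
 *
 *	fdset_pipe_init(&my_fdset);		// registers the pipe read end
 *	...
 *	fdset_add(&my_fdset, new_fd, cb, NULL, ctx);
 *	fdset_pipe_notify(&my_fdset);		// poll() returns; new_fd is
 *						// polled from the next round
 */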