/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <pthread.h>
#include <sys/queue.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>

#include "eal_private.h"
#include "eal_thread.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR               1
static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
	int uio_intr_count;              /* for uio device */
	uint64_t vfio_intr_count;        /* for vfio device */
	uint64_t timerfd_num;            /* for timerfd */
	char charbuf[16];                /* for others */
};
TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next;
	rte_intr_callback_fn cb_fn;  /**< callback address */
	void *cb_arg;                /**< parameter for callback */
	uint8_t pending_delete;      /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};
struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;
	struct rte_intr_handle intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;  /**< user callbacks */
	uint32_t active;                    /**< source is being serviced (referenced below) */
};
/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;
/* VFIO interrupts */

#define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
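
/*
 * Note on the buffers above: every VFIO_DEVICE_SET_IRQS call below fills a
 * struct vfio_irq_set header followed by a variable-length data[] array of
 * eventfds.  As an illustrative sketch only (hypothetical fds, not code
 * from this file), enabling a single eventfd looks roughly like:
 *
 *	char buf[sizeof(struct vfio_irq_set) + sizeof(int)];
 *	struct vfio_irq_set *s = (struct vfio_irq_set *)buf;
 *	s->argsz = sizeof(buf);
 *	s->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
 *	s->index = VFIO_PCI_MSI_IRQ_INDEX;
 *	s->start = 0;
 *	s->count = 1;
 *	*(int *)&s->data = efd;
 *	ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, s);
 */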
/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	/* enable INTx */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	/* unmask INTx after enabling */
	memset(irq_set, 0, len);
	len = sizeof(struct vfio_irq_set);
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}
/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	/* mask interrupts before disabling */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	/* disable INTx */
	memset(irq_set, 0, len);
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL,
			"Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
		return -1;
	}

	return 0;
}
/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}
/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		RTE_LOG(ERR, EAL,
			"Error disabling MSI interrupts for fd %d\n", intr_handle->fd);

	return ret;
}
/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
	irq_set->count = intr_handle->max_intr ?
		(intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
		 RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	/* INTR vector offset 0 is reserved for non-efds mapping */
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
	memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
		sizeof(*intr_handle->efds) * intr_handle->nb_efd);

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}
/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		RTE_LOG(ERR, EAL,
			"Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);

	return ret;
}
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}
/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
			intr_handle->fd);

	return ret;
}
#endif
static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;

	/* use UIO config file descriptor for uio_pci_generic */
	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}
	/* disable interrupts */
	command_high |= 0x4;
	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}

	return 0;
}
static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;

	/* use UIO config file descriptor for uio_pci_generic */
	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}
	/* enable interrupts */
	command_high &= ~0x4;
	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}

	return 0;
}
static int
uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
	const int value = 0;

	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d (%s)\n",
			intr_handle->fd, strerror(errno));
		return -1;
	}
	return 0;
}
static int
uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
	const int value = 1;

	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d (%s)\n",
			intr_handle->fd, strerror(errno));
		return -1;
	}
	return 0;
}
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/* first do parameter checking */
	if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
		RTE_LOG(ERR, EAL,
			"Registering with invalid input parameter\n");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = calloc(1, sizeof(*callback));
	if (callback == NULL) {
		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;
	callback->pending_delete = 0;
	callback->ucb_fn = NULL;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (src->intr_handle.fd == intr_handle->fd) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this - add new source */
	if (src == NULL) {
		src = calloc(1, sizeof(*src));
		if (src == NULL) {
			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
			free(callback);
			ret = -ENOMEM;
		} else {
			src->intr_handle = *intr_handle;
			TAILQ_INIT(&src->callbacks);
			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			TAILQ_INSERT_TAIL(&intr_sources, src, next);
			wake_thread = 1;
			ret = 0;
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check if need to notify the pipe fd waited by epoll_wait to
	 * rebuild the wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			return -EPIPE;

	return ret;
}
int __rte_experimental
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
				rte_intr_callback_fn cb_fn, void *cb_arg,
				rte_intr_unregister_callback_fn ucb_fn)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (intr_handle == NULL || intr_handle->fd < 0) {
		RTE_LOG(ERR, EAL,
		"Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if the interrupt source for the fd is existent */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (src->intr_handle.fd == intr_handle->fd)
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* only usable if the source is active */
	} else if (src->active == 0) {
		ret = -EAGAIN;

	} else {
		ret = 0;

		/* walk through the callbacks and mark all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				cb->pending_delete = 1;
				cb->ucb_fn = ucb_fn;
				ret++;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	return ret;
}
int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (intr_handle == NULL || intr_handle->fd < 0) {
		RTE_LOG(ERR, EAL,
		"Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if the interrupt source for the fd is existent */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (src->intr_handle.fd == intr_handle->fd)
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* interrupt source has some active callbacks right now. */
	} else if (src->active != 0) {
		ret = -EAGAIN;

	/* ok to remove. */
	} else {
		ret = 0;

		/* walk through the callbacks and remove all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

			next = TAILQ_NEXT(cb, next);

			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				free(cb);
				ret++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			free(src);
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
		ret = -EPIPE;
	}

	return ret;
}
int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
	if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
		return 0;

	if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
		return -1;

	switch (intr_handle->type) {
	/* write to the uio fd to enable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_enable_msix(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_enable_msi(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_enable_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_enable_req(intr_handle))
			return -1;
		break;
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL,
			"Unknown handle type of fd %d\n",
					intr_handle->fd);
		return -1;
	}

	return 0;
}
int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
	if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
		return 0;

	if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
		return -1;

	switch (intr_handle->type) {
	/* write to the uio fd to disable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_disable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_disable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_disable_msix(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_disable_msi(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_disable_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_disable_req(intr_handle))
			return -1;
		break;
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL,
			"Unknown handle type of fd %d\n",
					intr_handle->fd);
		return -1;
	}

	return 0;
}
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
	bool call = false;
	int n, bytes_read, rv;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;
	union rte_intr_read_buffer buf;
	struct rte_intr_callback active_cb;

	for (n = 0; n < nfds; n++) {

		/**
		 * if the pipe fd is ready to read, return out to
		 * rebuild the wait list.
		 */
		if (events[n].data.fd == intr_pipe.readfd){
			int r = read(intr_pipe.readfd, buf.charbuf,
					sizeof(buf.charbuf));
			RTE_SET_USED(r);
			return -1;
		}
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(src, &intr_sources, next)
			if (src->intr_handle.fd ==
					events[n].data.fd)
				break;
		if (src == NULL){
			rte_spinlock_unlock(&intr_lock);
			continue;
		}

		/* mark this interrupt source as active and release the lock. */
		src->active = 1;
		rte_spinlock_unlock(&intr_lock);

		/* set the length to be read for different handle types */
		switch (src->intr_handle.type) {
		case RTE_INTR_HANDLE_UIO:
		case RTE_INTR_HANDLE_UIO_INTX:
			bytes_read = sizeof(buf.uio_intr_count);
			break;
		case RTE_INTR_HANDLE_ALARM:
			bytes_read = sizeof(buf.timerfd_num);
			break;
		case RTE_INTR_HANDLE_VFIO_MSIX:
		case RTE_INTR_HANDLE_VFIO_MSI:
		case RTE_INTR_HANDLE_VFIO_LEGACY:
			bytes_read = sizeof(buf.vfio_intr_count);
			break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
		case RTE_INTR_HANDLE_VFIO_REQ:
			bytes_read = 0;
			call = true;
			break;
#endif
		case RTE_INTR_HANDLE_VDEV:
		case RTE_INTR_HANDLE_EXT:
			bytes_read = 0;
			call = true;
			break;
		case RTE_INTR_HANDLE_DEV_EVENT:
			bytes_read = 0;
			call = true;
			break;
		default:
			bytes_read = 1;
			break;
		}

		if (bytes_read > 0) {
			/**
			 * read out to clear the ready-to-be-read flag
			 * for epoll_wait.
			 */
			bytes_read = read(events[n].data.fd, &buf, bytes_read);
			if (bytes_read < 0) {
				if (errno == EINTR || errno == EWOULDBLOCK)
					continue;

				RTE_LOG(ERR, EAL, "Error reading from file "
					"descriptor %d: %s\n",
					events[n].data.fd,
					strerror(errno));
				/*
				 * The device is unplugged or buggy, remove
				 * it as an interrupt source and return to
				 * force the wait list to be rebuilt.
				 */
				rte_spinlock_lock(&intr_lock);
				TAILQ_REMOVE(&intr_sources, src, next);
				rte_spinlock_unlock(&intr_lock);

				for (cb = TAILQ_FIRST(&src->callbacks); cb;
							cb = next) {
					next = TAILQ_NEXT(cb, next);
					TAILQ_REMOVE(&src->callbacks, cb, next);
					free(cb);
				}
				free(src);
				return -1;
			} else if (bytes_read == 0)
				RTE_LOG(ERR, EAL, "Read nothing from file "
					"descriptor %d\n", events[n].data.fd);
			else
				call = true;
		}

		/* grab a lock, again to call callbacks and update status. */
		rte_spinlock_lock(&intr_lock);

		if (call) {

			/* Finally, call all callbacks. */
			TAILQ_FOREACH(cb, &src->callbacks, next) {

				/* make a copy and unlock. */
				active_cb = *cb;
				rte_spinlock_unlock(&intr_lock);

				/* call the actual callback */
				active_cb.cb_fn(active_cb.cb_arg);

				/* get the lock back. */
				rte_spinlock_lock(&intr_lock);
			}
		}
		/* we are done with that interrupt source, release it. */
		src->active = 0;

		rv = 0;

		/* check if any callbacks are supposed to be removed */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->pending_delete) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				if (cb->ucb_fn)
					cb->ucb_fn(&src->intr_handle, cb->cb_arg);
				free(cb);
				rv++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			free(src);
		}

		/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
		if (rv >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
			rte_spinlock_unlock(&intr_lock);
			return -EPIPE;
		}

		rte_spinlock_unlock(&intr_lock);
	}

	return 0;
}
/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];
	int nfds = 0;

	for(;;) {
		nfds = epoll_wait(pfd, events, totalfds,
			EAL_INTR_EPOLL_WAIT_FOREVER);
		/* epoll_wait fail */
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			RTE_LOG(ERR, EAL,
				"epoll_wait returns with fail\n");
			return;
		}
		/* epoll_wait timeout, will never happen here */
		else if (nfds == 0)
			continue;
		/* epoll_wait has at least one fd ready to read */
		if (eal_intr_process_interrupts(events, nfds) < 0)
			return;
	}
}
/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 */
static __attribute__((noreturn)) void *
eal_intr_thread_main(__rte_unused void *arg)
{
	struct epoll_event ev;

	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add pipe fd into wait list, this pipe is used to
		 * rebuild the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		TAILQ_FOREACH(src, &intr_sources, next) {
			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			ev.data.fd = src->intr_handle.fd;

			/**
			 * add all the uio device file descriptors
			 * into the wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					src->intr_handle.fd, &ev) < 0){
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					src->intr_handle.fd, strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);
		/* serve the interrupt */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}
int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which will be waited by epoll and notified to
	 * rebuild the wait list of epoll.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");
	}

	return ret;
}
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
	union rte_intr_read_buffer buf;
	int bytes_read = 0;
	int nbytes;

	switch (intr_handle->type) {
	case RTE_INTR_HANDLE_UIO:
	case RTE_INTR_HANDLE_UIO_INTX:
		bytes_read = sizeof(buf.uio_intr_count);
		break;
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		bytes_read = sizeof(buf.vfio_intr_count);
		break;
	case RTE_INTR_HANDLE_VDEV:
		bytes_read = intr_handle->efd_counter_size;
		/* For vdev, number of bytes to read is set by driver */
		break;
	case RTE_INTR_HANDLE_EXT:
		return;
	default:
		bytes_read = 1;
		RTE_LOG(INFO, EAL, "unexpected intr type\n");
		break;
	}

	/**
	 * read out to clear the ready-to-be-read flag
	 * for epoll_wait.
	 */
	if (bytes_read == 0)
		return;
	do {
		nbytes = read(fd, &buf, bytes_read);
		if (nbytes < 0) {
			if (errno == EINTR || errno == EWOULDBLOCK ||
			    errno == EAGAIN)
				continue;
			RTE_LOG(ERR, EAL,
				"Error reading from fd %d: %s\n",
				fd, strerror(errno));
		} else if (nbytes == 0)
			RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
		return;
	} while (1);
}
static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
			struct rte_epoll_event *events)
{
	unsigned int i, count = 0;
	struct rte_epoll_event *rev;

	for (i = 0; i < n; i++) {
		rev = evs[i].data.ptr;
		if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
						 RTE_EPOLL_EXEC))
			continue;

		events[count].status        = RTE_EPOLL_VALID;
		events[count].fd            = rev->fd;
		events[count].epfd          = rev->epfd;
		events[count].epdata.event  = rev->epdata.event;
		events[count].epdata.data   = rev->epdata.data;
		if (rev->epdata.cb_fun)
			rev->epdata.cb_fun(rev->fd,
					   rev->epdata.cb_arg);

		rte_compiler_barrier();
		rev->status = RTE_EPOLL_VALID;
		count++;
	}
	return count;
}
static inline int
eal_init_tls_epfd(void)
{
	int pfd = epoll_create(255);

	if (pfd < 0) {
		RTE_LOG(ERR, EAL,
			"Cannot create epoll instance\n");
		return -1;
	}
	return pfd;
}
int
rte_intr_tls_epfd(void)
{
	if (RTE_PER_LCORE(_epfd) == -1)
		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

	return RTE_PER_LCORE(_epfd);
}
int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout)
{
	struct epoll_event evs[maxevents];
	int rc;

	if (!events) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	while (1) {
		rc = epoll_wait(epfd, evs, maxevents, timeout);
		if (likely(rc > 0)) {
			/* epoll_wait has at least one fd ready to read */
			rc = eal_epoll_process_event(evs, rc, events);
			break;
		} else if (rc < 0) {
			if (errno == EINTR)
				continue;
			/* epoll_wait fail */
			RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
				strerror(errno));
			rc = -1;
			break;
		} else {
			/* rc == 0, epoll_wait timed out */
			break;
		}
	}

	return rc;
}
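
/*
 * Illustrative usage sketch (hypothetical names, not code from this file):
 * a polling thread can wait on its per-thread epoll fd for Rx interrupt
 * events previously added with rte_intr_rx_ctl()/rte_epoll_ctl(), assuming
 * the queue id was passed as the `data` pointer when the event was added:
 *
 *	struct rte_epoll_event ev[8];
 *	int i, n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, ev, 8, -1);
 *	for (i = 0; i < n; i++) {
 *		uint16_t queue_id = (uintptr_t)ev[i].epdata.data;
 *		... wake up and poll queue_id ...
 *	}
 */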
static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
	while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
				    RTE_EPOLL_INVALID))
		while (ev->status != RTE_EPOLL_VALID)
			rte_pause();

	memset(&ev->epdata, 0, sizeof(ev->epdata));
	ev->fd = -1;
	ev->epfd = -1;
}
int
rte_epoll_ctl(int epfd, int op, int fd,
	      struct rte_epoll_event *event)
{
	struct epoll_event ev;

	if (!event) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	if (op == EPOLL_CTL_ADD) {
		event->status = RTE_EPOLL_VALID;
		event->fd = fd;  /* ignore fd in event */
		event->epfd = epfd;
		ev.data.ptr = (void *)event;
	}

	ev.events = event->epdata.event;
	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
		RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
			op, fd, strerror(errno));
		if (op == EPOLL_CTL_ADD)
			/* rollback status when CTL_ADD fail */
			event->status = RTE_EPOLL_INVALID;
		return -1;
	}

	if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
		eal_epoll_data_safe_free(event);

	return 0;
}
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	struct rte_epoll_event *rev;
	struct rte_epoll_data *epdata;
	int epfd_op;
	unsigned int efd_idx;
	int rc = 0;

	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

	if (!intr_handle || intr_handle->nb_efd == 0 ||
	    efd_idx >= intr_handle->nb_efd) {
		RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
		return -EPERM;
	}

	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = &intr_handle->elist[efd_idx];
		if (rev->status != RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event already been added.\n");
			return -EEXIST;
		}

		/* attach to intr vector fd */
		epdata = &rev->epdata;
		epdata->event  = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data   = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op,
				   intr_handle->efds[efd_idx], rev);
		if (!rc)
			RTE_LOG(DEBUG, EAL,
				"efd %d associated with vec %d added on epfd %d"
				"\n", rev->fd, vec, epfd);
		else
			rc = -EPERM;
		break;
	case RTE_INTR_EVENT_DEL:
		epfd_op = EPOLL_CTL_DEL;
		rev = &intr_handle->elist[efd_idx];
		if (rev->status == RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event does not exist.\n");
			return -EPERM;
		}

		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
		if (rc)
			rc = -EPERM;
		break;
	default:
		RTE_LOG(ERR, EAL, "event op type mismatch\n");
		rc = -EPERM;
	}

	return rc;
}
void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
	uint32_t i;
	struct rte_epoll_event *rev;

	for (i = 0; i < intr_handle->nb_efd; i++) {
		rev = &intr_handle->elist[i];
		if (rev->status == RTE_EPOLL_INVALID)
			continue;
		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
			/* force free if the entry is still valid */
			eal_epoll_data_safe_free(rev);
			rev->status = RTE_EPOLL_INVALID;
		}
	}
}
int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
	uint32_t i;
	int fd;
	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	assert(nb_efd != 0);

	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
		for (i = 0; i < n; i++) {
			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
			if (fd < 0) {
				RTE_LOG(ERR, EAL,
					"can't setup eventfd, error %i (%s)\n",
					errno, strerror(errno));
				return -errno;
			}
			intr_handle->efds[i] = fd;
		}
		intr_handle->nb_efd   = n;
		intr_handle->max_intr = NB_OTHER_INTR + n;
	} else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
		/* only check, initialization would be done in vdev driver.*/
		if (intr_handle->efd_counter_size >
		    sizeof(union rte_intr_read_buffer)) {
			RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
			return -EINVAL;
		}
	} else {
		intr_handle->efds[0]  = intr_handle->fd;
		intr_handle->nb_efd   = RTE_MIN(nb_efd, 1U);
		intr_handle->max_intr = NB_OTHER_INTR;
	}

	return 0;
}
void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
	uint32_t i;

	rte_intr_free_epoll_fd(intr_handle);
	if (intr_handle->max_intr > intr_handle->nb_efd) {
		for (i = 0; i < intr_handle->nb_efd; i++)
			close(intr_handle->efds[i]);
	}
	intr_handle->nb_efd = 0;
	intr_handle->max_intr = 0;
}
int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
	return !(!intr_handle->nb_efd);
}
int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
	if (!rte_intr_dp_is_en(intr_handle))
		return 1;
	else
		return !!(intr_handle->max_intr - intr_handle->nb_efd);
}
int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
		return 1;

	if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
		return !!(intr_handle->efd_counter_size);

	return 0;
}