1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
6 #ifdef RTE_EXEC_ENV_LINUXAPP
14 #include "virtio_pci.h"
15 #include "virtio_logs.h"
16 #include "virtqueue.h"
19 * Following macros are derived from linux/pci_regs.h, however,
20 * we can't simply include that header here, as there is no such
21 * file for non-Linux platform.
23 #define PCI_CAPABILITY_LIST 0x34
24 #define PCI_CAP_ID_VNDR 0x09
25 #define PCI_CAP_ID_MSIX 0x11
28 * The remaining space is defined by each driver as the per-driver
29 * configuration space.
31 #define VIRTIO_PCI_CONFIG(hw) \
32 (((hw)->use_msix == VIRTIO_MSIX_ENABLED) ? 24 : 20)
35 check_vq_phys_addr_ok(struct virtqueue
*vq
)
37 /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
38 * and only accepts 32 bit page frame number.
39 * Check if the allocated physical memory exceeds 16TB.
41 if ((vq
->vq_ring_mem
+ vq
->vq_ring_size
- 1) >>
42 (VIRTIO_PCI_QUEUE_ADDR_SHIFT
+ 32)) {
43 PMD_INIT_LOG(ERR
, "vring address shouldn't be above 16TB!");
51 * Since we are in legacy mode:
52 * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
54 * "Note that this is possible because while the virtio header is PCI (i.e.
55 * little) endian, the device-specific region is encoded in the native endian of
56 * the guest (where such distinction is applicable)."
58 * For powerpc which supports both, qemu supposes that cpu is big endian and
59 * enforces this for the virtio-net stuff.
/*
 * Read @length bytes of device-specific config at @offset into @dst via
 * the legacy I/O port BAR. On big-endian PPC64 the device config is
 * guest-native endian, so read in the widest possible chunks and
 * byte-swap each one; elsewhere a single raw read suffices.
 */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
		       void *dst, int length)
{
#ifdef RTE_ARCH_PPC_64
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
		} else if (length >= 2) {
			size = 2;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
		} else {
			size = 1;
			rte_pci_ioport_read(VTPCI_IO(hw), dst, size,
				VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* advance to the next chunk */
		dst = (char *)dst + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_read(VTPCI_IO(hw), dst, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
/*
 * Write @length bytes from @src to device-specific config at @offset via
 * the legacy I/O port BAR. Mirror of legacy_read_dev_config(): on PPC64
 * the payload is byte-swapped chunk by chunk through a local union so
 * @src itself is never modified.
 */
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
			const void *src, int length)
{
#ifdef RTE_ARCH_PPC_64
	union {
		uint32_t u32;
		uint16_t u16;
	} tmp;
	int size;

	while (length > 0) {
		if (length >= 4) {
			size = 4;
			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
					VIRTIO_PCI_CONFIG(hw) + offset);
		} else if (length >= 2) {
			size = 2;
			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
			rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
					VIRTIO_PCI_CONFIG(hw) + offset);
		} else {
			size = 1;
			rte_pci_ioport_write(VTPCI_IO(hw), src, size,
					VIRTIO_PCI_CONFIG(hw) + offset);
		}

		/* advance to the next chunk */
		src = (const char *)src + size;
		offset += size;
		length -= size;
	}
#else
	rte_pci_ioport_write(VTPCI_IO(hw), src, length,
		VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
134 legacy_get_features(struct virtio_hw
*hw
)
138 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 4, VIRTIO_PCI_HOST_FEATURES
);
143 legacy_set_features(struct virtio_hw
*hw
, uint64_t features
)
145 if ((features
>> 32) != 0) {
147 "only 32 bit features are allowed for legacy virtio!");
150 rte_pci_ioport_write(VTPCI_IO(hw
), &features
, 4,
151 VIRTIO_PCI_GUEST_FEATURES
);
155 legacy_get_status(struct virtio_hw
*hw
)
159 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 1, VIRTIO_PCI_STATUS
);
164 legacy_set_status(struct virtio_hw
*hw
, uint8_t status
)
166 rte_pci_ioport_write(VTPCI_IO(hw
), &status
, 1, VIRTIO_PCI_STATUS
);
170 legacy_reset(struct virtio_hw
*hw
)
172 legacy_set_status(hw
, VIRTIO_CONFIG_STATUS_RESET
);
176 legacy_get_isr(struct virtio_hw
*hw
)
180 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 1, VIRTIO_PCI_ISR
);
184 /* Enable one vector (0) for Link State Interrupt */
186 legacy_set_config_irq(struct virtio_hw
*hw
, uint16_t vec
)
190 rte_pci_ioport_write(VTPCI_IO(hw
), &vec
, 2, VIRTIO_MSI_CONFIG_VECTOR
);
191 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 2, VIRTIO_MSI_CONFIG_VECTOR
);
196 legacy_set_queue_irq(struct virtio_hw
*hw
, struct virtqueue
*vq
, uint16_t vec
)
200 rte_pci_ioport_write(VTPCI_IO(hw
), &vq
->vq_queue_index
, 2,
201 VIRTIO_PCI_QUEUE_SEL
);
202 rte_pci_ioport_write(VTPCI_IO(hw
), &vec
, 2, VIRTIO_MSI_QUEUE_VECTOR
);
203 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 2, VIRTIO_MSI_QUEUE_VECTOR
);
208 legacy_get_queue_num(struct virtio_hw
*hw
, uint16_t queue_id
)
212 rte_pci_ioport_write(VTPCI_IO(hw
), &queue_id
, 2, VIRTIO_PCI_QUEUE_SEL
);
213 rte_pci_ioport_read(VTPCI_IO(hw
), &dst
, 2, VIRTIO_PCI_QUEUE_NUM
);
218 legacy_setup_queue(struct virtio_hw
*hw
, struct virtqueue
*vq
)
222 if (!check_vq_phys_addr_ok(vq
))
225 rte_pci_ioport_write(VTPCI_IO(hw
), &vq
->vq_queue_index
, 2,
226 VIRTIO_PCI_QUEUE_SEL
);
227 src
= vq
->vq_ring_mem
>> VIRTIO_PCI_QUEUE_ADDR_SHIFT
;
228 rte_pci_ioport_write(VTPCI_IO(hw
), &src
, 4, VIRTIO_PCI_QUEUE_PFN
);
234 legacy_del_queue(struct virtio_hw
*hw
, struct virtqueue
*vq
)
238 rte_pci_ioport_write(VTPCI_IO(hw
), &vq
->vq_queue_index
, 2,
239 VIRTIO_PCI_QUEUE_SEL
);
240 rte_pci_ioport_write(VTPCI_IO(hw
), &src
, 4, VIRTIO_PCI_QUEUE_PFN
);
244 legacy_notify_queue(struct virtio_hw
*hw
, struct virtqueue
*vq
)
246 rte_pci_ioport_write(VTPCI_IO(hw
), &vq
->vq_queue_index
, 2,
247 VIRTIO_PCI_QUEUE_NOTIFY
);
250 const struct virtio_pci_ops legacy_ops
= {
251 .read_dev_cfg
= legacy_read_dev_config
,
252 .write_dev_cfg
= legacy_write_dev_config
,
253 .reset
= legacy_reset
,
254 .get_status
= legacy_get_status
,
255 .set_status
= legacy_set_status
,
256 .get_features
= legacy_get_features
,
257 .set_features
= legacy_set_features
,
258 .get_isr
= legacy_get_isr
,
259 .set_config_irq
= legacy_set_config_irq
,
260 .set_queue_irq
= legacy_set_queue_irq
,
261 .get_queue_num
= legacy_get_queue_num
,
262 .setup_queue
= legacy_setup_queue
,
263 .del_queue
= legacy_del_queue
,
264 .notify_queue
= legacy_notify_queue
,
/* Write a 64-bit value as two MMIO 32-bit writes: low half then high. */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	rte_write32(val & ((1ULL << 32) - 1), lo);
	rte_write32(val >> 32, hi);
}
275 modern_read_dev_config(struct virtio_hw
*hw
, size_t offset
,
276 void *dst
, int length
)
280 uint8_t old_gen
, new_gen
;
283 old_gen
= rte_read8(&hw
->common_cfg
->config_generation
);
286 for (i
= 0; i
< length
; i
++)
287 *p
++ = rte_read8((uint8_t *)hw
->dev_cfg
+ offset
+ i
);
289 new_gen
= rte_read8(&hw
->common_cfg
->config_generation
);
290 } while (old_gen
!= new_gen
);
294 modern_write_dev_config(struct virtio_hw
*hw
, size_t offset
,
295 const void *src
, int length
)
298 const uint8_t *p
= src
;
300 for (i
= 0; i
< length
; i
++)
301 rte_write8((*p
++), (((uint8_t *)hw
->dev_cfg
) + offset
+ i
));
305 modern_get_features(struct virtio_hw
*hw
)
307 uint32_t features_lo
, features_hi
;
309 rte_write32(0, &hw
->common_cfg
->device_feature_select
);
310 features_lo
= rte_read32(&hw
->common_cfg
->device_feature
);
312 rte_write32(1, &hw
->common_cfg
->device_feature_select
);
313 features_hi
= rte_read32(&hw
->common_cfg
->device_feature
);
315 return ((uint64_t)features_hi
<< 32) | features_lo
;
319 modern_set_features(struct virtio_hw
*hw
, uint64_t features
)
321 rte_write32(0, &hw
->common_cfg
->guest_feature_select
);
322 rte_write32(features
& ((1ULL << 32) - 1),
323 &hw
->common_cfg
->guest_feature
);
325 rte_write32(1, &hw
->common_cfg
->guest_feature_select
);
326 rte_write32(features
>> 32,
327 &hw
->common_cfg
->guest_feature
);
331 modern_get_status(struct virtio_hw
*hw
)
333 return rte_read8(&hw
->common_cfg
->device_status
);
337 modern_set_status(struct virtio_hw
*hw
, uint8_t status
)
339 rte_write8(status
, &hw
->common_cfg
->device_status
);
343 modern_reset(struct virtio_hw
*hw
)
345 modern_set_status(hw
, VIRTIO_CONFIG_STATUS_RESET
);
346 modern_get_status(hw
);
350 modern_get_isr(struct virtio_hw
*hw
)
352 return rte_read8(hw
->isr
);
356 modern_set_config_irq(struct virtio_hw
*hw
, uint16_t vec
)
358 rte_write16(vec
, &hw
->common_cfg
->msix_config
);
359 return rte_read16(&hw
->common_cfg
->msix_config
);
363 modern_set_queue_irq(struct virtio_hw
*hw
, struct virtqueue
*vq
, uint16_t vec
)
365 rte_write16(vq
->vq_queue_index
, &hw
->common_cfg
->queue_select
);
366 rte_write16(vec
, &hw
->common_cfg
->queue_msix_vector
);
367 return rte_read16(&hw
->common_cfg
->queue_msix_vector
);
371 modern_get_queue_num(struct virtio_hw
*hw
, uint16_t queue_id
)
373 rte_write16(queue_id
, &hw
->common_cfg
->queue_select
);
374 return rte_read16(&hw
->common_cfg
->queue_size
);
378 modern_setup_queue(struct virtio_hw
*hw
, struct virtqueue
*vq
)
380 uint64_t desc_addr
, avail_addr
, used_addr
;
383 if (!check_vq_phys_addr_ok(vq
))
386 desc_addr
= vq
->vq_ring_mem
;
387 avail_addr
= desc_addr
+ vq
->vq_nentries
* sizeof(struct vring_desc
);
388 used_addr
= RTE_ALIGN_CEIL(avail_addr
+ offsetof(struct vring_avail
,
389 ring
[vq
->vq_nentries
]),
390 VIRTIO_PCI_VRING_ALIGN
);
392 rte_write16(vq
->vq_queue_index
, &hw
->common_cfg
->queue_select
);
394 io_write64_twopart(desc_addr
, &hw
->common_cfg
->queue_desc_lo
,
395 &hw
->common_cfg
->queue_desc_hi
);
396 io_write64_twopart(avail_addr
, &hw
->common_cfg
->queue_avail_lo
,
397 &hw
->common_cfg
->queue_avail_hi
);
398 io_write64_twopart(used_addr
, &hw
->common_cfg
->queue_used_lo
,
399 &hw
->common_cfg
->queue_used_hi
);
401 notify_off
= rte_read16(&hw
->common_cfg
->queue_notify_off
);
402 vq
->notify_addr
= (void *)((uint8_t *)hw
->notify_base
+
403 notify_off
* hw
->notify_off_multiplier
);
405 rte_write16(1, &hw
->common_cfg
->queue_enable
);
407 PMD_INIT_LOG(DEBUG
, "queue %u addresses:", vq
->vq_queue_index
);
408 PMD_INIT_LOG(DEBUG
, "\t desc_addr: %" PRIx64
, desc_addr
);
409 PMD_INIT_LOG(DEBUG
, "\t aval_addr: %" PRIx64
, avail_addr
);
410 PMD_INIT_LOG(DEBUG
, "\t used_addr: %" PRIx64
, used_addr
);
411 PMD_INIT_LOG(DEBUG
, "\t notify addr: %p (notify offset: %u)",
412 vq
->notify_addr
, notify_off
);
418 modern_del_queue(struct virtio_hw
*hw
, struct virtqueue
*vq
)
420 rte_write16(vq
->vq_queue_index
, &hw
->common_cfg
->queue_select
);
422 io_write64_twopart(0, &hw
->common_cfg
->queue_desc_lo
,
423 &hw
->common_cfg
->queue_desc_hi
);
424 io_write64_twopart(0, &hw
->common_cfg
->queue_avail_lo
,
425 &hw
->common_cfg
->queue_avail_hi
);
426 io_write64_twopart(0, &hw
->common_cfg
->queue_used_lo
,
427 &hw
->common_cfg
->queue_used_hi
);
429 rte_write16(0, &hw
->common_cfg
->queue_enable
);
433 modern_notify_queue(struct virtio_hw
*hw __rte_unused
, struct virtqueue
*vq
)
435 rte_write16(vq
->vq_queue_index
, vq
->notify_addr
);
438 const struct virtio_pci_ops modern_ops
= {
439 .read_dev_cfg
= modern_read_dev_config
,
440 .write_dev_cfg
= modern_write_dev_config
,
441 .reset
= modern_reset
,
442 .get_status
= modern_get_status
,
443 .set_status
= modern_set_status
,
444 .get_features
= modern_get_features
,
445 .set_features
= modern_set_features
,
446 .get_isr
= modern_get_isr
,
447 .set_config_irq
= modern_set_config_irq
,
448 .set_queue_irq
= modern_set_queue_irq
,
449 .get_queue_num
= modern_get_queue_num
,
450 .setup_queue
= modern_setup_queue
,
451 .del_queue
= modern_del_queue
,
452 .notify_queue
= modern_notify_queue
,
457 vtpci_read_dev_config(struct virtio_hw
*hw
, size_t offset
,
458 void *dst
, int length
)
460 VTPCI_OPS(hw
)->read_dev_cfg(hw
, offset
, dst
, length
);
464 vtpci_write_dev_config(struct virtio_hw
*hw
, size_t offset
,
465 const void *src
, int length
)
467 VTPCI_OPS(hw
)->write_dev_cfg(hw
, offset
, src
, length
);
471 vtpci_negotiate_features(struct virtio_hw
*hw
, uint64_t host_features
)
476 * Limit negotiated features to what the driver, virtqueue, and
479 features
= host_features
& hw
->guest_features
;
480 VTPCI_OPS(hw
)->set_features(hw
, features
);
486 vtpci_reset(struct virtio_hw
*hw
)
488 VTPCI_OPS(hw
)->set_status(hw
, VIRTIO_CONFIG_STATUS_RESET
);
489 /* flush status write */
490 VTPCI_OPS(hw
)->get_status(hw
);
494 vtpci_reinit_complete(struct virtio_hw
*hw
)
496 vtpci_set_status(hw
, VIRTIO_CONFIG_STATUS_DRIVER_OK
);
500 vtpci_set_status(struct virtio_hw
*hw
, uint8_t status
)
502 if (status
!= VIRTIO_CONFIG_STATUS_RESET
)
503 status
|= VTPCI_OPS(hw
)->get_status(hw
);
505 VTPCI_OPS(hw
)->set_status(hw
, status
);
509 vtpci_get_status(struct virtio_hw
*hw
)
511 return VTPCI_OPS(hw
)->get_status(hw
);
515 vtpci_isr(struct virtio_hw
*hw
)
517 return VTPCI_OPS(hw
)->get_isr(hw
);
521 get_cfg_addr(struct rte_pci_device
*dev
, struct virtio_pci_cap
*cap
)
523 uint8_t bar
= cap
->bar
;
524 uint32_t length
= cap
->length
;
525 uint32_t offset
= cap
->offset
;
528 if (bar
>= PCI_MAX_RESOURCE
) {
529 PMD_INIT_LOG(ERR
, "invalid bar: %u", bar
);
533 if (offset
+ length
< offset
) {
534 PMD_INIT_LOG(ERR
, "offset(%u) + length(%u) overflows",
539 if (offset
+ length
> dev
->mem_resource
[bar
].len
) {
541 "invalid cap: overflows bar space: %u > %" PRIu64
,
542 offset
+ length
, dev
->mem_resource
[bar
].len
);
546 base
= dev
->mem_resource
[bar
].addr
;
548 PMD_INIT_LOG(ERR
, "bar %u base addr is NULL", bar
);
552 return base
+ offset
;
555 #define PCI_MSIX_ENABLE 0x8000
558 virtio_read_caps(struct rte_pci_device
*dev
, struct virtio_hw
*hw
)
561 struct virtio_pci_cap cap
;
564 if (rte_pci_map_device(dev
)) {
565 PMD_INIT_LOG(DEBUG
, "failed to map pci device!");
569 ret
= rte_pci_read_config(dev
, &pos
, 1, PCI_CAPABILITY_LIST
);
571 PMD_INIT_LOG(DEBUG
, "failed to read pci capability list");
576 ret
= rte_pci_read_config(dev
, &cap
, sizeof(cap
), pos
);
579 "failed to read pci cap at pos: %x", pos
);
583 if (cap
.cap_vndr
== PCI_CAP_ID_MSIX
) {
584 /* Transitional devices would also have this capability,
585 * that's why we also check if msix is enabled.
586 * 1st byte is cap ID; 2nd byte is the position of next
587 * cap; next two bytes are the flags.
589 uint16_t flags
= ((uint16_t *)&cap
)[1];
591 if (flags
& PCI_MSIX_ENABLE
)
592 hw
->use_msix
= VIRTIO_MSIX_ENABLED
;
594 hw
->use_msix
= VIRTIO_MSIX_DISABLED
;
597 if (cap
.cap_vndr
!= PCI_CAP_ID_VNDR
) {
599 "[%2x] skipping non VNDR cap id: %02x",
605 "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
606 pos
, cap
.cfg_type
, cap
.bar
, cap
.offset
, cap
.length
);
608 switch (cap
.cfg_type
) {
609 case VIRTIO_PCI_CAP_COMMON_CFG
:
610 hw
->common_cfg
= get_cfg_addr(dev
, &cap
);
612 case VIRTIO_PCI_CAP_NOTIFY_CFG
:
613 rte_pci_read_config(dev
, &hw
->notify_off_multiplier
,
614 4, pos
+ sizeof(cap
));
615 hw
->notify_base
= get_cfg_addr(dev
, &cap
);
617 case VIRTIO_PCI_CAP_DEVICE_CFG
:
618 hw
->dev_cfg
= get_cfg_addr(dev
, &cap
);
620 case VIRTIO_PCI_CAP_ISR_CFG
:
621 hw
->isr
= get_cfg_addr(dev
, &cap
);
629 if (hw
->common_cfg
== NULL
|| hw
->notify_base
== NULL
||
630 hw
->dev_cfg
== NULL
|| hw
->isr
== NULL
) {
631 PMD_INIT_LOG(INFO
, "no modern virtio pci device found.");
635 PMD_INIT_LOG(INFO
, "found modern virtio pci device.");
637 PMD_INIT_LOG(DEBUG
, "common cfg mapped at: %p", hw
->common_cfg
);
638 PMD_INIT_LOG(DEBUG
, "device cfg mapped at: %p", hw
->dev_cfg
);
639 PMD_INIT_LOG(DEBUG
, "isr cfg mapped at: %p", hw
->isr
);
640 PMD_INIT_LOG(DEBUG
, "notify base: %p, notify off multiplier: %u",
641 hw
->notify_base
, hw
->notify_off_multiplier
);
648 * if there is error mapping with VFIO/UIO.
649 * if port map error when driver type is KDRV_NONE.
650 * if whitelisted but driver type is KDRV_UNKNOWN.
651 * Return 1 if kernel driver is managing the device.
652 * Return 0 on success.
655 vtpci_init(struct rte_pci_device
*dev
, struct virtio_hw
*hw
)
658 * Try if we can succeed reading virtio pci caps, which exists
659 * only on modern pci device. If failed, we fallback to legacy
662 if (virtio_read_caps(dev
, hw
) == 0) {
663 PMD_INIT_LOG(INFO
, "modern virtio pci detected.");
664 virtio_hw_internal
[hw
->port_id
].vtpci_ops
= &modern_ops
;
669 PMD_INIT_LOG(INFO
, "trying with legacy virtio pci.");
670 if (rte_pci_ioport_map(dev
, 0, VTPCI_IO(hw
)) < 0) {
671 if (dev
->kdrv
== RTE_KDRV_UNKNOWN
&&
672 (!dev
->device
.devargs
||
673 dev
->device
.devargs
->bus
!=
674 rte_bus_find_by_name("pci"))) {
676 "skip kernel managed virtio device.");
682 virtio_hw_internal
[hw
->port_id
].vtpci_ops
= &legacy_ops
;
688 enum virtio_msix_status
689 vtpci_msix_detect(struct rte_pci_device
*dev
)
692 struct virtio_pci_cap cap
;
695 ret
= rte_pci_read_config(dev
, &pos
, 1, PCI_CAPABILITY_LIST
);
697 PMD_INIT_LOG(DEBUG
, "failed to read pci capability list");
698 return VIRTIO_MSIX_NONE
;
702 ret
= rte_pci_read_config(dev
, &cap
, sizeof(cap
), pos
);
705 "failed to read pci cap at pos: %x", pos
);
709 if (cap
.cap_vndr
== PCI_CAP_ID_MSIX
) {
710 uint16_t flags
= ((uint16_t *)&cap
)[1];
712 if (flags
& PCI_MSIX_ENABLE
)
713 return VIRTIO_MSIX_ENABLED
;
715 return VIRTIO_MSIX_DISABLED
;
721 return VIRTIO_MSIX_NONE
;