]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/spdk/lib/vmd/vmd.c
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / lib / vmd / vmd.c
index f9630237f9f9ff6a6baba2cf70383e644e8eed68..14d9558c20c04342a88ba712167eae2a811ab1cc 100644 (file)
@@ -34,6 +34,7 @@
 #include "vmd.h"
 
 #include "spdk/stdinc.h"
+#include "spdk/likely.h"
 
 static unsigned char *device_type[] = {
        "PCI Express Endpoint",
@@ -72,6 +73,7 @@ static void
 vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
 {
        uint32_t pad;
+
        /*
         *  Device is not in hot plug path, align the base address remaining from membar 1.
         */
@@ -82,6 +84,126 @@ vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
        }
 }
 
+/*
+ * Reports whether the device's prefetchable base-upper and limit-upper
+ * registers both hold the VMD signature values (the same pair that
+ * vmd_cache_scan_info() writes into a root port).
+ */
+static bool
+vmd_device_is_enumerated(const struct vmd_pci_device *vmd_device)
+{
+       if (vmd_device->header->one.prefetch_base_upper != VMD_UPPER_BASE_SIGNATURE) {
+               return false;
+       }
+
+       return vmd_device->header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
+}
+
+/*
+ * Returns true when the device reports vendor id 0x8086 and a device id in
+ * the set {0x2030, 0x2031, 0x2032, 0x2033}; false otherwise.
+ */
+static bool
+vmd_device_is_root_port(const struct vmd_pci_device *vmd_device)
+{
+       /* Wrong vendor: no need to look at the device id at all */
+       if (vmd_device->header->common.vendor_id != 0x8086) {
+               return false;
+       }
+
+       return vmd_device->header->common.device_id == 0x2030 ||
+              vmd_device->header->common.device_id == 0x2031 ||
+              vmd_device->header->common.device_id == 0x2032 ||
+              vmd_device->header->common.device_id == 0x2033;
+}
+
+/*
+ * Merges neighbouring entries of the hotplug free list whose address ranges
+ * touch (previous end == next start).  The descriptor of the absorbed entry
+ * is moved to the unused queue so that it can be reused for a later split.
+ *
+ * The list is walked once: after a merge the enlarged entry stays the
+ * "previous" one, so a run of several adjacent regions collapses in the same
+ * pass instead of restarting the scan from the head after every merge.
+ */
+static void
+vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
+{
+       struct pci_mem_mgr *region, *next, *prev = NULL;
+
+       /* _SAFE variant: the current entry may be unlinked inside the loop */
+       TAILQ_FOREACH_SAFE(region, &hp->free_mem_queue, tailq, next) {
+               if (prev != NULL && (prev->addr + prev->size == region->addr)) {
+                       prev->size += region->size;
+                       TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
+                       TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
+                       /* Keep prev: the next entry may touch the grown region too */
+                       continue;
+               }
+
+               prev = region;
+       }
+}
+
+/*
+ * Returns a region descriptor to the hotplug free list, keeping the list
+ * ordered by address, and then coalesces adjacent free regions.
+ *
+ * hp     - hotplug state owning the free list
+ * region - descriptor to insert; its address must lie inside hp->bar
+ */
+static void
+vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
+{
+       struct pci_mem_mgr *after, *before = NULL;
+
+       assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);
+
+       /* Advance to the first free entry that starts above the region */
+       after = TAILQ_FIRST(&hp->free_mem_queue);
+       while (after != NULL && after->addr <= region->addr) {
+               before = after;
+               after = TAILQ_NEXT(after, tailq);
+       }
+
+       if (before == NULL) {
+               TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
+       } else {
+               /* The region must not overlap either neighbour */
+               assert(before->addr + before->size <= region->addr);
+               assert(after == NULL || (region->addr + region->size <= after->addr));
+               TAILQ_INSERT_AFTER(&hp->free_mem_queue, before, region, tailq);
+       }
+
+       vmd_hotplug_coalesce_regions(hp);
+}
+
+/*
+ * Releases the allocated hotplug region that starts at addr: the descriptor
+ * is taken off the allocated queue and handed to vmd_hotplug_free_region().
+ *
+ * hp   - hotplug state owning the allocation
+ * addr - start address previously returned by the hotplug allocator
+ *
+ * An unknown address trips the assert in debug builds.  When asserts are
+ * compiled out the error is logged and the call is a no-op, instead of
+ * passing a NULL descriptor to TAILQ_REMOVE.
+ */
+static void
+vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
+{
+       struct pci_mem_mgr *region;
+
+       TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
+               if (region->addr == addr) {
+                       break;
+               }
+       }
+
+       assert(region != NULL);
+       if (region == NULL) {
+               SPDK_ERRLOG("Unable to find allocated hotplug region at address: "
+                           "%"PRIx64"\n", addr);
+               return;
+       }
+
+       TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);
+
+       vmd_hotplug_free_region(hp, region);
+}
+
+/*
+ * First-fit allocation from the hotplug free list.
+ *
+ * hp   - hotplug state to allocate from
+ * size - number of bytes requested
+ *
+ * Returns the start address of the allocated region, or 0 when no free region
+ * is large enough.  A larger region is split when a spare descriptor is
+ * available; otherwise the whole region is handed out.
+ */
+static uint64_t
+vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
+{
+       struct pci_mem_mgr *chosen, *remainder;
+
+       /* Pick the first free region that can hold the request */
+       chosen = TAILQ_FIRST(&hp->free_mem_queue);
+       while (chosen != NULL && chosen->size < size) {
+               chosen = TAILQ_NEXT(chosen, tailq);
+       }
+
+       if (chosen == NULL) {
+               SPDK_DEBUGLOG(SPDK_LOG_VMD, "Unable to find free hotplug memory region of size:"
+                             "%"PRIx32"\n", size);
+               return 0;
+       }
+
+       TAILQ_REMOVE(&hp->free_mem_queue, chosen, tailq);
+       if (size < chosen->size) {
+               /* Give the tail of the region back to the free list if we can describe it */
+               remainder = TAILQ_FIRST(&hp->unused_mem_queue);
+               if (remainder != NULL) {
+                       TAILQ_REMOVE(&hp->unused_mem_queue, remainder, tailq);
+                       remainder->addr = chosen->addr + size;
+                       remainder->size = chosen->size - size;
+                       chosen->size = size;
+                       vmd_hotplug_free_region(hp, remainder);
+               } else {
+                       SPDK_DEBUGLOG(SPDK_LOG_VMD, "Unable to find unused descriptor to store the "
+                                     "free region of size: %"PRIu32"\n", chosen->size - size);
+               }
+       }
+
+       TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, chosen, tailq);
+
+       return chosen->addr;
+}
+
 /*
  *  Allocates an address from vmd membar for the input memory size
  *  vmdAdapter - vmd adapter object
@@ -94,7 +216,7 @@ vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
 static uint64_t
 vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
 {
-       uint64_t base_address = 0;
+       uint64_t base_address = 0, padding = 0;
        struct vmd_pci_bus *hp_bus;
 
        if (size && ((size & (~size + 1)) != size)) {
@@ -108,24 +230,22 @@ vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint
         *  get a buffer from the  unused chunk. First fit algorithm, is used.
         */
        if (dev) {
-               hp_bus = vmd_is_dev_in_hotplug_path(dev);
-               if (hp_bus && hp_bus->self) {
-                       return vmd_hp_allocate_base_addr(hp_bus->self->hp, size);
+               hp_bus = dev->parent;
+               if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
+                       return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
                }
        }
 
        /* Ensure physical membar allocated is size aligned */
        if (vmd->physical_addr & (size - 1)) {
-               uint32_t pad = size - (vmd->physical_addr & (size - 1));
-               vmd->physical_addr += pad;
-               vmd->current_addr_size -= pad;
+               padding = size - (vmd->physical_addr & (size - 1));
        }
 
        /* Allocate from membar if enough memory is left */
-       if (vmd->current_addr_size >= size) {
-               base_address = vmd->physical_addr;
-               vmd->physical_addr += size;
-               vmd->current_addr_size -= size;
+       if (vmd->current_addr_size >= size + padding) {
+               base_address = vmd->physical_addr + padding;
+               vmd->physical_addr += size + padding;
+               vmd->current_addr_size -= size + padding;
        }
 
        SPDK_DEBUGLOG(SPDK_LOG_VMD, "allocated(size) %lx (%x)\n", base_address, size);
@@ -143,13 +263,27 @@ vmd_is_end_device(struct vmd_pci_device *dev)
 static void
 vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
 {
-       struct vmd_pci_bus *bus = dev->parent;
+       struct vmd_pci_bus *bus;
        struct vmd_pci_device *bridge;
 
        if (base == 0 ||  limit == 0) {
                return;
        }
 
+       if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
+               bus = dev->bus_object;
+       } else {
+               bus = dev->parent;
+       }
+
+       bridge = bus->self;
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "base:limit = %x:%x\n", bridge->header->one.mem_base,
+                     bridge->header->one.mem_limit);
+
+       if (dev->bus->vmd->scan_completed) {
+               return;
+       }
+
        while (bus && bus->self != NULL) {
                bridge = bus->self;
 
@@ -168,12 +302,28 @@ vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16
        }
 }
 
+/*
+ * Picks the base address for BAR `index` of a device without allocating from
+ * the VMD membar (used by vmd_assign_base_addrs() once scan_completed is set).
+ *
+ * - bridge:                     the value already in the BAR, low 4 bits masked
+ * - device on a hotplug bus:    a region from the parent bridge's hotplug pool
+ * - device on any other bus:    the parent bridge's mem_base shifted left by 16
+ */
+static uint64_t
+vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
+{
+       struct vmd_pci_device *bridge;
+
+       if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
+               return dev->header->zero.BAR[index] & ~0xf;
+       }
+
+       bridge = dev->parent->self;
+       if (!bridge->hotplug_capable) {
+               return (uint64_t)bridge->header->one.mem_base << 16;
+       }
+
+       return vmd_hotplug_allocate_base_addr(&bridge->hp, size);
+}
+
 static bool
 vmd_assign_base_addrs(struct vmd_pci_device *dev)
 {
        uint16_t mem_base = 0, mem_limit = 0;
        unsigned char mem_attr = 0;
-       int last = dev->header_type ? 2 : 6;
+       int last;
        struct vmd_adapter *vmd = NULL;
        bool ret_val = false;
        uint32_t bar_value;
@@ -189,6 +339,7 @@ vmd_assign_base_addrs(struct vmd_pci_device *dev)
 
        vmd_align_base_addrs(vmd, ONE_MB);
 
+       last = dev->header_type ? 2 : 6;
        for (int i = 0; i < last; i++) {
                bar_value = dev->header->zero.BAR[i];
                dev->header->zero.BAR[i] = ~(0U);
@@ -202,7 +353,13 @@ vmd_assign_base_addrs(struct vmd_pci_device *dev)
                }
                mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
                dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);
-               dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
+
+               if (vmd->scan_completed) {
+                       dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
+               } else {
+                       dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
+               }
+
                dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;
 
                if (!dev->bar[i].start) {
@@ -328,6 +485,126 @@ vmd_read_config_space(struct vmd_pci_device *dev)
                        DEVICE_SERIAL_NUMBER_CAP_ID);
 }
 
+/*
+ * Latches the adapter's scan state from the first root port that is seen.
+ *
+ * Does nothing if a root port was already recorded or if the device has a
+ * normal (non-bridge) header.  For a root port, root_port_updated is set, and
+ * scan_completed is set as well when the port carries the enumeration
+ * signature.
+ */
+static void
+vmd_update_scan_info(struct vmd_pci_device *dev)
+{
+       struct vmd_adapter *adapter = dev->bus->vmd;
+
+       if (adapter->root_port_updated || dev->header_type == PCI_HEADER_TYPE_NORMAL) {
+               return;
+       }
+
+       if (!vmd_device_is_root_port(dev)) {
+               return;
+       }
+
+       adapter->root_port_updated = 1;
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "root_port_updated = %d\n",
+                     adapter->root_port_updated);
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "upper:limit = %x : %x\n",
+                     dev->header->one.prefetch_base_upper,
+                     dev->header->one.prefetch_limit_upper);
+
+       if (!vmd_device_is_enumerated(dev)) {
+               return;
+       }
+
+       adapter->scan_completed = 1;
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "scan_completed = %d\n",
+                     adapter->scan_completed);
+}
+
+/*
+ * Resets the type-1 (bridge) header window and bus-number registers of dev:
+ * memory base/limit, prefetchable base/limit (including the upper halves),
+ * the I/O upper base/limit and the primary/secondary/subordinate bus numbers.
+ *
+ * Must only be called for devices with a non-normal header type (asserted).
+ * Each store is followed by a load of the same register; `reg` exists only to
+ * receive that read-back and is otherwise unused.
+ */
+static void
+vmd_reset_base_limit_registers(struct vmd_pci_device *dev)
+{
+       uint32_t reg __attribute__((unused));
+
+       assert(dev->header_type != PCI_HEADER_TYPE_NORMAL);
+       /*
+        * Writes to the pci config space are posted writes.
+        * To ensure transaction reaches its destination
+        * before another write is posted, an immediate read
+        * of the written value should be performed.
+        */
+       /* NOTE(review): base (0xfff0) above limit (0x0) presumably closes the window - confirm */
+       dev->header->one.mem_base = 0xfff0;
+       reg = dev->header->one.mem_base;
+       dev->header->one.mem_limit = 0x0;
+       reg = dev->header->one.mem_limit;
+       dev->header->one.prefetch_base = 0x0;
+       reg = dev->header->one.prefetch_base;
+       dev->header->one.prefetch_limit = 0x0;
+       reg = dev->header->one.prefetch_limit;
+       dev->header->one.prefetch_base_upper = 0x0;
+       reg = dev->header->one.prefetch_base_upper;
+       dev->header->one.prefetch_limit_upper = 0x0;
+       reg = dev->header->one.prefetch_limit_upper;
+       dev->header->one.io_base_upper = 0x0;
+       reg = dev->header->one.io_base_upper;
+       dev->header->one.io_limit_upper = 0x0;
+       reg = dev->header->one.io_limit_upper;
+       dev->header->one.primary = 0;
+       reg = dev->header->one.primary;
+       dev->header->one.secondary = 0;
+       reg = dev->header->one.secondary;
+       dev->header->one.subordinate = 0;
+       reg = dev->header->one.subordinate;
+}
+
+/*
+ * Marks dev as hotplug capable and sets up its hotplug memory pool.
+ *
+ * dev - bridge device that owns the hotplug state (dev->hp)
+ * bus - bus object whose `self` bridge registers describe the memory window
+ *
+ * The pool covers a window of 1 << 20 bytes.  On a first scan the window is
+ * carved out of the VMD membar and programmed into the bridge's mem_base /
+ * mem_limit; when scan_completed is already set the window start is taken
+ * from the bridge's existing mem_base instead.  The first descriptor of
+ * hp->mem describes the whole window and goes on the free queue; every other
+ * descriptor is parked on the unused queue.
+ */
+static void
+vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
+{
+       struct vmd_adapter *vmd = bus->vmd;
+       struct vmd_hot_plug *hp = &dev->hp;
+       size_t mem_id;
+
+       dev->hotplug_capable = true;
+       hp->bar.size = 1 << 20;
+
+       if (!vmd->scan_completed) {
+               /*
+                * NOTE(review): the allocation result is not checked; the allocator
+                * appears to return 0 when the membar is exhausted - confirm and
+                * decide how that case should be handled.
+                */
+               hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
+               bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
+               bus->self->header->one.mem_limit =
+                       bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
+       } else {
+               /* Reuse the window already programmed into the bridge */
+               hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
+       }
+
+       /* Virtual address = membar mapping + offset of the window inside the membar */
+       hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);
+
+       TAILQ_INIT(&hp->free_mem_queue);
+       TAILQ_INIT(&hp->unused_mem_queue);
+       TAILQ_INIT(&hp->alloc_mem_queue);
+
+       /* One free region spanning the whole window */
+       hp->mem[0].size = hp->bar.size;
+       hp->mem[0].addr = hp->bar.start;
+
+       TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);
+
+       /* Remaining descriptors are spares used when regions get split */
+       for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
+               TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
+       }
+
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "%s: mem_base:mem_limit = %x : %x\n", __func__,
+                     bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
+}
+
+/*
+ * Checks whether a device responds at devfn on the given bus.
+ *
+ * Returns false when the computed config-space address is not valid for the
+ * bus, or when the vendor id read from it is PCI_INVALID_VENDORID or 0.
+ */
+static bool
+vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
+{
+       volatile struct pci_header *cfg;
+
+       cfg = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
+                                            CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
+
+       /* Short-circuit order matters: never touch cfg unless the address is valid */
+       return vmd_is_valid_cfg_addr(bus, (uint64_t)cfg) &&
+              cfg->common.vendor_id != PCI_INVALID_VENDORID &&
+              cfg->common.vendor_id != 0x0;
+}
+
 static struct vmd_pci_device *
 vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
 {
@@ -336,16 +613,20 @@ vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
        uint8_t header_type;
        uint32_t rev_class;
 
-       header = (struct pci_header * volatile)(bus->vmd->cfg_vaddr +
-                                               CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
-       if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
-               return NULL;
+       /* Make sure we're not creating two devices on the same dev/fn */
+       TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
+               if (dev->devfn == devfn) {
+                       return NULL;
+               }
        }
 
-       if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
+       if (!vmd_bus_device_present(bus, devfn)) {
                return NULL;
        }
 
+       header = (struct pci_header * volatile)(bus->vmd->cfg_vaddr +
+                                               CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
+
        SPDK_DEBUGLOG(SPDK_LOG_VMD, "PCI device found: %04x:%04x ***\n",
                      header->common.vendor_id, header->common.device_id);
 
@@ -366,15 +647,10 @@ vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
        dev->header_type = header_type & 0x7;
 
        if (header_type == PCI_HEADER_TYPE_BRIDGE) {
-               dev->header->one.mem_base = 0xfff0;
-               dev->header->one.mem_limit = 0x0;
-               dev->header->one.prefetch_base_upper = 0x0;
-               dev->header->one.prefetch_limit_upper = 0x0;
-               dev->header->one.io_base_upper = 0x0;
-               dev->header->one.io_limit_upper = 0x0;
-               dev->header->one.primary = 0;
-               dev->header->one.secondary = 0;
-               dev->header->one.subordinate = 0;
+               vmd_update_scan_info(dev);
+               if (!dev->bus->vmd->scan_completed) {
+                       vmd_reset_base_limit_registers(dev);
+               }
        }
 
        vmd_read_config_space(dev);
@@ -382,64 +658,6 @@ vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
        return dev;
 }
 
-static void
-vmd_add_bus_to_list(struct vmd_adapter *vmd, struct vmd_pci_bus *bus)
-{
-       struct vmd_pci_bus *blist;
-
-       blist = vmd->bus_list;
-       bus->next = NULL;
-       if (blist == NULL) {
-               vmd->bus_list = bus;
-               return;
-       }
-
-       while (blist->next != NULL) {
-               blist = blist->next;
-       }
-
-       blist->next = bus;
-}
-
-static void
-vmd_pcibus_remove_device(struct vmd_pci_bus *bus, struct vmd_pci_device *device)
-{
-       struct vmd_pci_device *list = bus->dev_list;
-
-       if (list == device) {
-               bus->dev_list = NULL;
-       }
-
-       while (list->next != NULL) {
-               if (list->next == device) {
-                       assert(list->next->next);
-                       list->next = list->next->next;
-               }
-               list = list->next;
-       }
-}
-
-
-static bool
-vmd_bus_add_device(struct vmd_pci_bus *bus, struct vmd_pci_device *device)
-{
-       struct vmd_pci_device *next_dev = bus->dev_list;
-
-       device->next = NULL;
-       if (next_dev == NULL) {
-               bus->dev_list = device;
-               return 1;
-       }
-
-       while (next_dev->next != NULL) {
-               next_dev = next_dev->next;
-       }
-
-       next_dev->next = device;
-
-       return 1;
-}
-
 static struct vmd_pci_bus *
 vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
 {
@@ -456,12 +674,14 @@ vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, ui
        new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
        new_bus->self = bridge;
        new_bus->vmd = parent->vmd;
+       TAILQ_INIT(&new_bus->dev_list);
+
        bridge->subordinate = new_bus;
 
        bridge->pci.addr.bus = new_bus->bus_number;
        bridge->pci.addr.dev = bridge->devfn;
        bridge->pci.addr.func = 0;
-       bridge->pci.addr.domain = parent->vmd->pci.addr.domain;
+       bridge->pci.addr.domain = parent->vmd->pci->addr.domain;
 
        return new_bus;
 }
@@ -480,8 +700,8 @@ vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
 
        if (dev) {
                hp_bus = vmd_is_dev_in_hotplug_path(dev);
-               if (hp_bus && hp_bus->self && hp_bus->self->hp) {
-                       return vmd_hp_get_next_bus_number(hp_bus->self->hp);
+               if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
+                       return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
                }
        }
 
@@ -646,7 +866,24 @@ vmd_dev_cfg_write(struct spdk_pci_device *_dev,  void *value,
+/*
+ * Detaches and destroys a device: unhooks it from the SPDK PCI layer, removes
+ * it from its bus's device list, returns any BAR ranges to the parent
+ * bridge's hotplug pool (when that bridge is hotplug capable) and frees the
+ * device memory.
+ *
+ * NOTE(review): the cast and the final free(dev) assume `pci` is the first
+ * member of struct vmd_pci_device - confirm against the struct definition.
+ */
 static void
 vmd_dev_detach(struct spdk_pci_device *dev)
 {
-       return;
+       struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
+       struct vmd_pci_device *bus_device = vmd_device->bus->self;
+       struct vmd_pci_bus *bus = vmd_device->bus;
+       /* Same BAR count rule as vmd_assign_base_addrs(): 2 for non-zero header types, else 6 */
+       size_t i, num_bars = vmd_device->header_type ? 2 : 6;
+
+       spdk_pci_unhook_device(dev);
+       TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);
+
+       /* Release the hotplug region if the device is under hotplug-capable bus */
+       if (bus_device && bus_device->hotplug_capable) {
+               for (i = 0; i < num_bars; ++i) {
+                       /* A start of 0 means no address was assigned to this BAR */
+                       if (vmd_device->bar[i].start != 0) {
+                               vmd_hotplug_free_addr(&bus_device->hp, vmd_device->bar[i].start);
+                       }
+               }
+       }
+
+       free(dev);
 }
 
 static void
@@ -660,15 +897,20 @@ vmd_dev_init(struct vmd_pci_device *dev)
        dev->pci.addr.func = 0;
        dev->pci.id.vendor_id = dev->header->common.vendor_id;
        dev->pci.id.device_id = dev->header->common.device_id;
+       dev->pci.type = "vmd";
        dev->pci.map_bar = vmd_dev_map_bar;
        dev->pci.unmap_bar = vmd_dev_unmap_bar;
        dev->pci.cfg_read = vmd_dev_cfg_read;
        dev->pci.cfg_write = vmd_dev_cfg_write;
-       dev->pci.detach = vmd_dev_detach;
+       dev->hotplug_capable = false;
+       if (dev->pcie_cap != NULL) {
+               dev->cached_slot_control = dev->pcie_cap->slot_control;
+       }
 
        if (vmd_is_supported_device(dev)) {
                spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
                SPDK_DEBUGLOG(SPDK_LOG_VMD, "Initalizing NVMe device at %s\n", bdf);
+               dev->pci.parent = dev->bus->vmd->pci;
                spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
        }
 }
@@ -731,25 +973,33 @@ vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridg
                        new_bus->self = new_dev;
                        new_dev->bus_object = new_bus;
 
-                       if (slot_cap.bit_field.hotplug_capable) {
+                       if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
+                           new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
                                new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
                                new_bus->subordinate_bus += new_bus->hotplug_buses;
+
+                               /* Attach hot plug instance if HP is supported */
+                               /* Hot inserted SSDs can be assigned port bus of sub-ordinate + 1 */
+                               SPDK_DEBUGLOG(SPDK_LOG_VMD, "hotplug_capable/slot_implemented = "
+                                             "%x:%x\n", slot_cap.bit_field.hotplug_capable,
+                                             new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
                        }
+
                        new_dev->parent_bridge = parent_bridge;
                        new_dev->header->one.primary = new_bus->primary_bus;
                        new_dev->header->one.secondary = new_bus->secondary_bus;
                        new_dev->header->one.subordinate = new_bus->subordinate_bus;
 
                        vmd_bus_update_bridge_info(new_dev);
-                       vmd_add_bus_to_list(bus->vmd, new_bus);
-
-                       /* Attach hot plug instance if HP is supported */
-                       if (slot_cap.bit_field.hotplug_capable) {
-                               new_dev->hp = vmd_new_hotplug(new_bus, new_bus->hotplug_buses);
-                       }
+                       TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);
 
                        vmd_dev_init(new_dev);
 
+                       if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
+                           new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
+                               vmd_init_hotplug(new_dev, new_bus);
+                       }
+
                        dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
                        if (new_dev->pcie_cap != NULL) {
                                if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
@@ -758,7 +1008,7 @@ vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridg
                        }
                } else {
                        /* Attach the device to the current bus and assign base addresses */
-                       vmd_bus_add_device(bus, new_dev);
+                       TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
                        g_end_device_count++;
                        if (vmd_assign_base_addrs(new_dev)) {
                                vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
@@ -770,7 +1020,8 @@ vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridg
                                }
                        } else {
                                SPDK_DEBUGLOG(SPDK_LOG_VMD, "Removing failed device:%p\n", new_dev);
-                               vmd_pcibus_remove_device(bus, new_dev);
+                               TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
+                               free(new_dev);
                                if (dev_cnt) {
                                        dev_cnt--;
                                }
@@ -827,51 +1078,69 @@ vmd_print_pci_info(struct vmd_pci_device *dev)
 }
 
+/*
+ * Stamps a root port with the VMD enumeration signature.
+ *
+ * Devices with a normal (non-bridge) header are ignored.  For a root port the
+ * signature values are written to prefetch_base_upper / prefetch_limit_upper,
+ * each write followed by a read-back of the same register; `reg` only
+ * receives those read-backs.  vmd_device_is_enumerated() tests for the same
+ * pair of values.
+ */
 static void
-vmd_pci_print(struct vmd_pci_bus *bus_list)
+vmd_cache_scan_info(struct vmd_pci_device *dev)
 {
-       struct vmd_pci_bus *bus = bus_list;
-       struct vmd_pci_device *dev;
+       uint32_t reg __attribute__((unused));
 
-       SPDK_INFOLOG(SPDK_LOG_VMD, "\n ...PCIE devices attached to VMD %04x:%02x:%02x:%x...\n",
-                    bus_list->vmd->pci.addr.domain, bus_list->vmd->pci.addr.bus,
-                    bus_list->vmd->pci.addr.dev, bus_list->vmd->pci.addr.func);
-       SPDK_INFOLOG(SPDK_LOG_VMD, "----------------------------------------------\n");
+       if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
+               return;
+       }
 
-       while (bus != NULL) {
-               vmd_print_pci_info(bus->self);
-               dev = bus->dev_list;
-               while (dev != NULL) {
-                       vmd_print_pci_info(dev);
-                       dev = dev->next;
-               }
-               bus = bus->next;
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
+                     dev->header->common.device_id);
+
+       if (vmd_device_is_root_port(dev)) {
+               /* Write the signature, reading each register back after the write */
+               dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
+               reg = dev->header->one.prefetch_base_upper;
+               dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
+               reg = dev->header->one.prefetch_limit_upper;
+
+               SPDK_DEBUGLOG(SPDK_LOG_VMD, "prefetch: %x:%x\n",
+                             dev->header->one.prefetch_base_upper,
+                             dev->header->one.prefetch_limit_upper);
        }
 }
 
+/*
+ * Scans the VMD domain starting at `bus`.
+ *
+ * Resets the end-device counter, registers `bus` on the adapter's bus list,
+ * scans it (vmd_scan_single_bus() recurses into bridges), then walks every
+ * bus on the adapter: each bridge is printed and passed to
+ * vmd_cache_scan_info(), and each end device is printed.
+ *
+ * Returns the device count reported by vmd_scan_single_bus().
+ */
 static uint8_t
 vmd_scan_pcibus(struct vmd_pci_bus *bus)
 {
+       struct vmd_pci_bus *bus_entry;
+       struct vmd_pci_device *dev;
        uint8_t dev_cnt;
 
        g_end_device_count = 0;
-       vmd_add_bus_to_list(bus->vmd, bus);
+       TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
        bus->vmd->next_bus_number = bus->bus_number + 1;
        dev_cnt = vmd_scan_single_bus(bus, NULL);
 
-       SPDK_DEBUGLOG(SPDK_LOG_VMD, "\tVMD scan found %u devices\n", dev_cnt);
-       SPDK_DEBUGLOG(SPDK_LOG_VMD, "\tVMD scan found %u END DEVICES\n", g_end_device_count);
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "VMD scan found %u devices\n", dev_cnt);
+       SPDK_DEBUGLOG(SPDK_LOG_VMD, "VMD scan found %u END DEVICES\n", g_end_device_count);
+
+       SPDK_INFOLOG(SPDK_LOG_VMD, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
+                    bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
+                    bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);
 
-       vmd_pci_print(bus->vmd->bus_list);
+       TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
+               /* `self` may be NULL for a bus entry, hence the check */
+               if (bus_entry->self != NULL) {
+                       vmd_print_pci_info(bus_entry->self);
+                       vmd_cache_scan_info(bus_entry->self);
+               }
+
+               TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
+                       vmd_print_pci_info(dev);
+               }
+       }
 
        return dev_cnt;
 }
 
-
 static int
 vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
 {
-       int rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
-                                        &vmd->cfgbar, &vmd->cfgbar_size);
+       int rc;
+
+       rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
+                                    &vmd->cfgbar, &vmd->cfgbar_size);
        if (rc == 0) {
                rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
                                             &vmd->membar, &vmd->membar_size);
@@ -895,11 +1164,37 @@ vmd_enumerate_devices(struct vmd_adapter *vmd)
        vmd->vmd_bus.vmd = vmd;
        vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
        vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
-       vmd->vmd_bus.domain = vmd->pci.addr.domain;
+       vmd->vmd_bus.domain = vmd->pci->addr.domain;
 
        return vmd_scan_pcibus(&vmd->vmd_bus);
 }
 
+/*
+ * Looks up a device by PCI address across every VMD slot in the container.
+ *
+ * Both the bridge that owns each bus (bus->self) and the devices on the bus
+ * are compared against addr.  Returns the first match, or NULL if none.
+ */
+struct vmd_pci_device *
+vmd_find_device(const struct spdk_pci_addr *addr)
+{
+       struct vmd_pci_bus *bus;
+       struct vmd_pci_device *child;
+       int idx;
+
+       for (idx = 0; idx < MAX_VMD_TARGET; ++idx) {
+               TAILQ_FOREACH(bus, &g_vmd_container.vmd[idx].bus_list, tailq) {
+                       if (bus->self && spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
+                               return bus->self;
+                       }
+
+                       TAILQ_FOREACH(child, &bus->dev_list, tailq) {
+                               if (spdk_pci_addr_compare(&child->pci.addr, addr) != 0) {
+                                       continue;
+                               }
+
+                               return child;
+                       }
+               }
+       }
+
+       return NULL;
+}
+
 static int
 vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
 {
@@ -917,11 +1212,13 @@ vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
 
        /* map vmd bars */
        i = vmd_c->count;
-       vmd_c->vmd[i].pci = *pci_dev;
+       vmd_c->vmd[i].pci = pci_dev;
        vmd_c->vmd[i].vmd_index = i;
        vmd_c->vmd[i].domain =
                (pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
        vmd_c->vmd[i].max_pci_bus = PCI_MAX_BUS_NUMBER;
+       TAILQ_INIT(&vmd_c->vmd[i].bus_list);
+
        if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
                return -1;
        }
@@ -955,19 +1252,15 @@ spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *
        }
 
        for (int i = 0; i < MAX_VMD_TARGET; ++i) {
-               if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci.addr) == 0) {
-                       bus = g_vmd_container.vmd[i].bus_list;
-                       while (bus != NULL) {
-                               dev = bus->dev_list;
-                               while (dev != NULL) {
+               if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
+                       TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
+                               TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
                                        nvme_list[cnt++] = dev->pci;
                                        if (!dev->is_hooked) {
                                                vmd_dev_init(dev);
                                                dev->is_hooked = 1;
                                        }
-                                       dev = dev->next;
                                }
-                               bus = bus->next;
                        }
                }
        }
@@ -975,10 +1268,109 @@ spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *
        return cnt;
 }
 
+/*
+ * Reads the slot status and link status registers of the bus's bridge, writes
+ * the value just read back to the same register and reads it once more.
+ *
+ * NOTE(review): presumably the status bits are write-one-to-clear, so writing
+ * back the value read acknowledges the pending events - confirm against the
+ * PCIe specification.  bus->self and its pcie_cap are dereferenced without a
+ * check; the caller in this file only passes hotplug-capable bridges.
+ */
+static void
+vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
+{
+       struct vmd_pci_device *device = bus->self;
+       /* Only used as a sink for the register reads */
+       uint16_t status __attribute__((unused));
+
+       status = device->pcie_cap->slot_status.as_uint16_t;
+       device->pcie_cap->slot_status.as_uint16_t = status;
+       status = device->pcie_cap->slot_status.as_uint16_t;
+
+       status = device->pcie_cap->link_status.as_uint16_t;
+       device->pcie_cap->link_status.as_uint16_t = status;
+       status = device->pcie_cap->link_status.as_uint16_t;
+}
+
+/*
+ * Handles a hot-insert event on a bus: rescans the bus up to 20 times,
+ * waiting 200000 us after every unsuccessful attempt, and stops as soon as a
+ * scan reports at least one device.  Logs an error if no scan found one.
+ */
+static void
+vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
+{
+       uint8_t found, attempt = 0;
+
+       while (attempt < 20) {
+               /* Scan until a new device is found */
+               found = vmd_scan_single_bus(bus, bus->self);
+               if (found > 0) {
+                       return;
+               }
+
+               spdk_delay_us(200000);
+               ++attempt;
+       }
+
+       /* Every attempt came back empty */
+       SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
+}
+
+/*
+ * Handles a hot-remove event on a bus: every device on the bus that no longer
+ * responds is flagged as pending removal, and is detached right away when
+ * nothing is attached to it.
+ */
+static void
+vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
+{
+       struct vmd_pci_device *dev, *next;
+
+       /* _SAFE variant: vmd_dev_detach() unlinks and frees the current entry */
+       TAILQ_FOREACH_SAFE(dev, &bus->dev_list, tailq, next) {
+               if (vmd_bus_device_present(bus, dev->devfn)) {
+                       continue;
+               }
+
+               dev->pci.internal.pending_removal = true;
+
+               /* If the device isn't attached, remove it immediately */
+               if (dev->pci.internal.attached) {
+                       continue;
+               }
+
+               vmd_dev_detach(&dev->pci);
+       }
+}
+
+/*
+ * Polls every hotplug-capable bridge of every enumerated VMD for a data link
+ * state change and dispatches to the hot-insert or hot-remove handler
+ * depending on whether the data link layer is currently active.  The bridge's
+ * status is cleared after each handled event.
+ *
+ * Returns the number of events handled during this call.
+ */
+int
+spdk_vmd_hotplug_monitor(void)
+{
+       struct vmd_pci_bus *bus;
+       struct vmd_pci_device *bridge;
+       int handled = 0;
+       uint32_t idx;
+
+       for (idx = 0; idx < g_vmd_container.count; ++idx) {
+               TAILQ_FOREACH(bus, &g_vmd_container.vmd[idx].bus_list, tailq) {
+                       bridge = bus->self;
+
+                       /* Skip buses without a hotplug bridge or without a pending change */
+                       if (bridge == NULL || !bridge->hotplug_capable ||
+                           bridge->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
+                               continue;
+                       }
+
+                       if (bridge->pcie_cap->link_status.bit_field.datalink_layer_active != 1) {
+                               SPDK_DEBUGLOG(SPDK_LOG_VMD, "Device hotremove detected on bus "
+                                             "%"PRIu32"\n", bus->bus_number);
+                               vmd_bus_handle_hotremove(bus);
+                       } else {
+                               SPDK_DEBUGLOG(SPDK_LOG_VMD, "Device hotplug detected on bus "
+                                             "%"PRIu32"\n", bus->bus_number);
+                               vmd_bus_handle_hotplug(bus);
+                       }
+
+                       vmd_clear_hotplug_status(bus);
+                       handled++;
+               }
+       }
+
+       return handled;
+}
+
+/*
+ * Enumerates PCI devices bound to the VMD driver, invoking vmd_enum_cb() with
+ * the global VMD container as its context.
+ *
+ * Returns the result of spdk_pci_enumerate() unchanged.
+ */
 int
 spdk_vmd_init(void)
 {
        return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
 }
 
+/*
+ * Detaches the PCI device of every VMD recorded in the global container
+ * (entries 0 .. count - 1).
+ */
+void
+spdk_vmd_fini(void)
+{
+       uint32_t idx = 0;
+
+       while (idx < g_vmd_container.count) {
+               spdk_pci_device_detach(g_vmd_container.vmd[idx].pci);
+               ++idx;
+       }
+}
+
 SPDK_LOG_REGISTER_COMPONENT("vmd", SPDK_LOG_VMD)