]> git.proxmox.com Git - pve-kernel.git/commitdiff
fix 4770: backport "nvme: don't reject probe due to duplicate IDs"
authorThomas Lamprecht <t.lamprecht@proxmox.com>
Sat, 15 Jul 2023 16:45:18 +0000 (18:45 +0200)
committerThomas Lamprecht <t.lamprecht@proxmox.com>
Sat, 15 Jul 2023 16:45:20 +0000 (18:45 +0200)
we got quite some reports for this (e.g., Bugzilla or [0]), well in
non-enterprise setups as those cheap NVMe's just don't bother holding
up basic principles...

[0]: https://forum.proxmox.com/threads/128738/#post-567249

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
patches/kernel/0010-nvme-don-t-reject-probe-due-to-duplicate-IDs-for-sin.patch [new file with mode: 0644]

diff --git a/patches/kernel/0010-nvme-don-t-reject-probe-due-to-duplicate-IDs-for-sin.patch b/patches/kernel/0010-nvme-don-t-reject-probe-due-to-duplicate-IDs-for-sin.patch
new file mode 100644 (file)
index 0000000..dd6f907
--- /dev/null
@@ -0,0 +1,76 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 13 Jul 2023 15:30:42 +0200
+Subject: [PATCH] nvme: don't reject probe due to duplicate IDs for
+ single-ported PCIe devices
+
+While duplicate IDs are still very harmful, including the potential to easily
+see changing devices in /dev/disk/by-id, it turn out they are extremely
+common for cheap end user NVMe devices.
+
+Relax our check for them for so that it doesn't reject the probe on
+single-ported PCIe devices, but prints a big warning instead.  In doubt
+we'd still like to see quirk entries to disable the potential for
+changing supposed stable device identifier links, but this will at least
+allow users how have two (or more) of these devices to use them without
+having to manually add a new PCI ID entry with the quirk through sysfs or
+by patching the kernel.
+
+Fixes: 2079f41ec6ff ("nvme: check that EUI/GUID/UUID are globally unique")
+Cc: stable@vger.kernel.org # 6.0+
+Co-developed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ drivers/nvme/host/core.c | 36 +++++++++++++++++++++++++++++++++---
+ 1 file changed, 33 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index d567762545b0..f350df252d27 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -4162,10 +4162,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
+       ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
+       if (ret) {
+-              dev_err(ctrl->device,
+-                      "globally duplicate IDs for nsid %d\n", info->nsid);
++              /*
++               * We've found two different namespaces on two different
++               * subsystems that report the same ID.  This is pretty nasty
++               * for anything that actually requires unique device
++               * identification.  In the kernel we need this for multipathing,
++               * and in user space the /dev/disk/by-id/ links rely on it.
++               *
++               * If the device also claims to be multi-path capable back off
++               * here now and refuse the probe the second device as this is a
++               * recipe for data corruption.  If not this is probably a
++               * cheap consumer device if on the PCIe bus, so let the user
++               * proceed and use the shiny toy, but warn that with changing
++               * probing order (which due to our async probing could just be
++               * device taking longer to startup) the other device could show
++               * up at any time.
++               */
+               nvme_print_device_info(ctrl);
+-              return ret;
++              if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
++                  ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
++                   info->is_shared)) {
++                      dev_err(ctrl->device,
++                              "ignoring nsid %d because of duplicate IDs\n",
++                              info->nsid);
++                      return ret;
++              }
++
++              dev_err(ctrl->device,
++                      "clearing duplicate IDs for nsid %d\n", info->nsid);
++              dev_err(ctrl->device,
++                      "use of /dev/disk/by-id/ may cause data corruption\n");
++              memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
++              memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
++              memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
++              ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
+       }
+       mutex_lock(&ctrl->subsys->lock);