Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c

index 785050af85e560524d8c1994bc14bd1dffae3cf7..6ea7fdc82f3c752dd0f34d4dc3700f451765d178 100644 (file)
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
         [SMCA_PCIE]     = { "pcie",             "PCI Express Unit" },
  };
  
-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
-       [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
  static const char *smca_get_name(enum smca_bank_types t)
  {
         if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
  static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
  static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
  
+/* Map of banks that have more than MCA_MISC0 available. */
+static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+
  static void amd_threshold_interrupt(void);
  static void amd_deferred_error_interrupt(void);
  
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
  }
  void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
  
+static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
+{
+       u32 low, high;
+
+       /*
+        * For SMCA enabled processors, BLKPTR field of the first MISC register
+        * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+        */
+       if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+               return;
+
+       if (!(low & MCI_CONFIG_MCAX))
+               return;
+
+       if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
+               return;
+
+       if (low & MASK_BLKPTR_LO)
+               per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
+
+}
+
  static void smca_configure(unsigned int bank, unsigned int cpu)
  {
         unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
                 wrmsr(smca_config, low, high);
         }
  
+       smca_set_misc_banks_map(bank, cpu);
+
         /* Return early if this bank was already initialized. */
         if (smca_banks[bank].hwid)
                 return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
         wrmsr(MSR_CU_DEF_ERR, low, high);
  }
  
-static u32 smca_get_block_address(unsigned int bank, unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+                                 unsigned int cpu)
  {
-       u32 low, high;
-       u32 addr = 0;
-
-       if (smca_get_bank_type(bank) == SMCA_RESERVED)
-               return addr;
-
         if (!block)
                 return MSR_AMD64_SMCA_MCx_MISC(bank);
  
-       /* Check our cache first: */
-       if (smca_bank_addrs[bank][block] != -1)
-               return smca_bank_addrs[bank][block];
-
-       /*
-        * For SMCA enabled processors, BLKPTR field of the first MISC register
-        * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
-        */
-       if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-               goto out;
-
-       if (!(low & MCI_CONFIG_MCAX))
-               goto out;
-
-       if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
-           (low & MASK_BLKPTR_LO))
-               addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+       if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+               return 0;
  
-out:
-       smca_bank_addrs[bank][block] = addr;
-       return addr;
+       return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
  }
  
  static u32 get_block_address(u32 current_addr, u32 low, u32 high,
-                            unsigned int bank, unsigned int block)
+                            unsigned int bank, unsigned int block,
+                            unsigned int cpu)
  {
         u32 addr = 0, offset = 0;
  
-       if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+       if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
                 return addr;
  
         if (mce_flags.smca)
-               return smca_get_block_address(bank, block);
+               return smca_get_block_address(bank, block, cpu);
  
         /* Fall back to method we used for older processors: */
         switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
  /* cpu init entry point, called from mce.c with preempt off */
  void mce_amd_feature_init(struct cpuinfo_x86 *c)
  {
-       u32 low = 0, high = 0, address = 0;
         unsigned int bank, block, cpu = smp_processor_id();
+       u32 low = 0, high = 0, address = 0;
         int offset = -1;
  
-       for (bank = 0; bank < mca_cfg.banks; ++bank) {
+
+       for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
                 if (mce_flags.smca)
                         smca_configure(bank, cpu);
  
                 disable_err_thresholding(c, bank);
  
                 for (block = 0; block < NR_BLOCKS; ++block) {
-                       address = get_block_address(address, low, high, bank, block);
+                       address = get_block_address(address, low, high, bank, block, cpu);
                         if (!address)
                                 break;
  
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
  {
         unsigned int bank;
  
-       for (bank = 0; bank < mca_cfg.banks; ++bank)
+       for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
                 log_error_deferred(bank);
  }
  
@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
         struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
         unsigned int bank, cpu = smp_processor_id();
  
-       for (bank = 0; bank < mca_cfg.banks; ++bank) {
+       for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
  
@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
         u32 low, high;
         int err;
  
-       if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+       if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
                 return 0;
  
         if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
         if (err)
                 goto out_free;
  recurse:
-       address = get_block_address(address, low, high, bank, ++block);
+       address = get_block_address(address, low, high, bank, ++block, cpu);
         if (!address)
                 return 0;
  
@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
  {
         unsigned int bank;
  
-       for (bank = 0; bank < mca_cfg.banks; ++bank) {
+       for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
                 threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
         if (bp)
                 return 0;
  
-       bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
+       bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
                      GFP_KERNEL);
         if (!bp)
                 return -ENOMEM;
  
         per_cpu(threshold_banks, cpu) = bp;
  
-       for (bank = 0; bank < mca_cfg.banks; ++bank) {
+       for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
                 err = threshold_create_bank(cpu, bank);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c

index 282916f3b8d8c6c7ac4d5a6086a0ac4b1506ef6a..066562a1ea20bf7efa3a5e93d7b1253b412b9427 100644 (file)
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
  
  DEFINE_PER_CPU(unsigned, mce_exception_count);
  
-struct mce_bank *mce_banks __read_mostly;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
+
+struct mce_bank {
+       u64                     ctl;                    /* subevents to enable */
+       bool                    init;                   /* initialise bank? */
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
+#define ATTR_LEN               16
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank_dev {
+       struct device_attribute attr;                   /* device attribute */
+       char                    attrname[ATTR_LEN];     /* attribute name */
+       u8                      bank;                   /* bank number */
+};
+static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
+
  struct mce_vendor_flags mce_flags __read_mostly;
  
  struct mca_config mca_cfg __read_mostly = {
@@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
   */
  bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         bool error_seen = false;
         struct mce m;
         int i;
@@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
         if (flags & MCP_TIMESTAMP)
                 m.tsc = rdtsc();
  
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 if (!mce_banks[i].ctl || !test_bit(i, *b))
                         continue;
  
@@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
         char *tmp;
         int i;
  
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 m->status = mce_rdmsrl(msr_ops.status(i));
                 if (!(m->status & MCI_STATUS_VAL))
                         continue;
@@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
  {
         int i;
  
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 if (test_bit(i, toclear))
                         mce_wrmsrl(msr_ops.status(i), 0);
         }
@@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
                             unsigned long *toclear, unsigned long *valid_banks,
                             int no_way_out, int *worst)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         struct mca_config *cfg = &mca_cfg;
         int severity, i;
  
-       for (i = 0; i < cfg->banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 __clear_bit(i, toclear);
                 if (!test_bit(i, valid_banks))
                         continue;
@@ -1463,27 +1481,29 @@ int mce_notify_irq(void)
  }
  EXPORT_SYMBOL_GPL(mce_notify_irq);
  
-static int __mcheck_cpu_mce_banks_init(void)
+static void __mcheck_cpu_mce_banks_init(void)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+       u8 n_banks = this_cpu_read(mce_num_banks);
         int i;
  
-       mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
-       if (!mce_banks)
-               return -ENOMEM;
-
-       for (i = 0; i < MAX_NR_BANKS; i++) {
+       for (i = 0; i < n_banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
  
+               /*
+                * Init them all, __mcheck_cpu_apply_quirks() is going to apply
+                * the required vendor quirks before
+                * __mcheck_cpu_init_clear_banks() does the final bank setup.
+                */
                 b->ctl = -1ULL;
                 b->init = 1;
         }
-       return 0;
  }
  
  /*
   * Initialize Machine Checks for a CPU.
   */
-static int __mcheck_cpu_cap_init(void)
+static void __mcheck_cpu_cap_init(void)
  {
         u64 cap;
         u8 b;
@@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void)
         rdmsrl(MSR_IA32_MCG_CAP, cap);
  
         b = cap & MCG_BANKCNT_MASK;
-       if (WARN_ON_ONCE(b > MAX_NR_BANKS))
+
+       if (b > MAX_NR_BANKS) {
+               pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
+                       smp_processor_id(), MAX_NR_BANKS, b);
                 b = MAX_NR_BANKS;
+       }
  
-       mca_cfg.banks = max(mca_cfg.banks, b);
+       this_cpu_write(mce_num_banks, b);
  
-       if (!mce_banks) {
-               int err = __mcheck_cpu_mce_banks_init();
-               if (err)
-                       return err;
-       }
+       __mcheck_cpu_mce_banks_init();
  
         /* Use accurate RIP reporting if available. */
         if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void)
  
         if (cap & MCG_SER_P)
                 mca_cfg.ser = 1;
-
-       return 0;
  }
  
  static void __mcheck_cpu_init_generic(void)
@@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
  
  static void __mcheck_cpu_init_clear_banks(void)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         int i;
  
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 struct mce_bank *b = &mce_banks[i];
  
                 if (!b->init)
@@ -1548,6 +1567,33 @@ static void __mcheck_cpu_init_clear_banks(void)
         }
  }
  
+/*
+ * Do a final check to see if there are any unused/RAZ banks.
+ *
+ * This must be done after the banks have been initialized and any quirks have
+ * been applied.
+ *
+ * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
+ * Otherwise, a user who disables a bank will not be able to re-enable it
+ * without a system reboot.
+ */
+static void __mcheck_cpu_check_banks(void)
+{
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+       u64 msrval;
+       int i;
+
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+               struct mce_bank *b = &mce_banks[i];
+
+               if (!b->init)
+                       continue;
+
+               rdmsrl(msr_ops.ctl(i), msrval);
+               b->init = !!msrval;
+       }
+}
+
  /*
   * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
   * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
@@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
  /* Add per CPU specific workarounds here */
  static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         struct mca_config *cfg = &mca_cfg;
  
         if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
  
         /* This should be disabled by the BIOS, but isn't always */
         if (c->x86_vendor == X86_VENDOR_AMD) {
-               if (c->x86 == 15 && cfg->banks > 4) {
+               if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
                         /*
                          * disable GART TBL walk error reporting, which
                          * trips off incorrectly with the IOMMU & 3ware
@@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
                  * Various K7s with broken bank 0 around. Always disable
                  * by default.
                  */
-               if (c->x86 == 6 && cfg->banks > 0)
+               if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
                         mce_banks[0].ctl = 0;
  
                 /*
@@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
                  * valid event later, merely don't write CTL0.
                  */
  
-               if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
+               if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
                         mce_banks[0].init = 0;
  
                 /*
@@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
         if (!mce_available(c))
                 return;
  
-       if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
+       __mcheck_cpu_cap_init();
+
+       if (__mcheck_cpu_apply_quirks(c) < 0) {
                 mca_cfg.disabled = 1;
                 return;
         }
@@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
         __mcheck_cpu_init_generic();
         __mcheck_cpu_init_vendor(c);
         __mcheck_cpu_init_clear_banks();
+       __mcheck_cpu_check_banks();
         __mcheck_cpu_setup_timer();
  }
  
@@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
  
  void mce_disable_bank(int bank)
  {
-       if (bank >= mca_cfg.banks) {
+       if (bank >= this_cpu_read(mce_num_banks)) {
                 pr_warn(FW_BUG
                         "Ignoring request to disable invalid MCA bank %d.\n",
                         bank);
@@ -1949,9 +1999,10 @@ int __init mcheck_init(void)
   */
  static void mce_disable_error_reporting(void)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         int i;
  
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 struct mce_bank *b = &mce_banks[i];
  
                 if (b->init)
@@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = {
  
  DEFINE_PER_CPU(struct device *, mce_device);
  
-static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
+static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
  {
-       return container_of(attr, struct mce_bank, attr);
+       return container_of(attr, struct mce_bank_dev, attr);
  }
  
  static ssize_t show_bank(struct device *s, struct device_attribute *attr,
                          char *buf)
  {
-       return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
+       u8 bank = attr_to_bank(attr)->bank;
+       struct mce_bank *b;
+
+       if (bank >= per_cpu(mce_num_banks, s->id))
+               return -EINVAL;
+
+       b = &per_cpu(mce_banks_array, s->id)[bank];
+
+       if (!b->init)
+               return -ENODEV;
+
+       return sprintf(buf, "%llx\n", b->ctl);
  }
  
  static ssize_t set_bank(struct device *s, struct device_attribute *attr,
                         const char *buf, size_t size)
  {
+       u8 bank = attr_to_bank(attr)->bank;
+       struct mce_bank *b;
         u64 new;
  
         if (kstrtou64(buf, 0, &new) < 0)
                 return -EINVAL;
  
-       attr_to_bank(attr)->ctl = new;
+       if (bank >= per_cpu(mce_num_banks, s->id))
+               return -EINVAL;
+
+       b = &per_cpu(mce_banks_array, s->id)[bank];
+
+       if (!b->init)
+               return -ENODEV;
+
+       b->ctl = new;
         mce_restart();
  
         return size;
@@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev)
         kfree(dev);
  }
  
-/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+/* Per CPU device init. All of the CPUs still share the same bank device: */
  static int mce_device_create(unsigned int cpu)
  {
         struct device *dev;
@@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
                 if (err)
                         goto error;
         }
-       for (j = 0; j < mca_cfg.banks; j++) {
-               err = device_create_file(dev, &mce_banks[j].attr);
+       for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
+               err = device_create_file(dev, &mce_bank_devs[j].attr);
                 if (err)
                         goto error2;
         }
@@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
         return 0;
  error2:
         while (--j >= 0)
-               device_remove_file(dev, &mce_banks[j].attr);
+               device_remove_file(dev, &mce_bank_devs[j].attr);
  error:
         while (--i >= 0)
                 device_remove_file(dev, mce_device_attrs[i]);
@@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
         for (i = 0; mce_device_attrs[i]; i++)
                 device_remove_file(dev, mce_device_attrs[i]);
  
-       for (i = 0; i < mca_cfg.banks; i++)
-               device_remove_file(dev, &mce_banks[i].attr);
+       for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
+               device_remove_file(dev, &mce_bank_devs[i].attr);
  
         device_unregister(dev);
         cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void)
  
  static void mce_reenable_cpu(void)
  {
+       struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
         int i;
  
         if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void)
  
         if (!cpuhp_tasks_frozen)
                 cmci_reenable();
-       for (i = 0; i < mca_cfg.banks; i++) {
+       for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
                 struct mce_bank *b = &mce_banks[i];
  
                 if (b->init)
@@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void)
  {
         int i;
  
-       for (i = 0; i < mca_cfg.banks; i++) {
-               struct mce_bank *b = &mce_banks[i];
+       for (i = 0; i < MAX_NR_BANKS; i++) {
+               struct mce_bank_dev *b = &mce_bank_devs[i];
                 struct device_attribute *a = &b->attr;
  
+               b->bank = i;
+
                 sysfs_attr_init(&a->attr);
                 a->attr.name    = b->attrname;
                 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
@@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
  DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
                          "%llu\n");
  
-static int __init mcheck_debugfs_init(void)
+static void __init mcheck_debugfs_init(void)
  {
-       struct dentry *dmce, *ffake_panic;
+       struct dentry *dmce;
  
         dmce = mce_get_debugfs_dir();
-       if (!dmce)
-               return -ENOMEM;
-       ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
-                                                NULL, &fake_panic_fops);
-       if (!ffake_panic)
-               return -ENOMEM;
-
-       return 0;
+       debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
+                                  &fake_panic_fops);
  }
  #else
-static int __init mcheck_debugfs_init(void) { return -EINVAL; }
+static void __init mcheck_debugfs_init(void) { }
  #endif
  
  DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
  
  static int __init mcheck_late_init(void)
  {
-       pr_info("Using %d MCE banks\n", mca_cfg.banks);
-
         if (mca_cfg.recovery)
                 static_branch_inc(&mcsafe_key);
  
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c

index 5d108f70f3151d4ddf805092106170958d64479f..1f30117b24ba7af6220fb8a5001f46b9a536a7db 100644 (file)
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -645,7 +645,6 @@ static const struct file_operations readme_fops = {
  
  static struct dfs_node {
         char *name;
-       struct dentry *d;
         const struct file_operations *fops;
         umode_t perm;
  } dfs_fls[] = {
@@ -659,49 +658,23 @@ static struct dfs_node {
         { .name = "README",     .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
  };
  
-static int __init debugfs_init(void)
+static void __init debugfs_init(void)
  {
         unsigned int i;
  
         dfs_inj = debugfs_create_dir("mce-inject", NULL);
-       if (!dfs_inj)
-               return -EINVAL;
-
-       for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
-               dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
-                                                   dfs_fls[i].perm,
-                                                   dfs_inj,
-                                                   &i_mce,
-                                                   dfs_fls[i].fops);
-
-               if (!dfs_fls[i].d)
-                       goto err_dfs_add;
-       }
-
-       return 0;
-
-err_dfs_add:
-       while (i-- > 0)
-               debugfs_remove(dfs_fls[i].d);
  
-       debugfs_remove(dfs_inj);
-       dfs_inj = NULL;
-
-       return -ENODEV;
+       for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
+               debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
+                                   &i_mce, dfs_fls[i].fops);
  }
  
  static int __init inject_init(void)
  {
-       int err;
-
         if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
                 return -ENOMEM;
  
-       err = debugfs_init();
-       if (err) {
-               free_cpumask_var(mce_inject_cpumask);
-               return err;
-       }
+       debugfs_init();
  
         register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
         mce_register_injector_chain(&inject_nb);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h

index a34b55baa7aa8e4eb3ac69ebc11a2cc10e4da278..43031db429d2498eb748fb9fff80e9d9576c8ce5 100644 (file)
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -22,17 +22,8 @@ enum severity_level {
  
  extern struct blocking_notifier_head x86_mce_decoder_chain;
  
-#define ATTR_LEN               16
  #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
  
-/* One object for each MCE bank, shared by all CPUs */
-struct mce_bank {
-       u64                     ctl;                    /* subevents to enable */
-       unsigned char init;                             /* initialise bank? */
-       struct device_attribute attr;                   /* device attribute */
-       char                    attrname[ATTR_LEN];     /* attribute name */
-};
-
  struct mce_evt_llist {
         struct llist_node llnode;
         struct mce mce;
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
  extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
  struct dentry *mce_get_debugfs_dir(void);
  
-extern struct mce_bank *mce_banks;
  extern mce_banks_t mce_banks_ce_disabled;
  
  #ifdef CONFIG_X86_MCE_INTEL
@@ -128,7 +118,6 @@ struct mca_config {
               bios_cmci_threshold       : 1,
               __reserved                : 59;
  
-       u8 banks;
         s8 bootlog;
         int tolerant;
         int monarch_timeout;
@@ -137,6 +126,7 @@ struct mca_config {
  };
  
  extern struct mca_config mca_cfg;
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
  
  struct mce_vendor_flags {
         /*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c

index 2d33a26d257ec4101f47743a4a458a5a3cb902d9..210f1f5db5f75e3f320002a9b6660c62b7806683 100644 (file)
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {
  
  static int __init severities_debugfs_init(void)
  {
-       struct dentry *dmce, *fsev;
+       struct dentry *dmce;
  
         dmce = mce_get_debugfs_dir();
-       if (!dmce)
-               goto err_out;
-
-       fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
-                                  &severities_coverage_fops);
-       if (!fsev)
-               goto err_out;
  
+       debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+                           &severities_coverage_fops);
         return 0;
-
-err_out:
-       return -ENOMEM;
  }
  late_initcall(severities_debugfs_init);
  #endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig

index a9c3db1252221aa710d8e81652488ca8059aaf0a..9ad6842de4b430bf62f13fb2fcba7b303d1b7447 100644 (file)
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
  
           Bear in mind that this is absolutely useless if your platform doesn't
           have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
+
+config RAS_CEC_DEBUG
+       bool "CEC debugging machinery"
+       default n
+       depends on RAS_CEC
+       help
+         Add extra files to (debugfs)/ras/cec to test the correctable error
+         collector feature. "pfn" is a writable file that allows the user to
+         simulate an error in a particular page frame. "array" is a read-only
+         file that dumps out the current state of all pages logged so far.
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c

index 673f8a128397e4d3a9080fd7dcc82f8b0b5f35ef..5d545806d930372ce02c5637d230b77e03dd2f5a 100644 (file)
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -1,4 +1,7 @@
  // SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019 Borislav Petkov, SUSE Labs.
+ */
  #include <linux/mm.h>
  #include <linux/gfp.h>
  #include <linux/kernel.h>
@@ -37,9 +40,9 @@
   * thus emulate an an LRU-like behavior when deleting elements to free up space
   * in the page.
   *
- * When an element reaches it's max count of count_threshold, we try to poison
- * it by assuming that errors triggered count_threshold times in a single page
- * are excessive and that page shouldn't be used anymore. count_threshold is
+ * When an element reaches its max count of action_threshold, we try to poison
+ * it by assuming that errors triggered action_threshold times in a single page
+ * are excessive and that page shouldn't be used anymore. action_threshold is
   * initialized to COUNT_MASK which is the maximum.
   *
   * That error event entry causes cec_add_elem() to return !0 value and thus
@@ -122,7 +125,7 @@ static DEFINE_MUTEX(ce_mutex);
  static u64 dfs_pfn;
  
  /* Amount of errors after which we offline */
-static unsigned int count_threshold = COUNT_MASK;
+static u64 action_threshold = COUNT_MASK;
  
  /* Each element "decays" each decay_interval which is 24hrs by default. */
  #define CEC_DECAY_DEFAULT_INTERVAL     24 * 60 * 60    /* 24 hrs */
@@ -276,11 +279,39 @@ static u64 __maybe_unused del_lru_elem(void)
         return pfn;
  }
  
+static bool sanity_check(struct ce_array *ca)
+{
+       bool ret = false;
+       u64 prev = 0;
+       int i;
+
+       for (i = 0; i < ca->n; i++) {
+               u64 this = PFN(ca->array[i]);
+
+               if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this))
+                       ret = true;
+
+               prev = this;
+       }
+
+       if (!ret)
+               return ret;
+
+       pr_info("Sanity check dump:\n{ n: %d\n", ca->n);
+       for (i = 0; i < ca->n; i++) {
+               u64 this = PFN(ca->array[i]);
+
+               pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
+       }
+       pr_info("}\n");
+
+       return ret;
+}
  
  int cec_add_elem(u64 pfn)
  {
         struct ce_array *ca = &ce_arr;
-       unsigned int to;
+       unsigned int to = 0;
         int count, ret = 0;
  
         /*
@@ -294,6 +325,7 @@ int cec_add_elem(u64 pfn)
  
         ca->ces_entered++;
  
+       /* Array full, free the LRU slot. */
         if (ca->n == MAX_ELEMS)
                 WARN_ON(!del_lru_elem_unlocked(ca));
  
@@ -306,24 +338,17 @@ int cec_add_elem(u64 pfn)
                         (void *)&ca->array[to],
                         (ca->n - to) * sizeof(u64));
  
-               ca->array[to] = (pfn << PAGE_SHIFT) |
-                               (DECAY_MASK << COUNT_BITS) | 1;
-
+               ca->array[to] = pfn << PAGE_SHIFT;
                 ca->n++;
-
-               ret = 0;
-
-               goto decay;
         }
  
-       count = COUNT(ca->array[to]);
-
-       if (count < count_threshold) {
-               ca->array[to] |= (DECAY_MASK << COUNT_BITS);
-               ca->array[to]++;
+       /* Add/refresh element generation and increment count */
+       ca->array[to] |= DECAY_MASK << COUNT_BITS;
+       ca->array[to]++;
  
-               ret = 0;
-       } else {
+       /* Check action threshold and soft-offline, if reached. */
+       count = COUNT(ca->array[to]);
+       if (count >= action_threshold) {
                 u64 pfn = ca->array[to] >> PAGE_SHIFT;
  
                 if (!pfn_valid(pfn)) {
@@ -338,20 +363,21 @@ int cec_add_elem(u64 pfn)
                 del_elem(ca, to);
  
                 /*
-                * Return a >0 value to denote that we've reached the offlining
-                * threshold.
+                * Return a >0 value to callers, to denote that we've reached
+                * the offlining threshold.
                  */
                 ret = 1;
  
                 goto unlock;
         }
  
-decay:
         ca->decay_count++;
  
         if (ca->decay_count >= CLEAN_ELEMS)
                 do_spring_cleaning(ca);
  
+       WARN_ON_ONCE(sanity_check(ca));
+
  unlock:
         mutex_unlock(&ce_mutex);
  
@@ -369,45 +395,48 @@ static int pfn_set(void *data, u64 val)
  {
         *(u64 *)data = val;
  
-       return cec_add_elem(val);
+       cec_add_elem(val);
+
+       return 0;
  }
  
  DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");
  
  static int decay_interval_set(void *data, u64 val)
  {
-       *(u64 *)data = val;
-
         if (val < CEC_DECAY_MIN_INTERVAL)
                 return -EINVAL;
  
         if (val > CEC_DECAY_MAX_INTERVAL)
                 return -EINVAL;
  
+       *(u64 *)data   = val;
         decay_interval = val;
  
         cec_mod_work(decay_interval);
+
         return 0;
  }
  DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
  
-static int count_threshold_set(void *data, u64 val)
+static int action_threshold_set(void *data, u64 val)
  {
         *(u64 *)data = val;
  
         if (val > COUNT_MASK)
                 val = COUNT_MASK;
  
-       count_threshold = val;
+       action_threshold = val;
  
         return 0;
  }
-DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n");
+
+static const char * const bins[] = { "00", "01", "10", "11" };
  
  static int array_dump(struct seq_file *m, void *v)
  {
         struct ce_array *ca = &ce_arr;
-       u64 prev = 0;
         int i;
  
         mutex_lock(&ce_mutex);
@@ -416,11 +445,8 @@ static int array_dump(struct seq_file *m, void *v)
         for (i = 0; i < ca->n; i++) {
                 u64 this = PFN(ca->array[i]);
  
-               seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
-
-               WARN_ON(prev > this);
-
-               prev = this;
+               seq_printf(m, " %3d: [%016llx|%s|%03llx]\n",
+                          i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i]));
         }
  
         seq_printf(m, "}\n");
@@ -433,7 +459,7 @@ static int array_dump(struct seq_file *m, void *v)
         seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
         seq_printf(m, "Decays: %lld\n", ca->decays_done);
  
-       seq_printf(m, "Action threshold: %d\n", count_threshold);
+       seq_printf(m, "Action threshold: %lld\n", action_threshold);
  
         mutex_unlock(&ce_mutex);
  
@@ -463,18 +489,6 @@ static int __init create_debugfs_nodes(void)
                 return -1;
         }
  
-       pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
-       if (!pfn) {
-               pr_warn("Error creating pfn debugfs node!\n");
-               goto err;
-       }
-
-       array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
-       if (!array) {
-               pr_warn("Error creating array debugfs node!\n");
-               goto err;
-       }
-
         decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
                                     &decay_interval, &decay_interval_ops);
         if (!decay) {
@@ -482,13 +496,27 @@ static int __init create_debugfs_nodes(void)
                 goto err;
         }
  
-       count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d,
-                                   &count_threshold, &count_threshold_ops);
+       count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d,
+                                   &action_threshold, &action_threshold_ops);
         if (!count) {
-               pr_warn("Error creating count_threshold debugfs node!\n");
+               pr_warn("Error creating action_threshold debugfs node!\n");
+               goto err;
+       }
+
+       if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG))
+               return 0;
+
+       pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
+       if (!pfn) {
+               pr_warn("Error creating pfn debugfs node!\n");
                 goto err;
         }
  
+       array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
+       if (!array) {
+               pr_warn("Error creating array debugfs node!\n");
+               goto err;
+       }
  
         return 0;
  
@@ -509,8 +537,10 @@ void __init cec_init(void)
                 return;
         }
  
-       if (create_debugfs_nodes())
+       if (create_debugfs_nodes()) {
+               free_page((unsigned long)ce_arr.array);
                 return;
+       }
  
         INIT_DELAYED_WORK(&cec_work, cec_work_fn);
         schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 8 Jul 2019 23:31:06 +0000 (16:31 -0700)
arch/x86/kernel/cpu/mce/amd.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mce/core.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mce/inject.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mce/internal.h		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mce/severity.c		patch \| blob \| blame \| history
arch/x86/ras/Kconfig		patch \| blob \| blame \| history
drivers/ras/cec.c		patch \| blob \| blame \| history