]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - arch/x86_64/kernel/mce.c
[PATCH] x86-64: Fix kobject_init() WARN_ON on resume from disk
[mirror_ubuntu-bionic-kernel.git] / arch / x86_64 / kernel / mce.c
CommitLineData
1da177e4
LT
1/*
2 * Machine check handler.
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it.
6 */
7
8#include <linux/init.h>
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/string.h>
13#include <linux/rcupdate.h>
14#include <linux/kallsyms.h>
15#include <linux/sysdev.h>
16#include <linux/miscdevice.h>
17#include <linux/fs.h>
a9415644 18#include <linux/capability.h>
91c6d400
AK
19#include <linux/cpu.h>
20#include <linux/percpu.h>
8c566ef5 21#include <linux/ctype.h>
1da177e4
LT
22#include <asm/processor.h>
23#include <asm/msr.h>
24#include <asm/mce.h>
25#include <asm/kdebug.h>
26#include <asm/uaccess.h>
0a9c3ee7 27#include <asm/smp.h>
1da177e4
LT
28
29#define MISC_MCELOG_MINOR 227
73ca5358 30#define NR_BANKS 6
1da177e4 31
553f265f
AK
32atomic_t mce_entry;
33
1da177e4
LT
34static int mce_dont_init;
35
36/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
37 3: never panic or exit (for testing only) */
38static int tolerant = 1;
39static int banks;
40static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
41static unsigned long console_logged;
42static int notify_user;
94ad8474 43static int rip_msr;
e583538f 44static int mce_bootlog = 1;
1da177e4
LT
45
46/*
47 * Lockless MCE logging infrastructure.
48 * This avoids deadlocks on printk locks without having to break locks. Also
49 * separate MCEs from kernel messages to avoid bogus bug reports.
50 */
51
52struct mce_log mcelog = {
53 MCE_LOG_SIGNATURE,
54 MCE_LOG_LEN,
55};
56
57void mce_log(struct mce *mce)
58{
59 unsigned next, entry;
60 mce->finished = 0;
7644143c 61 wmb();
1da177e4
LT
62 for (;;) {
63 entry = rcu_dereference(mcelog.next);
7644143c
MW
64 /* The rmb forces the compiler to reload next in each
65 iteration */
66 rmb();
673242c1
AK
67 for (;;) {
68 /* When the buffer fills up discard new entries. Assume
69 that the earlier errors are the more interesting. */
70 if (entry >= MCE_LOG_LEN) {
71 set_bit(MCE_OVERFLOW, &mcelog.flags);
72 return;
73 }
74 /* Old left over entry. Skip. */
75 if (mcelog.entry[entry].finished) {
76 entry++;
77 continue;
78 }
7644143c 79 break;
1da177e4 80 }
1da177e4
LT
81 smp_rmb();
82 next = entry + 1;
83 if (cmpxchg(&mcelog.next, entry, next) == entry)
84 break;
85 }
86 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
7644143c 87 wmb();
1da177e4 88 mcelog.entry[entry].finished = 1;
7644143c 89 wmb();
1da177e4
LT
90
91 if (!test_and_set_bit(0, &console_logged))
92 notify_user = 1;
93}
94
95static void print_mce(struct mce *m)
96{
97 printk(KERN_EMERG "\n"
4855170f 98 KERN_EMERG "HARDWARE ERROR\n"
1da177e4
LT
99 KERN_EMERG
100 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
101 m->cpu, m->mcgstatus, m->bank, m->status);
102 if (m->rip) {
103 printk(KERN_EMERG
104 "RIP%s %02x:<%016Lx> ",
105 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
106 m->cs, m->rip);
107 if (m->cs == __KERNEL_CS)
108 print_symbol("{%s}", m->rip);
109 printk("\n");
110 }
111 printk(KERN_EMERG "TSC %Lx ", m->tsc);
112 if (m->addr)
113 printk("ADDR %Lx ", m->addr);
114 if (m->misc)
115 printk("MISC %Lx ", m->misc);
116 printk("\n");
4855170f
AK
117 printk(KERN_EMERG "This is not a software problem!\n");
118 printk(KERN_EMERG
119 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
1da177e4
LT
120}
121
122static void mce_panic(char *msg, struct mce *backup, unsigned long start)
123{
124 int i;
125 oops_begin();
126 for (i = 0; i < MCE_LOG_LEN; i++) {
127 unsigned long tsc = mcelog.entry[i].tsc;
128 if (time_before(tsc, start))
129 continue;
130 print_mce(&mcelog.entry[i]);
131 if (backup && mcelog.entry[i].tsc == backup->tsc)
132 backup = NULL;
133 }
134 if (backup)
135 print_mce(backup);
136 if (tolerant >= 3)
137 printk("Fake panic: %s\n", msg);
138 else
139 panic(msg);
140}
141
142static int mce_available(struct cpuinfo_x86 *c)
143{
3d1712c9 144 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
1da177e4
LT
145}
146
94ad8474
AK
147static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
148{
149 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
150 m->rip = regs->rip;
151 m->cs = regs->cs;
152 } else {
153 m->rip = 0;
154 m->cs = 0;
155 }
156 if (rip_msr) {
157 /* Assume the RIP in the MSR is exact. Is this true? */
158 m->mcgstatus |= MCG_STATUS_EIPV;
159 rdmsrl(rip_msr, m->rip);
160 m->cs = 0;
161 }
162}
163
1da177e4
LT
164/*
165 * The actual machine check handler
166 */
167
168void do_machine_check(struct pt_regs * regs, long error_code)
169{
170 struct mce m, panicm;
171 int nowayout = (tolerant < 1);
172 int kill_it = 0;
173 u64 mcestart = 0;
174 int i;
175 int panicm_found = 0;
176
553f265f
AK
177 atomic_inc(&mce_entry);
178
1da177e4 179 if (regs)
6e3f3617 180 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
1da177e4 181 if (!banks)
553f265f 182 goto out2;
1da177e4
LT
183
184 memset(&m, 0, sizeof(struct mce));
151f8cc1 185 m.cpu = smp_processor_id();
1da177e4
LT
186 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
187 if (!(m.mcgstatus & MCG_STATUS_RIPV))
188 kill_it = 1;
189
190 rdtscll(mcestart);
191 barrier();
192
193 for (i = 0; i < banks; i++) {
194 if (!bank[i])
195 continue;
196
197 m.misc = 0;
198 m.addr = 0;
199 m.bank = i;
200 m.tsc = 0;
201
202 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
203 if ((m.status & MCI_STATUS_VAL) == 0)
204 continue;
205
206 if (m.status & MCI_STATUS_EN) {
207 /* In theory _OVER could be a nowayout too, but
208 assume any overflowed errors were no fatal. */
209 nowayout |= !!(m.status & MCI_STATUS_PCC);
210 kill_it |= !!(m.status & MCI_STATUS_UC);
211 }
212
213 if (m.status & MCI_STATUS_MISCV)
214 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
215 if (m.status & MCI_STATUS_ADDRV)
216 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
217
94ad8474 218 mce_get_rip(&m, regs);
d5172f26 219 if (error_code >= 0)
1da177e4
LT
220 rdtscll(m.tsc);
221 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
d5172f26
AK
222 if (error_code != -2)
223 mce_log(&m);
1da177e4
LT
224
225 /* Did this bank cause the exception? */
226 /* Assume that the bank with uncorrectable errors did it,
227 and that there is only a single one. */
228 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
229 panicm = m;
230 panicm_found = 1;
231 }
232
9f158333 233 add_taint(TAINT_MACHINE_CHECK);
1da177e4
LT
234 }
235
236 /* Never do anything final in the polling timer */
237 if (!regs)
238 goto out;
239
240 /* If we didn't find an uncorrectable error, pick
241 the last one (shouldn't happen, just being safe). */
242 if (!panicm_found)
243 panicm = m;
244 if (nowayout)
245 mce_panic("Machine check", &panicm, mcestart);
246 if (kill_it) {
247 int user_space = 0;
248
249 if (m.mcgstatus & MCG_STATUS_RIPV)
250 user_space = panicm.rip && (panicm.cs & 3);
251
252 /* When the machine was in user space and the CPU didn't get
253 confused it's normally not necessary to panic, unless you
254 are paranoid (tolerant == 0)
255
256 RED-PEN could be more tolerant for MCEs in idle,
257 but most likely they occur at boot anyways, where
258 it is best to just halt the machine. */
259 if ((!user_space && (panic_on_oops || tolerant < 2)) ||
260 (unsigned)current->pid <= 1)
261 mce_panic("Uncorrected machine check", &panicm, mcestart);
262
263 /* do_exit takes an awful lot of locks and has as
264 slight risk of deadlocking. If you don't want that
265 don't set tolerant >= 2 */
266 if (tolerant < 3)
267 do_exit(SIGBUS);
268 }
269
270 out:
271 /* Last thing done in the machine check exception to clear state. */
272 wrmsrl(MSR_IA32_MCG_STATUS, 0);
553f265f
AK
273 out2:
274 atomic_dec(&mce_entry);
1da177e4
LT
275}
276
15d5f839
DZ
#ifdef CONFIG_X86_MCE_INTEL
/***
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @cpu: The CPU on which the event occurred.
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
{
	struct mce event;

	/* Start from an all-zero record, then fill in what is known. */
	memset(&event, 0, sizeof(event));
	event.cpu = cpu;
	event.bank = MCE_THERMAL_BANK;
	event.status = status;
	rdtscll(event.tsc);

	mce_log(&event);
}
#endif /* CONFIG_X86_MCE_INTEL */
303
1da177e4
LT
304/*
305 * Periodic polling timer for "silent" machine check errors.
306 */
307
/* Polling period in seconds; 0 means the poll work is not scheduled. */
static int check_interval = 5 * 60; /* 5 minutes */
/* Forward declaration: the work item below needs the handler's address. */
static void mcheck_timer(void *data);
/* Delayed work item that runs mcheck_timer(). */
static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
311
312static void mcheck_check_cpu(void *info)
313{
314 if (mce_available(&current_cpu_data))
315 do_machine_check(NULL, 0);
316}
317
318static void mcheck_timer(void *data)
319{
320 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
321 schedule_delayed_work(&mcheck_work, check_interval * HZ);
322
323 /*
324 * It's ok to read stale data here for notify_user and
325 * console_logged as we'll simply get the updated versions
326 * on the next mcheck_timer execution and atomic operations
327 * on console_logged act as synchronization for notify_user
328 * writes.
329 */
330 if (notify_user && console_logged) {
331 notify_user = 0;
332 clear_bit(0, &console_logged);
333 printk(KERN_INFO "Machine check events logged\n");
334 }
335}
336
337
338static __init int periodic_mcheck_init(void)
339{
340 if (check_interval)
341 schedule_delayed_work(&mcheck_work, check_interval*HZ);
342 return 0;
343}
344__initcall(periodic_mcheck_init);
345
346
347/*
348 * Initialize Machine Checks for a CPU.
349 */
350static void mce_init(void *dummy)
351{
352 u64 cap;
353 int i;
354
355 rdmsrl(MSR_IA32_MCG_CAP, cap);
356 banks = cap & 0xff;
357 if (banks > NR_BANKS) {
358 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
359 banks = NR_BANKS;
360 }
94ad8474
AK
361 /* Use accurate RIP reporting if available. */
362 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
363 rip_msr = MSR_IA32_MCG_EIP;
1da177e4
LT
364
365 /* Log the machine checks left over from the previous reset.
366 This also clears all registers */
d5172f26 367 do_machine_check(NULL, mce_bootlog ? -1 : -2);
1da177e4
LT
368
369 set_in_cr4(X86_CR4_MCE);
370
371 if (cap & MCG_CTL_P)
372 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
373
374 for (i = 0; i < banks; i++) {
375 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
376 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
377 }
378}
379
380/* Add per CPU specific workarounds here */
e6982c67 381static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1da177e4
LT
382{
383 /* This should be disabled by the BIOS, but isn't always */
384 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
385 /* disable GART TBL walk error reporting, which trips off
386 incorrectly with the IOMMU & 3ware & Cerberus. */
387 clear_bit(10, &bank[4]);
e583538f
AK
388 /* Lots of broken BIOS around that don't clear them
389 by default and leave crap in there. Don't log. */
390 mce_bootlog = 0;
1da177e4 391 }
e583538f 392
1da177e4
LT
393}
394
e6982c67 395static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
1da177e4
LT
396{
397 switch (c->x86_vendor) {
398 case X86_VENDOR_INTEL:
399 mce_intel_feature_init(c);
400 break;
89b831ef
JS
401 case X86_VENDOR_AMD:
402 mce_amd_feature_init(c);
403 break;
1da177e4
LT
404 default:
405 break;
406 }
407}
408
409/*
410 * Called for each booted CPU to set up machine checks.
411 * Must be called with preempt off.
412 */
e6982c67 413void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1da177e4 414{
7ded5689 415 static cpumask_t mce_cpus = CPU_MASK_NONE;
1da177e4
LT
416
417 mce_cpu_quirks(c);
418
419 if (mce_dont_init ||
420 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
421 !mce_available(c))
422 return;
423
424 mce_init(NULL);
425 mce_cpu_features(c);
426}
427
428/*
429 * Character device to read and clear the MCE log.
430 */
431
/*
 * Cross-CPU callback: store the current CPU's TSC into the array passed
 * in @data, at the slot indexed by this CPU's id.
 */
static void collect_tscs(void *data)
{
	unsigned long *tsc_by_cpu = data;

	rdtscll(tsc_by_cpu[smp_processor_id()]);
}
437
438static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
439{
f0de53bb 440 unsigned long *cpu_tsc;
1da177e4
LT
441 static DECLARE_MUTEX(mce_read_sem);
442 unsigned next;
443 char __user *buf = ubuf;
444 int i, err;
445
f0de53bb
AK
446 cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
447 if (!cpu_tsc)
448 return -ENOMEM;
449
1da177e4
LT
450 down(&mce_read_sem);
451 next = rcu_dereference(mcelog.next);
452
453 /* Only supports full reads right now */
454 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
455 up(&mce_read_sem);
f0de53bb 456 kfree(cpu_tsc);
1da177e4
LT
457 return -EINVAL;
458 }
459
460 err = 0;
673242c1
AK
461 for (i = 0; i < next; i++) {
462 unsigned long start = jiffies;
463 while (!mcelog.entry[i].finished) {
464 if (!time_before(jiffies, start + 2)) {
465 memset(mcelog.entry + i,0, sizeof(struct mce));
466 continue;
467 }
468 cpu_relax();
469 }
1da177e4
LT
470 smp_rmb();
471 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
472 buf += sizeof(struct mce);
473 }
474
475 memset(mcelog.entry, 0, next * sizeof(struct mce));
476 mcelog.next = 0;
477
b2b18660 478 synchronize_sched();
1da177e4
LT
479
480 /* Collect entries that were still getting written before the synchronize. */
481
482 on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
483 for (i = next; i < MCE_LOG_LEN; i++) {
484 if (mcelog.entry[i].finished &&
485 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
486 err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
487 smp_rmb();
488 buf += sizeof(struct mce);
489 memset(&mcelog.entry[i], 0, sizeof(struct mce));
490 }
491 }
492 up(&mce_read_sem);
f0de53bb 493 kfree(cpu_tsc);
1da177e4
LT
494 return err ? -EFAULT : buf - ubuf;
495}
496
497static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
498{
499 int __user *p = (int __user *)arg;
500 if (!capable(CAP_SYS_ADMIN))
501 return -EPERM;
502 switch (cmd) {
503 case MCE_GET_RECORD_LEN:
504 return put_user(sizeof(struct mce), p);
505 case MCE_GET_LOG_LEN:
506 return put_user(MCE_LOG_LEN, p);
507 case MCE_GETCLEAR_FLAGS: {
508 unsigned flags;
509 do {
510 flags = mcelog.flags;
511 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
512 return put_user(flags, p);
513 }
514 default:
515 return -ENOTTY;
516 }
517}
518
519static struct file_operations mce_chrdev_ops = {
520 .read = mce_read,
521 .ioctl = mce_ioctl,
522};
523
524static struct miscdevice mce_log_device = {
525 MISC_MCELOG_MINOR,
526 "mcelog",
527 &mce_chrdev_ops,
528};
529
530/*
531 * Old style boot options parsing. Only for compatibility.
532 */
533
534static int __init mcheck_disable(char *str)
535{
536 mce_dont_init = 1;
9b41046c 537 return 1;
1da177e4
LT
538}
539
/* mce=off disables machine checks. Note that you can re-enable them later
   using sysfs.
   mce=TOLERANCELEVEL (number, see above)
   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
   mce=nobootlog Don't log MCEs from before booting. */
1da177e4
LT
545static int __init mcheck_enable(char *str)
546{
d5172f26
AK
547 if (*str == '=')
548 str++;
1da177e4
LT
549 if (!strcmp(str, "off"))
550 mce_dont_init = 1;
e583538f
AK
551 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
552 mce_bootlog = str[0] == 'b';
8c566ef5
AK
553 else if (isdigit(str[0]))
554 get_option(&str, &tolerant);
1da177e4
LT
555 else
556 printk("mce= argument %s ignored. Please use /sys", str);
9b41046c 557 return 1;
1da177e4
LT
558}
559
560__setup("nomce", mcheck_disable);
561__setup("mce", mcheck_enable);
562
563/*
564 * Sysfs support
565 */
566
413588c7
AK
/* On resume, clear all MCE state. We don't want to see leftovers from the
   BIOS. Only one CPU is active at this time; the others get re-added later
   using CPU hotplug. */
1da177e4
LT
570static int mce_resume(struct sys_device *dev)
571{
413588c7 572 mce_init(NULL);
1da177e4
LT
573 return 0;
574}
575
576/* Reinit MCEs after user configuration changes */
577static void mce_restart(void)
578{
579 if (check_interval)
580 cancel_delayed_work(&mcheck_work);
581 /* Timer race is harmless here */
582 on_each_cpu(mce_init, NULL, 1, 1);
583 if (check_interval)
584 schedule_delayed_work(&mcheck_work, check_interval*HZ);
585}
586
587static struct sysdev_class mce_sysclass = {
588 .resume = mce_resume,
589 set_kset_name("machinecheck"),
590};
591
fff2e89f 592DEFINE_PER_CPU(struct sys_device, device_mce);
1da177e4
LT
593
594/* Why are there no generic functions for this? */
595#define ACCESSOR(name, var, start) \
596 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
597 return sprintf(buf, "%lx\n", (unsigned long)var); \
598 } \
599 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
600 char *end; \
601 unsigned long new = simple_strtoul(buf, &end, 0); \
602 if (end == buf) return -EINVAL; \
603 var = new; \
604 start; \
605 return end-buf; \
606 } \
607 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
608
609ACCESSOR(bank0ctl,bank[0],mce_restart())
610ACCESSOR(bank1ctl,bank[1],mce_restart())
611ACCESSOR(bank2ctl,bank[2],mce_restart())
612ACCESSOR(bank3ctl,bank[3],mce_restart())
613ACCESSOR(bank4ctl,bank[4],mce_restart())
73ca5358
SL
614ACCESSOR(bank5ctl,bank[5],mce_restart())
615static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
616 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
617 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
1da177e4
LT
618ACCESSOR(tolerant,tolerant,)
619ACCESSOR(check_interval,check_interval,mce_restart())
620
91c6d400
AK
621/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
622static __cpuinit int mce_create_device(unsigned int cpu)
1da177e4
LT
623{
624 int err;
73ca5358 625 int i;
91c6d400
AK
626 if (!mce_available(&cpu_data[cpu]))
627 return -EIO;
628
629 per_cpu(device_mce,cpu).id = cpu;
630 per_cpu(device_mce,cpu).cls = &mce_sysclass;
631
632 err = sysdev_register(&per_cpu(device_mce,cpu));
633
634 if (!err) {
73ca5358
SL
635 for (i = 0; i < banks; i++)
636 sysdev_create_file(&per_cpu(device_mce,cpu),
637 bank_attributes[i]);
91c6d400
AK
638 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
639 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
640 }
641 return err;
642}
643
644#ifdef CONFIG_HOTPLUG_CPU
be6b5a35 645static void mce_remove_device(unsigned int cpu)
91c6d400 646{
73ca5358
SL
647 int i;
648
649 for (i = 0; i < banks; i++)
650 sysdev_remove_file(&per_cpu(device_mce,cpu),
651 bank_attributes[i]);
91c6d400
AK
652 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
653 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
654 sysdev_unregister(&per_cpu(device_mce,cpu));
d4c45718 655 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
91c6d400 656}
91c6d400
AK
657
658/* Get notified when a cpu comes on/off. Be hotplug friendly. */
be6b5a35 659static int
91c6d400
AK
660mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
661{
662 unsigned int cpu = (unsigned long)hcpu;
663
664 switch (action) {
665 case CPU_ONLINE:
666 mce_create_device(cpu);
667 break;
91c6d400
AK
668 case CPU_DEAD:
669 mce_remove_device(cpu);
670 break;
91c6d400
AK
671 }
672 return NOTIFY_OK;
673}
674
be6b5a35 675static struct notifier_block mce_cpu_notifier = {
91c6d400
AK
676 .notifier_call = mce_cpu_callback,
677};
be6b5a35 678#endif
91c6d400
AK
679
680static __init int mce_init_device(void)
681{
682 int err;
683 int i = 0;
684
1da177e4
LT
685 if (!mce_available(&boot_cpu_data))
686 return -EIO;
687 err = sysdev_class_register(&mce_sysclass);
91c6d400
AK
688
689 for_each_online_cpu(i) {
690 mce_create_device(i);
691 }
692
be6b5a35 693 register_hotcpu_notifier(&mce_cpu_notifier);
1da177e4
LT
694 misc_register(&mce_log_device);
695 return err;
1da177e4 696}
91c6d400 697
1da177e4 698device_initcall(mce_init_device);