[mirror_ubuntu-bionic-kernel.git] / arch / x86_64 / kernel / mce.c

/*
 * Machine check handler.
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s). 
 * 2004 Andi Kleen. Rewrote most of it. 
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/ctype.h>
#include <asm/processor.h> 
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/kdebug.h>
#include <asm/uaccess.h>
#include <asm/smp.h>

#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6

/* Count of do_machine_check() invocations currently executing: it is
   incremented on entry to that function and decremented on exit. */
atomic_t mce_entry;

/* Set by the "nomce" and "mce=off" boot options; when non-zero
   mcheck_init() returns without initializing machine checks. */
static int mce_dont_init;

/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
   3: never panic or exit (for testing only) */
static int tolerant = 1;
/* Number of banks in use: low byte of MCG_CAP as read by mce_init(),
   capped at NR_BANKS. Zero until mce_init() has run. */
static int banks;
/* Per-bank value written to the bank's CTL MSR by mce_init(). A bank whose
   entry is zero is skipped by do_machine_check(). Writable through the
   bankNctl sysfs attributes. */
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
/* Bit 0 is set by mce_log() and cleared by mcheck_timer() once the
   "events logged" message has been printed. */
static unsigned long console_logged;
/* Set by mce_log() when it was the one to set bit 0 of console_logged;
   consumed and reset by mcheck_timer(). */
static int notify_user;
/* MSR from which mce_get_rip() reads the RIP, or 0 to use pt_regs.
   Set by mce_init() based on MCG_CAP. */
static int rip_msr;
/* Non-zero: mce_init() logs errors left over from before boot.
   Cleared by mce_cpu_quirks() for AMD family 15 and by "mce=nobootlog". */
static int mce_bootlog = 1;

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

/* The global log buffer. Only the first two members are initialized
   explicitly (positionally, with MCE_LOG_SIGNATURE and MCE_LOG_LEN);
   everything else, including next, flags and the entries, starts zeroed. */
struct mce_log mcelog = { 
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
}; 

/*
 * Append one record to the global mcelog buffer without taking a lock.
 *
 * A slot is reserved by advancing mcelog.next with cmpxchg; if the cmpxchg
 * loses against a concurrent writer the search is restarted from the new
 * value of mcelog.next. When no free slot exists the record is dropped and
 * MCE_OVERFLOW is set in mcelog.flags.
 *
 * Note that mce->finished in the caller's record is cleared as a side
 * effect; the copy in the log gets finished = 1 only after the memcpy.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;
	/* The record is copied with finished == 0; readers treat such an
	   entry as still being written. */
	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		/* The rmb forces the compiler to reload next in each
		    iteration */
		rmb();
		for (;;) {
			/* When the buffer fills up discard new entries. Assume
			   that the earlier errors are the more interesting. */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, &mcelog.flags);
				return;
			}
			/* Old left over entry. Skip. */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		/* Claim slot 'entry' by moving mcelog.next past it; retry
		   the whole search if mcelog.next changed meanwhile. */
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	/* Publish the payload before marking the entry as finished. */
	wmb();
	mcelog.entry[entry].finished = 1;
	wmb();

	/* Only the caller that flips bit 0 of console_logged from 0 to 1
	   raises notify_user; mcheck_timer() resets both. */
	if (!test_and_set_bit(0, &console_logged))
		notify_user = 1;
}

static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG "HARDWARE ERROR\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->rip) {
		printk(KERN_EMERG 
		       "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->rip);
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->rip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %Lx ", m->tsc); 
	if (m->addr)
		printk("ADDR %Lx ", m->addr);
	if (m->misc)
		printk("MISC %Lx ", m->misc); 	
	printk("\n");
	printk(KERN_EMERG "This is not a software problem!\n");
        printk(KERN_EMERG
    "Run through mcelog --ascii to decode and contact your hardware vendor\n");
}

/*
 * Print every logged record whose TSC is not before 'start', then panic.
 *
 * @msg:    panic message (printed verbatim).
 * @backup: record to print if it was not found in the log (matched by TSC);
 *          may be NULL.
 * @start:  TSC value taken at the start of the current machine check.
 *
 * With tolerant >= 3 no real panic happens; a "Fake panic" line is printed
 * and the function returns.
 */
static void mce_panic(char *msg, struct mce *backup, unsigned long start)
{ 
	int i;
	oops_begin();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		unsigned long tsc = mcelog.entry[i].tsc;
		if (time_before(tsc, start))
			continue;
		print_mce(&mcelog.entry[i]); 
		/* The backup record made it into the log; don't print it twice. */
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	if (tolerant >= 3)
		printk("Fake panic: %s\n", msg);
	else
		/* Never hand a variable to panic() as its format string. */
		panic("%s", msg);
} 

/* Returns 1 when the CPU advertises both the MCE and MCA features, else 0. */
static int mce_available(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_MCE))
		return 0;
	if (!cpu_has(c, X86_FEATURE_MCA))
		return 0;
	return 1;
}

/*
 * Fill in m->rip and m->cs for a record.
 *
 * The values come from the exception frame when one is available and
 * MCG_STATUS_RIPV is set in m->mcgstatus; otherwise both are zeroed. If a
 * RIP MSR was selected at init time it overrides the frame: the RIP is read
 * from that MSR, cs is zeroed and EIPV is forced on in m->mcgstatus.
 */
static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	int frame_usable = regs && (m->mcgstatus & MCG_STATUS_RIPV);

	if (!frame_usable) {
		m->rip = 0;
		m->cs = 0;
	} else {
		m->rip = regs->rip;
		m->cs = regs->cs;
	}

	if (!rip_msr)
		return;

	/* Assume the RIP in the MSR is exact. Is this true? */
	m->mcgstatus |= MCG_STATUS_EIPV;
	rdmsrl(rip_msr, m->rip);
	m->cs = 0;
}

/* 
 * The actual machine check handler.
 *
 * @regs:       exception frame, or NULL when called from the polling timer
 *              (mcheck_check_cpu) or from mce_init(). With regs == NULL the
 *              banks are scanned, logged and cleared, but nothing final
 *              (panic / do_exit) is done.
 * @error_code: >= 0: record the TSC in each logged entry;
 *              -1:   log entries with tsc left at 0 (boot-time leftovers);
 *              -2:   clear the banks without logging.
 *
 * Does nothing besides the mce_entry accounting (and notify_die when regs
 * is set) while 'banks' is still zero.
 */

void do_machine_check(struct pt_regs * regs, long error_code)
{
	struct mce m, panicm;
	int nowayout = (tolerant < 1); 
	int kill_it = 0;
	u64 mcestart = 0;
	int i;
	int panicm_found = 0;

	atomic_inc(&mce_entry);

	if (regs)
		notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
	if (!banks)
		goto out2;

	memset(&m, 0, sizeof(struct mce));
	m.cpu = smp_processor_id();
	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	/* RIPV clear in MCG_STATUS: treat the interrupted context as one
	   that has to be killed. */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;
	
	/* Timestamp used by mce_panic() to select the records to print. */
	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		/* A zero control value means the bank is disabled by config. */
		if (!bank[i])
			continue;
		
		m.misc = 0; 
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		if (m.status & MCI_STATUS_EN) {
			/* In theory _OVER could be a nowayout too, but
			   assume any overflowed errors were not fatal. */
			nowayout |= !!(m.status & MCI_STATUS_PCC);
			kill_it |= !!(m.status & MCI_STATUS_UC);
		}

		if (m.status & MCI_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
		if (m.status & MCI_STATUS_ADDRV)
			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);

		mce_get_rip(&m, regs);
		if (error_code >= 0)
			rdtscll(m.tsc);
		/* Clear the bank's status before logging the record. */
		wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
		if (error_code != -2)
			mce_log(&m);

		/* Did this bank cause the exception? */
		/* Assume that the bank with uncorrectable errors did it,
		   and that there is only a single one. */
		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}

		add_taint(TAINT_MACHINE_CHECK);
	}

	/* Never do anything final in the polling timer */
	if (!regs)
		goto out;

	/* If we didn't find an uncorrectable error, pick
	   the last one (shouldn't happen, just being safe). */
	if (!panicm_found)
		panicm = m;
	if (nowayout)
		mce_panic("Machine check", &panicm, mcestart);
	if (kill_it) {
		int user_space = 0;

		/* Only trust rip/cs when RIPV was set; cs & 3 != 0 means the
		   recorded code segment is not ring 0. */
		if (m.mcgstatus & MCG_STATUS_RIPV)
			user_space = panicm.rip && (panicm.cs & 3);
		
		/* When the machine was in user space and the CPU didn't get
		   confused it's normally not necessary to panic, unless you 
		   are paranoid (tolerant == 0)

		   RED-PEN could be more tolerant for MCEs in idle,
		   but most likely they occur at boot anyways, where
		   it is best to just halt the machine. */
		if ((!user_space && (panic_on_oops || tolerant < 2)) ||
		    (unsigned)current->pid <= 1)
			mce_panic("Uncorrected machine check", &panicm, mcestart);

		/* do_exit takes an awful lot of locks and has as
		   slight risk of deadlocking. If you don't want that
		   don't set tolerant >= 2 */
		if (tolerant < 3)
			do_exit(SIGBUS);
	}

 out:
	/* Last thing done in the machine check exception to clear state. */
	wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
}

#ifdef CONFIG_X86_MCE_INTEL
/**
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @cpu: The CPU on which the event occurred.
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 *
 * The record is zero-filled apart from cpu, bank (MCE_THERMAL_BANK),
 * status and the current TSC.
 */
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
{
	struct mce event;

	/* Zero the whole record first so no stack garbage reaches the log. */
	memset(&event, 0, sizeof(struct mce));
	event.status = status;
	event.bank = MCE_THERMAL_BANK;
	event.cpu = cpu;
	rdtscll(event.tsc);

	mce_log(&event);
}
#endif /* CONFIG_X86_MCE_INTEL */

/*
 * Periodic polling timer for "silent" machine check errors.
 */

/* Polling period in seconds (multiplied by HZ where the work is queued);
   periodic_mcheck_init() and mce_restart() do not queue the work when it
   is 0. Writable through the check_interval sysfs attribute. */
static int check_interval = 5 * 60; /* 5 minutes */
static void mcheck_timer(void *data);
/* Delayed work item that runs mcheck_timer(). */
static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);

/*
 * Per-CPU poll callback: scan this CPU's banks in polling mode
 * (no exception frame, error code 0) if it supports machine checks.
 */
static void mcheck_check_cpu(void *info)
{
	if (!mce_available(&current_cpu_data))
		return;

	do_machine_check(NULL, 0);
}

/*
 * Work handler for the periodic poll: checks every CPU for logged errors,
 * re-arms itself and prints a one-line notice when new events were logged
 * since the last notice.
 */
static void mcheck_timer(void *data)
{
	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);

	/*
	 * A check_interval of 0 means polling is disabled (see
	 * periodic_mcheck_init). Re-arming with a zero delay would make
	 * this work requeue itself back-to-back, so only re-arm when an
	 * interval is configured.
	 */
	if (check_interval)
		schedule_delayed_work(&mcheck_work, check_interval * HZ);

	/*
	 * It's ok to read stale data here for notify_user and
	 * console_logged as we'll simply get the updated versions
	 * on the next mcheck_timer execution and atomic operations
	 * on console_logged act as synchronization for notify_user
	 * writes.
	 */
	if (notify_user && console_logged) {
		notify_user = 0;
		clear_bit(0, &console_logged);
		printk(KERN_INFO "Machine check events logged\n");
	}
}


/*
 * Initcall that queues the first polling run, unless polling is disabled
 * (check_interval == 0). Always returns 0.
 */
static __init int periodic_mcheck_init(void)
{
	if (!check_interval)
		return 0;

	schedule_delayed_work(&mcheck_work, check_interval*HZ);
	return 0;
}
__initcall(periodic_mcheck_init);


/* 
 * Initialize Machine Checks for a CPU.
 */
/*
 * Set up machine checks on the current CPU.
 *
 * Reads MCG_CAP to determine the bank count (capped at NR_BANKS) and
 * whether a RIP MSR is usable, drains and clears any errors already present
 * in the banks, enables CR4.MCE and programs each bank's control register
 * from bank[]. @dummy is unused; the signature matches on_each_cpu().
 */
static void mce_init(void *dummy)
{
	u64 cap;
	int i;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks = cap & 0xff;
	if (banks > NR_BANKS) { 
		/* Report the number of banks actually used (the cap), not
		   the larger number the CPU advertised. */
		printk(KERN_INFO "MCE: warning: using only %d banks\n",
		       NR_BANKS);
		banks = NR_BANKS; 
	}
	/* Use accurate RIP reporting if available. */
	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	/* Log the machine checks left over from the previous reset.
	   This also clears all registers */
	do_machine_check(NULL, mce_bootlog ? -1 : -2);

	set_in_cr4(X86_CR4_MCE);

	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	/* Program the per-bank enable masks and start with clean status. */
	for (i = 0; i < banks; i++) {
		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}	
}

/*
 * Add per CPU specific workarounds here.
 * Currently only AMD family 15 CPUs are adjusted.
 */
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	int amd_fam15 = (c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 15);

	if (!amd_fam15)
		return;

	/* This should be disabled by the BIOS, but isn't always:
	   disable GART TBL walk error reporting, which trips off
	   incorrectly with the IOMMU & 3ware & Cerberus. */
	clear_bit(10, &bank[4]);

	/* Lots of broken BIOS around that don't clear them
	   by default and leave crap in there. Don't log. */
	mce_bootlog = 0;
}

/* Hand off to the vendor specific feature setup; other vendors get none. */
static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor == X86_VENDOR_INTEL)
		mce_intel_feature_init(c);
	else if (c->x86_vendor == X86_VENDOR_AMD)
		mce_amd_feature_init(c);
}

/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off.
 *
 * Quirks are always applied. The actual initialization is skipped when
 * machine checks were disabled on the command line, when this CPU was
 * already initialized once, or when the CPU lacks MCE/MCA support.
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
	static cpumask_t mce_cpus = CPU_MASK_NONE;

	mce_cpu_quirks(c);

	if (mce_dont_init)
		return;
	/* Marks this CPU as seen even if it turns out to lack MCE below. */
	if (cpu_test_and_set(smp_processor_id(), mce_cpus))
		return;
	if (!mce_available(c))
		return;

	mce_init(NULL);
	mce_cpu_features(c);
}

/*
 * Character device to read and clear the MCE log.
 */

/*
 * on_each_cpu() callback: store the current TSC of the executing CPU into
 * the slot of the array passed in @data that is indexed by its CPU number.
 */
static void collect_tscs(void *data)
{
	unsigned long *tsc_by_cpu = data;

	rdtscll(tsc_by_cpu[smp_processor_id()]);
}

/*
 * read() handler of the mcelog character device: copy all logged records
 * to user space and clear the log.
 *
 * Only full reads are supported: *off must be 0 and usize must be able to
 * hold MCE_LOG_LEN records, otherwise -EINVAL is returned. Returns the
 * number of bytes copied, -ENOMEM if the scratch buffer cannot be allocated
 * or -EFAULT if any copy to user space failed.
 *
 * Readers are serialized by a semaphore private to this function; writers
 * (mce_log) are lockless, hence the wait/timeout on unfinished entries and
 * the second pass after synchronize_sched().
 */
static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
{
	unsigned long *cpu_tsc;
	static DECLARE_MUTEX(mce_read_sem);
	unsigned next;
	char __user *buf = ubuf;
	ssize_t ret;
	int i, err;

	cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	down(&mce_read_sem);
	next = rcu_dereference(mcelog.next);

	/* Only supports full reads right now */
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
		ret = -EINVAL;
		goto out;
	}

	err = 0;
	for (i = 0; i < next; i++) {
		unsigned long start = jiffies;
		int timed_out = 0;

		/* Give a writer that reserved this slot a short while to
		   finish filling it in. */
		while (!mcelog.entry[i].finished) {
			if (!time_before(jiffies, start + 2)) {
				/*
				 * Writer never completed: wipe the slot and
				 * skip it. Merely restarting the wait here
				 * would spin forever, since the wiped entry
				 * still has finished == 0.
				 */
				memset(mcelog.entry + i, 0, sizeof(struct mce));
				timed_out = 1;
				break;
			}
			cpu_relax();
		}
		if (timed_out)
			continue;

		smp_rmb();
		err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
		buf += sizeof(struct mce);
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	synchronize_sched();

	/* Collect entries that were still getting written before the synchronize. */

	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
	for (i = next; i < MCE_LOG_LEN; i++) {
		/* Only take entries stamped before the TSC snapshot of the
		   CPU that wrote them. */
		if (mcelog.entry[i].finished &&
		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
			err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
			smp_rmb();
			buf += sizeof(struct mce);
			memset(&mcelog.entry[i], 0, sizeof(struct mce));
		}
	}
	ret = err ? -EFAULT : buf - ubuf;
out:
	up(&mce_read_sem);
	kfree(cpu_tsc);
	return ret;
}

static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
{
	int __user *p = (int __user *)arg;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM; 
	switch (cmd) {
	case MCE_GET_RECORD_LEN: 
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);		
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;
		do { 
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags); 
		return put_user(flags, p); 
	}
	default:
		return -ENOTTY; 
	} 
}

/* File operations of the mcelog device: only read and ioctl are provided. */
static struct file_operations mce_chrdev_ops = {
	.read = mce_read,
	.ioctl = mce_ioctl,
};

static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

/* 
 * Old style boot options parsing. Only for compatibility. 
 */

/* "nomce" boot option: keep machine checks from being initialized. */
static int __init mcheck_disable(char *str)
{
	/* The option takes no value; @str is ignored. */
	mce_dont_init = 1;

	/* Returns 1, like mcheck_enable(). */
	return 1;
}

/* mce=off disables machine check. Note you can reenable it later
   using sysfs.
   mce=TOLERANCELEVEL (number, see above)
   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
   mce=nobootlog Don't log MCEs from before booting.
   Anything else is reported and ignored. Always returns 1. */
static int __init mcheck_enable(char *str)
{
	/* Skip the '=' that separates the option name from its value. */
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		mce_dont_init = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
		/* "bootlog" starts with 'b', "nobootlog" does not. */
		mce_bootlog = str[0] == 'b';
	else if (isdigit(str[0]))
		get_option(&str, &tolerant);
	else
		/* Terminate the line so the next message starts cleanly. */
		printk("mce= argument %s ignored. Please use /sys\n", str);
	return 1;
}

/* Boot option hooks: "nomce" -> mcheck_disable(), "mce" -> mcheck_enable()
   (the latter strips a leading '=' from its argument itself). */
__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);

/* 
 * Sysfs support
 */ 

/*
 * Resume hook of the machinecheck sysdev class.
 *
 * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
 * Only one CPU is active at this time, the others get readded later using
 * CPU hotplug.
 */
static int mce_resume(struct sys_device *dev)
{
	/* Re-run the per-CPU setup on the CPU executing the resume. */
	mce_init(NULL);

	return 0;
}

/*
 * Reinit MCEs after user configuration changes (bank control masks or
 * check_interval written through sysfs): stop the poll, re-run mce_init()
 * on every CPU and restart the poll with the current interval.
 */
static void mce_restart(void)
{
	/*
	 * Cancel unconditionally: by the time we run, check_interval already
	 * holds the NEW value. If polling was just switched off (new value
	 * 0) the work queued under the old interval is still pending and
	 * must be removed. Cancelling work that is not pending is harmless.
	 */
	cancel_delayed_work(&mcheck_work);
	/* Timer race is harmless here */
	on_each_cpu(mce_init, NULL, 1, 1);
	if (check_interval)
		schedule_delayed_work(&mcheck_work, check_interval*HZ);
}

/* sysdev class "machinecheck"; its resume hook re-initializes machine
   checks through mce_resume(). */
static struct sysdev_class mce_sysclass = {
	.resume = mce_resume,
	set_kset_name("machinecheck"),
};

/* One sys_device per CPU; registered by mce_create_device(). */
DEFINE_PER_CPU(struct sys_device, device_mce);

/* Why are there no generic functions for this? */
/*
 * ACCESSOR(name, var, start) generates a sysdev attribute for 'var':
 *   show_<name>() prints var as "%lx\n" (hex, cast to unsigned long);
 *   set_<name>()  parses the buffer with simple_strtoul(buf, &end, 0),
 *                 returns -EINVAL when nothing could be parsed, otherwise
 *                 stores the value in var, executes 'start' and returns the
 *                 number of characters consumed;
 *   attr_<name>   the 0644 attribute tying both together (via SYSDEV_ATTR).
 * 'start' may be empty when no action is needed after a store.
 */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s, char *buf) { 	   	   \
		return sprintf(buf, "%lx\n", (unsigned long)var);		   \
	} 									   \
	static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
		char *end; 							   \
		unsigned long new = simple_strtoul(buf, &end, 0); 		   \
		if (end == buf) return -EINVAL;					   \
		var = new;							   \
		start; 								   \
		return end-buf;		     					   \
	}									   \
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);

/* One bankNctl attribute per entry of bank[]; a store re-initializes
   machine checks on all CPUs through mce_restart(). */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())
/* The bank attributes indexed by bank number, so that only the first
   'banks' of them can be created/removed per CPU. */
static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
/* Changing 'tolerant' needs no re-initialization (empty start action). */
ACCESSOR(tolerant,tolerant,)
/* Changing check_interval restarts the polling work via mce_restart(). */
ACCESSOR(check_interval,check_interval,mce_restart())

/* Per cpu sysdev init.  All of the cpus still share the same ctl bank */
static __cpuinit int mce_create_device(unsigned int cpu)
{
	int err;
	int i;
	if (!mce_available(&cpu_data[cpu]))
		return -EIO;

	per_cpu(device_mce,cpu).id = cpu;
	per_cpu(device_mce,cpu).cls = &mce_sysclass;

	err = sysdev_register(&per_cpu(device_mce,cpu));

	if (!err) {
		for (i = 0; i < banks; i++)
			sysdev_create_file(&per_cpu(device_mce,cpu),
				bank_attributes[i]);
		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
	}
	return err;
}

#ifdef CONFIG_HOTPLUG_CPU
static void mce_remove_device(unsigned int cpu)
{
	int i;

	for (i = 0; i < banks; i++)
		sysdev_remove_file(&per_cpu(device_mce,cpu),
			bank_attributes[i]);
	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
	sysdev_unregister(&per_cpu(device_mce,cpu));
	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}

/*
 * Get notified when a cpu comes on/off. Be hotplug friendly.
 * CPU_ONLINE creates the CPU's sysdev, CPU_DEAD removes it; every other
 * action is ignored. Always returns NOTIFY_OK.
 */
static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	if (action == CPU_ONLINE)
		mce_create_device(cpu);
	else if (action == CPU_DEAD)
		mce_remove_device(cpu);

	return NOTIFY_OK;
}

/* Hotplug notifier that routes CPU events to mce_cpu_callback(). */
static struct notifier_block mce_cpu_notifier = {
	.notifier_call = mce_cpu_callback,
};
#endif

/*
 * Device initcall: register the machinecheck sysdev class, create the
 * per-CPU devices for all online CPUs, hook into CPU hotplug and register
 * the mcelog misc device.
 *
 * Returns -EIO when the boot CPU has no MCE/MCA support, the error from
 * sysdev_class_register() if that fails, and 0 otherwise. Failures of the
 * per-CPU device creation and of misc_register() are not propagated.
 */
static __init int mce_init_device(void)
{
	int err;
	int i = 0;

	if (!mce_available(&boot_cpu_data))
		return -EIO;
	err = sysdev_class_register(&mce_sysclass);
	/* Without the class there is nothing the devices could attach to. */
	if (err)
		return err;

	for_each_online_cpu(i) {
		mce_create_device(i);
	}

	register_hotcpu_notifier(&mce_cpu_notifier);
	misc_register(&mce_log_device);
	return err;
}

device_initcall(mce_init_device);
Commit	Line	Data
1da177e4 LT	1	/*
	2	* Machine check handler.
	3	* K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
	4	* Rest from unknown author(s).
	5	* 2004 Andi Kleen. Rewrote most of it.
	6	*/
	7
	8	#include <linux/init.h>
	9	#include <linux/types.h>
	10	#include <linux/kernel.h>
	11	#include <linux/sched.h>
	12	#include <linux/string.h>
	13	#include <linux/rcupdate.h>
	14	#include <linux/kallsyms.h>
	15	#include <linux/sysdev.h>
	16	#include <linux/miscdevice.h>
	17	#include <linux/fs.h>
a9415644	18	#include <linux/capability.h>
91c6d400 AK	19	#include <linux/cpu.h>
91c6d400 AK	20	#include <linux/percpu.h>
8c566ef5	21	#include <linux/ctype.h>
1da177e4 LT	22	#include <asm/processor.h>
	23	#include <asm/msr.h>
	24	#include <asm/mce.h>
	25	#include <asm/kdebug.h>
	26	#include <asm/uaccess.h>
0a9c3ee7	27	#include <asm/smp.h>
1da177e4 LT	28
1da177e4 LT	29	#define MISC_MCELOG_MINOR 227
73ca5358	30	#define NR_BANKS 6
1da177e4	31
553f265f AK	32	atomic_t mce_entry;
553f265f AK	33
1da177e4 LT	34	static int mce_dont_init;
	35
	36	/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
	37	3: never panic or exit (for testing only) */
	38	static int tolerant = 1;
	39	static int banks;
	40	static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
	41	static unsigned long console_logged;
	42	static int notify_user;
94ad8474	43	static int rip_msr;
e583538f	44	static int mce_bootlog = 1;
1da177e4 LT	45
	46	/*
	47	* Lockless MCE logging infrastructure.
	48	* This avoids deadlocks on printk locks without having to break locks. Also
	49	* separate MCEs from kernel messages to avoid bogus bug reports.
	50	*/
	51
	52	struct mce_log mcelog = {
	53	MCE_LOG_SIGNATURE,
	54	MCE_LOG_LEN,
	55	};
	56
	57	void mce_log(struct mce *mce)
	58	{
	59	unsigned next, entry;
	60	mce->finished = 0;
7644143c	61	wmb();
1da177e4 LT	62	for (;;) {
1da177e4 LT	63	entry = rcu_dereference(mcelog.next);
7644143c MW	64	/* The rmb forces the compiler to reload next in each
	65	iteration */
	66	rmb();
673242c1 AK	67	for (;;) {
	68	/* When the buffer fills up discard new entries. Assume
	69	that the earlier errors are the more interesting. */
	70	if (entry >= MCE_LOG_LEN) {
	71	set_bit(MCE_OVERFLOW, &mcelog.flags);
	72	return;
	73	}
	74	/* Old left over entry. Skip. */
	75	if (mcelog.entry[entry].finished) {
	76	entry++;
	77	continue;
	78	}
7644143c	79	break;
1da177e4	80	}
1da177e4 LT	81	smp_rmb();
	82	next = entry + 1;
	83	if (cmpxchg(&mcelog.next, entry, next) == entry)
	84	break;
	85	}
	86	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
7644143c	87	wmb();
1da177e4	88	mcelog.entry[entry].finished = 1;
7644143c	89	wmb();
1da177e4 LT	90
	91	if (!test_and_set_bit(0, &console_logged))
	92	notify_user = 1;
	93	}
	94
	95	static void print_mce(struct mce *m)
	96	{
	97	printk(KERN_EMERG "\n"
4855170f	98	KERN_EMERG "HARDWARE ERROR\n"
1da177e4 LT	99	KERN_EMERG
	100	"CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	101	m->cpu, m->mcgstatus, m->bank, m->status);
	102	if (m->rip) {
	103	printk(KERN_EMERG
	104	"RIP%s %02x:<%016Lx> ",
	105	!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
	106	m->cs, m->rip);
	107	if (m->cs == __KERNEL_CS)
	108	print_symbol("{%s}", m->rip);
	109	printk("\n");
	110	}
	111	printk(KERN_EMERG "TSC %Lx ", m->tsc);
	112	if (m->addr)
	113	printk("ADDR %Lx ", m->addr);
	114	if (m->misc)
	115	printk("MISC %Lx ", m->misc);
	116	printk("\n");
4855170f AK	117	printk(KERN_EMERG "This is not a software problem!\n");
	118	printk(KERN_EMERG
	119	"Run through mcelog --ascii to decode and contact your hardware vendor\n");
1da177e4 LT	120	}
	121
	122	static void mce_panic(char msg, struct mce backup, unsigned long start)
	123	{
	124	int i;
	125	oops_begin();
	126	for (i = 0; i < MCE_LOG_LEN; i++) {
	127	unsigned long tsc = mcelog.entry[i].tsc;
	128	if (time_before(tsc, start))
	129	continue;
	130	print_mce(&mcelog.entry[i]);
	131	if (backup && mcelog.entry[i].tsc == backup->tsc)
	132	backup = NULL;
	133	}
	134	if (backup)
	135	print_mce(backup);
	136	if (tolerant >= 3)
	137	printk("Fake panic: %s\n", msg);
	138	else
	139	panic(msg);
	140	}
	141
	142	static int mce_available(struct cpuinfo_x86 *c)
	143	{
3d1712c9	144	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
1da177e4 LT	145	}
1da177e4 LT	146
94ad8474 AK	147	static inline void mce_get_rip(struct mce m, struct pt_regs regs)
	148	{
	149	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
	150	m->rip = regs->rip;
	151	m->cs = regs->cs;
	152	} else {
	153	m->rip = 0;
	154	m->cs = 0;
	155	}
	156	if (rip_msr) {
	157	/* Assume the RIP in the MSR is exact. Is this true? */
	158	m->mcgstatus \|= MCG_STATUS_EIPV;
	159	rdmsrl(rip_msr, m->rip);
	160	m->cs = 0;
	161	}
	162	}
	163
1da177e4 LT	164	/*
	165	* The actual machine check handler
	166	*/
	167
	168	void do_machine_check(struct pt_regs * regs, long error_code)
	169	{
	170	struct mce m, panicm;
	171	int nowayout = (tolerant < 1);
	172	int kill_it = 0;
	173	u64 mcestart = 0;
	174	int i;
	175	int panicm_found = 0;
	176
553f265f AK	177	atomic_inc(&mce_entry);
553f265f AK	178
1da177e4	179	if (regs)
6e3f3617	180	notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
1da177e4	181	if (!banks)
553f265f	182	goto out2;
1da177e4 LT	183
1da177e4 LT	184	memset(&m, 0, sizeof(struct mce));
151f8cc1	185	m.cpu = smp_processor_id();
1da177e4 LT	186	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	187	if (!(m.mcgstatus & MCG_STATUS_RIPV))
	188	kill_it = 1;
	189
	190	rdtscll(mcestart);
	191	barrier();
	192
	193	for (i = 0; i < banks; i++) {
	194	if (!bank[i])
	195	continue;
	196
	197	m.misc = 0;
	198	m.addr = 0;
	199	m.bank = i;
	200	m.tsc = 0;
	201
	202	rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
	203	if ((m.status & MCI_STATUS_VAL) == 0)
	204	continue;
	205
	206	if (m.status & MCI_STATUS_EN) {
	207	/* In theory _OVER could be a nowayout too, but
	208	assume any overflowed errors were no fatal. */
	209	nowayout \|= !!(m.status & MCI_STATUS_PCC);
	210	kill_it \|= !!(m.status & MCI_STATUS_UC);
	211	}
	212
	213	if (m.status & MCI_STATUS_MISCV)
	214	rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
	215	if (m.status & MCI_STATUS_ADDRV)
	216	rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
	217
94ad8474	218	mce_get_rip(&m, regs);
d5172f26	219	if (error_code >= 0)
1da177e4 LT	220	rdtscll(m.tsc);
1da177e4 LT	221	wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
d5172f26 AK	222	if (error_code != -2)
d5172f26 AK	223	mce_log(&m);
1da177e4 LT	224
	225	/* Did this bank cause the exception? */
	226	/* Assume that the bank with uncorrectable errors did it,
	227	and that there is only a single one. */
	228	if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
	229	panicm = m;
	230	panicm_found = 1;
	231	}
	232
9f158333	233	add_taint(TAINT_MACHINE_CHECK);
1da177e4 LT	234	}
	235
	236	/* Never do anything final in the polling timer */
	237	if (!regs)
	238	goto out;
	239
	240	/* If we didn't find an uncorrectable error, pick
	241	the last one (shouldn't happen, just being safe). */
	242	if (!panicm_found)
	243	panicm = m;
	244	if (nowayout)
	245	mce_panic("Machine check", &panicm, mcestart);
	246	if (kill_it) {
	247	int user_space = 0;
	248
	249	if (m.mcgstatus & MCG_STATUS_RIPV)
	250	user_space = panicm.rip && (panicm.cs & 3);
	251
	252	/* When the machine was in user space and the CPU didn't get
	253	confused it's normally not necessary to panic, unless you
	254	are paranoid (tolerant == 0)
	255
	256	RED-PEN could be more tolerant for MCEs in idle,
	257	but most likely they occur at boot anyways, where
	258	it is best to just halt the machine. */
	259	if ((!user_space && (panic_on_oops \|\| tolerant < 2)) \|\|
	260	(unsigned)current->pid <= 1)
	261	mce_panic("Uncorrected machine check", &panicm, mcestart);
	262
	263	/* do_exit takes an awful lot of locks and has as
	264	slight risk of deadlocking. If you don't want that
	265	don't set tolerant >= 2 */
	266	if (tolerant < 3)
	267	do_exit(SIGBUS);
	268	}
	269
	270	out:
	271	/* Last thing done in the machine check exception to clear state. */
	272	wrmsrl(MSR_IA32_MCG_STATUS, 0);
553f265f AK	273	out2:
553f265f AK	274	atomic_dec(&mce_entry);
1da177e4 LT	275	}
1da177e4 LT	276
15d5f839 DZ	277	#ifdef CONFIG_X86_MCE_INTEL
	278	/***
	279	* mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
	280	* @cpu: The CPU on which the event occured.
	281	* @status: Event status information
	282	*
	283	* This function should be called by the thermal interrupt after the
	284	* event has been processed and the decision was made to log the event
	285	* further.
	286	*
	287	* The status parameter will be saved to the 'status' field of 'struct mce'
	288	* and historically has been the register value of the
	289	* MSR_IA32_THERMAL_STATUS (Intel) msr.
	290	*/
	291	void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
	292	{
	293	struct mce m;
	294
	295	memset(&m, 0, sizeof(m));
	296	m.cpu = cpu;
	297	m.bank = MCE_THERMAL_BANK;
	298	m.status = status;
	299	rdtscll(m.tsc);
	300	mce_log(&m);
	301	}
	302	#endif /* CONFIG_X86_MCE_INTEL */
	303
1da177e4 LT	304	/*
	305	* Periodic polling timer for "silent" machine check errors.
	306	*/
	307
	308	static int check_interval = 5 * 60; /* 5 minutes */
	309	static void mcheck_timer(void *data);
	310	static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
	311
	312	static void mcheck_check_cpu(void *info)
	313	{
	314	if (mce_available(&current_cpu_data))
	315	do_machine_check(NULL, 0);
	316	}
	317
	318	static void mcheck_timer(void *data)
	319	{
	320	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
	321	schedule_delayed_work(&mcheck_work, check_interval * HZ);
	322
	323	/*
	324	* It's ok to read stale data here for notify_user and
	325	* console_logged as we'll simply get the updated versions
	326	* on the next mcheck_timer execution and atomic operations
	327	* on console_logged act as synchronization for notify_user
	328	* writes.
	329	*/
	330	if (notify_user && console_logged) {
	331	notify_user = 0;
	332	clear_bit(0, &console_logged);
	333	printk(KERN_INFO "Machine check events logged\n");
	334	}
	335	}
	336
	337
	338	static __init int periodic_mcheck_init(void)
	339	{
	340	if (check_interval)
	341	schedule_delayed_work(&mcheck_work, check_interval*HZ);
	342	return 0;
	343	}
	344	__initcall(periodic_mcheck_init);
	345
	346
	347	/*
	348	* Initialize Machine Checks for a CPU.
	349	*/
	350	static void mce_init(void *dummy)
	351	{
	352	u64 cap;
	353	int i;
	354
	355	rdmsrl(MSR_IA32_MCG_CAP, cap);
	356	banks = cap & 0xff;
	357	if (banks > NR_BANKS) {
	358	printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
	359	banks = NR_BANKS;
	360	}
94ad8474 AK	361	/* Use accurate RIP reporting if available. */
	362	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
	363	rip_msr = MSR_IA32_MCG_EIP;
1da177e4 LT	364
	365	/* Log the machine checks left over from the previous reset.
	366	This also clears all registers */
d5172f26	367	do_machine_check(NULL, mce_bootlog ? -1 : -2);
1da177e4 LT	368
	369	set_in_cr4(X86_CR4_MCE);
	370
	371	if (cap & MCG_CTL_P)
	372	wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
	373
	374	for (i = 0; i < banks; i++) {
	375	wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
	376	wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	377	}
	378	}
	379
	380	/* Add per CPU specific workarounds here */
e6982c67	381	static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1da177e4 LT	382	{
	383	/* This should be disabled by the BIOS, but isn't always */
	384	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
	385	/* disable GART TBL walk error reporting, which trips off
	386	incorrectly with the IOMMU & 3ware & Cerberus. */
	387	clear_bit(10, &bank[4]);
e583538f AK	388	/* Lots of broken BIOS around that don't clear them
	389	by default and leave crap in there. Don't log. */
	390	mce_bootlog = 0;
1da177e4	391	}
e583538f	392
1da177e4 LT	393	}
1da177e4 LT	394
e6982c67	395	static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
1da177e4 LT	396	{
	397	switch (c->x86_vendor) {
	398	case X86_VENDOR_INTEL:
	399	mce_intel_feature_init(c);
	400	break;
89b831ef JS	401	case X86_VENDOR_AMD:
	402	mce_amd_feature_init(c);
	403	break;
1da177e4 LT	404	default:
	405	break;
	406	}
	407	}
	408
	409	/*
	410	* Called for each booted CPU to set up machine checks.
	411	* Must be called with preempt off.
	412	*/
e6982c67	413	void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1da177e4	414	{
7ded5689	415	static cpumask_t mce_cpus = CPU_MASK_NONE;
1da177e4 LT	416
	417	mce_cpu_quirks(c);
	418
	419	if (mce_dont_init \|\|
	420	cpu_test_and_set(smp_processor_id(), mce_cpus) \|\|
	421	!mce_available(c))
	422	return;
	423
	424	mce_init(NULL);
	425	mce_cpu_features(c);
	426	}
	427
	428	/*
	429	* Character device to read and clear the MCE log.
	430	*/
	431
	432	static void collect_tscs(void *data)
	433	{
	434	unsigned long cpu_tsc = (unsigned long )data;
	435	rdtscll(cpu_tsc[smp_processor_id()]);
	436	}
	437
	438	static ssize_t mce_read(struct file filp, char __user ubuf, size_t usize, loff_t *off)
	439	{
f0de53bb	440	unsigned long *cpu_tsc;
1da177e4 LT	441	static DECLARE_MUTEX(mce_read_sem);
	442	unsigned next;
	443	char __user *buf = ubuf;
	444	int i, err;
	445
f0de53bb AK	446	cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
	447	if (!cpu_tsc)
	448	return -ENOMEM;
	449
1da177e4 LT	450	down(&mce_read_sem);
	451	next = rcu_dereference(mcelog.next);
	452
	453	/* Only supports full reads right now */
	454	if (off != 0 \|\| usize < MCE_LOG_LENsizeof(struct mce)) {
	455	up(&mce_read_sem);
f0de53bb	456	kfree(cpu_tsc);
1da177e4 LT	457	return -EINVAL;
	458	}
	459
	460	err = 0;
673242c1 AK	461	for (i = 0; i < next; i++) {
	462	unsigned long start = jiffies;
	463	while (!mcelog.entry[i].finished) {
	464	if (!time_before(jiffies, start + 2)) {
	465	memset(mcelog.entry + i,0, sizeof(struct mce));
	466	continue;
	467	}
	468	cpu_relax();
	469	}
1da177e4 LT	470	smp_rmb();
	471	err \|= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
	472	buf += sizeof(struct mce);
	473	}
	474
	475	memset(mcelog.entry, 0, next * sizeof(struct mce));
	476	mcelog.next = 0;
	477
b2b18660	478	synchronize_sched();
1da177e4 LT	479
	480	/* Collect entries that were still getting written before the synchronize. */
	481
	482	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
	483	for (i = next; i < MCE_LOG_LEN; i++) {
	484	if (mcelog.entry[i].finished &&
	485	mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
	486	err \|= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
	487	smp_rmb();
	488	buf += sizeof(struct mce);
	489	memset(&mcelog.entry[i], 0, sizeof(struct mce));
	490	}
	491	}
	492	up(&mce_read_sem);
f0de53bb	493	kfree(cpu_tsc);
1da177e4 LT	494	return err ? -EFAULT : buf - ubuf;
	495	}
	496
	497	static int mce_ioctl(struct inode i, struct file f,unsigned int cmd, unsigned long arg)
	498	{
	499	int __user p = (int __user )arg;
	500	if (!capable(CAP_SYS_ADMIN))
	501	return -EPERM;
	502	switch (cmd) {
	503	case MCE_GET_RECORD_LEN:
	504	return put_user(sizeof(struct mce), p);
	505	case MCE_GET_LOG_LEN:
	506	return put_user(MCE_LOG_LEN, p);
	507	case MCE_GETCLEAR_FLAGS: {
	508	unsigned flags;
	509	do {
	510	flags = mcelog.flags;
	511	} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
	512	return put_user(flags, p);
	513	}
	514	default:
	515	return -ENOTTY;
	516	}
	517	}
	518
	519	static struct file_operations mce_chrdev_ops = {
	520	.read = mce_read,
	521	.ioctl = mce_ioctl,
	522	};
	523
	524	static struct miscdevice mce_log_device = {
	525	MISC_MCELOG_MINOR,
	526	"mcelog",
	527	&mce_chrdev_ops,
	528	};
	529
/*
 * Old style boot options parsing. Only for compatibility.
 */
	533
	534	static int __init mcheck_disable(char *str)
	535	{
	536	mce_dont_init = 1;
9b41046c	537	return 1;
1da177e4 LT	538	}
	539
	540	/* mce=off disables machine check. Note you can reenable it later
d5172f26	541	using sysfs.
8c566ef5	542	mce=TOLERANCELEVEL (number, see above)
e583538f AK	543	mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
e583538f AK	544	mce=nobootlog Don't log MCEs from before booting. */
1da177e4 LT	545	static int __init mcheck_enable(char *str)
1da177e4 LT	546	{
d5172f26 AK	547	if (*str == '=')
d5172f26 AK	548	str++;
1da177e4 LT	549	if (!strcmp(str, "off"))
1da177e4 LT	550	mce_dont_init = 1;
e583538f AK	551	else if (!strcmp(str, "bootlog") \|\| !strcmp(str,"nobootlog"))
e583538f AK	552	mce_bootlog = str[0] == 'b';
8c566ef5 AK	553	else if (isdigit(str[0]))
8c566ef5 AK	554	get_option(&str, &tolerant);
1da177e4 LT	555	else
1da177e4 LT	556	printk("mce= argument %s ignored. Please use /sys", str);
9b41046c	557	return 1;
1da177e4 LT	558	}
	559
	560	__setup("nomce", mcheck_disable);
	561	__setup("mce", mcheck_enable);
	562
/*
 * Sysfs support
 */
	566
413588c7 AK	567	/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
	568	Only one CPU is active at this time, the others get readded later using
	569	CPU hotplug. */
1da177e4 LT	570	static int mce_resume(struct sys_device *dev)
1da177e4 LT	571	{
413588c7	572	mce_init(NULL);
1da177e4 LT	573	return 0;
	574	}
	575
	576	/* Reinit MCEs after user configuration changes */
	577	static void mce_restart(void)
	578	{
	579	if (check_interval)
	580	cancel_delayed_work(&mcheck_work);
	581	/* Timer race is harmless here */
	582	on_each_cpu(mce_init, NULL, 1, 1);
	583	if (check_interval)
	584	schedule_delayed_work(&mcheck_work, check_interval*HZ);
	585	}
	586
	587	static struct sysdev_class mce_sysclass = {
	588	.resume = mce_resume,
	589	set_kset_name("machinecheck"),
	590	};
	591
fff2e89f	592	DEFINE_PER_CPU(struct sys_device, device_mce);
1da177e4 LT	593
	594	/* Why are there no generic functions for this? */
	595	#define ACCESSOR(name, var, start) \
	596	static ssize_t show_ ## name(struct sys_device s, char buf) { \
	597	return sprintf(buf, "%lx\n", (unsigned long)var); \
	598	} \
	599	static ssize_t set_ ## name(struct sys_device s,const char buf,size_t siz) { \
	600	char *end; \
	601	unsigned long new = simple_strtoul(buf, &end, 0); \
	602	if (end == buf) return -EINVAL; \
	603	var = new; \
	604	start; \
	605	return end-buf; \
	606	} \
	607	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
	608
	609	ACCESSOR(bank0ctl,bank[0],mce_restart())
	610	ACCESSOR(bank1ctl,bank[1],mce_restart())
	611	ACCESSOR(bank2ctl,bank[2],mce_restart())
	612	ACCESSOR(bank3ctl,bank[3],mce_restart())
	613	ACCESSOR(bank4ctl,bank[4],mce_restart())
73ca5358 SL	614	ACCESSOR(bank5ctl,bank[5],mce_restart())
	615	static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
	616	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
	617	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
1da177e4 LT	618	ACCESSOR(tolerant,tolerant,)
	619	ACCESSOR(check_interval,check_interval,mce_restart())
	620
91c6d400 AK	621	/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
91c6d400 AK	622	static __cpuinit int mce_create_device(unsigned int cpu)
1da177e4 LT	623	{
1da177e4 LT	624	int err;
73ca5358	625	int i;
91c6d400 AK	626	if (!mce_available(&cpu_data[cpu]))
	627	return -EIO;
	628
	629	per_cpu(device_mce,cpu).id = cpu;
	630	per_cpu(device_mce,cpu).cls = &mce_sysclass;
	631
	632	err = sysdev_register(&per_cpu(device_mce,cpu));
	633
	634	if (!err) {
73ca5358 SL	635	for (i = 0; i < banks; i++)
	636	sysdev_create_file(&per_cpu(device_mce,cpu),
	637	bank_attributes[i]);
91c6d400 AK	638	sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
	639	sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
	640	}
	641	return err;
	642	}
	643
	644	#ifdef CONFIG_HOTPLUG_CPU
be6b5a35	645	static void mce_remove_device(unsigned int cpu)
91c6d400	646	{
73ca5358 SL	647	int i;
	648
	649	for (i = 0; i < banks; i++)
	650	sysdev_remove_file(&per_cpu(device_mce,cpu),
	651	bank_attributes[i]);
91c6d400 AK	652	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
	653	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
	654	sysdev_unregister(&per_cpu(device_mce,cpu));
d4c45718	655	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
91c6d400	656	}
91c6d400 AK	657
91c6d400 AK	658	/* Get notified when a cpu comes on/off. Be hotplug friendly. */
be6b5a35	659	static int
91c6d400 AK	660	mce_cpu_callback(struct notifier_block nfb, unsigned long action, void hcpu)
	661	{
	662	unsigned int cpu = (unsigned long)hcpu;
	663
	664	switch (action) {
	665	case CPU_ONLINE:
	666	mce_create_device(cpu);
	667	break;
91c6d400 AK	668	case CPU_DEAD:
	669	mce_remove_device(cpu);
	670	break;
91c6d400 AK	671	}
	672	return NOTIFY_OK;
	673	}
	674
be6b5a35	675	static struct notifier_block mce_cpu_notifier = {
91c6d400 AK	676	.notifier_call = mce_cpu_callback,
91c6d400 AK	677	};
be6b5a35	678	#endif
91c6d400 AK	679
	680	static __init int mce_init_device(void)
	681	{
	682	int err;
	683	int i = 0;
	684
1da177e4 LT	685	if (!mce_available(&boot_cpu_data))
	686	return -EIO;
	687	err = sysdev_class_register(&mce_sysclass);
91c6d400 AK	688
	689	for_each_online_cpu(i) {
	690	mce_create_device(i);
	691	}
	692
be6b5a35	693	register_hotcpu_notifier(&mce_cpu_notifier);
1da177e4 LT	694	misc_register(&mce_log_device);
1da177e4 LT	695	return err;
1da177e4	696	}
91c6d400	697
1da177e4	698	device_initcall(mce_init_device);