]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - drivers/edac/edac_mc.c
Merge remote branch 'nouveau/drm-nouveau-next' of /ssd/git/drm-nouveau-next into...
[mirror_ubuntu-zesty-kernel.git] / drivers / edac / edac_mc.c
CommitLineData
da9bb1d2
AC
1/*
2 * edac_mc kernel module
49c0dab7 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
da9bb1d2
AC
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
da9bb1d2
AC
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/sysdev.h>
29#include <linux/ctype.h>
c0d12172 30#include <linux/edac.h>
da9bb1d2
AC
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
20bcb7a8 34#include "edac_core.h"
7c9281d7 35#include "edac_module.h"
da9bb1d2 36
da9bb1d2 37/* lock to memory controller's control array */
63b7df91 38static DEFINE_MUTEX(mem_ctls_mutex);
ff6ac2a6 39static LIST_HEAD(mc_devices);
da9bb1d2 40
da9bb1d2
AC
41#ifdef CONFIG_EDAC_DEBUG
42
2da1c119 43static void edac_mc_dump_channel(struct channel_info *chan)
da9bb1d2
AC
44{
45 debugf4("\tchannel = %p\n", chan);
46 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
47 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
48 debugf4("\tchannel->label = '%s'\n", chan->label);
49 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
50}
51
2da1c119 52static void edac_mc_dump_csrow(struct csrow_info *csrow)
da9bb1d2
AC
53{
54 debugf4("\tcsrow = %p\n", csrow);
55 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
079708b9 56 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
da9bb1d2
AC
57 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
58 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
59 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
079708b9 60 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
da9bb1d2
AC
61 debugf4("\tcsrow->channels = %p\n", csrow->channels);
62 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
63}
64
2da1c119 65static void edac_mc_dump_mci(struct mem_ctl_info *mci)
da9bb1d2
AC
66{
67 debugf3("\tmci = %p\n", mci);
68 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
69 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
70 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
71 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
72 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
73 mci->nr_csrows, mci->csrows);
37f04581 74 debugf3("\tdev = %p\n", mci->dev);
079708b9 75 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
da9bb1d2
AC
76 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
77}
78
24f9a7fe
BP
79#endif /* CONFIG_EDAC_DEBUG */
80
239642fe
BP
81/*
82 * keep those in sync with the enum mem_type
83 */
/* Human-readable names, indexed by position; order must match enum mem_type. */
const char *edac_mem_types[] = {
	/*  0 */ "Empty csrow",
	/*  1 */ "Reserved csrow type",
	/*  2 */ "Unknown csrow type",
	/*  3 */ "Fast page mode RAM",
	/*  4 */ "Extended data out RAM",
	/*  5 */ "Burst Extended data out RAM",
	/*  6 */ "Single data rate SDRAM",
	/*  7 */ "Registered single data rate SDRAM",
	/*  8 */ "Double data rate SDRAM",
	/*  9 */ "Registered Double data rate SDRAM",
	/* 10 */ "Rambus DRAM",
	/* 11 */ "Unbuffered DDR2 RAM",
	/* 12 */ "Fully buffered DDR2",
	/* 13 */ "Registered DDR2 RAM",
	/* 14 */ "Rambus XDR",
	/* 15 */ "Unbuffered DDR3 RAM",
	/* 16 */ "Registered DDR3 RAM",
};
103EXPORT_SYMBOL_GPL(edac_mem_types);
104
da9bb1d2
AC
105/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
106 * Adjust 'ptr' so that its alignment is at least as stringent as what the
107 * compiler would provide for X and return the aligned result.
108 *
109 * If 'size' is a constant, the compiler will optimize this whole function
110 * down to either a no-op or the addition of a constant to the value of 'ptr'.
111 */
/*
 * Round 'ptr' up to the alignment the compiler would give an object of
 * 'size' bytes.
 *
 * @ptr:  possibly unaligned address (may be an offset from address 0)
 * @size: sizeof() the object that will live at the returned address
 *
 * Returns 'ptr' itself when it is already suitably aligned (or when the
 * object is a single byte), otherwise the next higher aligned address.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long)ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;	/* byte-sized objects need no alignment */

	/*
	 * The misalignment is a property of the address, not of the object
	 * size: taking 'size % align' here would leave a misaligned pointer
	 * untouched whenever size happens to be a multiple of align.
	 */
	r = addr % align;
	if (r == 0)
		return ptr;

	return (void *)(addr + align - r);
}
138
da9bb1d2
AC
139/**
140 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
141 * @sz_pvt: size of private storage needed
142 * @nr_csrows: Number of CSROWS needed for this MC
143 * @nr_chans: Number of channels for the MC
144 *
145 * Everything is kmalloc'ed as one big chunk - more efficient.
146 * Only can be used if all structures have the same lifetime - otherwise
147 * you have to allocate and initialize your own structures.
148 *
149 * Use edac_mc_free() to free mc structures allocated by this function.
150 *
151 * Returns:
152 * NULL allocation failed
153 * struct mem_ctl_info pointer
154 */
155struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
b8f6f975 156 unsigned nr_chans, int edac_index)
da9bb1d2
AC
157{
158 struct mem_ctl_info *mci;
159 struct csrow_info *csi, *csrow;
160 struct channel_info *chi, *chp, *chan;
161 void *pvt;
162 unsigned size;
163 int row, chn;
8096cfaf 164 int err;
da9bb1d2
AC
165
166 /* Figure out the offsets of the various items from the start of an mc
167 * structure. We want the alignment of each item to be at least as
168 * stringent as what the compiler would provide if we could simply
169 * hardcode everything into a single struct.
170 */
079708b9 171 mci = (struct mem_ctl_info *)0;
7391c6dc
DT
172 csi = edac_align_ptr(&mci[1], sizeof(*csi));
173 chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
e27e3dac 174 pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
079708b9 175 size = ((unsigned long)pvt) + sz_pvt;
da9bb1d2 176
8096cfaf
DT
177 mci = kzalloc(size, GFP_KERNEL);
178 if (mci == NULL)
da9bb1d2
AC
179 return NULL;
180
181 /* Adjust pointers so they point within the memory we just allocated
182 * rather than an imaginary chunk of memory located at address 0.
183 */
079708b9
DT
184 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
185 chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
186 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
da9bb1d2 187
b8f6f975
DT
188 /* setup index and various internal pointers */
189 mci->mc_idx = edac_index;
da9bb1d2
AC
190 mci->csrows = csi;
191 mci->pvt_info = pvt;
192 mci->nr_csrows = nr_csrows;
193
194 for (row = 0; row < nr_csrows; row++) {
195 csrow = &csi[row];
196 csrow->csrow_idx = row;
197 csrow->mci = mci;
198 csrow->nr_channels = nr_chans;
199 chp = &chi[row * nr_chans];
200 csrow->channels = chp;
201
202 for (chn = 0; chn < nr_chans; chn++) {
203 chan = &chp[chn];
204 chan->chan_idx = chn;
205 chan->csrow = csrow;
206 }
207 }
208
81d87cb1 209 mci->op_state = OP_ALLOC;
6fe1108f 210 INIT_LIST_HEAD(&mci->grp_kobj_list);
81d87cb1 211
8096cfaf
DT
212 /*
213 * Initialize the 'root' kobj for the edac_mc controller
214 */
215 err = edac_mc_register_sysfs_main_kobj(mci);
216 if (err) {
217 kfree(mci);
218 return NULL;
219 }
220
221 /* at this point, the root kobj is valid, and in order to
222 * 'free' the object, then the function:
223 * edac_mc_unregister_sysfs_main_kobj() must be called
224 * which will perform kobj unregistration and the actual free
225 * will occur during the kobject callback operation
226 */
da9bb1d2
AC
227 return mci;
228}
9110540f 229EXPORT_SYMBOL_GPL(edac_mc_alloc);
da9bb1d2 230
da9bb1d2 231/**
8096cfaf
DT
232 * edac_mc_free
233 * 'Free' a previously allocated 'mci' structure
da9bb1d2 234 * @mci: pointer to a struct mem_ctl_info structure
da9bb1d2
AC
235 */
/*
 * Release an mci obtained from edac_mc_alloc(): drop its main sysfs
 * kobject first, then free the backing memory.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* undo edac_mc_register_sysfs_main_kobj() */
	edac_mc_unregister_sysfs_main_kobj(mci);

	/* the mci instance memory itself is freed here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
da9bb1d2 246
bce19683 247
939747bd 248/**
bce19683
DT
249 * find_mci_by_dev
250 *
251 * scan list of controllers looking for the one that manages
252 * the 'dev' device
939747bd 253 * @dev: pointer to a struct device related with the MCI
bce19683 254 */
939747bd 255struct mem_ctl_info *find_mci_by_dev(struct device *dev)
da9bb1d2
AC
256{
257 struct mem_ctl_info *mci;
258 struct list_head *item;
259
537fba28 260 debugf3("%s()\n", __func__);
da9bb1d2
AC
261
262 list_for_each(item, &mc_devices) {
263 mci = list_entry(item, struct mem_ctl_info, link);
264
37f04581 265 if (mci->dev == dev)
da9bb1d2
AC
266 return mci;
267 }
268
269 return NULL;
270}
939747bd 271EXPORT_SYMBOL_GPL(find_mci_by_dev);
da9bb1d2 272
81d87cb1
DJ
273/*
274 * handler for EDAC to check if NMI type handler has asserted interrupt
275 */
276static int edac_mc_assert_error_check_and_clear(void)
277{
66ee2f94 278 int old_state;
81d87cb1 279
079708b9 280 if (edac_op_state == EDAC_OPSTATE_POLL)
81d87cb1
DJ
281 return 1;
282
66ee2f94
DJ
283 old_state = edac_err_assert;
284 edac_err_assert = 0;
81d87cb1 285
66ee2f94 286 return old_state;
81d87cb1
DJ
287}
288
289/*
290 * edac_mc_workq_function
291 * performs the operation scheduled by a workq request
292 */
81d87cb1
DJ
293static void edac_mc_workq_function(struct work_struct *work_req)
294{
fbeb4384 295 struct delayed_work *d_work = to_delayed_work(work_req);
81d87cb1 296 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
81d87cb1
DJ
297
298 mutex_lock(&mem_ctls_mutex);
299
bf52fa4a
DT
300 /* if this control struct has movd to offline state, we are done */
301 if (mci->op_state == OP_OFFLINE) {
302 mutex_unlock(&mem_ctls_mutex);
303 return;
304 }
305
81d87cb1
DJ
306 /* Only poll controllers that are running polled and have a check */
307 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
308 mci->edac_check(mci);
309
81d87cb1
DJ
310 mutex_unlock(&mem_ctls_mutex);
311
312 /* Reschedule */
4de78c68 313 queue_delayed_work(edac_workqueue, &mci->work,
052dfb45 314 msecs_to_jiffies(edac_mc_get_poll_msec()));
81d87cb1
DJ
315}
316
317/*
318 * edac_mc_workq_setup
319 * initialize a workq item for this mci
320 * passing in the new delay period in msec
bf52fa4a
DT
321 *
322 * locking model:
323 *
324 * called with the mem_ctls_mutex held
81d87cb1 325 */
bf52fa4a 326static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
81d87cb1
DJ
327{
328 debugf0("%s()\n", __func__);
329
bf52fa4a
DT
330 /* if this instance is not in the POLL state, then simply return */
331 if (mci->op_state != OP_RUNNING_POLL)
332 return;
333
81d87cb1 334 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
81d87cb1
DJ
335 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
336}
337
338/*
339 * edac_mc_workq_teardown
340 * stop the workq processing on this mci
bf52fa4a
DT
341 *
342 * locking model:
343 *
344 * called WITHOUT lock held
81d87cb1 345 */
bf52fa4a 346static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
81d87cb1
DJ
347{
348 int status;
349
00740c58
BP
350 if (mci->op_state != OP_RUNNING_POLL)
351 return;
352
bce19683
DT
353 status = cancel_delayed_work(&mci->work);
354 if (status == 0) {
355 debugf0("%s() not canceled, flush the queue\n",
356 __func__);
bf52fa4a 357
bce19683
DT
358 /* workq instance might be running, wait for it */
359 flush_workqueue(edac_workqueue);
81d87cb1
DJ
360 }
361}
362
363/*
bce19683
DT
364 * edac_mc_reset_delay_period(unsigned long value)
365 *
366 * user space has updated our poll period value, need to
367 * reset our workq delays
81d87cb1 368 */
bce19683 369void edac_mc_reset_delay_period(int value)
81d87cb1 370{
bce19683
DT
371 struct mem_ctl_info *mci;
372 struct list_head *item;
373
374 mutex_lock(&mem_ctls_mutex);
375
376 /* scan the list and turn off all workq timers, doing so under lock
377 */
378 list_for_each(item, &mc_devices) {
379 mci = list_entry(item, struct mem_ctl_info, link);
380
381 if (mci->op_state == OP_RUNNING_POLL)
382 cancel_delayed_work(&mci->work);
383 }
384
385 mutex_unlock(&mem_ctls_mutex);
81d87cb1 386
bce19683
DT
387
388 /* re-walk the list, and reset the poll delay */
bf52fa4a
DT
389 mutex_lock(&mem_ctls_mutex);
390
bce19683
DT
391 list_for_each(item, &mc_devices) {
392 mci = list_entry(item, struct mem_ctl_info, link);
393
394 edac_mc_workq_setup(mci, (unsigned long) value);
395 }
81d87cb1
DJ
396
397 mutex_unlock(&mem_ctls_mutex);
398}
399
bce19683
DT
400
401
2d7bbb91
DT
402/* Return 0 on success, 1 on failure.
403 * Before calling this function, caller must
404 * assign a unique value to mci->mc_idx.
bf52fa4a
DT
405 *
406 * locking model:
407 *
408 * called with the mem_ctls_mutex lock held
2d7bbb91 409 */
079708b9 410static int add_mc_to_global_list(struct mem_ctl_info *mci)
da9bb1d2
AC
411{
412 struct list_head *item, *insert_before;
413 struct mem_ctl_info *p;
da9bb1d2 414
2d7bbb91 415 insert_before = &mc_devices;
da9bb1d2 416
bf52fa4a
DT
417 p = find_mci_by_dev(mci->dev);
418 if (unlikely(p != NULL))
2d7bbb91 419 goto fail0;
da9bb1d2 420
2d7bbb91
DT
421 list_for_each(item, &mc_devices) {
422 p = list_entry(item, struct mem_ctl_info, link);
da9bb1d2 423
2d7bbb91
DT
424 if (p->mc_idx >= mci->mc_idx) {
425 if (unlikely(p->mc_idx == mci->mc_idx))
426 goto fail1;
da9bb1d2 427
2d7bbb91
DT
428 insert_before = item;
429 break;
da9bb1d2 430 }
da9bb1d2
AC
431 }
432
433 list_add_tail_rcu(&mci->link, insert_before);
c0d12172 434 atomic_inc(&edac_handlers);
da9bb1d2 435 return 0;
2d7bbb91 436
052dfb45 437fail0:
2d7bbb91 438 edac_printk(KERN_WARNING, EDAC_MC,
281efb17 439 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
17aa7e03 440 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
2d7bbb91
DT
441 return 1;
442
052dfb45 443fail1:
2d7bbb91 444 edac_printk(KERN_WARNING, EDAC_MC,
052dfb45
DT
445 "bug in low-level driver: attempt to assign\n"
446 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
2d7bbb91 447 return 1;
da9bb1d2
AC
448}
449
e7ecd891 450static void complete_mc_list_del(struct rcu_head *head)
a1d03fcc
DP
451{
452 struct mem_ctl_info *mci;
453
454 mci = container_of(head, struct mem_ctl_info, rcu);
455 INIT_LIST_HEAD(&mci->link);
a1d03fcc
DP
456}
457
e7ecd891 458static void del_mc_from_global_list(struct mem_ctl_info *mci)
a1d03fcc 459{
c0d12172 460 atomic_dec(&edac_handlers);
a1d03fcc 461 list_del_rcu(&mci->link);
a1d03fcc 462 call_rcu(&mci->rcu, complete_mc_list_del);
458e5ff1 463 rcu_barrier();
a1d03fcc
DP
464}
465
5da0831c
DT
466/**
467 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
468 *
469 * If found, return a pointer to the structure.
470 * Else return NULL.
471 *
472 * Caller must hold mem_ctls_mutex.
473 */
079708b9 474struct mem_ctl_info *edac_mc_find(int idx)
5da0831c
DT
475{
476 struct list_head *item;
477 struct mem_ctl_info *mci;
478
479 list_for_each(item, &mc_devices) {
480 mci = list_entry(item, struct mem_ctl_info, link);
481
482 if (mci->mc_idx >= idx) {
483 if (mci->mc_idx == idx)
484 return mci;
485
486 break;
487 }
488 }
489
490 return NULL;
491}
492EXPORT_SYMBOL(edac_mc_find);
493
da9bb1d2 494/**
472678eb
DP
495 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
496 * create sysfs entries associated with mci structure
da9bb1d2 497 * @mci: pointer to the mci structure to be added to the list
2d7bbb91 498 * The unique numeric identifier is taken from mci->mc_idx, which must already be set.
da9bb1d2
AC
499 *
500 * Return:
501 * 0 Success
502 * !0 Failure
503 */
504
505/* FIXME - should a warning be printed if no error detection? correction? */
b8f6f975 506int edac_mc_add_mc(struct mem_ctl_info *mci)
da9bb1d2 507{
537fba28 508 debugf0("%s()\n", __func__);
b8f6f975 509
da9bb1d2
AC
510#ifdef CONFIG_EDAC_DEBUG
511 if (edac_debug_level >= 3)
512 edac_mc_dump_mci(mci);
e7ecd891 513
da9bb1d2
AC
514 if (edac_debug_level >= 4) {
515 int i;
516
517 for (i = 0; i < mci->nr_csrows; i++) {
518 int j;
e7ecd891 519
da9bb1d2
AC
520 edac_mc_dump_csrow(&mci->csrows[i]);
521 for (j = 0; j < mci->csrows[i].nr_channels; j++)
079708b9 522 edac_mc_dump_channel(&mci->csrows[i].
052dfb45 523 channels[j]);
da9bb1d2
AC
524 }
525 }
526#endif
63b7df91 527 mutex_lock(&mem_ctls_mutex);
da9bb1d2
AC
528
529 if (add_mc_to_global_list(mci))
028a7b6d 530 goto fail0;
da9bb1d2
AC
531
532 /* set load time so that error rate can be tracked */
533 mci->start_time = jiffies;
534
9794f33d 535 if (edac_create_sysfs_mci_device(mci)) {
536 edac_mc_printk(mci, KERN_WARNING,
052dfb45 537 "failed to create sysfs device\n");
9794f33d 538 goto fail1;
539 }
da9bb1d2 540
81d87cb1
DJ
541 /* If there IS a check routine, then we are running POLLED */
542 if (mci->edac_check != NULL) {
543 /* This instance is NOW RUNNING */
544 mci->op_state = OP_RUNNING_POLL;
545
546 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
547 } else {
548 mci->op_state = OP_RUNNING_INTERRUPT;
549 }
550
da9bb1d2 551 /* Report action taken */
bf52fa4a 552 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
17aa7e03 553 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
da9bb1d2 554
63b7df91 555 mutex_unlock(&mem_ctls_mutex);
028a7b6d 556 return 0;
da9bb1d2 557
052dfb45 558fail1:
028a7b6d
DP
559 del_mc_from_global_list(mci);
560
052dfb45 561fail0:
63b7df91 562 mutex_unlock(&mem_ctls_mutex);
028a7b6d 563 return 1;
da9bb1d2 564}
9110540f 565EXPORT_SYMBOL_GPL(edac_mc_add_mc);
da9bb1d2 566
da9bb1d2 567/**
472678eb
DP
568 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
569 * remove mci structure from global list
37f04581 570 * @dev: Pointer to 'struct device' representing mci structure to remove.
da9bb1d2 571 *
18dbc337 572 * Return pointer to removed mci structure, or NULL if device not found.
da9bb1d2 573 */
079708b9 574struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
da9bb1d2 575{
18dbc337 576 struct mem_ctl_info *mci;
da9bb1d2 577
bf52fa4a
DT
578 debugf0("%s()\n", __func__);
579
63b7df91 580 mutex_lock(&mem_ctls_mutex);
18dbc337 581
bf52fa4a
DT
582 /* find the requested mci struct in the global list */
583 mci = find_mci_by_dev(dev);
584 if (mci == NULL) {
63b7df91 585 mutex_unlock(&mem_ctls_mutex);
18dbc337
DP
586 return NULL;
587 }
588
da9bb1d2 589 del_mc_from_global_list(mci);
63b7df91 590 mutex_unlock(&mem_ctls_mutex);
bf52fa4a 591
bb31b312 592 /* flush workq processes */
bf52fa4a 593 edac_mc_workq_teardown(mci);
bb31b312
BP
594
595 /* marking MCI offline */
596 mci->op_state = OP_OFFLINE;
597
598 /* remove from sysfs */
bf52fa4a
DT
599 edac_remove_sysfs_mci_device(mci);
600
537fba28 601 edac_printk(KERN_INFO, EDAC_MC,
052dfb45 602 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
17aa7e03 603 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
bf52fa4a 604
18dbc337 605 return mci;
da9bb1d2 606}
9110540f 607EXPORT_SYMBOL_GPL(edac_mc_del_mc);
da9bb1d2 608
2da1c119
AB
609static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
610 u32 size)
da9bb1d2
AC
611{
612 struct page *pg;
613 void *virt_addr;
614 unsigned long flags = 0;
615
537fba28 616 debugf3("%s()\n", __func__);
da9bb1d2
AC
617
618 /* ECC error page was not in our memory. Ignore it. */
079708b9 619 if (!pfn_valid(page))
da9bb1d2
AC
620 return;
621
622 /* Find the actual page structure then map it and fix */
623 pg = pfn_to_page(page);
624
625 if (PageHighMem(pg))
626 local_irq_save(flags);
627
628 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
629
630 /* Perform architecture specific atomic scrub operation */
631 atomic_scrub(virt_addr + offset, size);
632
633 /* Unmap and complete */
634 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
635
636 if (PageHighMem(pg))
637 local_irq_restore(flags);
638}
639
da9bb1d2 640/* FIXME - should return -1 */
e7ecd891 641int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
da9bb1d2
AC
642{
643 struct csrow_info *csrows = mci->csrows;
644 int row, i;
645
537fba28 646 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
da9bb1d2
AC
647 row = -1;
648
649 for (i = 0; i < mci->nr_csrows; i++) {
650 struct csrow_info *csrow = &csrows[i];
651
652 if (csrow->nr_pages == 0)
653 continue;
654
537fba28
DP
655 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
656 "mask(0x%lx)\n", mci->mc_idx, __func__,
657 csrow->first_page, page, csrow->last_page,
658 csrow->page_mask);
da9bb1d2
AC
659
660 if ((page >= csrow->first_page) &&
661 (page <= csrow->last_page) &&
662 ((page & csrow->page_mask) ==
663 (csrow->first_page & csrow->page_mask))) {
664 row = i;
665 break;
666 }
667 }
668
669 if (row == -1)
537fba28 670 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
671 "could not look up page error address %lx\n",
672 (unsigned long)page);
da9bb1d2
AC
673
674 return row;
675}
9110540f 676EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
da9bb1d2 677
da9bb1d2
AC
678/* FIXME - setable log (warning/emerg) levels */
679/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
680void edac_mc_handle_ce(struct mem_ctl_info *mci,
052dfb45
DT
681 unsigned long page_frame_number,
682 unsigned long offset_in_page, unsigned long syndrome,
683 int row, int channel, const char *msg)
da9bb1d2
AC
684{
685 unsigned long remapped_page;
686
537fba28 687 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
da9bb1d2
AC
688
689 /* FIXME - maybe make panic on INTERNAL ERROR an option */
690 if (row >= mci->nr_csrows || row < 0) {
691 /* something is wrong */
537fba28 692 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
693 "INTERNAL ERROR: row out of range "
694 "(%d >= %d)\n", row, mci->nr_csrows);
da9bb1d2
AC
695 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
696 return;
697 }
e7ecd891 698
da9bb1d2
AC
699 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
700 /* something is wrong */
537fba28 701 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
702 "INTERNAL ERROR: channel out of range "
703 "(%d >= %d)\n", channel,
704 mci->csrows[row].nr_channels);
da9bb1d2
AC
705 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
706 return;
707 }
708
4de78c68 709 if (edac_mc_get_log_ce())
da9bb1d2 710 /* FIXME - put in DIMM location */
537fba28 711 edac_mc_printk(mci, KERN_WARNING,
052dfb45
DT
712 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
713 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
714 page_frame_number, offset_in_page,
715 mci->csrows[row].grain, syndrome, row, channel,
716 mci->csrows[row].channels[channel].label, msg);
da9bb1d2
AC
717
718 mci->ce_count++;
719 mci->csrows[row].ce_count++;
720 mci->csrows[row].channels[channel].ce_count++;
721
722 if (mci->scrub_mode & SCRUB_SW_SRC) {
723 /*
724 * Some MC's can remap memory so that it is still available
725 * at a different address when PCI devices map into memory.
726 * MC's that can't do this lose the memory where PCI devices
727 * are mapped. This mapping is MC dependant and so we call
728 * back into the MC driver for it to map the MC page to
729 * a physical (CPU) page which can then be mapped to a virtual
730 * page - which can then be scrubbed.
731 */
732 remapped_page = mci->ctl_page_to_phys ?
052dfb45
DT
733 mci->ctl_page_to_phys(mci, page_frame_number) :
734 page_frame_number;
da9bb1d2
AC
735
736 edac_mc_scrub_block(remapped_page, offset_in_page,
052dfb45 737 mci->csrows[row].grain);
da9bb1d2
AC
738 }
739}
9110540f 740EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
da9bb1d2 741
e7ecd891 742void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
da9bb1d2 743{
4de78c68 744 if (edac_mc_get_log_ce())
537fba28 745 edac_mc_printk(mci, KERN_WARNING,
052dfb45 746 "CE - no information available: %s\n", msg);
e7ecd891 747
da9bb1d2
AC
748 mci->ce_noinfo_count++;
749 mci->ce_count++;
750}
9110540f 751EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
da9bb1d2 752
da9bb1d2 753void edac_mc_handle_ue(struct mem_ctl_info *mci,
052dfb45
DT
754 unsigned long page_frame_number,
755 unsigned long offset_in_page, int row, const char *msg)
da9bb1d2
AC
756{
757 int len = EDAC_MC_LABEL_LEN * 4;
758 char labels[len + 1];
759 char *pos = labels;
760 int chan;
761 int chars;
762
537fba28 763 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
da9bb1d2
AC
764
765 /* FIXME - maybe make panic on INTERNAL ERROR an option */
766 if (row >= mci->nr_csrows || row < 0) {
767 /* something is wrong */
537fba28 768 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
769 "INTERNAL ERROR: row out of range "
770 "(%d >= %d)\n", row, mci->nr_csrows);
da9bb1d2
AC
771 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
772 return;
773 }
774
775 chars = snprintf(pos, len + 1, "%s",
079708b9 776 mci->csrows[row].channels[0].label);
da9bb1d2
AC
777 len -= chars;
778 pos += chars;
e7ecd891 779
da9bb1d2 780 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
052dfb45 781 chan++) {
da9bb1d2 782 chars = snprintf(pos, len + 1, ":%s",
079708b9 783 mci->csrows[row].channels[chan].label);
da9bb1d2
AC
784 len -= chars;
785 pos += chars;
786 }
787
4de78c68 788 if (edac_mc_get_log_ue())
537fba28 789 edac_mc_printk(mci, KERN_EMERG,
052dfb45
DT
790 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
791 "labels \"%s\": %s\n", page_frame_number,
792 offset_in_page, mci->csrows[row].grain, row,
793 labels, msg);
da9bb1d2 794
4de78c68 795 if (edac_mc_get_panic_on_ue())
e7ecd891 796 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
052dfb45
DT
797 "row %d, labels \"%s\": %s\n", mci->mc_idx,
798 page_frame_number, offset_in_page,
799 mci->csrows[row].grain, row, labels, msg);
da9bb1d2
AC
800
801 mci->ue_count++;
802 mci->csrows[row].ue_count++;
803}
9110540f 804EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
da9bb1d2 805
e7ecd891 806void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
da9bb1d2 807{
4de78c68 808 if (edac_mc_get_panic_on_ue())
da9bb1d2
AC
809 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
810
4de78c68 811 if (edac_mc_get_log_ue())
537fba28 812 edac_mc_printk(mci, KERN_WARNING,
052dfb45 813 "UE - no information available: %s\n", msg);
da9bb1d2
AC
814 mci->ue_noinfo_count++;
815 mci->ue_count++;
816}
079708b9 817EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
da9bb1d2 818
9794f33d 819/*************************************************************
820 * On Fully Buffered DIMM modules, this help function is
821 * called to process UE events
822 */
823void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
052dfb45
DT
824 unsigned int csrow,
825 unsigned int channela,
826 unsigned int channelb, char *msg)
9794f33d 827{
828 int len = EDAC_MC_LABEL_LEN * 4;
829 char labels[len + 1];
830 char *pos = labels;
831 int chars;
832
833 if (csrow >= mci->nr_csrows) {
834 /* something is wrong */
835 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
836 "INTERNAL ERROR: row out of range (%d >= %d)\n",
837 csrow, mci->nr_csrows);
9794f33d 838 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
839 return;
840 }
841
842 if (channela >= mci->csrows[csrow].nr_channels) {
843 /* something is wrong */
844 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
845 "INTERNAL ERROR: channel-a out of range "
846 "(%d >= %d)\n",
847 channela, mci->csrows[csrow].nr_channels);
9794f33d 848 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
849 return;
850 }
851
852 if (channelb >= mci->csrows[csrow].nr_channels) {
853 /* something is wrong */
854 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
855 "INTERNAL ERROR: channel-b out of range "
856 "(%d >= %d)\n",
857 channelb, mci->csrows[csrow].nr_channels);
9794f33d 858 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
859 return;
860 }
861
862 mci->ue_count++;
863 mci->csrows[csrow].ue_count++;
864
865 /* Generate the DIMM labels from the specified channels */
866 chars = snprintf(pos, len + 1, "%s",
867 mci->csrows[csrow].channels[channela].label);
079708b9
DT
868 len -= chars;
869 pos += chars;
9794f33d 870 chars = snprintf(pos, len + 1, "-%s",
871 mci->csrows[csrow].channels[channelb].label);
872
4de78c68 873 if (edac_mc_get_log_ue())
9794f33d 874 edac_mc_printk(mci, KERN_EMERG,
052dfb45
DT
875 "UE row %d, channel-a= %d channel-b= %d "
876 "labels \"%s\": %s\n", csrow, channela, channelb,
877 labels, msg);
9794f33d 878
4de78c68 879 if (edac_mc_get_panic_on_ue())
9794f33d 880 panic("UE row %d, channel-a= %d channel-b= %d "
052dfb45
DT
881 "labels \"%s\": %s\n", csrow, channela,
882 channelb, labels, msg);
9794f33d 883}
884EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
885
886/*************************************************************
887 * On Fully Buffered DIMM modules, this help function is
888 * called to process CE events
889 */
890void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
052dfb45 891 unsigned int csrow, unsigned int channel, char *msg)
9794f33d 892{
893
894 /* Ensure boundary values */
895 if (csrow >= mci->nr_csrows) {
896 /* something is wrong */
897 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
898 "INTERNAL ERROR: row out of range (%d >= %d)\n",
899 csrow, mci->nr_csrows);
9794f33d 900 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
901 return;
902 }
903 if (channel >= mci->csrows[csrow].nr_channels) {
904 /* something is wrong */
905 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
906 "INTERNAL ERROR: channel out of range (%d >= %d)\n",
907 channel, mci->csrows[csrow].nr_channels);
9794f33d 908 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
909 return;
910 }
911
4de78c68 912 if (edac_mc_get_log_ce())
9794f33d 913 /* FIXME - put in DIMM location */
914 edac_mc_printk(mci, KERN_WARNING,
052dfb45
DT
915 "CE row %d, channel %d, label \"%s\": %s\n",
916 csrow, channel,
917 mci->csrows[csrow].channels[channel].label, msg);
9794f33d 918
919 mci->ce_count++;
920 mci->csrows[csrow].ce_count++;
921 mci->csrows[csrow].channels[channel].ce_count++;
922}
079708b9 923EXPORT_SYMBOL(edac_mc_handle_fbd_ce);