]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - drivers/edac/edac_mc.c
edac: Only expose csrows/channels on legacy API if they're populated
[mirror_ubuntu-hirsute-kernel.git] / drivers / edac / edac_mc.c
CommitLineData
da9bb1d2
AC
1/*
2 * edac_mc kernel module
49c0dab7 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
da9bb1d2
AC
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
da9bb1d2
AC
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
da9bb1d2 28#include <linux/ctype.h>
c0d12172 29#include <linux/edac.h>
53f2d028 30#include <linux/bitops.h>
da9bb1d2
AC
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
20bcb7a8 34#include "edac_core.h"
7c9281d7 35#include "edac_module.h"
da9bb1d2 36
53f2d028
MCC
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
da9bb1d2 41/* lock to memory controller's control array */
63b7df91 42static DEFINE_MUTEX(mem_ctls_mutex);
ff6ac2a6 43static LIST_HEAD(mc_devices);
da9bb1d2 44
da9bb1d2
AC
45#ifdef CONFIG_EDAC_DEBUG
46
a4b4be3f 47static void edac_mc_dump_channel(struct rank_info *chan)
da9bb1d2
AC
48{
49 debugf4("\tchannel = %p\n", chan);
50 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
da9bb1d2 51 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
4275be63
MCC
52 debugf4("\tchannel->dimm = %p\n", chan->dimm);
53}
54
55static void edac_mc_dump_dimm(struct dimm_info *dimm)
56{
57 int i;
58
59 debugf4("\tdimm = %p\n", dimm);
60 debugf4("\tdimm->label = '%s'\n", dimm->label);
61 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
62 debugf4("\tdimm location ");
63 for (i = 0; i < dimm->mci->n_layers; i++) {
64 printk(KERN_CONT "%d", dimm->location[i]);
65 if (i < dimm->mci->n_layers - 1)
66 printk(KERN_CONT ".");
67 }
68 printk(KERN_CONT "\n");
69 debugf4("\tdimm->grain = %d\n", dimm->grain);
70 debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
da9bb1d2
AC
71}
72
2da1c119 73static void edac_mc_dump_csrow(struct csrow_info *csrow)
da9bb1d2
AC
74{
75 debugf4("\tcsrow = %p\n", csrow);
76 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
079708b9 77 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
da9bb1d2
AC
78 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
79 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
079708b9 80 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
da9bb1d2
AC
81 debugf4("\tcsrow->channels = %p\n", csrow->channels);
82 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
83}
84
2da1c119 85static void edac_mc_dump_mci(struct mem_ctl_info *mci)
da9bb1d2
AC
86{
87 debugf3("\tmci = %p\n", mci);
88 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
89 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
90 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
91 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
92 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
93 mci->nr_csrows, mci->csrows);
4275be63
MCC
94 debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
95 mci->tot_dimms, mci->dimms);
fd687502 96 debugf3("\tdev = %p\n", mci->pdev);
079708b9 97 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
da9bb1d2
AC
98 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
99}
100
24f9a7fe
BP
101#endif /* CONFIG_EDAC_DEBUG */
102
239642fe
BP
/*
 * Human-readable names for the memory types.
 *
 * The entries must be kept in the same order as the values of
 * enum mem_type, so keep the two in sync when either one changes.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
126
93e4fe64
MCC
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize most of this function
 * down to a simple alignment of '*p' followed by the addition of a constant.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_mc_alloc() does, for example.
 *
 * Returns the (suitably aligned) offset at which the reserved area starts.
 * At return, '*p' points just past the reserved area, ready to be used on a
 * next call to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned long start = (unsigned long)*p;
	unsigned align, r;

	/*
	 * '*p' can possibly be an unaligned offset for an item X such that
	 * sizeof(X) is 'size'. Pick an alignment that is at least as
	 * stringent as what the compiler would provide for X.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		align = 1;

	/*
	 * The padding depends on where the current offset sits relative to
	 * 'align', not on the element size.
	 */
	r = start % align;
	if (r != 0)
		start += align - r;

	/* Reserve the elements right after the aligned start. */
	*p = (void *)(start + (unsigned long)size * (unsigned long)n_elems);

	return (void *)start;
}
181
da9bb1d2 182/**
4275be63
MCC
183 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
184 * @mc_num: Memory controller number
185 * @n_layers: Number of MC hierarchy layers
186 * layers: Describes each layer as seen by the Memory Controller
187 * @size_pvt: size of private storage needed
188 *
da9bb1d2
AC
189 *
190 * Everything is kmalloc'ed as one big chunk - more efficient.
191 * Only can be used if all structures have the same lifetime - otherwise
192 * you have to allocate and initialize your own structures.
193 *
194 * Use edac_mc_free() to free mc structures allocated by this function.
195 *
4275be63
MCC
196 * NOTE: drivers handle multi-rank memories in different ways: in some
197 * drivers, one multi-rank memory stick is mapped as one entry, while, in
198 * others, a single multi-rank memory stick would be mapped into several
199 * entries. Currently, this function will allocate multiple struct dimm_info
200 * on such scenarios, as grouping the multiple ranks require drivers change.
201 *
da9bb1d2 202 * Returns:
ca0907b9
MCC
203 * On failure: NULL
204 * On success: struct mem_ctl_info pointer
da9bb1d2 205 */
ca0907b9
MCC
206struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
207 unsigned n_layers,
208 struct edac_mc_layer *layers,
209 unsigned sz_pvt)
da9bb1d2
AC
210{
211 struct mem_ctl_info *mci;
4275be63
MCC
212 struct edac_mc_layer *layer;
213 struct csrow_info *csi, *csr;
a4b4be3f 214 struct rank_info *chi, *chp, *chan;
a7d7d2e1 215 struct dimm_info *dimm;
4275be63
MCC
216 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
217 unsigned pos[EDAC_MAX_LAYERS];
4275be63
MCC
218 unsigned size, tot_dimms = 1, count = 1;
219 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
5926ff50 220 void *pvt, *p, *ptr = NULL;
7a623c03 221 int i, j, row, chn, n, len;
4275be63
MCC
222 bool per_rank = false;
223
224 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
225 /*
226 * Calculate the total amount of dimms and csrows/cschannels while
227 * in the old API emulation mode
228 */
229 for (i = 0; i < n_layers; i++) {
230 tot_dimms *= layers[i].size;
231 if (layers[i].is_virt_csrow)
232 tot_csrows *= layers[i].size;
233 else
234 tot_channels *= layers[i].size;
235
236 if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
237 per_rank = true;
238 }
da9bb1d2
AC
239
240 /* Figure out the offsets of the various items from the start of an mc
241 * structure. We want the alignment of each item to be at least as
242 * stringent as what the compiler would provide if we could simply
243 * hardcode everything into a single struct.
244 */
93e4fe64 245 mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
4275be63
MCC
246 layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
247 csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
248 chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
249 dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
250 for (i = 0; i < n_layers; i++) {
251 count *= layers[i].size;
252 debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
253 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
254 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
255 tot_errcount += 2 * count;
256 }
257
258 debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
93e4fe64 259 pvt = edac_align_ptr(&ptr, sz_pvt, 1);
079708b9 260 size = ((unsigned long)pvt) + sz_pvt;
da9bb1d2 261
4275be63
MCC
262 debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
263 __func__, size,
264 tot_dimms,
265 per_rank ? "ranks" : "dimms",
266 tot_csrows * tot_channels);
8096cfaf
DT
267 mci = kzalloc(size, GFP_KERNEL);
268 if (mci == NULL)
da9bb1d2
AC
269 return NULL;
270
271 /* Adjust pointers so they point within the memory we just allocated
272 * rather than an imaginary chunk of memory located at address 0.
273 */
4275be63 274 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
079708b9 275 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
a4b4be3f 276 chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
a7d7d2e1 277 dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
4275be63
MCC
278 for (i = 0; i < n_layers; i++) {
279 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
280 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
281 }
079708b9 282 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
da9bb1d2 283
b8f6f975 284 /* setup index and various internal pointers */
4275be63 285 mci->mc_idx = mc_num;
da9bb1d2 286 mci->csrows = csi;
a7d7d2e1 287 mci->dimms = dimm;
4275be63 288 mci->tot_dimms = tot_dimms;
da9bb1d2 289 mci->pvt_info = pvt;
4275be63
MCC
290 mci->n_layers = n_layers;
291 mci->layers = layer;
292 memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
293 mci->nr_csrows = tot_csrows;
294 mci->num_cschannel = tot_channels;
295 mci->mem_is_per_rank = per_rank;
da9bb1d2 296
a7d7d2e1 297 /*
4275be63 298 * Fill the csrow struct
a7d7d2e1 299 */
4275be63
MCC
300 for (row = 0; row < tot_csrows; row++) {
301 csr = &csi[row];
302 csr->csrow_idx = row;
303 csr->mci = mci;
304 csr->nr_channels = tot_channels;
305 chp = &chi[row * tot_channels];
306 csr->channels = chp;
307
308 for (chn = 0; chn < tot_channels; chn++) {
da9bb1d2
AC
309 chan = &chp[chn];
310 chan->chan_idx = chn;
4275be63
MCC
311 chan->csrow = csr;
312 }
313 }
314
315 /*
316 * Fill the dimm struct
317 */
318 memset(&pos, 0, sizeof(pos));
319 row = 0;
320 chn = 0;
321 debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
322 per_rank ? "ranks" : "dimms");
323 for (i = 0; i < tot_dimms; i++) {
324 chan = &csi[row].channels[chn];
325 dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
326 pos[0], pos[1], pos[2]);
327 dimm->mci = mci;
328
329 debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
330 i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
331 pos[0], pos[1], pos[2], row, chn);
332
5926ff50
MCC
333 /*
334 * Copy DIMM location and initialize it.
335 */
336 len = sizeof(dimm->label);
337 p = dimm->label;
338 n = snprintf(p, len, "mc#%u", mc_num);
339 p += n;
340 len -= n;
341 for (j = 0; j < n_layers; j++) {
342 n = snprintf(p, len, "%s#%u",
343 edac_layer_name[layers[j].type],
344 pos[j]);
345 p += n;
346 len -= n;
4275be63
MCC
347 dimm->location[j] = pos[j];
348
5926ff50
MCC
349 if (len <= 0)
350 break;
351 }
352
4275be63
MCC
353 /* Link it to the csrows old API data */
354 chan->dimm = dimm;
355 dimm->csrow = row;
356 dimm->cschannel = chn;
357
358 /* Increment csrow location */
359 row++;
360 if (row == tot_csrows) {
361 row = 0;
362 chn++;
363 }
a7d7d2e1 364
4275be63
MCC
365 /* Increment dimm location */
366 for (j = n_layers - 1; j >= 0; j--) {
367 pos[j]++;
368 if (pos[j] < layers[j].size)
369 break;
370 pos[j] = 0;
da9bb1d2
AC
371 }
372 }
373
81d87cb1
DJ
374 mci->op_state = OP_ALLOC;
375
8096cfaf
DT
376 /* at this point, the root kobj is valid, and in order to
377 * 'free' the object, then the function:
378 * edac_mc_unregister_sysfs_main_kobj() must be called
379 * which will perform kobj unregistration and the actual free
380 * will occur during the kobject callback operation
381 */
53f2d028 382
da9bb1d2 383 return mci;
4275be63 384}
9110540f 385EXPORT_SYMBOL_GPL(edac_mc_alloc);
da9bb1d2 386
/**
 * edac_mc_free
 *	Release an 'mci' structure: unregister its sysfs side and then
 *	give the memory back.
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* sysfs goes first, the instance memory last */
	edac_unregister_sysfs(mci);
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
da9bb1d2 402
bce19683 403
939747bd 404/**
bce19683
DT
405 * find_mci_by_dev
406 *
407 * scan list of controllers looking for the one that manages
408 * the 'dev' device
939747bd 409 * @dev: pointer to a struct device related with the MCI
bce19683 410 */
939747bd 411struct mem_ctl_info *find_mci_by_dev(struct device *dev)
da9bb1d2
AC
412{
413 struct mem_ctl_info *mci;
414 struct list_head *item;
415
537fba28 416 debugf3("%s()\n", __func__);
da9bb1d2
AC
417
418 list_for_each(item, &mc_devices) {
419 mci = list_entry(item, struct mem_ctl_info, link);
420
fd687502 421 if (mci->pdev == dev)
da9bb1d2
AC
422 return mci;
423 }
424
425 return NULL;
426}
939747bd 427EXPORT_SYMBOL_GPL(find_mci_by_dev);
da9bb1d2 428
81d87cb1
DJ
429/*
430 * handler for EDAC to check if NMI type handler has asserted interrupt
431 */
432static int edac_mc_assert_error_check_and_clear(void)
433{
66ee2f94 434 int old_state;
81d87cb1 435
079708b9 436 if (edac_op_state == EDAC_OPSTATE_POLL)
81d87cb1
DJ
437 return 1;
438
66ee2f94
DJ
439 old_state = edac_err_assert;
440 edac_err_assert = 0;
81d87cb1 441
66ee2f94 442 return old_state;
81d87cb1
DJ
443}
444
445/*
446 * edac_mc_workq_function
447 * performs the operation scheduled by a workq request
448 */
81d87cb1
DJ
449static void edac_mc_workq_function(struct work_struct *work_req)
450{
fbeb4384 451 struct delayed_work *d_work = to_delayed_work(work_req);
81d87cb1 452 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
81d87cb1
DJ
453
454 mutex_lock(&mem_ctls_mutex);
455
bf52fa4a
DT
456 /* if this control struct has movd to offline state, we are done */
457 if (mci->op_state == OP_OFFLINE) {
458 mutex_unlock(&mem_ctls_mutex);
459 return;
460 }
461
81d87cb1
DJ
462 /* Only poll controllers that are running polled and have a check */
463 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
464 mci->edac_check(mci);
465
81d87cb1
DJ
466 mutex_unlock(&mem_ctls_mutex);
467
468 /* Reschedule */
4de78c68 469 queue_delayed_work(edac_workqueue, &mci->work,
052dfb45 470 msecs_to_jiffies(edac_mc_get_poll_msec()));
81d87cb1
DJ
471}
472
473/*
474 * edac_mc_workq_setup
475 * initialize a workq item for this mci
476 * passing in the new delay period in msec
bf52fa4a
DT
477 *
478 * locking model:
479 *
480 * called with the mem_ctls_mutex held
81d87cb1 481 */
bf52fa4a 482static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
81d87cb1
DJ
483{
484 debugf0("%s()\n", __func__);
485
bf52fa4a
DT
486 /* if this instance is not in the POLL state, then simply return */
487 if (mci->op_state != OP_RUNNING_POLL)
488 return;
489
81d87cb1 490 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
81d87cb1
DJ
491 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
492}
493
494/*
495 * edac_mc_workq_teardown
496 * stop the workq processing on this mci
bf52fa4a
DT
497 *
498 * locking model:
499 *
500 * called WITHOUT lock held
81d87cb1 501 */
bf52fa4a 502static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
81d87cb1
DJ
503{
504 int status;
505
00740c58
BP
506 if (mci->op_state != OP_RUNNING_POLL)
507 return;
508
bce19683
DT
509 status = cancel_delayed_work(&mci->work);
510 if (status == 0) {
511 debugf0("%s() not canceled, flush the queue\n",
512 __func__);
bf52fa4a 513
bce19683
DT
514 /* workq instance might be running, wait for it */
515 flush_workqueue(edac_workqueue);
81d87cb1
DJ
516 }
517}
518
519/*
bce19683
DT
520 * edac_mc_reset_delay_period(unsigned long value)
521 *
522 * user space has updated our poll period value, need to
523 * reset our workq delays
81d87cb1 524 */
bce19683 525void edac_mc_reset_delay_period(int value)
81d87cb1 526{
bce19683
DT
527 struct mem_ctl_info *mci;
528 struct list_head *item;
529
530 mutex_lock(&mem_ctls_mutex);
531
532 /* scan the list and turn off all workq timers, doing so under lock
533 */
534 list_for_each(item, &mc_devices) {
535 mci = list_entry(item, struct mem_ctl_info, link);
536
537 if (mci->op_state == OP_RUNNING_POLL)
538 cancel_delayed_work(&mci->work);
539 }
540
541 mutex_unlock(&mem_ctls_mutex);
81d87cb1 542
bce19683
DT
543
544 /* re-walk the list, and reset the poll delay */
bf52fa4a
DT
545 mutex_lock(&mem_ctls_mutex);
546
bce19683
DT
547 list_for_each(item, &mc_devices) {
548 mci = list_entry(item, struct mem_ctl_info, link);
549
550 edac_mc_workq_setup(mci, (unsigned long) value);
551 }
81d87cb1
DJ
552
553 mutex_unlock(&mem_ctls_mutex);
554}
555
bce19683
DT
556
557
2d7bbb91
DT
558/* Return 0 on success, 1 on failure.
559 * Before calling this function, caller must
560 * assign a unique value to mci->mc_idx.
bf52fa4a
DT
561 *
562 * locking model:
563 *
564 * called with the mem_ctls_mutex lock held
2d7bbb91 565 */
079708b9 566static int add_mc_to_global_list(struct mem_ctl_info *mci)
da9bb1d2
AC
567{
568 struct list_head *item, *insert_before;
569 struct mem_ctl_info *p;
da9bb1d2 570
2d7bbb91 571 insert_before = &mc_devices;
da9bb1d2 572
fd687502 573 p = find_mci_by_dev(mci->pdev);
bf52fa4a 574 if (unlikely(p != NULL))
2d7bbb91 575 goto fail0;
da9bb1d2 576
2d7bbb91
DT
577 list_for_each(item, &mc_devices) {
578 p = list_entry(item, struct mem_ctl_info, link);
da9bb1d2 579
2d7bbb91
DT
580 if (p->mc_idx >= mci->mc_idx) {
581 if (unlikely(p->mc_idx == mci->mc_idx))
582 goto fail1;
da9bb1d2 583
2d7bbb91
DT
584 insert_before = item;
585 break;
da9bb1d2 586 }
da9bb1d2
AC
587 }
588
589 list_add_tail_rcu(&mci->link, insert_before);
c0d12172 590 atomic_inc(&edac_handlers);
da9bb1d2 591 return 0;
2d7bbb91 592
052dfb45 593fail0:
2d7bbb91 594 edac_printk(KERN_WARNING, EDAC_MC,
fd687502 595 "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
17aa7e03 596 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
2d7bbb91
DT
597 return 1;
598
052dfb45 599fail1:
2d7bbb91 600 edac_printk(KERN_WARNING, EDAC_MC,
052dfb45
DT
601 "bug in low-level driver: attempt to assign\n"
602 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
2d7bbb91 603 return 1;
da9bb1d2
AC
604}
605
e7ecd891 606static void del_mc_from_global_list(struct mem_ctl_info *mci)
a1d03fcc 607{
c0d12172 608 atomic_dec(&edac_handlers);
a1d03fcc 609 list_del_rcu(&mci->link);
e2e77098
LJ
610
611 /* these are for safe removal of devices from global list while
612 * NMI handlers may be traversing list
613 */
614 synchronize_rcu();
615 INIT_LIST_HEAD(&mci->link);
a1d03fcc
DP
616}
617
5da0831c
DT
618/**
619 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
620 *
621 * If found, return a pointer to the structure.
622 * Else return NULL.
623 *
624 * Caller must hold mem_ctls_mutex.
625 */
079708b9 626struct mem_ctl_info *edac_mc_find(int idx)
5da0831c
DT
627{
628 struct list_head *item;
629 struct mem_ctl_info *mci;
630
631 list_for_each(item, &mc_devices) {
632 mci = list_entry(item, struct mem_ctl_info, link);
633
634 if (mci->mc_idx >= idx) {
635 if (mci->mc_idx == idx)
636 return mci;
637
638 break;
639 }
640 }
641
642 return NULL;
643}
644EXPORT_SYMBOL(edac_mc_find);
645
da9bb1d2 646/**
472678eb
DP
647 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
648 * create sysfs entries associated with mci structure
da9bb1d2
AC
649 * @mci: pointer to the mci structure to be added to the list
650 *
651 * Return:
652 * 0 Success
653 * !0 Failure
654 */
655
656/* FIXME - should a warning be printed if no error detection? correction? */
b8f6f975 657int edac_mc_add_mc(struct mem_ctl_info *mci)
da9bb1d2 658{
537fba28 659 debugf0("%s()\n", __func__);
b8f6f975 660
da9bb1d2
AC
661#ifdef CONFIG_EDAC_DEBUG
662 if (edac_debug_level >= 3)
663 edac_mc_dump_mci(mci);
e7ecd891 664
da9bb1d2
AC
665 if (edac_debug_level >= 4) {
666 int i;
667
668 for (i = 0; i < mci->nr_csrows; i++) {
669 int j;
e7ecd891 670
da9bb1d2
AC
671 edac_mc_dump_csrow(&mci->csrows[i]);
672 for (j = 0; j < mci->csrows[i].nr_channels; j++)
079708b9 673 edac_mc_dump_channel(&mci->csrows[i].
052dfb45 674 channels[j]);
da9bb1d2 675 }
4275be63
MCC
676 for (i = 0; i < mci->tot_dimms; i++)
677 edac_mc_dump_dimm(&mci->dimms[i]);
da9bb1d2
AC
678 }
679#endif
63b7df91 680 mutex_lock(&mem_ctls_mutex);
da9bb1d2
AC
681
682 if (add_mc_to_global_list(mci))
028a7b6d 683 goto fail0;
da9bb1d2
AC
684
685 /* set load time so that error rate can be tracked */
686 mci->start_time = jiffies;
687
9794f33d 688 if (edac_create_sysfs_mci_device(mci)) {
689 edac_mc_printk(mci, KERN_WARNING,
052dfb45 690 "failed to create sysfs device\n");
9794f33d 691 goto fail1;
692 }
da9bb1d2 693
81d87cb1
DJ
694 /* If there IS a check routine, then we are running POLLED */
695 if (mci->edac_check != NULL) {
696 /* This instance is NOW RUNNING */
697 mci->op_state = OP_RUNNING_POLL;
698
699 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
700 } else {
701 mci->op_state = OP_RUNNING_INTERRUPT;
702 }
703
da9bb1d2 704 /* Report action taken */
bf52fa4a 705 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
17aa7e03 706 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
da9bb1d2 707
63b7df91 708 mutex_unlock(&mem_ctls_mutex);
028a7b6d 709 return 0;
da9bb1d2 710
052dfb45 711fail1:
028a7b6d
DP
712 del_mc_from_global_list(mci);
713
052dfb45 714fail0:
63b7df91 715 mutex_unlock(&mem_ctls_mutex);
028a7b6d 716 return 1;
da9bb1d2 717}
9110540f 718EXPORT_SYMBOL_GPL(edac_mc_add_mc);
da9bb1d2 719
da9bb1d2 720/**
472678eb
DP
721 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
722 * remove mci structure from global list
37f04581 723 * @pdev: Pointer to 'struct device' representing mci structure to remove.
da9bb1d2 724 *
18dbc337 725 * Return pointer to removed mci structure, or NULL if device not found.
da9bb1d2 726 */
079708b9 727struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
da9bb1d2 728{
18dbc337 729 struct mem_ctl_info *mci;
da9bb1d2 730
bf52fa4a
DT
731 debugf0("%s()\n", __func__);
732
63b7df91 733 mutex_lock(&mem_ctls_mutex);
18dbc337 734
bf52fa4a
DT
735 /* find the requested mci struct in the global list */
736 mci = find_mci_by_dev(dev);
737 if (mci == NULL) {
63b7df91 738 mutex_unlock(&mem_ctls_mutex);
18dbc337
DP
739 return NULL;
740 }
741
da9bb1d2 742 del_mc_from_global_list(mci);
63b7df91 743 mutex_unlock(&mem_ctls_mutex);
bf52fa4a 744
bb31b312 745 /* flush workq processes */
bf52fa4a 746 edac_mc_workq_teardown(mci);
bb31b312
BP
747
748 /* marking MCI offline */
749 mci->op_state = OP_OFFLINE;
750
751 /* remove from sysfs */
bf52fa4a
DT
752 edac_remove_sysfs_mci_device(mci);
753
537fba28 754 edac_printk(KERN_INFO, EDAC_MC,
052dfb45 755 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
17aa7e03 756 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
bf52fa4a 757
18dbc337 758 return mci;
da9bb1d2 759}
9110540f 760EXPORT_SYMBOL_GPL(edac_mc_del_mc);
da9bb1d2 761
2da1c119
AB
762static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
763 u32 size)
da9bb1d2
AC
764{
765 struct page *pg;
766 void *virt_addr;
767 unsigned long flags = 0;
768
537fba28 769 debugf3("%s()\n", __func__);
da9bb1d2
AC
770
771 /* ECC error page was not in our memory. Ignore it. */
079708b9 772 if (!pfn_valid(page))
da9bb1d2
AC
773 return;
774
775 /* Find the actual page structure then map it and fix */
776 pg = pfn_to_page(page);
777
778 if (PageHighMem(pg))
779 local_irq_save(flags);
780
4e5df7ca 781 virt_addr = kmap_atomic(pg);
da9bb1d2
AC
782
783 /* Perform architecture specific atomic scrub operation */
784 atomic_scrub(virt_addr + offset, size);
785
786 /* Unmap and complete */
4e5df7ca 787 kunmap_atomic(virt_addr);
da9bb1d2
AC
788
789 if (PageHighMem(pg))
790 local_irq_restore(flags);
791}
792
da9bb1d2 793/* FIXME - should return -1 */
e7ecd891 794int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
da9bb1d2
AC
795{
796 struct csrow_info *csrows = mci->csrows;
a895bf8b 797 int row, i, j, n;
da9bb1d2 798
537fba28 799 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
da9bb1d2
AC
800 row = -1;
801
802 for (i = 0; i < mci->nr_csrows; i++) {
803 struct csrow_info *csrow = &csrows[i];
a895bf8b
MCC
804 n = 0;
805 for (j = 0; j < csrow->nr_channels; j++) {
806 struct dimm_info *dimm = csrow->channels[j].dimm;
807 n += dimm->nr_pages;
808 }
809 if (n == 0)
da9bb1d2
AC
810 continue;
811
537fba28
DP
812 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
813 "mask(0x%lx)\n", mci->mc_idx, __func__,
814 csrow->first_page, page, csrow->last_page,
815 csrow->page_mask);
da9bb1d2
AC
816
817 if ((page >= csrow->first_page) &&
818 (page <= csrow->last_page) &&
819 ((page & csrow->page_mask) ==
820 (csrow->first_page & csrow->page_mask))) {
821 row = i;
822 break;
823 }
824 }
825
826 if (row == -1)
537fba28 827 edac_mc_printk(mci, KERN_ERR,
052dfb45
DT
828 "could not look up page error address %lx\n",
829 (unsigned long)page);
da9bb1d2
AC
830
831 return row;
832}
9110540f 833EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
da9bb1d2 834
4275be63
MCC
835const char *edac_layer_name[] = {
836 [EDAC_MC_LAYER_BRANCH] = "branch",
837 [EDAC_MC_LAYER_CHANNEL] = "channel",
838 [EDAC_MC_LAYER_SLOT] = "slot",
839 [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
840};
841EXPORT_SYMBOL_GPL(edac_layer_name);
842
843static void edac_inc_ce_error(struct mem_ctl_info *mci,
844 bool enable_per_layer_report,
845 const int pos[EDAC_MAX_LAYERS])
da9bb1d2 846{
4275be63 847 int i, index = 0;
da9bb1d2 848
5926ff50 849 mci->ce_mc++;
da9bb1d2 850
4275be63
MCC
851 if (!enable_per_layer_report) {
852 mci->ce_noinfo_count++;
da9bb1d2
AC
853 return;
854 }
e7ecd891 855
4275be63
MCC
856 for (i = 0; i < mci->n_layers; i++) {
857 if (pos[i] < 0)
858 break;
859 index += pos[i];
860 mci->ce_per_layer[i][index]++;
861
862 if (i < mci->n_layers - 1)
863 index *= mci->layers[i + 1].size;
864 }
865}
866
867static void edac_inc_ue_error(struct mem_ctl_info *mci,
868 bool enable_per_layer_report,
869 const int pos[EDAC_MAX_LAYERS])
870{
871 int i, index = 0;
872
5926ff50 873 mci->ue_mc++;
4275be63
MCC
874
875 if (!enable_per_layer_report) {
876 mci->ce_noinfo_count++;
da9bb1d2
AC
877 return;
878 }
879
4275be63
MCC
880 for (i = 0; i < mci->n_layers; i++) {
881 if (pos[i] < 0)
882 break;
883 index += pos[i];
884 mci->ue_per_layer[i][index]++;
a7d7d2e1 885
4275be63
MCC
886 if (i < mci->n_layers - 1)
887 index *= mci->layers[i + 1].size;
888 }
889}
da9bb1d2 890
4275be63
MCC
891static void edac_ce_error(struct mem_ctl_info *mci,
892 const int pos[EDAC_MAX_LAYERS],
893 const char *msg,
894 const char *location,
895 const char *label,
896 const char *detail,
897 const char *other_detail,
898 const bool enable_per_layer_report,
899 const unsigned long page_frame_number,
900 const unsigned long offset_in_page,
53f2d028 901 long grain)
4275be63
MCC
902{
903 unsigned long remapped_page;
904
905 if (edac_mc_get_log_ce()) {
906 if (other_detail && *other_detail)
907 edac_mc_printk(mci, KERN_WARNING,
53f2d028 908 "CE %s on %s (%s %s - %s)\n",
4275be63
MCC
909 msg, label, location,
910 detail, other_detail);
911 else
912 edac_mc_printk(mci, KERN_WARNING,
53f2d028 913 "CE %s on %s (%s %s)\n",
4275be63
MCC
914 msg, label, location,
915 detail);
916 }
917 edac_inc_ce_error(mci, enable_per_layer_report, pos);
da9bb1d2
AC
918
919 if (mci->scrub_mode & SCRUB_SW_SRC) {
920 /*
4275be63
MCC
921 * Some memory controllers (called MCs below) can remap
922 * memory so that it is still available at a different
923 * address when PCI devices map into memory.
924 * MC's that can't do this, lose the memory where PCI
925 * devices are mapped. This mapping is MC-dependent
926 * and so we call back into the MC driver for it to
927 * map the MC page to a physical (CPU) page which can
928 * then be mapped to a virtual page - which can then
929 * be scrubbed.
930 */
da9bb1d2 931 remapped_page = mci->ctl_page_to_phys ?
052dfb45
DT
932 mci->ctl_page_to_phys(mci, page_frame_number) :
933 page_frame_number;
da9bb1d2 934
4275be63
MCC
935 edac_mc_scrub_block(remapped_page,
936 offset_in_page, grain);
da9bb1d2
AC
937 }
938}
939
4275be63
MCC
940static void edac_ue_error(struct mem_ctl_info *mci,
941 const int pos[EDAC_MAX_LAYERS],
942 const char *msg,
943 const char *location,
944 const char *label,
945 const char *detail,
946 const char *other_detail,
947 const bool enable_per_layer_report)
da9bb1d2 948{
4275be63
MCC
949 if (edac_mc_get_log_ue()) {
950 if (other_detail && *other_detail)
951 edac_mc_printk(mci, KERN_WARNING,
53f2d028 952 "UE %s on %s (%s %s - %s)\n",
4275be63
MCC
953 msg, label, location, detail,
954 other_detail);
955 else
956 edac_mc_printk(mci, KERN_WARNING,
53f2d028 957 "UE %s on %s (%s %s)\n",
4275be63
MCC
958 msg, label, location, detail);
959 }
e7ecd891 960
4275be63
MCC
961 if (edac_mc_get_panic_on_ue()) {
962 if (other_detail && *other_detail)
963 panic("UE %s on %s (%s%s - %s)\n",
964 msg, label, location, detail, other_detail);
965 else
966 panic("UE %s on %s (%s%s)\n",
967 msg, label, location, detail);
968 }
969
970 edac_inc_ue_error(mci, enable_per_layer_report, pos);
da9bb1d2
AC
971}
972
4275be63 973#define OTHER_LABEL " or "
53f2d028
MCC
974
975/**
976 * edac_mc_handle_error - reports a memory event to userspace
977 *
978 * @type: severity of the error (CE/UE/Fatal)
979 * @mci: a struct mem_ctl_info pointer
980 * @page_frame_number: mem page where the error occurred
981 * @offset_in_page: offset of the error inside the page
982 * @syndrome: ECC syndrome
983 * @top_layer: Memory layer[0] position
984 * @mid_layer: Memory layer[1] position
985 * @low_layer: Memory layer[2] position
986 * @msg: Message meaningful to the end users that
987 * explains the event
988 * @other_detail: Technical details about the event that
989 * may help hardware manufacturers and
990 * EDAC developers to analyse the event
991 * @arch_log: Architecture-specific struct that can
992 * be used to add extended information to the
993 * tracepoint, like dumping MCE registers.
994 */
4275be63
MCC
995void edac_mc_handle_error(const enum hw_event_mc_err_type type,
996 struct mem_ctl_info *mci,
997 const unsigned long page_frame_number,
998 const unsigned long offset_in_page,
999 const unsigned long syndrome,
53f2d028
MCC
1000 const int top_layer,
1001 const int mid_layer,
1002 const int low_layer,
4275be63
MCC
1003 const char *msg,
1004 const char *other_detail,
53f2d028 1005 const void *arch_log)
da9bb1d2 1006{
4275be63
MCC
1007 /* FIXME: too much for stack: move it to some pre-alocated area */
1008 char detail[80], location[80];
1009 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1010 char *p;
1011 int row = -1, chan = -1;
53f2d028 1012 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
4275be63 1013 int i;
53f2d028 1014 long grain;
4275be63 1015 bool enable_per_layer_report = false;
53f2d028
MCC
1016 u16 error_count; /* FIXME: make it a parameter */
1017 u8 grain_bits;
da9bb1d2 1018
537fba28 1019 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
da9bb1d2 1020
4275be63
MCC
1021 /*
1022 * Check if the event report is consistent and if the memory
1023 * location is known. If it is known, enable_per_layer_report will be
1024 * true, the DIMM(s) label info will be filled and the per-layer
1025 * error counters will be incremented.
1026 */
1027 for (i = 0; i < mci->n_layers; i++) {
1028 if (pos[i] >= (int)mci->layers[i].size) {
1029 if (type == HW_EVENT_ERR_CORRECTED)
1030 p = "CE";
1031 else
1032 p = "UE";
1033
1034 edac_mc_printk(mci, KERN_ERR,
1035 "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1036 edac_layer_name[mci->layers[i].type],
1037 pos[i], mci->layers[i].size);
1038 /*
1039 * Instead of just returning it, let's use what's
1040 * known about the error. The increment routines and
1041 * the DIMM filter logic will do the right thing by
1042 * pointing the likely damaged DIMMs.
1043 */
1044 pos[i] = -1;
1045 }
1046 if (pos[i] >= 0)
1047 enable_per_layer_report = true;
da9bb1d2
AC
1048 }
1049
4275be63
MCC
1050 /*
1051 * Get the dimm label/grain that applies to the match criteria.
1052 * As the error algorithm may not be able to point to just one memory
1053 * stick, the logic here will get all possible labels that could
1054 * pottentially be affected by the error.
1055 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1056 * to have only the MC channel and the MC dimm (also called "branch")
1057 * but the channel is not known, as the memory is arranged in pairs,
1058 * where each memory belongs to a separate channel within the same
1059 * branch.
1060 */
1061 grain = 0;
1062 p = label;
1063 *p = '\0';
1064 for (i = 0; i < mci->tot_dimms; i++) {
1065 struct dimm_info *dimm = &mci->dimms[i];
da9bb1d2 1066
53f2d028 1067 if (top_layer >= 0 && top_layer != dimm->location[0])
4275be63 1068 continue;
53f2d028 1069 if (mid_layer >= 0 && mid_layer != dimm->location[1])
4275be63 1070 continue;
53f2d028 1071 if (low_layer >= 0 && low_layer != dimm->location[2])
4275be63 1072 continue;
da9bb1d2 1073
4275be63
MCC
1074 /* get the max grain, over the error match range */
1075 if (dimm->grain > grain)
1076 grain = dimm->grain;
9794f33d 1077
4275be63
MCC
1078 /*
1079 * If the error is memory-controller wide, there's no need to
1080 * seek for the affected DIMMs because the whole
1081 * channel/memory controller/... may be affected.
1082 * Also, don't show errors for empty DIMM slots.
1083 */
1084 if (enable_per_layer_report && dimm->nr_pages) {
1085 if (p != label) {
1086 strcpy(p, OTHER_LABEL);
1087 p += strlen(OTHER_LABEL);
1088 }
1089 strcpy(p, dimm->label);
1090 p += strlen(p);
1091 *p = '\0';
1092
1093 /*
1094 * get csrow/channel of the DIMM, in order to allow
1095 * incrementing the compat API counters
1096 */
1097 debugf4("%s: %s csrows map: (%d,%d)\n",
1098 __func__,
1099 mci->mem_is_per_rank ? "rank" : "dimm",
1100 dimm->csrow, dimm->cschannel);
1101
1102 if (row == -1)
1103 row = dimm->csrow;
1104 else if (row >= 0 && row != dimm->csrow)
1105 row = -2;
1106
1107 if (chan == -1)
1108 chan = dimm->cschannel;
1109 else if (chan >= 0 && chan != dimm->cschannel)
1110 chan = -2;
1111 }
9794f33d 1112 }
1113
4275be63
MCC
1114 if (!enable_per_layer_report) {
1115 strcpy(label, "any memory");
1116 } else {
1117 debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1118 __func__, row, chan);
1119 if (p == label)
1120 strcpy(label, "unknown memory");
1121 if (type == HW_EVENT_ERR_CORRECTED) {
1122 if (row >= 0) {
1123 mci->csrows[row].ce_count++;
1124 if (chan >= 0)
1125 mci->csrows[row].channels[chan].ce_count++;
1126 }
1127 } else
1128 if (row >= 0)
1129 mci->csrows[row].ue_count++;
9794f33d 1130 }
1131
4275be63
MCC
1132 /* Fill the RAM location data */
1133 p = location;
1134 for (i = 0; i < mci->n_layers; i++) {
1135 if (pos[i] < 0)
1136 continue;
9794f33d 1137
4275be63
MCC
1138 p += sprintf(p, "%s:%d ",
1139 edac_layer_name[mci->layers[i].type],
1140 pos[i]);
9794f33d 1141 }
53f2d028
MCC
1142 if (p > location)
1143 *(p - 1) = '\0';
1144
1145 /* Report the error via the trace interface */
1146
1147 error_count = 1; /* FIXME: allow change it */
1148 grain_bits = fls_long(grain) + 1;
1149 trace_mc_event(type, msg, label, error_count,
1150 mci->mc_idx, top_layer, mid_layer, low_layer,
1151 PAGES_TO_MiB(page_frame_number) | offset_in_page,
1152 grain_bits, syndrome, other_detail);
a7d7d2e1 1153
4275be63
MCC
1154 /* Memory type dependent details about the error */
1155 if (type == HW_EVENT_ERR_CORRECTED) {
1156 snprintf(detail, sizeof(detail),
53f2d028 1157 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
4275be63
MCC
1158 page_frame_number, offset_in_page,
1159 grain, syndrome);
1160 edac_ce_error(mci, pos, msg, location, label, detail,
1161 other_detail, enable_per_layer_report,
1162 page_frame_number, offset_in_page, grain);
1163 } else {
1164 snprintf(detail, sizeof(detail),
53f2d028 1165 "page:0x%lx offset:0x%lx grain:%ld",
4275be63 1166 page_frame_number, offset_in_page, grain);
9794f33d 1167
4275be63
MCC
1168 edac_ue_error(mci, pos, msg, location, label, detail,
1169 other_detail, enable_per_layer_report);
1170 }
9794f33d 1171}
4275be63 1172EXPORT_SYMBOL_GPL(edac_mc_handle_error);