drivers/edac/edac_mc.c

   1 /*
   2  * edac_mc kernel module
   3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4  * This file may be distributed under the terms of the
   5  * GNU General Public License.
   6  *
   7  * Written by Thayne Harbaugh
   8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9  *      http://www.anime.net/~goemon/linux-ecc/
  10  *
  11  * Modified by Dave Peterson and Doug Thompson
  12  *
  13  */
  14
  15 #include <linux/module.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/kernel.h>
  18 #include <linux/types.h>
  19 #include <linux/smp.h>
  20 #include <linux/init.h>
  21 #include <linux/sysctl.h>
  22 #include <linux/highmem.h>
  23 #include <linux/timer.h>
  24 #include <linux/slab.h>
  25 #include <linux/jiffies.h>
  26 #include <linux/spinlock.h>
  27 #include <linux/list.h>
  28 #include <linux/sysdev.h>
  29 #include <linux/ctype.h>
  30 #include <linux/edac.h>
  31 #include <asm/uaccess.h>
  32 #include <asm/page.h>
  33 #include <asm/edac.h>
  34 #include "edac_core.h"
  35 #include "edac_module.h"
  36
/*
 * mem_ctls_mutex is taken by the code in this file around walks and
 * updates of the mc_devices list.
 *
 * mc_devices is the global list of registered memory controllers; each
 * struct mem_ctl_info is chained on it through its 'link' member and
 * add_mc_to_global_list() keeps it ordered by ascending mc_idx.
 */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
  40
  41 #ifdef CONFIG_EDAC_DEBUG
  42
/*
 * edac_mc_dump_channel
 *      Debug helper: print the fields of one channel_info at debug
 *      level 4.  Only built when CONFIG_EDAC_DEBUG is set.
 * @chan: channel to dump
 */
static void edac_mc_dump_channel(struct channel_info *chan)
{
        debugf4("\tchannel = %p\n", chan);
        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
        debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
        debugf4("\tchannel->label = '%s'\n", chan->label);
        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}
  51
/*
 * edac_mc_dump_csrow
 *      Debug helper: print the page range, mask, sizes and back pointers
 *      of one csrow_info at debug level 4.  Only built when
 *      CONFIG_EDAC_DEBUG is set.
 * @csrow: chip-select row to dump
 */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        debugf4("\tcsrow = %p\n", csrow);
        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
        debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
        debugf4("\tcsrow->channels = %p\n", csrow->channels);
        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}
  64
/*
 * edac_mc_dump_mci
 *      Debug helper: print the capability masks, csrow table, owning
 *      device and names of a memory controller.  Most fields are printed
 *      at debug level 3; the edac_check pointer is printed at level 4.
 *      Only built when CONFIG_EDAC_DEBUG is set.
 * @mci: memory controller to dump
 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        debugf3("\tmci = %p\n", mci);
        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
                mci->nr_csrows, mci->csrows);
        debugf3("\tdev = %p\n", mci->dev);
        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
  78
  79 #endif                          /* CONFIG_EDAC_DEBUG */
  80
/*
 * Human-readable names for the memory types, exported for use by other
 * EDAC code.
 *
 * Keep the entries in sync with enum mem_type: the position of each
 * string must match the numeric value of the corresponding enumerator,
 * so new types have to be added in the same place in both.
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
 104
 105 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 106  * Adjust 'ptr' so that its alignment is at least as stringent as what the
 107  * compiler would provide for X and return the aligned result.
 108  *
 109  * If 'size' is a constant, the compiler will optimize this whole function
 110  * down to either a no-op or the addition of a constant to the value of 'ptr'.
 111  */
 112 void *edac_align_ptr(void *ptr, unsigned size)
 113 {
 114         unsigned align, r;
 115
 116         /* Here we assume that the alignment of a "long long" is the most
 117          * stringent alignment that the compiler will ever provide by default.
 118          * As far as I know, this is a reasonable assumption.
 119          */
 120         if (size > sizeof(long))
 121                 align = sizeof(long long);
 122         else if (size > sizeof(int))
 123                 align = sizeof(long);
 124         else if (size > sizeof(short))
 125                 align = sizeof(int);
 126         else if (size > sizeof(char))
 127                 align = sizeof(short);
 128         else
 129                 return (char *)ptr;
 130
 131         r = size % align;
 132
 133         if (r == 0)
 134                 return (char *)ptr;
 135
 136         return (void *)(((unsigned long)ptr) + align - r);
 137 }
 138
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:     size of private storage needed
 * @nr_csrows:  Number of CSROWS needed for this MC
 * @nr_chans:   Number of channels for the MC
 * @edac_index: value stored in mci->mc_idx for this MC
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * The chunk is laid out as: the mem_ctl_info itself, then nr_csrows
 * csrow_info entries, then nr_chans * nr_csrows channel_info entries,
 * then sz_pvt bytes of private data.  It is zero-filled (kzalloc).
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *      NULL allocation failed, or registering the main kobject failed
 *      struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
                                unsigned nr_chans, int edac_index)
{
        struct mem_ctl_info *mci;
        struct csrow_info *csi, *csrow;
        struct channel_info *chi, *chp, *chan;
        void *pvt;
        unsigned size;
        int row, chn;
        int err;

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         *
         * With mci at address 0, the "pointers" computed here are really
         * byte offsets; 'size' ends up as the total number of bytes needed.
         */
        mci = (struct mem_ctl_info *)0;
        csi = edac_align_ptr(&mci[1], sizeof(*csi));
        chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
        pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
        size = ((unsigned long)pvt) + sz_pvt;

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         * pvt stays NULL when no private storage was requested.
         */
        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
        chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = edac_index;
        mci->csrows = csi;
        mci->pvt_info = pvt;
        mci->nr_csrows = nr_csrows;

        /* each csrow owns a contiguous run of nr_chans channel entries */
        for (row = 0; row < nr_csrows; row++) {
                csrow = &csi[row];
                csrow->csrow_idx = row;
                csrow->mci = mci;
                csrow->nr_channels = nr_chans;
                chp = &chi[row * nr_chans];
                csrow->channels = chp;

                for (chn = 0; chn < nr_chans; chn++) {
                        chan = &chp[chn];
                        chan->chan_idx = chn;
                        chan->csrow = csrow;
                }
        }

        mci->op_state = OP_ALLOC;
        INIT_LIST_HEAD(&mci->grp_kobj_list);

        /*
         * Initialize the 'root' kobj for the edac_mc controller
         */
        err = edac_mc_register_sysfs_main_kobj(mci);
        if (err) {
                kfree(mci);
                return NULL;
        }

        /* at this point, the root kobj is valid, and in order to
         * 'free' the object, then the function:
         *      edac_mc_unregister_sysfs_main_kobj() must be called
         * which will perform kobj unregistration and the actual free
         * will occur during the kobject callback operation
         *
         * NOTE(review): edac_mc_free() in this file calls kfree() itself
         * after unregistering the kobject - confirm which of the two
         * actually releases the memory.
         */
        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
 230
/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure: unregister its
 *      main sysfs kobject, then release the memory with kfree().
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * NOTE(review): the comment at the end of edac_mc_alloc() says the actual
 * free happens in the kobject callback.  If that callback also frees
 * 'mci', the kfree() below would be a second free - confirm against the
 * sysfs release code.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        debugf1("%s()\n", __func__);

        edac_mc_unregister_sysfs_main_kobj(mci);

        /* free the mci instance memory here */
        kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
 246
 247
 248 /**
 249  * find_mci_by_dev
 250  *
 251  *      scan list of controllers looking for the one that manages
 252  *      the 'dev' device
 253  * @dev: pointer to a struct device related with the MCI
 254  */
 255 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 256 {
 257         struct mem_ctl_info *mci;
 258         struct list_head *item;
 259
 260         debugf3("%s()\n", __func__);
 261
 262         list_for_each(item, &mc_devices) {
 263                 mci = list_entry(item, struct mem_ctl_info, link);
 264
 265                 if (mci->dev == dev)
 266                         return mci;
 267         }
 268
 269         return NULL;
 270 }
 271 EXPORT_SYMBOL_GPL(find_mci_by_dev);
 272
 273 /*
 274  * handler for EDAC to check if NMI type handler has asserted interrupt
 275  */
 276 static int edac_mc_assert_error_check_and_clear(void)
 277 {
 278         int old_state;
 279
 280         if (edac_op_state == EDAC_OPSTATE_POLL)
 281                 return 1;
 282
 283         old_state = edac_err_assert;
 284         edac_err_assert = 0;
 285
 286         return old_state;
 287 }
 288
 289 /*
 290  * edac_mc_workq_function
 291  *      performs the operation scheduled by a workq request
 292  */
 293 static void edac_mc_workq_function(struct work_struct *work_req)
 294 {
 295         struct delayed_work *d_work = to_delayed_work(work_req);
 296         struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 297
 298         mutex_lock(&mem_ctls_mutex);
 299
 300         /* if this control struct has movd to offline state, we are done */
 301         if (mci->op_state == OP_OFFLINE) {
 302                 mutex_unlock(&mem_ctls_mutex);
 303                 return;
 304         }
 305
 306         /* Only poll controllers that are running polled and have a check */
 307         if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 308                 mci->edac_check(mci);
 309
 310         mutex_unlock(&mem_ctls_mutex);
 311
 312         /* Reschedule */
 313         queue_delayed_work(edac_workqueue, &mci->work,
 314                         msecs_to_jiffies(edac_mc_get_poll_msec()));
 315 }
 316
 317 /*
 318  * edac_mc_workq_setup
 319  *      initialize a workq item for this mci
 320  *      passing in the new delay period in msec
 321  *
 322  *      locking model:
 323  *
 324  *              called with the mem_ctls_mutex held
 325  */
 326 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 327 {
 328         debugf0("%s()\n", __func__);
 329
 330         /* if this instance is not in the POLL state, then simply return */
 331         if (mci->op_state != OP_RUNNING_POLL)
 332                 return;
 333
 334         INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 335         queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 336 }
 337
 338 /*
 339  * edac_mc_workq_teardown
 340  *      stop the workq processing on this mci
 341  *
 342  *      locking model:
 343  *
 344  *              called WITHOUT lock held
 345  */
 346 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 347 {
 348         int status;
 349
 350         if (mci->op_state != OP_RUNNING_POLL)
 351                 return;
 352
 353         status = cancel_delayed_work(&mci->work);
 354         if (status == 0) {
 355                 debugf0("%s() not canceled, flush the queue\n",
 356                         __func__);
 357
 358                 /* workq instance might be running, wait for it */
 359                 flush_workqueue(edac_workqueue);
 360         }
 361 }
 362
 363 /*
 364  * edac_mc_reset_delay_period(unsigned long value)
 365  *
 366  *      user space has updated our poll period value, need to
 367  *      reset our workq delays
 368  */
 369 void edac_mc_reset_delay_period(int value)
 370 {
 371         struct mem_ctl_info *mci;
 372         struct list_head *item;
 373
 374         mutex_lock(&mem_ctls_mutex);
 375
 376         /* scan the list and turn off all workq timers, doing so under lock
 377          */
 378         list_for_each(item, &mc_devices) {
 379                 mci = list_entry(item, struct mem_ctl_info, link);
 380
 381                 if (mci->op_state == OP_RUNNING_POLL)
 382                         cancel_delayed_work(&mci->work);
 383         }
 384
 385         mutex_unlock(&mem_ctls_mutex);
 386
 387
 388         /* re-walk the list, and reset the poll delay */
 389         mutex_lock(&mem_ctls_mutex);
 390
 391         list_for_each(item, &mc_devices) {
 392                 mci = list_entry(item, struct mem_ctl_info, link);
 393
 394                 edac_mc_workq_setup(mci, (unsigned long) value);
 395         }
 396
 397         mutex_unlock(&mem_ctls_mutex);
 398 }
 399
 400
 401
 402 /* Return 0 on success, 1 on failure.
 403  * Before calling this function, caller must
 404  * assign a unique value to mci->mc_idx.
 405  *
 406  *      locking model:
 407  *
 408  *              called with the mem_ctls_mutex lock held
 409  */
 410 static int add_mc_to_global_list(struct mem_ctl_info *mci)
 411 {
 412         struct list_head *item, *insert_before;
 413         struct mem_ctl_info *p;
 414
 415         insert_before = &mc_devices;
 416
 417         p = find_mci_by_dev(mci->dev);
 418         if (unlikely(p != NULL))
 419                 goto fail0;
 420
 421         list_for_each(item, &mc_devices) {
 422                 p = list_entry(item, struct mem_ctl_info, link);
 423
 424                 if (p->mc_idx >= mci->mc_idx) {
 425                         if (unlikely(p->mc_idx == mci->mc_idx))
 426                                 goto fail1;
 427
 428                         insert_before = item;
 429                         break;
 430                 }
 431         }
 432
 433         list_add_tail_rcu(&mci->link, insert_before);
 434         atomic_inc(&edac_handlers);
 435         return 0;
 436
 437 fail0:
 438         edac_printk(KERN_WARNING, EDAC_MC,
 439                 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 440                 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 441         return 1;
 442
 443 fail1:
 444         edac_printk(KERN_WARNING, EDAC_MC,
 445                 "bug in low-level driver: attempt to assign\n"
 446                 "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 447         return 1;
 448 }
 449
/*
 * Unlink 'mci' from the global controller list and drop the handler
 * count that add_mc_to_global_list() took.  Returns only after an RCU
 * grace period has elapsed, with mci->link re-initialised to an empty
 * list head.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}
 461
 462 /**
 463  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 464  *
 465  * If found, return a pointer to the structure.
 466  * Else return NULL.
 467  *
 468  * Caller must hold mem_ctls_mutex.
 469  */
 470 struct mem_ctl_info *edac_mc_find(int idx)
 471 {
 472         struct list_head *item;
 473         struct mem_ctl_info *mci;
 474
 475         list_for_each(item, &mc_devices) {
 476                 mci = list_entry(item, struct mem_ctl_info, link);
 477
 478                 if (mci->mc_idx >= idx) {
 479                         if (mci->mc_idx == idx)
 480                                 return mci;
 481
 482                         break;
 483                 }
 484         }
 485
 486         return NULL;
 487 }
 488 EXPORT_SYMBOL(edac_mc_find);
 489
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; its mc_idx
 *       must already hold a unique index (see add_mc_to_global_list())
 *
 * A controller that provides an edac_check routine is put into the
 * OP_RUNNING_POLL state and gets its poll work queued; any other
 * controller is put into OP_RUNNING_INTERRUPT.
 *
 * Return:
 *      0       Success
 *      !0      Failure (the list insertion or the sysfs creation failed;
 *              the mci is not left on the global list)
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        int j;

                        edac_mc_dump_csrow(&mci->csrows[i]);
                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
                                edac_mc_dump_channel(&mci->csrows[i].
                                                channels[j]);
                }
        }
#endif
        /* list insertion, sysfs setup and work queueing all happen under
         * the global mutex
         */
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

        /* undo the list insertion before reporting failure */
fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 562
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * The controller is unlinked from the global list under mem_ctls_mutex;
 * the poll work is then stopped with the mutex released, the controller
 * is marked OP_OFFLINE and its sysfs entries are removed.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        debugf0("%s()\n", __func__);

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes
         *
         * NOTE(review): this has to run before op_state is changed below,
         * because edac_mc_workq_teardown() only acts on a controller that
         * is still in OP_RUNNING_POLL.  It also means the work function
         * does not yet see OP_OFFLINE while it is being torn down.
         */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 604
 605 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 606                                 u32 size)
 607 {
 608         struct page *pg;
 609         void *virt_addr;
 610         unsigned long flags = 0;
 611
 612         debugf3("%s()\n", __func__);
 613
 614         /* ECC error page was not in our memory. Ignore it. */
 615         if (!pfn_valid(page))
 616                 return;
 617
 618         /* Find the actual page structure then map it and fix */
 619         pg = pfn_to_page(page);
 620
 621         if (PageHighMem(pg))
 622                 local_irq_save(flags);
 623
 624         virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
 625
 626         /* Perform architecture specific atomic scrub operation */
 627         atomic_scrub(virt_addr + offset, size);
 628
 629         /* Unmap and complete */
 630         kunmap_atomic(virt_addr, KM_BOUNCE_READ);
 631
 632         if (PageHighMem(pg))
 633                 local_irq_restore(flags);
 634 }
 635
 636 /* FIXME - should return -1 */
 637 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 638 {
 639         struct csrow_info *csrows = mci->csrows;
 640         int row, i;
 641
 642         debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 643         row = -1;
 644
 645         for (i = 0; i < mci->nr_csrows; i++) {
 646                 struct csrow_info *csrow = &csrows[i];
 647
 648                 if (csrow->nr_pages == 0)
 649                         continue;
 650
 651                 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 652                         "mask(0x%lx)\n", mci->mc_idx, __func__,
 653                         csrow->first_page, page, csrow->last_page,
 654                         csrow->page_mask);
 655
 656                 if ((page >= csrow->first_page) &&
 657                     (page <= csrow->last_page) &&
 658                     ((page & csrow->page_mask) ==
 659                      (csrow->first_page & csrow->page_mask))) {
 660                         row = i;
 661                         break;
 662                 }
 663         }
 664
 665         if (row == -1)
 666                 edac_mc_printk(mci, KERN_ERR,
 667                         "could not look up page error address %lx\n",
 668                         (unsigned long)page);
 669
 670         return row;
 671 }
 672 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 673
 674 /* FIXME - setable log (warning/emerg) levels */
 675 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
 676 void edac_mc_handle_ce(struct mem_ctl_info *mci,
 677                 unsigned long page_frame_number,
 678                 unsigned long offset_in_page, unsigned long syndrome,
 679                 int row, int channel, const char *msg)
 680 {
 681         unsigned long remapped_page;
 682
 683         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 684
 685         /* FIXME - maybe make panic on INTERNAL ERROR an option */
 686         if (row >= mci->nr_csrows || row < 0) {
 687                 /* something is wrong */
 688                 edac_mc_printk(mci, KERN_ERR,
 689                         "INTERNAL ERROR: row out of range "
 690                         "(%d >= %d)\n", row, mci->nr_csrows);
 691                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 692                 return;
 693         }
 694
 695         if (channel >= mci->csrows[row].nr_channels || channel < 0) {
 696                 /* something is wrong */
 697                 edac_mc_printk(mci, KERN_ERR,
 698                         "INTERNAL ERROR: channel out of range "
 699                         "(%d >= %d)\n", channel,
 700                         mci->csrows[row].nr_channels);
 701                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 702                 return;
 703         }
 704
 705         if (edac_mc_get_log_ce())
 706                 /* FIXME - put in DIMM location */
 707                 edac_mc_printk(mci, KERN_WARNING,
 708                         "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
 709                         "0x%lx, row %d, channel %d, label \"%s\": %s\n",
 710                         page_frame_number, offset_in_page,
 711                         mci->csrows[row].grain, syndrome, row, channel,
 712                         mci->csrows[row].channels[channel].label, msg);
 713
 714         mci->ce_count++;
 715         mci->csrows[row].ce_count++;
 716         mci->csrows[row].channels[channel].ce_count++;
 717
 718         if (mci->scrub_mode & SCRUB_SW_SRC) {
 719                 /*
 720                  * Some MC's can remap memory so that it is still available
 721                  * at a different address when PCI devices map into memory.
 722                  * MC's that can't do this lose the memory where PCI devices
 723                  * are mapped.  This mapping is MC dependent and so we call
 724                  * back into the MC driver for it to map the MC page to
 725                  * a physical (CPU) page which can then be mapped to a virtual
 726                  * page - which can then be scrubbed.
 727                  */
 728                 remapped_page = mci->ctl_page_to_phys ?
 729                         mci->ctl_page_to_phys(mci, page_frame_number) :
 730                         page_frame_number;
 731
 732                 edac_mc_scrub_block(remapped_page, offset_in_page,
 733                                 mci->csrows[row].grain);
 734         }
 735 }
 736 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
 737
 738 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
 739 {
 740         if (edac_mc_get_log_ce())
 741                 edac_mc_printk(mci, KERN_WARNING,
 742                         "CE - no information available: %s\n", msg);
 743
 744         mci->ce_noinfo_count++;
 745         mci->ce_count++;
 746 }
 747 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
 748
 749 void edac_mc_handle_ue(struct mem_ctl_info *mci,
 750                 unsigned long page_frame_number,
 751                 unsigned long offset_in_page, int row, const char *msg)
 752 {
 753         int len = EDAC_MC_LABEL_LEN * 4;
 754         char labels[len + 1];
 755         char *pos = labels;
 756         int chan;
 757         int chars;
 758
 759         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 760
 761         /* FIXME - maybe make panic on INTERNAL ERROR an option */
 762         if (row >= mci->nr_csrows || row < 0) {
 763                 /* something is wrong */
 764                 edac_mc_printk(mci, KERN_ERR,
 765                         "INTERNAL ERROR: row out of range "
 766                         "(%d >= %d)\n", row, mci->nr_csrows);
 767                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 768                 return;
 769         }
 770
 771         chars = snprintf(pos, len + 1, "%s",
 772                          mci->csrows[row].channels[0].label);
 773         len -= chars;
 774         pos += chars;
 775
 776         for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
 777                 chan++) {
 778                 chars = snprintf(pos, len + 1, ":%s",
 779                                  mci->csrows[row].channels[chan].label);
 780                 len -= chars;
 781                 pos += chars;
 782         }
 783
 784         if (edac_mc_get_log_ue())
 785                 edac_mc_printk(mci, KERN_EMERG,
 786                         "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
 787                         "labels \"%s\": %s\n", page_frame_number,
 788                         offset_in_page, mci->csrows[row].grain, row,
 789                         labels, msg);
 790
 791         if (edac_mc_get_panic_on_ue())
 792                 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
 793                         "row %d, labels \"%s\": %s\n", mci->mc_idx,
 794                         page_frame_number, offset_in_page,
 795                         mci->csrows[row].grain, row, labels, msg);
 796
 797         mci->ue_count++;
 798         mci->csrows[row].ue_count++;
 799 }
 800 EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
 801
/*
 * edac_mc_handle_ue_no_info
 *      Account for an uncorrectable error whose location is unknown.
 *
 *      The panic-on-UE setting is checked first, so when it is enabled
 *      the log message and the counter updates below are not reached.
 *      Otherwise 'msg' is optionally logged and both the controller's
 *      total UE count and its "no info" UE count are incremented.
 */
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_WARNING,
                        "UE - no information available: %s\n", msg);
        mci->ue_noinfo_count++;
        mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
 814
 815 /*************************************************************
 816  * On Fully Buffered DIMM modules, this help function is
 817  * called to process UE events
 818  */
 819 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 820                         unsigned int csrow,
 821                         unsigned int channela,
 822                         unsigned int channelb, char *msg)
 823 {
 824         int len = EDAC_MC_LABEL_LEN * 4;
 825         char labels[len + 1];
 826         char *pos = labels;
 827         int chars;
 828
 829         if (csrow >= mci->nr_csrows) {
 830                 /* something is wrong */
 831                 edac_mc_printk(mci, KERN_ERR,
 832                         "INTERNAL ERROR: row out of range (%d >= %d)\n",
 833                         csrow, mci->nr_csrows);
 834                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 835                 return;
 836         }
 837
 838         if (channela >= mci->csrows[csrow].nr_channels) {
 839                 /* something is wrong */
 840                 edac_mc_printk(mci, KERN_ERR,
 841                         "INTERNAL ERROR: channel-a out of range "
 842                         "(%d >= %d)\n",
 843                         channela, mci->csrows[csrow].nr_channels);
 844                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 845                 return;
 846         }
 847
 848         if (channelb >= mci->csrows[csrow].nr_channels) {
 849                 /* something is wrong */
 850                 edac_mc_printk(mci, KERN_ERR,
 851                         "INTERNAL ERROR: channel-b out of range "
 852                         "(%d >= %d)\n",
 853                         channelb, mci->csrows[csrow].nr_channels);
 854                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 855                 return;
 856         }
 857
 858         mci->ue_count++;
 859         mci->csrows[csrow].ue_count++;
 860
 861         /* Generate the DIMM labels from the specified channels */
 862         chars = snprintf(pos, len + 1, "%s",
 863                          mci->csrows[csrow].channels[channela].label);
 864         len -= chars;
 865         pos += chars;
 866         chars = snprintf(pos, len + 1, "-%s",
 867                          mci->csrows[csrow].channels[channelb].label);
 868
 869         if (edac_mc_get_log_ue())
 870                 edac_mc_printk(mci, KERN_EMERG,
 871                         "UE row %d, channel-a= %d channel-b= %d "
 872                         "labels \"%s\": %s\n", csrow, channela, channelb,
 873                         labels, msg);
 874
 875         if (edac_mc_get_panic_on_ue())
 876                 panic("UE row %d, channel-a= %d channel-b= %d "
 877                         "labels \"%s\": %s\n", csrow, channela,
 878                         channelb, labels, msg);
 879 }
 880 EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
 881
 882 /*************************************************************
 883  * On Fully Buffered DIMM modules, this help function is
 884  * called to process CE events
 885  */
 886 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 887                         unsigned int csrow, unsigned int channel, char *msg)
 888 {
 889
 890         /* Ensure boundary values */
 891         if (csrow >= mci->nr_csrows) {
 892                 /* something is wrong */
 893                 edac_mc_printk(mci, KERN_ERR,
 894                         "INTERNAL ERROR: row out of range (%d >= %d)\n",
 895                         csrow, mci->nr_csrows);
 896                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 897                 return;
 898         }
 899         if (channel >= mci->csrows[csrow].nr_channels) {
 900                 /* something is wrong */
 901                 edac_mc_printk(mci, KERN_ERR,
 902                         "INTERNAL ERROR: channel out of range (%d >= %d)\n",
 903                         channel, mci->csrows[csrow].nr_channels);
 904                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 905                 return;
 906         }
 907
 908         if (edac_mc_get_log_ce())
 909                 /* FIXME - put in DIMM location */
 910                 edac_mc_printk(mci, KERN_WARNING,
 911                         "CE row %d, channel %d, label \"%s\": %s\n",
 912                         csrow, channel,
 913                         mci->csrows[csrow].channels[channel].label, msg);
 914
 915         mci->ce_count++;
 916         mci->csrows[csrow].ce_count++;
 917         mci->csrows[csrow].channels[channel].ce_count++;
 918 }
 919 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);