]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/edac/i7core_edac.c
i7core_edac: Use a lockless ringbuffer
[mirror_ubuntu-bionic-kernel.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642 30#include <linux/edac_mce.h>
f4742949 31#include <linux/smp.h>
14d2c083 32#include <asm/processor.h>
a0c36a1f
MCC
33
34#include "edac_core.h"
35
f4742949
MCC
36/*
37 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
38 * registers start at bus 255, and are not reported by BIOS.
39 * We currently find devices with only 2 sockets. In order to support more QPI
40 * Quick Path Interconnect, just increment this number.
41 */
42#define MAX_SOCKET_BUSES 2
43
44
a0c36a1f
MCC
45/*
46 * Alter this version for the module when modifications are made
47 */
48#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
49#define EDAC_MOD_STR "i7core_edac"
50
a0c36a1f
MCC
51/*
52 * Debug macros
53 */
54#define i7core_printk(level, fmt, arg...) \
55 edac_printk(level, "i7core", fmt, ##arg)
56
57#define i7core_mc_printk(mci, level, fmt, arg...) \
58 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
59
60/*
61 * i7core Memory Controller Registers
62 */
63
e9bd2e73
MCC
64 /* OFFSETS for Device 0 Function 0 */
65
66#define MC_CFG_CONTROL 0x90
67
a0c36a1f
MCC
68 /* OFFSETS for Device 3 Function 0 */
69
70#define MC_CONTROL 0x48
71#define MC_STATUS 0x4c
72#define MC_MAX_DOD 0x64
73
442305b1
MCC
/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
78
79#define MC_TEST_ERR_RCV1 0x60
80 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
81
82#define MC_TEST_ERR_RCV0 0x64
83 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
84 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
85
b4e8f0b6
MCC
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
87#define MC_COR_ECC_CNT_0 0x80
88#define MC_COR_ECC_CNT_1 0x84
89#define MC_COR_ECC_CNT_2 0x88
90#define MC_COR_ECC_CNT_3 0x8c
91#define MC_COR_ECC_CNT_4 0x90
92#define MC_COR_ECC_CNT_5 0x94
93
94#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
95#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
96
97
a0c36a1f
MCC
98 /* OFFSETS for Devices 4,5 and 6 Function 0 */
99
0b2b7b7e
MCC
100#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
101 #define THREE_DIMMS_PRESENT (1 << 24)
102 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
103 #define QUAD_RANK_PRESENT (1 << 22)
104 #define REGISTERED_DIMM (1 << 15)
105
f122a892
MCC
106#define MC_CHANNEL_MAPPER 0x60
107 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
108 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
109
0b2b7b7e
MCC
110#define MC_CHANNEL_RANK_PRESENT 0x7c
111 #define RANK_PRESENT_MASK 0xffff
112
a0c36a1f 113#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
114#define MC_CHANNEL_ERROR_MASK 0xf8
115#define MC_CHANNEL_ERROR_INJECT 0xfc
116 #define INJECT_ADDR_PARITY 0x10
117 #define INJECT_ECC 0x08
118 #define MASK_CACHELINE 0x06
119 #define MASK_FULL_CACHELINE 0x06
120 #define MASK_MSB32_CACHELINE 0x04
121 #define MASK_LSB32_CACHELINE 0x02
122 #define NO_MASK_CACHELINE 0x00
123 #define REPEAT_EN 0x01
a0c36a1f 124
0b2b7b7e 125 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 126
0b2b7b7e
MCC
127#define MC_DOD_CH_DIMM0 0x48
128#define MC_DOD_CH_DIMM1 0x4c
129#define MC_DOD_CH_DIMM2 0x50
130 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
131 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
132 #define DIMM_PRESENT_MASK (1 << 9)
133 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
134 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
135 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
136 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
137 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 138 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 139 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
140 #define MC_DOD_NUMCOL_MASK 3
141 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 142
f122a892
MCC
143#define MC_RANK_PRESENT 0x7c
144
0b2b7b7e
MCC
145#define MC_SAG_CH_0 0x80
146#define MC_SAG_CH_1 0x84
147#define MC_SAG_CH_2 0x88
148#define MC_SAG_CH_3 0x8c
149#define MC_SAG_CH_4 0x90
150#define MC_SAG_CH_5 0x94
151#define MC_SAG_CH_6 0x98
152#define MC_SAG_CH_7 0x9c
153
154#define MC_RIR_LIMIT_CH_0 0x40
155#define MC_RIR_LIMIT_CH_1 0x44
156#define MC_RIR_LIMIT_CH_2 0x48
157#define MC_RIR_LIMIT_CH_3 0x4C
158#define MC_RIR_LIMIT_CH_4 0x50
159#define MC_RIR_LIMIT_CH_5 0x54
160#define MC_RIR_LIMIT_CH_6 0x58
161#define MC_RIR_LIMIT_CH_7 0x5C
162#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
163
164#define MC_RIR_WAY_CH 0x80
165 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
166 #define MC_RIR_WAY_RANK_MASK 0x7
167
a0c36a1f
MCC
168/*
169 * i7core structs
170 */
171
172#define NUM_CHANS 3
442305b1
MCC
173#define MAX_DIMMS 3 /* Max DIMMS per channel */
174#define MAX_MCR_FUNC 4
175#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
176
/*
 * Cached copies of the global memory-controller registers.  Filled by
 * get_dimm_config() from MC function 0 (MC_CONTROL, MC_STATUS,
 * MC_MAX_DOD) and the channel mapper register (MC_CHANNEL_MAPPER).
 */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL register (offset 0x48) */
	u32	mc_status;	/* MC_STATUS register (offset 0x4c) */
	u32	max_dod;	/* MC_MAX_DOD register (offset 0x64) */
	u32	ch_map;		/* MC_CHANNEL_MAPPER register (offset 0x60) */
};
183
194a40fe
MCC
184
/*
 * State of the error-injection sysfs interface.  The address-match
 * fields below may hold -1 ("any"), meaning the corresponding field is
 * not used when matching the injection address.
 */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* half-cacheline selector (valid: 0-3) */
	u32	type;		/* repeat/ECC/parity selector (valid: 0-7) */
	u32	eccmask;	/* ECC bits to flip when injecting */

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
195
/* Per-channel state discovered by get_dimm_config() */
struct i7core_channel {
	u32	ranks;	/* ranks on this channel: 4 if QUAD_RANK_PRESENT, else 2 */
	u32	dimms;	/* number of DIMMs detected as present */
};
200
/* Describes one PCI device (slot/function/device id) the driver binds to */
struct pci_id_descr {
	int	dev;	/* PCI slot (device) number */
	int	func;	/* PCI function number */
	int	dev_id;	/* PCI device id to look for */
};
206
f4742949
MCC
/*
 * Per-socket bookkeeping, linked on i7core_edac_list: the PCI devices
 * found for one QPI socket plus the mem_ctl_info registered for it.
 */
struct i7core_dev {
	struct list_head	list;
	u8			socket;	/* QPI socket this entry describes */
	struct pci_dev		**pdev;	/* one slot per pci_dev_descr[] entry */
	struct mem_ctl_info	*mci;
};
213
/* Driver-private data, hung off mem_ctl_info->pvt_info */
struct i7core_pvt {
	struct pci_dev	*pci_noncore;	/* non-core device (MC_CFG_CONTROL) */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];	/* MC dev 3, per function */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];	/* devs 4-6, per fn */

	struct i7core_dev	*i7core_dev;	/* back-pointer to socket entry */

	struct i7core_info	info;		/* cached MC registers */
	struct i7core_inject	inject;		/* error-injection state */
	struct i7core_channel	channel[NUM_CHANS];

	int		channels; /* Number of active channels */

	int		ce_count_available;	/* set once CE counters were read */
	/* csrow assigned to (channel, dimm) by get_dimm_config() */
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

	/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;	/* non-zero when RDIMMs are in use */

	/* mcelog glue */
	struct edac_mce		edac_mce;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;
};
252
66607706
MCC
253/* Static vars */
254static LIST_HEAD(i7core_edac_list);
255static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 256
8f331907
MCC
257#define PCI_DESCR(device, function, device_id) \
258 .dev = (device), \
259 .func = (function), \
260 .dev_id = (device_id)
261
66607706 262struct pci_id_descr pci_dev_descr[] = {
8f331907
MCC
263 /* Memory controller */
264 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
265 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
b990538a 266 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM */
8f331907
MCC
267 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
268
269 /* Channel 0 */
270 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
271 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
272 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
273 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
274
275 /* Channel 1 */
276 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
277 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
278 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
279 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
280
281 /* Channel 2 */
282 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
283 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
284 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
285 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
286
287 /* Generic Non-core registers */
288 /*
289 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
290 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
291 * the probing code needs to test for the other address in case of
292 * failure of this one
293 */
294 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
295
a0c36a1f 296};
66607706 297#define N_DEVS ARRAY_SIZE(pci_dev_descr)
8f331907
MCC
298
/*
 * pci_device_id table for which devices we are looking for
 *
 * Only the X58 hub management device is listed: the per-socket MC
 * devices live on hidden buses (see the MAX_SOCKET_BUSES comment) and
 * are presumably located by the probe code itself — confirm against
 * the probing routines.
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{0,}			/* 0 terminated list. */
};
306
a0c36a1f
MCC
307static struct edac_pci_ctl_info *i7core_pci;
308
309/****************************************************************************
  Ancillary status routines
311 ****************************************************************************/
312
313 /* MC_CONTROL bits */
ef708b53
MCC
314#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
315#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
316
317 /* MC_STATUS bits */
61053fde 318#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 319#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
320
321 /* MC_MAX_DOD read functions */
854d3349 322static inline int numdimms(u32 dimms)
a0c36a1f 323{
854d3349 324 return (dimms & 0x3) + 1;
a0c36a1f
MCC
325}
326
854d3349 327static inline int numrank(u32 rank)
a0c36a1f
MCC
328{
329 static int ranks[4] = { 1, 2, 4, -EINVAL };
330
854d3349 331 return ranks[rank & 0x3];
a0c36a1f
MCC
332}
333
854d3349 334static inline int numbank(u32 bank)
a0c36a1f
MCC
335{
336 static int banks[4] = { 4, 8, 16, -EINVAL };
337
854d3349 338 return banks[bank & 0x3];
a0c36a1f
MCC
339}
340
854d3349 341static inline int numrow(u32 row)
a0c36a1f
MCC
342{
343 static int rows[8] = {
344 1 << 12, 1 << 13, 1 << 14, 1 << 15,
345 1 << 16, -EINVAL, -EINVAL, -EINVAL,
346 };
347
854d3349 348 return rows[row & 0x7];
a0c36a1f
MCC
349}
350
854d3349 351static inline int numcol(u32 col)
a0c36a1f
MCC
352{
353 static int cols[8] = {
354 1 << 10, 1 << 11, 1 << 12, -EINVAL,
355 };
854d3349 356 return cols[col & 0x3];
a0c36a1f
MCC
357}
358
f4742949 359static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
360{
361 struct i7core_dev *i7core_dev;
362
363 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
364 if (i7core_dev->socket == socket)
365 return i7core_dev;
366 }
367
368 return NULL;
369}
370
a0c36a1f
MCC
371/****************************************************************************
372 Memory check routines
373 ****************************************************************************/
67166af4
MCC
374static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
375 unsigned func)
ef708b53 376{
66607706 377 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 378 int i;
ef708b53 379
66607706
MCC
380 if (!i7core_dev)
381 return NULL;
382
ef708b53 383 for (i = 0; i < N_DEVS; i++) {
66607706 384 if (!i7core_dev->pdev[i])
ef708b53
MCC
385 continue;
386
66607706
MCC
387 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
388 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
389 return i7core_dev->pdev[i];
ef708b53
MCC
390 }
391 }
392
eb94fc40
MCC
393 return NULL;
394}
395
ec6df24c
MCC
396/**
397 * i7core_get_active_channels() - gets the number of channels and csrows
398 * @socket: Quick Path Interconnect socket
399 * @channels: Number of channels that will be returned
400 * @csrows: Number of csrows found
401 *
402 * Since EDAC core needs to know in advance the number of available channels
403 * and csrows, in order to allocate memory for csrows/channels, it is needed
404 * to run two similar steps. At the first step, implemented on this function,
405 * it checks the number of csrows/channels present at one socket.
406 * this is used in order to properly allocate the size of mci components.
407 *
408 * It should be noticed that none of the current available datasheets explain
409 * or even mention how csrows are seen by the memory controller. So, we need
410 * to add a fake description for csrows.
411 * So, this driver is attributing one DIMM memory for one csrow.
412 */
67166af4
MCC
413static int i7core_get_active_channels(u8 socket, unsigned *channels,
414 unsigned *csrows)
eb94fc40
MCC
415{
416 struct pci_dev *pdev = NULL;
417 int i, j;
418 u32 status, control;
419
420 *channels = 0;
421 *csrows = 0;
422
67166af4 423 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 424 if (!pdev) {
67166af4
MCC
425 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
426 socket);
ef708b53 427 return -ENODEV;
b7c76151 428 }
ef708b53
MCC
429
430 /* Device 3 function 0 reads */
431 pci_read_config_dword(pdev, MC_STATUS, &status);
432 pci_read_config_dword(pdev, MC_CONTROL, &control);
433
434 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 435 u32 dimm_dod[3];
ef708b53
MCC
436 /* Check if the channel is active */
437 if (!(control & (1 << (8 + i))))
438 continue;
439
440 /* Check if the channel is disabled */
41fcb7fe 441 if (status & (1 << i))
ef708b53 442 continue;
ef708b53 443
67166af4 444 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 445 if (!pdev) {
67166af4
MCC
446 i7core_printk(KERN_ERR, "Couldn't find socket %d "
447 "fn %d.%d!!!\n",
448 socket, i + 4, 1);
eb94fc40
MCC
449 return -ENODEV;
450 }
451 /* Devices 4-6 function 1 */
452 pci_read_config_dword(pdev,
453 MC_DOD_CH_DIMM0, &dimm_dod[0]);
454 pci_read_config_dword(pdev,
455 MC_DOD_CH_DIMM1, &dimm_dod[1]);
456 pci_read_config_dword(pdev,
457 MC_DOD_CH_DIMM2, &dimm_dod[2]);
458
ef708b53 459 (*channels)++;
eb94fc40
MCC
460
461 for (j = 0; j < 3; j++) {
462 if (!DIMM_PRESENT(dimm_dod[j]))
463 continue;
464 (*csrows)++;
465 }
ef708b53
MCC
466 }
467
c77720b9 468 debugf0("Number of active channels on socket %d: %d\n",
67166af4 469 socket, *channels);
1c6fed80 470
ef708b53
MCC
471 return 0;
472}
473
f4742949 474static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
475{
476 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 477 struct csrow_info *csr;
854d3349 478 struct pci_dev *pdev;
ba6c5c62 479 int i, j;
5566cb7c 480 unsigned long last_page = 0;
1c6fed80 481 enum edac_type mode;
854d3349 482 enum mem_type mtype;
a0c36a1f 483
854d3349 484 /* Get data from the MC register, function 0 */
f4742949 485 pdev = pvt->pci_mcr[0];
7dd6953c 486 if (!pdev)
8f331907
MCC
487 return -ENODEV;
488
f122a892 489 /* Device 3 function 0 reads */
7dd6953c
MCC
490 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
491 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
492 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
493 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 494
17cb7b0c 495 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 496 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 497 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 498
1c6fed80 499 if (ECC_ENABLED(pvt)) {
41fcb7fe 500 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
501 if (ECCx8(pvt))
502 mode = EDAC_S8ECD8ED;
503 else
504 mode = EDAC_S4ECD4ED;
505 } else {
a0c36a1f 506 debugf0("ECC disabled\n");
1c6fed80
MCC
507 mode = EDAC_NONE;
508 }
a0c36a1f
MCC
509
510 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
511 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
512 "x%x x 0x%x\n",
854d3349
MCC
513 numdimms(pvt->info.max_dod),
514 numrank(pvt->info.max_dod >> 2),
276b824c 515 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
516 numrow(pvt->info.max_dod >> 6),
517 numcol(pvt->info.max_dod >> 9));
a0c36a1f 518
0b2b7b7e 519 for (i = 0; i < NUM_CHANS; i++) {
854d3349 520 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
521
522 if (!CH_ACTIVE(pvt, i)) {
523 debugf0("Channel %i is not active\n", i);
524 continue;
525 }
526 if (CH_DISABLED(pvt, i)) {
527 debugf0("Channel %i is disabled\n", i);
528 continue;
529 }
530
f122a892 531 /* Devices 4-6 function 0 */
f4742949 532 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
533 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
534
f4742949 535 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 536 4 : 2;
0b2b7b7e 537
854d3349
MCC
538 if (data & REGISTERED_DIMM)
539 mtype = MEM_RDDR3;
14d2c083 540 else
854d3349
MCC
541 mtype = MEM_DDR3;
542#if 0
0b2b7b7e
MCC
543 if (data & THREE_DIMMS_PRESENT)
544 pvt->channel[i].dimms = 3;
545 else if (data & SINGLE_QUAD_RANK_PRESENT)
546 pvt->channel[i].dimms = 1;
547 else
548 pvt->channel[i].dimms = 2;
854d3349
MCC
549#endif
550
551 /* Devices 4-6 function 1 */
f4742949 552 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 553 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 554 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 555 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 556 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 557 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 558
1c6fed80 559 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 560 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
561 i,
562 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
563 data,
f4742949 564 pvt->channel[i].ranks,
41fcb7fe 565 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
566
567 for (j = 0; j < 3; j++) {
568 u32 banks, ranks, rows, cols;
5566cb7c 569 u32 size, npages;
854d3349
MCC
570
571 if (!DIMM_PRESENT(dimm_dod[j]))
572 continue;
573
574 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
575 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
576 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
577 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
578
5566cb7c
MCC
579 /* DDR3 has 8 I/O banks */
580 size = (rows * cols * banks * ranks) >> (20 - 3);
581
f4742949 582 pvt->channel[i].dimms++;
854d3349 583
17cb7b0c
MCC
584 debugf0("\tdimm %d %d Mb offset: %x, "
585 "bank: %d, rank: %d, row: %#x, col: %#x\n",
586 j, size,
854d3349
MCC
587 RANKOFFSET(dimm_dod[j]),
588 banks, ranks, rows, cols);
589
eb94fc40
MCC
590#if PAGE_SHIFT > 20
591 npages = size >> (PAGE_SHIFT - 20);
592#else
593 npages = size << (20 - PAGE_SHIFT);
594#endif
5566cb7c 595
ba6c5c62 596 csr = &mci->csrows[*csrow];
5566cb7c
MCC
597 csr->first_page = last_page + 1;
598 last_page += npages;
599 csr->last_page = last_page;
600 csr->nr_pages = npages;
601
854d3349 602 csr->page_mask = 0;
eb94fc40 603 csr->grain = 8;
ba6c5c62 604 csr->csrow_idx = *csrow;
eb94fc40
MCC
605 csr->nr_channels = 1;
606
607 csr->channels[0].chan_idx = i;
608 csr->channels[0].ce_count = 0;
854d3349 609
f4742949 610 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 611
854d3349
MCC
612 switch (banks) {
613 case 4:
614 csr->dtype = DEV_X4;
615 break;
616 case 8:
617 csr->dtype = DEV_X8;
618 break;
619 case 16:
620 csr->dtype = DEV_X16;
621 break;
622 default:
623 csr->dtype = DEV_UNKNOWN;
624 }
625
626 csr->edac_mode = mode;
627 csr->mtype = mtype;
628
ba6c5c62 629 (*csrow)++;
854d3349 630 }
1c6fed80 631
854d3349
MCC
632 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
633 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
634 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
635 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
636 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
637 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
638 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
639 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 640 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 641 for (j = 0; j < 8; j++)
17cb7b0c 642 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
643 (value[j] >> 27) & 0x1,
644 (value[j] >> 24) & 0x7,
645 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
646 }
647
a0c36a1f
MCC
648 return 0;
649}
650
194a40fe
MCC
651/****************************************************************************
652 Error insertion routines
653 ****************************************************************************/
654
655/* The i7core has independent error injection features per channel.
656 However, to have a simpler code, we don't allow enabling error injection
657 on more than one channel.
658 Also, since a change at an inject parameter will be applied only at enable,
659 we're disabling error injection on all write calls to the sysfs nodes that
660 controls the error code injection.
661 */
8f331907 662static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
663{
664 struct i7core_pvt *pvt = mci->pvt_info;
665
666 pvt->inject.enable = 0;
667
f4742949 668 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
669 return -ENODEV;
670
f4742949 671 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 672 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
673
674 return 0;
194a40fe
MCC
675}
676
677/*
678 * i7core inject inject.section
679 *
680 * accept and store error injection inject.section value
681 * bit 0 - refers to the lower 32-byte half cacheline
682 * bit 1 - refers to the upper 32-byte half cacheline
683 */
684static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
685 const char *data, size_t count)
686{
687 struct i7core_pvt *pvt = mci->pvt_info;
688 unsigned long value;
689 int rc;
690
691 if (pvt->inject.enable)
41fcb7fe 692 disable_inject(mci);
194a40fe
MCC
693
694 rc = strict_strtoul(data, 10, &value);
695 if ((rc < 0) || (value > 3))
2068def5 696 return -EIO;
194a40fe
MCC
697
698 pvt->inject.section = (u32) value;
699 return count;
700}
701
702static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
703 char *data)
704{
705 struct i7core_pvt *pvt = mci->pvt_info;
706 return sprintf(data, "0x%08x\n", pvt->inject.section);
707}
708
709/*
710 * i7core inject.type
711 *
712 * accept and store error injection inject.section value
713 * bit 0 - repeat enable - Enable error repetition
714 * bit 1 - inject ECC error
715 * bit 2 - inject parity error
716 */
717static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
718 const char *data, size_t count)
719{
720 struct i7core_pvt *pvt = mci->pvt_info;
721 unsigned long value;
722 int rc;
723
724 if (pvt->inject.enable)
41fcb7fe 725 disable_inject(mci);
194a40fe
MCC
726
727 rc = strict_strtoul(data, 10, &value);
728 if ((rc < 0) || (value > 7))
2068def5 729 return -EIO;
194a40fe
MCC
730
731 pvt->inject.type = (u32) value;
732 return count;
733}
734
735static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
736 char *data)
737{
738 struct i7core_pvt *pvt = mci->pvt_info;
739 return sprintf(data, "0x%08x\n", pvt->inject.type);
740}
741
742/*
743 * i7core_inject_inject.eccmask_store
744 *
745 * The type of error (UE/CE) will depend on the inject.eccmask value:
746 * Any bits set to a 1 will flip the corresponding ECC bit
747 * Correctable errors can be injected by flipping 1 bit or the bits within
748 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
749 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
750 * uncorrectable error to be injected.
751 */
752static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
753 const char *data, size_t count)
754{
755 struct i7core_pvt *pvt = mci->pvt_info;
756 unsigned long value;
757 int rc;
758
759 if (pvt->inject.enable)
41fcb7fe 760 disable_inject(mci);
194a40fe
MCC
761
762 rc = strict_strtoul(data, 10, &value);
763 if (rc < 0)
2068def5 764 return -EIO;
194a40fe
MCC
765
766 pvt->inject.eccmask = (u32) value;
767 return count;
768}
769
770static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
771 char *data)
772{
773 struct i7core_pvt *pvt = mci->pvt_info;
774 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
775}
776
777/*
778 * i7core_addrmatch
779 *
780 * The type of error (UE/CE) will depend on the inject.eccmask value:
781 * Any bits set to a 1 will flip the corresponding ECC bit
782 * Correctable errors can be injected by flipping 1 bit or the bits within
783 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
784 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
785 * uncorrectable error to be injected.
786 */
194a40fe 787
a5538e53
MCC
/*
 * DECLARE_ADDR_MATCH(param, limit) - generate the sysfs store/show pair
 * for one inject address-match field (channel, dimm, rank, ...).
 *
 * Writing "any" stores -1, meaning the field is ignored when matching;
 * otherwise a decimal value below @limit is accepted.
 *
 * Fix: parse into an unsigned long — strict_strtoul() takes an
 * unsigned long pointer, and the old code passed &value with value
 * declared long (incompatible pointer type).
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct mem_ctl_info *mci,				\
	const char *data, size_t count)				\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long val;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any"))				\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &val);		\
		if ((rc < 0) || (val >= limit))			\
			return -EIO;				\
		value = val;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
	struct mem_ctl_info *mci,				\
	char *data)						\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
829
a5538e53
MCC
/*
 * ATTR_ADDR_MATCH(param) - sysfs attribute (mode 0644) wiring one
 * address-match field to the show/store pair built by
 * DECLARE_ADDR_MATCH().
 */
#define ATTR_ADDR_MATCH(param)				\
	{						\
		.attr = {				\
			.name = #param,			\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show = i7core_inject_show_##param,	\
		.store = i7core_inject_store_##param,	\
	}

/* One store/show pair per address-match field; limit = field range */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 846
276b824c
MCC
847static int write_and_test(struct pci_dev *dev, int where, u32 val)
848{
849 u32 read;
850 int count;
851
4157d9f5
MCC
852 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
853 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
854 where, val);
855
276b824c
MCC
856 for (count = 0; count < 10; count++) {
857 if (count)
b990538a 858 msleep(100);
276b824c
MCC
859 pci_write_config_dword(dev, where, val);
860 pci_read_config_dword(dev, where, &read);
861
862 if (read == val)
863 return 0;
864 }
865
4157d9f5
MCC
866 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
867 "write=%08x. Read=%08x\n",
868 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
869 where, val, read);
276b824c
MCC
870
871 return -EINVAL;
872}
873
194a40fe
MCC
874/*
875 * This routine prepares the Memory Controller for error injection.
876 * The error will be injected when some process tries to write to the
877 * memory that matches the given criteria.
878 * The criteria can be set in terms of a mask where dimm, rank, bank, page
879 * and col can be specified.
880 * A -1 value for any of the mask items will make the MCU to ignore
881 * that matching criteria for error injection.
882 *
883 * It should be noticed that the error will only happen after a write operation
884 * on a memory that matches the condition. if REPEAT_EN is not enabled at
885 * inject mask, then it will produce just one error. Otherwise, it will repeat
886 * until the injectmask would be cleaned.
887 *
888 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
889 * is reliable enough to check if the MC is using the
890 * three channels. However, this is not clear at the datasheet.
891 */
892static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
893 const char *data, size_t count)
894{
895 struct i7core_pvt *pvt = mci->pvt_info;
896 u32 injectmask;
897 u64 mask = 0;
898 int rc;
899 long enable;
900
f4742949 901 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
902 return 0;
903
194a40fe
MCC
904 rc = strict_strtoul(data, 10, &enable);
905 if ((rc < 0))
906 return 0;
907
908 if (enable) {
909 pvt->inject.enable = 1;
910 } else {
911 disable_inject(mci);
912 return count;
913 }
914
915 /* Sets pvt->inject.dimm mask */
916 if (pvt->inject.dimm < 0)
7b029d03 917 mask |= 1L << 41;
194a40fe 918 else {
f4742949 919 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 920 mask |= (pvt->inject.dimm & 0x3L) << 35;
194a40fe 921 else
7b029d03 922 mask |= (pvt->inject.dimm & 0x1L) << 36;
194a40fe
MCC
923 }
924
925 /* Sets pvt->inject.rank mask */
926 if (pvt->inject.rank < 0)
7b029d03 927 mask |= 1L << 40;
194a40fe 928 else {
f4742949 929 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 930 mask |= (pvt->inject.rank & 0x1L) << 34;
194a40fe 931 else
7b029d03 932 mask |= (pvt->inject.rank & 0x3L) << 34;
194a40fe
MCC
933 }
934
935 /* Sets pvt->inject.bank mask */
936 if (pvt->inject.bank < 0)
7b029d03 937 mask |= 1L << 39;
194a40fe 938 else
7b029d03 939 mask |= (pvt->inject.bank & 0x15L) << 30;
194a40fe
MCC
940
941 /* Sets pvt->inject.page mask */
942 if (pvt->inject.page < 0)
7b029d03 943 mask |= 1L << 38;
194a40fe 944 else
7b029d03 945 mask |= (pvt->inject.page & 0xffffL) << 14;
194a40fe
MCC
946
947 /* Sets pvt->inject.column mask */
948 if (pvt->inject.col < 0)
7b029d03 949 mask |= 1L << 37;
194a40fe 950 else
7b029d03 951 mask |= (pvt->inject.col & 0x3fffL);
194a40fe 952
276b824c
MCC
953 /*
954 * bit 0: REPEAT_EN
955 * bits 1-2: MASK_HALF_CACHELINE
956 * bit 3: INJECT_ECC
957 * bit 4: INJECT_ADDR_PARITY
958 */
959
960 injectmask = (pvt->inject.type & 1) |
961 (pvt->inject.section & 0x3) << 1 |
962 (pvt->inject.type & 0x6) << (3 - 1);
963
964 /* Unlock writes to registers - this register is write only */
f4742949 965 pci_write_config_dword(pvt->pci_noncore,
67166af4 966 MC_CFG_CONTROL, 0x2);
e9bd2e73 967
f4742949 968 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 969 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 970 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 971 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 972
f4742949 973 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
974 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
975
f4742949 976 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 977 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 978
194a40fe 979 /*
276b824c
MCC
980 * This is something undocumented, based on my tests
981 * Without writing 8 to this register, errors aren't injected. Not sure
982 * why.
194a40fe 983 */
f4742949 984 pci_write_config_dword(pvt->pci_noncore,
276b824c 985 MC_CFG_CONTROL, 8);
194a40fe 986
41fcb7fe
MCC
987 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
988 " inject 0x%08x\n",
194a40fe
MCC
989 mask, pvt->inject.eccmask, injectmask);
990
7b029d03 991
194a40fe
MCC
992 return count;
993}
994
995static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
996 char *data)
997{
998 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
999 u32 injectmask;
1000
f4742949 1001 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1002 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1003
1004 debugf0("Inject error read: 0x%018x\n", injectmask);
1005
1006 if (injectmask & 0x0c)
1007 pvt->inject.enable = 1;
1008
194a40fe
MCC
1009 return sprintf(data, "%d\n", pvt->inject.enable);
1010}
1011
/*
 * Generate a sysfs show() callback that reports the accumulated corrected
 * error count for udimm slot @param.  The value is only meaningful for
 * unregistered (UDIMM) memory while ce_count_available is set; otherwise
 * "data unavailable" is returned.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s() \n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

/*
 * Build the read-only sysfs attribute entry ("udimm<param>") wired to the
 * show callback generated by DECLARE_COUNTER above.
 */
#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show = i7core_show_counter_##param		\
	}

/* One counter attribute per udimm slot (0..2) */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
442305b1 1038
/*
 * Sysfs struct
 */

/*
 * Attributes of the "inject_addrmatch" sysfs group: each entry selects one
 * dimension (channel/dimm/rank/bank/page/col) of the address-match mask used
 * by the error injection logic.  ATTR_ADDR_MATCH is defined earlier in
 * this file.  The NULL-name entry terminates the array for the EDAC core.
 */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }	/* sentinel */
};

/* Sysfs sub-directory "inject_addrmatch" grouping the attributes above */
static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1058
f338d736
MCC
1059static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1060 ATTR_COUNTER(0),
1061 ATTR_COUNTER(1),
1062 ATTR_COUNTER(2),
1063};
1064
1065static struct mcidev_sysfs_group i7core_udimm_counters = {
1066 .name = "all_channel_counts",
1067 .mcidev_attr = i7core_udimm_counters_attrs,
1068};
1069
/*
 * Top-level sysfs attributes for this MC: error injection controls plus the
 * "inject_addrmatch" group.  The first NULL-name slot is a placeholder that
 * mci_bind_devs() fills with &i7core_udimm_counters when the memory is
 * unregistered; the second NULL-name entry is the terminating sentinel.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }	/* sentinel */
};
1105
a0c36a1f
MCC
1106/****************************************************************************
1107 Device initialization routines: put/get, init/exit
1108 ****************************************************************************/
1109
1110/*
1111 * i7core_put_devices 'put' all the devices that we have
1112 * reserved via 'get'
1113 */
13d6e9b6 1114static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1115{
13d6e9b6 1116 int i;
a0c36a1f 1117
22e6bcbd
MCC
1118 debugf0(__FILE__ ": %s()\n", __func__);
1119 for (i = 0; i < N_DEVS; i++) {
1120 struct pci_dev *pdev = i7core_dev->pdev[i];
1121 if (!pdev)
1122 continue;
1123 debugf0("Removing dev %02x:%02x.%d\n",
1124 pdev->bus->number,
1125 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1126 pci_dev_put(pdev);
1127 }
13d6e9b6 1128 kfree(i7core_dev->pdev);
22e6bcbd 1129 list_del(&i7core_dev->list);
13d6e9b6
MCC
1130 kfree(i7core_dev);
1131}
66607706 1132
13d6e9b6
MCC
1133static void i7core_put_all_devices(void)
1134{
42538680 1135 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1136
42538680 1137 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
13d6e9b6 1138 i7core_put_devices(i7core_dev);
a0c36a1f
MCC
1139}
1140
bc2d7245
KM
1141static void i7core_xeon_pci_fixup(void)
1142{
1143 struct pci_dev *pdev = NULL;
1144 int i;
1145 /*
1146 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1147 * aren't announced by acpi. So, we need to use a legacy scan probing
1148 * to detect them
1149 */
1150 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1151 pci_dev_descr[0].dev_id, NULL);
bc2d7245 1152 if (unlikely(!pdev)) {
f4742949 1153 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1154 pcibios_scan_specific_bus(255-i);
1155 }
1156}
1157
a0c36a1f
MCC
1158/*
1159 * i7core_get_devices Find and perform 'get' operation on the MCH's
1160 * device/functions we want to reference for this driver
1161 *
1162 * Need to 'get' device 16 func 1 and func 2
1163 */
c77720b9 1164int i7core_get_onedevice(struct pci_dev **prev, int devno)
a0c36a1f 1165{
66607706
MCC
1166 struct i7core_dev *i7core_dev;
1167
8f331907 1168 struct pci_dev *pdev = NULL;
67166af4
MCC
1169 u8 bus = 0;
1170 u8 socket = 0;
a0c36a1f 1171
c77720b9 1172 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1173 pci_dev_descr[devno].dev_id, *prev);
c77720b9 1174
c77720b9
MCC
1175 /*
1176 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1177 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1178 * to probe for the alternate address in case of failure
1179 */
66607706 1180 if (pci_dev_descr[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
c77720b9
MCC
1181 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1182 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
d1fd4fb6 1183
c77720b9
MCC
1184 if (!pdev) {
1185 if (*prev) {
1186 *prev = pdev;
1187 return 0;
d1fd4fb6
MCC
1188 }
1189
310cbb72 1190 /*
c77720b9
MCC
1191 * Dev 3 function 2 only exists on chips with RDIMMs
1192 * so, it is ok to not found it
310cbb72 1193 */
66607706 1194 if ((pci_dev_descr[devno].dev == 3) && (pci_dev_descr[devno].func == 2)) {
c77720b9
MCC
1195 *prev = pdev;
1196 return 0;
1197 }
310cbb72 1198
c77720b9
MCC
1199 i7core_printk(KERN_ERR,
1200 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1201 pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1202 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
67166af4 1203
c77720b9
MCC
1204 /* End of list, leave */
1205 return -ENODEV;
1206 }
1207 bus = pdev->bus->number;
67166af4 1208
c77720b9
MCC
1209 if (bus == 0x3f)
1210 socket = 0;
1211 else
1212 socket = 255 - bus;
1213
66607706
MCC
1214 i7core_dev = get_i7core_dev(socket);
1215 if (!i7core_dev) {
1216 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1217 if (!i7core_dev)
1218 return -ENOMEM;
1219 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * N_DEVS,
1220 GFP_KERNEL);
1221 if (!i7core_dev->pdev)
1222 return -ENOMEM;
1223 i7core_dev->socket = socket;
1224 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1225 }
67166af4 1226
66607706 1227 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1228 i7core_printk(KERN_ERR,
1229 "Duplicated device for "
1230 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1231 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1232 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1233 pci_dev_put(pdev);
1234 return -ENODEV;
1235 }
67166af4 1236
66607706 1237 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1238
1239 /* Sanity check */
66607706
MCC
1240 if (unlikely(PCI_SLOT(pdev->devfn) != pci_dev_descr[devno].dev ||
1241 PCI_FUNC(pdev->devfn) != pci_dev_descr[devno].func)) {
c77720b9
MCC
1242 i7core_printk(KERN_ERR,
1243 "Device PCI ID %04x:%04x "
1244 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
66607706 1245 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id,
c77720b9 1246 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
66607706 1247 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func);
c77720b9
MCC
1248 return -ENODEV;
1249 }
ef708b53 1250
c77720b9
MCC
1251 /* Be sure that the device is enabled */
1252 if (unlikely(pci_enable_device(pdev) < 0)) {
1253 i7core_printk(KERN_ERR,
1254 "Couldn't enable "
1255 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1256 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1257 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1258 return -ENODEV;
1259 }
ef708b53 1260
d4c27795
MCC
1261 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1262 socket, bus, pci_dev_descr[devno].dev,
1263 pci_dev_descr[devno].func,
1264 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
8f331907 1265
c77720b9 1266 *prev = pdev;
ef708b53 1267
c77720b9
MCC
1268 return 0;
1269}
a0c36a1f 1270
f4742949 1271static int i7core_get_devices(void)
c77720b9
MCC
1272{
1273 int i;
1274 struct pci_dev *pdev = NULL;
ef708b53 1275
c77720b9
MCC
1276 for (i = 0; i < N_DEVS; i++) {
1277 pdev = NULL;
1278 do {
1279 if (i7core_get_onedevice(&pdev, i) < 0) {
13d6e9b6 1280 i7core_put_all_devices();
c77720b9
MCC
1281 return -ENODEV;
1282 }
1283 } while (pdev);
1284 }
66607706 1285
ef708b53 1286 return 0;
ef708b53
MCC
1287}
1288
/*
 * Cache the socket's PCI devices in the driver-private structure, indexed
 * by their PCI slot/function:
 *   slot 3            -> pvt->pci_mcr[func]   (memory controller registers)
 *   slot 4..4+NUM_CHANS-1 -> pvt->pci_ch[slot-4][func] (per-channel regs)
 *   slot 0 func 0     -> pvt->pci_noncore
 * Presence of dev 3 func 2 marks registered (RDIMM) memory.
 * Returns 0 on success, -EINVAL if a device is outside the expected range.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < N_DEVS; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* Dev 3 func 2 only exists on RDIMM (registered) setups */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 *
	 * NOTE(review): this patches a slot in the *global*
	 * i7core_sysfs_attrs table, so the udimm counters appear for every
	 * MC once any socket has UDIMMs — confirm this is intended on
	 * mixed RDIMM/UDIMM multi-socket systems.
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1347
442305b1
MCC
1348/****************************************************************************
1349 Error check routines
1350 ****************************************************************************/
f4742949 1351static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1352 int chan, int dimm, int add)
1353{
1354 char *msg;
1355 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1356 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1357
1358 for (i = 0; i < add; i++) {
1359 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1360 "(Socket=%d channel=%d dimm=%d)",
1361 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1362
1363 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1364 kfree (msg);
1365 }
1366}
1367
1368static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
f4742949 1369 int chan, int new0, int new1, int new2)
b4e8f0b6
MCC
1370{
1371 struct i7core_pvt *pvt = mci->pvt_info;
1372 int add0 = 0, add1 = 0, add2 = 0;
1373 /* Updates CE counters if it is not the first time here */
f4742949 1374 if (pvt->ce_count_available) {
b4e8f0b6
MCC
1375 /* Updates CE counters */
1376
f4742949
MCC
1377 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1378 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1379 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
b4e8f0b6
MCC
1380
1381 if (add2 < 0)
1382 add2 += 0x7fff;
f4742949 1383 pvt->rdimm_ce_count[chan][2] += add2;
b4e8f0b6
MCC
1384
1385 if (add1 < 0)
1386 add1 += 0x7fff;
f4742949 1387 pvt->rdimm_ce_count[chan][1] += add1;
b4e8f0b6
MCC
1388
1389 if (add0 < 0)
1390 add0 += 0x7fff;
f4742949 1391 pvt->rdimm_ce_count[chan][0] += add0;
b4e8f0b6 1392 } else
f4742949 1393 pvt->ce_count_available = 1;
b4e8f0b6
MCC
1394
1395 /* Store the new values */
f4742949
MCC
1396 pvt->rdimm_last_ce_count[chan][2] = new2;
1397 pvt->rdimm_last_ce_count[chan][1] = new1;
1398 pvt->rdimm_last_ce_count[chan][0] = new0;
b4e8f0b6
MCC
1399
1400 /*updated the edac core */
1401 if (add0 != 0)
f4742949 1402 i7core_rdimm_update_csrow(mci, chan, 0, add0);
b4e8f0b6 1403 if (add1 != 0)
f4742949 1404 i7core_rdimm_update_csrow(mci, chan, 1, add1);
b4e8f0b6 1405 if (add2 != 0)
f4742949 1406 i7core_rdimm_update_csrow(mci, chan, 2, add2);
b4e8f0b6
MCC
1407
1408}
1409
/*
 * Poll the RDIMM corrected-error counters (dev 3 func 2,
 * MC_COR_ECC_CNT_0..5) and feed the per-channel/per-dimm deltas to
 * i7core_rdimm_update_ce_count().  Each 32-bit register packs two dimm
 * counters (DIMM_TOP_/DIMM_BOT_COR_ERR); the unpacking depends on whether
 * the channel carries 3 dimms or fewer.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			/* 1-2 dimms: top+bottom halves belong to same dimm */
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
442305b1
MCC
1448
1449/* This function is based on the device 3 function 4 registers as described on:
1450 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1451 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1452 * also available at:
1453 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1454 */
f4742949 1455static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1456{
1457 struct i7core_pvt *pvt = mci->pvt_info;
1458 u32 rcv1, rcv0;
1459 int new0, new1, new2;
1460
f4742949 1461 if (!pvt->pci_mcr[4]) {
b990538a 1462 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1463 return;
1464 }
1465
b4e8f0b6 1466 /* Corrected test errors */
f4742949
MCC
1467 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1468 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1469
1470 /* Store the new values */
1471 new2 = DIMM2_COR_ERR(rcv1);
1472 new1 = DIMM1_COR_ERR(rcv0);
1473 new0 = DIMM0_COR_ERR(rcv0);
1474
442305b1 1475 /* Updates CE counters if it is not the first time here */
f4742949 1476 if (pvt->ce_count_available) {
442305b1
MCC
1477 /* Updates CE counters */
1478 int add0, add1, add2;
1479
f4742949
MCC
1480 add2 = new2 - pvt->udimm_last_ce_count[2];
1481 add1 = new1 - pvt->udimm_last_ce_count[1];
1482 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1483
1484 if (add2 < 0)
1485 add2 += 0x7fff;
f4742949 1486 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1487
1488 if (add1 < 0)
1489 add1 += 0x7fff;
f4742949 1490 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1491
1492 if (add0 < 0)
1493 add0 += 0x7fff;
f4742949 1494 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1495
1496 if (add0 | add1 | add2)
1497 i7core_printk(KERN_ERR, "New Corrected error(s): "
1498 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1499 add0, add1, add2);
442305b1 1500 } else
f4742949 1501 pvt->ce_count_available = 1;
442305b1
MCC
1502
1503 /* Store the new values */
f4742949
MCC
1504 pvt->udimm_last_ce_count[2] = new2;
1505 pvt->udimm_last_ce_count[1] = new1;
1506 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1507}
1508
8a2f118e
MCC
1509/*
1510 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1511 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1512 * Nehalem are defined as family 0x06, model 0x1a
1513 *
1514 * The MCA registers used here are the following ones:
8a2f118e 1515 * struct mce field MCA Register
f237fcf2
MCC
1516 * m->status MSR_IA32_MC8_STATUS
1517 * m->addr MSR_IA32_MC8_ADDR
1518 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1519 * In the case of Nehalem, the error information is masked at .status and .misc
1520 * fields
1521 */
d5381642
MCC
1522static void i7core_mce_output_error(struct mem_ctl_info *mci,
1523 struct mce *m)
1524{
b4e8f0b6 1525 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1526 char *type, *optype, *err, *msg;
8a2f118e 1527 unsigned long error = m->status & 0x1ff0000l;
a639539f 1528 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1529 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1530 u32 dimm = (m->misc >> 16) & 0x3;
1531 u32 channel = (m->misc >> 18) & 0x3;
1532 u32 syndrome = m->misc >> 32;
1533 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1534 int csrow;
8a2f118e 1535
c5d34528
MCC
1536 if (m->mcgstatus & 1)
1537 type = "FATAL";
1538 else
1539 type = "NON_FATAL";
1540
a639539f 1541 switch (optypenum) {
b990538a
MCC
1542 case 0:
1543 optype = "generic undef request";
1544 break;
1545 case 1:
1546 optype = "read error";
1547 break;
1548 case 2:
1549 optype = "write error";
1550 break;
1551 case 3:
1552 optype = "addr/cmd error";
1553 break;
1554 case 4:
1555 optype = "scrubbing error";
1556 break;
1557 default:
1558 optype = "reserved";
1559 break;
a639539f
MCC
1560 }
1561
8a2f118e
MCC
1562 switch (errnum) {
1563 case 16:
1564 err = "read ECC error";
1565 break;
1566 case 17:
1567 err = "RAS ECC error";
1568 break;
1569 case 18:
1570 err = "write parity error";
1571 break;
1572 case 19:
1573 err = "redundacy loss";
1574 break;
1575 case 20:
1576 err = "reserved";
1577 break;
1578 case 21:
1579 err = "memory range error";
1580 break;
1581 case 22:
1582 err = "RTID out of range";
1583 break;
1584 case 23:
1585 err = "address parity error";
1586 break;
1587 case 24:
1588 err = "byte enable parity error";
1589 break;
1590 default:
1591 err = "unknown";
d5381642 1592 }
d5381642 1593
f237fcf2 1594 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1595 msg = kasprintf(GFP_ATOMIC,
f4742949 1596 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1597 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1598 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1599 syndrome, core_err_cnt, (long long)m->status,
1600 (long long)m->misc, optype, err);
8a2f118e
MCC
1601
1602 debugf0("%s", msg);
d5381642 1603
f4742949 1604 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1605
d5381642 1606 /* Call the helper to output message */
b4e8f0b6
MCC
1607 if (m->mcgstatus & 1)
1608 edac_mc_handle_fbd_ue(mci, csrow, 0,
1609 0 /* FIXME: should be channel here */, msg);
f4742949 1610 else if (!pvt->is_registered)
b4e8f0b6
MCC
1611 edac_mc_handle_fbd_ce(mci, csrow,
1612 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1613
1614 kfree(msg);
d5381642
MCC
1615}
1616
/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 *
 * Consumer side of the lockless MCE ring buffer: drains entries between
 * mce_in and mce_out into mce_outentry (possibly in two memcpy chunks when
 * the range wraps), then decodes each entry and refreshes the CE counters.
 * The smp_rmb()/smp_wmb() pairs order the index updates against the
 * producer in i7core_mce_check_error().
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * loosing an error.
	 *
	 * NOTE(review): sizeof(mce_entry) is used as the ring size here and
	 * below; the modulus should be the number of ring *entries*
	 * (MCE_LOG_LEN), not a byte count — confirm against the
	 * declaration of struct i7core_pvt.
	 */
	smp_rmb();
	count = (pvt->mce_out + sizeof(mce_entry) - pvt->mce_in)
		% sizeof(mce_entry);
	if (!count)
		return;

	m = pvt->mce_outentry;
	/* Wrapped range: copy the tail chunk first, then restart at 0 */
	if (pvt->mce_in + count > sizeof(mce_entry)) {
		unsigned l = sizeof(mce_entry) - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	/* Report (and clear) any producer-side overrun */
	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1675
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 *
 * Producer side of the lockless ring buffer.  Returns 1 when the event was
 * consumed by this driver, 0 when mcelog should keep handling it.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) {
		debugf0("mc%d: ignoring mce log for socket %d. "
			"Another mc should get it.\n",
			pvt->i7core_dev->socket,
			cpu_data(mce->cpu).phys_proc_id);
		return 0;
	}

	/*
	 * Ring full: count the lost event and bail out.
	 * NOTE(review): as in i7core_check_error(), sizeof(mce_entry) is
	 * used where the entry count (MCE_LOG_LEN) looks intended — verify.
	 */
	smp_rmb();
	if ((pvt->mce_out + 1) % sizeof(mce_entry) == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % sizeof(mce_entry);
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
1724
f4742949
MCC
1725static int i7core_register_mci(struct i7core_dev *i7core_dev,
1726 int num_channels, int num_csrows)
a0c36a1f
MCC
1727{
1728 struct mem_ctl_info *mci;
1729 struct i7core_pvt *pvt;
ba6c5c62 1730 int csrow = 0;
f4742949 1731 int rc;
a0c36a1f 1732
a0c36a1f 1733 /* allocate a new MC control structure */
d4c27795
MCC
1734 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1735 i7core_dev->socket);
f4742949
MCC
1736 if (unlikely(!mci))
1737 return -ENOMEM;
a0c36a1f
MCC
1738
1739 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1740
f4742949
MCC
1741 /* record ptr to the generic device */
1742 mci->dev = &i7core_dev->pdev[0]->dev;
1743
a0c36a1f 1744 pvt = mci->pvt_info;
ef708b53 1745 memset(pvt, 0, sizeof(*pvt));
67166af4 1746
41fcb7fe
MCC
1747 /*
1748 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1749 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1750 * memory channels
1751 */
1752 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1753 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1754 mci->edac_cap = EDAC_FLAG_NONE;
1755 mci->mod_name = "i7core_edac.c";
1756 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1757 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1758 i7core_dev->socket);
1759 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1760 mci->ctl_page_to_phys = NULL;
a5538e53 1761 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
87d1d272
MCC
1762 /* Set the function pointer to an actual operation function */
1763 mci->edac_check = i7core_check_error;
8f331907 1764
ef708b53 1765 /* Store pci devices at mci for faster access */
f4742949 1766 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1767 if (unlikely(rc < 0))
f4742949 1768 goto fail;
ef708b53
MCC
1769
1770 /* Get dimm basic config */
f4742949 1771 get_dimm_config(mci, &csrow);
ef708b53 1772
a0c36a1f 1773 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1774 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1775 debugf0("MC: " __FILE__
1776 ": %s(): failed edac_mc_add_mc()\n", __func__);
1777 /* FIXME: perhaps some code should go here that disables error
1778 * reporting if we just enabled it
1779 */
b7c76151
MCC
1780
1781 rc = -EINVAL;
f4742949 1782 goto fail;
a0c36a1f
MCC
1783 }
1784
1785 /* allocating generic PCI control info */
f4742949
MCC
1786 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1787 EDAC_MOD_STR);
41fcb7fe 1788 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1789 printk(KERN_WARNING
1790 "%s(): Unable to create PCI control\n",
1791 __func__);
1792 printk(KERN_WARNING
1793 "%s(): PCI error report via EDAC not setup\n",
1794 __func__);
1795 }
1796
194a40fe 1797 /* Default error mask is any memory */
ef708b53 1798 pvt->inject.channel = 0;
194a40fe
MCC
1799 pvt->inject.dimm = -1;
1800 pvt->inject.rank = -1;
1801 pvt->inject.bank = -1;
1802 pvt->inject.page = -1;
1803 pvt->inject.col = -1;
1804
d5381642 1805 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1806 pvt->edac_mce.priv = mci;
d5381642 1807 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642
MCC
1808
1809 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1810 if (unlikely(rc < 0)) {
d5381642
MCC
1811 debugf0("MC: " __FILE__
1812 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1813 }
1814
1815fail:
1816 edac_mc_free(mci);
1817 return rc;
1818}
1819
/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 *
 * All sockets are discovered and registered on the first probe call;
 * subsequent matches (dev_idx >= 1) are rejected.  i7core_edac_lock
 * serializes against i7core_remove().
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);
	rc = i7core_get_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* releases every device acquired by i7core_get_devices() */
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
1872
/*
 *	i7core_remove	destructor for one instance of device
 *
 * Tears down every registered MC: unregisters from edac_mce, frees the
 * mci and releases the cached PCI devices for each socket.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			/* NOTE: reassigns the loop cursor from pvt — relies
			 * on pvt->i7core_dev matching the current entry */
			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);	/* from kasprintf() at init */
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
1914
/* Export the PCI id table (defined earlier) for module autoloading */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
1927
1928/*
1929 * i7core_init Module entry function
1930 * Try to initialize this module for its devices
1931 */
1932static int __init i7core_init(void)
1933{
1934 int pci_rc;
1935
1936 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1937
1938 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1939 opstate_init();
1940
bc2d7245
KM
1941 i7core_xeon_pci_fixup();
1942
a0c36a1f
MCC
1943 pci_rc = pci_register_driver(&i7core_driver);
1944
3ef288a9
MCC
1945 if (pci_rc >= 0)
1946 return 0;
1947
1948 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1949 pci_rc);
1950
1951 return pci_rc;
a0c36a1f
MCC
1952}
1953
/*
 *	i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* triggers i7core_remove() for the bound device */
	pci_unregister_driver(&i7core_driver);
}
1963
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Read-only module parameter selecting the EDAC polling mode */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");