]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/edac/i7core_edac.c
Add support for Westmere to i7core_edac driver
[mirror_ubuntu-jammy-kernel.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
1/* Intel 7 core Memory Controller kernel module (Nehalem)
2 *
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
5 *
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
8 *
9 * Red Hat Inc. http://www.redhat.com
10 *
11 * Forked and adapted from the i5400_edac driver
12 *
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
3b918c12 28#include <linux/delay.h>
a0c36a1f
MCC
29#include <linux/edac.h>
30#include <linux/mmzone.h>
d5381642 31#include <linux/edac_mce.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
8f331907 202struct pci_id_descr {
66607706
MCC
203 int dev;
204 int func;
205 int dev_id;
de06eeef 206 int optional;
8f331907
MCC
207};
208
bd9e19ca
VM
209struct pci_id_table {
210 struct pci_id_descr *descr;
211 int n_devs;
212};
213
f4742949
MCC
214struct i7core_dev {
215 struct list_head list;
216 u8 socket;
217 struct pci_dev **pdev;
de06eeef 218 int n_devs;
f4742949
MCC
219 struct mem_ctl_info *mci;
220};
221
a0c36a1f 222struct i7core_pvt {
f4742949
MCC
223 struct pci_dev *pci_noncore;
224 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
225 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
226
227 struct i7core_dev *i7core_dev;
67166af4 228
a0c36a1f 229 struct i7core_info info;
194a40fe 230 struct i7core_inject inject;
f4742949 231 struct i7core_channel channel[NUM_CHANS];
67166af4 232
f4742949 233 int channels; /* Number of active channels */
442305b1 234
f4742949
MCC
235 int ce_count_available;
236 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
237
238 /* ECC corrected errors counts per udimm */
f4742949
MCC
239 unsigned long udimm_ce_count[MAX_DIMMS];
240 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 241 /* ECC corrected errors counts per rdimm */
f4742949
MCC
242 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
243 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 244
f4742949 245 unsigned int is_registered;
14d2c083 246
d5381642
MCC
247 /* mcelog glue */
248 struct edac_mce edac_mce;
ca9c90ba
MCC
249
250 /* Fifo double buffers */
d5381642 251 struct mce mce_entry[MCE_LOG_LEN];
ca9c90ba
MCC
252 struct mce mce_outentry[MCE_LOG_LEN];
253
254 /* Fifo in/out counters */
255 unsigned mce_in, mce_out;
256
257 /* Count indicator to show errors not got */
258 unsigned mce_overrun;
a0c36a1f
MCC
259};
260
66607706
MCC
261/* Static vars */
262static LIST_HEAD(i7core_edac_list);
263static DEFINE_MUTEX(i7core_edac_lock);
a0c36a1f 264
8f331907
MCC
265#define PCI_DESCR(device, function, device_id) \
266 .dev = (device), \
267 .func = (function), \
268 .dev_id = (device_id)
269
bd9e19ca 270struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
8f331907
MCC
271 /* Memory controller */
272 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
273 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
de06eeef
MCC
274 /* Exists only for RDIMM */
275 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
8f331907
MCC
276 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
277
278 /* Channel 0 */
279 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
280 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
281 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
282 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
283
284 /* Channel 1 */
285 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
286 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
287 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
288 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
289
290 /* Channel 2 */
291 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
292 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
293 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
294 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
295
296 /* Generic Non-core registers */
297 /*
298 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
299 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
300 * the probing code needs to test for the other address in case of
301 * failure of this one
302 */
fd382654 303 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
310cbb72 304
a0c36a1f 305};
8f331907 306
52a2e4fc
MCC
307struct pci_id_descr pci_dev_descr_lynnfield[] = {
308 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
309 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
310 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
311
312 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
313 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
314 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
315 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
316
508fa179
MCC
317 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
318 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
319 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
320 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
52a2e4fc 321
f05da2f7
MCC
322 /*
323 * This is the PCI device has an alternate address on some
324 * processors like Core i7 860
325 */
52a2e4fc
MCC
326 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
327};
328
bd9e19ca
VM
329struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
330 /* Memory controller */
331 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
332 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
333 /* Exists only for RDIMM */
334 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
335 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
336
337 /* Channel 0 */
338 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
339 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
340 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
341 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },
342
343 /* Channel 1 */
344 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
345 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
346 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
347 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },
348
349 /* Channel 2 */
350 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
351 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
352 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
353 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
354
355 /* Generic Non-core registers */
356 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
357
358};
359
360#define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
361struct pci_id_table pci_dev_table[] = {
362 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
363 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
364 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
365};
366
8f331907
MCC
367/*
368 * pci_device_id table for which devices we are looking for
8f331907
MCC
369 */
370static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 371 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
f05da2f7 372 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
8f331907
MCC
373 {0,} /* 0 terminated list. */
374};
375
a0c36a1f
MCC
376static struct edac_pci_ctl_info *i7core_pci;
377
378/****************************************************************************
379 Anciliary status routines
380 ****************************************************************************/
381
382 /* MC_CONTROL bits */
ef708b53
MCC
383#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
384#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
385
386 /* MC_STATUS bits */
61053fde 387#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 388#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
389
390 /* MC_MAX_DOD read functions */
854d3349 391static inline int numdimms(u32 dimms)
a0c36a1f 392{
854d3349 393 return (dimms & 0x3) + 1;
a0c36a1f
MCC
394}
395
854d3349 396static inline int numrank(u32 rank)
a0c36a1f
MCC
397{
398 static int ranks[4] = { 1, 2, 4, -EINVAL };
399
854d3349 400 return ranks[rank & 0x3];
a0c36a1f
MCC
401}
402
854d3349 403static inline int numbank(u32 bank)
a0c36a1f
MCC
404{
405 static int banks[4] = { 4, 8, 16, -EINVAL };
406
854d3349 407 return banks[bank & 0x3];
a0c36a1f
MCC
408}
409
854d3349 410static inline int numrow(u32 row)
a0c36a1f
MCC
411{
412 static int rows[8] = {
413 1 << 12, 1 << 13, 1 << 14, 1 << 15,
414 1 << 16, -EINVAL, -EINVAL, -EINVAL,
415 };
416
854d3349 417 return rows[row & 0x7];
a0c36a1f
MCC
418}
419
854d3349 420static inline int numcol(u32 col)
a0c36a1f
MCC
421{
422 static int cols[8] = {
423 1 << 10, 1 << 11, 1 << 12, -EINVAL,
424 };
854d3349 425 return cols[col & 0x3];
a0c36a1f
MCC
426}
427
f4742949 428static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
429{
430 struct i7core_dev *i7core_dev;
431
432 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
433 if (i7core_dev->socket == socket)
434 return i7core_dev;
435 }
436
437 return NULL;
438}
439
a0c36a1f
MCC
440/****************************************************************************
441 Memory check routines
442 ****************************************************************************/
67166af4
MCC
443static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
444 unsigned func)
ef708b53 445{
66607706 446 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 447 int i;
ef708b53 448
66607706
MCC
449 if (!i7core_dev)
450 return NULL;
451
de06eeef 452 for (i = 0; i < i7core_dev->n_devs; i++) {
66607706 453 if (!i7core_dev->pdev[i])
ef708b53
MCC
454 continue;
455
66607706
MCC
456 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
457 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
458 return i7core_dev->pdev[i];
ef708b53
MCC
459 }
460 }
461
eb94fc40
MCC
462 return NULL;
463}
464
ec6df24c
MCC
465/**
466 * i7core_get_active_channels() - gets the number of channels and csrows
467 * @socket: Quick Path Interconnect socket
468 * @channels: Number of channels that will be returned
469 * @csrows: Number of csrows found
470 *
471 * Since EDAC core needs to know in advance the number of available channels
472 * and csrows, in order to allocate memory for csrows/channels, it is needed
473 * to run two similar steps. At the first step, implemented on this function,
474 * it checks the number of csrows/channels present at one socket.
475 * this is used in order to properly allocate the size of mci components.
476 *
477 * It should be noticed that none of the current available datasheets explain
478 * or even mention how csrows are seen by the memory controller. So, we need
479 * to add a fake description for csrows.
480 * So, this driver is attributing one DIMM memory for one csrow.
481 */
67166af4
MCC
482static int i7core_get_active_channels(u8 socket, unsigned *channels,
483 unsigned *csrows)
eb94fc40
MCC
484{
485 struct pci_dev *pdev = NULL;
486 int i, j;
487 u32 status, control;
488
489 *channels = 0;
490 *csrows = 0;
491
67166af4 492 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 493 if (!pdev) {
67166af4
MCC
494 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
495 socket);
ef708b53 496 return -ENODEV;
b7c76151 497 }
ef708b53
MCC
498
499 /* Device 3 function 0 reads */
500 pci_read_config_dword(pdev, MC_STATUS, &status);
501 pci_read_config_dword(pdev, MC_CONTROL, &control);
502
503 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 504 u32 dimm_dod[3];
ef708b53
MCC
505 /* Check if the channel is active */
506 if (!(control & (1 << (8 + i))))
507 continue;
508
509 /* Check if the channel is disabled */
41fcb7fe 510 if (status & (1 << i))
ef708b53 511 continue;
ef708b53 512
67166af4 513 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 514 if (!pdev) {
67166af4
MCC
515 i7core_printk(KERN_ERR, "Couldn't find socket %d "
516 "fn %d.%d!!!\n",
517 socket, i + 4, 1);
eb94fc40
MCC
518 return -ENODEV;
519 }
520 /* Devices 4-6 function 1 */
521 pci_read_config_dword(pdev,
522 MC_DOD_CH_DIMM0, &dimm_dod[0]);
523 pci_read_config_dword(pdev,
524 MC_DOD_CH_DIMM1, &dimm_dod[1]);
525 pci_read_config_dword(pdev,
526 MC_DOD_CH_DIMM2, &dimm_dod[2]);
527
ef708b53 528 (*channels)++;
eb94fc40
MCC
529
530 for (j = 0; j < 3; j++) {
531 if (!DIMM_PRESENT(dimm_dod[j]))
532 continue;
533 (*csrows)++;
534 }
ef708b53
MCC
535 }
536
c77720b9 537 debugf0("Number of active channels on socket %d: %d\n",
67166af4 538 socket, *channels);
1c6fed80 539
ef708b53
MCC
540 return 0;
541}
542
f4742949 543static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
544{
545 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 546 struct csrow_info *csr;
854d3349 547 struct pci_dev *pdev;
ba6c5c62 548 int i, j;
5566cb7c 549 unsigned long last_page = 0;
1c6fed80 550 enum edac_type mode;
854d3349 551 enum mem_type mtype;
a0c36a1f 552
854d3349 553 /* Get data from the MC register, function 0 */
f4742949 554 pdev = pvt->pci_mcr[0];
7dd6953c 555 if (!pdev)
8f331907
MCC
556 return -ENODEV;
557
f122a892 558 /* Device 3 function 0 reads */
7dd6953c
MCC
559 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
560 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
561 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
562 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 563
17cb7b0c 564 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 565 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 566 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 567
1c6fed80 568 if (ECC_ENABLED(pvt)) {
41fcb7fe 569 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
570 if (ECCx8(pvt))
571 mode = EDAC_S8ECD8ED;
572 else
573 mode = EDAC_S4ECD4ED;
574 } else {
a0c36a1f 575 debugf0("ECC disabled\n");
1c6fed80
MCC
576 mode = EDAC_NONE;
577 }
a0c36a1f
MCC
578
579 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
580 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
581 "x%x x 0x%x\n",
854d3349
MCC
582 numdimms(pvt->info.max_dod),
583 numrank(pvt->info.max_dod >> 2),
276b824c 584 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
585 numrow(pvt->info.max_dod >> 6),
586 numcol(pvt->info.max_dod >> 9));
a0c36a1f 587
0b2b7b7e 588 for (i = 0; i < NUM_CHANS; i++) {
854d3349 589 u32 data, dimm_dod[3], value[8];
0b2b7b7e 590
52a2e4fc
MCC
591 if (!pvt->pci_ch[i][0])
592 continue;
593
0b2b7b7e
MCC
594 if (!CH_ACTIVE(pvt, i)) {
595 debugf0("Channel %i is not active\n", i);
596 continue;
597 }
598 if (CH_DISABLED(pvt, i)) {
599 debugf0("Channel %i is disabled\n", i);
600 continue;
601 }
602
f122a892 603 /* Devices 4-6 function 0 */
f4742949 604 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
605 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
606
f4742949 607 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 608 4 : 2;
0b2b7b7e 609
854d3349
MCC
610 if (data & REGISTERED_DIMM)
611 mtype = MEM_RDDR3;
14d2c083 612 else
854d3349
MCC
613 mtype = MEM_DDR3;
614#if 0
0b2b7b7e
MCC
615 if (data & THREE_DIMMS_PRESENT)
616 pvt->channel[i].dimms = 3;
617 else if (data & SINGLE_QUAD_RANK_PRESENT)
618 pvt->channel[i].dimms = 1;
619 else
620 pvt->channel[i].dimms = 2;
854d3349
MCC
621#endif
622
623 /* Devices 4-6 function 1 */
f4742949 624 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 625 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 626 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 627 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 628 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 629 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 630
1c6fed80 631 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 632 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
633 i,
634 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
635 data,
f4742949 636 pvt->channel[i].ranks,
41fcb7fe 637 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
638
639 for (j = 0; j < 3; j++) {
640 u32 banks, ranks, rows, cols;
5566cb7c 641 u32 size, npages;
854d3349
MCC
642
643 if (!DIMM_PRESENT(dimm_dod[j]))
644 continue;
645
646 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
647 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
648 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
649 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
650
5566cb7c
MCC
651 /* DDR3 has 8 I/O banks */
652 size = (rows * cols * banks * ranks) >> (20 - 3);
653
f4742949 654 pvt->channel[i].dimms++;
854d3349 655
17cb7b0c
MCC
656 debugf0("\tdimm %d %d Mb offset: %x, "
657 "bank: %d, rank: %d, row: %#x, col: %#x\n",
658 j, size,
854d3349
MCC
659 RANKOFFSET(dimm_dod[j]),
660 banks, ranks, rows, cols);
661
eb94fc40
MCC
662#if PAGE_SHIFT > 20
663 npages = size >> (PAGE_SHIFT - 20);
664#else
665 npages = size << (20 - PAGE_SHIFT);
666#endif
5566cb7c 667
ba6c5c62 668 csr = &mci->csrows[*csrow];
5566cb7c
MCC
669 csr->first_page = last_page + 1;
670 last_page += npages;
671 csr->last_page = last_page;
672 csr->nr_pages = npages;
673
854d3349 674 csr->page_mask = 0;
eb94fc40 675 csr->grain = 8;
ba6c5c62 676 csr->csrow_idx = *csrow;
eb94fc40
MCC
677 csr->nr_channels = 1;
678
679 csr->channels[0].chan_idx = i;
680 csr->channels[0].ce_count = 0;
854d3349 681
f4742949 682 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 683
854d3349
MCC
684 switch (banks) {
685 case 4:
686 csr->dtype = DEV_X4;
687 break;
688 case 8:
689 csr->dtype = DEV_X8;
690 break;
691 case 16:
692 csr->dtype = DEV_X16;
693 break;
694 default:
695 csr->dtype = DEV_UNKNOWN;
696 }
697
698 csr->edac_mode = mode;
699 csr->mtype = mtype;
700
ba6c5c62 701 (*csrow)++;
854d3349 702 }
1c6fed80 703
854d3349
MCC
704 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
705 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
706 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
707 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
708 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
709 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
710 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
711 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 712 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 713 for (j = 0; j < 8; j++)
17cb7b0c 714 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
715 (value[j] >> 27) & 0x1,
716 (value[j] >> 24) & 0x7,
717 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
718 }
719
a0c36a1f
MCC
720 return 0;
721}
722
194a40fe
MCC
723/****************************************************************************
724 Error insertion routines
725 ****************************************************************************/
726
727/* The i7core has independent error injection features per channel.
728 However, to have a simpler code, we don't allow enabling error injection
729 on more than one channel.
730 Also, since a change at an inject parameter will be applied only at enable,
731 we're disabling error injection on all write calls to the sysfs nodes that
732 controls the error code injection.
733 */
8f331907 734static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
735{
736 struct i7core_pvt *pvt = mci->pvt_info;
737
738 pvt->inject.enable = 0;
739
f4742949 740 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
741 return -ENODEV;
742
f4742949 743 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 744 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
745
746 return 0;
194a40fe
MCC
747}
748
749/*
750 * i7core inject inject.section
751 *
752 * accept and store error injection inject.section value
753 * bit 0 - refers to the lower 32-byte half cacheline
754 * bit 1 - refers to the upper 32-byte half cacheline
755 */
756static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
757 const char *data, size_t count)
758{
759 struct i7core_pvt *pvt = mci->pvt_info;
760 unsigned long value;
761 int rc;
762
763 if (pvt->inject.enable)
41fcb7fe 764 disable_inject(mci);
194a40fe
MCC
765
766 rc = strict_strtoul(data, 10, &value);
767 if ((rc < 0) || (value > 3))
2068def5 768 return -EIO;
194a40fe
MCC
769
770 pvt->inject.section = (u32) value;
771 return count;
772}
773
774static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
775 char *data)
776{
777 struct i7core_pvt *pvt = mci->pvt_info;
778 return sprintf(data, "0x%08x\n", pvt->inject.section);
779}
780
781/*
782 * i7core inject.type
783 *
784 * accept and store error injection inject.section value
785 * bit 0 - repeat enable - Enable error repetition
786 * bit 1 - inject ECC error
787 * bit 2 - inject parity error
788 */
789static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
790 const char *data, size_t count)
791{
792 struct i7core_pvt *pvt = mci->pvt_info;
793 unsigned long value;
794 int rc;
795
796 if (pvt->inject.enable)
41fcb7fe 797 disable_inject(mci);
194a40fe
MCC
798
799 rc = strict_strtoul(data, 10, &value);
800 if ((rc < 0) || (value > 7))
2068def5 801 return -EIO;
194a40fe
MCC
802
803 pvt->inject.type = (u32) value;
804 return count;
805}
806
807static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
808 char *data)
809{
810 struct i7core_pvt *pvt = mci->pvt_info;
811 return sprintf(data, "0x%08x\n", pvt->inject.type);
812}
813
814/*
815 * i7core_inject_inject.eccmask_store
816 *
817 * The type of error (UE/CE) will depend on the inject.eccmask value:
818 * Any bits set to a 1 will flip the corresponding ECC bit
819 * Correctable errors can be injected by flipping 1 bit or the bits within
820 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
821 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
822 * uncorrectable error to be injected.
823 */
824static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
825 const char *data, size_t count)
826{
827 struct i7core_pvt *pvt = mci->pvt_info;
828 unsigned long value;
829 int rc;
830
831 if (pvt->inject.enable)
41fcb7fe 832 disable_inject(mci);
194a40fe
MCC
833
834 rc = strict_strtoul(data, 10, &value);
835 if (rc < 0)
2068def5 836 return -EIO;
194a40fe
MCC
837
838 pvt->inject.eccmask = (u32) value;
839 return count;
840}
841
842static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
843 char *data)
844{
845 struct i7core_pvt *pvt = mci->pvt_info;
846 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
847}
848
849/*
850 * i7core_addrmatch
851 *
852 * The type of error (UE/CE) will depend on the inject.eccmask value:
853 * Any bits set to a 1 will flip the corresponding ECC bit
854 * Correctable errors can be injected by flipping 1 bit or the bits within
855 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
856 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
857 * uncorrectable error to be injected.
858 */
194a40fe 859
a5538e53
MCC
860#define DECLARE_ADDR_MATCH(param, limit) \
861static ssize_t i7core_inject_store_##param( \
862 struct mem_ctl_info *mci, \
863 const char *data, size_t count) \
864{ \
cc301b3a 865 struct i7core_pvt *pvt; \
a5538e53
MCC
866 long value; \
867 int rc; \
868 \
cc301b3a
MCC
869 debugf1("%s()\n", __func__); \
870 pvt = mci->pvt_info; \
871 \
a5538e53
MCC
872 if (pvt->inject.enable) \
873 disable_inject(mci); \
874 \
4f87fad1 875 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
a5538e53
MCC
876 value = -1; \
877 else { \
878 rc = strict_strtoul(data, 10, &value); \
879 if ((rc < 0) || (value >= limit)) \
880 return -EIO; \
881 } \
882 \
883 pvt->inject.param = value; \
884 \
885 return count; \
886} \
887 \
888static ssize_t i7core_inject_show_##param( \
889 struct mem_ctl_info *mci, \
890 char *data) \
891{ \
cc301b3a
MCC
892 struct i7core_pvt *pvt; \
893 \
894 pvt = mci->pvt_info; \
895 debugf1("%s() pvt=%p\n", __func__, pvt); \
a5538e53
MCC
896 if (pvt->inject.param < 0) \
897 return sprintf(data, "any\n"); \
898 else \
899 return sprintf(data, "%d\n", pvt->inject.param);\
194a40fe
MCC
900}
901
a5538e53
MCC
902#define ATTR_ADDR_MATCH(param) \
903 { \
904 .attr = { \
905 .name = #param, \
906 .mode = (S_IRUGO | S_IWUSR) \
907 }, \
908 .show = i7core_inject_show_##param, \
909 .store = i7core_inject_store_##param, \
910 }
194a40fe 911
a5538e53
MCC
912DECLARE_ADDR_MATCH(channel, 3);
913DECLARE_ADDR_MATCH(dimm, 3);
914DECLARE_ADDR_MATCH(rank, 4);
915DECLARE_ADDR_MATCH(bank, 32);
916DECLARE_ADDR_MATCH(page, 0x10000);
917DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 918
276b824c
MCC
919static int write_and_test(struct pci_dev *dev, int where, u32 val)
920{
921 u32 read;
922 int count;
923
4157d9f5
MCC
924 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
925 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
926 where, val);
927
276b824c
MCC
928 for (count = 0; count < 10; count++) {
929 if (count)
b990538a 930 msleep(100);
276b824c
MCC
931 pci_write_config_dword(dev, where, val);
932 pci_read_config_dword(dev, where, &read);
933
934 if (read == val)
935 return 0;
936 }
937
4157d9f5
MCC
938 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
939 "write=%08x. Read=%08x\n",
940 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
941 where, val, read);
276b824c
MCC
942
943 return -EINVAL;
944}
945
194a40fe
MCC
946/*
947 * This routine prepares the Memory Controller for error injection.
948 * The error will be injected when some process tries to write to the
949 * memory that matches the given criteria.
950 * The criteria can be set in terms of a mask where dimm, rank, bank, page
951 * and col can be specified.
952 * A -1 value for any of the mask items will make the MCU to ignore
953 * that matching criteria for error injection.
954 *
955 * It should be noticed that the error will only happen after a write operation
956 * on a memory that matches the condition. if REPEAT_EN is not enabled at
957 * inject mask, then it will produce just one error. Otherwise, it will repeat
958 * until the injectmask would be cleaned.
959 *
960 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
961 * is reliable enough to check if the MC is using the
962 * three channels. However, this is not clear at the datasheet.
963 */
964static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
965 const char *data, size_t count)
966{
967 struct i7core_pvt *pvt = mci->pvt_info;
968 u32 injectmask;
969 u64 mask = 0;
970 int rc;
971 long enable;
972
f4742949 973 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
974 return 0;
975
194a40fe
MCC
976 rc = strict_strtoul(data, 10, &enable);
977 if ((rc < 0))
978 return 0;
979
980 if (enable) {
981 pvt->inject.enable = 1;
982 } else {
983 disable_inject(mci);
984 return count;
985 }
986
987 /* Sets pvt->inject.dimm mask */
988 if (pvt->inject.dimm < 0)
486dd09f 989 mask |= 1LL << 41;
194a40fe 990 else {
f4742949 991 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 992 mask |= (pvt->inject.dimm & 0x3LL) << 35;
194a40fe 993 else
486dd09f 994 mask |= (pvt->inject.dimm & 0x1LL) << 36;
194a40fe
MCC
995 }
996
997 /* Sets pvt->inject.rank mask */
998 if (pvt->inject.rank < 0)
486dd09f 999 mask |= 1LL << 40;
194a40fe 1000 else {
f4742949 1001 if (pvt->channel[pvt->inject.channel].dimms > 2)
486dd09f 1002 mask |= (pvt->inject.rank & 0x1LL) << 34;
194a40fe 1003 else
486dd09f 1004 mask |= (pvt->inject.rank & 0x3LL) << 34;
194a40fe
MCC
1005 }
1006
1007 /* Sets pvt->inject.bank mask */
1008 if (pvt->inject.bank < 0)
486dd09f 1009 mask |= 1LL << 39;
194a40fe 1010 else
486dd09f 1011 mask |= (pvt->inject.bank & 0x15LL) << 30;
194a40fe
MCC
1012
1013 /* Sets pvt->inject.page mask */
1014 if (pvt->inject.page < 0)
486dd09f 1015 mask |= 1LL << 38;
194a40fe 1016 else
486dd09f 1017 mask |= (pvt->inject.page & 0xffff) << 14;
194a40fe
MCC
1018
1019 /* Sets pvt->inject.column mask */
1020 if (pvt->inject.col < 0)
486dd09f 1021 mask |= 1LL << 37;
194a40fe 1022 else
486dd09f 1023 mask |= (pvt->inject.col & 0x3fff);
194a40fe 1024
276b824c
MCC
1025 /*
1026 * bit 0: REPEAT_EN
1027 * bits 1-2: MASK_HALF_CACHELINE
1028 * bit 3: INJECT_ECC
1029 * bit 4: INJECT_ADDR_PARITY
1030 */
1031
1032 injectmask = (pvt->inject.type & 1) |
1033 (pvt->inject.section & 0x3) << 1 |
1034 (pvt->inject.type & 0x6) << (3 - 1);
1035
1036 /* Unlock writes to registers - this register is write only */
f4742949 1037 pci_write_config_dword(pvt->pci_noncore,
67166af4 1038 MC_CFG_CONTROL, 0x2);
e9bd2e73 1039
f4742949 1040 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 1041 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 1042 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1043 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1044
f4742949 1045 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1046 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1047
f4742949 1048 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1049 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1050
194a40fe 1051 /*
276b824c
MCC
1052 * This is something undocumented, based on my tests
1053 * Without writing 8 to this register, errors aren't injected. Not sure
1054 * why.
194a40fe 1055 */
f4742949 1056 pci_write_config_dword(pvt->pci_noncore,
276b824c 1057 MC_CFG_CONTROL, 8);
194a40fe 1058
41fcb7fe
MCC
1059 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1060 " inject 0x%08x\n",
194a40fe
MCC
1061 mask, pvt->inject.eccmask, injectmask);
1062
7b029d03 1063
194a40fe
MCC
1064 return count;
1065}
1066
1067static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1068 char *data)
1069{
1070 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
1071 u32 injectmask;
1072
52a2e4fc
MCC
1073 if (!pvt->pci_ch[pvt->inject.channel][0])
1074 return 0;
1075
f4742949 1076 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1077 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1078
1079 debugf0("Inject error read: 0x%018x\n", injectmask);
1080
1081 if (injectmask & 0x0c)
1082 pvt->inject.enable = 1;
1083
194a40fe
MCC
1084 return sprintf(data, "%d\n", pvt->inject.enable);
1085}
1086
f338d736
MCC
1087#define DECLARE_COUNTER(param) \
1088static ssize_t i7core_show_counter_##param( \
1089 struct mem_ctl_info *mci, \
1090 char *data) \
1091{ \
1092 struct i7core_pvt *pvt = mci->pvt_info; \
1093 \
1094 debugf1("%s() \n", __func__); \
1095 if (!pvt->ce_count_available || (pvt->is_registered)) \
1096 return sprintf(data, "data unavailable\n"); \
1097 return sprintf(data, "%lu\n", \
1098 pvt->udimm_ce_count[param]); \
1099}
442305b1 1100
f338d736
MCC
1101#define ATTR_COUNTER(param) \
1102 { \
1103 .attr = { \
1104 .name = __stringify(udimm##param), \
1105 .mode = (S_IRUGO | S_IWUSR) \
1106 }, \
1107 .show = i7core_show_counter_##param \
d88b8507 1108 }
442305b1 1109
f338d736
MCC
1110DECLARE_COUNTER(0);
1111DECLARE_COUNTER(1);
1112DECLARE_COUNTER(2);
442305b1 1113
194a40fe
MCC
1114/*
1115 * Sysfs struct
1116 */
a5538e53
MCC
1117
1118
1119static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1120 ATTR_ADDR_MATCH(channel),
1121 ATTR_ADDR_MATCH(dimm),
1122 ATTR_ADDR_MATCH(rank),
1123 ATTR_ADDR_MATCH(bank),
1124 ATTR_ADDR_MATCH(page),
1125 ATTR_ADDR_MATCH(col),
1126 { .attr = { .name = NULL } }
1127};
1128
a5538e53
MCC
1129static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1130 .name = "inject_addrmatch",
1131 .mcidev_attr = i7core_addrmatch_attrs,
1132};
1133
f338d736
MCC
1134static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1135 ATTR_COUNTER(0),
1136 ATTR_COUNTER(1),
1137 ATTR_COUNTER(2),
1138};
1139
1140static struct mcidev_sysfs_group i7core_udimm_counters = {
1141 .name = "all_channel_counts",
1142 .mcidev_attr = i7core_udimm_counters_attrs,
1143};
1144
a5538e53 1145static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
194a40fe
MCC
1146 {
1147 .attr = {
1148 .name = "inject_section",
1149 .mode = (S_IRUGO | S_IWUSR)
1150 },
1151 .show = i7core_inject_section_show,
1152 .store = i7core_inject_section_store,
1153 }, {
1154 .attr = {
1155 .name = "inject_type",
1156 .mode = (S_IRUGO | S_IWUSR)
1157 },
1158 .show = i7core_inject_type_show,
1159 .store = i7core_inject_type_store,
1160 }, {
1161 .attr = {
1162 .name = "inject_eccmask",
1163 .mode = (S_IRUGO | S_IWUSR)
1164 },
1165 .show = i7core_inject_eccmask_show,
1166 .store = i7core_inject_eccmask_store,
1167 }, {
a5538e53 1168 .grp = &i7core_inject_addrmatch,
194a40fe
MCC
1169 }, {
1170 .attr = {
1171 .name = "inject_enable",
1172 .mode = (S_IRUGO | S_IWUSR)
1173 },
1174 .show = i7core_inject_enable_show,
1175 .store = i7core_inject_enable_store,
1176 },
f338d736 1177 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
42538680 1178 { .attr = { .name = NULL } }
194a40fe
MCC
1179};
1180
a0c36a1f
MCC
1181/****************************************************************************
1182 Device initialization routines: put/get, init/exit
1183 ****************************************************************************/
1184
1185/*
1186 * i7core_put_devices 'put' all the devices that we have
1187 * reserved via 'get'
1188 */
13d6e9b6 1189static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1190{
13d6e9b6 1191 int i;
a0c36a1f 1192
22e6bcbd 1193 debugf0(__FILE__ ": %s()\n", __func__);
de06eeef 1194 for (i = 0; i < i7core_dev->n_devs; i++) {
22e6bcbd
MCC
1195 struct pci_dev *pdev = i7core_dev->pdev[i];
1196 if (!pdev)
1197 continue;
1198 debugf0("Removing dev %02x:%02x.%d\n",
1199 pdev->bus->number,
1200 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1201 pci_dev_put(pdev);
1202 }
13d6e9b6 1203 kfree(i7core_dev->pdev);
22e6bcbd 1204 list_del(&i7core_dev->list);
13d6e9b6
MCC
1205 kfree(i7core_dev);
1206}
66607706 1207
13d6e9b6
MCC
1208static void i7core_put_all_devices(void)
1209{
42538680 1210 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1211
42538680 1212 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
13d6e9b6 1213 i7core_put_devices(i7core_dev);
a0c36a1f
MCC
1214}
1215
bd9e19ca 1216static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
bc2d7245
KM
1217{
1218 struct pci_dev *pdev = NULL;
1219 int i;
1220 /*
1221 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1222 * aren't announced by acpi. So, we need to use a legacy scan probing
1223 * to detect them
1224 */
bd9e19ca
VM
1225 while (table && table->descr) {
1226 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1227 if (unlikely(!pdev)) {
1228 for (i = 0; i < MAX_SOCKET_BUSES; i++)
1229 pcibios_scan_specific_bus(255-i);
1230 }
1231 table++;
bc2d7245
KM
1232 }
1233}
1234
a0c36a1f
MCC
1235/*
1236 * i7core_get_devices Find and perform 'get' operation on the MCH's
1237 * device/functions we want to reference for this driver
1238 *
1239 * Need to 'get' device 16 func 1 and func 2
1240 */
de06eeef
MCC
1241int i7core_get_onedevice(struct pci_dev **prev, int devno,
1242 struct pci_id_descr *dev_descr, unsigned n_devs)
a0c36a1f 1243{
66607706
MCC
1244 struct i7core_dev *i7core_dev;
1245
8f331907 1246 struct pci_dev *pdev = NULL;
67166af4
MCC
1247 u8 bus = 0;
1248 u8 socket = 0;
a0c36a1f 1249
c77720b9 1250 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
de06eeef 1251 dev_descr->dev_id, *prev);
c77720b9 1252
c77720b9
MCC
1253 /*
1254 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1255 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1256 * to probe for the alternate address in case of failure
1257 */
de06eeef 1258 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
c77720b9 1259 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
fd382654 1260 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
d1fd4fb6 1261
bd9e19ca 1262 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
f05da2f7
MCC
1263 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1264 PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1265 *prev);
1266
c77720b9
MCC
1267 if (!pdev) {
1268 if (*prev) {
1269 *prev = pdev;
1270 return 0;
d1fd4fb6
MCC
1271 }
1272
de06eeef 1273 if (dev_descr->optional)
c77720b9 1274 return 0;
310cbb72 1275
bd9e19ca
VM
1276 if (devno == 0)
1277 return -ENODEV;
1278
c77720b9
MCC
1279 i7core_printk(KERN_ERR,
1280 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1281 dev_descr->dev, dev_descr->func,
1282 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
67166af4 1283
c77720b9
MCC
1284 /* End of list, leave */
1285 return -ENODEV;
1286 }
1287 bus = pdev->bus->number;
67166af4 1288
c77720b9
MCC
1289 if (bus == 0x3f)
1290 socket = 0;
1291 else
1292 socket = 255 - bus;
1293
66607706
MCC
1294 i7core_dev = get_i7core_dev(socket);
1295 if (!i7core_dev) {
1296 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1297 if (!i7core_dev)
1298 return -ENOMEM;
de06eeef 1299 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
66607706 1300 GFP_KERNEL);
2a6fae32
AB
1301 if (!i7core_dev->pdev) {
1302 kfree(i7core_dev);
66607706 1303 return -ENOMEM;
2a6fae32 1304 }
66607706 1305 i7core_dev->socket = socket;
de06eeef 1306 i7core_dev->n_devs = n_devs;
66607706 1307 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1308 }
67166af4 1309
66607706 1310 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1311 i7core_printk(KERN_ERR,
1312 "Duplicated device for "
1313 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1314 bus, dev_descr->dev, dev_descr->func,
1315 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1316 pci_dev_put(pdev);
1317 return -ENODEV;
1318 }
67166af4 1319
66607706 1320 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1321
1322 /* Sanity check */
de06eeef
MCC
1323 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1324 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
c77720b9
MCC
1325 i7core_printk(KERN_ERR,
1326 "Device PCI ID %04x:%04x "
1327 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
de06eeef 1328 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
c77720b9 1329 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
de06eeef 1330 bus, dev_descr->dev, dev_descr->func);
c77720b9
MCC
1331 return -ENODEV;
1332 }
ef708b53 1333
c77720b9
MCC
1334 /* Be sure that the device is enabled */
1335 if (unlikely(pci_enable_device(pdev) < 0)) {
1336 i7core_printk(KERN_ERR,
1337 "Couldn't enable "
1338 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1339 bus, dev_descr->dev, dev_descr->func,
1340 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1341 return -ENODEV;
1342 }
ef708b53 1343
d4c27795 1344 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1345 socket, bus, dev_descr->dev,
1346 dev_descr->func,
1347 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
8f331907 1348
c77720b9 1349 *prev = pdev;
ef708b53 1350
c77720b9
MCC
1351 return 0;
1352}
a0c36a1f 1353
bd9e19ca 1354static int i7core_get_devices(struct pci_id_table *table)
c77720b9 1355{
de06eeef 1356 int i, rc;
c77720b9 1357 struct pci_dev *pdev = NULL;
bd9e19ca
VM
1358 struct pci_id_descr *dev_descr;
1359
1360 while (table && table->descr) {
1361 dev_descr = table->descr;
1362 for (i = 0; i < table->n_devs; i++) {
1363 pdev = NULL;
1364 do {
1365 rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
1366 table->n_devs);
1367 if (rc < 0) {
1368 if (i == 0) {
1369 i = table->n_devs;
1370 break;
1371 }
1372 i7core_put_all_devices();
1373 return -ENODEV;
1374 }
1375 } while (pdev);
1376 }
1377 table++;
c77720b9 1378 }
66607706 1379
ef708b53 1380 return 0;
bd9e19ca 1381 return 0;
ef708b53
MCC
1382}
1383
f4742949
MCC
1384static int mci_bind_devs(struct mem_ctl_info *mci,
1385 struct i7core_dev *i7core_dev)
ef708b53
MCC
1386{
1387 struct i7core_pvt *pvt = mci->pvt_info;
1388 struct pci_dev *pdev;
f4742949 1389 int i, func, slot;
ef708b53 1390
f4742949
MCC
1391 /* Associates i7core_dev and mci for future usage */
1392 pvt->i7core_dev = i7core_dev;
1393 i7core_dev->mci = mci;
66607706 1394
f4742949 1395 pvt->is_registered = 0;
de06eeef 1396 for (i = 0; i < i7core_dev->n_devs; i++) {
f4742949
MCC
1397 pdev = i7core_dev->pdev[i];
1398 if (!pdev)
66607706
MCC
1399 continue;
1400
f4742949
MCC
1401 func = PCI_FUNC(pdev->devfn);
1402 slot = PCI_SLOT(pdev->devfn);
1403 if (slot == 3) {
1404 if (unlikely(func > MAX_MCR_FUNC))
1405 goto error;
1406 pvt->pci_mcr[func] = pdev;
1407 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1408 if (unlikely(func > MAX_CHAN_FUNC))
ef708b53 1409 goto error;
f4742949
MCC
1410 pvt->pci_ch[slot - 4][func] = pdev;
1411 } else if (!slot && !func)
1412 pvt->pci_noncore = pdev;
1413 else
1414 goto error;
ef708b53 1415
f4742949
MCC
1416 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1417 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1418 pdev, i7core_dev->socket);
14d2c083 1419
f4742949
MCC
1420 if (PCI_SLOT(pdev->devfn) == 3 &&
1421 PCI_FUNC(pdev->devfn) == 2)
1422 pvt->is_registered = 1;
a0c36a1f 1423 }
e9bd2e73 1424
f338d736
MCC
1425 /*
1426 * Add extra nodes to count errors on udimm
1427 * For registered memory, this is not needed, since the counters
1428 * are already displayed at the standard locations
1429 */
1430 if (!pvt->is_registered)
1431 i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1432 &i7core_udimm_counters;
1433
a0c36a1f 1434 return 0;
ef708b53
MCC
1435
1436error:
1437 i7core_printk(KERN_ERR, "Device %d, function %d "
1438 "is out of the expected range\n",
1439 slot, func);
1440 return -EINVAL;
a0c36a1f
MCC
1441}
1442
442305b1
MCC
1443/****************************************************************************
1444 Error check routines
1445 ****************************************************************************/
f4742949 1446static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1447 int chan, int dimm, int add)
1448{
1449 char *msg;
1450 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1451 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1452
1453 for (i = 0; i < add; i++) {
1454 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1455 "(Socket=%d channel=%d dimm=%d)",
1456 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1457
1458 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1459 kfree (msg);
1460 }
1461}
1462
1463static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
f4742949 1464 int chan, int new0, int new1, int new2)
b4e8f0b6
MCC
1465{
1466 struct i7core_pvt *pvt = mci->pvt_info;
1467 int add0 = 0, add1 = 0, add2 = 0;
1468 /* Updates CE counters if it is not the first time here */
f4742949 1469 if (pvt->ce_count_available) {
b4e8f0b6
MCC
1470 /* Updates CE counters */
1471
f4742949
MCC
1472 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1473 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1474 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
b4e8f0b6
MCC
1475
1476 if (add2 < 0)
1477 add2 += 0x7fff;
f4742949 1478 pvt->rdimm_ce_count[chan][2] += add2;
b4e8f0b6
MCC
1479
1480 if (add1 < 0)
1481 add1 += 0x7fff;
f4742949 1482 pvt->rdimm_ce_count[chan][1] += add1;
b4e8f0b6
MCC
1483
1484 if (add0 < 0)
1485 add0 += 0x7fff;
f4742949 1486 pvt->rdimm_ce_count[chan][0] += add0;
b4e8f0b6 1487 } else
f4742949 1488 pvt->ce_count_available = 1;
b4e8f0b6
MCC
1489
1490 /* Store the new values */
f4742949
MCC
1491 pvt->rdimm_last_ce_count[chan][2] = new2;
1492 pvt->rdimm_last_ce_count[chan][1] = new1;
1493 pvt->rdimm_last_ce_count[chan][0] = new0;
b4e8f0b6
MCC
1494
1495 /*updated the edac core */
1496 if (add0 != 0)
f4742949 1497 i7core_rdimm_update_csrow(mci, chan, 0, add0);
b4e8f0b6 1498 if (add1 != 0)
f4742949 1499 i7core_rdimm_update_csrow(mci, chan, 1, add1);
b4e8f0b6 1500 if (add2 != 0)
f4742949 1501 i7core_rdimm_update_csrow(mci, chan, 2, add2);
b4e8f0b6
MCC
1502
1503}
1504
f4742949 1505static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1506{
1507 struct i7core_pvt *pvt = mci->pvt_info;
1508 u32 rcv[3][2];
1509 int i, new0, new1, new2;
1510
1511 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1512 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1513 &rcv[0][0]);
f4742949 1514 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1515 &rcv[0][1]);
f4742949 1516 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1517 &rcv[1][0]);
f4742949 1518 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1519 &rcv[1][1]);
f4742949 1520 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1521 &rcv[2][0]);
f4742949 1522 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1523 &rcv[2][1]);
1524 for (i = 0 ; i < 3; i++) {
1525 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1526 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1527 /*if the channel has 3 dimms*/
f4742949 1528 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1529 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1530 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1531 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1532 } else {
1533 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1534 DIMM_BOT_COR_ERR(rcv[i][0]);
1535 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1536 DIMM_BOT_COR_ERR(rcv[i][1]);
1537 new2 = 0;
1538 }
1539
f4742949 1540 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1541 }
1542}
442305b1
MCC
1543
1544/* This function is based on the device 3 function 4 registers as described on:
1545 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1546 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1547 * also available at:
1548 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1549 */
f4742949 1550static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
442305b1
MCC
1551{
1552 struct i7core_pvt *pvt = mci->pvt_info;
1553 u32 rcv1, rcv0;
1554 int new0, new1, new2;
1555
f4742949 1556 if (!pvt->pci_mcr[4]) {
b990538a 1557 debugf0("%s MCR registers not found\n", __func__);
442305b1
MCC
1558 return;
1559 }
1560
b4e8f0b6 1561 /* Corrected test errors */
f4742949
MCC
1562 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1563 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
442305b1
MCC
1564
1565 /* Store the new values */
1566 new2 = DIMM2_COR_ERR(rcv1);
1567 new1 = DIMM1_COR_ERR(rcv0);
1568 new0 = DIMM0_COR_ERR(rcv0);
1569
442305b1 1570 /* Updates CE counters if it is not the first time here */
f4742949 1571 if (pvt->ce_count_available) {
442305b1
MCC
1572 /* Updates CE counters */
1573 int add0, add1, add2;
1574
f4742949
MCC
1575 add2 = new2 - pvt->udimm_last_ce_count[2];
1576 add1 = new1 - pvt->udimm_last_ce_count[1];
1577 add0 = new0 - pvt->udimm_last_ce_count[0];
442305b1
MCC
1578
1579 if (add2 < 0)
1580 add2 += 0x7fff;
f4742949 1581 pvt->udimm_ce_count[2] += add2;
442305b1
MCC
1582
1583 if (add1 < 0)
1584 add1 += 0x7fff;
f4742949 1585 pvt->udimm_ce_count[1] += add1;
442305b1
MCC
1586
1587 if (add0 < 0)
1588 add0 += 0x7fff;
f4742949 1589 pvt->udimm_ce_count[0] += add0;
b4e8f0b6
MCC
1590
1591 if (add0 | add1 | add2)
1592 i7core_printk(KERN_ERR, "New Corrected error(s): "
1593 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1594 add0, add1, add2);
442305b1 1595 } else
f4742949 1596 pvt->ce_count_available = 1;
442305b1
MCC
1597
1598 /* Store the new values */
f4742949
MCC
1599 pvt->udimm_last_ce_count[2] = new2;
1600 pvt->udimm_last_ce_count[1] = new1;
1601 pvt->udimm_last_ce_count[0] = new0;
442305b1
MCC
1602}
1603
8a2f118e
MCC
1604/*
1605 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1606 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1607 * Nehalem are defined as family 0x06, model 0x1a
1608 *
1609 * The MCA registers used here are the following ones:
8a2f118e 1610 * struct mce field MCA Register
f237fcf2
MCC
1611 * m->status MSR_IA32_MC8_STATUS
1612 * m->addr MSR_IA32_MC8_ADDR
1613 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1614 * In the case of Nehalem, the error information is masked at .status and .misc
1615 * fields
1616 */
d5381642
MCC
1617static void i7core_mce_output_error(struct mem_ctl_info *mci,
1618 struct mce *m)
1619{
b4e8f0b6 1620 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1621 char *type, *optype, *err, *msg;
8a2f118e 1622 unsigned long error = m->status & 0x1ff0000l;
a639539f 1623 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1624 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1625 u32 dimm = (m->misc >> 16) & 0x3;
1626 u32 channel = (m->misc >> 18) & 0x3;
1627 u32 syndrome = m->misc >> 32;
1628 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1629 int csrow;
8a2f118e 1630
c5d34528
MCC
1631 if (m->mcgstatus & 1)
1632 type = "FATAL";
1633 else
1634 type = "NON_FATAL";
1635
a639539f 1636 switch (optypenum) {
b990538a
MCC
1637 case 0:
1638 optype = "generic undef request";
1639 break;
1640 case 1:
1641 optype = "read error";
1642 break;
1643 case 2:
1644 optype = "write error";
1645 break;
1646 case 3:
1647 optype = "addr/cmd error";
1648 break;
1649 case 4:
1650 optype = "scrubbing error";
1651 break;
1652 default:
1653 optype = "reserved";
1654 break;
a639539f
MCC
1655 }
1656
8a2f118e
MCC
1657 switch (errnum) {
1658 case 16:
1659 err = "read ECC error";
1660 break;
1661 case 17:
1662 err = "RAS ECC error";
1663 break;
1664 case 18:
1665 err = "write parity error";
1666 break;
1667 case 19:
1668 err = "redundacy loss";
1669 break;
1670 case 20:
1671 err = "reserved";
1672 break;
1673 case 21:
1674 err = "memory range error";
1675 break;
1676 case 22:
1677 err = "RTID out of range";
1678 break;
1679 case 23:
1680 err = "address parity error";
1681 break;
1682 case 24:
1683 err = "byte enable parity error";
1684 break;
1685 default:
1686 err = "unknown";
d5381642 1687 }
d5381642 1688
f237fcf2 1689 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1690 msg = kasprintf(GFP_ATOMIC,
f4742949 1691 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1692 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1693 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1694 syndrome, core_err_cnt, (long long)m->status,
1695 (long long)m->misc, optype, err);
8a2f118e
MCC
1696
1697 debugf0("%s", msg);
d5381642 1698
f4742949 1699 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1700
d5381642 1701 /* Call the helper to output message */
b4e8f0b6
MCC
1702 if (m->mcgstatus & 1)
1703 edac_mc_handle_fbd_ue(mci, csrow, 0,
1704 0 /* FIXME: should be channel here */, msg);
f4742949 1705 else if (!pvt->is_registered)
b4e8f0b6
MCC
1706 edac_mc_handle_fbd_ce(mci, csrow,
1707 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1708
1709 kfree(msg);
d5381642
MCC
1710}
1711
87d1d272
MCC
1712/*
1713 * i7core_check_error Retrieve and process errors reported by the
1714 * hardware. Called by the Core module.
1715 */
1716static void i7core_check_error(struct mem_ctl_info *mci)
1717{
d5381642
MCC
1718 struct i7core_pvt *pvt = mci->pvt_info;
1719 int i;
1720 unsigned count = 0;
ca9c90ba 1721 struct mce *m;
d5381642 1722
ca9c90ba
MCC
1723 /*
1724 * MCE first step: Copy all mce errors into a temporary buffer
1725 * We use double buffering here to reduce the risk of
1726 * losing an error.
1727 */
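	/*
	 * Worked example (assuming MCE_LOG_LEN == 32): with mce_in == 30 and
	 * mce_out == 2, count == (2 + 32 - 30) % 32 == 4; two entries are
	 * copied from slots 30..31, mce_in wraps to 0, and the remaining two
	 * entries are copied from slots 0..1.
	 */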
1728 smp_rmb();
321ece4d
MCC
1729 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1730 % MCE_LOG_LEN;
ca9c90ba 1731 if (!count)
8a311e17 1732 goto check_ce_error;
f4742949 1733
ca9c90ba 1734 m = pvt->mce_outentry;
321ece4d
MCC
1735 if (pvt->mce_in + count > MCE_LOG_LEN) {
1736 unsigned l = MCE_LOG_LEN - pvt->mce_in;
f4742949 1737
ca9c90ba
MCC
1738 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1739 smp_wmb();
1740 pvt->mce_in = 0;
1741 count -= l;
1742 m += l;
1743 }
1744 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1745 smp_wmb();
1746 pvt->mce_in += count;
1747
1748 smp_rmb();
1749 if (pvt->mce_overrun) {
1750 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1751 pvt->mce_overrun);
1752 smp_wmb();
1753 pvt->mce_overrun = 0;
1754 }
d5381642 1755
ca9c90ba
MCC
1756 /*
1757 * MCE second step: parse errors and display
1758 */
d5381642 1759 for (i = 0; i < count; i++)
ca9c90ba 1760 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
d5381642 1761
ca9c90ba
MCC
1762 /*
1763 * Now, let's increment CE error counts
1764 */
8a311e17 1765check_ce_error:
f4742949
MCC
1766 if (!pvt->is_registered)
1767 i7core_udimm_check_mc_ecc_err(mci);
1768 else
1769 i7core_rdimm_check_mc_ecc_err(mci);
87d1d272
MCC
1770}
1771
d5381642
MCC
1772/*
1773 * i7core_mce_check_error Replicates mcelog routine to get errors
1774 * This routine simply queues mcelog errors, and
1775 * returns. The error itself should be handled later
1776 * by i7core_check_error.
6e103be1
MCC
1777 * WARNING: As this routine should be called at NMI time, extra care should
1778 * be taken to avoid deadlocks, and to be as fast as possible.
d5381642
MCC
1779 */
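/*
 * A note on ordering (illustrative): this routine is the producer side of
 * the pvt->mce_entry ring buffer and i7core_check_error() is the consumer.
 * The smp_wmb() below makes the new entry visible before mce_out is
 * advanced, so the consumer never sees an updated index without the data
 * it refers to.
 */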
1780static int i7core_mce_check_error(void *priv, struct mce *mce)
1781{
c5d34528
MCC
1782 struct mem_ctl_info *mci = priv;
1783 struct i7core_pvt *pvt = mci->pvt_info;
d5381642 1784
8a2f118e
MCC
1785 /*
1786 * Just let mcelog handle it if the error is
1787 * outside the memory controller
1788 */
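	/*
	 * (Per the architectural MCA encoding, generic memory controller
	 * errors use an MCACOD of the form 0000 0000 1MMM CCCC, so bits 15:7
	 * of the MCACOD field must equal 1.)
	 */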
1789 if (((mce->status & 0xffff) >> 7) != 1)
1790 return 0;
1791
f237fcf2
MCC
1792 /* Bank 8 registers are the only ones that we know how to handle */
1793 if (mce->bank != 8)
1794 return 0;
1795
3b918c12 1796#ifdef CONFIG_SMP
f4742949 1797 /* Only handle the error if it belongs to this socket's memory controller */
6e103be1 1798 if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
f4742949 1799 return 0;
3b918c12 1800#endif
f4742949 1801
ca9c90ba 1802 smp_rmb();
321ece4d 1803 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
ca9c90ba
MCC
1804 smp_wmb();
1805 pvt->mce_overrun++;
1806 return 0;
d5381642 1807 }
6e103be1
MCC
1808
1809 /* Copy the memory error into the ring buffer */
1810 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
ca9c90ba 1811 smp_wmb();
321ece4d 1812 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
d5381642 1813
c5d34528
MCC
1814 /* Handle fatal errors immediately */
1815 if (mce->mcgstatus & 1)
1816 i7core_check_error(mci);
1817
d5381642 1818 /* Advise mcelog that the error was handled */
8a2f118e 1819 return 1;
d5381642
MCC
1820}
1821
f4742949
MCC
1822static int i7core_register_mci(struct i7core_dev *i7core_dev,
1823 int num_channels, int num_csrows)
a0c36a1f
MCC
1824{
1825 struct mem_ctl_info *mci;
1826 struct i7core_pvt *pvt;
ba6c5c62 1827 int csrow = 0;
f4742949 1828 int rc;
a0c36a1f 1829
a0c36a1f 1830 /* allocate a new MC control structure */
d4c27795
MCC
1831 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1832 i7core_dev->socket);
f4742949
MCC
1833 if (unlikely(!mci))
1834 return -ENOMEM;
a0c36a1f
MCC
1835
1836 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1837
f4742949
MCC
1838 /* record ptr to the generic device */
1839 mci->dev = &i7core_dev->pdev[0]->dev;
1840
a0c36a1f 1841 pvt = mci->pvt_info;
ef708b53 1842 memset(pvt, 0, sizeof(*pvt));
67166af4 1843
41fcb7fe
MCC
1844 /*
1845 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1846 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1847 * memory channels
1848 */
1849 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1850 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1851 mci->edac_cap = EDAC_FLAG_NONE;
1852 mci->mod_name = "i7core_edac.c";
1853 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1854 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1855 i7core_dev->socket);
1856 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1857 mci->ctl_page_to_phys = NULL;
a5538e53 1858 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
87d1d272
MCC
1859 /* Set the function pointer to the actual error checking routine */
1860 mci->edac_check = i7core_check_error;
8f331907 1861
ef708b53 1862 /* Store pci devices at mci for faster access */
f4742949 1863 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1864 if (unlikely(rc < 0))
f4742949 1865 goto fail;
ef708b53
MCC
1866
1867 /* Get dimm basic config */
f4742949 1868 get_dimm_config(mci, &csrow);
ef708b53 1869
a0c36a1f 1870 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1871 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1872 debugf0("MC: " __FILE__
1873 ": %s(): failed edac_mc_add_mc()\n", __func__);
1874 /* FIXME: perhaps some code should go here that disables error
1875 * reporting if we just enabled it
1876 */
b7c76151
MCC
1877
1878 rc = -EINVAL;
f4742949 1879 goto fail;
a0c36a1f
MCC
1880 }
1881
1882 /* allocating generic PCI control info */
f4742949
MCC
1883 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1884 EDAC_MOD_STR);
41fcb7fe 1885 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1886 printk(KERN_WARNING
1887 "%s(): Unable to create PCI control\n",
1888 __func__);
1889 printk(KERN_WARNING
1890 "%s(): PCI error report via EDAC not setup\n",
1891 __func__);
1892 }
1893
194a40fe 1894 /* Default error mask is any memory */
ef708b53 1895 pvt->inject.channel = 0;
194a40fe
MCC
1896 pvt->inject.dimm = -1;
1897 pvt->inject.rank = -1;
1898 pvt->inject.bank = -1;
1899 pvt->inject.page = -1;
1900 pvt->inject.col = -1;
1901
d5381642 1902 /* Register with edac_mce in order to receive memory errors */
c5d34528 1903 pvt->edac_mce.priv = mci;
d5381642 1904 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642
MCC
1905
1906 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1907 if (unlikely(rc < 0)) {
d5381642
MCC
1908 debugf0("MC: " __FILE__
1909 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1910 }
1911
1912fail:
d4d1ef45
TL
1913 if (rc < 0)
1914 edac_mc_free(mci);
f4742949
MCC
1915 return rc;
1916}
1917
1918/*
1919 * i7core_probe Probe for ONE instance of device to see if it is
1920 * present.
1921 * return:
1922 * 0 if a device was found
1923 * < 0 for error code
1924 */
1925static int __devinit i7core_probe(struct pci_dev *pdev,
1926 const struct pci_device_id *id)
1927{
1928 int dev_idx = id->driver_data;
1929 int rc;
1930 struct i7core_dev *i7core_dev;
1931
1932 /*
d4c27795 1933 * All memory controllers are allocated at the first pass.
f4742949
MCC
1934 */
1935 if (unlikely(dev_idx >= 1))
1936 return -EINVAL;
1937
1938 /* get the pci devices we want to reserve for our use */
1939 mutex_lock(&i7core_edac_lock);
de06eeef 1940
bd9e19ca 1941 rc = i7core_get_devices(pci_dev_table);
f4742949
MCC
1942 if (unlikely(rc < 0))
1943 goto fail0;
1944
1945 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1946 int channels;
1947 int csrows;
1948
1949 /* Check the number of active and not disabled channels */
1950 rc = i7core_get_active_channels(i7core_dev->socket,
1951 &channels, &csrows);
1952 if (unlikely(rc < 0))
1953 goto fail1;
1954
d4c27795
MCC
1955 rc = i7core_register_mci(i7core_dev, channels, csrows);
1956 if (unlikely(rc < 0))
1957 goto fail1;
d5381642
MCC
1958 }
1959
ef708b53 1960 i7core_printk(KERN_INFO, "Driver loaded.\n");
8f331907 1961
66607706 1962 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1963 return 0;
1964
66607706 1965fail1:
13d6e9b6 1966 i7core_put_all_devices();
66607706
MCC
1967fail0:
1968 mutex_unlock(&i7core_edac_lock);
b7c76151 1969 return rc;
a0c36a1f
MCC
1970}
1971
1972/*
1973 * i7core_remove destructor for one instance of device
1974 *
1975 */
1976static void __devexit i7core_remove(struct pci_dev *pdev)
1977{
1978 struct mem_ctl_info *mci;
22e6bcbd 1979 struct i7core_dev *i7core_dev, *tmp;
a0c36a1f
MCC
1980
1981 debugf0(__FILE__ ": %s()\n", __func__);
1982
1983 if (i7core_pci)
1984 edac_pci_release_generic_ctl(i7core_pci);
1985
22e6bcbd
MCC
1986 /*
1987 * There is a problem here: the pdev value used for removal will be wrong,
1988 * since it points to the X58 device used to detect that the machine is a
1989 * Nehalem or newer design. However, due to the way several PCI devices are
1990 * grouped together to provide MC functionality, we need to use a different
1991 * method for releasing the devices.
1992 */
87d1d272 1993
66607706 1994 mutex_lock(&i7core_edac_lock);
22e6bcbd
MCC
1995 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1996 mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
1997 if (mci) {
1998 struct i7core_pvt *pvt = mci->pvt_info;
1999
2000 i7core_dev = pvt->i7core_dev;
2001 edac_mce_unregister(&pvt->edac_mce);
2002 kfree(mci->ctl_name);
2003 edac_mc_free(mci);
2004 i7core_put_devices(i7core_dev);
2005 } else {
2006 i7core_printk(KERN_ERR,
2007 "Couldn't find mci for socket %d\n",
2008 i7core_dev->socket);
2009 }
2010 }
66607706 2011 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
2012}
2013
a0c36a1f
MCC
2014MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2015
2016/*
2017 * i7core_driver pci_driver structure for this module
2018 *
2019 */
2020static struct pci_driver i7core_driver = {
2021 .name = "i7core_edac",
2022 .probe = i7core_probe,
2023 .remove = __devexit_p(i7core_remove),
2024 .id_table = i7core_pci_tbl,
2025};
2026
2027/*
2028 * i7core_init Module entry function
2029 * Try to initialize this module for its devices
2030 */
2031static int __init i7core_init(void)
2032{
2033 int pci_rc;
2034
2035 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2036
2037 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2038 opstate_init();
2039
bd9e19ca 2040 i7core_xeon_pci_fixup(pci_dev_table);
bc2d7245 2041
a0c36a1f
MCC
2042 pci_rc = pci_register_driver(&i7core_driver);
2043
3ef288a9
MCC
2044 if (pci_rc >= 0)
2045 return 0;
2046
2047 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2048 pci_rc);
2049
2050 return pci_rc;
a0c36a1f
MCC
2051}
2052
2053/*
2054 * i7core_exit() Module exit function
2055 * Unregister the driver
2056 */
2057static void __exit i7core_exit(void)
2058{
2059 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2060 pci_unregister_driver(&i7core_driver);
2061}
2062
2063module_init(i7core_init);
2064module_exit(i7core_exit);
2065
2066MODULE_LICENSE("GPL");
2067MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2068MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2069MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2070 I7CORE_REVISION);
2071
2072module_param(edac_op_state, int, 0444);
2073MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");