/*
 * arch/x86/kernel/cpu/intel_cacheinfo.c
 *
 * Routines to identify caches on Intel CPUs.
 *
 * Changes:
 * Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB entries) */

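/*
 * For example (illustrative): a CPUID leaf 2 descriptor byte of 0x2c
 * matches the { 0x2c, LVL_1_DATA, 32 } entry below and contributes
 * 32 KB to the L1 data cache total in init_intel_cacheinfo().
 */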
static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
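
/*
 * Illustrative example (not from any particular datasheet): a CPUID(4)
 * leaf reporting coherency_line_size = 63, physical_line_partition = 0,
 * ways_of_associativity = 7 and number_of_sets = 8191 describes a
 * 64-byte-line, 8-way, 8192-set cache, i.e. 64 * 8 * 8192 = 4 MB; see
 * cpuid4_cache_lookup_regs() below.
 */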

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the
 * machine: L2 not shared, no SMT etc., which is currently true on
 * AMD CPUs.
 *
 * In theory the TLBs could be reported as a fake type as well (they
 * are in "dummy"). Maybe later.
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
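
/*
 * For example, the encoded associativity value 0x6 maps to an 8-way
 * cache via assocs[] above, and 0xf denotes a fully associative cache.
 */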

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
	   union _cpuid4_leaf_ebx *ebx,
	   union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		/* fall through */
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;


	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
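	/*
	 * Illustrative example: a 512 KB, 16-way L2 with 64-byte lines
	 * yields (512 * 1024) / 64 / 16 - 1 = 511 as number_of_sets.
	 */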
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

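	/* e.g. if the largest subcache count is 2, indices = (2 << 10) - 1 = 2047 */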
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb: amd_northbridge descriptor of the node containing the L3 cache
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb: amd_northbridge descriptor of the node containing the L3 cache
 * @cpu: A CPU on the node containing the L3 cache
 * @slot: slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)
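
/*
 * Illustrative sysfs usage (exact paths depend on the system; these
 * attributes only appear on L3 leaves of CPUs supporting L3 index disable):
 *   # echo 12 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 * disables L3 index 12 through slot 0; reading the file back returns the
 * disabled index, or "FREE" if the slot is unused.
 */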

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}
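
/*
 * Illustrative usage (requires AMD_NB_L3_PARTITIONING; the mask value is
 * only an example):
 *   # echo f > /sys/devices/system/cpu/cpu0/cache/index3/subcaches
 * writes a hex bitmask selecting the L3 subcaches available to this CPU's
 * node (see amd_set_subcaches()); reading the file returns the current mask.
 */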

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);
	return 0;
}

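/*
 * Count the cache leaves by iterating the deterministic cache parameters
 * leaf (CPUID 4 on Intel, CPUID 0x8000001d on AMD with TOPOEXT) until a
 * NULL-type leaf is returned.  For example, a CPU with split L1I/L1D plus
 * unified L2 and L3 reports four leaves (indices 0-3).
 */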
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{

	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
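		/*
		 * EDX[15:12] of CPUID 0x80000006 is the L3 associativity
		 * field; a non-zero value means an L3 cache is present,
		 * so four cache leaves are reported instead of three.
		 */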
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we still use
	 * cpuid2, but only for the trace cache.
	 */
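	/*
	 * CPUID(2) format: the low byte of EAX is the number of times the
	 * leaf must be queried; the remaining bytes of EAX/EBX/ECX/EDX are
	 * one-byte cache/TLB descriptors, and a register whose bit 31 is
	 * set carries no valid descriptors.
	 */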
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		this_leaf = this_cpu_ci->info_list + index;
		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

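		/*
		 * Example: with nshared == 4 and apicid == 6, the leaf is
		 * shared by APIC IDs 4..7 (first = 4, last = 7).
		 */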
		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The maximum number of threads sharing the cache comes from
 * CPUID.4:EAX[25:14], with ECX selecting the cache index. Right-shifting
 * the APIC ID by that number's order gives the cache id for this cache
 * node.
 */
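/*
 * Example: if 16 threads share the cache, index_msb is 4 and the cache
 * id is apicid >> 4, so all threads under the same cache get the same id.
 */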
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

static int __populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)