]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | #include <linux/init.h> |
2 | #include <linux/bitops.h> | |
3 | #include <linux/mm.h> | |
8d71a2ea | 4 | |
1da177e4 LT |
5 | #include <asm/io.h> |
6 | #include <asm/processor.h> | |
d3f7eae1 | 7 | #include <asm/apic.h> |
1da177e4 | 8 | |
8d71a2ea YL |
9 | #ifdef CONFIG_X86_64 |
10 | # include <asm/numa_64.h> | |
11 | # include <asm/mmconfig.h> | |
12 | # include <asm/cacheflush.h> | |
13 | #endif | |
14 | ||
dd46e3ca | 15 | #include <mach_apic.h> |
8d71a2ea | 16 | |
1da177e4 LT |
17 | #include "cpu.h" |
18 | ||
6c62aa4a | 19 | #ifdef CONFIG_X86_32 |
1da177e4 LT |
20 | /* |
21 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | |
22 | * misexecution of code under Linux. Owners of such processors should | |
23 | * contact AMD for precise details and a CPU swap. | |
24 | * | |
25 | * See http://www.multimania.com/poulot/k6bug.html | |
26 | * http://www.amd.com/K6/k6docs/revgd.html | |
27 | * | |
28 | * The following test is erm.. interesting. AMD neglected to up | |
29 | * the chip setting when fixing the bug but they also tweaked some | |
30 | * performance at the same time.. | |
31 | */ | |
fb87a298 | 32 | |
1da177e4 LT |
/*
 * vide() is a do-nothing routine consisting of a single "ret", emitted
 * directly in assembly. init_amd_k6() times indirect calls to it.
 */
__asm__(".align 4\nvide: ret");
extern void vide(void);
35 | ||
11fdd252 YL |
36 | static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) |
37 | { | |
38 | /* | |
39 | * General Systems BIOSen alias the cpu frequency registers | |
40 | * of the Elan at 0x000df000. Unfortuantly, one of the Linux | |
41 | * drivers subsequently pokes it, and changes the CPU speed. | |
42 | * Workaround : Remove the unneeded alias. | |
43 | */ | |
44 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | |
45 | #define CBAR_ENB (0x80000000) | |
46 | #define CBAR_KEY (0X000000CB) | |
47 | if (c->x86_model == 9 || c->x86_model == 10) { | |
48 | if (inl (CBAR) & CBAR_ENB) | |
49 | outl (0 | CBAR_KEY, CBAR); | |
50 | } | |
51 | } | |
52 | ||
53 | ||
54 | static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | |
55 | { | |
56 | u32 l, h; | |
57 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | |
58 | ||
59 | if (c->x86_model < 6) { | |
60 | /* Based on AMD doc 20734R - June 2000 */ | |
61 | if (c->x86_model == 0) { | |
62 | clear_cpu_cap(c, X86_FEATURE_APIC); | |
63 | set_cpu_cap(c, X86_FEATURE_PGE); | |
64 | } | |
65 | return; | |
66 | } | |
67 | ||
68 | if (c->x86_model == 6 && c->x86_mask == 1) { | |
69 | const int K6_BUG_LOOP = 1000000; | |
70 | int n; | |
71 | void (*f_vide)(void); | |
72 | unsigned long d, d2; | |
73 | ||
74 | printk(KERN_INFO "AMD K6 stepping B detected - "); | |
75 | ||
76 | /* | |
77 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | |
78 | * calls at the same time. | |
79 | */ | |
80 | ||
81 | n = K6_BUG_LOOP; | |
82 | f_vide = vide; | |
83 | rdtscl(d); | |
84 | while (n--) | |
85 | f_vide(); | |
86 | rdtscl(d2); | |
87 | d = d2-d; | |
88 | ||
89 | if (d > 20*K6_BUG_LOOP) | |
90 | printk("system stability may be impaired when more than 32 MB are used.\n"); | |
91 | else | |
92 | printk("probably OK (after B9730xxxx).\n"); | |
93 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | |
94 | } | |
95 | ||
96 | /* K6 with old style WHCR */ | |
97 | if (c->x86_model < 8 || | |
98 | (c->x86_model == 8 && c->x86_mask < 8)) { | |
99 | /* We can only write allocate on the low 508Mb */ | |
100 | if (mbytes > 508) | |
101 | mbytes = 508; | |
102 | ||
103 | rdmsr(MSR_K6_WHCR, l, h); | |
104 | if ((l&0x0000FFFF) == 0) { | |
105 | unsigned long flags; | |
106 | l = (1<<0)|((mbytes/4)<<1); | |
107 | local_irq_save(flags); | |
108 | wbinvd(); | |
109 | wrmsr(MSR_K6_WHCR, l, h); | |
110 | local_irq_restore(flags); | |
111 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | |
112 | mbytes); | |
113 | } | |
114 | return; | |
115 | } | |
116 | ||
117 | if ((c->x86_model == 8 && c->x86_mask > 7) || | |
118 | c->x86_model == 9 || c->x86_model == 13) { | |
119 | /* The more serious chips .. */ | |
120 | ||
121 | if (mbytes > 4092) | |
122 | mbytes = 4092; | |
123 | ||
124 | rdmsr(MSR_K6_WHCR, l, h); | |
125 | if ((l&0xFFFF0000) == 0) { | |
126 | unsigned long flags; | |
127 | l = ((mbytes>>2)<<22)|(1<<16); | |
128 | local_irq_save(flags); | |
129 | wbinvd(); | |
130 | wrmsr(MSR_K6_WHCR, l, h); | |
131 | local_irq_restore(flags); | |
132 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | |
133 | mbytes); | |
134 | } | |
135 | ||
136 | return; | |
137 | } | |
138 | ||
139 | if (c->x86_model == 10) { | |
140 | /* AMD Geode LX is model 10 */ | |
141 | /* placeholder for any needed mods */ | |
142 | return; | |
143 | } | |
144 | } | |
145 | ||
146 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | |
147 | { | |
148 | u32 l, h; | |
149 | ||
150 | /* | |
151 | * Bit 15 of Athlon specific MSR 15, needs to be 0 | |
152 | * to enable SSE on Palomino/Morgan/Barton CPU's. | |
153 | * If the BIOS didn't enable it already, enable it here. | |
154 | */ | |
155 | if (c->x86_model >= 6 && c->x86_model <= 10) { | |
156 | if (!cpu_has(c, X86_FEATURE_XMM)) { | |
157 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | |
158 | rdmsr(MSR_K7_HWCR, l, h); | |
159 | l &= ~0x00008000; | |
160 | wrmsr(MSR_K7_HWCR, l, h); | |
161 | set_cpu_cap(c, X86_FEATURE_XMM); | |
162 | } | |
163 | } | |
164 | ||
165 | /* | |
166 | * It's been determined by AMD that Athlons since model 8 stepping 1 | |
167 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx | |
168 | * As per AMD technical note 27212 0.2 | |
169 | */ | |
170 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | |
171 | rdmsr(MSR_K7_CLK_CTL, l, h); | |
172 | if ((l & 0xfff00000) != 0x20000000) { | |
173 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | |
174 | ((l & 0x000fffff)|0x20000000)); | |
175 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | |
176 | } | |
177 | } | |
178 | ||
179 | set_cpu_cap(c, X86_FEATURE_K7); | |
180 | } | |
6c62aa4a YL |
181 | #endif |
182 | ||
183 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | |
184 | static int __cpuinit nearby_node(int apicid) | |
185 | { | |
186 | int i, node; | |
187 | ||
188 | for (i = apicid - 1; i >= 0; i--) { | |
189 | node = apicid_to_node[i]; | |
190 | if (node != NUMA_NO_NODE && node_online(node)) | |
191 | return node; | |
192 | } | |
193 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | |
194 | node = apicid_to_node[i]; | |
195 | if (node != NUMA_NO_NODE && node_online(node)) | |
196 | return node; | |
197 | } | |
198 | return first_node(node_online_map); /* Shouldn't happen */ | |
199 | } | |
200 | #endif | |
11fdd252 YL |
201 | |
202 | /* | |
203 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. |
204 | * Assumes number of cores is a power of two. | |
205 | */ | |
206 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |
207 | { | |
208 | #ifdef CONFIG_X86_HT | |
209 | unsigned bits; | |
210 | ||
211 | bits = c->x86_coreid_bits; | |
212 | ||
213 | /* Low order bits define the core id (index of core in socket) */ | |
214 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | |
215 | /* Convert the initial APIC ID into the socket ID */ | |
216 | c->phys_proc_id = c->initial_apicid >> bits; | |
217 | #endif | |
218 | } | |
219 | ||
6c62aa4a YL |
220 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
221 | { | |
222 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | |
223 | int cpu = smp_processor_id(); | |
224 | int node; | |
225 | unsigned apicid = hard_smp_processor_id(); | |
226 | ||
227 | node = c->phys_proc_id; | |
228 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | |
229 | node = apicid_to_node[apicid]; | |
230 | if (!node_online(node)) { | |
231 | /* Two possibilities here: | |
232 | - The CPU is missing memory and no node was created. | |
233 | In that case try picking one from a nearby CPU | |
234 | - The APIC IDs differ from the HyperTransport node IDs | |
235 | which the K8 northbridge parsing fills in. | |
236 | Assume they are all increased by a constant offset, | |
237 | but in the same order as the HT nodeids. | |
238 | If that doesn't result in a usable node fall back to the | |
239 | path for the previous case. */ | |
240 | ||
241 | int ht_nodeid = c->initial_apicid; | |
242 | ||
243 | if (ht_nodeid >= 0 && | |
244 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | |
245 | node = apicid_to_node[ht_nodeid]; | |
246 | /* Pick a nearby node */ | |
247 | if (!node_online(node)) | |
248 | node = nearby_node(apicid); | |
249 | } | |
250 | numa_set_node(cpu, node); | |
251 | ||
823b259b | 252 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); |
6c62aa4a YL |
253 | #endif |
254 | } | |
255 | ||
11fdd252 YL |
256 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) |
257 | { | |
258 | #ifdef CONFIG_X86_HT | |
259 | unsigned bits, ecx; | |
260 | ||
261 | /* Multi core CPU? */ | |
262 | if (c->extended_cpuid_level < 0x80000008) | |
263 | return; | |
264 | ||
265 | ecx = cpuid_ecx(0x80000008); | |
266 | ||
267 | c->x86_max_cores = (ecx & 0xff) + 1; | |
268 | ||
269 | /* CPU telling us the core id bits shift? */ | |
270 | bits = (ecx >> 12) & 0xF; | |
271 | ||
272 | /* Otherwise recompute */ | |
273 | if (bits == 0) { | |
274 | while ((1 << bits) < c->x86_max_cores) | |
275 | bits++; | |
276 | } | |
277 | ||
278 | c->x86_coreid_bits = bits; | |
279 | #endif | |
280 | } | |
281 | ||
03ae5768 | 282 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) |
2b16a235 | 283 | { |
11fdd252 YL |
284 | early_init_amd_mc(c); |
285 | ||
40fb1715 VP |
286 | /* |
287 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | |
288 | * with P/T states and does not stop in deep C-states | |
289 | */ | |
290 | if (c->x86_power & (1 << 8)) { | |
e3224234 | 291 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
40fb1715 VP |
292 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
293 | } | |
5fef55fd | 294 | |
6c62aa4a YL |
295 | #ifdef CONFIG_X86_64 |
296 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | |
297 | #else | |
5fef55fd | 298 | /* Set MTRR capability flag if appropriate */ |
6c62aa4a YL |
299 | if (c->x86 == 5) |
300 | if (c->x86_model == 13 || c->x86_model == 9 || | |
301 | (c->x86_model == 8 && c->x86_mask >= 8)) | |
302 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | |
303 | #endif | |
2b16a235 AK |
304 | } |
305 | ||
b4af3f7c | 306 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
1da177e4 | 307 | { |
7d318d77 | 308 | #ifdef CONFIG_SMP |
3c92c2ba | 309 | unsigned long long value; |
7d318d77 | 310 | |
fb87a298 PC |
311 | /* |
312 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | |
7d318d77 AK |
313 | * bit 6 of msr C001_0015 |
314 | * | |
315 | * Errata 63 for SH-B3 steppings | |
316 | * Errata 122 for all steppings (F+ have it disabled by default) | |
317 | */ | |
11fdd252 | 318 | if (c->x86 == 0xf) { |
7d318d77 AK |
319 | rdmsrl(MSR_K7_HWCR, value); |
320 | value |= 1 << 6; | |
321 | wrmsrl(MSR_K7_HWCR, value); | |
322 | } | |
323 | #endif | |
324 | ||
2b16a235 AK |
325 | early_init_amd(c); |
326 | ||
fb87a298 PC |
327 | /* |
328 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; | |
16282a8e | 329 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway |
fb87a298 | 330 | */ |
16282a8e | 331 | clear_cpu_cap(c, 0*32+31); |
fb87a298 | 332 | |
6c62aa4a YL |
333 | #ifdef CONFIG_X86_64 |
334 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | |
335 | if (c->x86 == 0xf) { | |
336 | u32 level; | |
337 | ||
338 | level = cpuid_eax(1); | |
339 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | |
340 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
341 | } | |
342 | if (c->x86 == 0x10 || c->x86 == 0x11) | |
343 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
344 | #else | |
345 | ||
346 | /* | |
347 | * FIXME: We should handle the K5 here. Set up the write | |
348 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | |
349 | * no bus pipeline) | |
350 | */ | |
351 | ||
fb87a298 PC |
352 | switch (c->x86) { |
353 | case 4: | |
11fdd252 YL |
354 | init_amd_k5(c); |
355 | break; | |
fb87a298 | 356 | case 5: |
11fdd252 | 357 | init_amd_k6(c); |
1da177e4 | 358 | break; |
11fdd252 YL |
359 | case 6: /* An Athlon/Duron */ |
360 | init_amd_k7(c); | |
1da177e4 LT |
361 | break; |
362 | } | |
11fdd252 YL |
363 | |
364 | /* K6s reports MCEs but don't actually have all the MSRs */ | |
365 | if (c->x86 < 6) | |
366 | clear_cpu_cap(c, X86_FEATURE_MCE); | |
6c62aa4a | 367 | #endif |
11fdd252 | 368 | |
6c62aa4a | 369 | /* Enable workaround for FXSAVE leak */ |
18bd057b | 370 | if (c->x86 >= 6) |
16282a8e | 371 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); |
1da177e4 | 372 | |
11fdd252 YL |
373 | if (!c->x86_model_id[0]) { |
374 | switch (c->x86) { | |
375 | case 0xf: | |
376 | /* Should distinguish Models here, but this is only | |
377 | a fallback anyways. */ | |
378 | strcpy(c->x86_model_id, "Hammer"); | |
379 | break; | |
380 | } | |
381 | } | |
3dd9d514 | 382 | |
11fdd252 | 383 | display_cacheinfo(c); |
3dd9d514 | 384 | |
11fdd252 | 385 | /* Multi core CPU? */ |
6c62aa4a | 386 | if (c->extended_cpuid_level >= 0x80000008) { |
11fdd252 | 387 | amd_detect_cmp(c); |
6c62aa4a YL |
388 | srat_detect_node(c); |
389 | } | |
faee9a5d | 390 | |
6c62aa4a | 391 | #ifdef CONFIG_X86_32 |
11fdd252 | 392 | detect_ht(c); |
6c62aa4a | 393 | #endif |
39b3a791 | 394 | |
11fdd252 YL |
395 | if (c->extended_cpuid_level >= 0x80000006) { |
396 | if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) | |
67cddd94 AK |
397 | num_cache_leaves = 4; |
398 | else | |
399 | num_cache_leaves = 3; | |
400 | } | |
3556ddfa | 401 | |
11fdd252 YL |
402 | if (c->x86 >= 0xf && c->x86 <= 0x11) |
403 | set_cpu_cap(c, X86_FEATURE_K8); | |
de421863 | 404 | |
11fdd252 YL |
405 | if (cpu_has_xmm2) { |
406 | /* MFENCE stops RDTSC speculation */ | |
16282a8e | 407 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); |
11fdd252 | 408 | } |
6c62aa4a YL |
409 | |
410 | #ifdef CONFIG_X86_64 | |
411 | if (c->x86 == 0x10) { | |
412 | /* do this for boot cpu */ | |
413 | if (c == &boot_cpu_data) | |
414 | check_enable_amd_mmconf_dmi(); | |
415 | ||
416 | fam10h_check_enable_mmcfg(); | |
417 | } | |
418 | ||
419 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | |
420 | unsigned long long tseg; | |
421 | ||
422 | /* | |
423 | * Split up direct mapping around the TSEG SMM area. | |
424 | * Don't do it for gbpages because there seems very little | |
425 | * benefit in doing so. | |
426 | */ | |
427 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | |
428 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | |
429 | if ((tseg>>PMD_SHIFT) < | |
430 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | |
431 | ((tseg>>PMD_SHIFT) < | |
432 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | |
433 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | |
434 | set_memory_4k((unsigned long)__va(tseg), 1); | |
435 | } | |
436 | } | |
437 | #endif | |
1da177e4 LT |
438 | } |
439 | ||
6c62aa4a | 440 | #ifdef CONFIG_X86_32 |
fb87a298 | 441 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
1da177e4 LT |
442 | { |
443 | /* AMD errata T13 (order #21922) */ | |
444 | if ((c->x86 == 6)) { | |
445 | if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ | |
446 | size = 64; | |
447 | if (c->x86_model == 4 && | |
fb87a298 | 448 | (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ |
1da177e4 LT |
449 | size = 256; |
450 | } | |
451 | return size; | |
452 | } | |
6c62aa4a | 453 | #endif |
1da177e4 | 454 | |
95414930 | 455 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { |
1da177e4 | 456 | .c_vendor = "AMD", |
fb87a298 | 457 | .c_ident = { "AuthenticAMD" }, |
6c62aa4a | 458 | #ifdef CONFIG_X86_32 |
1da177e4 LT |
459 | .c_models = { |
460 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = | |
461 | { | |
462 | [3] = "486 DX/2", | |
463 | [7] = "486 DX/2-WB", | |
fb87a298 PC |
464 | [8] = "486 DX/4", |
465 | [9] = "486 DX/4-WB", | |
1da177e4 | 466 | [14] = "Am5x86-WT", |
fb87a298 | 467 | [15] = "Am5x86-WB" |
1da177e4 LT |
468 | } |
469 | }, | |
470 | }, | |
6c62aa4a YL |
471 | .c_size_cache = amd_size_cache, |
472 | #endif | |
03ae5768 | 473 | .c_early_init = early_init_amd, |
1da177e4 | 474 | .c_init = init_amd, |
10a434fc | 475 | .c_x86_vendor = X86_VENDOR_AMD, |
1da177e4 LT |
476 | }; |
477 | ||
10a434fc | 478 | cpu_dev_register(amd_cpu_dev); |