]>
Commit | Line | Data |
---|---|---|
99ef7c2a JH |
1 | /* |
2 | * arch/metag/mm/cache.c | |
3 | * | |
4 | * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it under | |
7 | * the terms of the GNU General Public License version 2 as published by the | |
8 | * Free Software Foundation. | |
9 | * | |
10 | * Cache control code | |
11 | */ | |
12 | ||
13 | #include <linux/export.h> | |
14 | #include <linux/io.h> | |
15 | #include <asm/cacheflush.h> | |
16 | #include <asm/core_reg.h> | |
883a6355 | 17 | #include <asm/global_lock.h> |
99ef7c2a JH |
18 | #include <asm/metag_isa.h> |
19 | #include <asm/metag_mem.h> | |
20 | #include <asm/metag_regs.h> | |
21 | ||
22 | #define DEFAULT_CACHE_WAYS_LOG2 2 | |
23 | ||
24 | /* | |
25 | * Size of a set in the caches. Initialised for default 16K stride, adjusted | |
26 | * according to values passed through TBI global heap segment via LDLK (on ATP) | |
27 | * or config registers (on HTP/MTP) | |
28 | */ | |
29 | static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 | |
30 | - DEFAULT_CACHE_WAYS_LOG2; | |
31 | static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2 | |
32 | - DEFAULT_CACHE_WAYS_LOG2; | |
33 | /* | |
34 | * The number of sets in the caches. Initialised for HTP/ATP, adjusted | |
35 | * according to NOMMU setting in config registers | |
36 | */ | |
37 | static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; | |
38 | static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2; | |
39 | ||
883a6355 JH |
40 | #ifndef CONFIG_METAG_META12 |
41 | /** | |
42 | * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache | |
43 | */ | |
44 | static volatile u32 lnkget_testdata[16] __initdata __aligned(64); | |
45 | ||
46 | #define LNKGET_CONSTANT 0xdeadbeef | |
47 | ||
42ad59e3 | 48 | static void __init metag_lnkget_probe(void) |
883a6355 JH |
49 | { |
50 | int temp; | |
51 | long flags; | |
52 | ||
53 | /* | |
54 | * It's conceivable the user has configured a globally coherent cache | |
55 | * shared with non-Linux hardware threads, so use LOCK2 to prevent them | |
56 | * from executing and causing cache eviction during the test. | |
57 | */ | |
58 | __global_lock2(flags); | |
59 | ||
60 | /* read a value to bring it into the cache */ | |
61 | (void)lnkget_testdata[0]; | |
62 | lnkget_testdata[0] = 0; | |
63 | ||
64 | /* lnkget/lnkset it to modify it */ | |
65 | asm volatile( | |
66 | "1: LNKGETD %0, [%1]\n" | |
67 | " LNKSETD [%1], %2\n" | |
68 | " DEFR %0, TXSTAT\n" | |
69 | " ANDT %0, %0, #HI(0x3f000000)\n" | |
70 | " CMPT %0, #HI(0x02000000)\n" | |
71 | " BNZ 1b\n" | |
72 | : "=&d" (temp) | |
73 | : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT) | |
74 | : "cc"); | |
75 | ||
76 | /* re-read it to see if the cached value changed */ | |
77 | temp = lnkget_testdata[0]; | |
78 | ||
79 | __global_unlock2(flags); | |
80 | ||
81 | /* flush the cache line to fix any incoherency */ | |
82 | __builtin_dcache_flush((void *)&lnkget_testdata[0]); | |
83 | ||
84 | #if defined(CONFIG_METAG_LNKGET_AROUND_CACHE) | |
85 | /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */ | |
86 | if (temp == LNKGET_CONSTANT) | |
87 | pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n"); | |
88 | #elif defined(CONFIG_METAG_ATOMICITY_LNKGET) | |
89 | /* | |
90 | * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary | |
91 | * because the kernel is configured to use LNKGET/SET for atomicity | |
92 | */ | |
93 | WARN(temp != LNKGET_CONSTANT, | |
94 | "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" | |
95 | "Expect kernel failure as it's used for atomicity primitives\n"); | |
96 | #elif defined(CONFIG_SMP) | |
97 | /* | |
98 | * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the | |
99 | * gateway page won't flush and userland could break. | |
100 | */ | |
101 | WARN(temp != LNKGET_CONSTANT, | |
102 | "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n" | |
103 | "Expect userland failure as it's used for user gateway page\n"); | |
104 | #else | |
105 | /* | |
106 | * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it | |
107 | * doesn't actually matter as it doesn't have any effect on !SMP && | |
108 | * !ATOMICITY_LNKGET. | |
109 | */ | |
110 | if (temp != LNKGET_CONSTANT) | |
111 | pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"); | |
112 | #endif | |
113 | } | |
114 | #endif /* !CONFIG_METAG_META12 */ | |
115 | ||
99ef7c2a JH |
116 | /** |
117 | * metag_cache_probe() - Probe L1 cache configuration. | |
118 | * | |
119 | * Probe the L1 cache configuration to aid the L1 physical cache flushing | |
120 | * functions. | |
121 | */ | |
0a38a8ad | 122 | void __init metag_cache_probe(void) |
99ef7c2a JH |
123 | { |
124 | #ifndef CONFIG_METAG_META12 | |
125 | int coreid = metag_in32(METAC_CORE_ID); | |
126 | int config = metag_in32(METAC_CORE_CONFIG2); | |
127 | int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS; | |
128 | ||
129 | if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 || | |
130 | cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) { | |
131 | icache_sets_log2 = 1; | |
132 | dcache_sets_log2 = 1; | |
133 | } | |
134 | ||
135 | /* For normal size caches, the smallest size is 4Kb. | |
136 | For small caches, the smallest size is 64b */ | |
137 | icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT) | |
138 | ? 6 : 12; | |
139 | icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS) | |
140 | >> METAC_CORE_C2ICSZ_S; | |
141 | icache_set_shift -= icache_sets_log2; | |
142 | ||
143 | dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT) | |
144 | ? 6 : 12; | |
145 | dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS) | |
146 | >> METAC_CORECFG2_DCSZ_S; | |
147 | dcache_set_shift -= dcache_sets_log2; | |
883a6355 JH |
148 | |
149 | metag_lnkget_probe(); | |
99ef7c2a JH |
150 | #else |
151 | /* Extract cache sizes from global heap segment */ | |
152 | unsigned long val, u; | |
153 | int width, shift, addend; | |
154 | PTBISEG seg; | |
155 | ||
156 | seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL, | |
157 | TBID_SEGSCOPE_GLOBAL, | |
158 | TBID_SEGTYPE_HEAP)); | |
159 | if (seg != NULL) { | |
160 | val = seg->Data[1]; | |
161 | ||
162 | /* Work out width of I-cache size bit-field */ | |
163 | u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS) | |
164 | >> METAG_TBI_ICACHE_SIZE_S; | |
165 | width = 0; | |
166 | while (u & 1) { | |
167 | width++; | |
168 | u >>= 1; | |
169 | } | |
170 | /* Extract sign-extended size addend value */ | |
171 | shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width); | |
172 | addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS) | |
173 | << shift) | |
174 | >> (shift + METAG_TBI_ICACHE_SIZE_S); | |
175 | /* Now calculate I-cache set size */ | |
176 | icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 | |
177 | - DEFAULT_CACHE_WAYS_LOG2) | |
178 | + addend; | |
179 | ||
180 | /* Similarly for D-cache */ | |
181 | u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS) | |
182 | >> METAG_TBI_DCACHE_SIZE_S; | |
183 | width = 0; | |
184 | while (u & 1) { | |
185 | width++; | |
186 | u >>= 1; | |
187 | } | |
188 | shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width); | |
189 | addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS) | |
190 | << shift) | |
191 | >> (shift + METAG_TBI_DCACHE_SIZE_S); | |
192 | dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2 | |
193 | - DEFAULT_CACHE_WAYS_LOG2) | |
194 | + addend; | |
195 | } | |
196 | #endif | |
197 | } | |
198 | ||
199 | static void metag_phys_data_cache_flush(const void *start) | |
200 | { | |
201 | unsigned long flush0, flush1, flush2, flush3; | |
202 | int loops, step; | |
203 | int thread; | |
204 | int part, offset; | |
205 | int set_shift; | |
206 | ||
207 | /* Use a sequence of writes to flush the cache region requested */ | |
208 | thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) | |
209 | >> TXENABLE_THREAD_S; | |
210 | ||
211 | /* Cache is broken into sets which lie in contiguous RAMs */ | |
212 | set_shift = dcache_set_shift; | |
213 | ||
214 | /* Move to the base of the physical cache flush region */ | |
215 | flush0 = LINSYSCFLUSH_DCACHE_LINE; | |
216 | step = 64; | |
217 | ||
218 | /* Get partition data for this thread */ | |
219 | part = metag_in32(SYSC_DCPART0 + | |
220 | (SYSC_xCPARTn_STRIDE * thread)); | |
221 | ||
222 | if ((int)start < 0) | |
223 | /* Access Global vs Local partition */ | |
224 | part >>= SYSC_xCPARTG_AND_S | |
225 | - SYSC_xCPARTL_AND_S; | |
226 | ||
227 | /* Extract offset and move SetOff */ | |
228 | offset = (part & SYSC_xCPARTL_OR_BITS) | |
229 | >> SYSC_xCPARTL_OR_S; | |
230 | flush0 += (offset << (set_shift - 4)); | |
231 | ||
232 | /* Shrink size */ | |
233 | part = (part & SYSC_xCPARTL_AND_BITS) | |
234 | >> SYSC_xCPARTL_AND_S; | |
235 | loops = ((part + 1) << (set_shift - 4)); | |
236 | ||
237 | /* Reduce loops by step of cache line size */ | |
238 | loops /= step; | |
239 | ||
240 | flush1 = flush0 + (1 << set_shift); | |
241 | flush2 = flush0 + (2 << set_shift); | |
242 | flush3 = flush0 + (3 << set_shift); | |
243 | ||
244 | if (dcache_sets_log2 == 1) { | |
245 | flush2 = flush1; | |
246 | flush3 = flush1 + step; | |
247 | flush1 = flush0 + step; | |
248 | step <<= 1; | |
249 | loops >>= 1; | |
250 | } | |
251 | ||
252 | /* Clear loops ways in cache */ | |
253 | while (loops-- != 0) { | |
254 | /* Clear the ways. */ | |
255 | #if 0 | |
256 | /* | |
257 | * GCC doesn't generate very good code for this so we | |
258 | * provide inline assembly instead. | |
259 | */ | |
260 | metag_out8(0, flush0); | |
261 | metag_out8(0, flush1); | |
262 | metag_out8(0, flush2); | |
263 | metag_out8(0, flush3); | |
264 | ||
265 | flush0 += step; | |
266 | flush1 += step; | |
267 | flush2 += step; | |
268 | flush3 += step; | |
269 | #else | |
270 | asm volatile ( | |
271 | "SETB\t[%0+%4++],%5\n" | |
272 | "SETB\t[%1+%4++],%5\n" | |
273 | "SETB\t[%2+%4++],%5\n" | |
274 | "SETB\t[%3+%4++],%5\n" | |
275 | : "+e" (flush0), | |
276 | "+e" (flush1), | |
277 | "+e" (flush2), | |
278 | "+e" (flush3) | |
279 | : "e" (step), "a" (0)); | |
280 | #endif | |
281 | } | |
282 | } | |
283 | ||
284 | void metag_data_cache_flush_all(const void *start) | |
285 | { | |
286 | if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) | |
287 | /* No need to flush the data cache it's not actually enabled */ | |
288 | return; | |
289 | ||
290 | metag_phys_data_cache_flush(start); | |
291 | } | |
292 | ||
293 | void metag_data_cache_flush(const void *start, int bytes) | |
294 | { | |
295 | unsigned long flush0; | |
296 | int loops, step; | |
297 | ||
298 | if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0) | |
299 | /* No need to flush the data cache it's not actually enabled */ | |
300 | return; | |
301 | ||
302 | if (bytes >= 4096) { | |
303 | metag_phys_data_cache_flush(start); | |
304 | return; | |
305 | } | |
306 | ||
307 | /* Use linear cache flush mechanism on META IP */ | |
308 | flush0 = (int)start; | |
309 | loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes + | |
310 | (DCACHE_LINE_BYTES - 1); | |
311 | loops >>= DCACHE_LINE_S; | |
312 | ||
313 | #define PRIM_FLUSH(addr, offset) do { \ | |
314 | int __addr = ((int) (addr)) + ((offset) * 64); \ | |
315 | __builtin_dcache_flush((void *)(__addr)); \ | |
316 | } while (0) | |
317 | ||
318 | #define LOOP_INC (4*64) | |
319 | ||
320 | do { | |
321 | /* By default stop */ | |
322 | step = 0; | |
323 | ||
324 | switch (loops) { | |
325 | /* Drop Thru Cases! */ | |
326 | default: | |
327 | PRIM_FLUSH(flush0, 3); | |
328 | loops -= 4; | |
329 | step = 1; | |
330 | case 3: | |
331 | PRIM_FLUSH(flush0, 2); | |
332 | case 2: | |
333 | PRIM_FLUSH(flush0, 1); | |
334 | case 1: | |
335 | PRIM_FLUSH(flush0, 0); | |
336 | flush0 += LOOP_INC; | |
337 | case 0: | |
338 | break; | |
339 | } | |
340 | } while (step); | |
341 | } | |
342 | EXPORT_SYMBOL(metag_data_cache_flush); | |
343 | ||
344 | static void metag_phys_code_cache_flush(const void *start, int bytes) | |
345 | { | |
346 | unsigned long flush0, flush1, flush2, flush3, end_set; | |
347 | int loops, step; | |
348 | int thread; | |
349 | int set_shift, set_size; | |
350 | int part, offset; | |
351 | ||
352 | /* Use a sequence of writes to flush the cache region requested */ | |
353 | thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS) | |
354 | >> TXENABLE_THREAD_S; | |
355 | set_shift = icache_set_shift; | |
356 | ||
357 | /* Move to the base of the physical cache flush region */ | |
358 | flush0 = LINSYSCFLUSH_ICACHE_LINE; | |
359 | step = 64; | |
360 | ||
361 | /* Get partition code for this thread */ | |
362 | part = metag_in32(SYSC_ICPART0 + | |
363 | (SYSC_xCPARTn_STRIDE * thread)); | |
364 | ||
365 | if ((int)start < 0) | |
366 | /* Access Global vs Local partition */ | |
367 | part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S; | |
368 | ||
369 | /* Extract offset and move SetOff */ | |
370 | offset = (part & SYSC_xCPARTL_OR_BITS) | |
371 | >> SYSC_xCPARTL_OR_S; | |
372 | flush0 += (offset << (set_shift - 4)); | |
373 | ||
374 | /* Shrink size */ | |
375 | part = (part & SYSC_xCPARTL_AND_BITS) | |
376 | >> SYSC_xCPARTL_AND_S; | |
377 | loops = ((part + 1) << (set_shift - 4)); | |
378 | ||
379 | /* Where does the Set end? */ | |
380 | end_set = flush0 + loops; | |
381 | set_size = loops; | |
382 | ||
383 | #ifdef CONFIG_METAG_META12 | |
384 | if ((bytes < 4096) && (bytes < loops)) { | |
385 | /* Unreachable on HTP/MTP */ | |
386 | /* Only target the sets that could be relavent */ | |
387 | flush0 += (loops - step) & ((int) start); | |
388 | loops = (((int) start) & (step-1)) + bytes + step - 1; | |
389 | } | |
390 | #endif | |
391 | ||
392 | /* Reduce loops by step of cache line size */ | |
393 | loops /= step; | |
394 | ||
395 | flush1 = flush0 + (1<<set_shift); | |
396 | flush2 = flush0 + (2<<set_shift); | |
397 | flush3 = flush0 + (3<<set_shift); | |
398 | ||
399 | if (icache_sets_log2 == 1) { | |
400 | flush2 = flush1; | |
401 | flush3 = flush1 + step; | |
402 | flush1 = flush0 + step; | |
403 | #if 0 | |
404 | /* flush0 will stop one line early in this case | |
405 | * (flush1 will do the final line). | |
406 | * However we don't correct end_set here at the moment | |
407 | * because it will never wrap on HTP/MTP | |
408 | */ | |
409 | end_set -= step; | |
410 | #endif | |
411 | step <<= 1; | |
412 | loops >>= 1; | |
413 | } | |
414 | ||
415 | /* Clear loops ways in cache */ | |
416 | while (loops-- != 0) { | |
417 | #if 0 | |
418 | /* | |
419 | * GCC doesn't generate very good code for this so we | |
420 | * provide inline assembly instead. | |
421 | */ | |
422 | /* Clear the ways */ | |
423 | metag_out8(0, flush0); | |
424 | metag_out8(0, flush1); | |
425 | metag_out8(0, flush2); | |
426 | metag_out8(0, flush3); | |
427 | ||
428 | flush0 += step; | |
429 | flush1 += step; | |
430 | flush2 += step; | |
431 | flush3 += step; | |
432 | #else | |
433 | asm volatile ( | |
434 | "SETB\t[%0+%4++],%5\n" | |
435 | "SETB\t[%1+%4++],%5\n" | |
436 | "SETB\t[%2+%4++],%5\n" | |
437 | "SETB\t[%3+%4++],%5\n" | |
438 | : "+e" (flush0), | |
439 | "+e" (flush1), | |
440 | "+e" (flush2), | |
441 | "+e" (flush3) | |
442 | : "e" (step), "a" (0)); | |
443 | #endif | |
444 | ||
445 | if (flush0 == end_set) { | |
446 | /* Wrap within Set 0 */ | |
447 | flush0 -= set_size; | |
448 | flush1 -= set_size; | |
449 | flush2 -= set_size; | |
450 | flush3 -= set_size; | |
451 | } | |
452 | } | |
453 | } | |
454 | ||
455 | void metag_code_cache_flush_all(const void *start) | |
456 | { | |
457 | if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) | |
458 | /* No need to flush the code cache it's not actually enabled */ | |
459 | return; | |
460 | ||
461 | metag_phys_code_cache_flush(start, 4096); | |
462 | } | |
f626dc70 | 463 | EXPORT_SYMBOL(metag_code_cache_flush_all); |
99ef7c2a JH |
464 | |
465 | void metag_code_cache_flush(const void *start, int bytes) | |
466 | { | |
467 | #ifndef CONFIG_METAG_META12 | |
468 | void *flush; | |
469 | int loops, step; | |
470 | #endif /* !CONFIG_METAG_META12 */ | |
471 | ||
472 | if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0) | |
473 | /* No need to flush the code cache it's not actually enabled */ | |
474 | return; | |
475 | ||
476 | #ifdef CONFIG_METAG_META12 | |
477 | /* CACHEWD isn't available on Meta1, so always do full cache flush */ | |
478 | metag_phys_code_cache_flush(start, bytes); | |
479 | ||
480 | #else /* CONFIG_METAG_META12 */ | |
481 | /* If large size do full physical cache flush */ | |
482 | if (bytes >= 4096) { | |
483 | metag_phys_code_cache_flush(start, bytes); | |
484 | return; | |
485 | } | |
486 | ||
487 | /* Use linear cache flush mechanism on META IP */ | |
488 | flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1)); | |
489 | loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes + | |
490 | (ICACHE_LINE_BYTES-1); | |
491 | loops >>= ICACHE_LINE_S; | |
492 | ||
493 | #define PRIM_IFLUSH(addr, offset) \ | |
494 | __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT) | |
495 | ||
496 | #define LOOP_INC (4*64) | |
497 | ||
498 | do { | |
499 | /* By default stop */ | |
500 | step = 0; | |
501 | ||
502 | switch (loops) { | |
503 | /* Drop Thru Cases! */ | |
504 | default: | |
505 | PRIM_IFLUSH(flush, 3); | |
506 | loops -= 4; | |
507 | step = 1; | |
508 | case 3: | |
509 | PRIM_IFLUSH(flush, 2); | |
510 | case 2: | |
511 | PRIM_IFLUSH(flush, 1); | |
512 | case 1: | |
513 | PRIM_IFLUSH(flush, 0); | |
514 | flush += LOOP_INC; | |
515 | case 0: | |
516 | break; | |
517 | } | |
518 | } while (step); | |
519 | #endif /* !CONFIG_METAG_META12 */ | |
520 | } | |
521 | EXPORT_SYMBOL(metag_code_cache_flush); |