/*
 * Info about, and flushing of, the host cpu caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */
114 | ||
115 | ||
116 | /* | |
117 | * Architecture (+ OS) specific cache detection mechanisms. | |
118 | */ | |
119 | ||
c79a8e84 NP |
120 | #if defined(__powerpc__) |
121 | static bool have_coherent_icache; | |
122 | #endif | |
123 | ||
b3c32602 PB |
124 | #if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32) |
125 | /* | |
126 | * Apple does not expose CTR_EL0, so we must use system interfaces. | |
127 | * Windows neither, but we use a generic implementation of flush_idcache_range | |
128 | * in this case. | |
129 | */ | |
bdd50dc7 | 130 | static uint64_t save_ctr_el0; |
79713752 RH |
131 | static void arch_cache_info(int *isize, int *dsize) |
132 | { | |
bdd50dc7 | 133 | uint64_t ctr; |
79713752 | 134 | |
bdd50dc7 RH |
135 | /* |
136 | * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1, | |
137 | * but (at least under Linux) these are marked protected by the | |
138 | * kernel. However, CTR_EL0 contains the minimum linesize in the | |
139 | * entire hierarchy, and is used by userspace cache flushing. | |
140 | * | |
141 | * We will also use this value in flush_idcache_range. | |
142 | */ | |
143 | asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); | |
144 | save_ctr_el0 = ctr; | |
145 | ||
146 | if (*isize == 0 || *dsize == 0) { | |
79713752 RH |
147 | if (*isize == 0) { |
148 | *isize = 4 << (ctr & 0xf); | |
149 | } | |
150 | if (*dsize == 0) { | |
151 | *dsize = 4 << ((ctr >> 16) & 0xf); | |
152 | } | |
153 | } | |
154 | } | |
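
/*
 * Worked example (illustrative note, not part of the original source):
 * CTR_EL0 encodes IminLine in bits [3:0] and DminLine in bits [19:16],
 * each as log2 of the line size in 4-byte words, which is why the code
 * above decodes them as 4 << field.  A field value of 4 therefore means
 * 4 << 4 = 64-byte lines, a common value on current cores.
 */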
155 | ||
156 | #elif defined(_ARCH_PPC) && defined(__linux__) | |
157 | # include "elf.h" | |
158 | ||
159 | static void arch_cache_info(int *isize, int *dsize) | |
160 | { | |
161 | if (*isize == 0) { | |
162 | *isize = qemu_getauxval(AT_ICACHEBSIZE); | |
163 | } | |
164 | if (*dsize == 0) { | |
165 | *dsize = qemu_getauxval(AT_DCACHEBSIZE); | |
166 | } | |
c79a8e84 | 167 | have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP; |
79713752 RH |
168 | } |
169 | ||
170 | #else | |
171 | static void arch_cache_info(int *isize, int *dsize) { } | |
172 | #endif /* arch_cache_info */ | |
173 | ||
174 | /* | |
175 | * ... and if all else fails ... | |
176 | */ | |
177 | ||
178 | static void fallback_cache_info(int *isize, int *dsize) | |
179 | { | |
180 | /* If we can only find one of the two, assume they're the same. */ | |
181 | if (*isize) { | |
182 | if (*dsize) { | |
183 | /* Success! */ | |
184 | } else { | |
185 | *dsize = *isize; | |
186 | } | |
187 | } else if (*dsize) { | |
188 | *isize = *dsize; | |
189 | } else { | |
190 | #if defined(_ARCH_PPC) | |
191 | /* | |
192 | * For PPC, we're going to use the cache sizes computed for | |
193 | * flush_idcache_range. Which means that we must use the | |
194 | * architecture minimum. | |
195 | */ | |
196 | *isize = *dsize = 16; | |
197 | #else | |
198 | /* Otherwise, 64 bytes is not uncommon. */ | |
199 | *isize = *dsize = 64; | |
200 | #endif | |
201 | } | |
202 | } | |
203 | ||
204 | static void __attribute__((constructor)) init_cache_info(void) | |
205 | { | |
206 | int isize = 0, dsize = 0; | |
207 | ||
208 | sys_cache_info(&isize, &dsize); | |
209 | arch_cache_info(&isize, &dsize); | |
210 | fallback_cache_info(&isize, &dsize); | |
211 | ||
212 | assert((isize & (isize - 1)) == 0); | |
213 | assert((dsize & (dsize - 1)) == 0); | |
214 | ||
215 | qemu_icache_linesize = isize; | |
216 | qemu_icache_linesize_log = ctz32(isize); | |
217 | qemu_dcache_linesize = dsize; | |
218 | qemu_dcache_linesize_log = ctz32(dsize); | |
219 | ||
220 | qatomic64_init(); | |
221 | } | |
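
/*
 * Illustrative sketch (not part of the original file): a consumer could
 * use the exported values to round a hypothetical addr/len range out to
 * whole data-cache lines before operating on it, e.g.:
 *
 *     uintptr_t mask  = qemu_dcache_linesize - 1;
 *     uintptr_t first = addr & ~mask;
 *     uintptr_t last  = (addr + len + mask) & ~mask;
 *     size_t nlines   = (last - first) >> qemu_dcache_linesize_log;
 *
 * The power-of-two asserts in init_cache_info() are what make this
 * masking and the *_linesize_log shift counts valid.
 */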
222 | ||
223 | ||
224 | /* | |
225 | * Architecture (+ OS) specific cache flushing mechanisms. | |
226 | */ | |
227 | ||
084cfca1 RH |
228 | #if defined(__i386__) || defined(__x86_64__) || defined(__s390__) |
229 | ||
230 | /* Caches are coherent and do not require flushing; symbol inline. */ | |
231 | ||
b3c32602 PB |
232 | #elif defined(__aarch64__) && !defined(CONFIG_WIN32) |
233 | /* | |
234 | * For Windows, we use generic implementation of flush_idcache_range, that | |
235 | * performs a call to FlushInstructionCache, through __builtin___clear_cache. | |
236 | */ | |
664a7973 RH |
237 | |
238 | #ifdef CONFIG_DARWIN | |
239 | /* Apple does not expose CTR_EL0, so we must use system interfaces. */ | |
240 | extern void sys_icache_invalidate(void *start, size_t len); | |
241 | extern void sys_dcache_flush(void *start, size_t len); | |
242 | void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) | |
243 | { | |
244 | sys_dcache_flush((void *)rw, len); | |
245 | sys_icache_invalidate((void *)rx, len); | |
246 | } | |
247 | #else | |
248 | ||
664a7973 RH |
249 | /* |
250 | * This is a copy of gcc's __aarch64_sync_cache_range, modified | |
251 | * to fit this three-operand interface. | |
252 | */ | |
253 | void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) | |
254 | { | |
255 | const unsigned CTR_IDC = 1u << 28; | |
256 | const unsigned CTR_DIC = 1u << 29; | |
acd15fc2 | 257 | const uint64_t ctr_el0 = save_ctr_el0; |
bdd50dc7 RH |
258 | const uintptr_t icache_lsize = qemu_icache_linesize; |
259 | const uintptr_t dcache_lsize = qemu_dcache_linesize; | |
664a7973 RH |
260 | uintptr_t p; |
261 | ||
262 | /* | |
263 | * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification | |
264 | * is not required for instruction to data coherence. | |
265 | */ | |
266 | if (!(ctr_el0 & CTR_IDC)) { | |
267 | /* | |
268 | * Loop over the address range, clearing one cache line at once. | |
269 | * Data cache must be flushed to unification first to make sure | |
270 | * the instruction cache fetches the updated data. | |
271 | */ | |
272 | for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) { | |
273 | asm volatile("dc\tcvau, %0" : : "r" (p) : "memory"); | |
274 | } | |
275 | asm volatile("dsb\tish" : : : "memory"); | |
276 | } | |
277 | ||
278 | /* | |
279 | * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point | |
280 | * of Unification is not required for instruction to data coherence. | |
281 | */ | |
282 | if (!(ctr_el0 & CTR_DIC)) { | |
283 | for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) { | |
284 | asm volatile("ic\tivau, %0" : : "r"(p) : "memory"); | |
285 | } | |
286 | asm volatile ("dsb\tish" : : : "memory"); | |
287 | } | |
288 | ||
289 | asm volatile("isb" : : : "memory"); | |
290 | } | |
291 | #endif /* CONFIG_DARWIN */ | |
292 | ||
#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
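
/*
 * Usage sketch (illustrative only, not part of the original file): a JIT
 * that writes instructions through a writable mapping (here called rw_buf)
 * that is also mapped executable elsewhere (rx_buf) would typically do:
 *
 *     memcpy(rw_buf, generated_code, code_size);
 *     flush_idcache_range((uintptr_t)rx_buf, (uintptr_t)rw_buf, code_size);
 *
 * before jumping into rx_buf.  When the two mappings coincide (rx == rw),
 * some of the implementations above (e.g. the MIPS and generic fallback
 * versions) skip the redundant data-cache pass on the writable alias.
 */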