]> git.proxmox.com Git - mirror_qemu.git/blob - accel/tcg/cputlb.c
cputlb: Move body of cpu_ldst_template.h out of line
[mirror_qemu.git] / accel / tcg / cputlb.c
1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace-root.h"
38 #include "qemu/plugin.h"
39 #include "trace/mem.h"
40 #ifdef CONFIG_PLUGIN
41 #include "qemu/plugin-memory.h"
42 #endif
43
44 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
45 /* #define DEBUG_TLB */
46 /* #define DEBUG_TLB_LOG */
47
/*
 * Turn the optional DEBUG_TLB / DEBUG_TLB_LOG defines into 0/1 constants
 * so they can be tested with ordinary "if" statements.
 * DEBUG_TLB_LOG only has an effect when DEBUG_TLB is defined as well.
 */
#if defined(DEBUG_TLB)
# define DEBUG_TLB_GATE 1
#else
# define DEBUG_TLB_GATE 0
#endif

#if defined(DEBUG_TLB) && defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_LOG_GATE 0
#endif
59
/*
 * printf-style debug output, prefixed with the calling function's name.
 * Goes to the CPU_LOG_MMU log when DEBUG_TLB_LOG_GATE is set, otherwise to
 * stderr when DEBUG_TLB_GATE is set, otherwise nowhere.
 */
#define tlb_debug(fmt, ...)                                             \
    do {                                                                \
        if (DEBUG_TLB_LOG_GATE) {                                       \
            qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__,            \
                          ## __VA_ARGS__);                              \
        } else if (DEBUG_TLB_GATE) {                                    \
            fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__);      \
        }                                                               \
    } while (0)
68
/*
 * Debug-only check (active when DEBUG_TLB_GATE is set) that @cpu is either
 * not yet created or is the CPU of the calling thread.
 */
#define assert_cpu_is_self(cpu)                                         \
    do {                                                                \
        if (DEBUG_TLB_GATE) {                                           \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));         \
        }                                                               \
    } while (0)
74
75 /* run_on_cpu_data.target_ptr should always be big enough for a
76 * target_ulong even on 32 bit builds */
77 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
78
79 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
80 */
81 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
82 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
83
84 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
85 {
86 return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
87 }
88
89 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
90 size_t max_entries)
91 {
92 desc->window_begin_ns = ns;
93 desc->window_max_entries = max_entries;
94 }
95
96 static void tlb_dyn_init(CPUArchState *env)
97 {
98 int i;
99
100 for (i = 0; i < NB_MMU_MODES; i++) {
101 CPUTLBDesc *desc = &env_tlb(env)->d[i];
102 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
103
104 tlb_window_reset(desc, get_clock_realtime(), 0);
105 desc->n_used_entries = 0;
106 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
107 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
108 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
109 }
110 }
111
112 /**
113 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
114 * @env: CPU that owns the TLB
115 * @mmu_idx: MMU index of the TLB
116 *
117 * Called with tlb_lock_held.
118 *
119 * We have two main constraints when resizing a TLB: (1) we only resize it
120 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
121 * the array or unnecessarily flushing it), which means we do not control how
122 * frequently the resizing can occur; (2) we don't have access to the guest's
123 * future scheduling decisions, and therefore have to decide the magnitude of
124 * the resize based on past observations.
125 *
126 * In general, a memory-hungry process can benefit greatly from an appropriately
127 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
128 * we just have to make the TLB as large as possible; while an oversized TLB
129 * results in minimal TLB miss rates, it also takes longer to be flushed
130 * (flushes can be _very_ frequent), and the reduced locality can also hurt
131 * performance.
132 *
133 * To achieve near-optimal performance for all kinds of workloads, we:
134 *
135 * 1. Aggressively increase the size of the TLB when the use rate of the
136 * TLB being flushed is high, since it is likely that in the near future this
137 * memory-hungry process will execute again, and its memory hungriness will
138 * probably be similar.
139 *
140 * 2. Slowly reduce the size of the TLB as the use rate declines over a
141 * reasonably large time window. The rationale is that if in such a time window
142 * we have not observed a high TLB use rate, it is likely that we won't observe
143 * it in the near future. In that case, once a time window expires we downsize
144 * the TLB to match the maximum use rate observed in the window.
145 *
146 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
147 * since in that range performance is likely near-optimal. Recall that the TLB
148 * is direct mapped, so we want the use rate to be low (or at least not too
149 * high), since otherwise we are likely to have a significant amount of
150 * conflict misses.
151 */
152 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
153 {
154 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
155 size_t old_size = tlb_n_entries(env, mmu_idx);
156 size_t rate;
157 size_t new_size = old_size;
158 int64_t now = get_clock_realtime();
159 int64_t window_len_ms = 100;
160 int64_t window_len_ns = window_len_ms * 1000 * 1000;
161 bool window_expired = now > desc->window_begin_ns + window_len_ns;
162
163 if (desc->n_used_entries > desc->window_max_entries) {
164 desc->window_max_entries = desc->n_used_entries;
165 }
166 rate = desc->window_max_entries * 100 / old_size;
167
168 if (rate > 70) {
169 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
170 } else if (rate < 30 && window_expired) {
171 size_t ceil = pow2ceil(desc->window_max_entries);
172 size_t expected_rate = desc->window_max_entries * 100 / ceil;
173
174 /*
175 * Avoid undersizing when the max number of entries seen is just below
176 * a pow2. For instance, if max_entries == 1025, the expected use rate
177 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
178 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
179 * later. Thus, make sure that the expected use rate remains below 70%.
180 * (and since we double the size, that means the lowest rate we'd
181 * expect to get is 35%, which is still in the 30-70% range where
182 * we consider that the size is appropriate.)
183 */
184 if (expected_rate > 70) {
185 ceil *= 2;
186 }
187 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
188 }
189
190 if (new_size == old_size) {
191 if (window_expired) {
192 tlb_window_reset(desc, now, desc->n_used_entries);
193 }
194 return;
195 }
196
197 g_free(env_tlb(env)->f[mmu_idx].table);
198 g_free(env_tlb(env)->d[mmu_idx].iotlb);
199
200 tlb_window_reset(desc, now, 0);
201 /* desc->n_used_entries is cleared by the caller */
202 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
203 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
204 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
205 /*
206 * If the allocations fail, try smaller sizes. We just freed some
207 * memory, so going back to half of new_size has a good chance of working.
208 * Increased memory pressure elsewhere in the system might cause the
209 * allocations to fail though, so we progressively reduce the allocation
210 * size, aborting if we cannot even allocate the smallest TLB we support.
211 */
212 while (env_tlb(env)->f[mmu_idx].table == NULL ||
213 env_tlb(env)->d[mmu_idx].iotlb == NULL) {
214 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
215 error_report("%s: %s", __func__, strerror(errno));
216 abort();
217 }
218 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
219 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
220
221 g_free(env_tlb(env)->f[mmu_idx].table);
222 g_free(env_tlb(env)->d[mmu_idx].iotlb);
223 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
224 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
225 }
226 }
227
228 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
229 {
230 tlb_mmu_resize_locked(env, mmu_idx);
231 memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
232 env_tlb(env)->d[mmu_idx].n_used_entries = 0;
233 }
234
235 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
236 {
237 env_tlb(env)->d[mmu_idx].n_used_entries++;
238 }
239
240 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
241 {
242 env_tlb(env)->d[mmu_idx].n_used_entries--;
243 }
244
245 void tlb_init(CPUState *cpu)
246 {
247 CPUArchState *env = cpu->env_ptr;
248
249 qemu_spin_init(&env_tlb(env)->c.lock);
250
251 /* Ensure that cpu_reset performs a full flush. */
252 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
253
254 tlb_dyn_init(env);
255 }
256
257 /* flush_all_helper: run fn across all cpus
258 *
259 * If the wait flag is set then the src cpu's helper will be queued as
260 * "safe" work and the loop exited creating a synchronisation point
261 * where all queued work will be finished before execution starts
262 * again.
263 */
264 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
265 run_on_cpu_data d)
266 {
267 CPUState *cpu;
268
269 CPU_FOREACH(cpu) {
270 if (cpu != src) {
271 async_run_on_cpu(cpu, fn, d);
272 }
273 }
274 }
275
276 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
277 {
278 CPUState *cpu;
279 size_t full = 0, part = 0, elide = 0;
280
281 CPU_FOREACH(cpu) {
282 CPUArchState *env = cpu->env_ptr;
283
284 full += atomic_read(&env_tlb(env)->c.full_flush_count);
285 part += atomic_read(&env_tlb(env)->c.part_flush_count);
286 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
287 }
288 *pfull = full;
289 *ppart = part;
290 *pelide = elide;
291 }
292
293 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
294 {
295 tlb_table_flush_by_mmuidx(env, mmu_idx);
296 env_tlb(env)->d[mmu_idx].large_page_addr = -1;
297 env_tlb(env)->d[mmu_idx].large_page_mask = -1;
298 env_tlb(env)->d[mmu_idx].vindex = 0;
299 memset(env_tlb(env)->d[mmu_idx].vtable, -1,
300 sizeof(env_tlb(env)->d[0].vtable));
301 }
302
303 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
304 {
305 CPUArchState *env = cpu->env_ptr;
306 uint16_t asked = data.host_int;
307 uint16_t all_dirty, work, to_clean;
308
309 assert_cpu_is_self(cpu);
310
311 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
312
313 qemu_spin_lock(&env_tlb(env)->c.lock);
314
315 all_dirty = env_tlb(env)->c.dirty;
316 to_clean = asked & all_dirty;
317 all_dirty &= ~to_clean;
318 env_tlb(env)->c.dirty = all_dirty;
319
320 for (work = to_clean; work != 0; work &= work - 1) {
321 int mmu_idx = ctz32(work);
322 tlb_flush_one_mmuidx_locked(env, mmu_idx);
323 }
324
325 qemu_spin_unlock(&env_tlb(env)->c.lock);
326
327 cpu_tb_jmp_cache_clear(cpu);
328
329 if (to_clean == ALL_MMUIDX_BITS) {
330 atomic_set(&env_tlb(env)->c.full_flush_count,
331 env_tlb(env)->c.full_flush_count + 1);
332 } else {
333 atomic_set(&env_tlb(env)->c.part_flush_count,
334 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
335 if (to_clean != asked) {
336 atomic_set(&env_tlb(env)->c.elide_flush_count,
337 env_tlb(env)->c.elide_flush_count +
338 ctpop16(asked & ~to_clean));
339 }
340 }
341 }
342
343 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
344 {
345 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
346
347 if (cpu->created && !qemu_cpu_is_self(cpu)) {
348 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
349 RUN_ON_CPU_HOST_INT(idxmap));
350 } else {
351 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
352 }
353 }
354
355 void tlb_flush(CPUState *cpu)
356 {
357 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
358 }
359
360 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
361 {
362 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
363
364 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
365
366 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
367 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
368 }
369
370 void tlb_flush_all_cpus(CPUState *src_cpu)
371 {
372 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
373 }
374
375 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
376 {
377 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
378
379 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
380
381 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
382 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
383 }
384
385 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
386 {
387 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
388 }
389
390 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
391 target_ulong page)
392 {
393 return tlb_hit_page(tlb_entry->addr_read, page) ||
394 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
395 tlb_hit_page(tlb_entry->addr_code, page);
396 }
397
398 /**
399 * tlb_entry_is_empty - return true if the entry is not in use
400 * @te: pointer to CPUTLBEntry
401 */
402 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
403 {
404 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
405 }
406
407 /* Called with tlb_c.lock held */
408 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
409 target_ulong page)
410 {
411 if (tlb_hit_page_anyprot(tlb_entry, page)) {
412 memset(tlb_entry, -1, sizeof(*tlb_entry));
413 return true;
414 }
415 return false;
416 }
417
418 /* Called with tlb_c.lock held */
419 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
420 target_ulong page)
421 {
422 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
423 int k;
424
425 assert_cpu_is_self(env_cpu(env));
426 for (k = 0; k < CPU_VTLB_SIZE; k++) {
427 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
428 tlb_n_used_entries_dec(env, mmu_idx);
429 }
430 }
431 }
432
433 static void tlb_flush_page_locked(CPUArchState *env, int midx,
434 target_ulong page)
435 {
436 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
437 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
438
439 /* Check if we need to flush due to large pages. */
440 if ((page & lp_mask) == lp_addr) {
441 tlb_debug("forcing full flush midx %d ("
442 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
443 midx, lp_addr, lp_mask);
444 tlb_flush_one_mmuidx_locked(env, midx);
445 } else {
446 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
447 tlb_n_used_entries_dec(env, midx);
448 }
449 tlb_flush_vtlb_page_locked(env, midx, page);
450 }
451 }
452
453 /* As we are going to hijack the bottom bits of the page address for a
454 * mmuidx bit mask we need to fail to build if we can't do that
455 */
456 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
457
458 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
459 run_on_cpu_data data)
460 {
461 CPUArchState *env = cpu->env_ptr;
462 target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
463 target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
464 unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
465 int mmu_idx;
466
467 assert_cpu_is_self(cpu);
468
469 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
470 addr, mmu_idx_bitmap);
471
472 qemu_spin_lock(&env_tlb(env)->c.lock);
473 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
474 if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
475 tlb_flush_page_locked(env, mmu_idx, addr);
476 }
477 }
478 qemu_spin_unlock(&env_tlb(env)->c.lock);
479
480 tb_flush_jmp_cache(cpu, addr);
481 }
482
483 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
484 {
485 target_ulong addr_and_mmu_idx;
486
487 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
488
489 /* This should already be page aligned */
490 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
491 addr_and_mmu_idx |= idxmap;
492
493 if (!qemu_cpu_is_self(cpu)) {
494 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
495 RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
496 } else {
497 tlb_flush_page_by_mmuidx_async_work(
498 cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
499 }
500 }
501
502 void tlb_flush_page(CPUState *cpu, target_ulong addr)
503 {
504 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
505 }
506
507 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
508 uint16_t idxmap)
509 {
510 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
511 target_ulong addr_and_mmu_idx;
512
513 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
514
515 /* This should already be page aligned */
516 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
517 addr_and_mmu_idx |= idxmap;
518
519 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
520 fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
521 }
522
523 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
524 {
525 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
526 }
527
528 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
529 target_ulong addr,
530 uint16_t idxmap)
531 {
532 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
533 target_ulong addr_and_mmu_idx;
534
535 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
536
537 /* This should already be page aligned */
538 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
539 addr_and_mmu_idx |= idxmap;
540
541 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
542 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
543 }
544
545 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
546 {
547 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
548 }
549
550 /* update the TLBs so that writes to code in the virtual page 'addr'
551 can be detected */
552 void tlb_protect_code(ram_addr_t ram_addr)
553 {
554 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
555 DIRTY_MEMORY_CODE);
556 }
557
558 /* update the TLB so that writes in physical page 'phys_addr' are no longer
559 tested for self modifying code */
560 void tlb_unprotect_code(ram_addr_t ram_addr)
561 {
562 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
563 }
564
565
566 /*
567 * Dirty write flag handling
568 *
569 * When the TCG code writes to a location it looks up the address in
570 * the TLB and uses that data to compute the final address. If any of
571 * the lower bits of the address are set then the slow path is forced.
572 * There are a number of reasons to do this but for normal RAM the
573 * most usual is detecting writes to code regions which may invalidate
574 * generated code.
575 *
576 * Other vCPUs might be reading their TLBs during guest execution, so we update
577 * te->addr_write with atomic_set. We don't need to worry about this for
578 * oversized guests as MTTCG is disabled for them.
579 *
580 * Called with tlb_c.lock held.
581 */
582 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
583 uintptr_t start, uintptr_t length)
584 {
585 uintptr_t addr = tlb_entry->addr_write;
586
587 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
588 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
589 addr &= TARGET_PAGE_MASK;
590 addr += tlb_entry->addend;
591 if ((addr - start) < length) {
592 #if TCG_OVERSIZED_GUEST
593 tlb_entry->addr_write |= TLB_NOTDIRTY;
594 #else
595 atomic_set(&tlb_entry->addr_write,
596 tlb_entry->addr_write | TLB_NOTDIRTY);
597 #endif
598 }
599 }
600 }
601
602 /*
603 * Called with tlb_c.lock held.
604 * Called only from the vCPU context, i.e. the TLB's owner thread.
605 */
606 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
607 {
608 *d = *s;
609 }
610
611 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
612 * the target vCPU).
613 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
614 * thing actually updated is the target TLB entry ->addr_write flags.
615 */
616 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
617 {
618 CPUArchState *env;
619
620 int mmu_idx;
621
622 env = cpu->env_ptr;
623 qemu_spin_lock(&env_tlb(env)->c.lock);
624 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
625 unsigned int i;
626 unsigned int n = tlb_n_entries(env, mmu_idx);
627
628 for (i = 0; i < n; i++) {
629 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
630 start1, length);
631 }
632
633 for (i = 0; i < CPU_VTLB_SIZE; i++) {
634 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
635 start1, length);
636 }
637 }
638 qemu_spin_unlock(&env_tlb(env)->c.lock);
639 }
640
641 /* Called with tlb_c.lock held */
642 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
643 target_ulong vaddr)
644 {
645 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
646 tlb_entry->addr_write = vaddr;
647 }
648 }
649
650 /* update the TLB corresponding to virtual page vaddr
651 so that it is no longer dirty */
652 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
653 {
654 CPUArchState *env = cpu->env_ptr;
655 int mmu_idx;
656
657 assert_cpu_is_self(cpu);
658
659 vaddr &= TARGET_PAGE_MASK;
660 qemu_spin_lock(&env_tlb(env)->c.lock);
661 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
662 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
663 }
664
665 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
666 int k;
667 for (k = 0; k < CPU_VTLB_SIZE; k++) {
668 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
669 }
670 }
671 qemu_spin_unlock(&env_tlb(env)->c.lock);
672 }
673
674 /* Our TLB does not support large pages, so remember the area covered by
675 large pages and trigger a full TLB flush if these are invalidated. */
676 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
677 target_ulong vaddr, target_ulong size)
678 {
679 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
680 target_ulong lp_mask = ~(size - 1);
681
682 if (lp_addr == (target_ulong)-1) {
683 /* No previous large page. */
684 lp_addr = vaddr;
685 } else {
686 /* Extend the existing region to include the new page.
687 This is a compromise between unnecessary flushes and
688 the cost of maintaining a full variable size TLB. */
689 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
690 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
691 lp_mask <<= 1;
692 }
693 }
694 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
695 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
696 }
697
698 /* Add a new TLB entry. At most one entry for a given virtual address
699 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
700 * supplied size is only used by tlb_flush_page.
701 *
702 * Called from TCG-generated code, which is under an RCU read-side
703 * critical section.
704 */
705 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
706 hwaddr paddr, MemTxAttrs attrs, int prot,
707 int mmu_idx, target_ulong size)
708 {
709 CPUArchState *env = cpu->env_ptr;
710 CPUTLB *tlb = env_tlb(env);
711 CPUTLBDesc *desc = &tlb->d[mmu_idx];
712 MemoryRegionSection *section;
713 unsigned int index;
714 target_ulong address;
715 target_ulong write_address;
716 uintptr_t addend;
717 CPUTLBEntry *te, tn;
718 hwaddr iotlb, xlat, sz, paddr_page;
719 target_ulong vaddr_page;
720 int asidx = cpu_asidx_from_attrs(cpu, attrs);
721 int wp_flags;
722 bool is_ram, is_romd;
723
724 assert_cpu_is_self(cpu);
725
726 if (size <= TARGET_PAGE_SIZE) {
727 sz = TARGET_PAGE_SIZE;
728 } else {
729 tlb_add_large_page(env, mmu_idx, vaddr, size);
730 sz = size;
731 }
732 vaddr_page = vaddr & TARGET_PAGE_MASK;
733 paddr_page = paddr & TARGET_PAGE_MASK;
734
735 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
736 &xlat, &sz, attrs, &prot);
737 assert(sz >= TARGET_PAGE_SIZE);
738
739 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
740 " prot=%x idx=%d\n",
741 vaddr, paddr, prot, mmu_idx);
742
743 address = vaddr_page;
744 if (size < TARGET_PAGE_SIZE) {
745 /* Repeat the MMU check and TLB fill on every access. */
746 address |= TLB_INVALID_MASK;
747 }
748 if (attrs.byte_swap) {
749 address |= TLB_BSWAP;
750 }
751
752 is_ram = memory_region_is_ram(section->mr);
753 is_romd = memory_region_is_romd(section->mr);
754
755 if (is_ram || is_romd) {
756 /* RAM and ROMD both have associated host memory. */
757 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
758 } else {
759 /* I/O does not; force the host address to NULL. */
760 addend = 0;
761 }
762
763 write_address = address;
764 if (is_ram) {
765 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
766 /*
767 * Computing is_clean is expensive; avoid all that unless
768 * the page is actually writable.
769 */
770 if (prot & PAGE_WRITE) {
771 if (section->readonly) {
772 write_address |= TLB_DISCARD_WRITE;
773 } else if (cpu_physical_memory_is_clean(iotlb)) {
774 write_address |= TLB_NOTDIRTY;
775 }
776 }
777 } else {
778 /* I/O or ROMD */
779 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
780 /*
781 * Writes to romd devices must go through MMIO to enable write.
782 * Reads to romd devices go through the ram_ptr found above,
783 * but of course reads to I/O must go through MMIO.
784 */
785 write_address |= TLB_MMIO;
786 if (!is_romd) {
787 address = write_address;
788 }
789 }
790
791 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
792 TARGET_PAGE_SIZE);
793
794 index = tlb_index(env, mmu_idx, vaddr_page);
795 te = tlb_entry(env, mmu_idx, vaddr_page);
796
797 /*
798 * Hold the TLB lock for the rest of the function. We could acquire/release
799 * the lock several times in the function, but it is faster to amortize the
800 * acquisition cost by acquiring it just once. Note that this leads to
801 * a longer critical section, but this is not a concern since the TLB lock
802 * is unlikely to be contended.
803 */
804 qemu_spin_lock(&tlb->c.lock);
805
806 /* Note that the tlb is no longer clean. */
807 tlb->c.dirty |= 1 << mmu_idx;
808
809 /* Make sure there's no cached translation for the new page. */
810 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
811
812 /*
813 * Only evict the old entry to the victim tlb if it's for a
814 * different page; otherwise just overwrite the stale data.
815 */
816 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
817 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
818 CPUTLBEntry *tv = &desc->vtable[vidx];
819
820 /* Evict the old entry into the victim tlb. */
821 copy_tlb_helper_locked(tv, te);
822 desc->viotlb[vidx] = desc->iotlb[index];
823 tlb_n_used_entries_dec(env, mmu_idx);
824 }
825
826 /* refill the tlb */
827 /*
828 * At this point iotlb contains a physical section number in the lower
829 * TARGET_PAGE_BITS, and either
830 * + the ram_addr_t of the page base of the target RAM (RAM)
831 * + the offset within section->mr of the page base (I/O, ROMD)
832 * We subtract the vaddr_page (which is page aligned and thus won't
833 * disturb the low bits) to give an offset which can be added to the
834 * (non-page-aligned) vaddr of the eventual memory access to get
835 * the MemoryRegion offset for the access. Note that the vaddr we
836 * subtract here is that of the page base, and not the same as the
837 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
838 */
839 desc->iotlb[index].addr = iotlb - vaddr_page;
840 desc->iotlb[index].attrs = attrs;
841
842 /* Now calculate the new entry */
843 tn.addend = addend - vaddr_page;
844 if (prot & PAGE_READ) {
845 tn.addr_read = address;
846 if (wp_flags & BP_MEM_READ) {
847 tn.addr_read |= TLB_WATCHPOINT;
848 }
849 } else {
850 tn.addr_read = -1;
851 }
852
853 if (prot & PAGE_EXEC) {
854 tn.addr_code = address;
855 } else {
856 tn.addr_code = -1;
857 }
858
859 tn.addr_write = -1;
860 if (prot & PAGE_WRITE) {
861 tn.addr_write = write_address;
862 if (prot & PAGE_WRITE_INV) {
863 tn.addr_write |= TLB_INVALID_MASK;
864 }
865 if (wp_flags & BP_MEM_WRITE) {
866 tn.addr_write |= TLB_WATCHPOINT;
867 }
868 }
869
870 copy_tlb_helper_locked(te, &tn);
871 tlb_n_used_entries_inc(env, mmu_idx);
872 qemu_spin_unlock(&tlb->c.lock);
873 }
874
875 /* Add a new TLB entry, but without specifying the memory
876 * transaction attributes to be used.
877 */
878 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
879 hwaddr paddr, int prot,
880 int mmu_idx, target_ulong size)
881 {
882 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
883 prot, mmu_idx, size);
884 }
885
886 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
887 {
888 ram_addr_t ram_addr;
889
890 ram_addr = qemu_ram_addr_from_host(ptr);
891 if (ram_addr == RAM_ADDR_INVALID) {
892 error_report("Bad ram pointer %p", ptr);
893 abort();
894 }
895 return ram_addr;
896 }
897
898 /*
899 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
900 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
901 * be discarded and looked up again (e.g. via tlb_entry()).
902 */
903 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
904 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
905 {
906 CPUClass *cc = CPU_GET_CLASS(cpu);
907 bool ok;
908
909 /*
910 * This is not a probe, so only valid return is success; failure
911 * should result in exception + longjmp to the cpu loop.
912 */
913 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
914 assert(ok);
915 }
916
917 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
918 int mmu_idx, target_ulong addr, uintptr_t retaddr,
919 MMUAccessType access_type, MemOp op)
920 {
921 CPUState *cpu = env_cpu(env);
922 hwaddr mr_offset;
923 MemoryRegionSection *section;
924 MemoryRegion *mr;
925 uint64_t val;
926 bool locked = false;
927 MemTxResult r;
928
929 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
930 mr = section->mr;
931 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
932 cpu->mem_io_pc = retaddr;
933 if (!cpu->can_do_io) {
934 cpu_io_recompile(cpu, retaddr);
935 }
936
937 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
938 qemu_mutex_lock_iothread();
939 locked = true;
940 }
941 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
942 if (r != MEMTX_OK) {
943 hwaddr physaddr = mr_offset +
944 section->offset_within_address_space -
945 section->offset_within_region;
946
947 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
948 mmu_idx, iotlbentry->attrs, r, retaddr);
949 }
950 if (locked) {
951 qemu_mutex_unlock_iothread();
952 }
953
954 return val;
955 }
956
957 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
958 int mmu_idx, uint64_t val, target_ulong addr,
959 uintptr_t retaddr, MemOp op)
960 {
961 CPUState *cpu = env_cpu(env);
962 hwaddr mr_offset;
963 MemoryRegionSection *section;
964 MemoryRegion *mr;
965 bool locked = false;
966 MemTxResult r;
967
968 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
969 mr = section->mr;
970 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
971 if (!cpu->can_do_io) {
972 cpu_io_recompile(cpu, retaddr);
973 }
974 cpu->mem_io_pc = retaddr;
975
976 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
977 qemu_mutex_lock_iothread();
978 locked = true;
979 }
980 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
981 if (r != MEMTX_OK) {
982 hwaddr physaddr = mr_offset +
983 section->offset_within_address_space -
984 section->offset_within_region;
985
986 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
987 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
988 retaddr);
989 }
990 if (locked) {
991 qemu_mutex_unlock_iothread();
992 }
993 }
994
995 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
996 {
997 #if TCG_OVERSIZED_GUEST
998 return *(target_ulong *)((uintptr_t)entry + ofs);
999 #else
1000 /* ofs might correspond to .addr_write, so use atomic_read */
1001 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1002 #endif
1003 }
1004
1005 /* Return true if ADDR is present in the victim tlb, and has been copied
1006 back to the main tlb. */
1007 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1008 size_t elt_ofs, target_ulong page)
1009 {
1010 size_t vidx;
1011
1012 assert_cpu_is_self(env_cpu(env));
1013 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1014 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1015 target_ulong cmp;
1016
1017 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1018 #if TCG_OVERSIZED_GUEST
1019 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1020 #else
1021 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1022 #endif
1023
1024 if (cmp == page) {
1025 /* Found entry in victim tlb, swap tlb and iotlb. */
1026 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1027
1028 qemu_spin_lock(&env_tlb(env)->c.lock);
1029 copy_tlb_helper_locked(&tmptlb, tlb);
1030 copy_tlb_helper_locked(tlb, vtlb);
1031 copy_tlb_helper_locked(vtlb, &tmptlb);
1032 qemu_spin_unlock(&env_tlb(env)->c.lock);
1033
1034 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1035 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1036 tmpio = *io; *io = *vio; *vio = tmpio;
1037 return true;
1038 }
1039 }
1040 return false;
1041 }
1042
/*
 * Macro to call victim_tlb_hit(), with local variables from the use
 * context: the expansion refers to `env', `mmu_idx' and `index', which
 * must be in scope at the call site.  TY names the CPUTLBEntry comparator
 * field to match; ADDR is masked down to its page before the lookup.
 */
#define VICTIM_TLB_HIT(TY, ADDR) \
    victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                   (ADDR) & TARGET_PAGE_MASK)
1047
1048 /*
1049 * Return a ram_addr_t for the virtual address for execution.
1050 *
1051 * Return -1 if we can't translate and execute from an entire page
1052 * of RAM. This will force us to execute by loading and translating
1053 * one insn at a time, without caching.
1054 *
1055 * NOTE: This function will trigger an exception if the page is
1056 * not executable.
1057 */
1058 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1059 void **hostp)
1060 {
1061 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1062 uintptr_t index = tlb_index(env, mmu_idx, addr);
1063 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1064 void *p;
1065
1066 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1067 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1068 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1069 index = tlb_index(env, mmu_idx, addr);
1070 entry = tlb_entry(env, mmu_idx, addr);
1071
1072 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1073 /*
1074 * The MMU protection covers a smaller range than a target
1075 * page, so we must redo the MMU check for every insn.
1076 */
1077 return -1;
1078 }
1079 }
1080 assert(tlb_hit(entry->addr_code, addr));
1081 }
1082
1083 if (unlikely(entry->addr_code & TLB_MMIO)) {
1084 /* The region is not backed by RAM. */
1085 if (hostp) {
1086 *hostp = NULL;
1087 }
1088 return -1;
1089 }
1090
1091 p = (void *)((uintptr_t)addr + entry->addend);
1092 if (hostp) {
1093 *hostp = p;
1094 }
1095 return qemu_ram_addr_from_host_nofail(p);
1096 }
1097
1098 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1099 {
1100 return get_page_addr_code_hostp(env, addr, NULL);
1101 }
1102
1103 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1104 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1105 {
1106 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1107
1108 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1109
1110 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1111 struct page_collection *pages
1112 = page_collection_lock(ram_addr, ram_addr + size);
1113 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1114 page_collection_unlock(pages);
1115 }
1116
1117 /*
1118 * Set both VGA and migration bits for simplicity and to remove
1119 * the notdirty callback faster.
1120 */
1121 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1122
1123 /* We remove the notdirty callback only if the code has been flushed. */
1124 if (!cpu_physical_memory_is_clean(ram_addr)) {
1125 trace_memory_notdirty_set_dirty(mem_vaddr);
1126 tlb_set_dirty(cpu, mem_vaddr);
1127 }
1128 }
1129
1130 /*
1131 * Probe for whether the specified guest access is permitted. If it is not
1132 * permitted then an exception will be taken in the same way as if this
1133 * were a real access (and we will not return).
1134 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1135 * returns the address of the host page similar to tlb_vaddr_to_host().
1136 */
1137 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1138 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1139 {
1140 uintptr_t index = tlb_index(env, mmu_idx, addr);
1141 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1142 target_ulong tlb_addr;
1143 size_t elt_ofs;
1144 int wp_access;
1145
1146 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1147
1148 switch (access_type) {
1149 case MMU_DATA_LOAD:
1150 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1151 wp_access = BP_MEM_READ;
1152 break;
1153 case MMU_DATA_STORE:
1154 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1155 wp_access = BP_MEM_WRITE;
1156 break;
1157 case MMU_INST_FETCH:
1158 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1159 wp_access = BP_MEM_READ;
1160 break;
1161 default:
1162 g_assert_not_reached();
1163 }
1164 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1165
1166 if (unlikely(!tlb_hit(tlb_addr, addr))) {
1167 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1168 addr & TARGET_PAGE_MASK)) {
1169 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1170 /* TLB resize via tlb_fill may have moved the entry. */
1171 index = tlb_index(env, mmu_idx, addr);
1172 entry = tlb_entry(env, mmu_idx, addr);
1173 }
1174 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1175 }
1176
1177 if (!size) {
1178 return NULL;
1179 }
1180
1181 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1182 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1183
1184 /* Reject I/O access, or other required slow-path. */
1185 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1186 return NULL;
1187 }
1188
1189 /* Handle watchpoints. */
1190 if (tlb_addr & TLB_WATCHPOINT) {
1191 cpu_check_watchpoint(env_cpu(env), addr, size,
1192 iotlbentry->attrs, wp_access, retaddr);
1193 }
1194
1195 /* Handle clean RAM pages. */
1196 if (tlb_addr & TLB_NOTDIRTY) {
1197 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1198 }
1199 }
1200
1201 return (void *)((uintptr_t)addr + entry->addend);
1202 }
1203
1204 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1205 MMUAccessType access_type, int mmu_idx)
1206 {
1207 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1208 target_ulong tlb_addr, page;
1209 size_t elt_ofs;
1210
1211 switch (access_type) {
1212 case MMU_DATA_LOAD:
1213 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1214 break;
1215 case MMU_DATA_STORE:
1216 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1217 break;
1218 case MMU_INST_FETCH:
1219 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1220 break;
1221 default:
1222 g_assert_not_reached();
1223 }
1224
1225 page = addr & TARGET_PAGE_MASK;
1226 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1227
1228 if (!tlb_hit_page(tlb_addr, page)) {
1229 uintptr_t index = tlb_index(env, mmu_idx, addr);
1230
1231 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1232 CPUState *cs = env_cpu(env);
1233 CPUClass *cc = CPU_GET_CLASS(cs);
1234
1235 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1236 /* Non-faulting page table read failed. */
1237 return NULL;
1238 }
1239
1240 /* TLB resize via tlb_fill may have moved the entry. */
1241 entry = tlb_entry(env, mmu_idx, addr);
1242 }
1243 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1244 }
1245
1246 if (tlb_addr & ~TARGET_PAGE_MASK) {
1247 /* IO access */
1248 return NULL;
1249 }
1250
1251 return (void *)((uintptr_t)addr + entry->addend);
1252 }
1253
1254
1255 #ifdef CONFIG_PLUGIN
1256 /*
1257 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1258 * This should be a hot path as we will have just looked this path up
1259 * in the softmmu lookup code (or helper). We don't handle re-fills or
1260 * checking the victim table. This is purely informational.
1261 *
1262 * This should never fail as the memory access being instrumented
1263 * should have just filled the TLB.
1264 */
1265
1266 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1267 bool is_store, struct qemu_plugin_hwaddr *data)
1268 {
1269 CPUArchState *env = cpu->env_ptr;
1270 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1271 uintptr_t index = tlb_index(env, mmu_idx, addr);
1272 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1273
1274 if (likely(tlb_hit(tlb_addr, addr))) {
1275 /* We must have an iotlb entry for MMIO */
1276 if (tlb_addr & TLB_MMIO) {
1277 CPUIOTLBEntry *iotlbentry;
1278 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1279 data->is_io = true;
1280 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1281 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1282 } else {
1283 data->is_io = false;
1284 data->v.ram.hostaddr = addr + tlbe->addend;
1285 }
1286 return true;
1287 }
1288 return false;
1289 }
1290
1291 #endif
1292
1293 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1294 * operations, or io operations to proceed. Return the host address. */
1295 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1296 TCGMemOpIdx oi, uintptr_t retaddr)
1297 {
1298 size_t mmu_idx = get_mmuidx(oi);
1299 uintptr_t index = tlb_index(env, mmu_idx, addr);
1300 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1301 target_ulong tlb_addr = tlb_addr_write(tlbe);
1302 MemOp mop = get_memop(oi);
1303 int a_bits = get_alignment_bits(mop);
1304 int s_bits = mop & MO_SIZE;
1305 void *hostaddr;
1306
1307 /* Adjust the given return address. */
1308 retaddr -= GETPC_ADJ;
1309
1310 /* Enforce guest required alignment. */
1311 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1312 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1313 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1314 mmu_idx, retaddr);
1315 }
1316
1317 /* Enforce qemu required alignment. */
1318 if (unlikely(addr & ((1 << s_bits) - 1))) {
1319 /* We get here if guest alignment was not requested,
1320 or was not enforced by cpu_unaligned_access above.
1321 We might widen the access and emulate, but for now
1322 mark an exception and exit the cpu loop. */
1323 goto stop_the_world;
1324 }
1325
1326 /* Check TLB entry and enforce page permissions. */
1327 if (!tlb_hit(tlb_addr, addr)) {
1328 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1329 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1330 mmu_idx, retaddr);
1331 index = tlb_index(env, mmu_idx, addr);
1332 tlbe = tlb_entry(env, mmu_idx, addr);
1333 }
1334 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1335 }
1336
1337 /* Notice an IO access or a needs-MMU-lookup access */
1338 if (unlikely(tlb_addr & TLB_MMIO)) {
1339 /* There's really nothing that can be done to
1340 support this apart from stop-the-world. */
1341 goto stop_the_world;
1342 }
1343
1344 /* Let the guest notice RMW on a write-only page. */
1345 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1346 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1347 mmu_idx, retaddr);
1348 /* Since we don't support reads and writes to different addresses,
1349 and we do have the proper page loaded for write, this shouldn't
1350 ever return. But just in case, handle via stop-the-world. */
1351 goto stop_the_world;
1352 }
1353
1354 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1355
1356 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1357 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1358 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1359 }
1360
1361 return hostaddr;
1362
1363 stop_the_world:
1364 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1365 }
1366
1367 /*
1368 * Load Helpers
1369 *
1370 * We support two different access types. SOFTMMU_CODE_ACCESS is
1371 * specifically for reading instructions from system memory. It is
1372 * called by the translation loop and in some helpers where the code
1373 * is disassembled. It shouldn't be called directly by guest code.
1374 */
1375
/*
 * Signature shared by the full-width load routines.  load_helper() takes
 * one of these as its full_load argument and calls it for each half of an
 * access it has to split.
 */
typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);
1378
1379 static inline uint64_t QEMU_ALWAYS_INLINE
1380 load_memop(const void *haddr, MemOp op)
1381 {
1382 switch (op) {
1383 case MO_UB:
1384 return ldub_p(haddr);
1385 case MO_BEUW:
1386 return lduw_be_p(haddr);
1387 case MO_LEUW:
1388 return lduw_le_p(haddr);
1389 case MO_BEUL:
1390 return (uint32_t)ldl_be_p(haddr);
1391 case MO_LEUL:
1392 return (uint32_t)ldl_le_p(haddr);
1393 case MO_BEQ:
1394 return ldq_be_p(haddr);
1395 case MO_LEQ:
1396 return ldq_le_p(haddr);
1397 default:
1398 qemu_build_not_reached();
1399 }
1400 }
1401
1402 static inline uint64_t QEMU_ALWAYS_INLINE
1403 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1404 uintptr_t retaddr, MemOp op, bool code_read,
1405 FullLoadHelper *full_load)
1406 {
1407 uintptr_t mmu_idx = get_mmuidx(oi);
1408 uintptr_t index = tlb_index(env, mmu_idx, addr);
1409 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1410 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1411 const size_t tlb_off = code_read ?
1412 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1413 const MMUAccessType access_type =
1414 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1415 unsigned a_bits = get_alignment_bits(get_memop(oi));
1416 void *haddr;
1417 uint64_t res;
1418 size_t size = memop_size(op);
1419
1420 /* Handle CPU specific unaligned behaviour */
1421 if (addr & ((1 << a_bits) - 1)) {
1422 cpu_unaligned_access(env_cpu(env), addr, access_type,
1423 mmu_idx, retaddr);
1424 }
1425
1426 /* If the TLB entry is for a different page, reload and try again. */
1427 if (!tlb_hit(tlb_addr, addr)) {
1428 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1429 addr & TARGET_PAGE_MASK)) {
1430 tlb_fill(env_cpu(env), addr, size,
1431 access_type, mmu_idx, retaddr);
1432 index = tlb_index(env, mmu_idx, addr);
1433 entry = tlb_entry(env, mmu_idx, addr);
1434 }
1435 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1436 tlb_addr &= ~TLB_INVALID_MASK;
1437 }
1438
1439 /* Handle anything that isn't just a straight memory access. */
1440 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1441 CPUIOTLBEntry *iotlbentry;
1442 bool need_swap;
1443
1444 /* For anything that is unaligned, recurse through full_load. */
1445 if ((addr & (size - 1)) != 0) {
1446 goto do_unaligned_access;
1447 }
1448
1449 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1450
1451 /* Handle watchpoints. */
1452 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1453 /* On watchpoint hit, this will longjmp out. */
1454 cpu_check_watchpoint(env_cpu(env), addr, size,
1455 iotlbentry->attrs, BP_MEM_READ, retaddr);
1456 }
1457
1458 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1459
1460 /* Handle I/O access. */
1461 if (likely(tlb_addr & TLB_MMIO)) {
1462 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1463 access_type, op ^ (need_swap * MO_BSWAP));
1464 }
1465
1466 haddr = (void *)((uintptr_t)addr + entry->addend);
1467
1468 /*
1469 * Keep these two load_memop separate to ensure that the compiler
1470 * is able to fold the entire function to a single instruction.
1471 * There is a build-time assert inside to remind you of this. ;-)
1472 */
1473 if (unlikely(need_swap)) {
1474 return load_memop(haddr, op ^ MO_BSWAP);
1475 }
1476 return load_memop(haddr, op);
1477 }
1478
1479 /* Handle slow unaligned access (it spans two pages or IO). */
1480 if (size > 1
1481 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1482 >= TARGET_PAGE_SIZE)) {
1483 target_ulong addr1, addr2;
1484 uint64_t r1, r2;
1485 unsigned shift;
1486 do_unaligned_access:
1487 addr1 = addr & ~((target_ulong)size - 1);
1488 addr2 = addr1 + size;
1489 r1 = full_load(env, addr1, oi, retaddr);
1490 r2 = full_load(env, addr2, oi, retaddr);
1491 shift = (addr & (size - 1)) * 8;
1492
1493 if (memop_big_endian(op)) {
1494 /* Big-endian combine. */
1495 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1496 } else {
1497 /* Little-endian combine. */
1498 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1499 }
1500 return res & MAKE_64BIT_MASK(0, size * 8);
1501 }
1502
1503 haddr = (void *)((uintptr_t)addr + entry->addend);
1504 return load_memop(haddr, op);
1505 }
1506
1507 /*
1508 * For the benefit of TCG generated code, we want to avoid the
1509 * complication of ABI-specific return type promotion and always
1510 * return a value extended to the register size of the host. This is
1511 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1512 * data, and for that we always have uint64_t.
1513 *
1514 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1515 */
1516
1517 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1518 TCGMemOpIdx oi, uintptr_t retaddr)
1519 {
1520 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1521 }
1522
1523 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1524 TCGMemOpIdx oi, uintptr_t retaddr)
1525 {
1526 return full_ldub_mmu(env, addr, oi, retaddr);
1527 }
1528
1529 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1530 TCGMemOpIdx oi, uintptr_t retaddr)
1531 {
1532 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1533 full_le_lduw_mmu);
1534 }
1535
1536 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1537 TCGMemOpIdx oi, uintptr_t retaddr)
1538 {
1539 return full_le_lduw_mmu(env, addr, oi, retaddr);
1540 }
1541
1542 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1543 TCGMemOpIdx oi, uintptr_t retaddr)
1544 {
1545 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1546 full_be_lduw_mmu);
1547 }
1548
1549 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1550 TCGMemOpIdx oi, uintptr_t retaddr)
1551 {
1552 return full_be_lduw_mmu(env, addr, oi, retaddr);
1553 }
1554
1555 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1556 TCGMemOpIdx oi, uintptr_t retaddr)
1557 {
1558 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1559 full_le_ldul_mmu);
1560 }
1561
1562 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1563 TCGMemOpIdx oi, uintptr_t retaddr)
1564 {
1565 return full_le_ldul_mmu(env, addr, oi, retaddr);
1566 }
1567
1568 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1569 TCGMemOpIdx oi, uintptr_t retaddr)
1570 {
1571 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1572 full_be_ldul_mmu);
1573 }
1574
1575 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1576 TCGMemOpIdx oi, uintptr_t retaddr)
1577 {
1578 return full_be_ldul_mmu(env, addr, oi, retaddr);
1579 }
1580
1581 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1582 TCGMemOpIdx oi, uintptr_t retaddr)
1583 {
1584 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1585 helper_le_ldq_mmu);
1586 }
1587
1588 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1589 TCGMemOpIdx oi, uintptr_t retaddr)
1590 {
1591 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1592 helper_be_ldq_mmu);
1593 }
1594
1595 /*
1596 * Provide signed versions of the load routines as well. We can of course
1597 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1598 */
1599
1600
1601 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1602 TCGMemOpIdx oi, uintptr_t retaddr)
1603 {
1604 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1605 }
1606
1607 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1608 TCGMemOpIdx oi, uintptr_t retaddr)
1609 {
1610 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1611 }
1612
1613 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1614 TCGMemOpIdx oi, uintptr_t retaddr)
1615 {
1616 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1617 }
1618
1619 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1620 TCGMemOpIdx oi, uintptr_t retaddr)
1621 {
1622 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1623 }
1624
1625 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1626 TCGMemOpIdx oi, uintptr_t retaddr)
1627 {
1628 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1629 }
1630
1631 /*
1632 * Load helpers for cpu_ldst.h.
1633 */
1634
1635 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1636 int mmu_idx, uintptr_t retaddr,
1637 MemOp op, FullLoadHelper *full_load)
1638 {
1639 uint16_t meminfo;
1640 TCGMemOpIdx oi;
1641 uint64_t ret;
1642
1643 meminfo = trace_mem_get_info(op, mmu_idx, false);
1644 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1645
1646 op &= ~MO_SIGN;
1647 oi = make_memop_idx(op, mmu_idx);
1648 ret = full_load(env, addr, oi, retaddr);
1649
1650 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1651
1652 return ret;
1653 }
1654
1655 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1656 int mmu_idx, uintptr_t ra)
1657 {
1658 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1659 }
1660
1661 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1662 int mmu_idx, uintptr_t ra)
1663 {
1664 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1665 full_ldub_mmu);
1666 }
1667
1668 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1669 int mmu_idx, uintptr_t ra)
1670 {
1671 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
1672 MO_TE == MO_LE
1673 ? full_le_lduw_mmu : full_be_lduw_mmu);
1674 }
1675
1676 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1677 int mmu_idx, uintptr_t ra)
1678 {
1679 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
1680 MO_TE == MO_LE
1681 ? full_le_lduw_mmu : full_be_lduw_mmu);
1682 }
1683
1684 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1685 int mmu_idx, uintptr_t ra)
1686 {
1687 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
1688 MO_TE == MO_LE
1689 ? full_le_ldul_mmu : full_be_ldul_mmu);
1690 }
1691
1692 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1693 int mmu_idx, uintptr_t ra)
1694 {
1695 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
1696 MO_TE == MO_LE
1697 ? helper_le_ldq_mmu : helper_be_ldq_mmu);
1698 }
1699
1700 /*
1701 * Store Helpers
1702 */
1703
1704 static inline void QEMU_ALWAYS_INLINE
1705 store_memop(void *haddr, uint64_t val, MemOp op)
1706 {
1707 switch (op) {
1708 case MO_UB:
1709 stb_p(haddr, val);
1710 break;
1711 case MO_BEUW:
1712 stw_be_p(haddr, val);
1713 break;
1714 case MO_LEUW:
1715 stw_le_p(haddr, val);
1716 break;
1717 case MO_BEUL:
1718 stl_be_p(haddr, val);
1719 break;
1720 case MO_LEUL:
1721 stl_le_p(haddr, val);
1722 break;
1723 case MO_BEQ:
1724 stq_be_p(haddr, val);
1725 break;
1726 case MO_LEQ:
1727 stq_le_p(haddr, val);
1728 break;
1729 default:
1730 qemu_build_not_reached();
1731 }
1732 }
1733
1734 static inline void QEMU_ALWAYS_INLINE
1735 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1736 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1737 {
1738 uintptr_t mmu_idx = get_mmuidx(oi);
1739 uintptr_t index = tlb_index(env, mmu_idx, addr);
1740 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1741 target_ulong tlb_addr = tlb_addr_write(entry);
1742 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1743 unsigned a_bits = get_alignment_bits(get_memop(oi));
1744 void *haddr;
1745 size_t size = memop_size(op);
1746
1747 /* Handle CPU specific unaligned behaviour */
1748 if (addr & ((1 << a_bits) - 1)) {
1749 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1750 mmu_idx, retaddr);
1751 }
1752
1753 /* If the TLB entry is for a different page, reload and try again. */
1754 if (!tlb_hit(tlb_addr, addr)) {
1755 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1756 addr & TARGET_PAGE_MASK)) {
1757 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1758 mmu_idx, retaddr);
1759 index = tlb_index(env, mmu_idx, addr);
1760 entry = tlb_entry(env, mmu_idx, addr);
1761 }
1762 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1763 }
1764
1765 /* Handle anything that isn't just a straight memory access. */
1766 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1767 CPUIOTLBEntry *iotlbentry;
1768 bool need_swap;
1769
1770 /* For anything that is unaligned, recurse through byte stores. */
1771 if ((addr & (size - 1)) != 0) {
1772 goto do_unaligned_access;
1773 }
1774
1775 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1776
1777 /* Handle watchpoints. */
1778 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1779 /* On watchpoint hit, this will longjmp out. */
1780 cpu_check_watchpoint(env_cpu(env), addr, size,
1781 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1782 }
1783
1784 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1785
1786 /* Handle I/O access. */
1787 if (tlb_addr & TLB_MMIO) {
1788 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1789 op ^ (need_swap * MO_BSWAP));
1790 return;
1791 }
1792
1793 /* Ignore writes to ROM. */
1794 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1795 return;
1796 }
1797
1798 /* Handle clean RAM pages. */
1799 if (tlb_addr & TLB_NOTDIRTY) {
1800 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1801 }
1802
1803 haddr = (void *)((uintptr_t)addr + entry->addend);
1804
1805 /*
1806 * Keep these two store_memop separate to ensure that the compiler
1807 * is able to fold the entire function to a single instruction.
1808 * There is a build-time assert inside to remind you of this. ;-)
1809 */
1810 if (unlikely(need_swap)) {
1811 store_memop(haddr, val, op ^ MO_BSWAP);
1812 } else {
1813 store_memop(haddr, val, op);
1814 }
1815 return;
1816 }
1817
1818 /* Handle slow unaligned access (it spans two pages or IO). */
1819 if (size > 1
1820 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1821 >= TARGET_PAGE_SIZE)) {
1822 int i;
1823 uintptr_t index2;
1824 CPUTLBEntry *entry2;
1825 target_ulong page2, tlb_addr2;
1826 size_t size2;
1827
1828 do_unaligned_access:
1829 /*
1830 * Ensure the second page is in the TLB. Note that the first page
1831 * is already guaranteed to be filled, and that the second page
1832 * cannot evict the first.
1833 */
1834 page2 = (addr + size) & TARGET_PAGE_MASK;
1835 size2 = (addr + size) & ~TARGET_PAGE_MASK;
1836 index2 = tlb_index(env, mmu_idx, page2);
1837 entry2 = tlb_entry(env, mmu_idx, page2);
1838 tlb_addr2 = tlb_addr_write(entry2);
1839 if (!tlb_hit_page(tlb_addr2, page2)) {
1840 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
1841 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
1842 mmu_idx, retaddr);
1843 index2 = tlb_index(env, mmu_idx, page2);
1844 entry2 = tlb_entry(env, mmu_idx, page2);
1845 }
1846 tlb_addr2 = tlb_addr_write(entry2);
1847 }
1848
1849 /*
1850 * Handle watchpoints. Since this may trap, all checks
1851 * must happen before any store.
1852 */
1853 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1854 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
1855 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
1856 BP_MEM_WRITE, retaddr);
1857 }
1858 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
1859 cpu_check_watchpoint(env_cpu(env), page2, size2,
1860 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
1861 BP_MEM_WRITE, retaddr);
1862 }
1863
1864 /*
1865 * XXX: not efficient, but simple.
1866 * This loop must go in the forward direction to avoid issues
1867 * with self-modifying code in Windows 64-bit.
1868 */
1869 for (i = 0; i < size; ++i) {
1870 uint8_t val8;
1871 if (memop_big_endian(op)) {
1872 /* Big-endian extract. */
1873 val8 = val >> (((size - 1) * 8) - (i * 8));
1874 } else {
1875 /* Little-endian extract. */
1876 val8 = val >> (i * 8);
1877 }
1878 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
1879 }
1880 return;
1881 }
1882
1883 haddr = (void *)((uintptr_t)addr + entry->addend);
1884 store_memop(haddr, val, op);
1885 }
1886
1887 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
1888 TCGMemOpIdx oi, uintptr_t retaddr)
1889 {
1890 store_helper(env, addr, val, oi, retaddr, MO_UB);
1891 }
1892
1893 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1894 TCGMemOpIdx oi, uintptr_t retaddr)
1895 {
1896 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
1897 }
1898
1899 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1900 TCGMemOpIdx oi, uintptr_t retaddr)
1901 {
1902 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
1903 }
1904
1905 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1906 TCGMemOpIdx oi, uintptr_t retaddr)
1907 {
1908 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
1909 }
1910
1911 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1912 TCGMemOpIdx oi, uintptr_t retaddr)
1913 {
1914 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
1915 }
1916
1917 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1918 TCGMemOpIdx oi, uintptr_t retaddr)
1919 {
1920 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
1921 }
1922
1923 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1924 TCGMemOpIdx oi, uintptr_t retaddr)
1925 {
1926 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
1927 }
1928
1929 /*
1930 * Store Helpers for cpu_ldst.h
1931 */
1932
1933 static inline void QEMU_ALWAYS_INLINE
1934 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1935 int mmu_idx, uintptr_t retaddr, MemOp op)
1936 {
1937 TCGMemOpIdx oi;
1938 uint16_t meminfo;
1939
1940 meminfo = trace_mem_get_info(op, mmu_idx, true);
1941 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1942
1943 oi = make_memop_idx(op, mmu_idx);
1944 store_helper(env, addr, val, oi, retaddr, op);
1945
1946 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1947 }
1948
1949 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
1950 int mmu_idx, uintptr_t retaddr)
1951 {
1952 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
1953 }
1954
1955 void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
1956 int mmu_idx, uintptr_t retaddr)
1957 {
1958 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
1959 }
1960
1961 void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
1962 int mmu_idx, uintptr_t retaddr)
1963 {
1964 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
1965 }
1966
1967 void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
1968 int mmu_idx, uintptr_t retaddr)
1969 {
1970 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
1971 }
1972
1973 /* First set of helpers allows passing in of OI and RETADDR. This makes
1974 them callable from other helpers. */
1975
1976 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
1977 #define ATOMIC_NAME(X) \
1978 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1979 #define ATOMIC_MMU_DECLS
1980 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
1981 #define ATOMIC_MMU_CLEANUP
1982 #define ATOMIC_MMU_IDX get_mmuidx(oi)
1983
1984 #include "atomic_common.inc.c"
1985
1986 #define DATA_SIZE 1
1987 #include "atomic_template.h"
1988
1989 #define DATA_SIZE 2
1990 #include "atomic_template.h"
1991
1992 #define DATA_SIZE 4
1993 #include "atomic_template.h"
1994
1995 #ifdef CONFIG_ATOMIC64
1996 #define DATA_SIZE 8
1997 #include "atomic_template.h"
1998 #endif
1999
2000 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2001 #define DATA_SIZE 16
2002 #include "atomic_template.h"
2003 #endif
2004
2005 /* Second set of helpers are directly callable from TCG as helpers. */
2006
2007 #undef EXTRA_ARGS
2008 #undef ATOMIC_NAME
2009 #undef ATOMIC_MMU_LOOKUP
2010 #define EXTRA_ARGS , TCGMemOpIdx oi
2011 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2012 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
2013
2014 #define DATA_SIZE 1
2015 #include "atomic_template.h"
2016
2017 #define DATA_SIZE 2
2018 #include "atomic_template.h"
2019
2020 #define DATA_SIZE 4
2021 #include "atomic_template.h"
2022
2023 #ifdef CONFIG_ATOMIC64
2024 #define DATA_SIZE 8
2025 #include "atomic_template.h"
2026 #endif
2027 #undef ATOMIC_MMU_IDX
2028
2029 /* Code access functions. */
2030
2031 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
2032 TCGMemOpIdx oi, uintptr_t retaddr)
2033 {
2034 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
2035 }
2036
2037 uint8_t helper_ret_ldub_cmmu(CPUArchState *env, target_ulong addr,
2038 TCGMemOpIdx oi, uintptr_t retaddr)
2039 {
2040 return full_ldub_cmmu(env, addr, oi, retaddr);
2041 }
2042
2043 int8_t helper_ret_ldsb_cmmu(CPUArchState *env, target_ulong addr,
2044 TCGMemOpIdx oi, uintptr_t retaddr)
2045 {
2046 return (int8_t) full_ldub_cmmu(env, addr, oi, retaddr);
2047 }
2048
2049 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
2050 TCGMemOpIdx oi, uintptr_t retaddr)
2051 {
2052 return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
2053 full_le_lduw_cmmu);
2054 }
2055
2056 uint16_t helper_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
2057 TCGMemOpIdx oi, uintptr_t retaddr)
2058 {
2059 return full_le_lduw_cmmu(env, addr, oi, retaddr);
2060 }
2061
2062 int16_t helper_le_ldsw_cmmu(CPUArchState *env, target_ulong addr,
2063 TCGMemOpIdx oi, uintptr_t retaddr)
2064 {
2065 return (int16_t) full_le_lduw_cmmu(env, addr, oi, retaddr);
2066 }
2067
2068 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
2069 TCGMemOpIdx oi, uintptr_t retaddr)
2070 {
2071 return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
2072 full_be_lduw_cmmu);
2073 }
2074
2075 uint16_t helper_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
2076 TCGMemOpIdx oi, uintptr_t retaddr)
2077 {
2078 return full_be_lduw_cmmu(env, addr, oi, retaddr);
2079 }
2080
2081 int16_t helper_be_ldsw_cmmu(CPUArchState *env, target_ulong addr,
2082 TCGMemOpIdx oi, uintptr_t retaddr)
2083 {
2084 return (int16_t) full_be_lduw_cmmu(env, addr, oi, retaddr);
2085 }
2086
2087 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
2088 TCGMemOpIdx oi, uintptr_t retaddr)
2089 {
2090 return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
2091 full_le_ldul_cmmu);
2092 }
2093
2094 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
2095 TCGMemOpIdx oi, uintptr_t retaddr)
2096 {
2097 return full_le_ldul_cmmu(env, addr, oi, retaddr);
2098 }
2099
2100 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
2101 TCGMemOpIdx oi, uintptr_t retaddr)
2102 {
2103 return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
2104 full_be_ldul_cmmu);
2105 }
2106
2107 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
2108 TCGMemOpIdx oi, uintptr_t retaddr)
2109 {
2110 return full_be_ldul_cmmu(env, addr, oi, retaddr);
2111 }
2112
2113 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
2114 TCGMemOpIdx oi, uintptr_t retaddr)
2115 {
2116 return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
2117 helper_le_ldq_cmmu);
2118 }
2119
2120 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
2121 TCGMemOpIdx oi, uintptr_t retaddr)
2122 {
2123 return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
2124 helper_be_ldq_cmmu);
2125 }