1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37
39 /* DEBUG defines; enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
39 /* #define DEBUG_TLB */
40 /* #define DEBUG_TLB_LOG */
41
42 #ifdef DEBUG_TLB
43 # define DEBUG_TLB_GATE 1
44 # ifdef DEBUG_TLB_LOG
45 # define DEBUG_TLB_LOG_GATE 1
46 # else
47 # define DEBUG_TLB_LOG_GATE 0
48 # endif
49 #else
50 # define DEBUG_TLB_GATE 0
51 # define DEBUG_TLB_LOG_GATE 0
52 #endif
53
54 #define tlb_debug(fmt, ...) do { \
55 if (DEBUG_TLB_LOG_GATE) { \
56 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
57 ## __VA_ARGS__); \
58 } else if (DEBUG_TLB_GATE) { \
59 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
60 } \
61 } while (0)
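
/*
 * Usage note (not from the original file): both DEBUG_TLB and DEBUG_TLB_LOG
 * above must be uncommented for the qemu_log_mask() path to be taken, and
 * CPU_LOG_MMU output is only emitted when the "mmu" log category is enabled
 * at run time (e.g. "-d mmu" on the command line).
 */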
62
63 #define assert_cpu_is_self(cpu) do { \
64 if (DEBUG_TLB_GATE) { \
65 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
66 } \
67 } while (0)
68
69 /* run_on_cpu_data.target_ptr should always be big enough for a
70  * target_ulong even on 32-bit builds */
71 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
72
73 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
74 */
75 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
76 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
77
78 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
79 {
80 return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
81 }
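
/*
 * Illustrative sizing example (assumed values, not mandated by the code):
 * with 256 entries and CPU_TLB_ENTRY_BITS == 5 (32-byte entries), the
 * stored mask is 255 << 5 == 0x1fe0, so sizeof_tlb() returns
 * 0x1fe0 + 0x20 == 0x2000 bytes, i.e. 256 * 32.
 */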
82
83 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
84 size_t max_entries)
85 {
86 desc->window_begin_ns = ns;
87 desc->window_max_entries = max_entries;
88 }
89
90 static void tlb_dyn_init(CPUArchState *env)
91 {
92 int i;
93
94 for (i = 0; i < NB_MMU_MODES; i++) {
95 CPUTLBDesc *desc = &env_tlb(env)->d[i];
96 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
97
98 tlb_window_reset(desc, get_clock_realtime(), 0);
99 desc->n_used_entries = 0;
100 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
101 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
102 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
103 }
104 }
105
106 /**
107 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
108 * @env: CPU that owns the TLB
109 * @mmu_idx: MMU index of the TLB
110 *
111  * Called with tlb_c.lock held.
112 *
113 * We have two main constraints when resizing a TLB: (1) we only resize it
114 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
115 * the array or unnecessarily flushing it), which means we do not control how
116 * frequently the resizing can occur; (2) we don't have access to the guest's
117 * future scheduling decisions, and therefore have to decide the magnitude of
118 * the resize based on past observations.
119 *
120 * In general, a memory-hungry process can benefit greatly from an appropriately
121 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
122 * we just have to make the TLB as large as possible; while an oversized TLB
123 * results in minimal TLB miss rates, it also takes longer to be flushed
124 * (flushes can be _very_ frequent), and the reduced locality can also hurt
125 * performance.
126 *
127 * To achieve near-optimal performance for all kinds of workloads, we:
128 *
129 * 1. Aggressively increase the size of the TLB when the use rate of the
130 * TLB being flushed is high, since it is likely that in the near future this
131 * memory-hungry process will execute again, and its memory hungriness will
132 * probably be similar.
133 *
134 * 2. Slowly reduce the size of the TLB as the use rate declines over a
135 * reasonably large time window. The rationale is that if in such a time window
136 * we have not observed a high TLB use rate, it is likely that we won't observe
137 * it in the near future. In that case, once a time window expires we downsize
138 * the TLB to match the maximum use rate observed in the window.
139 *
140 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
141 * since in that range performance is likely near-optimal. Recall that the TLB
142 * is direct mapped, so we want the use rate to be low (or at least not too
143 * high), since otherwise we are likely to have a significant amount of
144 * conflict misses.
145 */
146 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
147 {
148 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
149 size_t old_size = tlb_n_entries(env, mmu_idx);
150 size_t rate;
151 size_t new_size = old_size;
152 int64_t now = get_clock_realtime();
153 int64_t window_len_ms = 100;
154 int64_t window_len_ns = window_len_ms * 1000 * 1000;
155 bool window_expired = now > desc->window_begin_ns + window_len_ns;
156
157 if (desc->n_used_entries > desc->window_max_entries) {
158 desc->window_max_entries = desc->n_used_entries;
159 }
160 rate = desc->window_max_entries * 100 / old_size;
161
162 if (rate > 70) {
163 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
164 } else if (rate < 30 && window_expired) {
165 size_t ceil = pow2ceil(desc->window_max_entries);
166 size_t expected_rate = desc->window_max_entries * 100 / ceil;
167
168 /*
169 * Avoid undersizing when the max number of entries seen is just below
170 * a pow2. For instance, if max_entries == 1025, the expected use rate
171 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
172 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
173  * later. Thus, make sure that the expected use rate remains below 70%
174  * (and since we double the size, the lowest rate we'd expect to get
175  * is 35%, which is still in the 30-70% range where we consider the
176  * size to be appropriate).
177 */
178 if (expected_rate > 70) {
179 ceil *= 2;
180 }
181 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
182 }
183
184 if (new_size == old_size) {
185 if (window_expired) {
186 tlb_window_reset(desc, now, desc->n_used_entries);
187 }
188 return;
189 }
190
191 g_free(env_tlb(env)->f[mmu_idx].table);
192 g_free(env_tlb(env)->d[mmu_idx].iotlb);
193
194 tlb_window_reset(desc, now, 0);
195 /* desc->n_used_entries is cleared by the caller */
196 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
197 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
198 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
199 /*
200 * If the allocations fail, try smaller sizes. We just freed some
201 * memory, so going back to half of new_size has a good chance of working.
202 * Increased memory pressure elsewhere in the system might cause the
203 * allocations to fail though, so we progressively reduce the allocation
204 * size, aborting if we cannot even allocate the smallest TLB we support.
205 */
206 while (env_tlb(env)->f[mmu_idx].table == NULL ||
207 env_tlb(env)->d[mmu_idx].iotlb == NULL) {
208 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
209 error_report("%s: %s", __func__, strerror(errno));
210 abort();
211 }
212 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
213 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
214
215 g_free(env_tlb(env)->f[mmu_idx].table);
216 g_free(env_tlb(env)->d[mmu_idx].iotlb);
217 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
218 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
219 }
220 }
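
/*
 * Worked example of the heuristic above (illustrative numbers only):
 * with old_size == 1024 and window_max_entries == 820 the use rate is
 * 820 * 100 / 1024 ~= 80% > 70%, so the table doubles to 2048 entries
 * (capped at 1 << CPU_TLB_DYN_MAX_BITS).  With window_max_entries == 200
 * and an expired window the rate is ~19%; pow2ceil(200) == 256 would give
 * an expected rate of ~78%, so the ceiling is doubled to 512 before being
 * clamped against 1 << CPU_TLB_DYN_MIN_BITS.
 */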
221
222 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
223 {
224 tlb_mmu_resize_locked(env, mmu_idx);
225 memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
226 env_tlb(env)->d[mmu_idx].n_used_entries = 0;
227 }
228
229 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
230 {
231 env_tlb(env)->d[mmu_idx].n_used_entries++;
232 }
233
234 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
235 {
236 env_tlb(env)->d[mmu_idx].n_used_entries--;
237 }
238
239 void tlb_init(CPUState *cpu)
240 {
241 CPUArchState *env = cpu->env_ptr;
242
243 qemu_spin_init(&env_tlb(env)->c.lock);
244
245 /* Ensure that cpu_reset performs a full flush. */
246 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
247
248 tlb_dyn_init(env);
249 }
250
251 /* flush_all_helper: run fn across all cpus
252  *
253  * The helper is queued as asynchronous work on every cpu other than the
254  * source.  Callers that need a synchronisation point additionally queue
255  * the source cpu's helper as "safe" work (see the *_synced variants
256  * below), so that all queued work finishes before execution resumes.
257  */
258 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
259 run_on_cpu_data d)
260 {
261 CPUState *cpu;
262
263 CPU_FOREACH(cpu) {
264 if (cpu != src) {
265 async_run_on_cpu(cpu, fn, d);
266 }
267 }
268 }
269
270 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
271 {
272 CPUState *cpu;
273 size_t full = 0, part = 0, elide = 0;
274
275 CPU_FOREACH(cpu) {
276 CPUArchState *env = cpu->env_ptr;
277
278 full += atomic_read(&env_tlb(env)->c.full_flush_count);
279 part += atomic_read(&env_tlb(env)->c.part_flush_count);
280 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
281 }
282 *pfull = full;
283 *ppart = part;
284 *pelide = elide;
285 }
286
287 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
288 {
289 tlb_table_flush_by_mmuidx(env, mmu_idx);
290 env_tlb(env)->d[mmu_idx].large_page_addr = -1;
291 env_tlb(env)->d[mmu_idx].large_page_mask = -1;
292 env_tlb(env)->d[mmu_idx].vindex = 0;
293 memset(env_tlb(env)->d[mmu_idx].vtable, -1,
294 sizeof(env_tlb(env)->d[0].vtable));
295 }
296
297 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
298 {
299 CPUArchState *env = cpu->env_ptr;
300 uint16_t asked = data.host_int;
301 uint16_t all_dirty, work, to_clean;
302
303 assert_cpu_is_self(cpu);
304
305 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
306
307 qemu_spin_lock(&env_tlb(env)->c.lock);
308
309 all_dirty = env_tlb(env)->c.dirty;
310 to_clean = asked & all_dirty;
311 all_dirty &= ~to_clean;
312 env_tlb(env)->c.dirty = all_dirty;
313
314 for (work = to_clean; work != 0; work &= work - 1) {
315 int mmu_idx = ctz32(work);
316 tlb_flush_one_mmuidx_locked(env, mmu_idx);
317 }
318
319 qemu_spin_unlock(&env_tlb(env)->c.lock);
320
321 cpu_tb_jmp_cache_clear(cpu);
322
323 if (to_clean == ALL_MMUIDX_BITS) {
324 atomic_set(&env_tlb(env)->c.full_flush_count,
325 env_tlb(env)->c.full_flush_count + 1);
326 } else {
327 atomic_set(&env_tlb(env)->c.part_flush_count,
328 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
329 if (to_clean != asked) {
330 atomic_set(&env_tlb(env)->c.elide_flush_count,
331 env_tlb(env)->c.elide_flush_count +
332 ctpop16(asked & ~to_clean));
333 }
334 }
335 }
336
337 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
338 {
339 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
340
341 if (cpu->created && !qemu_cpu_is_self(cpu)) {
342 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
343 RUN_ON_CPU_HOST_INT(idxmap));
344 } else {
345 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
346 }
347 }
348
349 void tlb_flush(CPUState *cpu)
350 {
351 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
352 }
353
354 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
355 {
356 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
357
358 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
359
360 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
361 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
362 }
363
364 void tlb_flush_all_cpus(CPUState *src_cpu)
365 {
366 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
367 }
368
369 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
370 {
371 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
372
373 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
374
375 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
376 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
377 }
378
379 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
380 {
381 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
382 }
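
/*
 * Minimal caller sketch (hypothetical target code; MMU_IDX_S1 and
 * MMU_IDX_S1_PAN are made-up index names).  A target that wants two
 * translation regimes invalidated on every vCPU, with the flush taking
 * effect before the source vCPU resumes guest execution, would use the
 * synced variant:
 *
 *     tlb_flush_by_mmuidx_all_cpus_synced(cs,
 *                                         (1 << MMU_IDX_S1) |
 *                                         (1 << MMU_IDX_S1_PAN));
 */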
383
384 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
385 target_ulong page)
386 {
387 return tlb_hit_page(tlb_entry->addr_read, page) ||
388 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
389 tlb_hit_page(tlb_entry->addr_code, page);
390 }
391
392 /**
393 * tlb_entry_is_empty - return true if the entry is not in use
394 * @te: pointer to CPUTLBEntry
395 */
396 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
397 {
398 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
399 }
400
401 /* Called with tlb_c.lock held */
402 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
403 target_ulong page)
404 {
405 if (tlb_hit_page_anyprot(tlb_entry, page)) {
406 memset(tlb_entry, -1, sizeof(*tlb_entry));
407 return true;
408 }
409 return false;
410 }
411
412 /* Called with tlb_c.lock held */
413 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
414 target_ulong page)
415 {
416 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
417 int k;
418
419 assert_cpu_is_self(env_cpu(env));
420 for (k = 0; k < CPU_VTLB_SIZE; k++) {
421 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
422 tlb_n_used_entries_dec(env, mmu_idx);
423 }
424 }
425 }
426
427 static void tlb_flush_page_locked(CPUArchState *env, int midx,
428 target_ulong page)
429 {
430 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
431 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
432
433 /* Check if we need to flush due to large pages. */
434 if ((page & lp_mask) == lp_addr) {
435 tlb_debug("forcing full flush midx %d ("
436 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
437 midx, lp_addr, lp_mask);
438 tlb_flush_one_mmuidx_locked(env, midx);
439 } else {
440 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
441 tlb_n_used_entries_dec(env, midx);
442 }
443 tlb_flush_vtlb_page_locked(env, midx, page);
444 }
445 }
446
447 /* As we are going to hijack the bottom bits of the page address for a
448  * mmuidx bit mask, we need to fail the build if we can't do that.
449 */
450 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
451
452 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
453 run_on_cpu_data data)
454 {
455 CPUArchState *env = cpu->env_ptr;
456 target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
457 target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
458 unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
459 int mmu_idx;
460
461 assert_cpu_is_self(cpu);
462
463 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
464 addr, mmu_idx_bitmap);
465
466 qemu_spin_lock(&env_tlb(env)->c.lock);
467 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
468 if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
469 tlb_flush_page_locked(env, mmu_idx, addr);
470 }
471 }
472 qemu_spin_unlock(&env_tlb(env)->c.lock);
473
474 tb_flush_jmp_cache(cpu, addr);
475 }
476
477 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
478 {
479 target_ulong addr_and_mmu_idx;
480
481 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
482
483 /* This should already be page aligned */
484 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
485 addr_and_mmu_idx |= idxmap;
486
487 if (!qemu_cpu_is_self(cpu)) {
488 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
489 RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
490 } else {
491 tlb_flush_page_by_mmuidx_async_work(
492 cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
493 }
494 }
495
496 void tlb_flush_page(CPUState *cpu, target_ulong addr)
497 {
498 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
499 }
500
501 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
502 uint16_t idxmap)
503 {
504 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
505 target_ulong addr_and_mmu_idx;
506
507 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
508
509 /* This should already be page aligned */
510 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
511 addr_and_mmu_idx |= idxmap;
512
513 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
514 fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
515 }
516
517 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
518 {
519 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
520 }
521
522 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
523 target_ulong addr,
524 uint16_t idxmap)
525 {
526 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
527 target_ulong addr_and_mmu_idx;
528
529 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
530
531 /* This should already be page aligned */
532 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
533 addr_and_mmu_idx |= idxmap;
534
535 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
536 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
537 }
538
539 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
540 {
541 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
542 }
543
544 /* update the TLBs so that writes to code in the physical page 'ram_addr'
545 can be detected */
546 void tlb_protect_code(ram_addr_t ram_addr)
547 {
548 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
549 DIRTY_MEMORY_CODE);
550 }
551
552 /* update the TLB so that writes in physical page 'ram_addr' are no longer
553    tested for self-modifying code */
554 void tlb_unprotect_code(ram_addr_t ram_addr)
555 {
556 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
557 }
558
559
560 /*
561 * Dirty write flag handling
562 *
563 * When the TCG code writes to a location it looks up the address in
564 * the TLB and uses that data to compute the final address. If any of
565 * the lower bits of the address are set then the slow path is forced.
566 * There are a number of reasons to do this but for normal RAM the
567 * most usual is detecting writes to code regions which may invalidate
568 * generated code.
569 *
570 * Other vCPUs might be reading their TLBs during guest execution, so we update
571 * te->addr_write with atomic_set. We don't need to worry about this for
572 * oversized guests as MTTCG is disabled for them.
573 *
574 * Called with tlb_c.lock held.
575 */
576 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
577 uintptr_t start, uintptr_t length)
578 {
579 uintptr_t addr = tlb_entry->addr_write;
580
581 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
582 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
583 addr &= TARGET_PAGE_MASK;
584 addr += tlb_entry->addend;
585 if ((addr - start) < length) {
586 #if TCG_OVERSIZED_GUEST
587 tlb_entry->addr_write |= TLB_NOTDIRTY;
588 #else
589 atomic_set(&tlb_entry->addr_write,
590 tlb_entry->addr_write | TLB_NOTDIRTY);
591 #endif
592 }
593 }
594 }
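
/*
 * Concrete effect of the above (derived from the code in this file): an
 * entry for a clean RAM page ends up with addr_write == (page | TLB_NOTDIRTY),
 * so the fast path in store_helper() sees flag bits below TARGET_PAGE_MASK
 * and diverts to notdirty_write(), which re-dirties the page and, once no
 * translated code remains on it, clears TLB_NOTDIRTY again via tlb_set_dirty().
 */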
595
596 /*
597 * Called with tlb_c.lock held.
598 * Called only from the vCPU context, i.e. the TLB's owner thread.
599 */
600 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
601 {
602 *d = *s;
603 }
604
605 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
606 * the target vCPU).
607 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
608 * thing actually updated is the target TLB entry ->addr_write flags.
609 */
610 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
611 {
612 CPUArchState *env;
613
614 int mmu_idx;
615
616 env = cpu->env_ptr;
617 qemu_spin_lock(&env_tlb(env)->c.lock);
618 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
619 unsigned int i;
620 unsigned int n = tlb_n_entries(env, mmu_idx);
621
622 for (i = 0; i < n; i++) {
623 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
624 start1, length);
625 }
626
627 for (i = 0; i < CPU_VTLB_SIZE; i++) {
628 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
629 start1, length);
630 }
631 }
632 qemu_spin_unlock(&env_tlb(env)->c.lock);
633 }
634
635 /* Called with tlb_c.lock held */
636 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
637 target_ulong vaddr)
638 {
639 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
640 tlb_entry->addr_write = vaddr;
641 }
642 }
643
644 /* update the TLB corresponding to virtual page vaddr
645 so that it is no longer dirty */
646 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
647 {
648 CPUArchState *env = cpu->env_ptr;
649 int mmu_idx;
650
651 assert_cpu_is_self(cpu);
652
653 vaddr &= TARGET_PAGE_MASK;
654 qemu_spin_lock(&env_tlb(env)->c.lock);
655 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
656 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
657 }
658
659 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
660 int k;
661 for (k = 0; k < CPU_VTLB_SIZE; k++) {
662 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
663 }
664 }
665 qemu_spin_unlock(&env_tlb(env)->c.lock);
666 }
667
668 /* Our TLB does not support large pages, so remember the area covered by
669 large pages and trigger a full TLB flush if these are invalidated. */
670 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
671 target_ulong vaddr, target_ulong size)
672 {
673 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
674 target_ulong lp_mask = ~(size - 1);
675
676 if (lp_addr == (target_ulong)-1) {
677 /* No previous large page. */
678 lp_addr = vaddr;
679 } else {
680 /* Extend the existing region to include the new page.
681 This is a compromise between unnecessary flushes and
682 the cost of maintaining a full variable size TLB. */
683 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
684 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
685 lp_mask <<= 1;
686 }
687 }
688 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
689 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
690 }
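
/*
 * Worked example (illustrative addresses): suppose this mmu_idx already
 * tracks a 2MB large page at 0x40000000, i.e. lp_addr == 0x40000000 and
 * lp_mask == ~(2MB - 1).  Adding another 2MB page at 0x40300000 leaves
 * bit 0x00200000 differing under the mask, so lp_mask is shifted left
 * once and the tracked region becomes the 4MB range at 0x40000000.
 */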
691
692 /* Add a new TLB entry. At most one entry for a given virtual address
693  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
694 * supplied size is only used by tlb_flush_page.
695 *
696 * Called from TCG-generated code, which is under an RCU read-side
697 * critical section.
698 */
699 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
700 hwaddr paddr, MemTxAttrs attrs, int prot,
701 int mmu_idx, target_ulong size)
702 {
703 CPUArchState *env = cpu->env_ptr;
704 CPUTLB *tlb = env_tlb(env);
705 CPUTLBDesc *desc = &tlb->d[mmu_idx];
706 MemoryRegionSection *section;
707 unsigned int index;
708 target_ulong address;
709 target_ulong write_address;
710 uintptr_t addend;
711 CPUTLBEntry *te, tn;
712 hwaddr iotlb, xlat, sz, paddr_page;
713 target_ulong vaddr_page;
714 int asidx = cpu_asidx_from_attrs(cpu, attrs);
715 int wp_flags;
716 bool is_ram, is_romd;
717
718 assert_cpu_is_self(cpu);
719
720 if (size <= TARGET_PAGE_SIZE) {
721 sz = TARGET_PAGE_SIZE;
722 } else {
723 tlb_add_large_page(env, mmu_idx, vaddr, size);
724 sz = size;
725 }
726 vaddr_page = vaddr & TARGET_PAGE_MASK;
727 paddr_page = paddr & TARGET_PAGE_MASK;
728
729 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
730 &xlat, &sz, attrs, &prot);
731 assert(sz >= TARGET_PAGE_SIZE);
732
733 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
734 " prot=%x idx=%d\n",
735 vaddr, paddr, prot, mmu_idx);
736
737 address = vaddr_page;
738 if (size < TARGET_PAGE_SIZE) {
739 /* Repeat the MMU check and TLB fill on every access. */
740 address |= TLB_INVALID_MASK;
741 }
742 if (attrs.byte_swap) {
743 address |= TLB_BSWAP;
744 }
745
746 is_ram = memory_region_is_ram(section->mr);
747 is_romd = memory_region_is_romd(section->mr);
748
749 if (is_ram || is_romd) {
750 /* RAM and ROMD both have associated host memory. */
751 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
752 } else {
753 /* I/O does not; force the host address to NULL. */
754 addend = 0;
755 }
756
757 write_address = address;
758 if (is_ram) {
759 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
760 /*
761 * Computing is_clean is expensive; avoid all that unless
762 * the page is actually writable.
763 */
764 if (prot & PAGE_WRITE) {
765 if (section->readonly) {
766 write_address |= TLB_DISCARD_WRITE;
767 } else if (cpu_physical_memory_is_clean(iotlb)) {
768 write_address |= TLB_NOTDIRTY;
769 }
770 }
771 } else {
772 /* I/O or ROMD */
773 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
774 /*
775 * Writes to romd devices must go through MMIO to enable write.
776 * Reads to romd devices go through the ram_ptr found above,
777 * but of course reads to I/O must go through MMIO.
778 */
779 write_address |= TLB_MMIO;
780 if (!is_romd) {
781 address = write_address;
782 }
783 }
784
785 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
786 TARGET_PAGE_SIZE);
787
788 index = tlb_index(env, mmu_idx, vaddr_page);
789 te = tlb_entry(env, mmu_idx, vaddr_page);
790
791 /*
792 * Hold the TLB lock for the rest of the function. We could acquire/release
793 * the lock several times in the function, but it is faster to amortize the
794 * acquisition cost by acquiring it just once. Note that this leads to
795 * a longer critical section, but this is not a concern since the TLB lock
796 * is unlikely to be contended.
797 */
798 qemu_spin_lock(&tlb->c.lock);
799
800 /* Note that the tlb is no longer clean. */
801 tlb->c.dirty |= 1 << mmu_idx;
802
803 /* Make sure there's no cached translation for the new page. */
804 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
805
806 /*
807 * Only evict the old entry to the victim tlb if it's for a
808 * different page; otherwise just overwrite the stale data.
809 */
810 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
811 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
812 CPUTLBEntry *tv = &desc->vtable[vidx];
813
814 /* Evict the old entry into the victim tlb. */
815 copy_tlb_helper_locked(tv, te);
816 desc->viotlb[vidx] = desc->iotlb[index];
817 tlb_n_used_entries_dec(env, mmu_idx);
818 }
819
820 /* refill the tlb */
821 /*
822 * At this point iotlb contains a physical section number in the lower
823 * TARGET_PAGE_BITS, and either
824 * + the ram_addr_t of the page base of the target RAM (RAM)
825 * + the offset within section->mr of the page base (I/O, ROMD)
826 * We subtract the vaddr_page (which is page aligned and thus won't
827 * disturb the low bits) to give an offset which can be added to the
828 * (non-page-aligned) vaddr of the eventual memory access to get
829 * the MemoryRegion offset for the access. Note that the vaddr we
830 * subtract here is that of the page base, and not the same as the
831 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
832 */
833 desc->iotlb[index].addr = iotlb - vaddr_page;
834 desc->iotlb[index].attrs = attrs;
835
836 /* Now calculate the new entry */
837 tn.addend = addend - vaddr_page;
838 if (prot & PAGE_READ) {
839 tn.addr_read = address;
840 if (wp_flags & BP_MEM_READ) {
841 tn.addr_read |= TLB_WATCHPOINT;
842 }
843 } else {
844 tn.addr_read = -1;
845 }
846
847 if (prot & PAGE_EXEC) {
848 tn.addr_code = address;
849 } else {
850 tn.addr_code = -1;
851 }
852
853 tn.addr_write = -1;
854 if (prot & PAGE_WRITE) {
855 tn.addr_write = write_address;
856 if (prot & PAGE_WRITE_INV) {
857 tn.addr_write |= TLB_INVALID_MASK;
858 }
859 if (wp_flags & BP_MEM_WRITE) {
860 tn.addr_write |= TLB_WATCHPOINT;
861 }
862 }
863
864 copy_tlb_helper_locked(te, &tn);
865 tlb_n_used_entries_inc(env, mmu_idx);
866 qemu_spin_unlock(&tlb->c.lock);
867 }
868
869 /* Add a new TLB entry, but without specifying the memory
870 * transaction attributes to be used.
871 */
872 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
873 hwaddr paddr, int prot,
874 int mmu_idx, target_ulong size)
875 {
876 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
877 prot, mmu_idx, size);
878 }
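
/*
 * Minimal caller sketch (hypothetical target code, for illustration only;
 * mycpu_walk_page_table() and mycpu_raise_mmu_fault() are made-up names).
 * A target's tlb_fill hook typically walks its page tables and installs
 * the result with tlb_set_page():
 *
 *     static bool mycpu_tlb_fill(CPUState *cs, vaddr addr, int size,
 *                                MMUAccessType access_type, int mmu_idx,
 *                                bool probe, uintptr_t retaddr)
 *     {
 *         hwaddr phys;
 *         int prot;
 *
 *         if (!mycpu_walk_page_table(cs, addr, access_type, mmu_idx,
 *                                    &phys, &prot)) {
 *             if (probe) {
 *                 return false;
 *             }
 *             mycpu_raise_mmu_fault(cs, addr, access_type, retaddr);
 *         }
 *         tlb_set_page(cs, addr & TARGET_PAGE_MASK, phys & TARGET_PAGE_MASK,
 *                      prot, mmu_idx, TARGET_PAGE_SIZE);
 *         return true;
 *     }
 */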
879
880 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
881 {
882 ram_addr_t ram_addr;
883
884 ram_addr = qemu_ram_addr_from_host(ptr);
885 if (ram_addr == RAM_ADDR_INVALID) {
886 error_report("Bad ram pointer %p", ptr);
887 abort();
888 }
889 return ram_addr;
890 }
891
892 /*
893 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
894 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
895 * be discarded and looked up again (e.g. via tlb_entry()).
896 */
897 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
898 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
899 {
900 CPUClass *cc = CPU_GET_CLASS(cpu);
901 bool ok;
902
903 /*
904 * This is not a probe, so only valid return is success; failure
905 * should result in exception + longjmp to the cpu loop.
906 */
907 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
908 assert(ok);
909 }
910
911 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
912 int mmu_idx, target_ulong addr, uintptr_t retaddr,
913 MMUAccessType access_type, MemOp op)
914 {
915 CPUState *cpu = env_cpu(env);
916 hwaddr mr_offset;
917 MemoryRegionSection *section;
918 MemoryRegion *mr;
919 uint64_t val;
920 bool locked = false;
921 MemTxResult r;
922
923 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
924 mr = section->mr;
925 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
926 cpu->mem_io_pc = retaddr;
927 if (!cpu->can_do_io) {
928 cpu_io_recompile(cpu, retaddr);
929 }
930
931 cpu->mem_io_access_type = access_type;
932
933 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
934 qemu_mutex_lock_iothread();
935 locked = true;
936 }
937 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
938 if (r != MEMTX_OK) {
939 hwaddr physaddr = mr_offset +
940 section->offset_within_address_space -
941 section->offset_within_region;
942
943 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
944 mmu_idx, iotlbentry->attrs, r, retaddr);
945 }
946 if (locked) {
947 qemu_mutex_unlock_iothread();
948 }
949
950 return val;
951 }
952
953 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
954 int mmu_idx, uint64_t val, target_ulong addr,
955 uintptr_t retaddr, MemOp op)
956 {
957 CPUState *cpu = env_cpu(env);
958 hwaddr mr_offset;
959 MemoryRegionSection *section;
960 MemoryRegion *mr;
961 bool locked = false;
962 MemTxResult r;
963
964 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
965 mr = section->mr;
966 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
967 if (!cpu->can_do_io) {
968 cpu_io_recompile(cpu, retaddr);
969 }
970 cpu->mem_io_pc = retaddr;
971
972 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
973 qemu_mutex_lock_iothread();
974 locked = true;
975 }
976 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
977 if (r != MEMTX_OK) {
978 hwaddr physaddr = mr_offset +
979 section->offset_within_address_space -
980 section->offset_within_region;
981
982 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
983 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
984 retaddr);
985 }
986 if (locked) {
987 qemu_mutex_unlock_iothread();
988 }
989 }
990
991 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
992 {
993 #if TCG_OVERSIZED_GUEST
994 return *(target_ulong *)((uintptr_t)entry + ofs);
995 #else
996 /* ofs might correspond to .addr_write, so use atomic_read */
997 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
998 #endif
999 }
1000
1001 /* Return true if ADDR is present in the victim tlb, and has been copied
1002 back to the main tlb. */
1003 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1004 size_t elt_ofs, target_ulong page)
1005 {
1006 size_t vidx;
1007
1008 assert_cpu_is_self(env_cpu(env));
1009 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1010 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1011 target_ulong cmp;
1012
1013 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1014 #if TCG_OVERSIZED_GUEST
1015 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1016 #else
1017 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1018 #endif
1019
1020 if (cmp == page) {
1021 /* Found entry in victim tlb, swap tlb and iotlb. */
1022 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1023
1024 qemu_spin_lock(&env_tlb(env)->c.lock);
1025 copy_tlb_helper_locked(&tmptlb, tlb);
1026 copy_tlb_helper_locked(tlb, vtlb);
1027 copy_tlb_helper_locked(vtlb, &tmptlb);
1028 qemu_spin_unlock(&env_tlb(env)->c.lock);
1029
1030 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1031 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1032 tmpio = *io; *io = *vio; *vio = tmpio;
1033 return true;
1034 }
1035 }
1036 return false;
1037 }
1038
1039 /* Macro to call the above, with local variables from the use context. */
1040 #define VICTIM_TLB_HIT(TY, ADDR) \
1041 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1042 (ADDR) & TARGET_PAGE_MASK)
1043
1044 /*
1045 * Return a ram_addr_t for the virtual address for execution.
1046 *
1047 * Return -1 if we can't translate and execute from an entire page
1048 * of RAM. This will force us to execute by loading and translating
1049 * one insn at a time, without caching.
1050 *
1051 * NOTE: This function will trigger an exception if the page is
1052 * not executable.
1053 */
1054 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1055 {
1056 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1057 uintptr_t index = tlb_index(env, mmu_idx, addr);
1058 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1059 void *p;
1060
1061 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1062 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1063 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1064 index = tlb_index(env, mmu_idx, addr);
1065 entry = tlb_entry(env, mmu_idx, addr);
1066
1067 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1068 /*
1069 * The MMU protection covers a smaller range than a target
1070 * page, so we must redo the MMU check for every insn.
1071 */
1072 return -1;
1073 }
1074 }
1075 assert(tlb_hit(entry->addr_code, addr));
1076 }
1077
1078 if (unlikely(entry->addr_code & TLB_MMIO)) {
1079 /* The region is not backed by RAM. */
1080 return -1;
1081 }
1082
1083 p = (void *)((uintptr_t)addr + entry->addend);
1084 return qemu_ram_addr_from_host_nofail(p);
1085 }
1086
1087 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1088 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1089 {
1090 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1091
1092 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1093
1094 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1095 struct page_collection *pages
1096 = page_collection_lock(ram_addr, ram_addr + size);
1097
1098 /* We require mem_io_pc in tb_invalidate_phys_page_range. */
1099 cpu->mem_io_pc = retaddr;
1100
1101 tb_invalidate_phys_page_fast(pages, ram_addr, size);
1102 page_collection_unlock(pages);
1103 }
1104
1105 /*
1106 * Set both VGA and migration bits for simplicity and to remove
1107 * the notdirty callback faster.
1108 */
1109 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1110
1111 /* We remove the notdirty callback only if the code has been flushed. */
1112 if (!cpu_physical_memory_is_clean(ram_addr)) {
1113 trace_memory_notdirty_set_dirty(mem_vaddr);
1114 tlb_set_dirty(cpu, mem_vaddr);
1115 }
1116 }
1117
1118 /*
1119 * Probe for whether the specified guest access is permitted. If it is not
1120 * permitted then an exception will be taken in the same way as if this
1121 * were a real access (and we will not return).
1122 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1123  * returns the address of the host page, similarly to tlb_vaddr_to_host().
1124 */
1125 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1126 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1127 {
1128 uintptr_t index = tlb_index(env, mmu_idx, addr);
1129 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1130 target_ulong tlb_addr;
1131 size_t elt_ofs;
1132 int wp_access;
1133
1134 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1135
1136 switch (access_type) {
1137 case MMU_DATA_LOAD:
1138 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1139 wp_access = BP_MEM_READ;
1140 break;
1141 case MMU_DATA_STORE:
1142 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1143 wp_access = BP_MEM_WRITE;
1144 break;
1145 case MMU_INST_FETCH:
1146 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1147 wp_access = BP_MEM_READ;
1148 break;
1149 default:
1150 g_assert_not_reached();
1151 }
1152 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1153
1154 if (unlikely(!tlb_hit(tlb_addr, addr))) {
1155 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1156 addr & TARGET_PAGE_MASK)) {
1157 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1158 /* TLB resize via tlb_fill may have moved the entry. */
1159 index = tlb_index(env, mmu_idx, addr);
1160 entry = tlb_entry(env, mmu_idx, addr);
1161 }
1162 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1163 }
1164
1165 if (!size) {
1166 return NULL;
1167 }
1168
1169 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1170 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1171
1172 /* Reject I/O access, or other required slow-path. */
1173 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1174 return NULL;
1175 }
1176
1177 /* Handle watchpoints. */
1178 if (tlb_addr & TLB_WATCHPOINT) {
1179 cpu_check_watchpoint(env_cpu(env), addr, size,
1180 iotlbentry->attrs, wp_access, retaddr);
1181 }
1182
1183 /* Handle clean RAM pages. */
1184 if (tlb_addr & TLB_NOTDIRTY) {
1185 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1186 }
1187 }
1188
1189 return (void *)((uintptr_t)addr + entry->addend);
1190 }
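
/*
 * Usage sketch (hypothetical helper code; "len" must stay within one page,
 * as asserted above).  Probing the whole buffer up front means a fault
 * cannot leave it partially modified; a non-NULL result may be written
 * directly, while NULL means the access needs I/O handling (or len was zero):
 *
 *     void *host = probe_access(env, addr, len, MMU_DATA_STORE,
 *                               cpu_mmu_index(env, false), GETPC());
 *     if (host) {
 *         memset(host, 0, len);
 *     }
 */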
1191
1192 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1193 MMUAccessType access_type, int mmu_idx)
1194 {
1195 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1196 uintptr_t tlb_addr, page;
1197 size_t elt_ofs;
1198
1199 switch (access_type) {
1200 case MMU_DATA_LOAD:
1201 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1202 break;
1203 case MMU_DATA_STORE:
1204 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1205 break;
1206 case MMU_INST_FETCH:
1207 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1208 break;
1209 default:
1210 g_assert_not_reached();
1211 }
1212
1213 page = addr & TARGET_PAGE_MASK;
1214 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1215
1216 if (!tlb_hit_page(tlb_addr, page)) {
1217 uintptr_t index = tlb_index(env, mmu_idx, addr);
1218
1219 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1220 CPUState *cs = env_cpu(env);
1221 CPUClass *cc = CPU_GET_CLASS(cs);
1222
1223 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1224 /* Non-faulting page table read failed. */
1225 return NULL;
1226 }
1227
1228 /* TLB resize via tlb_fill may have moved the entry. */
1229 entry = tlb_entry(env, mmu_idx, addr);
1230 }
1231 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1232 }
1233
1234 if (tlb_addr & ~TARGET_PAGE_MASK) {
1235 /* IO access */
1236 return NULL;
1237 }
1238
1239 return (void *)((uintptr_t)addr + entry->addend);
1240 }
1241
1242 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1243  * operations or I/O operations to proceed. Return the host address. */
1244 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1245 TCGMemOpIdx oi, uintptr_t retaddr)
1246 {
1247 size_t mmu_idx = get_mmuidx(oi);
1248 uintptr_t index = tlb_index(env, mmu_idx, addr);
1249 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1250 target_ulong tlb_addr = tlb_addr_write(tlbe);
1251 MemOp mop = get_memop(oi);
1252 int a_bits = get_alignment_bits(mop);
1253 int s_bits = mop & MO_SIZE;
1254 void *hostaddr;
1255
1256 /* Adjust the given return address. */
1257 retaddr -= GETPC_ADJ;
1258
1259 /* Enforce guest required alignment. */
1260 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1261 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1262 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1263 mmu_idx, retaddr);
1264 }
1265
1266 /* Enforce qemu required alignment. */
1267 if (unlikely(addr & ((1 << s_bits) - 1))) {
1268 /* We get here if guest alignment was not requested,
1269 or was not enforced by cpu_unaligned_access above.
1270 We might widen the access and emulate, but for now
1271 mark an exception and exit the cpu loop. */
1272 goto stop_the_world;
1273 }
1274
1275 /* Check TLB entry and enforce page permissions. */
1276 if (!tlb_hit(tlb_addr, addr)) {
1277 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1278 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1279 mmu_idx, retaddr);
1280 index = tlb_index(env, mmu_idx, addr);
1281 tlbe = tlb_entry(env, mmu_idx, addr);
1282 }
1283 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1284 }
1285
1286 /* Notice an IO access or a needs-MMU-lookup access */
1287 if (unlikely(tlb_addr & TLB_MMIO)) {
1288 /* There's really nothing that can be done to
1289 support this apart from stop-the-world. */
1290 goto stop_the_world;
1291 }
1292
1293 /* Let the guest notice RMW on a write-only page. */
1294 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1295 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1296 mmu_idx, retaddr);
1297 /* Since we don't support reads and writes to different addresses,
1298 and we do have the proper page loaded for write, this shouldn't
1299 ever return. But just in case, handle via stop-the-world. */
1300 goto stop_the_world;
1301 }
1302
1303 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1304
1305 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1306 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1307 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1308 }
1309
1310 return hostaddr;
1311
1312 stop_the_world:
1313 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1314 }
1315
1316 /*
1317 * Load Helpers
1318 *
1319 * We support two different access types. SOFTMMU_CODE_ACCESS is
1320 * specifically for reading instructions from system memory. It is
1321 * called by the translation loop and in some helpers where the code
1322 * is disassembled. It shouldn't be called directly by guest code.
1323 */
1324
1325 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1326 TCGMemOpIdx oi, uintptr_t retaddr);
1327
1328 static inline uint64_t QEMU_ALWAYS_INLINE
1329 load_memop(const void *haddr, MemOp op)
1330 {
1331 switch (op) {
1332 case MO_UB:
1333 return ldub_p(haddr);
1334 case MO_BEUW:
1335 return lduw_be_p(haddr);
1336 case MO_LEUW:
1337 return lduw_le_p(haddr);
1338 case MO_BEUL:
1339 return (uint32_t)ldl_be_p(haddr);
1340 case MO_LEUL:
1341 return (uint32_t)ldl_le_p(haddr);
1342 case MO_BEQ:
1343 return ldq_be_p(haddr);
1344 case MO_LEQ:
1345 return ldq_le_p(haddr);
1346 default:
1347 qemu_build_not_reached();
1348 }
1349 }
1350
1351 static inline uint64_t QEMU_ALWAYS_INLINE
1352 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1353 uintptr_t retaddr, MemOp op, bool code_read,
1354 FullLoadHelper *full_load)
1355 {
1356 uintptr_t mmu_idx = get_mmuidx(oi);
1357 uintptr_t index = tlb_index(env, mmu_idx, addr);
1358 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1359 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1360 const size_t tlb_off = code_read ?
1361 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1362 const MMUAccessType access_type =
1363 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1364 unsigned a_bits = get_alignment_bits(get_memop(oi));
1365 void *haddr;
1366 uint64_t res;
1367 size_t size = memop_size(op);
1368
1369 /* Handle CPU specific unaligned behaviour */
1370 if (addr & ((1 << a_bits) - 1)) {
1371 cpu_unaligned_access(env_cpu(env), addr, access_type,
1372 mmu_idx, retaddr);
1373 }
1374
1375 /* If the TLB entry is for a different page, reload and try again. */
1376 if (!tlb_hit(tlb_addr, addr)) {
1377 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1378 addr & TARGET_PAGE_MASK)) {
1379 tlb_fill(env_cpu(env), addr, size,
1380 access_type, mmu_idx, retaddr);
1381 index = tlb_index(env, mmu_idx, addr);
1382 entry = tlb_entry(env, mmu_idx, addr);
1383 }
1384 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1385 tlb_addr &= ~TLB_INVALID_MASK;
1386 }
1387
1388 /* Handle anything that isn't just a straight memory access. */
1389 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1390 CPUIOTLBEntry *iotlbentry;
1391 bool need_swap;
1392
1393 /* For anything that is unaligned, recurse through full_load. */
1394 if ((addr & (size - 1)) != 0) {
1395 goto do_unaligned_access;
1396 }
1397
1398 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1399
1400 /* Handle watchpoints. */
1401 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1402 /* On watchpoint hit, this will longjmp out. */
1403 cpu_check_watchpoint(env_cpu(env), addr, size,
1404 iotlbentry->attrs, BP_MEM_READ, retaddr);
1405 }
1406
1407 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1408
1409 /* Handle I/O access. */
1410 if (likely(tlb_addr & TLB_MMIO)) {
1411 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1412 access_type, op ^ (need_swap * MO_BSWAP));
1413 }
1414
1415 haddr = (void *)((uintptr_t)addr + entry->addend);
1416
1417 /*
1418 * Keep these two load_memop separate to ensure that the compiler
1419 * is able to fold the entire function to a single instruction.
1420 * There is a build-time assert inside to remind you of this. ;-)
1421 */
1422 if (unlikely(need_swap)) {
1423 return load_memop(haddr, op ^ MO_BSWAP);
1424 }
1425 return load_memop(haddr, op);
1426 }
1427
1428 /* Handle slow unaligned access (it spans two pages or IO). */
1429 if (size > 1
1430 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1431 >= TARGET_PAGE_SIZE)) {
1432 target_ulong addr1, addr2;
1433 uint64_t r1, r2;
1434 unsigned shift;
1435 do_unaligned_access:
1436 addr1 = addr & ~((target_ulong)size - 1);
1437 addr2 = addr1 + size;
1438 r1 = full_load(env, addr1, oi, retaddr);
1439 r2 = full_load(env, addr2, oi, retaddr);
1440 shift = (addr & (size - 1)) * 8;
1441
1442 if (memop_big_endian(op)) {
1443 /* Big-endian combine. */
1444 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1445 } else {
1446 /* Little-endian combine. */
1447 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1448 }
1449 return res & MAKE_64BIT_MASK(0, size * 8);
1450 }
1451
1452 haddr = (void *)((uintptr_t)addr + entry->addend);
1453 return load_memop(haddr, op);
1454 }
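
/*
 * Worked example for the unaligned combine above (illustrative values):
 * a 4-byte little-endian load at an address with (addr & 3) == 2 gives
 * shift == 16; r1 is the aligned 4-byte load at addr1 == addr - 2 and r2
 * the one at addr1 + 4, so (r1 >> 16) | (r2 << 16), masked to 32 bits,
 * is exactly the four bytes starting at addr in little-endian order.
 */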
1455
1456 /*
1457 * For the benefit of TCG generated code, we want to avoid the
1458 * complication of ABI-specific return type promotion and always
1459 * return a value extended to the register size of the host. This is
1460 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1461 * data, and for that we always have uint64_t.
1462 *
1463 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1464 */
1465
1466 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1467 TCGMemOpIdx oi, uintptr_t retaddr)
1468 {
1469 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1470 }
1471
1472 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1473 TCGMemOpIdx oi, uintptr_t retaddr)
1474 {
1475 return full_ldub_mmu(env, addr, oi, retaddr);
1476 }
1477
1478 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1479 TCGMemOpIdx oi, uintptr_t retaddr)
1480 {
1481 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1482 full_le_lduw_mmu);
1483 }
1484
1485 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1486 TCGMemOpIdx oi, uintptr_t retaddr)
1487 {
1488 return full_le_lduw_mmu(env, addr, oi, retaddr);
1489 }
1490
1491 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1492 TCGMemOpIdx oi, uintptr_t retaddr)
1493 {
1494 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1495 full_be_lduw_mmu);
1496 }
1497
1498 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1499 TCGMemOpIdx oi, uintptr_t retaddr)
1500 {
1501 return full_be_lduw_mmu(env, addr, oi, retaddr);
1502 }
1503
1504 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1505 TCGMemOpIdx oi, uintptr_t retaddr)
1506 {
1507 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1508 full_le_ldul_mmu);
1509 }
1510
1511 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1512 TCGMemOpIdx oi, uintptr_t retaddr)
1513 {
1514 return full_le_ldul_mmu(env, addr, oi, retaddr);
1515 }
1516
1517 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1518 TCGMemOpIdx oi, uintptr_t retaddr)
1519 {
1520 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1521 full_be_ldul_mmu);
1522 }
1523
1524 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1525 TCGMemOpIdx oi, uintptr_t retaddr)
1526 {
1527 return full_be_ldul_mmu(env, addr, oi, retaddr);
1528 }
1529
1530 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1531 TCGMemOpIdx oi, uintptr_t retaddr)
1532 {
1533 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1534 helper_le_ldq_mmu);
1535 }
1536
1537 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1538 TCGMemOpIdx oi, uintptr_t retaddr)
1539 {
1540 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1541 helper_be_ldq_mmu);
1542 }
1543
1544 /*
1545 * Provide signed versions of the load routines as well. We can of course
1546 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1547 */
1548
1549
1550 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1551 TCGMemOpIdx oi, uintptr_t retaddr)
1552 {
1553 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1554 }
1555
1556 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1557 TCGMemOpIdx oi, uintptr_t retaddr)
1558 {
1559 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1560 }
1561
1562 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1563 TCGMemOpIdx oi, uintptr_t retaddr)
1564 {
1565 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1566 }
1567
1568 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1569 TCGMemOpIdx oi, uintptr_t retaddr)
1570 {
1571 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1572 }
1573
1574 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1575 TCGMemOpIdx oi, uintptr_t retaddr)
1576 {
1577 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1578 }
1579
1580 /*
1581 * Store Helpers
1582 */
1583
1584 static inline void QEMU_ALWAYS_INLINE
1585 store_memop(void *haddr, uint64_t val, MemOp op)
1586 {
1587 switch (op) {
1588 case MO_UB:
1589 stb_p(haddr, val);
1590 break;
1591 case MO_BEUW:
1592 stw_be_p(haddr, val);
1593 break;
1594 case MO_LEUW:
1595 stw_le_p(haddr, val);
1596 break;
1597 case MO_BEUL:
1598 stl_be_p(haddr, val);
1599 break;
1600 case MO_LEUL:
1601 stl_le_p(haddr, val);
1602 break;
1603 case MO_BEQ:
1604 stq_be_p(haddr, val);
1605 break;
1606 case MO_LEQ:
1607 stq_le_p(haddr, val);
1608 break;
1609 default:
1610 qemu_build_not_reached();
1611 }
1612 }
1613
1614 static inline void QEMU_ALWAYS_INLINE
1615 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1616 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1617 {
1618 uintptr_t mmu_idx = get_mmuidx(oi);
1619 uintptr_t index = tlb_index(env, mmu_idx, addr);
1620 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1621 target_ulong tlb_addr = tlb_addr_write(entry);
1622 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1623 unsigned a_bits = get_alignment_bits(get_memop(oi));
1624 void *haddr;
1625 size_t size = memop_size(op);
1626
1627 /* Handle CPU specific unaligned behaviour */
1628 if (addr & ((1 << a_bits) - 1)) {
1629 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1630 mmu_idx, retaddr);
1631 }
1632
1633 /* If the TLB entry is for a different page, reload and try again. */
1634 if (!tlb_hit(tlb_addr, addr)) {
1635 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1636 addr & TARGET_PAGE_MASK)) {
1637 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1638 mmu_idx, retaddr);
1639 index = tlb_index(env, mmu_idx, addr);
1640 entry = tlb_entry(env, mmu_idx, addr);
1641 }
1642 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1643 }
1644
1645 /* Handle anything that isn't just a straight memory access. */
1646 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1647 CPUIOTLBEntry *iotlbentry;
1648 bool need_swap;
1649
1650 /* For anything that is unaligned, recurse through byte stores. */
1651 if ((addr & (size - 1)) != 0) {
1652 goto do_unaligned_access;
1653 }
1654
1655 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1656
1657 /* Handle watchpoints. */
1658 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1659 /* On watchpoint hit, this will longjmp out. */
1660 cpu_check_watchpoint(env_cpu(env), addr, size,
1661 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1662 }
1663
1664 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1665
1666 /* Handle I/O access. */
1667 if (tlb_addr & TLB_MMIO) {
1668 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1669 op ^ (need_swap * MO_BSWAP));
1670 return;
1671 }
1672
1673 /* Ignore writes to ROM. */
1674 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1675 return;
1676 }
1677
1678 /* Handle clean RAM pages. */
1679 if (tlb_addr & TLB_NOTDIRTY) {
1680 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1681 }
1682
1683 haddr = (void *)((uintptr_t)addr + entry->addend);
1684
1685 /*
1686 * Keep these two store_memop separate to ensure that the compiler
1687 * is able to fold the entire function to a single instruction.
1688 * There is a build-time assert inside to remind you of this. ;-)
1689 */
1690 if (unlikely(need_swap)) {
1691 store_memop(haddr, val, op ^ MO_BSWAP);
1692 } else {
1693 store_memop(haddr, val, op);
1694 }
1695 return;
1696 }
1697
1698 /* Handle slow unaligned access (it spans two pages or IO). */
1699 if (size > 1
1700 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1701 >= TARGET_PAGE_SIZE)) {
1702 int i;
1703 uintptr_t index2;
1704 CPUTLBEntry *entry2;
1705 target_ulong page2, tlb_addr2;
1706 size_t size2;
1707
1708 do_unaligned_access:
1709 /*
1710 * Ensure the second page is in the TLB. Note that the first page
1711 * is already guaranteed to be filled, and that the second page
1712 * cannot evict the first.
1713 */
1714 page2 = (addr + size) & TARGET_PAGE_MASK;
1715 size2 = (addr + size) & ~TARGET_PAGE_MASK;
1716 index2 = tlb_index(env, mmu_idx, page2);
1717 entry2 = tlb_entry(env, mmu_idx, page2);
1718 tlb_addr2 = tlb_addr_write(entry2);
1719 if (!tlb_hit_page(tlb_addr2, page2)) {
1720 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
1721 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
1722 mmu_idx, retaddr);
1723 index2 = tlb_index(env, mmu_idx, page2);
1724 entry2 = tlb_entry(env, mmu_idx, page2);
1725 }
1726 tlb_addr2 = tlb_addr_write(entry2);
1727 }
1728
1729 /*
1730 * Handle watchpoints. Since this may trap, all checks
1731 * must happen before any store.
1732 */
1733 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1734 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
1735 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
1736 BP_MEM_WRITE, retaddr);
1737 }
1738 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
1739 cpu_check_watchpoint(env_cpu(env), page2, size2,
1740 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
1741 BP_MEM_WRITE, retaddr);
1742 }
1743
1744 /*
1745 * XXX: not efficient, but simple.
1746 * This loop must go in the forward direction to avoid issues
1747  * with self-modifying code on 64-bit Windows.
1748 */
1749 for (i = 0; i < size; ++i) {
1750 uint8_t val8;
1751 if (memop_big_endian(op)) {
1752 /* Big-endian extract. */
1753 val8 = val >> (((size - 1) * 8) - (i * 8));
1754 } else {
1755 /* Little-endian extract. */
1756 val8 = val >> (i * 8);
1757 }
1758 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
1759 }
1760 return;
1761 }
1762
1763 haddr = (void *)((uintptr_t)addr + entry->addend);
1764 store_memop(haddr, val, op);
1765 }
1766
1767 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
1768 TCGMemOpIdx oi, uintptr_t retaddr)
1769 {
1770 store_helper(env, addr, val, oi, retaddr, MO_UB);
1771 }
1772
1773 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1774 TCGMemOpIdx oi, uintptr_t retaddr)
1775 {
1776 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
1777 }
1778
1779 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1780 TCGMemOpIdx oi, uintptr_t retaddr)
1781 {
1782 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
1783 }
1784
1785 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1786 TCGMemOpIdx oi, uintptr_t retaddr)
1787 {
1788 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
1789 }
1790
1791 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1792 TCGMemOpIdx oi, uintptr_t retaddr)
1793 {
1794 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
1795 }
1796
1797 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1798 TCGMemOpIdx oi, uintptr_t retaddr)
1799 {
1800 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
1801 }
1802
1803 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1804 TCGMemOpIdx oi, uintptr_t retaddr)
1805 {
1806 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
1807 }
1808
1809 /* First set of helpers allows passing in of OI and RETADDR. This makes
1810 them callable from other helpers. */
1811
1812 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
1813 #define ATOMIC_NAME(X) \
1814 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1815 #define ATOMIC_MMU_DECLS
1816 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
1817 #define ATOMIC_MMU_CLEANUP
1818
1819 #define DATA_SIZE 1
1820 #include "atomic_template.h"
1821
1822 #define DATA_SIZE 2
1823 #include "atomic_template.h"
1824
1825 #define DATA_SIZE 4
1826 #include "atomic_template.h"
1827
1828 #ifdef CONFIG_ATOMIC64
1829 #define DATA_SIZE 8
1830 #include "atomic_template.h"
1831 #endif
1832
1833 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
1834 #define DATA_SIZE 16
1835 #include "atomic_template.h"
1836 #endif
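
/*
 * For reference (names follow from the ATOMIC_NAME expansion above): each
 * inclusion of "atomic_template.h" here generates helpers such as
 * helper_atomic_cmpxchgb_mmu or helper_atomic_fetch_addl_le_mmu, all of
 * which resolve the guest address through ATOMIC_MMU_LOOKUP, i.e. the
 * atomic_mmu_lookup() function defined earlier in this file.
 */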
1837
1838 /* Second set of helpers are directly callable from TCG as helpers. */
1839
1840 #undef EXTRA_ARGS
1841 #undef ATOMIC_NAME
1842 #undef ATOMIC_MMU_LOOKUP
1843 #define EXTRA_ARGS , TCGMemOpIdx oi
1844 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1845 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
1846
1847 #define DATA_SIZE 1
1848 #include "atomic_template.h"
1849
1850 #define DATA_SIZE 2
1851 #include "atomic_template.h"
1852
1853 #define DATA_SIZE 4
1854 #include "atomic_template.h"
1855
1856 #ifdef CONFIG_ATOMIC64
1857 #define DATA_SIZE 8
1858 #include "atomic_template.h"
1859 #endif
1860
1861 /* Code access functions. */
1862
1863 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
1864 TCGMemOpIdx oi, uintptr_t retaddr)
1865 {
1866 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
1867 }
1868
1869 uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
1870 TCGMemOpIdx oi, uintptr_t retaddr)
1871 {
1872 return full_ldub_cmmu(env, addr, oi, retaddr);
1873 }
1874
1875 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
1876 TCGMemOpIdx oi, uintptr_t retaddr)
1877 {
1878 return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
1879 full_le_lduw_cmmu);
1880 }
1881
1882 uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
1883 TCGMemOpIdx oi, uintptr_t retaddr)
1884 {
1885 return full_le_lduw_cmmu(env, addr, oi, retaddr);
1886 }
1887
1888 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
1889 TCGMemOpIdx oi, uintptr_t retaddr)
1890 {
1891 return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
1892 full_be_lduw_cmmu);
1893 }
1894
1895 uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
1896 TCGMemOpIdx oi, uintptr_t retaddr)
1897 {
1898 return full_be_lduw_cmmu(env, addr, oi, retaddr);
1899 }
1900
1901 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
1902 TCGMemOpIdx oi, uintptr_t retaddr)
1903 {
1904 return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
1905 full_le_ldul_cmmu);
1906 }
1907
1908 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
1909 TCGMemOpIdx oi, uintptr_t retaddr)
1910 {
1911 return full_le_ldul_cmmu(env, addr, oi, retaddr);
1912 }
1913
1914 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
1915 TCGMemOpIdx oi, uintptr_t retaddr)
1916 {
1917 return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
1918 full_be_ldul_cmmu);
1919 }
1920
1921 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
1922 TCGMemOpIdx oi, uintptr_t retaddr)
1923 {
1924 return full_be_ldul_cmmu(env, addr, oi, retaddr);
1925 }
1926
1927 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
1928 TCGMemOpIdx oi, uintptr_t retaddr)
1929 {
1930 return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
1931 helper_le_ldq_cmmu);
1932 }
1933
1934 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
1935 TCGMemOpIdx oi, uintptr_t retaddr)
1936 {
1937 return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
1938 helper_be_ldq_cmmu);
1939 }