accel/tcg/cputlb.c
1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace/trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
42
43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
44 /* #define DEBUG_TLB */
45 /* #define DEBUG_TLB_LOG */
46
47 #ifdef DEBUG_TLB
48 # define DEBUG_TLB_GATE 1
49 # ifdef DEBUG_TLB_LOG
50 # define DEBUG_TLB_LOG_GATE 1
51 # else
52 # define DEBUG_TLB_LOG_GATE 0
53 # endif
54 #else
55 # define DEBUG_TLB_GATE 0
56 # define DEBUG_TLB_LOG_GATE 0
57 #endif
58
59 #define tlb_debug(fmt, ...) do { \
60 if (DEBUG_TLB_LOG_GATE) { \
61 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
62 ## __VA_ARGS__); \
63 } else if (DEBUG_TLB_GATE) { \
64 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
65 } \
66 } while (0)
67
68 #define assert_cpu_is_self(cpu) do { \
69 if (DEBUG_TLB_GATE) { \
70 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
71 } \
72 } while (0)
73
74 /* run_on_cpu_data.target_ptr should always be big enough for a
75 * target_ulong even on 32 bit builds */
76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
77
78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
79 */
80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
82
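/*
 * Note: CPUTLBDescFast.mask stores (n_entries - 1) << CPU_TLB_ENTRY_BITS
 * (see tlb_mmu_init() and tlb_mmu_resize_locked() below), so the two
 * helpers here recover the entry count and the table size in bytes
 * from that encoding.
 */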
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
84 {
85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
86 }
87
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
89 {
90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
91 }
92
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94 size_t max_entries)
95 {
96 desc->window_begin_ns = ns;
97 desc->window_max_entries = max_entries;
98 }
99
100 /**
101 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
102 * @desc: The CPUTLBDesc portion of the TLB
103 * @fast: The CPUTLBDescFast portion of the same TLB
104 *
105  * Called with tlb_lock held.
106 *
107 * We have two main constraints when resizing a TLB: (1) we only resize it
108 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
109 * the array or unnecessarily flushing it), which means we do not control how
110 * frequently the resizing can occur; (2) we don't have access to the guest's
111 * future scheduling decisions, and therefore have to decide the magnitude of
112 * the resize based on past observations.
113 *
114 * In general, a memory-hungry process can benefit greatly from an appropriately
115 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
116 * we just have to make the TLB as large as possible; while an oversized TLB
117 * results in minimal TLB miss rates, it also takes longer to be flushed
118 * (flushes can be _very_ frequent), and the reduced locality can also hurt
119 * performance.
120 *
121 * To achieve near-optimal performance for all kinds of workloads, we:
122 *
123 * 1. Aggressively increase the size of the TLB when the use rate of the
124 * TLB being flushed is high, since it is likely that in the near future this
125 * memory-hungry process will execute again, and its memory hungriness will
126 * probably be similar.
127 *
128 * 2. Slowly reduce the size of the TLB as the use rate declines over a
129 * reasonably large time window. The rationale is that if in such a time window
130 * we have not observed a high TLB use rate, it is likely that we won't observe
131 * it in the near future. In that case, once a time window expires we downsize
132 * the TLB to match the maximum use rate observed in the window.
133 *
134 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
135 * since in that range performance is likely near-optimal. Recall that the TLB
136 * is direct mapped, so we want the use rate to be low (or at least not too
137 * high), since otherwise we are likely to have a significant amount of
138 * conflict misses.
139 */
140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
141 int64_t now)
142 {
143 size_t old_size = tlb_n_entries(fast);
144 size_t rate;
145 size_t new_size = old_size;
146 int64_t window_len_ms = 100;
147 int64_t window_len_ns = window_len_ms * 1000 * 1000;
148 bool window_expired = now > desc->window_begin_ns + window_len_ns;
149
150 if (desc->n_used_entries > desc->window_max_entries) {
151 desc->window_max_entries = desc->n_used_entries;
152 }
153 rate = desc->window_max_entries * 100 / old_size;
154
155 if (rate > 70) {
156 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
157 } else if (rate < 30 && window_expired) {
158 size_t ceil = pow2ceil(desc->window_max_entries);
159 size_t expected_rate = desc->window_max_entries * 100 / ceil;
160
161 /*
162 * Avoid undersizing when the max number of entries seen is just below
163 * a pow2. For instance, if max_entries == 1025, the expected use rate
164 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
165 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
166 * later. Thus, make sure that the expected use rate remains below 70%.
167 * (and since we double the size, that means the lowest rate we'd
168 * expect to get is 35%, which is still in the 30-70% range where
169 * we consider that the size is appropriate.)
170 */
171 if (expected_rate > 70) {
172 ceil *= 2;
173 }
174 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
175 }
176
177 if (new_size == old_size) {
178 if (window_expired) {
179 tlb_window_reset(desc, now, desc->n_used_entries);
180 }
181 return;
182 }
183
184 g_free(fast->table);
185 g_free(desc->iotlb);
186
187 tlb_window_reset(desc, now, 0);
188 /* desc->n_used_entries is cleared by the caller */
189 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
190 fast->table = g_try_new(CPUTLBEntry, new_size);
191 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
192
193 /*
194 * If the allocations fail, try smaller sizes. We just freed some
195 * memory, so going back to half of new_size has a good chance of working.
196 * Increased memory pressure elsewhere in the system might cause the
197 * allocations to fail though, so we progressively reduce the allocation
198 * size, aborting if we cannot even allocate the smallest TLB we support.
199 */
200 while (fast->table == NULL || desc->iotlb == NULL) {
201 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
202 error_report("%s: %s", __func__, strerror(errno));
203 abort();
204 }
205 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
206 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
207
208 g_free(fast->table);
209 g_free(desc->iotlb);
210 fast->table = g_try_new(CPUTLBEntry, new_size);
211 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
212 }
213 }
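
/*
 * Worked example of the sizing policy above, with assumed numbers for
 * illustration only: with old_size == 1024 and window_max_entries == 800,
 * rate == 78%, so the table doubles to 2048 (capped at
 * 1 << CPU_TLB_DYN_MAX_BITS).  With window_max_entries == 200 and the
 * window expired, rate == 19%; pow2ceil(200) == 256 would give an
 * expected rate of 78% > 70%, so ceil is doubled to 512 before being
 * clamped to at least 1 << CPU_TLB_DYN_MIN_BITS.
 */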
214
215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
216 {
217 desc->n_used_entries = 0;
218 desc->large_page_addr = -1;
219 desc->large_page_mask = -1;
220 desc->vindex = 0;
221 memset(fast->table, -1, sizeof_tlb(fast));
222 memset(desc->vtable, -1, sizeof(desc->vtable));
223 }
224
225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
226 int64_t now)
227 {
228 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
229 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
230
231 tlb_mmu_resize_locked(desc, fast, now);
232 tlb_mmu_flush_locked(desc, fast);
233 }
234
235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
236 {
237 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
238
239 tlb_window_reset(desc, now, 0);
240 desc->n_used_entries = 0;
241 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
242 fast->table = g_new(CPUTLBEntry, n_entries);
243 desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
244 tlb_mmu_flush_locked(desc, fast);
245 }
246
247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
248 {
249 env_tlb(env)->d[mmu_idx].n_used_entries++;
250 }
251
252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
253 {
254 env_tlb(env)->d[mmu_idx].n_used_entries--;
255 }
256
257 void tlb_init(CPUState *cpu)
258 {
259 CPUArchState *env = cpu->env_ptr;
260 int64_t now = get_clock_realtime();
261 int i;
262
263 qemu_spin_init(&env_tlb(env)->c.lock);
264
265 /* All tlbs are initialized flushed. */
266 env_tlb(env)->c.dirty = 0;
267
268 for (i = 0; i < NB_MMU_MODES; i++) {
269 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
270 }
271 }
272
273 void tlb_destroy(CPUState *cpu)
274 {
275 CPUArchState *env = cpu->env_ptr;
276 int i;
277
278 qemu_spin_destroy(&env_tlb(env)->c.lock);
279 for (i = 0; i < NB_MMU_MODES; i++) {
280 CPUTLBDesc *desc = &env_tlb(env)->d[i];
281 CPUTLBDescFast *fast = &env_tlb(env)->f[i];
282
283 g_free(fast->table);
284 g_free(desc->iotlb);
285 }
286 }
287
288 /* flush_all_helper: run fn across all cpus except the source cpu
289  *
290  * fn is queued as asynchronous work on every cpu other than @src.  The
291  * caller then either runs fn on @src directly, or queues it there as
292  * "safe" work, creating a synchronisation point where all queued work
293  * is finished before execution starts again.
294  */
295 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
296 run_on_cpu_data d)
297 {
298 CPUState *cpu;
299
300 CPU_FOREACH(cpu) {
301 if (cpu != src) {
302 async_run_on_cpu(cpu, fn, d);
303 }
304 }
305 }
306
307 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
308 {
309 CPUState *cpu;
310 size_t full = 0, part = 0, elide = 0;
311
312 CPU_FOREACH(cpu) {
313 CPUArchState *env = cpu->env_ptr;
314
315 full += qatomic_read(&env_tlb(env)->c.full_flush_count);
316 part += qatomic_read(&env_tlb(env)->c.part_flush_count);
317 elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
318 }
319 *pfull = full;
320 *ppart = part;
321 *pelide = elide;
322 }
323
324 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
325 {
326 CPUArchState *env = cpu->env_ptr;
327 uint16_t asked = data.host_int;
328 uint16_t all_dirty, work, to_clean;
329 int64_t now = get_clock_realtime();
330
331 assert_cpu_is_self(cpu);
332
333 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
334
335 qemu_spin_lock(&env_tlb(env)->c.lock);
336
337 all_dirty = env_tlb(env)->c.dirty;
338 to_clean = asked & all_dirty;
339 all_dirty &= ~to_clean;
340 env_tlb(env)->c.dirty = all_dirty;
341
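
    /* "work &= work - 1" clears the lowest set bit, so each dirty
       mmu_idx in to_clean is flushed exactly once. */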
342 for (work = to_clean; work != 0; work &= work - 1) {
343 int mmu_idx = ctz32(work);
344 tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
345 }
346
347 qemu_spin_unlock(&env_tlb(env)->c.lock);
348
349 cpu_tb_jmp_cache_clear(cpu);
350
351 if (to_clean == ALL_MMUIDX_BITS) {
352 qatomic_set(&env_tlb(env)->c.full_flush_count,
353 env_tlb(env)->c.full_flush_count + 1);
354 } else {
355 qatomic_set(&env_tlb(env)->c.part_flush_count,
356 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
357 if (to_clean != asked) {
358 qatomic_set(&env_tlb(env)->c.elide_flush_count,
359 env_tlb(env)->c.elide_flush_count +
360 ctpop16(asked & ~to_clean));
361 }
362 }
363 }
364
365 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
366 {
367 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
368
369 if (cpu->created && !qemu_cpu_is_self(cpu)) {
370 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
371 RUN_ON_CPU_HOST_INT(idxmap));
372 } else {
373 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
374 }
375 }
376
377 void tlb_flush(CPUState *cpu)
378 {
379 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
380 }
381
382 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
383 {
384 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
385
386 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
387
388 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
389 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
390 }
391
392 void tlb_flush_all_cpus(CPUState *src_cpu)
393 {
394 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
395 }
396
397 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
398 {
399 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
400
401 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
402
403 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
404 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
405 }
406
407 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
408 {
409 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
410 }
411
412 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
413 target_ulong page, target_ulong mask)
414 {
415 page &= mask;
416 mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
417
418 return (page == (tlb_entry->addr_read & mask) ||
419 page == (tlb_addr_write(tlb_entry) & mask) ||
420 page == (tlb_entry->addr_code & mask));
421 }
422
423 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
424 target_ulong page)
425 {
426 return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
427 }
428
429 /**
430 * tlb_entry_is_empty - return true if the entry is not in use
431 * @te: pointer to CPUTLBEntry
432 */
433 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
434 {
435 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
436 }
437
438 /* Called with tlb_c.lock held */
439 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
440 target_ulong page,
441 target_ulong mask)
442 {
443 if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
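        /* Filling with -1 marks the entry unused, as recognised by
           tlb_entry_is_empty() above. */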
444 memset(tlb_entry, -1, sizeof(*tlb_entry));
445 return true;
446 }
447 return false;
448 }
449
450 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
451 target_ulong page)
452 {
453 return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
454 }
455
456 /* Called with tlb_c.lock held */
457 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
458 target_ulong page,
459 target_ulong mask)
460 {
461 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
462 int k;
463
464 assert_cpu_is_self(env_cpu(env));
465 for (k = 0; k < CPU_VTLB_SIZE; k++) {
466 if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
467 tlb_n_used_entries_dec(env, mmu_idx);
468 }
469 }
470 }
471
472 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
473 target_ulong page)
474 {
475 tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
476 }
477
478 static void tlb_flush_page_locked(CPUArchState *env, int midx,
479 target_ulong page)
480 {
481 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
482 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
483
484 /* Check if we need to flush due to large pages. */
485 if ((page & lp_mask) == lp_addr) {
486 tlb_debug("forcing full flush midx %d ("
487 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
488 midx, lp_addr, lp_mask);
489 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
490 } else {
491 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
492 tlb_n_used_entries_dec(env, midx);
493 }
494 tlb_flush_vtlb_page_locked(env, midx, page);
495 }
496 }
497
498 /**
499 * tlb_flush_page_by_mmuidx_async_0:
500 * @cpu: cpu on which to flush
501 * @addr: page of virtual address to flush
502 * @idxmap: set of mmu_idx to flush
503 *
504 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
505  * at @addr from the tlbs indicated by @idxmap on @cpu.
506 */
507 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
508 target_ulong addr,
509 uint16_t idxmap)
510 {
511 CPUArchState *env = cpu->env_ptr;
512 int mmu_idx;
513
514 assert_cpu_is_self(cpu);
515
516 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
517
518 qemu_spin_lock(&env_tlb(env)->c.lock);
519 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
520 if ((idxmap >> mmu_idx) & 1) {
521 tlb_flush_page_locked(env, mmu_idx, addr);
522 }
523 }
524 qemu_spin_unlock(&env_tlb(env)->c.lock);
525
526 tb_flush_jmp_cache(cpu, addr);
527 }
528
529 /**
530 * tlb_flush_page_by_mmuidx_async_1:
531 * @cpu: cpu on which to flush
532 * @data: encoded addr + idxmap
533 *
534 * Helper for tlb_flush_page_by_mmuidx and friends, called through
535 * async_run_on_cpu. The idxmap parameter is encoded in the page
536 * offset of the target_ptr field. This limits the set of mmu_idx
537 * that can be passed via this method.
538 */
539 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
540 run_on_cpu_data data)
541 {
542 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
543 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
544 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
545
546 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
547 }
548
549 typedef struct {
550 target_ulong addr;
551 uint16_t idxmap;
552 } TLBFlushPageByMMUIdxData;
553
554 /**
555 * tlb_flush_page_by_mmuidx_async_2:
556 * @cpu: cpu on which to flush
557 * @data: allocated addr + idxmap
558 *
559 * Helper for tlb_flush_page_by_mmuidx and friends, called through
560 * async_run_on_cpu. The addr+idxmap parameters are stored in a
561 * TLBFlushPageByMMUIdxData structure that has been allocated
562 * specifically for this helper. Free the structure when done.
563 */
564 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
565 run_on_cpu_data data)
566 {
567 TLBFlushPageByMMUIdxData *d = data.host_ptr;
568
569 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
570 g_free(d);
571 }
572
573 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
574 {
575 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
576
577 /* This should already be page aligned */
578 addr &= TARGET_PAGE_MASK;
579
580 if (qemu_cpu_is_self(cpu)) {
581 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
582 } else if (idxmap < TARGET_PAGE_SIZE) {
583 /*
584 * Most targets have only a few mmu_idx. In the case where
585 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
586 * allocating memory for this operation.
587 */
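        /* For example, with TARGET_PAGE_BITS == 12 (an assumed value, for
           illustration only), any idxmap below 4096 fits in the
           page-offset bits of the page-aligned addr. */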
588 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
589 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
590 } else {
591 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
592
593 /* Otherwise allocate a structure, freed by the worker. */
594 d->addr = addr;
595 d->idxmap = idxmap;
596 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
597 RUN_ON_CPU_HOST_PTR(d));
598 }
599 }
600
601 void tlb_flush_page(CPUState *cpu, target_ulong addr)
602 {
603 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
604 }
605
606 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
607 uint16_t idxmap)
608 {
609 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
610
611 /* This should already be page aligned */
612 addr &= TARGET_PAGE_MASK;
613
614 /*
615 * Allocate memory to hold addr+idxmap only when needed.
616 * See tlb_flush_page_by_mmuidx for details.
617 */
618 if (idxmap < TARGET_PAGE_SIZE) {
619 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
620 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
621 } else {
622 CPUState *dst_cpu;
623
624 /* Allocate a separate data block for each destination cpu. */
625 CPU_FOREACH(dst_cpu) {
626 if (dst_cpu != src_cpu) {
627 TLBFlushPageByMMUIdxData *d
628 = g_new(TLBFlushPageByMMUIdxData, 1);
629
630 d->addr = addr;
631 d->idxmap = idxmap;
632 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
633 RUN_ON_CPU_HOST_PTR(d));
634 }
635 }
636 }
637
638 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
639 }
640
641 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
642 {
643 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
644 }
645
646 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
647 target_ulong addr,
648 uint16_t idxmap)
649 {
650 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
651
652 /* This should already be page aligned */
653 addr &= TARGET_PAGE_MASK;
654
655 /*
656 * Allocate memory to hold addr+idxmap only when needed.
657 * See tlb_flush_page_by_mmuidx for details.
658 */
659 if (idxmap < TARGET_PAGE_SIZE) {
660 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
661 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
662 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
663 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
664 } else {
665 CPUState *dst_cpu;
666 TLBFlushPageByMMUIdxData *d;
667
668 /* Allocate a separate data block for each destination cpu. */
669 CPU_FOREACH(dst_cpu) {
670 if (dst_cpu != src_cpu) {
671 d = g_new(TLBFlushPageByMMUIdxData, 1);
672 d->addr = addr;
673 d->idxmap = idxmap;
674 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
675 RUN_ON_CPU_HOST_PTR(d));
676 }
677 }
678
679 d = g_new(TLBFlushPageByMMUIdxData, 1);
680 d->addr = addr;
681 d->idxmap = idxmap;
682 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
683 RUN_ON_CPU_HOST_PTR(d));
684 }
685 }
686
687 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
688 {
689 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
690 }
691
692 static void tlb_flush_page_bits_locked(CPUArchState *env, int midx,
693 target_ulong page, unsigned bits)
694 {
695 CPUTLBDesc *d = &env_tlb(env)->d[midx];
696 CPUTLBDescFast *f = &env_tlb(env)->f[midx];
697 target_ulong mask = MAKE_64BIT_MASK(0, bits);
698
699 /*
700 * If @bits is smaller than the tlb size, there may be multiple entries
701 * within the TLB; otherwise all addresses that match under @mask hit
702 * the same TLB entry.
703 *
704 * TODO: Perhaps allow bits to be a few bits less than the size.
705 * For now, just flush the entire TLB.
706 */
707 if (mask < f->mask) {
708 tlb_debug("forcing full flush midx %d ("
709 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
710 midx, page, mask);
711 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
712 return;
713 }
714
715 /* Check if we need to flush due to large pages. */
716 if ((page & d->large_page_mask) == d->large_page_addr) {
717 tlb_debug("forcing full flush midx %d ("
718 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
719 midx, d->large_page_addr, d->large_page_mask);
720 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
721 return;
722 }
723
724 if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
725 tlb_n_used_entries_dec(env, midx);
726 }
727 tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
728 }
729
730 typedef struct {
731 target_ulong addr;
732 uint16_t idxmap;
733 uint16_t bits;
734 } TLBFlushPageBitsByMMUIdxData;
735
736 static void
737 tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
738 TLBFlushPageBitsByMMUIdxData d)
739 {
740 CPUArchState *env = cpu->env_ptr;
741 int mmu_idx;
742
743 assert_cpu_is_self(cpu);
744
745 tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
746 d.addr, d.bits, d.idxmap);
747
748 qemu_spin_lock(&env_tlb(env)->c.lock);
749 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
750 if ((d.idxmap >> mmu_idx) & 1) {
751 tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
752 }
753 }
754 qemu_spin_unlock(&env_tlb(env)->c.lock);
755
756 tb_flush_jmp_cache(cpu, d.addr);
757 }
758
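/*
 * Encoding used by encode_pbm_to_runon()/decode_runon_to_pbm() below:
 * the page-aligned @addr occupies the high bits of target_ptr, @bits
 * lives in the low 6 bits, and @idxmap is shifted into the remaining
 * page-offset bits -- hence the requirement that @idxmap fit within
 * TARGET_PAGE_BITS - 6 bits.
 */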
759 static bool encode_pbm_to_runon(run_on_cpu_data *out,
760 TLBFlushPageBitsByMMUIdxData d)
761 {
762     /* We need 6 bits to hold @bits up to 63. */
763 if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
764 *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
765 return true;
766 }
767 return false;
768 }
769
770 static TLBFlushPageBitsByMMUIdxData
771 decode_runon_to_pbm(run_on_cpu_data data)
772 {
773 target_ulong addr_map_bits = (target_ulong) data.target_ptr;
774 return (TLBFlushPageBitsByMMUIdxData){
775 .addr = addr_map_bits & TARGET_PAGE_MASK,
776 .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
777 .bits = addr_map_bits & 0x3f
778 };
779 }
780
781 static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
782 run_on_cpu_data runon)
783 {
784 tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
785 }
786
787 static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
788 run_on_cpu_data data)
789 {
790 TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
791 tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
792 g_free(d);
793 }
794
795 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
796 uint16_t idxmap, unsigned bits)
797 {
798 TLBFlushPageBitsByMMUIdxData d;
799 run_on_cpu_data runon;
800
801 /* If all bits are significant, this devolves to tlb_flush_page. */
802 if (bits >= TARGET_LONG_BITS) {
803 tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
804 return;
805 }
806 /* If no page bits are significant, this devolves to tlb_flush. */
807 if (bits < TARGET_PAGE_BITS) {
808 tlb_flush_by_mmuidx(cpu, idxmap);
809 return;
810 }
811
812 /* This should already be page aligned */
813 d.addr = addr & TARGET_PAGE_MASK;
814 d.idxmap = idxmap;
815 d.bits = bits;
816
817 if (qemu_cpu_is_self(cpu)) {
818 tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
819 } else if (encode_pbm_to_runon(&runon, d)) {
820 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
821 } else {
822 TLBFlushPageBitsByMMUIdxData *p
823 = g_new(TLBFlushPageBitsByMMUIdxData, 1);
824
825 /* Otherwise allocate a structure, freed by the worker. */
826 *p = d;
827 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2,
828 RUN_ON_CPU_HOST_PTR(p));
829 }
830 }
831
832 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
833 target_ulong addr,
834 uint16_t idxmap,
835 unsigned bits)
836 {
837 TLBFlushPageBitsByMMUIdxData d;
838 run_on_cpu_data runon;
839
840 /* If all bits are significant, this devolves to tlb_flush_page. */
841 if (bits >= TARGET_LONG_BITS) {
842 tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
843 return;
844 }
845 /* If no page bits are significant, this devolves to tlb_flush. */
846 if (bits < TARGET_PAGE_BITS) {
847 tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
848 return;
849 }
850
851 /* This should already be page aligned */
852 d.addr = addr & TARGET_PAGE_MASK;
853 d.idxmap = idxmap;
854 d.bits = bits;
855
856 if (encode_pbm_to_runon(&runon, d)) {
857 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
858 } else {
859 CPUState *dst_cpu;
860 TLBFlushPageBitsByMMUIdxData *p;
861
862 /* Allocate a separate data block for each destination cpu. */
863 CPU_FOREACH(dst_cpu) {
864 if (dst_cpu != src_cpu) {
865 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
866 *p = d;
867 async_run_on_cpu(dst_cpu,
868 tlb_flush_page_bits_by_mmuidx_async_2,
869 RUN_ON_CPU_HOST_PTR(p));
870 }
871 }
872 }
873
874 tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d);
875 }
876
877 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
878 target_ulong addr,
879 uint16_t idxmap,
880 unsigned bits)
881 {
882 TLBFlushPageBitsByMMUIdxData d;
883 run_on_cpu_data runon;
884
885 /* If all bits are significant, this devolves to tlb_flush_page. */
886 if (bits >= TARGET_LONG_BITS) {
887 tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
888 return;
889 }
890 /* If no page bits are significant, this devolves to tlb_flush. */
891 if (bits < TARGET_PAGE_BITS) {
892 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
893 return;
894 }
895
896 /* This should already be page aligned */
897 d.addr = addr & TARGET_PAGE_MASK;
898 d.idxmap = idxmap;
899 d.bits = bits;
900
901 if (encode_pbm_to_runon(&runon, d)) {
902 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
903 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1,
904 runon);
905 } else {
906 CPUState *dst_cpu;
907 TLBFlushPageBitsByMMUIdxData *p;
908
909 /* Allocate a separate data block for each destination cpu. */
910 CPU_FOREACH(dst_cpu) {
911 if (dst_cpu != src_cpu) {
912 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
913 *p = d;
914 async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
915 RUN_ON_CPU_HOST_PTR(p));
916 }
917 }
918
919 p = g_new(TLBFlushPageBitsByMMUIdxData, 1);
920 *p = d;
921 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2,
922 RUN_ON_CPU_HOST_PTR(p));
923 }
924 }
925
926 /* update the TLBs so that writes to code in the virtual page 'addr'
927 can be detected */
928 void tlb_protect_code(ram_addr_t ram_addr)
929 {
930 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
931 DIRTY_MEMORY_CODE);
932 }
933
934 /* update the TLB so that writes in physical page 'ram_addr' are no longer
935    tested for self-modifying code */
936 void tlb_unprotect_code(ram_addr_t ram_addr)
937 {
938 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
939 }
940
941
942 /*
943 * Dirty write flag handling
944 *
945 * When the TCG code writes to a location it looks up the address in
946 * the TLB and uses that data to compute the final address. If any of
947 * the lower bits of the address are set then the slow path is forced.
948 * There are a number of reasons to do this but for normal RAM the
949 * most usual is detecting writes to code regions which may invalidate
950 * generated code.
951 *
952 * Other vCPUs might be reading their TLBs during guest execution, so we update
953 * te->addr_write with qatomic_set. We don't need to worry about this for
954 * oversized guests as MTTCG is disabled for them.
955 *
956 * Called with tlb_c.lock held.
957 */
958 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
959 uintptr_t start, uintptr_t length)
960 {
961 uintptr_t addr = tlb_entry->addr_write;
962
963 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
964 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
965 addr &= TARGET_PAGE_MASK;
966 addr += tlb_entry->addend;
967 if ((addr - start) < length) {
968 #if TCG_OVERSIZED_GUEST
969 tlb_entry->addr_write |= TLB_NOTDIRTY;
970 #else
971 qatomic_set(&tlb_entry->addr_write,
972 tlb_entry->addr_write | TLB_NOTDIRTY);
973 #endif
974 }
975 }
976 }
977
978 /*
979 * Called with tlb_c.lock held.
980 * Called only from the vCPU context, i.e. the TLB's owner thread.
981 */
982 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
983 {
984 *d = *s;
985 }
986
987 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
988 * the target vCPU).
989 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
990 * thing actually updated is the target TLB entry ->addr_write flags.
991 */
992 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
993 {
994 CPUArchState *env;
995
996 int mmu_idx;
997
998 env = cpu->env_ptr;
999 qemu_spin_lock(&env_tlb(env)->c.lock);
1000 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1001 unsigned int i;
1002 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
1003
1004 for (i = 0; i < n; i++) {
1005 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
1006 start1, length);
1007 }
1008
1009 for (i = 0; i < CPU_VTLB_SIZE; i++) {
1010 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
1011 start1, length);
1012 }
1013 }
1014 qemu_spin_unlock(&env_tlb(env)->c.lock);
1015 }
1016
1017 /* Called with tlb_c.lock held */
1018 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
1019 target_ulong vaddr)
1020 {
1021 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
1022 tlb_entry->addr_write = vaddr;
1023 }
1024 }
1025
1026 /* update the TLB corresponding to virtual page vaddr
1027 so that it is no longer dirty */
1028 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
1029 {
1030 CPUArchState *env = cpu->env_ptr;
1031 int mmu_idx;
1032
1033 assert_cpu_is_self(cpu);
1034
1035 vaddr &= TARGET_PAGE_MASK;
1036 qemu_spin_lock(&env_tlb(env)->c.lock);
1037 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1038 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
1039 }
1040
1041 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1042 int k;
1043 for (k = 0; k < CPU_VTLB_SIZE; k++) {
1044 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
1045 }
1046 }
1047 qemu_spin_unlock(&env_tlb(env)->c.lock);
1048 }
1049
1050 /* Our TLB does not support large pages, so remember the area covered by
1051 large pages and trigger a full TLB flush if these are invalidated. */
1052 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
1053 target_ulong vaddr, target_ulong size)
1054 {
1055 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
1056 target_ulong lp_mask = ~(size - 1);
1057
1058 if (lp_addr == (target_ulong)-1) {
1059 /* No previous large page. */
1060 lp_addr = vaddr;
1061 } else {
1062 /* Extend the existing region to include the new page.
1063 This is a compromise between unnecessary flushes and
1064 the cost of maintaining a full variable size TLB. */
1065 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
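        /* Widen the region (shift the mask left) until lp_addr and vaddr
           fall within the same naturally aligned area. */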
1066 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
1067 lp_mask <<= 1;
1068 }
1069 }
1070 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
1071 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
1072 }
1073
1074 /* Add a new TLB entry. At most one entry for a given virtual address
1075 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
1076 * supplied size is only used by tlb_flush_page.
1077 *
1078 * Called from TCG-generated code, which is under an RCU read-side
1079 * critical section.
1080 */
1081 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
1082 hwaddr paddr, MemTxAttrs attrs, int prot,
1083 int mmu_idx, target_ulong size)
1084 {
1085 CPUArchState *env = cpu->env_ptr;
1086 CPUTLB *tlb = env_tlb(env);
1087 CPUTLBDesc *desc = &tlb->d[mmu_idx];
1088 MemoryRegionSection *section;
1089 unsigned int index;
1090 target_ulong address;
1091 target_ulong write_address;
1092 uintptr_t addend;
1093 CPUTLBEntry *te, tn;
1094 hwaddr iotlb, xlat, sz, paddr_page;
1095 target_ulong vaddr_page;
1096 int asidx = cpu_asidx_from_attrs(cpu, attrs);
1097 int wp_flags;
1098 bool is_ram, is_romd;
1099
1100 assert_cpu_is_self(cpu);
1101
1102 if (size <= TARGET_PAGE_SIZE) {
1103 sz = TARGET_PAGE_SIZE;
1104 } else {
1105 tlb_add_large_page(env, mmu_idx, vaddr, size);
1106 sz = size;
1107 }
1108 vaddr_page = vaddr & TARGET_PAGE_MASK;
1109 paddr_page = paddr & TARGET_PAGE_MASK;
1110
1111 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
1112 &xlat, &sz, attrs, &prot);
1113 assert(sz >= TARGET_PAGE_SIZE);
1114
1115 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
1116 " prot=%x idx=%d\n",
1117 vaddr, paddr, prot, mmu_idx);
1118
1119 address = vaddr_page;
1120 if (size < TARGET_PAGE_SIZE) {
1121 /* Repeat the MMU check and TLB fill on every access. */
1122 address |= TLB_INVALID_MASK;
1123 }
1124 if (attrs.byte_swap) {
1125 address |= TLB_BSWAP;
1126 }
1127
1128 is_ram = memory_region_is_ram(section->mr);
1129 is_romd = memory_region_is_romd(section->mr);
1130
1131 if (is_ram || is_romd) {
1132 /* RAM and ROMD both have associated host memory. */
1133 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
1134 } else {
1135 /* I/O does not; force the host address to NULL. */
1136 addend = 0;
1137 }
1138
1139 write_address = address;
1140 if (is_ram) {
1141 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1142 /*
1143 * Computing is_clean is expensive; avoid all that unless
1144 * the page is actually writable.
1145 */
1146 if (prot & PAGE_WRITE) {
1147 if (section->readonly) {
1148 write_address |= TLB_DISCARD_WRITE;
1149 } else if (cpu_physical_memory_is_clean(iotlb)) {
1150 write_address |= TLB_NOTDIRTY;
1151 }
1152 }
1153 } else {
1154 /* I/O or ROMD */
1155 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
1156 /*
1157 * Writes to romd devices must go through MMIO to enable write.
1158 * Reads to romd devices go through the ram_ptr found above,
1159 * but of course reads to I/O must go through MMIO.
1160 */
1161 write_address |= TLB_MMIO;
1162 if (!is_romd) {
1163 address = write_address;
1164 }
1165 }
1166
1167 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
1168 TARGET_PAGE_SIZE);
1169
1170 index = tlb_index(env, mmu_idx, vaddr_page);
1171 te = tlb_entry(env, mmu_idx, vaddr_page);
1172
1173 /*
1174 * Hold the TLB lock for the rest of the function. We could acquire/release
1175 * the lock several times in the function, but it is faster to amortize the
1176 * acquisition cost by acquiring it just once. Note that this leads to
1177 * a longer critical section, but this is not a concern since the TLB lock
1178 * is unlikely to be contended.
1179 */
1180 qemu_spin_lock(&tlb->c.lock);
1181
1182 /* Note that the tlb is no longer clean. */
1183 tlb->c.dirty |= 1 << mmu_idx;
1184
1185 /* Make sure there's no cached translation for the new page. */
1186 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
1187
1188 /*
1189 * Only evict the old entry to the victim tlb if it's for a
1190 * different page; otherwise just overwrite the stale data.
1191 */
1192 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
1193 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
1194 CPUTLBEntry *tv = &desc->vtable[vidx];
1195
1196 /* Evict the old entry into the victim tlb. */
1197 copy_tlb_helper_locked(tv, te);
1198 desc->viotlb[vidx] = desc->iotlb[index];
1199 tlb_n_used_entries_dec(env, mmu_idx);
1200 }
1201
1202 /* refill the tlb */
1203 /*
1204 * At this point iotlb contains a physical section number in the lower
1205 * TARGET_PAGE_BITS, and either
1206 * + the ram_addr_t of the page base of the target RAM (RAM)
1207 * + the offset within section->mr of the page base (I/O, ROMD)
1208 * We subtract the vaddr_page (which is page aligned and thus won't
1209 * disturb the low bits) to give an offset which can be added to the
1210 * (non-page-aligned) vaddr of the eventual memory access to get
1211 * the MemoryRegion offset for the access. Note that the vaddr we
1212 * subtract here is that of the page base, and not the same as the
1213 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
1214 */
1215 desc->iotlb[index].addr = iotlb - vaddr_page;
1216 desc->iotlb[index].attrs = attrs;
1217
1218 /* Now calculate the new entry */
1219 tn.addend = addend - vaddr_page;
1220 if (prot & PAGE_READ) {
1221 tn.addr_read = address;
1222 if (wp_flags & BP_MEM_READ) {
1223 tn.addr_read |= TLB_WATCHPOINT;
1224 }
1225 } else {
1226 tn.addr_read = -1;
1227 }
1228
1229 if (prot & PAGE_EXEC) {
1230 tn.addr_code = address;
1231 } else {
1232 tn.addr_code = -1;
1233 }
1234
1235 tn.addr_write = -1;
1236 if (prot & PAGE_WRITE) {
1237 tn.addr_write = write_address;
1238 if (prot & PAGE_WRITE_INV) {
1239 tn.addr_write |= TLB_INVALID_MASK;
1240 }
1241 if (wp_flags & BP_MEM_WRITE) {
1242 tn.addr_write |= TLB_WATCHPOINT;
1243 }
1244 }
1245
1246 copy_tlb_helper_locked(te, &tn);
1247 tlb_n_used_entries_inc(env, mmu_idx);
1248 qemu_spin_unlock(&tlb->c.lock);
1249 }
1250
1251 /* Add a new TLB entry, but without specifying the memory
1252 * transaction attributes to be used.
1253 */
1254 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
1255 hwaddr paddr, int prot,
1256 int mmu_idx, target_ulong size)
1257 {
1258 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1259 prot, mmu_idx, size);
1260 }
1261
1262 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1263 {
1264 ram_addr_t ram_addr;
1265
1266 ram_addr = qemu_ram_addr_from_host(ptr);
1267 if (ram_addr == RAM_ADDR_INVALID) {
1268 error_report("Bad ram pointer %p", ptr);
1269 abort();
1270 }
1271 return ram_addr;
1272 }
1273
1274 /*
1275 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1276 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1277 * be discarded and looked up again (e.g. via tlb_entry()).
1278 */
1279 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1280 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1281 {
1282 CPUClass *cc = CPU_GET_CLASS(cpu);
1283 bool ok;
1284
1285 /*
1286 * This is not a probe, so only valid return is success; failure
1287 * should result in exception + longjmp to the cpu loop.
1288 */
1289 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1290 assert(ok);
1291 }
1292
1293 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1294 int mmu_idx, target_ulong addr, uintptr_t retaddr,
1295 MMUAccessType access_type, MemOp op)
1296 {
1297 CPUState *cpu = env_cpu(env);
1298 hwaddr mr_offset;
1299 MemoryRegionSection *section;
1300 MemoryRegion *mr;
1301 uint64_t val;
1302 bool locked = false;
1303 MemTxResult r;
1304
1305 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1306 mr = section->mr;
1307 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1308 cpu->mem_io_pc = retaddr;
1309 if (!cpu->can_do_io) {
1310 cpu_io_recompile(cpu, retaddr);
1311 }
1312
1313 if (!qemu_mutex_iothread_locked()) {
1314 qemu_mutex_lock_iothread();
1315 locked = true;
1316 }
1317 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1318 if (r != MEMTX_OK) {
1319 hwaddr physaddr = mr_offset +
1320 section->offset_within_address_space -
1321 section->offset_within_region;
1322
1323 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1324 mmu_idx, iotlbentry->attrs, r, retaddr);
1325 }
1326 if (locked) {
1327 qemu_mutex_unlock_iothread();
1328 }
1329
1330 return val;
1331 }
1332
1333 /*
1334 * Save a potentially trashed IOTLB entry for later lookup by plugin.
1335 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
1336 * because of the side effect of io_writex changing memory layout.
1337 */
1338 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1339 MemoryRegionSection *section, hwaddr mr_offset)
1340 {
1341 #ifdef CONFIG_PLUGIN
1342 SavedIOTLB *saved = &cs->saved_iotlb;
1343 saved->addr = addr;
1344 saved->section = section;
1345 saved->mr_offset = mr_offset;
1346 #endif
1347 }
1348
1349 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1350 int mmu_idx, uint64_t val, target_ulong addr,
1351 uintptr_t retaddr, MemOp op)
1352 {
1353 CPUState *cpu = env_cpu(env);
1354 hwaddr mr_offset;
1355 MemoryRegionSection *section;
1356 MemoryRegion *mr;
1357 bool locked = false;
1358 MemTxResult r;
1359
1360 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1361 mr = section->mr;
1362 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1363 if (!cpu->can_do_io) {
1364 cpu_io_recompile(cpu, retaddr);
1365 }
1366 cpu->mem_io_pc = retaddr;
1367
1368 /*
1369 * The memory_region_dispatch may trigger a flush/resize
1370 * so for plugins we save the iotlb_data just in case.
1371 */
1372 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1373
1374 if (!qemu_mutex_iothread_locked()) {
1375 qemu_mutex_lock_iothread();
1376 locked = true;
1377 }
1378 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1379 if (r != MEMTX_OK) {
1380 hwaddr physaddr = mr_offset +
1381 section->offset_within_address_space -
1382 section->offset_within_region;
1383
1384 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1385 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1386 retaddr);
1387 }
1388 if (locked) {
1389 qemu_mutex_unlock_iothread();
1390 }
1391 }
1392
1393 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1394 {
1395 #if TCG_OVERSIZED_GUEST
1396 return *(target_ulong *)((uintptr_t)entry + ofs);
1397 #else
1398 /* ofs might correspond to .addr_write, so use qatomic_read */
1399 return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
1400 #endif
1401 }
1402
1403 /* Return true if ADDR is present in the victim tlb, and has been copied
1404 back to the main tlb. */
1405 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1406 size_t elt_ofs, target_ulong page)
1407 {
1408 size_t vidx;
1409
1410 assert_cpu_is_self(env_cpu(env));
1411 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1412 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1413 target_ulong cmp;
1414
1415 /* elt_ofs might correspond to .addr_write, so use qatomic_read */
1416 #if TCG_OVERSIZED_GUEST
1417 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1418 #else
1419 cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1420 #endif
1421
1422 if (cmp == page) {
1423 /* Found entry in victim tlb, swap tlb and iotlb. */
1424 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1425
1426 qemu_spin_lock(&env_tlb(env)->c.lock);
1427 copy_tlb_helper_locked(&tmptlb, tlb);
1428 copy_tlb_helper_locked(tlb, vtlb);
1429 copy_tlb_helper_locked(vtlb, &tmptlb);
1430 qemu_spin_unlock(&env_tlb(env)->c.lock);
1431
1432 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1433 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1434 tmpio = *io; *io = *vio; *vio = tmpio;
1435 return true;
1436 }
1437 }
1438 return false;
1439 }
1440
1441 /* Macro to call the above, with local variables from the use context. */
1442 #define VICTIM_TLB_HIT(TY, ADDR) \
1443 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1444 (ADDR) & TARGET_PAGE_MASK)
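
/*
 * For example, VICTIM_TLB_HIT(addr_write, addr) as used below expects
 * "env", "mmu_idx" and "index" to already be in scope at the call site.
 */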
1445
1446 /*
1447 * Return a ram_addr_t for the virtual address for execution.
1448 *
1449 * Return -1 if we can't translate and execute from an entire page
1450 * of RAM. This will force us to execute by loading and translating
1451 * one insn at a time, without caching.
1452 *
1453 * NOTE: This function will trigger an exception if the page is
1454 * not executable.
1455 */
1456 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1457 void **hostp)
1458 {
1459 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1460 uintptr_t index = tlb_index(env, mmu_idx, addr);
1461 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1462 void *p;
1463
1464 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1465 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1466 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1467 index = tlb_index(env, mmu_idx, addr);
1468 entry = tlb_entry(env, mmu_idx, addr);
1469
1470 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1471 /*
1472 * The MMU protection covers a smaller range than a target
1473 * page, so we must redo the MMU check for every insn.
1474 */
1475 return -1;
1476 }
1477 }
1478 assert(tlb_hit(entry->addr_code, addr));
1479 }
1480
1481 if (unlikely(entry->addr_code & TLB_MMIO)) {
1482 /* The region is not backed by RAM. */
1483 if (hostp) {
1484 *hostp = NULL;
1485 }
1486 return -1;
1487 }
1488
1489 p = (void *)((uintptr_t)addr + entry->addend);
1490 if (hostp) {
1491 *hostp = p;
1492 }
1493 return qemu_ram_addr_from_host_nofail(p);
1494 }
1495
1496 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1497 {
1498 return get_page_addr_code_hostp(env, addr, NULL);
1499 }
1500
1501 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1502 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1503 {
1504 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1505
1506 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1507
1508 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1509 struct page_collection *pages
1510 = page_collection_lock(ram_addr, ram_addr + size);
1511 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1512 page_collection_unlock(pages);
1513 }
1514
1515 /*
1516 * Set both VGA and migration bits for simplicity and to remove
1517 * the notdirty callback faster.
1518 */
1519 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1520
1521 /* We remove the notdirty callback only if the code has been flushed. */
1522 if (!cpu_physical_memory_is_clean(ram_addr)) {
1523 trace_memory_notdirty_set_dirty(mem_vaddr);
1524 tlb_set_dirty(cpu, mem_vaddr);
1525 }
1526 }
1527
1528 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1529 int fault_size, MMUAccessType access_type,
1530 int mmu_idx, bool nonfault,
1531 void **phost, uintptr_t retaddr)
1532 {
1533 uintptr_t index = tlb_index(env, mmu_idx, addr);
1534 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1535 target_ulong tlb_addr, page_addr;
1536 size_t elt_ofs;
1537 int flags;
1538
1539 switch (access_type) {
1540 case MMU_DATA_LOAD:
1541 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1542 break;
1543 case MMU_DATA_STORE:
1544 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1545 break;
1546 case MMU_INST_FETCH:
1547 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1548 break;
1549 default:
1550 g_assert_not_reached();
1551 }
1552 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1553
1554 page_addr = addr & TARGET_PAGE_MASK;
1555 if (!tlb_hit_page(tlb_addr, page_addr)) {
1556 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1557 CPUState *cs = env_cpu(env);
1558 CPUClass *cc = CPU_GET_CLASS(cs);
1559
1560 if (!cc->tlb_fill(cs, addr, fault_size, access_type,
1561 mmu_idx, nonfault, retaddr)) {
1562 /* Non-faulting page table read failed. */
1563 *phost = NULL;
1564 return TLB_INVALID_MASK;
1565 }
1566
1567 /* TLB resize via tlb_fill may have moved the entry. */
1568 entry = tlb_entry(env, mmu_idx, addr);
1569 }
1570 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1571 }
1572 flags = tlb_addr & TLB_FLAGS_MASK;
1573
1574 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
1575 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1576 *phost = NULL;
1577 return TLB_MMIO;
1578 }
1579
1580 /* Everything else is RAM. */
1581 *phost = (void *)((uintptr_t)addr + entry->addend);
1582 return flags;
1583 }
1584
1585 int probe_access_flags(CPUArchState *env, target_ulong addr,
1586 MMUAccessType access_type, int mmu_idx,
1587 bool nonfault, void **phost, uintptr_t retaddr)
1588 {
1589 int flags;
1590
1591 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1592 nonfault, phost, retaddr);
1593
1594 /* Handle clean RAM pages. */
1595 if (unlikely(flags & TLB_NOTDIRTY)) {
1596 uintptr_t index = tlb_index(env, mmu_idx, addr);
1597 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1598
1599 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1600 flags &= ~TLB_NOTDIRTY;
1601 }
1602
1603 return flags;
1604 }
1605
1606 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1607 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1608 {
1609 void *host;
1610 int flags;
1611
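    /* -(addr | TARGET_PAGE_MASK) is the number of bytes left on addr's
       page, so this asserts that the access does not cross a page
       boundary. */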
1612 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1613
1614 flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1615 false, &host, retaddr);
1616
1617 /* Per the interface, size == 0 merely faults the access. */
1618 if (size == 0) {
1619 return NULL;
1620 }
1621
1622 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1623 uintptr_t index = tlb_index(env, mmu_idx, addr);
1624 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1625
1626 /* Handle watchpoints. */
1627 if (flags & TLB_WATCHPOINT) {
1628 int wp_access = (access_type == MMU_DATA_STORE
1629 ? BP_MEM_WRITE : BP_MEM_READ);
1630 cpu_check_watchpoint(env_cpu(env), addr, size,
1631 iotlbentry->attrs, wp_access, retaddr);
1632 }
1633
1634 /* Handle clean RAM pages. */
1635 if (flags & TLB_NOTDIRTY) {
1636 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1637 }
1638 }
1639
1640 return host;
1641 }
1642
1643 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1644 MMUAccessType access_type, int mmu_idx)
1645 {
1646 void *host;
1647 int flags;
1648
1649 flags = probe_access_internal(env, addr, 0, access_type,
1650 mmu_idx, true, &host, 0);
1651
1652     /* No combination of flags is expected by the caller. */
1653 return flags ? NULL : host;
1654 }
1655
1656 #ifdef CONFIG_PLUGIN
1657 /*
1658 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1659  * This should be a hot path as we will have just looked this address up
1660 * in the softmmu lookup code (or helper). We don't handle re-fills or
1661 * checking the victim table. This is purely informational.
1662 *
1663 * This almost never fails as the memory access being instrumented
1664 * should have just filled the TLB. The one corner case is io_writex
1665 * which can cause TLB flushes and potential resizing of the TLBs
1666 * losing the information we need. In those cases we need to recover
1667 * data from a copy of the iotlbentry. As long as this always occurs
1668 * from the same thread (which a mem callback will be) this is safe.
1669 */
1670
1671 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1672 bool is_store, struct qemu_plugin_hwaddr *data)
1673 {
1674 CPUArchState *env = cpu->env_ptr;
1675 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1676 uintptr_t index = tlb_index(env, mmu_idx, addr);
1677 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1678
1679 if (likely(tlb_hit(tlb_addr, addr))) {
1680 /* We must have an iotlb entry for MMIO */
1681 if (tlb_addr & TLB_MMIO) {
1682 CPUIOTLBEntry *iotlbentry;
1683 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1684 data->is_io = true;
1685 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1686 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1687 } else {
1688 data->is_io = false;
1689 data->v.ram.hostaddr = addr + tlbe->addend;
1690 }
1691 return true;
1692 } else {
1693 SavedIOTLB *saved = &cpu->saved_iotlb;
1694 data->is_io = true;
1695 data->v.io.section = saved->section;
1696 data->v.io.offset = saved->mr_offset;
1697 return true;
1698 }
1699 }
1700
1701 #endif
1702
1703 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1704 * operations, or io operations to proceed. Return the host address. */
1705 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1706 TCGMemOpIdx oi, uintptr_t retaddr)
1707 {
1708 size_t mmu_idx = get_mmuidx(oi);
1709 uintptr_t index = tlb_index(env, mmu_idx, addr);
1710 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1711 target_ulong tlb_addr = tlb_addr_write(tlbe);
1712 MemOp mop = get_memop(oi);
1713 int a_bits = get_alignment_bits(mop);
1714 int s_bits = mop & MO_SIZE;
1715 void *hostaddr;
1716
1717 /* Adjust the given return address. */
1718 retaddr -= GETPC_ADJ;
1719
1720 /* Enforce guest required alignment. */
1721 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1722 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1723 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1724 mmu_idx, retaddr);
1725 }
1726
1727 /* Enforce qemu required alignment. */
1728 if (unlikely(addr & ((1 << s_bits) - 1))) {
1729 /* We get here if guest alignment was not requested,
1730 or was not enforced by cpu_unaligned_access above.
1731 We might widen the access and emulate, but for now
1732 mark an exception and exit the cpu loop. */
1733 goto stop_the_world;
1734 }
1735
1736 /* Check TLB entry and enforce page permissions. */
1737 if (!tlb_hit(tlb_addr, addr)) {
1738 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1739 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1740 mmu_idx, retaddr);
1741 index = tlb_index(env, mmu_idx, addr);
1742 tlbe = tlb_entry(env, mmu_idx, addr);
1743 }
1744 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1745 }
1746
1747 /* Notice an IO access or a needs-MMU-lookup access */
1748 if (unlikely(tlb_addr & TLB_MMIO)) {
1749 /* There's really nothing that can be done to
1750 support this apart from stop-the-world. */
1751 goto stop_the_world;
1752 }
1753
1754 /* Let the guest notice RMW on a write-only page. */
1755 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1756 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1757 mmu_idx, retaddr);
1758 /* Since we don't support reads and writes to different addresses,
1759 and we do have the proper page loaded for write, this shouldn't
1760 ever return. But just in case, handle via stop-the-world. */
1761 goto stop_the_world;
1762 }
1763
1764 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1765
1766 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1767 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1768 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1769 }
1770
1771 return hostaddr;
1772
1773 stop_the_world:
1774 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1775 }
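/*
 * Editor's note, a worked example of the two alignment checks above:
 * for a 4-byte atomic (MO_32, so s_bits == 2) with MO_ALIGN requested,
 * a_bits == 2 as well, so addr == 0x1002 fails the guest check
 * (0x1002 & 3 != 0) and raises the target's alignment fault.  Without
 * MO_ALIGN, a_bits == 0, the guest check is skipped and the same address
 * fails the qemu check instead, taking the stop_the_world path:
 * cpu_loop_exit_atomic() raises EXCP_ATOMIC and the operation is retried
 * under exclusive (stop-the-world) execution.
 */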
1776
1777 /*
1778 * Load Helpers
1779 *
1780 * We support two different access types. SOFTMMU_CODE_ACCESS is
1781 * specifically for reading instructions from system memory. It is
1782 * called by the translation loop and in some helpers where the code
1783 * is disassembled. It shouldn't be called directly by guest code.
1784 */
1785
1786 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1787 TCGMemOpIdx oi, uintptr_t retaddr);
1788
1789 static inline uint64_t QEMU_ALWAYS_INLINE
1790 load_memop(const void *haddr, MemOp op)
1791 {
1792 switch (op) {
1793 case MO_UB:
1794 return ldub_p(haddr);
1795 case MO_BEUW:
1796 return lduw_be_p(haddr);
1797 case MO_LEUW:
1798 return lduw_le_p(haddr);
1799 case MO_BEUL:
1800 return (uint32_t)ldl_be_p(haddr);
1801 case MO_LEUL:
1802 return (uint32_t)ldl_le_p(haddr);
1803 case MO_BEQ:
1804 return ldq_be_p(haddr);
1805 case MO_LEQ:
1806 return ldq_le_p(haddr);
1807 default:
1808 qemu_build_not_reached();
1809 }
1810 }
1811
1812 static inline uint64_t QEMU_ALWAYS_INLINE
1813 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1814 uintptr_t retaddr, MemOp op, bool code_read,
1815 FullLoadHelper *full_load)
1816 {
1817 uintptr_t mmu_idx = get_mmuidx(oi);
1818 uintptr_t index = tlb_index(env, mmu_idx, addr);
1819 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1820 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1821 const size_t tlb_off = code_read ?
1822 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1823 const MMUAccessType access_type =
1824 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1825 unsigned a_bits = get_alignment_bits(get_memop(oi));
1826 void *haddr;
1827 uint64_t res;
1828 size_t size = memop_size(op);
1829
1830 /* Handle CPU-specific unaligned behaviour */
1831 if (addr & ((1 << a_bits) - 1)) {
1832 cpu_unaligned_access(env_cpu(env), addr, access_type,
1833 mmu_idx, retaddr);
1834 }
1835
1836 /* If the TLB entry is for a different page, reload and try again. */
1837 if (!tlb_hit(tlb_addr, addr)) {
1838 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1839 addr & TARGET_PAGE_MASK)) {
1840 tlb_fill(env_cpu(env), addr, size,
1841 access_type, mmu_idx, retaddr);
1842 index = tlb_index(env, mmu_idx, addr);
1843 entry = tlb_entry(env, mmu_idx, addr);
1844 }
1845 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1846 tlb_addr &= ~TLB_INVALID_MASK;
1847 }
1848
1849 /* Handle anything that isn't just a straight memory access. */
1850 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1851 CPUIOTLBEntry *iotlbentry;
1852 bool need_swap;
1853
1854 /* For anything that is unaligned, recurse through full_load. */
1855 if ((addr & (size - 1)) != 0) {
1856 goto do_unaligned_access;
1857 }
1858
1859 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1860
1861 /* Handle watchpoints. */
1862 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1863 /* On watchpoint hit, this will longjmp out. */
1864 cpu_check_watchpoint(env_cpu(env), addr, size,
1865 iotlbentry->attrs, BP_MEM_READ, retaddr);
1866 }
1867
1868 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
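/*
 * Editor's note: "op ^ (need_swap * MO_BSWAP)" in the paths below flips
 * the endianness of the MemOp only when TLB_BSWAP was set on the entry;
 * e.g. MO_LEUL ^ MO_BSWAP == MO_BEUL, so a little-endian guest access to
 * a byte-swapped region is carried out as a big-endian one.
 */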
1869
1870 /* Handle I/O access. */
1871 if (likely(tlb_addr & TLB_MMIO)) {
1872 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1873 access_type, op ^ (need_swap * MO_BSWAP));
1874 }
1875
1876 haddr = (void *)((uintptr_t)addr + entry->addend);
1877
1878 /*
1879 * Keep these two load_memop separate to ensure that the compiler
1880 * is able to fold the entire function to a single instruction.
1881 * There is a build-time assert inside to remind you of this. ;-)
1882 */
1883 if (unlikely(need_swap)) {
1884 return load_memop(haddr, op ^ MO_BSWAP);
1885 }
1886 return load_memop(haddr, op);
1887 }
1888
1889 /* Handle slow unaligned access (it spans two pages or IO). */
1890 if (size > 1
1891 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1892 >= TARGET_PAGE_SIZE)) {
1893 target_ulong addr1, addr2;
1894 uint64_t r1, r2;
1895 unsigned shift;
1896 do_unaligned_access:
1897 addr1 = addr & ~((target_ulong)size - 1);
1898 addr2 = addr1 + size;
1899 r1 = full_load(env, addr1, oi, retaddr);
1900 r2 = full_load(env, addr2, oi, retaddr);
1901 shift = (addr & (size - 1)) * 8;
1902
1903 if (memop_big_endian(op)) {
1904 /* Big-endian combine. */
1905 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1906 } else {
1907 /* Little-endian combine. */
1908 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1909 }
1910 return res & MAKE_64BIT_MASK(0, size * 8);
1911 }
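/*
 * Editor's note, a worked example of the combine above (assuming 4KiB
 * pages): a 4-byte little-endian load at addr = 0xffe spans two pages.
 * addr1 = 0xffc, addr2 = 0x1000, shift = 16, so res = (r1 >> 16) |
 * (r2 << 16), masked to 32 bits: the top two bytes of the first aligned
 * word joined with the bottom two bytes of the second, i.e. bytes
 * 0xffe..0x1001 assembled little-endian.
 */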
1912
1913 haddr = (void *)((uintptr_t)addr + entry->addend);
1914 return load_memop(haddr, op);
1915 }
1916
1917 /*
1918 * For the benefit of TCG generated code, we want to avoid the
1919 * complication of ABI-specific return type promotion and always
1920 * return a value extended to the register size of the host. This is
1921 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1922 * data, for which we always use uint64_t.
1923 *
1924 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1925 */
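/*
 * Editor's illustration (hypothetical direct caller): TCG-generated code
 * reaches the helpers below with the MemOp and mmu index packed into a
 * single TCGMemOpIdx, roughly equivalent to:
 *
 *     TCGMemOpIdx oi = make_memop_idx(MO_LEUL | MO_ALIGN, mmu_idx);
 *     uint32_t val = helper_le_ldul_mmu(env, addr, oi, GETPC());
 *
 * C helpers should instead use the cpu_ldst.h wrappers further down,
 * which do this packaging (plus tracing) themselves.
 */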
1926
1927 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1928 TCGMemOpIdx oi, uintptr_t retaddr)
1929 {
1930 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1931 }
1932
1933 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1934 TCGMemOpIdx oi, uintptr_t retaddr)
1935 {
1936 return full_ldub_mmu(env, addr, oi, retaddr);
1937 }
1938
1939 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1940 TCGMemOpIdx oi, uintptr_t retaddr)
1941 {
1942 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1943 full_le_lduw_mmu);
1944 }
1945
1946 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1947 TCGMemOpIdx oi, uintptr_t retaddr)
1948 {
1949 return full_le_lduw_mmu(env, addr, oi, retaddr);
1950 }
1951
1952 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1953 TCGMemOpIdx oi, uintptr_t retaddr)
1954 {
1955 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1956 full_be_lduw_mmu);
1957 }
1958
1959 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1960 TCGMemOpIdx oi, uintptr_t retaddr)
1961 {
1962 return full_be_lduw_mmu(env, addr, oi, retaddr);
1963 }
1964
1965 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1966 TCGMemOpIdx oi, uintptr_t retaddr)
1967 {
1968 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1969 full_le_ldul_mmu);
1970 }
1971
1972 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1973 TCGMemOpIdx oi, uintptr_t retaddr)
1974 {
1975 return full_le_ldul_mmu(env, addr, oi, retaddr);
1976 }
1977
1978 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1979 TCGMemOpIdx oi, uintptr_t retaddr)
1980 {
1981 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1982 full_be_ldul_mmu);
1983 }
1984
1985 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1986 TCGMemOpIdx oi, uintptr_t retaddr)
1987 {
1988 return full_be_ldul_mmu(env, addr, oi, retaddr);
1989 }
1990
1991 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1992 TCGMemOpIdx oi, uintptr_t retaddr)
1993 {
1994 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1995 helper_le_ldq_mmu);
1996 }
1997
1998 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1999 TCGMemOpIdx oi, uintptr_t retaddr)
2000 {
2001 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
2002 helper_be_ldq_mmu);
2003 }
2004
2005 /*
2006 * Provide signed versions of the load routines as well. We can of course
2007 * avoid this for 64-bit data, or for 32-bit data on a 32-bit host.
2008 */
2009
2010
2011 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
2012 TCGMemOpIdx oi, uintptr_t retaddr)
2013 {
2014 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
2015 }
2016
2017 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
2018 TCGMemOpIdx oi, uintptr_t retaddr)
2019 {
2020 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
2021 }
2022
2023 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
2024 TCGMemOpIdx oi, uintptr_t retaddr)
2025 {
2026 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
2027 }
2028
2029 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
2030 TCGMemOpIdx oi, uintptr_t retaddr)
2031 {
2032 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
2033 }
2034
2035 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
2036 TCGMemOpIdx oi, uintptr_t retaddr)
2037 {
2038 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
2039 }
2040
2041 /*
2042 * Load helpers for cpu_ldst.h.
2043 */
2044
2045 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
2046 int mmu_idx, uintptr_t retaddr,
2047 MemOp op, FullLoadHelper *full_load)
2048 {
2049 uint16_t meminfo;
2050 TCGMemOpIdx oi;
2051 uint64_t ret;
2052
2053 meminfo = trace_mem_get_info(op, mmu_idx, false);
2054 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2055
2056 op &= ~MO_SIGN;
2057 oi = make_memop_idx(op, mmu_idx);
2058 ret = full_load(env, addr, oi, retaddr);
2059
2060 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2061
2062 return ret;
2063 }
2064
2065 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2066 int mmu_idx, uintptr_t ra)
2067 {
2068 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
2069 }
2070
2071 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2072 int mmu_idx, uintptr_t ra)
2073 {
2074 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
2075 full_ldub_mmu);
2076 }
2077
2078 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2079 int mmu_idx, uintptr_t ra)
2080 {
2081 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
2082 }
2083
2084 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2085 int mmu_idx, uintptr_t ra)
2086 {
2087 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
2088 full_be_lduw_mmu);
2089 }
2090
2091 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2092 int mmu_idx, uintptr_t ra)
2093 {
2094 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
2095 }
2096
2097 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2098 int mmu_idx, uintptr_t ra)
2099 {
2100 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
2101 }
2102
2103 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2104 int mmu_idx, uintptr_t ra)
2105 {
2106 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
2107 }
2108
2109 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2110 int mmu_idx, uintptr_t ra)
2111 {
2112 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
2113 full_le_lduw_mmu);
2114 }
2115
2116 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2117 int mmu_idx, uintptr_t ra)
2118 {
2119 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
2120 }
2121
2122 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
2123 int mmu_idx, uintptr_t ra)
2124 {
2125 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
2126 }
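/*
 * Editor's illustration (hypothetical target helper): the wrappers above
 * are what target code is expected to call, e.g. to read a 32-bit
 * little-endian word from a specific mmu index with faults unwinding to
 * the caller:
 *
 *     uint32_t desc = cpu_ldl_le_mmuidx_ra(env, base, mmu_idx, GETPC());
 *
 * The *_data_ra variants below simply substitute the current data mmu
 * index via cpu_mmu_index().
 */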
2127
2128 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
2129 uintptr_t retaddr)
2130 {
2131 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2132 }
2133
2134 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2135 {
2136 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2137 }
2138
2139 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
2140 uintptr_t retaddr)
2141 {
2142 return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2143 }
2144
2145 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2146 {
2147 return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2148 }
2149
2150 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
2151 uintptr_t retaddr)
2152 {
2153 return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2154 }
2155
2156 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
2157 uintptr_t retaddr)
2158 {
2159 return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2160 }
2161
2162 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
2163 uintptr_t retaddr)
2164 {
2165 return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2166 }
2167
2168 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
2169 {
2170 return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2171 }
2172
2173 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
2174 uintptr_t retaddr)
2175 {
2176 return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2177 }
2178
2179 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
2180 uintptr_t retaddr)
2181 {
2182 return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
2183 }
2184
2185 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
2186 {
2187 return cpu_ldub_data_ra(env, ptr, 0);
2188 }
2189
2190 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
2191 {
2192 return cpu_ldsb_data_ra(env, ptr, 0);
2193 }
2194
2195 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
2196 {
2197 return cpu_lduw_be_data_ra(env, ptr, 0);
2198 }
2199
2200 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
2201 {
2202 return cpu_ldsw_be_data_ra(env, ptr, 0);
2203 }
2204
2205 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
2206 {
2207 return cpu_ldl_be_data_ra(env, ptr, 0);
2208 }
2209
2210 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
2211 {
2212 return cpu_ldq_be_data_ra(env, ptr, 0);
2213 }
2214
2215 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
2216 {
2217 return cpu_lduw_le_data_ra(env, ptr, 0);
2218 }
2219
2220 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
2221 {
2222 return cpu_ldsw_le_data_ra(env, ptr, 0);
2223 }
2224
2225 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
2226 {
2227 return cpu_ldl_le_data_ra(env, ptr, 0);
2228 }
2229
2230 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
2231 {
2232 return cpu_ldq_le_data_ra(env, ptr, 0);
2233 }
2234
2235 /*
2236 * Store Helpers
2237 */
2238
2239 static inline void QEMU_ALWAYS_INLINE
2240 store_memop(void *haddr, uint64_t val, MemOp op)
2241 {
2242 switch (op) {
2243 case MO_UB:
2244 stb_p(haddr, val);
2245 break;
2246 case MO_BEUW:
2247 stw_be_p(haddr, val);
2248 break;
2249 case MO_LEUW:
2250 stw_le_p(haddr, val);
2251 break;
2252 case MO_BEUL:
2253 stl_be_p(haddr, val);
2254 break;
2255 case MO_LEUL:
2256 stl_le_p(haddr, val);
2257 break;
2258 case MO_BEQ:
2259 stq_be_p(haddr, val);
2260 break;
2261 case MO_LEQ:
2262 stq_le_p(haddr, val);
2263 break;
2264 default:
2265 qemu_build_not_reached();
2266 }
2267 }
2268
2269 static void __attribute__((noinline))
2270 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
2271 uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
2272 bool big_endian)
2273 {
2274 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2275 uintptr_t index, index2;
2276 CPUTLBEntry *entry, *entry2;
2277 target_ulong page2, tlb_addr, tlb_addr2;
2278 TCGMemOpIdx oi;
2279 size_t size2;
2280 int i;
2281
2282 /*
2283 * Ensure the second page is in the TLB. Note that the first page
2284 * is already guaranteed to be filled, and that the second page
2285 * cannot evict the first.
2286 */
2287 page2 = (addr + size) & TARGET_PAGE_MASK;
2288 size2 = (addr + size) & ~TARGET_PAGE_MASK;
2289 index2 = tlb_index(env, mmu_idx, page2);
2290 entry2 = tlb_entry(env, mmu_idx, page2);
2291
2292 tlb_addr2 = tlb_addr_write(entry2);
2293 if (!tlb_hit_page(tlb_addr2, page2)) {
2294 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2295 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2296 mmu_idx, retaddr);
2297 index2 = tlb_index(env, mmu_idx, page2);
2298 entry2 = tlb_entry(env, mmu_idx, page2);
2299 }
2300 tlb_addr2 = tlb_addr_write(entry2);
2301 }
2302
2303 index = tlb_index(env, mmu_idx, addr);
2304 entry = tlb_entry(env, mmu_idx, addr);
2305 tlb_addr = tlb_addr_write(entry);
2306
2307 /*
2308 * Handle watchpoints. Since this may trap, all checks
2309 * must happen before any store.
2310 */
2311 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2312 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2313 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2314 BP_MEM_WRITE, retaddr);
2315 }
2316 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2317 cpu_check_watchpoint(env_cpu(env), page2, size2,
2318 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2319 BP_MEM_WRITE, retaddr);
2320 }
2321
2322 /*
2323 * XXX: not efficient, but simple.
2324 * This loop must go in the forward direction to avoid issues
2325 * with self-modifying code on 64-bit Windows.
2326 */
2327 oi = make_memop_idx(MO_UB, mmu_idx);
2328 if (big_endian) {
2329 for (i = 0; i < size; ++i) {
2330 /* Big-endian extract. */
2331 uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
2332 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2333 }
2334 } else {
2335 for (i = 0; i < size; ++i) {
2336 /* Little-endian extract. */
2337 uint8_t val8 = val >> (i * 8);
2338 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2339 }
2340 }
2341 }
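/*
 * Editor's note, a worked example of the byte-at-a-time store above:
 * for val = 0x11223344, size = 4, big_endian, the extracted bytes are
 * val >> 24, val >> 16, val >> 8, val >> 0, i.e. 0x11, 0x22, 0x33, 0x44,
 * stored at addr+0..addr+3; the little-endian case stores 0x44, 0x33,
 * 0x22, 0x11 instead.
 */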
2342
2343 static inline void QEMU_ALWAYS_INLINE
2344 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2345 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
2346 {
2347 uintptr_t mmu_idx = get_mmuidx(oi);
2348 uintptr_t index = tlb_index(env, mmu_idx, addr);
2349 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
2350 target_ulong tlb_addr = tlb_addr_write(entry);
2351 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
2352 unsigned a_bits = get_alignment_bits(get_memop(oi));
2353 void *haddr;
2354 size_t size = memop_size(op);
2355
2356 /* Handle CPU-specific unaligned behaviour */
2357 if (addr & ((1 << a_bits) - 1)) {
2358 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
2359 mmu_idx, retaddr);
2360 }
2361
2362 /* If the TLB entry is for a different page, reload and try again. */
2363 if (!tlb_hit(tlb_addr, addr)) {
2364 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
2365 addr & TARGET_PAGE_MASK)) {
2366 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
2367 mmu_idx, retaddr);
2368 index = tlb_index(env, mmu_idx, addr);
2369 entry = tlb_entry(env, mmu_idx, addr);
2370 }
2371 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
2372 }
2373
2374 /* Handle anything that isn't just a straight memory access. */
2375 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
2376 CPUIOTLBEntry *iotlbentry;
2377 bool need_swap;
2378
2379 /* For anything that is unaligned, recurse through byte stores. */
2380 if ((addr & (size - 1)) != 0) {
2381 goto do_unaligned_access;
2382 }
2383
2384 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
2385
2386 /* Handle watchpoints. */
2387 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2388 /* On watchpoint hit, this will longjmp out. */
2389 cpu_check_watchpoint(env_cpu(env), addr, size,
2390 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
2391 }
2392
2393 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
2394
2395 /* Handle I/O access. */
2396 if (tlb_addr & TLB_MMIO) {
2397 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
2398 op ^ (need_swap * MO_BSWAP));
2399 return;
2400 }
2401
2402 /* Ignore writes to ROM. */
2403 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
2404 return;
2405 }
2406
2407 /* Handle clean RAM pages. */
2408 if (tlb_addr & TLB_NOTDIRTY) {
2409 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
2410 }
2411
2412 haddr = (void *)((uintptr_t)addr + entry->addend);
2413
2414 /*
2415 * Keep these two store_memop separate to ensure that the compiler
2416 * is able to fold the entire function to a single instruction.
2417 * There is a build-time assert inside to remind you of this. ;-)
2418 */
2419 if (unlikely(need_swap)) {
2420 store_memop(haddr, val, op ^ MO_BSWAP);
2421 } else {
2422 store_memop(haddr, val, op);
2423 }
2424 return;
2425 }
2426
2427 /* Handle slow unaligned access (it spans two pages or IO). */
2428 if (size > 1
2429 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
2430 >= TARGET_PAGE_SIZE)) {
2431 do_unaligned_access:
2432 store_helper_unaligned(env, addr, val, retaddr, size,
2433 mmu_idx, memop_big_endian(op));
2434 return;
2435 }
2436
2437 haddr = (void *)((uintptr_t)addr + entry->addend);
2438 store_memop(haddr, val, op);
2439 }
2440
2441 void __attribute__((noinline))
2442 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2443 TCGMemOpIdx oi, uintptr_t retaddr)
2444 {
2445 store_helper(env, addr, val, oi, retaddr, MO_UB);
2446 }
2447
2448 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2449 TCGMemOpIdx oi, uintptr_t retaddr)
2450 {
2451 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2452 }
2453
2454 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2455 TCGMemOpIdx oi, uintptr_t retaddr)
2456 {
2457 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2458 }
2459
2460 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2461 TCGMemOpIdx oi, uintptr_t retaddr)
2462 {
2463 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2464 }
2465
2466 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2467 TCGMemOpIdx oi, uintptr_t retaddr)
2468 {
2469 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2470 }
2471
2472 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2473 TCGMemOpIdx oi, uintptr_t retaddr)
2474 {
2475 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2476 }
2477
2478 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2479 TCGMemOpIdx oi, uintptr_t retaddr)
2480 {
2481 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2482 }
2483
2484 /*
2485 * Store Helpers for cpu_ldst.h
2486 */
2487
2488 static inline void QEMU_ALWAYS_INLINE
2489 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2490 int mmu_idx, uintptr_t retaddr, MemOp op)
2491 {
2492 TCGMemOpIdx oi;
2493 uint16_t meminfo;
2494
2495 meminfo = trace_mem_get_info(op, mmu_idx, true);
2496 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2497
2498 oi = make_memop_idx(op, mmu_idx);
2499 store_helper(env, addr, val, oi, retaddr, op);
2500
2501 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2502 }
2503
2504 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2505 int mmu_idx, uintptr_t retaddr)
2506 {
2507 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2508 }
2509
2510 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2511 int mmu_idx, uintptr_t retaddr)
2512 {
2513 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2514 }
2515
2516 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2517 int mmu_idx, uintptr_t retaddr)
2518 {
2519 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2520 }
2521
2522 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2523 int mmu_idx, uintptr_t retaddr)
2524 {
2525 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2526 }
2527
2528 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2529 int mmu_idx, uintptr_t retaddr)
2530 {
2531 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2532 }
2533
2534 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2535 int mmu_idx, uintptr_t retaddr)
2536 {
2537 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2538 }
2539
2540 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2541 int mmu_idx, uintptr_t retaddr)
2542 {
2543 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2544 }
2545
2546 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2547 uint32_t val, uintptr_t retaddr)
2548 {
2549 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2550 }
2551
2552 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2553 uint32_t val, uintptr_t retaddr)
2554 {
2555 cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2556 }
2557
2558 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2559 uint32_t val, uintptr_t retaddr)
2560 {
2561 cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2562 }
2563
2564 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2565 uint64_t val, uintptr_t retaddr)
2566 {
2567 cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2568 }
2569
2570 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2571 uint32_t val, uintptr_t retaddr)
2572 {
2573 cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2574 }
2575
2576 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2577 uint32_t val, uintptr_t retaddr)
2578 {
2579 cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2580 }
2581
2582 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2583 uint64_t val, uintptr_t retaddr)
2584 {
2585 cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2586 }
2587
2588 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2589 {
2590 cpu_stb_data_ra(env, ptr, val, 0);
2591 }
2592
2593 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2594 {
2595 cpu_stw_be_data_ra(env, ptr, val, 0);
2596 }
2597
2598 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2599 {
2600 cpu_stl_be_data_ra(env, ptr, val, 0);
2601 }
2602
2603 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2604 {
2605 cpu_stq_be_data_ra(env, ptr, val, 0);
2606 }
2607
2608 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2609 {
2610 cpu_stw_le_data_ra(env, ptr, val, 0);
2611 }
2612
2613 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2614 {
2615 cpu_stl_le_data_ra(env, ptr, val, 0);
2616 }
2617
2618 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2619 {
2620 cpu_stq_le_data_ra(env, ptr, val, 0);
2621 }
2622
2623 /* The first set of helpers allows passing in OI and RETADDR, which makes
2624 them callable from other helpers. */
2625
2626 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
2627 #define ATOMIC_NAME(X) \
2628 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
2629 #define ATOMIC_MMU_DECLS
2630 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
2631 #define ATOMIC_MMU_CLEANUP
2632 #define ATOMIC_MMU_IDX get_mmuidx(oi)
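/*
 * Editor's note: with these definitions, atomic_template.h instantiates
 * the retaddr-taking variants; e.g. for DATA_SIZE 4, ATOMIC_NAME(cmpxchg)
 * expands to helper_atomic_cmpxchgl_le_mmu (and _be_ for the big-endian
 * instantiation), each of which obtains its host address through the
 * ATOMIC_MMU_LOOKUP defined above.
 */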
2633
2634 #include "atomic_common.c.inc"
2635
2636 #define DATA_SIZE 1
2637 #include "atomic_template.h"
2638
2639 #define DATA_SIZE 2
2640 #include "atomic_template.h"
2641
2642 #define DATA_SIZE 4
2643 #include "atomic_template.h"
2644
2645 #ifdef CONFIG_ATOMIC64
2646 #define DATA_SIZE 8
2647 #include "atomic_template.h"
2648 #endif
2649
2650 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2651 #define DATA_SIZE 16
2652 #include "atomic_template.h"
2653 #endif
2654
2655 /* The second set of helpers is callable directly from generated TCG code. */
2656
2657 #undef EXTRA_ARGS
2658 #undef ATOMIC_NAME
2659 #undef ATOMIC_MMU_LOOKUP
2660 #define EXTRA_ARGS , TCGMemOpIdx oi
2661 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2662 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
2663
2664 #define DATA_SIZE 1
2665 #include "atomic_template.h"
2666
2667 #define DATA_SIZE 2
2668 #include "atomic_template.h"
2669
2670 #define DATA_SIZE 4
2671 #include "atomic_template.h"
2672
2673 #ifdef CONFIG_ATOMIC64
2674 #define DATA_SIZE 8
2675 #include "atomic_template.h"
2676 #endif
2677 #undef ATOMIC_MMU_IDX
2678
2679 /* Code access functions. */
2680
2681 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2682 TCGMemOpIdx oi, uintptr_t retaddr)
2683 {
2684 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2685 }
2686
2687 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2688 {
2689 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2690 return full_ldub_code(env, addr, oi, 0);
2691 }
2692
2693 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2694 TCGMemOpIdx oi, uintptr_t retaddr)
2695 {
2696 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2697 }
2698
2699 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2700 {
2701 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2702 return full_lduw_code(env, addr, oi, 0);
2703 }
2704
2705 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2706 TCGMemOpIdx oi, uintptr_t retaddr)
2707 {
2708 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2709 }
2710
2711 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2712 {
2713 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2714 return full_ldl_code(env, addr, oi, 0);
2715 }
2716
2717 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2718 TCGMemOpIdx oi, uintptr_t retaddr)
2719 {
2720 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2721 }
2722
2723 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2724 {
2725 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2726 return full_ldq_code(env, addr, oi, 0);
2727 }
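/*
 * Editor's illustration (hypothetical translator snippet): a target's
 * translate.c fetches guest instructions through the helpers above in
 * target endianness, e.g.:
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 *
 * which uses the instruction-fetch mmu index from cpu_mmu_index(env, true)
 * and compares against the addr_code TLB field rather than addr_read.
 */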